diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh index 844c7b6af1..b5fa065a81 100755 --- a/.devcontainer/post_create_command.sh +++ b/.devcontainer/post_create_command.sh @@ -7,7 +7,7 @@ cd web && pnpm install pipx install uv echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc -echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution\"" >> ~/.bashrc +echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,dataset_summary,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution\"" >> ~/.bashrc echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev:inspect\"" >> ~/.bashrc echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc diff --git a/.github/actions/setup-web/action.yml b/.github/actions/setup-web/action.yml new file mode 100644 index 0000000000..54702c914a --- /dev/null +++ b/.github/actions/setup-web/action.yml @@ -0,0 +1,13 @@ +name: Setup Web Environment + +runs: + using: composite + steps: + - name: Setup Vite+ + uses: voidzero-dev/setup-vp@b5d848f5a62488f3d3d920f8aa6ac318a60c5f07 # v1 + with: + node-version-file: "./web/.nvmrc" + cache: true + run-install: | + - cwd: ./web + args: ['--frozen-lockfile'] diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 78f6eefd0d..a183f0b58c 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -3,35 +3,210 @@ version: 2 updates: - package-ecosystem: "pip" directory: "/api" - open-pull-requests-limit: 2 + open-pull-requests-limit: 10 schedule: interval: "weekly" groups: - python-dependencies: + flask: + patterns: + - "flask" + - "flask-*" + - "werkzeug" + - "gunicorn" + google: + patterns: + - "google-*" + - "googleapis-*" + opentelemetry: + patterns: + - "opentelemetry-*" + pydantic: + patterns: + - "pydantic" + - "pydantic-*" + llm: + patterns: + - "langfuse" + - "langsmith" + - "litellm" + - "mlflow*" + - "opik" + - "weave*" + - "arize*" + - "tiktoken" + - "transformers" + database: + patterns: + - "sqlalchemy" + - "psycopg2*" + - "psycogreen" + - "redis*" + - "alembic*" + storage: + patterns: + - "boto3*" + - "botocore*" + - "azure-*" + - "bce-*" + - "cos-python-*" + - "esdk-obs-*" + - "google-cloud-storage" + - "opendal" + - "oss2" + - "supabase*" + - "tos*" + vdb: + patterns: + - "alibabacloud*" + - "chromadb" + - "clickhouse-*" + - "clickzetta-*" + - "couchbase" + - "elasticsearch" + - "opensearch-py" + - "oracledb" + - "pgvect*" + - "pymilvus" + - "pymochow" + - "pyobvector" + - "qdrant-client" + - "intersystems-*" + - "tablestore" + - "tcvectordb" + - "tidb-vector" + - "upstash-*" + - "volcengine-*" + - "weaviate-*" + - "xinference-*" + - "mo-vector" + - "mysql-connector-*" + dev: + patterns: + - "coverage" + - "dotenv-linter" + - "faker" + - "lxml-stubs" + - "basedpyright" + - "ruff" + - "pytest*" + - "types-*" + - "boto3-stubs" + - "hypothesis" + - "pandas-stubs" + - "scipy-stubs" + - "import-linter" + - "celery-types" + - "mypy*" + - "pyrefly" + python-packages: patterns: - "*" - package-ecosystem: "uv" directory: "/api" - open-pull-requests-limit: 2 + open-pull-requests-limit: 10 schedule: interval: "weekly" groups: - uv-dependencies: + flask: + patterns: + - "flask" + - "flask-*" + - "werkzeug" + - "gunicorn" + google: + patterns: + - "google-*" + - "googleapis-*" + opentelemetry: + patterns: + - "opentelemetry-*" + pydantic: + patterns: + - "pydantic" + - "pydantic-*" + llm: + patterns: + - "langfuse" + - "langsmith" + - "litellm" + - "mlflow*" + - "opik" + - "weave*" + - "arize*" + - "tiktoken" + - "transformers" + database: + patterns: + - "sqlalchemy" + - "psycopg2*" + - "psycogreen" + - "redis*" + - "alembic*" + storage: + patterns: + - "boto3*" + - "botocore*" + - "azure-*" + - "bce-*" + - "cos-python-*" + - "esdk-obs-*" + - "google-cloud-storage" + - "opendal" + - "oss2" + - "supabase*" + - "tos*" + vdb: + patterns: + - "alibabacloud*" + - "chromadb" + - "clickhouse-*" + - "clickzetta-*" + - "couchbase" + - "elasticsearch" + - "opensearch-py" + - "oracledb" + - "pgvect*" + - "pymilvus" + - "pymochow" + - "pyobvector" + - "qdrant-client" + - "intersystems-*" + - "tablestore" + - "tcvectordb" + - "tidb-vector" + - "upstash-*" + - "volcengine-*" + - "weaviate-*" + - "xinference-*" + - "mo-vector" + - "mysql-connector-*" + dev: + patterns: + - "coverage" + - "dotenv-linter" + - "faker" + - "lxml-stubs" + - "basedpyright" + - "ruff" + - "pytest*" + - "types-*" + - "boto3-stubs" + - "hypothesis" + - "pandas-stubs" + - "scipy-stubs" + - "import-linter" + - "celery-types" + - "mypy*" + - "pyrefly" + python-packages: patterns: - "*" - - package-ecosystem: "npm" - directory: "/web" + - package-ecosystem: "github-actions" + directory: "/" + open-pull-requests-limit: 5 schedule: interval: "weekly" - open-pull-requests-limit: 2 groups: - storybook: - patterns: - - "storybook" - - "@storybook/*" - npm-dependencies: + github-actions-dependencies: patterns: - "*" - exclude-patterns: - - "storybook" - - "@storybook/*" diff --git a/.github/workflows/anti-slop.yml b/.github/workflows/anti-slop.yml new file mode 100644 index 0000000000..c0d1818691 --- /dev/null +++ b/.github/workflows/anti-slop.yml @@ -0,0 +1,19 @@ +name: Anti-Slop PR Check + +on: + pull_request_target: + types: [opened, edited, synchronize] + +permissions: + pull-requests: write + contents: read + +jobs: + anti-slop: + runs-on: ubuntu-latest + steps: + - uses: peakoss/anti-slop@v0 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + close-pr: false + failure-add-pr-labels: "needs-revision" diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index 52e3272f99..deba7d6b30 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -22,12 +22,12 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Setup UV and Python - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 with: enable-cache: true python-version: ${{ matrix.python-version }} @@ -51,7 +51,7 @@ jobs: run: sh .github/workflows/expose_service_ports.sh - name: Set up Sandbox - uses: hoverkraft-tech/compose-action@v2 + uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0 with: compose-file: | docker/docker-compose.middleware.yaml diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 4571fd1cd1..80f892589d 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -12,22 +12,34 @@ jobs: if: github.repository == 'langgenius/dify' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Check Docker Compose inputs id: docker-compose-changes - uses: tj-actions/changed-files@v47 + uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5 with: files: | docker/generate_docker_compose docker/.env.example docker/docker-compose-template.yaml docker/docker-compose.yaml - - uses: actions/setup-python@v6 + - name: Check web inputs + id: web-changes + uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5 + with: + files: | + web/** + - name: Check api inputs + id: api-changes + uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5 + with: + files: | + api/** + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: "3.11" - - uses: astral-sh/setup-uv@v7 + - uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 - name: Generate Docker Compose if: steps.docker-compose-changes.outputs.any_changed == 'true' @@ -35,7 +47,8 @@ jobs: cd docker ./generate_docker_compose - - run: | + - if: steps.api-changes.outputs.any_changed == 'true' + run: | cd api uv sync --dev # fmt first to avoid line too long @@ -46,11 +59,13 @@ jobs: uv run ruff format .. - name: count migration progress + if: steps.api-changes.outputs.any_changed == 'true' run: | cd api ./cnt_base.sh - name: ast-grep + if: steps.api-changes.outputs.any_changed == 'true' run: | # ast-grep exits 1 if no matches are found; allow idempotent runs. uvx --from ast-grep-cli ast-grep --pattern 'db.session.query($WHATEVER).filter($HERE)' --rewrite 'db.session.query($WHATEVER).where($HERE)' -l py --update-all || true @@ -84,4 +99,14 @@ jobs: run: | uvx --python 3.13 mdformat . --exclude ".agents/skills/**" - - uses: autofix-ci/action@635ffb0c9798bd160680f18fd73371e355b85f27 + - name: Setup web environment + if: steps.web-changes.outputs.any_changed == 'true' + uses: ./.github/actions/setup-web + + - name: ESLint autofix + if: steps.web-changes.outputs.any_changed == 'true' + run: | + cd web + vp exec eslint --concurrency=2 --prune-suppressions --quiet || true + + - uses: autofix-ci/action@7a166d7532b277f34e16238930461bf77f9d7ed8 # v1.3.3 diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml index c9ca37166d..6ae8b70e9c 100644 --- a/.github/workflows/build-push.yml +++ b/.github/workflows/build-push.yml @@ -53,26 +53,26 @@ jobs: echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV - name: Login to Docker Hub - uses: docker/login-action@v3 + uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0 with: username: ${{ env.DOCKERHUB_USER }} password: ${{ env.DOCKERHUB_TOKEN }} - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0 - name: Extract metadata for Docker id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0 with: images: ${{ env[matrix.image_name_env] }} - name: Build Docker image id: build - uses: docker/build-push-action@v6 + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0 with: context: "{{defaultContext}}:${{ matrix.context }}" platforms: ${{ matrix.platform }} @@ -93,7 +93,7 @@ jobs: touch "/tmp/digests/${sanitized_digest}" - name: Upload digest - uses: actions/upload-artifact@v6 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: digests-${{ matrix.context }}-${{ env.PLATFORM_PAIR }} path: /tmp/digests/* @@ -115,21 +115,21 @@ jobs: context: "web" steps: - name: Download digests - uses: actions/download-artifact@v7 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: path: /tmp/digests pattern: digests-${{ matrix.context }}-* merge-multiple: true - name: Login to Docker Hub - uses: docker/login-action@v3 + uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0 with: username: ${{ env.DOCKERHUB_USER }} password: ${{ env.DOCKERHUB_TOKEN }} - name: Extract metadata for Docker id: meta - uses: docker/metadata-action@v5 + uses: docker/metadata-action@030e881283bb7a6894de51c315a6bfe6a94e05cf # v6.0.0 with: images: ${{ env[matrix.image_name_env] }} tags: | diff --git a/.github/workflows/db-migration-test.yml b/.github/workflows/db-migration-test.yml index e20cf9850b..570dd3fd8c 100644 --- a/.github/workflows/db-migration-test.yml +++ b/.github/workflows/db-migration-test.yml @@ -13,13 +13,13 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 persist-credentials: false - name: Setup UV and Python - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 with: enable-cache: true python-version: "3.12" @@ -40,7 +40,7 @@ jobs: cp middleware.env.example middleware.env - name: Set up Middlewares - uses: hoverkraft-tech/compose-action@v2.0.2 + uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0 with: compose-file: | docker/docker-compose.middleware.yaml @@ -63,13 +63,13 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 persist-credentials: false - name: Setup UV and Python - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 with: enable-cache: true python-version: "3.12" @@ -94,7 +94,7 @@ jobs: sed -i 's/DB_USERNAME=postgres/DB_USERNAME=mysql/' middleware.env - name: Set up Middlewares - uses: hoverkraft-tech/compose-action@v2.0.2 + uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0 with: compose-file: | docker/docker-compose.middleware.yaml diff --git a/.github/workflows/deploy-agent-dev.yml b/.github/workflows/deploy-agent-dev.yml index dd759f7ba5..cd5fe9242e 100644 --- a/.github/workflows/deploy-agent-dev.yml +++ b/.github/workflows/deploy-agent-dev.yml @@ -19,7 +19,7 @@ jobs: github.event.workflow_run.head_branch == 'deploy/agent-dev' steps: - name: Deploy to server - uses: appleboy/ssh-action@v1 + uses: appleboy/ssh-action@0ff4204d59e8e51228ff73bce53f80d53301dee2 # v1.2.5 with: host: ${{ secrets.AGENT_DEV_SSH_HOST }} username: ${{ secrets.SSH_USER }} diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml index 38fa0b9a7f..954537663a 100644 --- a/.github/workflows/deploy-dev.yml +++ b/.github/workflows/deploy-dev.yml @@ -16,7 +16,7 @@ jobs: github.event.workflow_run.head_branch == 'deploy/dev' steps: - name: Deploy to server - uses: appleboy/ssh-action@v1 + uses: appleboy/ssh-action@0ff4204d59e8e51228ff73bce53f80d53301dee2 # v1.2.5 with: host: ${{ secrets.SSH_HOST }} username: ${{ secrets.SSH_USER }} diff --git a/.github/workflows/deploy-hitl.yml b/.github/workflows/deploy-hitl.yml index a3fd52afc6..c6f1cc7e6f 100644 --- a/.github/workflows/deploy-hitl.yml +++ b/.github/workflows/deploy-hitl.yml @@ -16,7 +16,7 @@ jobs: github.event.workflow_run.head_branch == 'build/feat/hitl' steps: - name: Deploy to server - uses: appleboy/ssh-action@v1 + uses: appleboy/ssh-action@0ff4204d59e8e51228ff73bce53f80d53301dee2 # v1.2.5 with: host: ${{ secrets.HITL_SSH_HOST }} username: ${{ secrets.SSH_USER }} diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index cadc1b5507..340b380dc9 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -32,13 +32,13 @@ jobs: context: "web" steps: - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4.0.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4.0.0 - name: Build Docker Image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@d08e5c354a6adb9ed34480a06d141179aa583294 # v7.0.0 with: push: false context: "{{defaultContext}}:${{ matrix.context }}" diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml index 06782b53c1..278e10bc04 100644 --- a/.github/workflows/labeler.yml +++ b/.github/workflows/labeler.yml @@ -9,6 +9,6 @@ jobs: pull-requests: write runs-on: ubuntu-latest steps: - - uses: actions/labeler@v6 + - uses: actions/labeler@634933edcd8ababfe52f92936142cc22ac488b1b # v6.0.1 with: sync-labels: true diff --git a/.github/workflows/main-ci.yml b/.github/workflows/main-ci.yml index d6653de950..fd104e9496 100644 --- a/.github/workflows/main-ci.yml +++ b/.github/workflows/main-ci.yml @@ -27,8 +27,8 @@ jobs: vdb-changed: ${{ steps.changes.outputs.vdb }} migration-changed: ${{ steps.changes.outputs.migration }} steps: - - uses: actions/checkout@v6 - - uses: dorny/paths-filter@v3 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2 id: changes with: filters: | @@ -39,6 +39,7 @@ jobs: web: - 'web/**' - '.github/workflows/web-tests.yml' + - '.github/actions/setup-web/**' vdb: - 'api/core/rag/datasource/**' - 'docker/**' @@ -61,6 +62,9 @@ jobs: needs: check-changes if: needs.check-changes.outputs.web-changed == 'true' uses: ./.github/workflows/web-tests.yml + with: + base_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.base.sha || github.event.before }} + head_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} style-check: name: Style Check diff --git a/.github/workflows/pyrefly-diff-comment.yml b/.github/workflows/pyrefly-diff-comment.yml index f9fbcba465..0278e1e0d3 100644 --- a/.github/workflows/pyrefly-diff-comment.yml +++ b/.github/workflows/pyrefly-diff-comment.yml @@ -21,7 +21,7 @@ jobs: if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.pull_requests[0].head.repo.full_name != github.repository }} steps: - name: Download pyrefly diff artifact - uses: actions/github-script@v8 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | @@ -49,7 +49,7 @@ jobs: run: unzip -o pyrefly_diff.zip - name: Post comment - uses: actions/github-script@v8 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/pyrefly-diff.yml b/.github/workflows/pyrefly-diff.yml index 14338e85b3..ea152dec97 100644 --- a/.github/workflows/pyrefly-diff.yml +++ b/.github/workflows/pyrefly-diff.yml @@ -17,12 +17,12 @@ jobs: pull-requests: write steps: - name: Checkout PR branch - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 - name: Setup Python & UV - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 with: enable-cache: true @@ -55,7 +55,7 @@ jobs: echo ${{ github.event.pull_request.number }} > pr_number.txt - name: Upload pyrefly diff - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: pyrefly_diff path: | @@ -64,7 +64,7 @@ jobs: - name: Comment PR with pyrefly diff if: ${{ github.event.pull_request.head.repo.full_name == github.repository }} - uses: actions/github-script@v8 + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/semantic-pull-request.yml b/.github/workflows/semantic-pull-request.yml index b15c26a096..c21331ec0d 100644 --- a/.github/workflows/semantic-pull-request.yml +++ b/.github/workflows/semantic-pull-request.yml @@ -16,6 +16,6 @@ jobs: runs-on: ubuntu-latest steps: - name: Check title - uses: amannn/action-semantic-pull-request@v6.1.1 + uses: amannn/action-semantic-pull-request@48f256284bd46cdaab1048c3721360e808335d50 # v6.1.1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index b6df1d7e93..5cf52daed2 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -18,7 +18,7 @@ jobs: pull-requests: write steps: - - uses: actions/stale@v10 + - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0 with: days-before-issue-stale: 15 days-before-issue-close: 3 diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index eb13c3d096..868bacc6e5 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -19,13 +19,13 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Check changed files id: changed-files - uses: tj-actions/changed-files@v47 + uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5 with: files: | api/** @@ -33,7 +33,7 @@ jobs: - name: Setup UV and Python if: steps.changed-files.outputs.any_changed == 'true' - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 with: enable-cache: false python-version: "3.12" @@ -67,42 +67,28 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Check changed files id: changed-files - uses: tj-actions/changed-files@v47 + uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5 with: files: | web/** .github/workflows/style.yml + .github/actions/setup-web/** - - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - package_json_file: web/package.json - run_install: false - - - name: Setup NodeJS - uses: actions/setup-node@v6 + - name: Setup web environment if: steps.changed-files.outputs.any_changed == 'true' - with: - node-version: 22 - cache: pnpm - cache-dependency-path: ./web/pnpm-lock.yaml - - - name: Web dependencies - if: steps.changed-files.outputs.any_changed == 'true' - working-directory: ./web - run: pnpm install --frozen-lockfile + uses: ./.github/actions/setup-web - name: Web style check if: steps.changed-files.outputs.any_changed == 'true' working-directory: ./web run: | - pnpm run lint:ci + vp run lint:ci # pnpm run lint:report # continue-on-error: true @@ -116,17 +102,17 @@ jobs: - name: Web tsslint if: steps.changed-files.outputs.any_changed == 'true' working-directory: ./web - run: pnpm run lint:tss + run: vp run lint:tss - name: Web type check if: steps.changed-files.outputs.any_changed == 'true' working-directory: ./web - run: pnpm run type-check + run: vp run type-check - name: Web dead code check if: steps.changed-files.outputs.any_changed == 'true' working-directory: ./web - run: pnpm run knip + run: vp run knip superlinter: name: SuperLinter @@ -134,14 +120,14 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 persist-credentials: false - name: Check changed files id: changed-files - uses: tj-actions/changed-files@v47 + uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5 with: files: | **.sh @@ -152,7 +138,7 @@ jobs: .editorconfig - name: Super-linter - uses: super-linter/super-linter/slim@v8 + uses: super-linter/super-linter/slim@61abc07d755095a68f4987d1c2c3d1d64408f1f9 # v8.5.0 if: steps.changed-files.outputs.any_changed == 'true' env: BASH_SEVERITY: warning diff --git a/.github/workflows/tool-test-sdks.yaml b/.github/workflows/tool-test-sdks.yaml index d9a1168636..3fc351c0c2 100644 --- a/.github/workflows/tool-test-sdks.yaml +++ b/.github/workflows/tool-test-sdks.yaml @@ -21,12 +21,12 @@ jobs: working-directory: sdks/nodejs-client steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Use Node.js - uses: actions/setup-node@v6 + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 with: node-version: 22 cache: '' diff --git a/.github/workflows/translate-i18n-claude.yml b/.github/workflows/translate-i18n-claude.yml index b431c36a8b..62724c84e5 100644 --- a/.github/workflows/translate-i18n-claude.yml +++ b/.github/workflows/translate-i18n-claude.yml @@ -38,7 +38,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 token: ${{ secrets.GITHUB_TOKEN }} @@ -48,18 +48,8 @@ jobs: git config --global user.name "github-actions[bot]" git config --global user.email "github-actions[bot]@users.noreply.github.com" - - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - package_json_file: web/package.json - run_install: false - - - name: Set up Node.js - uses: actions/setup-node@v6 - with: - node-version: 22 - cache: pnpm - cache-dependency-path: ./web/pnpm-lock.yaml + - name: Setup web environment + uses: ./.github/actions/setup-web - name: Detect changed files and generate diff id: detect_changes @@ -130,7 +120,7 @@ jobs: - name: Run Claude Code for Translation Sync if: steps.detect_changes.outputs.CHANGED_FILES != '' - uses: anthropics/claude-code-action@v1 + uses: anthropics/claude-code-action@26ec041249acb0a944c0a47b6c0c13f05dbc5b44 # v1.0.70 with: anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/trigger-i18n-sync.yml b/.github/workflows/trigger-i18n-sync.yml index 66a29453b4..1caaddd47a 100644 --- a/.github/workflows/trigger-i18n-sync.yml +++ b/.github/workflows/trigger-i18n-sync.yml @@ -21,7 +21,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: fetch-depth: 0 @@ -59,7 +59,7 @@ jobs: - name: Trigger i18n sync workflow if: steps.detect.outputs.has_changes == 'true' - uses: peter-evans/repository-dispatch@v3 + uses: peter-evans/repository-dispatch@28959ce8df70de7be546dd1250a005dd32156697 # v4.0.1 with: token: ${{ secrets.GITHUB_TOKEN }} event-type: i18n-sync diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml index 7735afdaca..84a1182f94 100644 --- a/.github/workflows/vdb-tests.yml +++ b/.github/workflows/vdb-tests.yml @@ -19,19 +19,19 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Free Disk Space - uses: endersonmenezes/free-disk-space@v3 + uses: endersonmenezes/free-disk-space@7901478139cff6e9d44df5972fd8ab8fcade4db1 # v3.2.2 with: remove_dotnet: true remove_haskell: true remove_tool_cache: true - name: Setup UV and Python - uses: astral-sh/setup-uv@v7 + uses: astral-sh/setup-uv@6ee6290f1cbc4156c0bdd66691b2c144ef8df19a # v7.4.0 with: enable-cache: true python-version: ${{ matrix.python-version }} @@ -60,7 +60,7 @@ jobs: # tiflash - name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase, OceanBase) - uses: hoverkraft-tech/compose-action@v2.0.2 + uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0 with: compose-file: | docker/docker-compose.yaml diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml index 659620b2a9..fd2b941ce3 100644 --- a/.github/workflows/web-tests.yml +++ b/.github/workflows/web-tests.yml @@ -2,6 +2,13 @@ name: Web Tests on: workflow_call: + inputs: + base_sha: + required: false + type: string + head_sha: + required: false + type: string permissions: contents: read @@ -14,6 +21,8 @@ jobs: test: name: Web Tests (${{ matrix.shardIndex }}/${{ matrix.shardTotal }}) runs-on: ubuntu-latest + env: + VITEST_COVERAGE_SCOPE: app-components strategy: fail-fast: false matrix: @@ -26,32 +35,19 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - package_json_file: web/package.json - run_install: false - - - name: Setup Node.js - uses: actions/setup-node@v6 - with: - node-version: 22 - cache: pnpm - cache-dependency-path: ./web/pnpm-lock.yaml - - - name: Install dependencies - run: pnpm install --frozen-lockfile + - name: Setup web environment + uses: ./.github/actions/setup-web - name: Run tests - run: pnpm vitest run --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --coverage + run: vp test run --reporter=blob --shard=${{ matrix.shardIndex }}/${{ matrix.shardTotal }} --coverage - name: Upload blob report if: ${{ !cancelled() }} - uses: actions/upload-artifact@v6 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: blob-report-${{ matrix.shardIndex }} path: web/.vitest-reports/* @@ -63,6 +59,8 @@ jobs: if: ${{ !cancelled() }} needs: [test] runs-on: ubuntu-latest + env: + VITEST_COVERAGE_SCOPE: app-components defaults: run: shell: bash @@ -70,35 +68,29 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: + fetch-depth: 0 persist-credentials: false - - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - package_json_file: web/package.json - run_install: false - - - name: Setup Node.js - uses: actions/setup-node@v6 - with: - node-version: 22 - cache: pnpm - cache-dependency-path: ./web/pnpm-lock.yaml - - - name: Install dependencies - run: pnpm install --frozen-lockfile + - name: Setup web environment + uses: ./.github/actions/setup-web - name: Download blob reports - uses: actions/download-artifact@v6 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: path: web/.vitest-reports pattern: blob-report-* merge-multiple: true - name: Merge reports - run: pnpm vitest --merge-reports --coverage --silent=passed-only + run: vp test --merge-reports --reporter=json --reporter=agent --coverage + + - name: Check app/components diff coverage + env: + BASE_SHA: ${{ inputs.base_sha }} + HEAD_SHA: ${{ inputs.head_sha }} + run: node ./scripts/check-components-diff-coverage.mjs - name: Coverage Summary if: always() @@ -419,7 +411,7 @@ jobs: - name: Upload Coverage Artifact if: steps.coverage-summary.outputs.has_coverage == 'true' - uses: actions/upload-artifact@v6 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: web-coverage-report path: web/coverage @@ -435,38 +427,24 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: persist-credentials: false - name: Check changed files id: changed-files - uses: tj-actions/changed-files@v47 + uses: tj-actions/changed-files@22103cc46bda19c2b464ffe86db46df6922fd323 # v47.0.5 with: files: | web/** .github/workflows/web-tests.yml + .github/actions/setup-web/** - - name: Install pnpm - uses: pnpm/action-setup@v4 - with: - package_json_file: web/package.json - run_install: false - - - name: Setup NodeJS - uses: actions/setup-node@v6 + - name: Setup web environment if: steps.changed-files.outputs.any_changed == 'true' - with: - node-version: 22 - cache: pnpm - cache-dependency-path: ./web/pnpm-lock.yaml - - - name: Web dependencies - if: steps.changed-files.outputs.any_changed == 'true' - working-directory: ./web - run: pnpm install --frozen-lockfile + uses: ./.github/actions/setup-web - name: Web build check if: steps.changed-files.outputs.any_changed == 'true' working-directory: ./web - run: pnpm run build + run: vp run build diff --git a/.vscode/launch.json.template b/.vscode/launch.json.template index 700b815c3b..c3e2c50c52 100644 --- a/.vscode/launch.json.template +++ b/.vscode/launch.json.template @@ -37,7 +37,7 @@ "-c", "1", "-Q", - "dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution", + "dataset,dataset_summary,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution", "--loglevel", "INFO" ], diff --git a/Makefile b/Makefile index 0aff26b3e5..55871c86a7 100644 --- a/Makefile +++ b/Makefile @@ -68,8 +68,9 @@ lint: @echo "✅ Linting complete" type-check: - @echo "📝 Running type checks (basedpyright + mypy)..." + @echo "📝 Running type checks (basedpyright + pyrefly + mypy)..." @./dev/basedpyright-check $(PATH_TO_CHECK) + @./dev/pyrefly-check-local @uv --directory api run mypy --exclude-gitignore --exclude 'tests/' --exclude 'migrations/' --check-untyped-defs --disable-error-code=import-untyped . @echo "✅ Type checks complete" @@ -131,7 +132,7 @@ help: @echo " make format - Format code with ruff" @echo " make check - Check code with ruff" @echo " make lint - Format, fix, and lint code (ruff, imports, dotenv)" - @echo " make type-check - Run type checks (basedpyright, mypy)" + @echo " make type-check - Run type checks (basedpyright, pyrefly, mypy)" @echo " make test - Run backend unit tests (or TARGET_TESTS=./api/tests/)" @echo "" @echo "Docker Build Targets:" diff --git a/README.md b/README.md index 90961a5346..bef8f6b782 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ README in বাংলা

-Dify is an open-source platform for developing LLM applications. Its intuitive interface combines agentic AI workflows, RAG pipelines, agent capabilities, model management, observability features, and more—allowing you to quickly move from prototype to production. +Dify is an open-source LLM app development platform. Its intuitive interface combines AI workflow, RAG pipeline, agent capabilities, model management, observability features (including [Opik](https://www.comet.com/docs/opik/integrations/dify), [Langfuse](https://docs.langfuse.com), and [Arize Phoenix](https://docs.arize.com/phoenix)) and more, letting you quickly go from prototype to production. Here's a list of the core features: ## Quick start @@ -133,7 +133,7 @@ Star Dify on GitHub and be instantly notified of new releases. ### Custom configurations -If you need to customize the configuration, please refer to the comments in our [.env.example](docker/.env.example) file and update the corresponding values in your `.env` file. Additionally, you might need to make adjustments to the `docker-compose.yaml` file itself, such as changing image versions, port mappings, or volume mounts, based on your specific deployment environment and requirements. After making any changes, please re-run `docker-compose up -d`. You can find the full list of available environment variables [here](https://docs.dify.ai/getting-started/install-self-hosted/environments). +If you need to customize the configuration, please refer to the comments in our [.env.example](docker/.env.example) file and update the corresponding values in your `.env` file. Additionally, you might need to make adjustments to the `docker-compose.yaml` file itself, such as changing image versions, port mappings, or volume mounts, based on your specific deployment environment and requirements. After making any changes, please re-run `docker compose up -d`. You can find the full list of available environment variables [here](https://docs.dify.ai/getting-started/install-self-hosted/environments). #### Customizing Suggested Questions diff --git a/api/.env.example b/api/.env.example index 9ee733831b..1749c615cc 100644 --- a/api/.env.example +++ b/api/.env.example @@ -191,7 +191,6 @@ VECTOR_INDEX_NAME_PREFIX=Vector_index # Weaviate configuration WEAVIATE_ENDPOINT=http://localhost:8080 WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih -WEAVIATE_GRPC_ENABLED=false WEAVIATE_BATCH_SIZE=100 WEAVIATE_TOKENIZATION=word diff --git a/api/.importlinter b/api/.importlinter index e4536b1f10..4109c007d9 100644 --- a/api/.importlinter +++ b/api/.importlinter @@ -43,10 +43,7 @@ forbidden_modules = extensions.ext_redis allow_indirect_imports = True ignore_imports = - dify_graph.nodes.agent.agent_node -> extensions.ext_database - dify_graph.nodes.llm.file_saver -> extensions.ext_database dify_graph.nodes.llm.node -> extensions.ext_database - dify_graph.nodes.tool.tool_node -> extensions.ext_database dify_graph.model_runtime.model_providers.__base.ai_model -> extensions.ext_redis dify_graph.model_runtime.model_providers.model_provider_factory -> extensions.ext_redis @@ -92,9 +89,6 @@ forbidden_modules = core.trigger core.variables ignore_imports = - dify_graph.nodes.agent.agent_node -> core.model_manager - dify_graph.nodes.agent.agent_node -> core.provider_manager - dify_graph.nodes.agent.agent_node -> core.tools.tool_manager dify_graph.nodes.llm.llm_utils -> core.model_manager dify_graph.nodes.llm.protocols -> core.model_manager dify_graph.nodes.llm.llm_utils -> dify_graph.model_runtime.model_providers.__base.large_language_model @@ -102,9 +96,6 @@ ignore_imports = dify_graph.nodes.tool.tool_node -> core.callback_handler.workflow_tool_callback_handler dify_graph.nodes.tool.tool_node -> core.tools.tool_engine dify_graph.nodes.tool.tool_node -> core.tools.tool_manager - dify_graph.nodes.agent.agent_node -> core.agent.entities - dify_graph.nodes.agent.agent_node -> core.agent.plugin_entities - dify_graph.nodes.knowledge_retrieval.knowledge_retrieval_node -> core.app.app_config.entities dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.advanced_prompt_transform dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.simple_prompt_transform dify_graph.nodes.parameter_extractor.parameter_extractor_node -> dify_graph.model_runtime.model_providers.__base.large_language_model @@ -112,14 +103,10 @@ ignore_imports = dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.model_manager dify_graph.nodes.question_classifier.question_classifier_node -> core.model_manager dify_graph.nodes.tool.tool_node -> core.tools.utils.message_transformer - dify_graph.nodes.tool.tool_node -> models - dify_graph.nodes.agent.agent_node -> models.model - dify_graph.nodes.llm.file_saver -> core.helper.ssrf_proxy dify_graph.nodes.llm.node -> core.helper.code_executor dify_graph.nodes.llm.node -> core.llm_generator.output_parser.errors dify_graph.nodes.llm.node -> core.llm_generator.output_parser.structured_output dify_graph.nodes.llm.node -> core.model_manager - dify_graph.nodes.agent.entities -> core.prompt.entities.advanced_prompt_entities dify_graph.nodes.llm.entities -> core.prompt.entities.advanced_prompt_entities dify_graph.nodes.llm.node -> core.prompt.entities.advanced_prompt_entities dify_graph.nodes.llm.node -> core.prompt.utils.prompt_message_util @@ -128,19 +115,12 @@ ignore_imports = dify_graph.nodes.parameter_extractor.parameter_extractor_node -> core.prompt.utils.prompt_message_util dify_graph.nodes.question_classifier.entities -> core.prompt.entities.advanced_prompt_entities dify_graph.nodes.question_classifier.question_classifier_node -> core.prompt.utils.prompt_message_util - dify_graph.nodes.knowledge_index.entities -> core.rag.retrieval.retrieval_methods dify_graph.nodes.llm.node -> models.dataset - dify_graph.nodes.agent.agent_node -> core.tools.utils.message_transformer dify_graph.nodes.llm.file_saver -> core.tools.signature dify_graph.nodes.llm.file_saver -> core.tools.tool_file_manager dify_graph.nodes.tool.tool_node -> core.tools.errors - dify_graph.nodes.agent.agent_node -> extensions.ext_database - dify_graph.nodes.llm.file_saver -> extensions.ext_database dify_graph.nodes.llm.node -> extensions.ext_database - dify_graph.nodes.tool.tool_node -> extensions.ext_database - dify_graph.nodes.agent.agent_node -> models dify_graph.nodes.llm.node -> models.model - dify_graph.nodes.agent.agent_node -> services dify_graph.nodes.tool.tool_node -> services dify_graph.model_runtime.model_providers.__base.ai_model -> configs dify_graph.model_runtime.model_providers.__base.ai_model -> extensions.ext_redis diff --git a/api/AGENTS.md b/api/AGENTS.md index 13adb42276..d43d2528b8 100644 --- a/api/AGENTS.md +++ b/api/AGENTS.md @@ -62,6 +62,22 @@ This is the default standard for backend code in this repo. Follow it for new co - Code should usually include type annotations that match the repo’s current Python version (avoid untyped public APIs and “mystery” values). - Prefer modern typing forms (e.g. `list[str]`, `dict[str, int]`) and avoid `Any` unless there’s a strong reason. +- For dictionary-like data with known keys and value types, prefer `TypedDict` over `dict[...]` or `Mapping[...]`. +- For optional keys in typed payloads, use `NotRequired[...]` (or `total=False` when most fields are optional). +- Keep `dict[...]` / `Mapping[...]` for truly dynamic key spaces where the key set is unknown. + +```python +from datetime import datetime +from typing import NotRequired, TypedDict + + +class UserProfile(TypedDict): + user_id: str + email: str + created_at: datetime + nickname: NotRequired[str] +``` + - For classes, declare member variables at the top of the class body (before `__init__`) so the class shape is obvious at a glance: ```python diff --git a/api/commands.py b/api/commands.py deleted file mode 100644 index 8d2ccf26de..0000000000 --- a/api/commands.py +++ /dev/null @@ -1,2724 +0,0 @@ -import base64 -import datetime -import json -import logging -import secrets -import time -from typing import Any - -import click -import sqlalchemy as sa -from flask import current_app -from pydantic import TypeAdapter -from sqlalchemy import select -from sqlalchemy.exc import SQLAlchemyError -from sqlalchemy.orm import sessionmaker - -from configs import dify_config -from constants.languages import languages -from core.helper import encrypter -from core.plugin.entities.plugin_daemon import CredentialType -from core.plugin.impl.plugin import PluginInstaller -from core.rag.datasource.vdb.vector_factory import Vector -from core.rag.datasource.vdb.vector_type import VectorType -from core.rag.index_processor.constant.built_in_field import BuiltInField -from core.rag.models.document import ChildDocument, Document -from core.sandbox import SandboxBuilder, SandboxType -from core.tools.utils.system_encryption import encrypt_system_params -from events.app_event import app_was_created -from extensions.ext_database import db -from extensions.ext_redis import redis_client -from extensions.ext_storage import storage -from extensions.storage.opendal_storage import OpenDALStorage -from extensions.storage.storage_type import StorageType -from libs.db_migration_lock import DbMigrationAutoRenewLock -from libs.helper import email as email_validate -from libs.password import hash_password, password_pattern, valid_password -from libs.rsa import generate_key_pair -from models import Tenant -from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment -from models.dataset import Document as DatasetDocument -from models.model import App, AppAnnotationSetting, AppMode, Conversation, MessageAnnotation, UploadFile -from models.oauth import DatasourceOauthParamConfig, DatasourceProvider -from models.provider import Provider, ProviderModel -from models.provider_ids import DatasourceProviderID, ToolProviderID -from models.source import DataSourceApiKeyAuthBinding, DataSourceOauthBinding -from models.tools import ToolOAuthSystemClient -from services.account_service import AccountService, RegisterService, TenantService -from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpiredLogs -from services.plugin.data_migration import PluginDataMigration -from services.plugin.plugin_migration import PluginMigration -from services.plugin.plugin_service import PluginService -from services.retention.conversation.messages_clean_policy import create_message_clean_policy -from services.retention.conversation.messages_clean_service import MessagesCleanService -from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup -from tasks.remove_app_and_related_data_task import delete_draft_variables_batch - -logger = logging.getLogger(__name__) - -DB_UPGRADE_LOCK_TTL_SECONDS = 60 - - -@click.command("reset-password", help="Reset the account password.") -@click.option("--email", prompt=True, help="Account email to reset password for") -@click.option("--new-password", prompt=True, help="New password") -@click.option("--password-confirm", prompt=True, help="Confirm new password") -def reset_password(email, new_password, password_confirm): - """ - Reset password of owner account - Only available in SELF_HOSTED mode - """ - if str(new_password).strip() != str(password_confirm).strip(): - click.echo(click.style("Passwords do not match.", fg="red")) - return - normalized_email = email.strip().lower() - - with sessionmaker(db.engine, expire_on_commit=False).begin() as session: - account = AccountService.get_account_by_email_with_case_fallback(email.strip(), session=session) - - if not account: - click.echo(click.style(f"Account not found for email: {email}", fg="red")) - return - - try: - valid_password(new_password) - except: - click.echo(click.style(f"Invalid password. Must match {password_pattern}", fg="red")) - return - - # generate password salt - salt = secrets.token_bytes(16) - base64_salt = base64.b64encode(salt).decode() - - # encrypt password with salt - password_hashed = hash_password(new_password, salt) - base64_password_hashed = base64.b64encode(password_hashed).decode() - account.password = base64_password_hashed - account.password_salt = base64_salt - AccountService.reset_login_error_rate_limit(normalized_email) - click.echo(click.style("Password reset successfully.", fg="green")) - - -@click.command("reset-email", help="Reset the account email.") -@click.option("--email", prompt=True, help="Current account email") -@click.option("--new-email", prompt=True, help="New email") -@click.option("--email-confirm", prompt=True, help="Confirm new email") -def reset_email(email, new_email, email_confirm): - """ - Replace account email - :return: - """ - if str(new_email).strip() != str(email_confirm).strip(): - click.echo(click.style("New emails do not match.", fg="red")) - return - normalized_new_email = new_email.strip().lower() - - with sessionmaker(db.engine, expire_on_commit=False).begin() as session: - account = AccountService.get_account_by_email_with_case_fallback(email.strip(), session=session) - - if not account: - click.echo(click.style(f"Account not found for email: {email}", fg="red")) - return - - try: - email_validate(normalized_new_email) - except: - click.echo(click.style(f"Invalid email: {new_email}", fg="red")) - return - - account.email = normalized_new_email - click.echo(click.style("Email updated successfully.", fg="green")) - - -@click.command( - "reset-encrypt-key-pair", - help="Reset the asymmetric key pair of workspace for encrypt LLM credentials. " - "After the reset, all LLM credentials will become invalid, " - "requiring re-entry." - "Only support SELF_HOSTED mode.", -) -@click.confirmation_option( - prompt=click.style( - "Are you sure you want to reset encrypt key pair? This operation cannot be rolled back!", fg="red" - ) -) -def reset_encrypt_key_pair(): - """ - Reset the encrypted key pair of workspace for encrypt LLM credentials. - After the reset, all LLM credentials will become invalid, requiring re-entry. - Only support SELF_HOSTED mode. - """ - if dify_config.EDITION != "SELF_HOSTED": - click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red")) - return - with sessionmaker(db.engine, expire_on_commit=False).begin() as session: - tenants = session.query(Tenant).all() - for tenant in tenants: - if not tenant: - click.echo(click.style("No workspaces found. Run /install first.", fg="red")) - return - - tenant.encrypt_public_key = generate_key_pair(tenant.id) - - session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete() - session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete() - - click.echo( - click.style( - f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.", - fg="green", - ) - ) - - -@click.command("vdb-migrate", help="Migrate vector db.") -@click.option("--scope", default="all", prompt=False, help="The scope of vector database to migrate, Default is All.") -def vdb_migrate(scope: str): - if scope in {"knowledge", "all"}: - migrate_knowledge_vector_database() - if scope in {"annotation", "all"}: - migrate_annotation_vector_database() - - -def migrate_annotation_vector_database(): - """ - Migrate annotation datas to target vector database . - """ - click.echo(click.style("Starting annotation data migration.", fg="green")) - create_count = 0 - skipped_count = 0 - total_count = 0 - page = 1 - while True: - try: - # get apps info - per_page = 50 - with sessionmaker(db.engine, expire_on_commit=False).begin() as session: - apps = ( - session.query(App) - .where(App.status == "normal") - .order_by(App.created_at.desc()) - .limit(per_page) - .offset((page - 1) * per_page) - .all() - ) - if not apps: - break - except SQLAlchemyError: - raise - - page += 1 - for app in apps: - total_count = total_count + 1 - click.echo( - f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped." - ) - try: - click.echo(f"Creating app annotation index: {app.id}") - with sessionmaker(db.engine, expire_on_commit=False).begin() as session: - app_annotation_setting = ( - session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first() - ) - - if not app_annotation_setting: - skipped_count = skipped_count + 1 - click.echo(f"App annotation setting disabled: {app.id}") - continue - # get dataset_collection_binding info - dataset_collection_binding = ( - session.query(DatasetCollectionBinding) - .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id) - .first() - ) - if not dataset_collection_binding: - click.echo(f"App annotation collection binding not found: {app.id}") - continue - annotations = session.scalars( - select(MessageAnnotation).where(MessageAnnotation.app_id == app.id) - ).all() - dataset = Dataset( - id=app.id, - tenant_id=app.tenant_id, - indexing_technique="high_quality", - embedding_model_provider=dataset_collection_binding.provider_name, - embedding_model=dataset_collection_binding.model_name, - collection_binding_id=dataset_collection_binding.id, - ) - documents = [] - if annotations: - for annotation in annotations: - document = Document( - page_content=annotation.question_text, - metadata={"annotation_id": annotation.id, "app_id": app.id, "doc_id": annotation.id}, - ) - documents.append(document) - - vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"]) - click.echo(f"Migrating annotations for app: {app.id}.") - - try: - vector.delete() - click.echo(click.style(f"Deleted vector index for app {app.id}.", fg="green")) - except Exception as e: - click.echo(click.style(f"Failed to delete vector index for app {app.id}.", fg="red")) - raise e - if documents: - try: - click.echo( - click.style( - f"Creating vector index with {len(documents)} annotations for app {app.id}.", - fg="green", - ) - ) - vector.create(documents) - click.echo(click.style(f"Created vector index for app {app.id}.", fg="green")) - except Exception as e: - click.echo(click.style(f"Failed to created vector index for app {app.id}.", fg="red")) - raise e - click.echo(f"Successfully migrated app annotation {app.id}.") - create_count += 1 - except Exception as e: - click.echo( - click.style(f"Error creating app annotation index: {e.__class__.__name__} {str(e)}", fg="red") - ) - continue - - click.echo( - click.style( - f"Migration complete. Created {create_count} app annotation indexes. Skipped {skipped_count} apps.", - fg="green", - ) - ) - - -def migrate_knowledge_vector_database(): - """ - Migrate vector database datas to target vector database . - """ - click.echo(click.style("Starting vector database migration.", fg="green")) - create_count = 0 - skipped_count = 0 - total_count = 0 - vector_type = dify_config.VECTOR_STORE - upper_collection_vector_types = { - VectorType.MILVUS, - VectorType.PGVECTOR, - VectorType.VASTBASE, - VectorType.RELYT, - VectorType.WEAVIATE, - VectorType.ORACLE, - VectorType.ELASTICSEARCH, - VectorType.OPENGAUSS, - VectorType.TABLESTORE, - VectorType.MATRIXONE, - } - lower_collection_vector_types = { - VectorType.ANALYTICDB, - VectorType.CHROMA, - VectorType.MYSCALE, - VectorType.PGVECTO_RS, - VectorType.TIDB_VECTOR, - VectorType.OPENSEARCH, - VectorType.TENCENT, - VectorType.BAIDU, - VectorType.VIKINGDB, - VectorType.UPSTASH, - VectorType.COUCHBASE, - VectorType.OCEANBASE, - } - page = 1 - while True: - try: - stmt = ( - select(Dataset).where(Dataset.indexing_technique == "high_quality").order_by(Dataset.created_at.desc()) - ) - - datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False) - if not datasets.items: - break - except SQLAlchemyError: - raise - - page += 1 - for dataset in datasets: - total_count = total_count + 1 - click.echo( - f"Processing the {total_count} dataset {dataset.id}. {create_count} created, {skipped_count} skipped." - ) - try: - click.echo(f"Creating dataset vector database index: {dataset.id}") - if dataset.index_struct_dict: - if dataset.index_struct_dict["type"] == vector_type: - skipped_count = skipped_count + 1 - continue - collection_name = "" - dataset_id = dataset.id - if vector_type in upper_collection_vector_types: - collection_name = Dataset.gen_collection_name_by_id(dataset_id) - elif vector_type == VectorType.QDRANT: - if dataset.collection_binding_id: - dataset_collection_binding = ( - db.session.query(DatasetCollectionBinding) - .where(DatasetCollectionBinding.id == dataset.collection_binding_id) - .one_or_none() - ) - if dataset_collection_binding: - collection_name = dataset_collection_binding.collection_name - else: - raise ValueError("Dataset Collection Binding not found") - else: - collection_name = Dataset.gen_collection_name_by_id(dataset_id) - - elif vector_type in lower_collection_vector_types: - collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower() - else: - raise ValueError(f"Vector store {vector_type} is not supported.") - - index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}} - dataset.index_struct = json.dumps(index_struct_dict) - vector = Vector(dataset) - click.echo(f"Migrating dataset {dataset.id}.") - - try: - vector.delete() - click.echo( - click.style(f"Deleted vector index {collection_name} for dataset {dataset.id}.", fg="green") - ) - except Exception as e: - click.echo( - click.style( - f"Failed to delete vector index {collection_name} for dataset {dataset.id}.", fg="red" - ) - ) - raise e - - dataset_documents = db.session.scalars( - select(DatasetDocument).where( - DatasetDocument.dataset_id == dataset.id, - DatasetDocument.indexing_status == "completed", - DatasetDocument.enabled == True, - DatasetDocument.archived == False, - ) - ).all() - - documents = [] - segments_count = 0 - for dataset_document in dataset_documents: - segments = db.session.scalars( - select(DocumentSegment).where( - DocumentSegment.document_id == dataset_document.id, - DocumentSegment.status == "completed", - DocumentSegment.enabled == True, - ) - ).all() - - for segment in segments: - document = Document( - page_content=segment.content, - metadata={ - "doc_id": segment.index_node_id, - "doc_hash": segment.index_node_hash, - "document_id": segment.document_id, - "dataset_id": segment.dataset_id, - }, - ) - if dataset_document.doc_form == "hierarchical_model": - child_chunks = segment.get_child_chunks() - if child_chunks: - child_documents = [] - for child_chunk in child_chunks: - child_document = ChildDocument( - page_content=child_chunk.content, - metadata={ - "doc_id": child_chunk.index_node_id, - "doc_hash": child_chunk.index_node_hash, - "document_id": segment.document_id, - "dataset_id": segment.dataset_id, - }, - ) - child_documents.append(child_document) - document.children = child_documents - - documents.append(document) - segments_count = segments_count + 1 - - if documents: - try: - click.echo( - click.style( - f"Creating vector index with {len(documents)} documents of {segments_count}" - f" segments for dataset {dataset.id}.", - fg="green", - ) - ) - all_child_documents = [] - for doc in documents: - if doc.children: - all_child_documents.extend(doc.children) - vector.create(documents) - if all_child_documents: - vector.create(all_child_documents) - click.echo(click.style(f"Created vector index for dataset {dataset.id}.", fg="green")) - except Exception as e: - click.echo(click.style(f"Failed to created vector index for dataset {dataset.id}.", fg="red")) - raise e - db.session.add(dataset) - db.session.commit() - click.echo(f"Successfully migrated dataset {dataset.id}.") - create_count += 1 - except Exception as e: - db.session.rollback() - click.echo(click.style(f"Error creating dataset index: {e.__class__.__name__} {str(e)}", fg="red")) - continue - - click.echo( - click.style( - f"Migration complete. Created {create_count} dataset indexes. Skipped {skipped_count} datasets.", fg="green" - ) - ) - - -@click.command("convert-to-agent-apps", help="Convert Agent Assistant to Agent App.") -def convert_to_agent_apps(): - """ - Convert Agent Assistant to Agent App. - """ - click.echo(click.style("Starting convert to agent apps.", fg="green")) - - proceeded_app_ids = [] - - while True: - # fetch first 1000 apps - sql_query = """SELECT a.id AS id FROM apps a - INNER JOIN app_model_configs am ON a.app_model_config_id=am.id - WHERE a.mode = 'chat' - AND am.agent_mode is not null - AND ( - am.agent_mode like '%"strategy": "function_call"%' - OR am.agent_mode like '%"strategy": "react"%' - ) - AND ( - am.agent_mode like '{"enabled": true%' - OR am.agent_mode like '{"max_iteration": %' - ) ORDER BY a.created_at DESC LIMIT 1000 - """ - - with db.engine.begin() as conn: - rs = conn.execute(sa.text(sql_query)) - - apps = [] - for i in rs: - app_id = str(i.id) - if app_id not in proceeded_app_ids: - proceeded_app_ids.append(app_id) - app = db.session.query(App).where(App.id == app_id).first() - if app is not None: - apps.append(app) - - if len(apps) == 0: - break - - for app in apps: - click.echo(f"Converting app: {app.id}") - - try: - app.mode = AppMode.AGENT_CHAT - db.session.commit() - - # update conversation mode to agent - db.session.query(Conversation).where(Conversation.app_id == app.id).update( - {Conversation.mode: AppMode.AGENT_CHAT} - ) - - db.session.commit() - click.echo(click.style(f"Converted app: {app.id}", fg="green")) - except Exception as e: - click.echo(click.style(f"Convert app error: {e.__class__.__name__} {str(e)}", fg="red")) - - click.echo(click.style(f"Conversion complete. Converted {len(proceeded_app_ids)} agent apps.", fg="green")) - - -@click.command("add-qdrant-index", help="Add Qdrant index.") -@click.option("--field", default="metadata.doc_id", prompt=False, help="Index field , default is metadata.doc_id.") -def add_qdrant_index(field: str): - click.echo(click.style("Starting Qdrant index creation.", fg="green")) - - create_count = 0 - - try: - bindings = db.session.query(DatasetCollectionBinding).all() - if not bindings: - click.echo(click.style("No dataset collection bindings found.", fg="red")) - return - import qdrant_client - from qdrant_client.http.exceptions import UnexpectedResponse - from qdrant_client.http.models import PayloadSchemaType - - from core.rag.datasource.vdb.qdrant.qdrant_vector import PathQdrantParams, QdrantConfig - - for binding in bindings: - if dify_config.QDRANT_URL is None: - raise ValueError("Qdrant URL is required.") - qdrant_config = QdrantConfig( - endpoint=dify_config.QDRANT_URL, - api_key=dify_config.QDRANT_API_KEY, - root_path=current_app.root_path, - timeout=dify_config.QDRANT_CLIENT_TIMEOUT, - grpc_port=dify_config.QDRANT_GRPC_PORT, - prefer_grpc=dify_config.QDRANT_GRPC_ENABLED, - ) - try: - params = qdrant_config.to_qdrant_params() - # Check the type before using - if isinstance(params, PathQdrantParams): - # PathQdrantParams case - client = qdrant_client.QdrantClient(path=params.path) - else: - # UrlQdrantParams case - params is UrlQdrantParams - client = qdrant_client.QdrantClient( - url=params.url, - api_key=params.api_key, - timeout=int(params.timeout), - verify=params.verify, - grpc_port=params.grpc_port, - prefer_grpc=params.prefer_grpc, - ) - # create payload index - client.create_payload_index(binding.collection_name, field, field_schema=PayloadSchemaType.KEYWORD) - create_count += 1 - except UnexpectedResponse as e: - # Collection does not exist, so return - if e.status_code == 404: - click.echo(click.style(f"Collection not found: {binding.collection_name}.", fg="red")) - continue - # Some other error occurred, so re-raise the exception - else: - click.echo( - click.style( - f"Failed to create Qdrant index for collection: {binding.collection_name}.", fg="red" - ) - ) - - except Exception: - click.echo(click.style("Failed to create Qdrant client.", fg="red")) - - click.echo(click.style(f"Index creation complete. Created {create_count} collection indexes.", fg="green")) - - -@click.command("old-metadata-migration", help="Old metadata migration.") -def old_metadata_migration(): - """ - Old metadata migration. - """ - click.echo(click.style("Starting old metadata migration.", fg="green")) - - page = 1 - while True: - try: - stmt = ( - select(DatasetDocument) - .where(DatasetDocument.doc_metadata.is_not(None)) - .order_by(DatasetDocument.created_at.desc()) - ) - documents = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False) - except SQLAlchemyError: - raise - if not documents: - break - for document in documents: - if document.doc_metadata: - doc_metadata = document.doc_metadata - for key in doc_metadata: - for field in BuiltInField: - if field.value == key: - break - else: - dataset_metadata = ( - db.session.query(DatasetMetadata) - .where(DatasetMetadata.dataset_id == document.dataset_id, DatasetMetadata.name == key) - .first() - ) - if not dataset_metadata: - dataset_metadata = DatasetMetadata( - tenant_id=document.tenant_id, - dataset_id=document.dataset_id, - name=key, - type="string", - created_by=document.created_by, - ) - db.session.add(dataset_metadata) - db.session.flush() - dataset_metadata_binding = DatasetMetadataBinding( - tenant_id=document.tenant_id, - dataset_id=document.dataset_id, - metadata_id=dataset_metadata.id, - document_id=document.id, - created_by=document.created_by, - ) - db.session.add(dataset_metadata_binding) - else: - dataset_metadata_binding = ( - db.session.query(DatasetMetadataBinding) # type: ignore - .where( - DatasetMetadataBinding.dataset_id == document.dataset_id, - DatasetMetadataBinding.document_id == document.id, - DatasetMetadataBinding.metadata_id == dataset_metadata.id, - ) - .first() - ) - if not dataset_metadata_binding: - dataset_metadata_binding = DatasetMetadataBinding( - tenant_id=document.tenant_id, - dataset_id=document.dataset_id, - metadata_id=dataset_metadata.id, - document_id=document.id, - created_by=document.created_by, - ) - db.session.add(dataset_metadata_binding) - db.session.commit() - page += 1 - click.echo(click.style("Old metadata migration completed.", fg="green")) - - -@click.command("create-tenant", help="Create account and tenant.") -@click.option("--email", prompt=True, help="Tenant account email.") -@click.option("--name", prompt=True, help="Workspace name.") -@click.option("--language", prompt=True, help="Account language, default: en-US.") -def create_tenant(email: str, language: str | None = None, name: str | None = None): - """ - Create tenant account - """ - if not email: - click.echo(click.style("Email is required.", fg="red")) - return - - # Create account - email = email.strip().lower() - - if "@" not in email: - click.echo(click.style("Invalid email address.", fg="red")) - return - - account_name = email.split("@")[0] - - if language not in languages: - language = "en-US" - - # Validates name encoding for non-Latin characters. - name = name.strip().encode("utf-8").decode("utf-8") if name else None - - # generate random password - new_password = secrets.token_urlsafe(16) - - # register account - account = RegisterService.register( - email=email, - name=account_name, - password=new_password, - language=language, - create_workspace_required=False, - ) - TenantService.create_owner_tenant_if_not_exist(account, name) - - click.echo( - click.style( - f"Account and tenant created.\nAccount: {email}\nPassword: {new_password}", - fg="green", - ) - ) - - -@click.command("upgrade-db", help="Upgrade the database") -def upgrade_db(): - click.echo("Preparing database migration...") - lock = DbMigrationAutoRenewLock( - redis_client=redis_client, - name="db_upgrade_lock", - ttl_seconds=DB_UPGRADE_LOCK_TTL_SECONDS, - logger=logger, - log_context="db_migration", - ) - if lock.acquire(blocking=False): - migration_succeeded = False - try: - click.echo(click.style("Starting database migration.", fg="green")) - - # run db migration - import flask_migrate - - flask_migrate.upgrade() - - migration_succeeded = True - click.echo(click.style("Database migration successful!", fg="green")) - - except Exception as e: - logger.exception("Failed to execute database migration") - click.echo(click.style(f"Database migration failed: {e}", fg="red")) - raise SystemExit(1) - finally: - status = "successful" if migration_succeeded else "failed" - lock.release_safely(status=status) - else: - click.echo("Database migration skipped") - - -@click.command("fix-app-site-missing", help="Fix app related site missing issue.") -def fix_app_site_missing(): - """ - Fix app related site missing issue. - """ - click.echo(click.style("Starting fix for missing app-related sites.", fg="green")) - - failed_app_ids = [] - while True: - sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id -where sites.id is null limit 1000""" - with db.engine.begin() as conn: - rs = conn.execute(sa.text(sql)) - - processed_count = 0 - for i in rs: - processed_count += 1 - app_id = str(i.id) - - if app_id in failed_app_ids: - continue - - try: - app = db.session.query(App).where(App.id == app_id).first() - if not app: - logger.info("App %s not found", app_id) - continue - - tenant = app.tenant - if tenant: - accounts = tenant.get_accounts() - if not accounts: - logger.info("Fix failed for app %s", app.id) - continue - - account = accounts[0] - logger.info("Fixing missing site for app %s", app.id) - app_was_created.send(app, account=account) - except Exception: - failed_app_ids.append(app_id) - click.echo(click.style(f"Failed to fix missing site for app {app_id}", fg="red")) - logger.exception("Failed to fix app related site missing issue, app_id: %s", app_id) - continue - - if not processed_count: - break - - click.echo(click.style("Fix for missing app-related sites completed successfully!", fg="green")) - - -@click.command("migrate-data-for-plugin", help="Migrate data for plugin.") -def migrate_data_for_plugin(): - """ - Migrate data for plugin. - """ - click.echo(click.style("Starting migrate data for plugin.", fg="white")) - - PluginDataMigration.migrate() - - click.echo(click.style("Migrate data for plugin completed.", fg="green")) - - -@click.command("extract-plugins", help="Extract plugins.") -@click.option("--output_file", prompt=True, help="The file to store the extracted plugins.", default="plugins.jsonl") -@click.option("--workers", prompt=True, help="The number of workers to extract plugins.", default=10) -def extract_plugins(output_file: str, workers: int): - """ - Extract plugins. - """ - click.echo(click.style("Starting extract plugins.", fg="white")) - - PluginMigration.extract_plugins(output_file, workers) - - click.echo(click.style("Extract plugins completed.", fg="green")) - - -@click.command("extract-unique-identifiers", help="Extract unique identifiers.") -@click.option( - "--output_file", - prompt=True, - help="The file to store the extracted unique identifiers.", - default="unique_identifiers.json", -) -@click.option( - "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl" -) -def extract_unique_plugins(output_file: str, input_file: str): - """ - Extract unique plugins. - """ - click.echo(click.style("Starting extract unique plugins.", fg="white")) - - PluginMigration.extract_unique_plugins_to_file(input_file, output_file) - - click.echo(click.style("Extract unique plugins completed.", fg="green")) - - -@click.command("install-plugins", help="Install plugins.") -@click.option( - "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl" -) -@click.option( - "--output_file", prompt=True, help="The file to store the installed plugins.", default="installed_plugins.jsonl" -) -@click.option("--workers", prompt=True, help="The number of workers to install plugins.", default=100) -def install_plugins(input_file: str, output_file: str, workers: int): - """ - Install plugins. - """ - click.echo(click.style("Starting install plugins.", fg="white")) - - PluginMigration.install_plugins(input_file, output_file, workers) - - click.echo(click.style("Install plugins completed.", fg="green")) - - -@click.command("clear-free-plan-tenant-expired-logs", help="Clear free plan tenant expired logs.") -@click.option("--days", prompt=True, help="The days to clear free plan tenant expired logs.", default=30) -@click.option("--batch", prompt=True, help="The batch size to clear free plan tenant expired logs.", default=100) -@click.option( - "--tenant_ids", - prompt=True, - multiple=True, - help="The tenant ids to clear free plan tenant expired logs.", -) -def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[str]): - """ - Clear free plan tenant expired logs. - """ - click.echo(click.style("Starting clear free plan tenant expired logs.", fg="white")) - - ClearFreePlanTenantExpiredLogs.process(days, batch, tenant_ids) - - click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green")) - - -@click.command("clean-workflow-runs", help="Clean expired workflow runs and related data for free tenants.") -@click.option( - "--before-days", - "--days", - default=30, - show_default=True, - type=click.IntRange(min=0), - help="Delete workflow runs created before N days ago.", -) -@click.option("--batch-size", default=200, show_default=True, help="Batch size for selecting workflow runs.") -@click.option( - "--from-days-ago", - default=None, - type=click.IntRange(min=0), - help="Lower bound in days ago (older). Must be paired with --to-days-ago.", -) -@click.option( - "--to-days-ago", - default=None, - type=click.IntRange(min=0), - help="Upper bound in days ago (newer). Must be paired with --from-days-ago.", -) -@click.option( - "--start-from", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.", -) -@click.option( - "--end-before", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.", -) -@click.option( - "--dry-run", - is_flag=True, - help="Preview cleanup results without deleting any workflow run data.", -) -def clean_workflow_runs( - before_days: int, - batch_size: int, - from_days_ago: int | None, - to_days_ago: int | None, - start_from: datetime.datetime | None, - end_before: datetime.datetime | None, - dry_run: bool, -): - """ - Clean workflow runs and related workflow data for free tenants. - """ - if (start_from is None) ^ (end_before is None): - raise click.UsageError("--start-from and --end-before must be provided together.") - - if (from_days_ago is None) ^ (to_days_ago is None): - raise click.UsageError("--from-days-ago and --to-days-ago must be provided together.") - - if from_days_ago is not None and to_days_ago is not None: - if start_from or end_before: - raise click.UsageError("Choose either day offsets or explicit dates, not both.") - if from_days_ago <= to_days_ago: - raise click.UsageError("--from-days-ago must be greater than --to-days-ago.") - now = datetime.datetime.now() - start_from = now - datetime.timedelta(days=from_days_ago) - end_before = now - datetime.timedelta(days=to_days_ago) - before_days = 0 - - start_time = datetime.datetime.now(datetime.UTC) - click.echo(click.style(f"Starting workflow run cleanup at {start_time.isoformat()}.", fg="white")) - - WorkflowRunCleanup( - days=before_days, - batch_size=batch_size, - start_from=start_from, - end_before=end_before, - dry_run=dry_run, - ).run() - - end_time = datetime.datetime.now(datetime.UTC) - elapsed = end_time - start_time - click.echo( - click.style( - f"Workflow run cleanup completed. start={start_time.isoformat()} " - f"end={end_time.isoformat()} duration={elapsed}", - fg="green", - ) - ) - - -@click.command( - "archive-workflow-runs", - help="Archive workflow runs for paid plan tenants to S3-compatible storage.", -) -@click.option("--tenant-ids", default=None, help="Optional comma-separated tenant IDs for grayscale rollout.") -@click.option("--before-days", default=90, show_default=True, help="Archive runs older than N days.") -@click.option( - "--from-days-ago", - default=None, - type=click.IntRange(min=0), - help="Lower bound in days ago (older). Must be paired with --to-days-ago.", -) -@click.option( - "--to-days-ago", - default=None, - type=click.IntRange(min=0), - help="Upper bound in days ago (newer). Must be paired with --from-days-ago.", -) -@click.option( - "--start-from", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Archive runs created at or after this timestamp (UTC if no timezone).", -) -@click.option( - "--end-before", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Archive runs created before this timestamp (UTC if no timezone).", -) -@click.option("--batch-size", default=100, show_default=True, help="Batch size for processing.") -@click.option("--workers", default=1, show_default=True, type=int, help="Concurrent workflow runs to archive.") -@click.option("--limit", default=None, type=int, help="Maximum number of runs to archive.") -@click.option("--dry-run", is_flag=True, help="Preview without archiving.") -@click.option("--delete-after-archive", is_flag=True, help="Delete runs and related data after archiving.") -def archive_workflow_runs( - tenant_ids: str | None, - before_days: int, - from_days_ago: int | None, - to_days_ago: int | None, - start_from: datetime.datetime | None, - end_before: datetime.datetime | None, - batch_size: int, - workers: int, - limit: int | None, - dry_run: bool, - delete_after_archive: bool, -): - """ - Archive workflow runs for paid plan tenants older than the specified days. - - This command archives the following tables to storage: - - workflow_node_executions - - workflow_node_execution_offload - - workflow_pauses - - workflow_pause_reasons - - workflow_trigger_logs - - The workflow_runs and workflow_app_logs tables are preserved for UI listing. - """ - from services.retention.workflow_run.archive_paid_plan_workflow_run import WorkflowRunArchiver - - run_started_at = datetime.datetime.now(datetime.UTC) - click.echo( - click.style( - f"Starting workflow run archiving at {run_started_at.isoformat()}.", - fg="white", - ) - ) - - if (start_from is None) ^ (end_before is None): - click.echo(click.style("start-from and end-before must be provided together.", fg="red")) - return - - if (from_days_ago is None) ^ (to_days_ago is None): - click.echo(click.style("from-days-ago and to-days-ago must be provided together.", fg="red")) - return - - if from_days_ago is not None and to_days_ago is not None: - if start_from or end_before: - click.echo(click.style("Choose either day offsets or explicit dates, not both.", fg="red")) - return - if from_days_ago <= to_days_ago: - click.echo(click.style("from-days-ago must be greater than to-days-ago.", fg="red")) - return - now = datetime.datetime.now() - start_from = now - datetime.timedelta(days=from_days_ago) - end_before = now - datetime.timedelta(days=to_days_ago) - before_days = 0 - - if start_from and end_before and start_from >= end_before: - click.echo(click.style("start-from must be earlier than end-before.", fg="red")) - return - if workers < 1: - click.echo(click.style("workers must be at least 1.", fg="red")) - return - - archiver = WorkflowRunArchiver( - days=before_days, - batch_size=batch_size, - start_from=start_from, - end_before=end_before, - workers=workers, - tenant_ids=[tid.strip() for tid in tenant_ids.split(",")] if tenant_ids else None, - limit=limit, - dry_run=dry_run, - delete_after_archive=delete_after_archive, - ) - summary = archiver.run() - click.echo( - click.style( - f"Summary: processed={summary.total_runs_processed}, archived={summary.runs_archived}, " - f"skipped={summary.runs_skipped}, failed={summary.runs_failed}, " - f"time={summary.total_elapsed_time:.2f}s", - fg="cyan", - ) - ) - - run_finished_at = datetime.datetime.now(datetime.UTC) - elapsed = run_finished_at - run_started_at - click.echo( - click.style( - f"Workflow run archiving completed. start={run_started_at.isoformat()} " - f"end={run_finished_at.isoformat()} duration={elapsed}", - fg="green", - ) - ) - - -@click.command( - "restore-workflow-runs", - help="Restore archived workflow runs from S3-compatible storage.", -) -@click.option( - "--tenant-ids", - required=False, - help="Tenant IDs (comma-separated).", -) -@click.option("--run-id", required=False, help="Workflow run ID to restore.") -@click.option( - "--start-from", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.", -) -@click.option( - "--end-before", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.", -) -@click.option("--workers", default=1, show_default=True, type=int, help="Concurrent workflow runs to restore.") -@click.option("--limit", type=int, default=100, show_default=True, help="Maximum number of runs to restore.") -@click.option("--dry-run", is_flag=True, help="Preview without restoring.") -def restore_workflow_runs( - tenant_ids: str | None, - run_id: str | None, - start_from: datetime.datetime | None, - end_before: datetime.datetime | None, - workers: int, - limit: int, - dry_run: bool, -): - """ - Restore an archived workflow run from storage to the database. - - This restores the following tables: - - workflow_node_executions - - workflow_node_execution_offload - - workflow_pauses - - workflow_pause_reasons - - workflow_trigger_logs - """ - from services.retention.workflow_run.restore_archived_workflow_run import WorkflowRunRestore - - parsed_tenant_ids = None - if tenant_ids: - parsed_tenant_ids = [tid.strip() for tid in tenant_ids.split(",") if tid.strip()] - if not parsed_tenant_ids: - raise click.BadParameter("tenant-ids must not be empty") - - if (start_from is None) ^ (end_before is None): - raise click.UsageError("--start-from and --end-before must be provided together.") - if run_id is None and (start_from is None or end_before is None): - raise click.UsageError("--start-from and --end-before are required for batch restore.") - if workers < 1: - raise click.BadParameter("workers must be at least 1") - - start_time = datetime.datetime.now(datetime.UTC) - click.echo( - click.style( - f"Starting restore of workflow run {run_id} at {start_time.isoformat()}.", - fg="white", - ) - ) - - restorer = WorkflowRunRestore(dry_run=dry_run, workers=workers) - if run_id: - results = [restorer.restore_by_run_id(run_id)] - else: - assert start_from is not None - assert end_before is not None - results = restorer.restore_batch( - parsed_tenant_ids, - start_date=start_from, - end_date=end_before, - limit=limit, - ) - - end_time = datetime.datetime.now(datetime.UTC) - elapsed = end_time - start_time - - successes = sum(1 for result in results if result.success) - failures = len(results) - successes - - if failures == 0: - click.echo( - click.style( - f"Restore completed successfully. success={successes} duration={elapsed}", - fg="green", - ) - ) - else: - click.echo( - click.style( - f"Restore completed with failures. success={successes} failed={failures} duration={elapsed}", - fg="red", - ) - ) - - -@click.command( - "delete-archived-workflow-runs", - help="Delete archived workflow runs from the database.", -) -@click.option( - "--tenant-ids", - required=False, - help="Tenant IDs (comma-separated).", -) -@click.option("--run-id", required=False, help="Workflow run ID to delete.") -@click.option( - "--start-from", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.", -) -@click.option( - "--end-before", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - default=None, - help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.", -) -@click.option("--limit", type=int, default=100, show_default=True, help="Maximum number of runs to delete.") -@click.option("--dry-run", is_flag=True, help="Preview without deleting.") -def delete_archived_workflow_runs( - tenant_ids: str | None, - run_id: str | None, - start_from: datetime.datetime | None, - end_before: datetime.datetime | None, - limit: int, - dry_run: bool, -): - """ - Delete archived workflow runs from the database. - """ - from services.retention.workflow_run.delete_archived_workflow_run import ArchivedWorkflowRunDeletion - - parsed_tenant_ids = None - if tenant_ids: - parsed_tenant_ids = [tid.strip() for tid in tenant_ids.split(",") if tid.strip()] - if not parsed_tenant_ids: - raise click.BadParameter("tenant-ids must not be empty") - - if (start_from is None) ^ (end_before is None): - raise click.UsageError("--start-from and --end-before must be provided together.") - if run_id is None and (start_from is None or end_before is None): - raise click.UsageError("--start-from and --end-before are required for batch delete.") - - start_time = datetime.datetime.now(datetime.UTC) - target_desc = f"workflow run {run_id}" if run_id else "workflow runs" - click.echo( - click.style( - f"Starting delete of {target_desc} at {start_time.isoformat()}.", - fg="white", - ) - ) - - deleter = ArchivedWorkflowRunDeletion(dry_run=dry_run) - if run_id: - results = [deleter.delete_by_run_id(run_id)] - else: - assert start_from is not None - assert end_before is not None - results = deleter.delete_batch( - parsed_tenant_ids, - start_date=start_from, - end_date=end_before, - limit=limit, - ) - - for result in results: - if result.success: - click.echo( - click.style( - f"{'[DRY RUN] Would delete' if dry_run else 'Deleted'} " - f"workflow run {result.run_id} (tenant={result.tenant_id})", - fg="green", - ) - ) - else: - click.echo( - click.style( - f"Failed to delete workflow run {result.run_id}: {result.error}", - fg="red", - ) - ) - - end_time = datetime.datetime.now(datetime.UTC) - elapsed = end_time - start_time - - successes = sum(1 for result in results if result.success) - failures = len(results) - successes - - if failures == 0: - click.echo( - click.style( - f"Delete completed successfully. success={successes} duration={elapsed}", - fg="green", - ) - ) - else: - click.echo( - click.style( - f"Delete completed with failures. success={successes} failed={failures} duration={elapsed}", - fg="red", - ) - ) - - -@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") -@click.command("clear-orphaned-file-records", help="Clear orphaned file records.") -def clear_orphaned_file_records(force: bool): - """ - Clear orphaned file records in the database. - """ - - # define tables and columns to process - files_tables = [ - {"table": "upload_files", "id_column": "id", "key_column": "key"}, - {"table": "tool_files", "id_column": "id", "key_column": "file_key"}, - ] - ids_tables = [ - {"type": "uuid", "table": "message_files", "column": "upload_file_id"}, - {"type": "text", "table": "documents", "column": "data_source_info"}, - {"type": "text", "table": "document_segments", "column": "content"}, - {"type": "text", "table": "messages", "column": "answer"}, - {"type": "text", "table": "workflow_node_executions", "column": "inputs"}, - {"type": "text", "table": "workflow_node_executions", "column": "process_data"}, - {"type": "text", "table": "workflow_node_executions", "column": "outputs"}, - {"type": "text", "table": "conversations", "column": "introduction"}, - {"type": "text", "table": "conversations", "column": "system_instruction"}, - {"type": "text", "table": "accounts", "column": "avatar"}, - {"type": "text", "table": "apps", "column": "icon"}, - {"type": "text", "table": "sites", "column": "icon"}, - {"type": "json", "table": "messages", "column": "inputs"}, - {"type": "json", "table": "messages", "column": "message"}, - ] - - # notify user and ask for confirmation - click.echo( - click.style( - "This command will first find and delete orphaned file records from the message_files table,", fg="yellow" - ) - ) - click.echo( - click.style( - "and then it will find and delete orphaned file records in the following tables:", - fg="yellow", - ) - ) - for files_table in files_tables: - click.echo(click.style(f"- {files_table['table']}", fg="yellow")) - click.echo( - click.style("The following tables and columns will be scanned to find orphaned file records:", fg="yellow") - ) - for ids_table in ids_tables: - click.echo(click.style(f"- {ids_table['table']} ({ids_table['column']})", fg="yellow")) - click.echo("") - - click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red")) - click.echo( - click.style( - ( - "Since not all patterns have been fully tested, " - "please note that this command may delete unintended file records." - ), - fg="yellow", - ) - ) - click.echo( - click.style("This cannot be undone. Please make sure to back up your database before proceeding.", fg="yellow") - ) - click.echo( - click.style( - ( - "It is also recommended to run this during the maintenance window, " - "as this may cause high load on your instance." - ), - fg="yellow", - ) - ) - if not force: - click.confirm("Do you want to proceed?", abort=True) - - # start the cleanup process - click.echo(click.style("Starting orphaned file records cleanup.", fg="white")) - - # clean up the orphaned records in the message_files table where message_id doesn't exist in messages table - try: - click.echo( - click.style("- Listing message_files records where message_id doesn't exist in messages table", fg="white") - ) - query = ( - "SELECT mf.id, mf.message_id " - "FROM message_files mf LEFT JOIN messages m ON mf.message_id = m.id " - "WHERE m.id IS NULL" - ) - orphaned_message_files = [] - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for i in rs: - orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])}) - - if orphaned_message_files: - click.echo(click.style(f"Found {len(orphaned_message_files)} orphaned message_files records:", fg="white")) - for record in orphaned_message_files: - click.echo(click.style(f" - id: {record['id']}, message_id: {record['message_id']}", fg="black")) - - if not force: - click.confirm( - ( - f"Do you want to proceed " - f"to delete all {len(orphaned_message_files)} orphaned message_files records?" - ), - abort=True, - ) - - click.echo(click.style("- Deleting orphaned message_files records", fg="white")) - query = "DELETE FROM message_files WHERE id IN :ids" - with db.engine.begin() as conn: - conn.execute(sa.text(query), {"ids": tuple(record["id"] for record in orphaned_message_files)}) - click.echo( - click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green") - ) - else: - click.echo(click.style("No orphaned message_files records found. There is nothing to delete.", fg="green")) - except Exception as e: - click.echo(click.style(f"Error deleting orphaned message_files records: {str(e)}", fg="red")) - - # clean up the orphaned records in the rest of the *_files tables - try: - # fetch file id and keys from each table - all_files_in_tables = [] - for files_table in files_tables: - click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white")) - query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}" - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for i in rs: - all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]}) - click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) - - # fetch referred table and columns - guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" - all_ids_in_tables = [] - for ids_table in ids_tables: - query = "" - match ids_table["type"]: - case "uuid": - click.echo( - click.style( - f"- Listing file ids in column {ids_table['column']} in table {ids_table['table']}", - fg="white", - ) - ) - c = ids_table["column"] - query = f"SELECT {c} FROM {ids_table['table']} WHERE {c} IS NOT NULL" - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for i in rs: - all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])}) - case "text": - t = ids_table["table"] - click.echo( - click.style( - f"- Listing file-id-like strings in column {ids_table['column']} in table {t}", - fg="white", - ) - ) - query = ( - f"SELECT regexp_matches({ids_table['column']}, '{guid_regexp}', 'g') AS extracted_id " - f"FROM {ids_table['table']}" - ) - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for i in rs: - for j in i[0]: - all_ids_in_tables.append({"table": ids_table["table"], "id": j}) - case "json": - click.echo( - click.style( - ( - f"- Listing file-id-like JSON string in column {ids_table['column']} " - f"in table {ids_table['table']}" - ), - fg="white", - ) - ) - query = ( - f"SELECT regexp_matches({ids_table['column']}::text, '{guid_regexp}', 'g') AS extracted_id " - f"FROM {ids_table['table']}" - ) - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for i in rs: - for j in i[0]: - all_ids_in_tables.append({"table": ids_table["table"], "id": j}) - case _: - pass - click.echo(click.style(f"Found {len(all_ids_in_tables)} file ids in tables.", fg="white")) - - except Exception as e: - click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) - return - - # find orphaned files - all_files = [file["id"] for file in all_files_in_tables] - all_ids = [file["id"] for file in all_ids_in_tables] - orphaned_files = list(set(all_files) - set(all_ids)) - if not orphaned_files: - click.echo(click.style("No orphaned file records found. There is nothing to delete.", fg="green")) - return - click.echo(click.style(f"Found {len(orphaned_files)} orphaned file records.", fg="white")) - for file in orphaned_files: - click.echo(click.style(f"- orphaned file id: {file}", fg="black")) - if not force: - click.confirm(f"Do you want to proceed to delete all {len(orphaned_files)} orphaned file records?", abort=True) - - # delete orphaned records for each file - try: - for files_table in files_tables: - click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white")) - query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids" - with db.engine.begin() as conn: - conn.execute(sa.text(query), {"ids": tuple(orphaned_files)}) - except Exception as e: - click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red")) - return - click.echo(click.style(f"Removed {len(orphaned_files)} orphaned file records.", fg="green")) - - -@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") -@click.command("remove-orphaned-files-on-storage", help="Remove orphaned files on the storage.") -def remove_orphaned_files_on_storage(force: bool): - """ - Remove orphaned files on the storage. - """ - - # define tables and columns to process - files_tables = [ - {"table": "upload_files", "key_column": "key"}, - {"table": "tool_files", "key_column": "file_key"}, - ] - storage_paths = ["image_files", "tools", "upload_files"] - - # notify user and ask for confirmation - click.echo(click.style("This command will find and remove orphaned files on the storage,", fg="yellow")) - click.echo( - click.style("by comparing the files on the storage with the records in the following tables:", fg="yellow") - ) - for files_table in files_tables: - click.echo(click.style(f"- {files_table['table']}", fg="yellow")) - click.echo(click.style("The following paths on the storage will be scanned to find orphaned files:", fg="yellow")) - for storage_path in storage_paths: - click.echo(click.style(f"- {storage_path}", fg="yellow")) - click.echo("") - - click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red")) - click.echo( - click.style( - "Currently, this command will work only for opendal based storage (STORAGE_TYPE=opendal).", fg="yellow" - ) - ) - click.echo( - click.style( - "Since not all patterns have been fully tested, please note that this command may delete unintended files.", - fg="yellow", - ) - ) - click.echo( - click.style("This cannot be undone. Please make sure to back up your storage before proceeding.", fg="yellow") - ) - click.echo( - click.style( - ( - "It is also recommended to run this during the maintenance window, " - "as this may cause high load on your instance." - ), - fg="yellow", - ) - ) - if not force: - click.confirm("Do you want to proceed?", abort=True) - - # start the cleanup process - click.echo(click.style("Starting orphaned files cleanup.", fg="white")) - - # fetch file id and keys from each table - all_files_in_tables = [] - try: - for files_table in files_tables: - click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white")) - query = f"SELECT {files_table['key_column']} FROM {files_table['table']}" - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for i in rs: - all_files_in_tables.append(str(i[0])) - click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) - except Exception as e: - click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) - return - - all_files_on_storage = [] - for storage_path in storage_paths: - try: - click.echo(click.style(f"- Scanning files on storage path {storage_path}", fg="white")) - files = storage.scan(path=storage_path, files=True, directories=False) - all_files_on_storage.extend(files) - except FileNotFoundError: - click.echo(click.style(f" -> Skipping path {storage_path} as it does not exist.", fg="yellow")) - continue - except Exception as e: - click.echo(click.style(f" -> Error scanning files on storage path {storage_path}: {str(e)}", fg="red")) - continue - click.echo(click.style(f"Found {len(all_files_on_storage)} files on storage.", fg="white")) - - # find orphaned files - orphaned_files = list(set(all_files_on_storage) - set(all_files_in_tables)) - if not orphaned_files: - click.echo(click.style("No orphaned files found. There is nothing to remove.", fg="green")) - return - click.echo(click.style(f"Found {len(orphaned_files)} orphaned files.", fg="white")) - for file in orphaned_files: - click.echo(click.style(f"- orphaned file: {file}", fg="black")) - if not force: - click.confirm(f"Do you want to proceed to remove all {len(orphaned_files)} orphaned files?", abort=True) - - # delete orphaned files - removed_files = 0 - error_files = 0 - for file in orphaned_files: - try: - storage.delete(file) - removed_files += 1 - click.echo(click.style(f"- Removing orphaned file: {file}", fg="white")) - except Exception as e: - error_files += 1 - click.echo(click.style(f"- Error deleting orphaned file {file}: {str(e)}", fg="red")) - continue - if error_files == 0: - click.echo(click.style(f"Removed {removed_files} orphaned files without errors.", fg="green")) - else: - click.echo(click.style(f"Removed {removed_files} orphaned files, with {error_files} errors.", fg="yellow")) - - -@click.command("file-usage", help="Query file usages and show where files are referenced.") -@click.option("--file-id", type=str, default=None, help="Filter by file UUID.") -@click.option("--key", type=str, default=None, help="Filter by storage key.") -@click.option("--src", type=str, default=None, help="Filter by table.column pattern (e.g., 'documents.%' or '%.icon').") -@click.option("--limit", type=int, default=100, help="Limit number of results (default: 100).") -@click.option("--offset", type=int, default=0, help="Offset for pagination (default: 0).") -@click.option("--json", "output_json", is_flag=True, help="Output results in JSON format.") -def file_usage( - file_id: str | None, - key: str | None, - src: str | None, - limit: int, - offset: int, - output_json: bool, -): - """ - Query file usages and show where files are referenced in the database. - - This command reuses the same reference checking logic as clear-orphaned-file-records - and displays detailed information about where each file is referenced. - """ - # define tables and columns to process - files_tables = [ - {"table": "upload_files", "id_column": "id", "key_column": "key"}, - {"table": "tool_files", "id_column": "id", "key_column": "file_key"}, - ] - ids_tables = [ - {"type": "uuid", "table": "message_files", "column": "upload_file_id", "pk_column": "id"}, - {"type": "text", "table": "documents", "column": "data_source_info", "pk_column": "id"}, - {"type": "text", "table": "document_segments", "column": "content", "pk_column": "id"}, - {"type": "text", "table": "messages", "column": "answer", "pk_column": "id"}, - {"type": "text", "table": "workflow_node_executions", "column": "inputs", "pk_column": "id"}, - {"type": "text", "table": "workflow_node_executions", "column": "process_data", "pk_column": "id"}, - {"type": "text", "table": "workflow_node_executions", "column": "outputs", "pk_column": "id"}, - {"type": "text", "table": "conversations", "column": "introduction", "pk_column": "id"}, - {"type": "text", "table": "conversations", "column": "system_instruction", "pk_column": "id"}, - {"type": "text", "table": "accounts", "column": "avatar", "pk_column": "id"}, - {"type": "text", "table": "apps", "column": "icon", "pk_column": "id"}, - {"type": "text", "table": "sites", "column": "icon", "pk_column": "id"}, - {"type": "json", "table": "messages", "column": "inputs", "pk_column": "id"}, - {"type": "json", "table": "messages", "column": "message", "pk_column": "id"}, - ] - - # Stream file usages with pagination to avoid holding all results in memory - paginated_usages = [] - total_count = 0 - - # First, build a mapping of file_id -> storage_key from the base tables - file_key_map = {} - for files_table in files_tables: - query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}" - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for row in rs: - file_key_map[str(row[0])] = f"{files_table['table']}:{row[1]}" - - # If filtering by key or file_id, verify it exists - if file_id and file_id not in file_key_map: - if output_json: - click.echo(json.dumps({"error": f"File ID {file_id} not found in base tables"})) - else: - click.echo(click.style(f"File ID {file_id} not found in base tables.", fg="red")) - return - - if key: - valid_prefixes = {f"upload_files:{key}", f"tool_files:{key}"} - matching_file_ids = [fid for fid, fkey in file_key_map.items() if fkey in valid_prefixes] - if not matching_file_ids: - if output_json: - click.echo(json.dumps({"error": f"Key {key} not found in base tables"})) - else: - click.echo(click.style(f"Key {key} not found in base tables.", fg="red")) - return - - guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" - - # For each reference table/column, find matching file IDs and record the references - for ids_table in ids_tables: - src_filter = f"{ids_table['table']}.{ids_table['column']}" - - # Skip if src filter doesn't match (use fnmatch for wildcard patterns) - if src: - if "%" in src or "_" in src: - import fnmatch - - # Convert SQL LIKE wildcards to fnmatch wildcards (% -> *, _ -> ?) - pattern = src.replace("%", "*").replace("_", "?") - if not fnmatch.fnmatch(src_filter, pattern): - continue - else: - if src_filter != src: - continue - - match ids_table["type"]: - case "uuid": - # Direct UUID match - query = ( - f"SELECT {ids_table['pk_column']}, {ids_table['column']} " - f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" - ) - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for row in rs: - record_id = str(row[0]) - ref_file_id = str(row[1]) - if ref_file_id not in file_key_map: - continue - storage_key = file_key_map[ref_file_id] - - # Apply filters - if file_id and ref_file_id != file_id: - continue - if key and not storage_key.endswith(key): - continue - - # Only collect items within the requested page range - if offset <= total_count < offset + limit: - paginated_usages.append( - { - "src": f"{ids_table['table']}.{ids_table['column']}", - "record_id": record_id, - "file_id": ref_file_id, - "key": storage_key, - } - ) - total_count += 1 - - case "text" | "json": - # Extract UUIDs from text/json content - column_cast = f"{ids_table['column']}::text" if ids_table["type"] == "json" else ids_table["column"] - query = ( - f"SELECT {ids_table['pk_column']}, {column_cast} " - f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" - ) - with db.engine.begin() as conn: - rs = conn.execute(sa.text(query)) - for row in rs: - record_id = str(row[0]) - content = str(row[1]) - - # Find all UUIDs in the content - import re - - uuid_pattern = re.compile(guid_regexp, re.IGNORECASE) - matches = uuid_pattern.findall(content) - - for ref_file_id in matches: - if ref_file_id not in file_key_map: - continue - storage_key = file_key_map[ref_file_id] - - # Apply filters - if file_id and ref_file_id != file_id: - continue - if key and not storage_key.endswith(key): - continue - - # Only collect items within the requested page range - if offset <= total_count < offset + limit: - paginated_usages.append( - { - "src": f"{ids_table['table']}.{ids_table['column']}", - "record_id": record_id, - "file_id": ref_file_id, - "key": storage_key, - } - ) - total_count += 1 - case _: - pass - - # Output results - if output_json: - result = { - "total": total_count, - "offset": offset, - "limit": limit, - "usages": paginated_usages, - } - click.echo(json.dumps(result, indent=2)) - else: - click.echo( - click.style(f"Found {total_count} file usages (showing {len(paginated_usages)} results)", fg="white") - ) - click.echo("") - - if not paginated_usages: - click.echo(click.style("No file usages found matching the specified criteria.", fg="yellow")) - return - - # Print table header - click.echo( - click.style( - f"{'Src (Table.Column)':<50} {'Record ID':<40} {'File ID':<40} {'Storage Key':<60}", - fg="cyan", - ) - ) - click.echo(click.style("-" * 190, fg="white")) - - # Print each usage - for usage in paginated_usages: - click.echo(f"{usage['src']:<50} {usage['record_id']:<40} {usage['file_id']:<40} {usage['key']:<60}") - - # Show pagination info - if offset + limit < total_count: - click.echo("") - click.echo( - click.style( - f"Showing {offset + 1}-{offset + len(paginated_usages)} of {total_count} results", fg="white" - ) - ) - click.echo(click.style(f"Use --offset {offset + limit} to see next page", fg="white")) - - -@click.command("setup-sandbox-system-config", help="Setup system-level sandbox provider configuration.") -@click.option( - "--provider-type", prompt=True, type=click.Choice(["e2b", "docker", "local", "ssh"]), help="Sandbox provider type" -) -@click.option("--config", prompt=True, help='Configuration JSON (e.g., {"api_key": "xxx"} for e2b)') -def setup_sandbox_system_config(provider_type: str, config: str): - """ - Setup system-level sandbox provider configuration. - - Examples: - flask setup-sandbox-system-config --provider-type e2b --config '{"api_key": "e2b_xxx"}' - flask setup-sandbox-system-config --provider-type docker --config '{"docker_sock": "unix:///var/run/docker.sock"}' - flask setup-sandbox-system-config --provider-type local --config '{}' - flask setup-sandbox-system-config --provider-type ssh --config \ - '{"ssh_host": "agentbox", "ssh_port": "22", "ssh_username": "agentbox", "ssh_password": "agentbox"}' - """ - from models.sandbox import SandboxProviderSystemConfig - - try: - click.echo(click.style(f"Validating config: {config}", fg="yellow")) - config_dict = TypeAdapter(dict[str, Any]).validate_json(config) - click.echo(click.style("Config validated successfully.", fg="green")) - - click.echo(click.style(f"Validating config schema for provider type: {provider_type}", fg="yellow")) - SandboxBuilder.validate(SandboxType(provider_type), config_dict) - click.echo(click.style("Config schema validated successfully.", fg="green")) - - click.echo(click.style("Encrypting config...", fg="yellow")) - click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow")) - encrypted_config = encrypt_system_params(config_dict) - click.echo(click.style("Config encrypted successfully.", fg="green")) - except Exception as e: - click.echo(click.style(f"Error validating/encrypting config: {str(e)}", fg="red")) - return - - deleted_count = db.session.query(SandboxProviderSystemConfig).filter_by(provider_type=provider_type).delete() - if deleted_count > 0: - click.echo( - click.style( - f"Deleted {deleted_count} existing system config for provider type: {provider_type}", fg="yellow" - ) - ) - - system_config = SandboxProviderSystemConfig( - provider_type=provider_type, - encrypted_config=encrypted_config, - ) - db.session.add(system_config) - db.session.commit() - click.echo(click.style(f"Sandbox system config setup successfully. id: {system_config.id}", fg="green")) - click.echo(click.style(f"Provider type: {provider_type}", fg="green")) - - -@click.command("setup-system-tool-oauth-client", help="Setup system tool oauth client.") -@click.option("--provider", prompt=True, help="Provider name") -@click.option("--client-params", prompt=True, help="Client Params") -def setup_system_tool_oauth_client(provider, client_params): - """ - Setup system tool oauth client - """ - provider_id = ToolProviderID(provider) - provider_name = provider_id.provider_name - plugin_id = provider_id.plugin_id - - try: - # json validate - click.echo(click.style(f"Validating client params: {client_params}", fg="yellow")) - client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params) - click.echo(click.style("Client params validated successfully.", fg="green")) - - click.echo(click.style(f"Encrypting client params: {client_params}", fg="yellow")) - click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow")) - oauth_client_params = encrypt_system_params(client_params_dict) - click.echo(click.style("Client params encrypted successfully.", fg="green")) - except Exception as e: - click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) - return - - deleted_count = ( - db.session.query(ToolOAuthSystemClient) - .filter_by( - provider=provider_name, - plugin_id=plugin_id, - ) - .delete() - ) - if deleted_count > 0: - click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow")) - - oauth_client = ToolOAuthSystemClient( - provider=provider_name, - plugin_id=plugin_id, - encrypted_oauth_params=oauth_client_params, - ) - db.session.add(oauth_client) - db.session.commit() - click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green")) - - -@click.command("setup-system-trigger-oauth-client", help="Setup system trigger oauth client.") -@click.option("--provider", prompt=True, help="Provider name") -@click.option("--client-params", prompt=True, help="Client Params") -def setup_system_trigger_oauth_client(provider, client_params): - """ - Setup system trigger oauth client - """ - from models.provider_ids import TriggerProviderID - from models.trigger import TriggerOAuthSystemClient - - provider_id = TriggerProviderID(provider) - provider_name = provider_id.provider_name - plugin_id = provider_id.plugin_id - - try: - # json validate - click.echo(click.style(f"Validating client params: {client_params}", fg="yellow")) - client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params) - click.echo(click.style("Client params validated successfully.", fg="green")) - - click.echo(click.style(f"Encrypting client params: {client_params}", fg="yellow")) - click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow")) - oauth_client_params = encrypt_system_params(client_params_dict) - click.echo(click.style("Client params encrypted successfully.", fg="green")) - except Exception as e: - click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) - return - - deleted_count = ( - db.session.query(TriggerOAuthSystemClient) - .filter_by( - provider=provider_name, - plugin_id=plugin_id, - ) - .delete() - ) - if deleted_count > 0: - click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow")) - - oauth_client = TriggerOAuthSystemClient( - provider=provider_name, - plugin_id=plugin_id, - encrypted_oauth_params=oauth_client_params, - ) - db.session.add(oauth_client) - db.session.commit() - click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green")) - - -def _find_orphaned_draft_variables(batch_size: int = 1000) -> list[str]: - """ - Find draft variables that reference non-existent apps. - - Args: - batch_size: Maximum number of orphaned app IDs to return - - Returns: - List of app IDs that have draft variables but don't exist in the apps table - """ - query = """ - SELECT DISTINCT wdv.app_id - FROM workflow_draft_variables AS wdv - WHERE NOT EXISTS( - SELECT 1 FROM apps WHERE apps.id = wdv.app_id - ) - LIMIT :batch_size - """ - - with db.engine.connect() as conn: - result = conn.execute(sa.text(query), {"batch_size": batch_size}) - return [row[0] for row in result] - - -def _count_orphaned_draft_variables() -> dict[str, Any]: - """ - Count orphaned draft variables by app, including associated file counts. - - Returns: - Dictionary with statistics about orphaned variables and files - """ - # Count orphaned variables by app - variables_query = """ - SELECT - wdv.app_id, - COUNT(*) as variable_count, - COUNT(wdv.file_id) as file_count - FROM workflow_draft_variables AS wdv - WHERE NOT EXISTS( - SELECT 1 FROM apps WHERE apps.id = wdv.app_id - ) - GROUP BY wdv.app_id - ORDER BY variable_count DESC - """ - - with db.engine.connect() as conn: - result = conn.execute(sa.text(variables_query)) - orphaned_by_app = {} - total_files = 0 - - for row in result: - app_id, variable_count, file_count = row - orphaned_by_app[app_id] = {"variables": variable_count, "files": file_count} - total_files += file_count - - total_orphaned = sum(app_data["variables"] for app_data in orphaned_by_app.values()) - app_count = len(orphaned_by_app) - - return { - "total_orphaned_variables": total_orphaned, - "total_orphaned_files": total_files, - "orphaned_app_count": app_count, - "orphaned_by_app": orphaned_by_app, - } - - -@click.command() -@click.option("--dry-run", is_flag=True, help="Show what would be deleted without actually deleting") -@click.option("--batch-size", default=1000, help="Number of records to process per batch (default 1000)") -@click.option("--max-apps", default=None, type=int, help="Maximum number of apps to process (default: no limit)") -@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") -def cleanup_orphaned_draft_variables( - dry_run: bool, - batch_size: int, - max_apps: int | None, - force: bool = False, -): - """ - Clean up orphaned draft variables from the database. - - This script finds and removes draft variables that belong to apps - that no longer exist in the database. - """ - logger = logging.getLogger(__name__) - - # Get statistics - stats = _count_orphaned_draft_variables() - - logger.info("Found %s orphaned draft variables", stats["total_orphaned_variables"]) - logger.info("Found %s associated offload files", stats["total_orphaned_files"]) - logger.info("Across %s non-existent apps", stats["orphaned_app_count"]) - - if stats["total_orphaned_variables"] == 0: - logger.info("No orphaned draft variables found. Exiting.") - return - - if dry_run: - logger.info("DRY RUN: Would delete the following:") - for app_id, data in sorted(stats["orphaned_by_app"].items(), key=lambda x: x[1]["variables"], reverse=True)[ - :10 - ]: # Show top 10 - logger.info(" App %s: %s variables, %s files", app_id, data["variables"], data["files"]) - if len(stats["orphaned_by_app"]) > 10: - logger.info(" ... and %s more apps", len(stats["orphaned_by_app"]) - 10) - return - - # Confirm deletion - if not force: - click.confirm( - f"Are you sure you want to delete {stats['total_orphaned_variables']} " - f"orphaned draft variables and {stats['total_orphaned_files']} associated files " - f"from {stats['orphaned_app_count']} apps?", - abort=True, - ) - - total_deleted = 0 - processed_apps = 0 - - while True: - if max_apps and processed_apps >= max_apps: - logger.info("Reached maximum app limit (%s). Stopping.", max_apps) - break - - orphaned_app_ids = _find_orphaned_draft_variables(batch_size=10) - if not orphaned_app_ids: - logger.info("No more orphaned draft variables found.") - break - - for app_id in orphaned_app_ids: - if max_apps and processed_apps >= max_apps: - break - - try: - deleted_count = delete_draft_variables_batch(app_id, batch_size) - total_deleted += deleted_count - processed_apps += 1 - - logger.info("Deleted %s variables for app %s", deleted_count, app_id) - - except Exception: - logger.exception("Error processing app %s", app_id) - continue - - logger.info("Cleanup completed. Total deleted: %s variables across %s apps", total_deleted, processed_apps) - - -@click.command("setup-datasource-oauth-client", help="Setup datasource oauth client.") -@click.option("--provider", prompt=True, help="Provider name") -@click.option("--client-params", prompt=True, help="Client Params") -def setup_datasource_oauth_client(provider, client_params): - """ - Setup datasource oauth client - """ - provider_id = DatasourceProviderID(provider) - provider_name = provider_id.provider_name - plugin_id = provider_id.plugin_id - - try: - # json validate - click.echo(click.style(f"Validating client params: {client_params}", fg="yellow")) - client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params) - click.echo(click.style("Client params validated successfully.", fg="green")) - except Exception as e: - click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) - return - - click.echo(click.style(f"Ready to delete existing oauth client params: {provider_name}", fg="yellow")) - deleted_count = ( - db.session.query(DatasourceOauthParamConfig) - .filter_by( - provider=provider_name, - plugin_id=plugin_id, - ) - .delete() - ) - if deleted_count > 0: - click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow")) - - click.echo(click.style(f"Ready to setup datasource oauth client: {provider_name}", fg="yellow")) - oauth_client = DatasourceOauthParamConfig( - provider=provider_name, - plugin_id=plugin_id, - system_credentials=client_params_dict, - ) - db.session.add(oauth_client) - db.session.commit() - click.echo(click.style(f"provider: {provider_name}", fg="green")) - click.echo(click.style(f"plugin_id: {plugin_id}", fg="green")) - click.echo(click.style(f"params: {json.dumps(client_params_dict, indent=2, ensure_ascii=False)}", fg="green")) - click.echo(click.style(f"Datasource oauth client setup successfully. id: {oauth_client.id}", fg="green")) - - -@click.command("transform-datasource-credentials", help="Transform datasource credentials.") -@click.option( - "--environment", prompt=True, help="the environment to transform datasource credentials", default="online" -) -def transform_datasource_credentials(environment: str): - """ - Transform datasource credentials - """ - try: - installer_manager = PluginInstaller() - plugin_migration = PluginMigration() - - notion_plugin_id = "langgenius/notion_datasource" - firecrawl_plugin_id = "langgenius/firecrawl_datasource" - jina_plugin_id = "langgenius/jina_datasource" - if environment == "online": - notion_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(notion_plugin_id) # pyright: ignore[reportPrivateUsage] - firecrawl_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(firecrawl_plugin_id) # pyright: ignore[reportPrivateUsage] - jina_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(jina_plugin_id) # pyright: ignore[reportPrivateUsage] - else: - notion_plugin_unique_identifier = None - firecrawl_plugin_unique_identifier = None - jina_plugin_unique_identifier = None - oauth_credential_type = CredentialType.OAUTH2 - api_key_credential_type = CredentialType.API_KEY - - # deal notion credentials - deal_notion_count = 0 - notion_credentials = db.session.query(DataSourceOauthBinding).filter_by(provider="notion").all() - if notion_credentials: - notion_credentials_tenant_mapping: dict[str, list[DataSourceOauthBinding]] = {} - for notion_credential in notion_credentials: - tenant_id = notion_credential.tenant_id - if tenant_id not in notion_credentials_tenant_mapping: - notion_credentials_tenant_mapping[tenant_id] = [] - notion_credentials_tenant_mapping[tenant_id].append(notion_credential) - for tenant_id, notion_tenant_credentials in notion_credentials_tenant_mapping.items(): - tenant = db.session.query(Tenant).filter_by(id=tenant_id).first() - if not tenant: - continue - try: - # check notion plugin is installed - installed_plugins = installer_manager.list_plugins(tenant_id) - installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] - if notion_plugin_id not in installed_plugins_ids: - if notion_plugin_unique_identifier: - # install notion plugin - PluginService.install_from_marketplace_pkg(tenant_id, [notion_plugin_unique_identifier]) - auth_count = 0 - for notion_tenant_credential in notion_tenant_credentials: - auth_count += 1 - # get credential oauth params - access_token = notion_tenant_credential.access_token - # notion info - notion_info = notion_tenant_credential.source_info - workspace_id = notion_info.get("workspace_id") - workspace_name = notion_info.get("workspace_name") - workspace_icon = notion_info.get("workspace_icon") - new_credentials = { - "integration_secret": encrypter.encrypt_token(tenant_id, access_token), - "workspace_id": workspace_id, - "workspace_name": workspace_name, - "workspace_icon": workspace_icon, - } - datasource_provider = DatasourceProvider( - provider="notion_datasource", - tenant_id=tenant_id, - plugin_id=notion_plugin_id, - auth_type=oauth_credential_type.value, - encrypted_credentials=new_credentials, - name=f"Auth {auth_count}", - avatar_url=workspace_icon or "default", - is_default=False, - ) - db.session.add(datasource_provider) - deal_notion_count += 1 - except Exception as e: - click.echo( - click.style( - f"Error transforming notion credentials: {str(e)}, tenant_id: {tenant_id}", fg="red" - ) - ) - continue - db.session.commit() - # deal firecrawl credentials - deal_firecrawl_count = 0 - firecrawl_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="firecrawl").all() - if firecrawl_credentials: - firecrawl_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {} - for firecrawl_credential in firecrawl_credentials: - tenant_id = firecrawl_credential.tenant_id - if tenant_id not in firecrawl_credentials_tenant_mapping: - firecrawl_credentials_tenant_mapping[tenant_id] = [] - firecrawl_credentials_tenant_mapping[tenant_id].append(firecrawl_credential) - for tenant_id, firecrawl_tenant_credentials in firecrawl_credentials_tenant_mapping.items(): - tenant = db.session.query(Tenant).filter_by(id=tenant_id).first() - if not tenant: - continue - try: - # check firecrawl plugin is installed - installed_plugins = installer_manager.list_plugins(tenant_id) - installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] - if firecrawl_plugin_id not in installed_plugins_ids: - if firecrawl_plugin_unique_identifier: - # install firecrawl plugin - PluginService.install_from_marketplace_pkg(tenant_id, [firecrawl_plugin_unique_identifier]) - - auth_count = 0 - for firecrawl_tenant_credential in firecrawl_tenant_credentials: - auth_count += 1 - if not firecrawl_tenant_credential.credentials: - click.echo( - click.style( - f"Skipping firecrawl credential for tenant {tenant_id} due to missing credentials.", - fg="yellow", - ) - ) - continue - # get credential api key - credentials_json = json.loads(firecrawl_tenant_credential.credentials) - api_key = credentials_json.get("config", {}).get("api_key") - base_url = credentials_json.get("config", {}).get("base_url") - new_credentials = { - "firecrawl_api_key": api_key, - "base_url": base_url, - } - datasource_provider = DatasourceProvider( - provider="firecrawl", - tenant_id=tenant_id, - plugin_id=firecrawl_plugin_id, - auth_type=api_key_credential_type.value, - encrypted_credentials=new_credentials, - name=f"Auth {auth_count}", - avatar_url="default", - is_default=False, - ) - db.session.add(datasource_provider) - deal_firecrawl_count += 1 - except Exception as e: - click.echo( - click.style( - f"Error transforming firecrawl credentials: {str(e)}, tenant_id: {tenant_id}", fg="red" - ) - ) - continue - db.session.commit() - # deal jina credentials - deal_jina_count = 0 - jina_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="jinareader").all() - if jina_credentials: - jina_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {} - for jina_credential in jina_credentials: - tenant_id = jina_credential.tenant_id - if tenant_id not in jina_credentials_tenant_mapping: - jina_credentials_tenant_mapping[tenant_id] = [] - jina_credentials_tenant_mapping[tenant_id].append(jina_credential) - for tenant_id, jina_tenant_credentials in jina_credentials_tenant_mapping.items(): - tenant = db.session.query(Tenant).filter_by(id=tenant_id).first() - if not tenant: - continue - try: - # check jina plugin is installed - installed_plugins = installer_manager.list_plugins(tenant_id) - installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] - if jina_plugin_id not in installed_plugins_ids: - if jina_plugin_unique_identifier: - # install jina plugin - logger.debug("Installing Jina plugin %s", jina_plugin_unique_identifier) - PluginService.install_from_marketplace_pkg(tenant_id, [jina_plugin_unique_identifier]) - - auth_count = 0 - for jina_tenant_credential in jina_tenant_credentials: - auth_count += 1 - if not jina_tenant_credential.credentials: - click.echo( - click.style( - f"Skipping jina credential for tenant {tenant_id} due to missing credentials.", - fg="yellow", - ) - ) - continue - # get credential api key - credentials_json = json.loads(jina_tenant_credential.credentials) - api_key = credentials_json.get("config", {}).get("api_key") - new_credentials = { - "integration_secret": api_key, - } - datasource_provider = DatasourceProvider( - provider="jinareader", - tenant_id=tenant_id, - plugin_id=jina_plugin_id, - auth_type=api_key_credential_type.value, - encrypted_credentials=new_credentials, - name=f"Auth {auth_count}", - avatar_url="default", - is_default=False, - ) - db.session.add(datasource_provider) - deal_jina_count += 1 - except Exception as e: - click.echo( - click.style(f"Error transforming jina credentials: {str(e)}, tenant_id: {tenant_id}", fg="red") - ) - continue - db.session.commit() - except Exception as e: - click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) - return - click.echo(click.style(f"Transforming notion successfully. deal_notion_count: {deal_notion_count}", fg="green")) - click.echo( - click.style(f"Transforming firecrawl successfully. deal_firecrawl_count: {deal_firecrawl_count}", fg="green") - ) - click.echo(click.style(f"Transforming jina successfully. deal_jina_count: {deal_jina_count}", fg="green")) - - -@click.command("install-rag-pipeline-plugins", help="Install rag pipeline plugins.") -@click.option( - "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl" -) -@click.option( - "--output_file", prompt=True, help="The file to store the installed plugins.", default="installed_plugins.jsonl" -) -@click.option("--workers", prompt=True, help="The number of workers to install plugins.", default=100) -def install_rag_pipeline_plugins(input_file, output_file, workers): - """ - Install rag pipeline plugins - """ - click.echo(click.style("Installing rag pipeline plugins", fg="yellow")) - plugin_migration = PluginMigration() - plugin_migration.install_rag_pipeline_plugins( - input_file, - output_file, - workers, - ) - click.echo(click.style("Installing rag pipeline plugins successfully", fg="green")) - - -@click.command( - "migrate-oss", - help="Migrate files from Local or OpenDAL source to a cloud OSS storage (destination must NOT be local/opendal).", -) -@click.option( - "--path", - "paths", - multiple=True, - help="Storage path prefixes to migrate (repeatable). Defaults: privkeys, upload_files, image_files," - " tools, website_files, keyword_files, ops_trace", -) -@click.option( - "--source", - type=click.Choice(["local", "opendal"], case_sensitive=False), - default="opendal", - show_default=True, - help="Source storage type to read from", -) -@click.option("--overwrite", is_flag=True, default=False, help="Overwrite destination if file already exists") -@click.option("--dry-run", is_flag=True, default=False, help="Show what would be migrated without uploading") -@click.option("-f", "--force", is_flag=True, help="Skip confirmation and run without prompts") -@click.option( - "--update-db/--no-update-db", - default=True, - help="Update upload_files.storage_type from source type to current storage after migration", -) -def migrate_oss( - paths: tuple[str, ...], - source: str, - overwrite: bool, - dry_run: bool, - force: bool, - update_db: bool, -): - """ - Copy all files under selected prefixes from a source storage - (Local filesystem or OpenDAL-backed) into the currently configured - destination storage backend, then optionally update DB records. - - Expected usage: set STORAGE_TYPE (and its credentials) to your target backend. - """ - # Ensure target storage is not local/opendal - if dify_config.STORAGE_TYPE in (StorageType.LOCAL, StorageType.OPENDAL): - click.echo( - click.style( - "Target STORAGE_TYPE must be a cloud OSS (not 'local' or 'opendal').\n" - "Please set STORAGE_TYPE to one of: s3, aliyun-oss, azure-blob, google-storage, tencent-cos, \n" - "volcengine-tos, supabase, oci-storage, huawei-obs, baidu-obs, clickzetta-volume.", - fg="red", - ) - ) - return - - # Default paths if none specified - default_paths = ("privkeys", "upload_files", "image_files", "tools", "website_files", "keyword_files", "ops_trace") - path_list = list(paths) if paths else list(default_paths) - is_source_local = source.lower() == "local" - - click.echo(click.style("Preparing migration to target storage.", fg="yellow")) - click.echo(click.style(f"Target storage type: {dify_config.STORAGE_TYPE}", fg="white")) - if is_source_local: - src_root = dify_config.STORAGE_LOCAL_PATH - click.echo(click.style(f"Source: local fs, root: {src_root}", fg="white")) - else: - click.echo(click.style(f"Source: opendal scheme={dify_config.OPENDAL_SCHEME}", fg="white")) - click.echo(click.style(f"Paths to migrate: {', '.join(path_list)}", fg="white")) - click.echo("") - - if not force: - click.confirm("Proceed with migration?", abort=True) - - # Instantiate source storage - try: - if is_source_local: - src_root = dify_config.STORAGE_LOCAL_PATH - source_storage = OpenDALStorage(scheme="fs", root=src_root) - else: - source_storage = OpenDALStorage(scheme=dify_config.OPENDAL_SCHEME) - except Exception as e: - click.echo(click.style(f"Failed to initialize source storage: {str(e)}", fg="red")) - return - - total_files = 0 - copied_files = 0 - skipped_files = 0 - errored_files = 0 - copied_upload_file_keys: list[str] = [] - - for prefix in path_list: - click.echo(click.style(f"Scanning source path: {prefix}", fg="white")) - try: - keys = source_storage.scan(path=prefix, files=True, directories=False) - except FileNotFoundError: - click.echo(click.style(f" -> Skipping missing path: {prefix}", fg="yellow")) - continue - except NotImplementedError: - click.echo(click.style(" -> Source storage does not support scanning.", fg="red")) - return - except Exception as e: - click.echo(click.style(f" -> Error scanning '{prefix}': {str(e)}", fg="red")) - continue - - click.echo(click.style(f"Found {len(keys)} files under {prefix}", fg="white")) - - for key in keys: - total_files += 1 - - # check destination existence - if not overwrite: - try: - if storage.exists(key): - skipped_files += 1 - continue - except Exception as e: - # existence check failures should not block migration attempt - # but should be surfaced to user as a warning for visibility - click.echo( - click.style( - f" -> Warning: failed target existence check for {key}: {str(e)}", - fg="yellow", - ) - ) - - if dry_run: - copied_files += 1 - continue - - # read from source and write to destination - try: - data = source_storage.load_once(key) - except FileNotFoundError: - errored_files += 1 - click.echo(click.style(f" -> Missing on source: {key}", fg="yellow")) - continue - except Exception as e: - errored_files += 1 - click.echo(click.style(f" -> Error reading {key}: {str(e)}", fg="red")) - continue - - try: - storage.save(key, data) - copied_files += 1 - if prefix == "upload_files": - copied_upload_file_keys.append(key) - except Exception as e: - errored_files += 1 - click.echo(click.style(f" -> Error writing {key} to target: {str(e)}", fg="red")) - continue - - click.echo("") - click.echo(click.style("Migration summary:", fg="yellow")) - click.echo(click.style(f" Total: {total_files}", fg="white")) - click.echo(click.style(f" Copied: {copied_files}", fg="green")) - click.echo(click.style(f" Skipped: {skipped_files}", fg="white")) - if errored_files: - click.echo(click.style(f" Errors: {errored_files}", fg="red")) - - if dry_run: - click.echo(click.style("Dry-run complete. No changes were made.", fg="green")) - return - - if errored_files: - click.echo( - click.style( - "Some files failed to migrate. Review errors above before updating DB records.", - fg="yellow", - ) - ) - if update_db and not force: - if not click.confirm("Proceed to update DB storage_type despite errors?", default=False): - update_db = False - - # Optionally update DB records for upload_files.storage_type (only for successfully copied upload_files) - if update_db: - if not copied_upload_file_keys: - click.echo(click.style("No upload_files copied. Skipping DB storage_type update.", fg="yellow")) - else: - try: - source_storage_type = StorageType.LOCAL if is_source_local else StorageType.OPENDAL - updated = ( - db.session.query(UploadFile) - .where( - UploadFile.storage_type == source_storage_type, - UploadFile.key.in_(copied_upload_file_keys), - ) - .update({UploadFile.storage_type: dify_config.STORAGE_TYPE}, synchronize_session=False) - ) - db.session.commit() - click.echo(click.style(f"Updated storage_type for {updated} upload_files records.", fg="green")) - except Exception as e: - db.session.rollback() - click.echo(click.style(f"Failed to update DB storage_type: {str(e)}", fg="red")) - - -@click.command("clean-expired-messages", help="Clean expired messages.") -@click.option( - "--start-from", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - required=True, - help="Lower bound (inclusive) for created_at.", -) -@click.option( - "--end-before", - type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), - required=True, - help="Upper bound (exclusive) for created_at.", -) -@click.option("--batch-size", default=1000, show_default=True, help="Batch size for selecting messages.") -@click.option( - "--graceful-period", - default=21, - show_default=True, - help="Graceful period in days after subscription expiration, will be ignored when billing is disabled.", -) -@click.option("--dry-run", is_flag=True, default=False, help="Show messages logs would be cleaned without deleting") -def clean_expired_messages( - batch_size: int, - graceful_period: int, - start_from: datetime.datetime, - end_before: datetime.datetime, - dry_run: bool, -): - """ - Clean expired messages and related data for tenants based on clean policy. - """ - click.echo(click.style("clean_messages: start clean messages.", fg="green")) - - start_at = time.perf_counter() - - try: - # Create policy based on billing configuration - # NOTE: graceful_period will be ignored when billing is disabled. - policy = create_message_clean_policy(graceful_period_days=graceful_period) - - # Create and run the cleanup service - service = MessagesCleanService.from_time_range( - policy=policy, - start_from=start_from, - end_before=end_before, - batch_size=batch_size, - dry_run=dry_run, - ) - stats = service.run() - - end_at = time.perf_counter() - click.echo( - click.style( - f"clean_messages: completed successfully\n" - f" - Latency: {end_at - start_at:.2f}s\n" - f" - Batches processed: {stats['batches']}\n" - f" - Total messages scanned: {stats['total_messages']}\n" - f" - Messages filtered: {stats['filtered_messages']}\n" - f" - Messages deleted: {stats['total_deleted']}", - fg="green", - ) - ) - except Exception as e: - end_at = time.perf_counter() - logger.exception("clean_messages failed") - click.echo( - click.style( - f"clean_messages: failed after {end_at - start_at:.2f}s - {str(e)}", - fg="red", - ) - ) - raise - - click.echo(click.style("messages cleanup completed.", fg="green")) diff --git a/api/commands/__init__.py b/api/commands/__init__.py new file mode 100644 index 0000000000..d62d0dbd7c --- /dev/null +++ b/api/commands/__init__.py @@ -0,0 +1,71 @@ +""" +CLI command modules extracted from `commands.py`. +""" + +from .account import create_tenant, reset_email, reset_password +from .plugin import ( + extract_plugins, + extract_unique_plugins, + install_plugins, + install_rag_pipeline_plugins, + migrate_data_for_plugin, + setup_datasource_oauth_client, + setup_system_tool_oauth_client, + setup_system_trigger_oauth_client, + transform_datasource_credentials, +) +from .retention import ( + archive_workflow_runs, + clean_expired_messages, + clean_workflow_runs, + cleanup_orphaned_draft_variables, + clear_free_plan_tenant_expired_logs, + delete_archived_workflow_runs, + export_app_messages, + restore_workflow_runs, +) +from .storage import clear_orphaned_file_records, file_usage, migrate_oss, remove_orphaned_files_on_storage +from .system import convert_to_agent_apps, fix_app_site_missing, reset_encrypt_key_pair, upgrade_db +from .vector import ( + add_qdrant_index, + migrate_annotation_vector_database, + migrate_knowledge_vector_database, + old_metadata_migration, + vdb_migrate, +) + +__all__ = [ + "add_qdrant_index", + "archive_workflow_runs", + "clean_expired_messages", + "clean_workflow_runs", + "cleanup_orphaned_draft_variables", + "clear_free_plan_tenant_expired_logs", + "clear_orphaned_file_records", + "convert_to_agent_apps", + "create_tenant", + "delete_archived_workflow_runs", + "export_app_messages", + "extract_plugins", + "extract_unique_plugins", + "file_usage", + "fix_app_site_missing", + "install_plugins", + "install_rag_pipeline_plugins", + "migrate_annotation_vector_database", + "migrate_data_for_plugin", + "migrate_knowledge_vector_database", + "migrate_oss", + "old_metadata_migration", + "remove_orphaned_files_on_storage", + "reset_email", + "reset_encrypt_key_pair", + "reset_password", + "restore_workflow_runs", + "setup_datasource_oauth_client", + "setup_system_tool_oauth_client", + "setup_system_trigger_oauth_client", + "transform_datasource_credentials", + "upgrade_db", + "vdb_migrate", +] diff --git a/api/commands/account.py b/api/commands/account.py new file mode 100644 index 0000000000..84af7a5ae6 --- /dev/null +++ b/api/commands/account.py @@ -0,0 +1,130 @@ +import base64 +import secrets + +import click +from sqlalchemy.orm import sessionmaker + +from constants.languages import languages +from extensions.ext_database import db +from libs.helper import email as email_validate +from libs.password import hash_password, password_pattern, valid_password +from services.account_service import AccountService, RegisterService, TenantService + + +@click.command("reset-password", help="Reset the account password.") +@click.option("--email", prompt=True, help="Account email to reset password for") +@click.option("--new-password", prompt=True, help="New password") +@click.option("--password-confirm", prompt=True, help="Confirm new password") +def reset_password(email, new_password, password_confirm): + """ + Reset password of owner account + Only available in SELF_HOSTED mode + """ + if str(new_password).strip() != str(password_confirm).strip(): + click.echo(click.style("Passwords do not match.", fg="red")) + return + normalized_email = email.strip().lower() + + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + account = AccountService.get_account_by_email_with_case_fallback(email.strip(), session=session) + + if not account: + click.echo(click.style(f"Account not found for email: {email}", fg="red")) + return + + try: + valid_password(new_password) + except: + click.echo(click.style(f"Invalid password. Must match {password_pattern}", fg="red")) + return + + # generate password salt + salt = secrets.token_bytes(16) + base64_salt = base64.b64encode(salt).decode() + + # encrypt password with salt + password_hashed = hash_password(new_password, salt) + base64_password_hashed = base64.b64encode(password_hashed).decode() + account.password = base64_password_hashed + account.password_salt = base64_salt + AccountService.reset_login_error_rate_limit(normalized_email) + click.echo(click.style("Password reset successfully.", fg="green")) + + +@click.command("reset-email", help="Reset the account email.") +@click.option("--email", prompt=True, help="Current account email") +@click.option("--new-email", prompt=True, help="New email") +@click.option("--email-confirm", prompt=True, help="Confirm new email") +def reset_email(email, new_email, email_confirm): + """ + Replace account email + :return: + """ + if str(new_email).strip() != str(email_confirm).strip(): + click.echo(click.style("New emails do not match.", fg="red")) + return + normalized_new_email = new_email.strip().lower() + + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + account = AccountService.get_account_by_email_with_case_fallback(email.strip(), session=session) + + if not account: + click.echo(click.style(f"Account not found for email: {email}", fg="red")) + return + + try: + email_validate(normalized_new_email) + except: + click.echo(click.style(f"Invalid email: {new_email}", fg="red")) + return + + account.email = normalized_new_email + click.echo(click.style("Email updated successfully.", fg="green")) + + +@click.command("create-tenant", help="Create account and tenant.") +@click.option("--email", prompt=True, help="Tenant account email.") +@click.option("--name", prompt=True, help="Workspace name.") +@click.option("--language", prompt=True, help="Account language, default: en-US.") +def create_tenant(email: str, language: str | None = None, name: str | None = None): + """ + Create tenant account + """ + if not email: + click.echo(click.style("Email is required.", fg="red")) + return + + # Create account + email = email.strip().lower() + + if "@" not in email: + click.echo(click.style("Invalid email address.", fg="red")) + return + + account_name = email.split("@")[0] + + if language not in languages: + language = "en-US" + + # Validates name encoding for non-Latin characters. + name = name.strip().encode("utf-8").decode("utf-8") if name else None + + # generate random password + new_password = secrets.token_urlsafe(16) + + # register account + account = RegisterService.register( + email=email, + name=account_name, + password=new_password, + language=language, + create_workspace_required=False, + ) + TenantService.create_owner_tenant_if_not_exist(account, name) + + click.echo( + click.style( + f"Account and tenant created.\nAccount: {email}\nPassword: {new_password}", + fg="green", + ) + ) diff --git a/api/commands/plugin.py b/api/commands/plugin.py new file mode 100644 index 0000000000..2dfbd73b3a --- /dev/null +++ b/api/commands/plugin.py @@ -0,0 +1,467 @@ +import json +import logging +from typing import Any + +import click +from pydantic import TypeAdapter + +from configs import dify_config +from core.helper import encrypter +from core.plugin.entities.plugin_daemon import CredentialType +from core.plugin.impl.plugin import PluginInstaller +from core.tools.utils.system_oauth_encryption import encrypt_system_oauth_params +from extensions.ext_database import db +from models import Tenant +from models.oauth import DatasourceOauthParamConfig, DatasourceProvider +from models.provider_ids import DatasourceProviderID, ToolProviderID +from models.source import DataSourceApiKeyAuthBinding, DataSourceOauthBinding +from models.tools import ToolOAuthSystemClient +from services.plugin.data_migration import PluginDataMigration +from services.plugin.plugin_migration import PluginMigration +from services.plugin.plugin_service import PluginService + +logger = logging.getLogger(__name__) + + +@click.command("setup-system-tool-oauth-client", help="Setup system tool oauth client.") +@click.option("--provider", prompt=True, help="Provider name") +@click.option("--client-params", prompt=True, help="Client Params") +def setup_system_tool_oauth_client(provider, client_params): + """ + Setup system tool oauth client + """ + provider_id = ToolProviderID(provider) + provider_name = provider_id.provider_name + plugin_id = provider_id.plugin_id + + try: + # json validate + click.echo(click.style(f"Validating client params: {client_params}", fg="yellow")) + client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params) + click.echo(click.style("Client params validated successfully.", fg="green")) + + click.echo(click.style(f"Encrypting client params: {client_params}", fg="yellow")) + click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow")) + oauth_client_params = encrypt_system_oauth_params(client_params_dict) + click.echo(click.style("Client params encrypted successfully.", fg="green")) + except Exception as e: + click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) + return + + deleted_count = ( + db.session.query(ToolOAuthSystemClient) + .filter_by( + provider=provider_name, + plugin_id=plugin_id, + ) + .delete() + ) + if deleted_count > 0: + click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow")) + + oauth_client = ToolOAuthSystemClient( + provider=provider_name, + plugin_id=plugin_id, + encrypted_oauth_params=oauth_client_params, + ) + db.session.add(oauth_client) + db.session.commit() + click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green")) + + +@click.command("setup-system-trigger-oauth-client", help="Setup system trigger oauth client.") +@click.option("--provider", prompt=True, help="Provider name") +@click.option("--client-params", prompt=True, help="Client Params") +def setup_system_trigger_oauth_client(provider, client_params): + """ + Setup system trigger oauth client + """ + from models.provider_ids import TriggerProviderID + from models.trigger import TriggerOAuthSystemClient + + provider_id = TriggerProviderID(provider) + provider_name = provider_id.provider_name + plugin_id = provider_id.plugin_id + + try: + # json validate + click.echo(click.style(f"Validating client params: {client_params}", fg="yellow")) + client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params) + click.echo(click.style("Client params validated successfully.", fg="green")) + + click.echo(click.style(f"Encrypting client params: {client_params}", fg="yellow")) + click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow")) + oauth_client_params = encrypt_system_oauth_params(client_params_dict) + click.echo(click.style("Client params encrypted successfully.", fg="green")) + except Exception as e: + click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) + return + + deleted_count = ( + db.session.query(TriggerOAuthSystemClient) + .filter_by( + provider=provider_name, + plugin_id=plugin_id, + ) + .delete() + ) + if deleted_count > 0: + click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow")) + + oauth_client = TriggerOAuthSystemClient( + provider=provider_name, + plugin_id=plugin_id, + encrypted_oauth_params=oauth_client_params, + ) + db.session.add(oauth_client) + db.session.commit() + click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green")) + + +@click.command("setup-datasource-oauth-client", help="Setup datasource oauth client.") +@click.option("--provider", prompt=True, help="Provider name") +@click.option("--client-params", prompt=True, help="Client Params") +def setup_datasource_oauth_client(provider, client_params): + """ + Setup datasource oauth client + """ + provider_id = DatasourceProviderID(provider) + provider_name = provider_id.provider_name + plugin_id = provider_id.plugin_id + + try: + # json validate + click.echo(click.style(f"Validating client params: {client_params}", fg="yellow")) + client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params) + click.echo(click.style("Client params validated successfully.", fg="green")) + except Exception as e: + click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) + return + + click.echo(click.style(f"Ready to delete existing oauth client params: {provider_name}", fg="yellow")) + deleted_count = ( + db.session.query(DatasourceOauthParamConfig) + .filter_by( + provider=provider_name, + plugin_id=plugin_id, + ) + .delete() + ) + if deleted_count > 0: + click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow")) + + click.echo(click.style(f"Ready to setup datasource oauth client: {provider_name}", fg="yellow")) + oauth_client = DatasourceOauthParamConfig( + provider=provider_name, + plugin_id=plugin_id, + system_credentials=client_params_dict, + ) + db.session.add(oauth_client) + db.session.commit() + click.echo(click.style(f"provider: {provider_name}", fg="green")) + click.echo(click.style(f"plugin_id: {plugin_id}", fg="green")) + click.echo(click.style(f"params: {json.dumps(client_params_dict, indent=2, ensure_ascii=False)}", fg="green")) + click.echo(click.style(f"Datasource oauth client setup successfully. id: {oauth_client.id}", fg="green")) + + +@click.command("transform-datasource-credentials", help="Transform datasource credentials.") +@click.option( + "--environment", prompt=True, help="the environment to transform datasource credentials", default="online" +) +def transform_datasource_credentials(environment: str): + """ + Transform datasource credentials + """ + try: + installer_manager = PluginInstaller() + plugin_migration = PluginMigration() + + notion_plugin_id = "langgenius/notion_datasource" + firecrawl_plugin_id = "langgenius/firecrawl_datasource" + jina_plugin_id = "langgenius/jina_datasource" + if environment == "online": + notion_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(notion_plugin_id) # pyright: ignore[reportPrivateUsage] + firecrawl_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(firecrawl_plugin_id) # pyright: ignore[reportPrivateUsage] + jina_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(jina_plugin_id) # pyright: ignore[reportPrivateUsage] + else: + notion_plugin_unique_identifier = None + firecrawl_plugin_unique_identifier = None + jina_plugin_unique_identifier = None + oauth_credential_type = CredentialType.OAUTH2 + api_key_credential_type = CredentialType.API_KEY + + # deal notion credentials + deal_notion_count = 0 + notion_credentials = db.session.query(DataSourceOauthBinding).filter_by(provider="notion").all() + if notion_credentials: + notion_credentials_tenant_mapping: dict[str, list[DataSourceOauthBinding]] = {} + for notion_credential in notion_credentials: + tenant_id = notion_credential.tenant_id + if tenant_id not in notion_credentials_tenant_mapping: + notion_credentials_tenant_mapping[tenant_id] = [] + notion_credentials_tenant_mapping[tenant_id].append(notion_credential) + for tenant_id, notion_tenant_credentials in notion_credentials_tenant_mapping.items(): + tenant = db.session.query(Tenant).filter_by(id=tenant_id).first() + if not tenant: + continue + try: + # check notion plugin is installed + installed_plugins = installer_manager.list_plugins(tenant_id) + installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] + if notion_plugin_id not in installed_plugins_ids: + if notion_plugin_unique_identifier: + # install notion plugin + PluginService.install_from_marketplace_pkg(tenant_id, [notion_plugin_unique_identifier]) + auth_count = 0 + for notion_tenant_credential in notion_tenant_credentials: + auth_count += 1 + # get credential oauth params + access_token = notion_tenant_credential.access_token + # notion info + notion_info = notion_tenant_credential.source_info + workspace_id = notion_info.get("workspace_id") + workspace_name = notion_info.get("workspace_name") + workspace_icon = notion_info.get("workspace_icon") + new_credentials = { + "integration_secret": encrypter.encrypt_token(tenant_id, access_token), + "workspace_id": workspace_id, + "workspace_name": workspace_name, + "workspace_icon": workspace_icon, + } + datasource_provider = DatasourceProvider( + provider="notion_datasource", + tenant_id=tenant_id, + plugin_id=notion_plugin_id, + auth_type=oauth_credential_type.value, + encrypted_credentials=new_credentials, + name=f"Auth {auth_count}", + avatar_url=workspace_icon or "default", + is_default=False, + ) + db.session.add(datasource_provider) + deal_notion_count += 1 + except Exception as e: + click.echo( + click.style( + f"Error transforming notion credentials: {str(e)}, tenant_id: {tenant_id}", fg="red" + ) + ) + continue + db.session.commit() + # deal firecrawl credentials + deal_firecrawl_count = 0 + firecrawl_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="firecrawl").all() + if firecrawl_credentials: + firecrawl_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {} + for firecrawl_credential in firecrawl_credentials: + tenant_id = firecrawl_credential.tenant_id + if tenant_id not in firecrawl_credentials_tenant_mapping: + firecrawl_credentials_tenant_mapping[tenant_id] = [] + firecrawl_credentials_tenant_mapping[tenant_id].append(firecrawl_credential) + for tenant_id, firecrawl_tenant_credentials in firecrawl_credentials_tenant_mapping.items(): + tenant = db.session.query(Tenant).filter_by(id=tenant_id).first() + if not tenant: + continue + try: + # check firecrawl plugin is installed + installed_plugins = installer_manager.list_plugins(tenant_id) + installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] + if firecrawl_plugin_id not in installed_plugins_ids: + if firecrawl_plugin_unique_identifier: + # install firecrawl plugin + PluginService.install_from_marketplace_pkg(tenant_id, [firecrawl_plugin_unique_identifier]) + + auth_count = 0 + for firecrawl_tenant_credential in firecrawl_tenant_credentials: + auth_count += 1 + if not firecrawl_tenant_credential.credentials: + click.echo( + click.style( + f"Skipping firecrawl credential for tenant {tenant_id} due to missing credentials.", + fg="yellow", + ) + ) + continue + # get credential api key + credentials_json = json.loads(firecrawl_tenant_credential.credentials) + api_key = credentials_json.get("config", {}).get("api_key") + base_url = credentials_json.get("config", {}).get("base_url") + new_credentials = { + "firecrawl_api_key": api_key, + "base_url": base_url, + } + datasource_provider = DatasourceProvider( + provider="firecrawl", + tenant_id=tenant_id, + plugin_id=firecrawl_plugin_id, + auth_type=api_key_credential_type.value, + encrypted_credentials=new_credentials, + name=f"Auth {auth_count}", + avatar_url="default", + is_default=False, + ) + db.session.add(datasource_provider) + deal_firecrawl_count += 1 + except Exception as e: + click.echo( + click.style( + f"Error transforming firecrawl credentials: {str(e)}, tenant_id: {tenant_id}", fg="red" + ) + ) + continue + db.session.commit() + # deal jina credentials + deal_jina_count = 0 + jina_credentials = db.session.query(DataSourceApiKeyAuthBinding).filter_by(provider="jinareader").all() + if jina_credentials: + jina_credentials_tenant_mapping: dict[str, list[DataSourceApiKeyAuthBinding]] = {} + for jina_credential in jina_credentials: + tenant_id = jina_credential.tenant_id + if tenant_id not in jina_credentials_tenant_mapping: + jina_credentials_tenant_mapping[tenant_id] = [] + jina_credentials_tenant_mapping[tenant_id].append(jina_credential) + for tenant_id, jina_tenant_credentials in jina_credentials_tenant_mapping.items(): + tenant = db.session.query(Tenant).filter_by(id=tenant_id).first() + if not tenant: + continue + try: + # check jina plugin is installed + installed_plugins = installer_manager.list_plugins(tenant_id) + installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] + if jina_plugin_id not in installed_plugins_ids: + if jina_plugin_unique_identifier: + # install jina plugin + logger.debug("Installing Jina plugin %s", jina_plugin_unique_identifier) + PluginService.install_from_marketplace_pkg(tenant_id, [jina_plugin_unique_identifier]) + + auth_count = 0 + for jina_tenant_credential in jina_tenant_credentials: + auth_count += 1 + if not jina_tenant_credential.credentials: + click.echo( + click.style( + f"Skipping jina credential for tenant {tenant_id} due to missing credentials.", + fg="yellow", + ) + ) + continue + # get credential api key + credentials_json = json.loads(jina_tenant_credential.credentials) + api_key = credentials_json.get("config", {}).get("api_key") + new_credentials = { + "integration_secret": api_key, + } + datasource_provider = DatasourceProvider( + provider="jinareader", + tenant_id=tenant_id, + plugin_id=jina_plugin_id, + auth_type=api_key_credential_type.value, + encrypted_credentials=new_credentials, + name=f"Auth {auth_count}", + avatar_url="default", + is_default=False, + ) + db.session.add(datasource_provider) + deal_jina_count += 1 + except Exception as e: + click.echo( + click.style(f"Error transforming jina credentials: {str(e)}, tenant_id: {tenant_id}", fg="red") + ) + continue + db.session.commit() + except Exception as e: + click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red")) + return + click.echo(click.style(f"Transforming notion successfully. deal_notion_count: {deal_notion_count}", fg="green")) + click.echo( + click.style(f"Transforming firecrawl successfully. deal_firecrawl_count: {deal_firecrawl_count}", fg="green") + ) + click.echo(click.style(f"Transforming jina successfully. deal_jina_count: {deal_jina_count}", fg="green")) + + +@click.command("migrate-data-for-plugin", help="Migrate data for plugin.") +def migrate_data_for_plugin(): + """ + Migrate data for plugin. + """ + click.echo(click.style("Starting migrate data for plugin.", fg="white")) + + PluginDataMigration.migrate() + + click.echo(click.style("Migrate data for plugin completed.", fg="green")) + + +@click.command("extract-plugins", help="Extract plugins.") +@click.option("--output_file", prompt=True, help="The file to store the extracted plugins.", default="plugins.jsonl") +@click.option("--workers", prompt=True, help="The number of workers to extract plugins.", default=10) +def extract_plugins(output_file: str, workers: int): + """ + Extract plugins. + """ + click.echo(click.style("Starting extract plugins.", fg="white")) + + PluginMigration.extract_plugins(output_file, workers) + + click.echo(click.style("Extract plugins completed.", fg="green")) + + +@click.command("extract-unique-identifiers", help="Extract unique identifiers.") +@click.option( + "--output_file", + prompt=True, + help="The file to store the extracted unique identifiers.", + default="unique_identifiers.json", +) +@click.option( + "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl" +) +def extract_unique_plugins(output_file: str, input_file: str): + """ + Extract unique plugins. + """ + click.echo(click.style("Starting extract unique plugins.", fg="white")) + + PluginMigration.extract_unique_plugins_to_file(input_file, output_file) + + click.echo(click.style("Extract unique plugins completed.", fg="green")) + + +@click.command("install-plugins", help="Install plugins.") +@click.option( + "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl" +) +@click.option( + "--output_file", prompt=True, help="The file to store the installed plugins.", default="installed_plugins.jsonl" +) +@click.option("--workers", prompt=True, help="The number of workers to install plugins.", default=100) +def install_plugins(input_file: str, output_file: str, workers: int): + """ + Install plugins. + """ + click.echo(click.style("Starting install plugins.", fg="white")) + + PluginMigration.install_plugins(input_file, output_file, workers) + + click.echo(click.style("Install plugins completed.", fg="green")) + + +@click.command("install-rag-pipeline-plugins", help="Install rag pipeline plugins.") +@click.option( + "--input_file", prompt=True, help="The file to store the extracted unique identifiers.", default="plugins.jsonl" +) +@click.option( + "--output_file", prompt=True, help="The file to store the installed plugins.", default="installed_plugins.jsonl" +) +@click.option("--workers", prompt=True, help="The number of workers to install plugins.", default=100) +def install_rag_pipeline_plugins(input_file, output_file, workers): + """ + Install rag pipeline plugins + """ + click.echo(click.style("Installing rag pipeline plugins", fg="yellow")) + plugin_migration = PluginMigration() + plugin_migration.install_rag_pipeline_plugins( + input_file, + output_file, + workers, + ) + click.echo(click.style("Installing rag pipeline plugins successfully", fg="green")) diff --git a/api/commands/retention.py b/api/commands/retention.py new file mode 100644 index 0000000000..5a91c1cc70 --- /dev/null +++ b/api/commands/retention.py @@ -0,0 +1,830 @@ +import datetime +import logging +import time +from typing import Any + +import click +import sqlalchemy as sa + +from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now +from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpiredLogs +from services.retention.conversation.messages_clean_policy import create_message_clean_policy +from services.retention.conversation.messages_clean_service import MessagesCleanService +from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup +from tasks.remove_app_and_related_data_task import delete_draft_variables_batch + +logger = logging.getLogger(__name__) + + +@click.command("clear-free-plan-tenant-expired-logs", help="Clear free plan tenant expired logs.") +@click.option("--days", prompt=True, help="The days to clear free plan tenant expired logs.", default=30) +@click.option("--batch", prompt=True, help="The batch size to clear free plan tenant expired logs.", default=100) +@click.option( + "--tenant_ids", + prompt=True, + multiple=True, + help="The tenant ids to clear free plan tenant expired logs.", +) +def clear_free_plan_tenant_expired_logs(days: int, batch: int, tenant_ids: list[str]): + """ + Clear free plan tenant expired logs. + """ + click.echo(click.style("Starting clear free plan tenant expired logs.", fg="white")) + + ClearFreePlanTenantExpiredLogs.process(days, batch, tenant_ids) + + click.echo(click.style("Clear free plan tenant expired logs completed.", fg="green")) + + +@click.command("clean-workflow-runs", help="Clean expired workflow runs and related data for free tenants.") +@click.option( + "--before-days", + "--days", + default=30, + show_default=True, + type=click.IntRange(min=0), + help="Delete workflow runs created before N days ago.", +) +@click.option("--batch-size", default=200, show_default=True, help="Batch size for selecting workflow runs.") +@click.option( + "--from-days-ago", + default=None, + type=click.IntRange(min=0), + help="Lower bound in days ago (older). Must be paired with --to-days-ago.", +) +@click.option( + "--to-days-ago", + default=None, + type=click.IntRange(min=0), + help="Upper bound in days ago (newer). Must be paired with --from-days-ago.", +) +@click.option( + "--start-from", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.", +) +@click.option( + "--end-before", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.", +) +@click.option( + "--dry-run", + is_flag=True, + help="Preview cleanup results without deleting any workflow run data.", +) +def clean_workflow_runs( + before_days: int, + batch_size: int, + from_days_ago: int | None, + to_days_ago: int | None, + start_from: datetime.datetime | None, + end_before: datetime.datetime | None, + dry_run: bool, +): + """ + Clean workflow runs and related workflow data for free tenants. + """ + if (start_from is None) ^ (end_before is None): + raise click.UsageError("--start-from and --end-before must be provided together.") + + if (from_days_ago is None) ^ (to_days_ago is None): + raise click.UsageError("--from-days-ago and --to-days-ago must be provided together.") + + if from_days_ago is not None and to_days_ago is not None: + if start_from or end_before: + raise click.UsageError("Choose either day offsets or explicit dates, not both.") + if from_days_ago <= to_days_ago: + raise click.UsageError("--from-days-ago must be greater than --to-days-ago.") + now = datetime.datetime.now() + start_from = now - datetime.timedelta(days=from_days_ago) + end_before = now - datetime.timedelta(days=to_days_ago) + before_days = 0 + + start_time = datetime.datetime.now(datetime.UTC) + click.echo(click.style(f"Starting workflow run cleanup at {start_time.isoformat()}.", fg="white")) + + WorkflowRunCleanup( + days=before_days, + batch_size=batch_size, + start_from=start_from, + end_before=end_before, + dry_run=dry_run, + ).run() + + end_time = datetime.datetime.now(datetime.UTC) + elapsed = end_time - start_time + click.echo( + click.style( + f"Workflow run cleanup completed. start={start_time.isoformat()} " + f"end={end_time.isoformat()} duration={elapsed}", + fg="green", + ) + ) + + +@click.command( + "archive-workflow-runs", + help="Archive workflow runs for paid plan tenants to S3-compatible storage.", +) +@click.option("--tenant-ids", default=None, help="Optional comma-separated tenant IDs for grayscale rollout.") +@click.option("--before-days", default=90, show_default=True, help="Archive runs older than N days.") +@click.option( + "--from-days-ago", + default=None, + type=click.IntRange(min=0), + help="Lower bound in days ago (older). Must be paired with --to-days-ago.", +) +@click.option( + "--to-days-ago", + default=None, + type=click.IntRange(min=0), + help="Upper bound in days ago (newer). Must be paired with --from-days-ago.", +) +@click.option( + "--start-from", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Archive runs created at or after this timestamp (UTC if no timezone).", +) +@click.option( + "--end-before", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Archive runs created before this timestamp (UTC if no timezone).", +) +@click.option("--batch-size", default=100, show_default=True, help="Batch size for processing.") +@click.option("--workers", default=1, show_default=True, type=int, help="Concurrent workflow runs to archive.") +@click.option("--limit", default=None, type=int, help="Maximum number of runs to archive.") +@click.option("--dry-run", is_flag=True, help="Preview without archiving.") +@click.option("--delete-after-archive", is_flag=True, help="Delete runs and related data after archiving.") +def archive_workflow_runs( + tenant_ids: str | None, + before_days: int, + from_days_ago: int | None, + to_days_ago: int | None, + start_from: datetime.datetime | None, + end_before: datetime.datetime | None, + batch_size: int, + workers: int, + limit: int | None, + dry_run: bool, + delete_after_archive: bool, +): + """ + Archive workflow runs for paid plan tenants older than the specified days. + + This command archives the following tables to storage: + - workflow_node_executions + - workflow_node_execution_offload + - workflow_pauses + - workflow_pause_reasons + - workflow_trigger_logs + + The workflow_runs and workflow_app_logs tables are preserved for UI listing. + """ + from services.retention.workflow_run.archive_paid_plan_workflow_run import WorkflowRunArchiver + + run_started_at = datetime.datetime.now(datetime.UTC) + click.echo( + click.style( + f"Starting workflow run archiving at {run_started_at.isoformat()}.", + fg="white", + ) + ) + + if (start_from is None) ^ (end_before is None): + click.echo(click.style("start-from and end-before must be provided together.", fg="red")) + return + + if (from_days_ago is None) ^ (to_days_ago is None): + click.echo(click.style("from-days-ago and to-days-ago must be provided together.", fg="red")) + return + + if from_days_ago is not None and to_days_ago is not None: + if start_from or end_before: + click.echo(click.style("Choose either day offsets or explicit dates, not both.", fg="red")) + return + if from_days_ago <= to_days_ago: + click.echo(click.style("from-days-ago must be greater than to-days-ago.", fg="red")) + return + now = datetime.datetime.now() + start_from = now - datetime.timedelta(days=from_days_ago) + end_before = now - datetime.timedelta(days=to_days_ago) + before_days = 0 + + if start_from and end_before and start_from >= end_before: + click.echo(click.style("start-from must be earlier than end-before.", fg="red")) + return + if workers < 1: + click.echo(click.style("workers must be at least 1.", fg="red")) + return + + archiver = WorkflowRunArchiver( + days=before_days, + batch_size=batch_size, + start_from=start_from, + end_before=end_before, + workers=workers, + tenant_ids=[tid.strip() for tid in tenant_ids.split(",")] if tenant_ids else None, + limit=limit, + dry_run=dry_run, + delete_after_archive=delete_after_archive, + ) + summary = archiver.run() + click.echo( + click.style( + f"Summary: processed={summary.total_runs_processed}, archived={summary.runs_archived}, " + f"skipped={summary.runs_skipped}, failed={summary.runs_failed}, " + f"time={summary.total_elapsed_time:.2f}s", + fg="cyan", + ) + ) + + run_finished_at = datetime.datetime.now(datetime.UTC) + elapsed = run_finished_at - run_started_at + click.echo( + click.style( + f"Workflow run archiving completed. start={run_started_at.isoformat()} " + f"end={run_finished_at.isoformat()} duration={elapsed}", + fg="green", + ) + ) + + +@click.command( + "restore-workflow-runs", + help="Restore archived workflow runs from S3-compatible storage.", +) +@click.option( + "--tenant-ids", + required=False, + help="Tenant IDs (comma-separated).", +) +@click.option("--run-id", required=False, help="Workflow run ID to restore.") +@click.option( + "--start-from", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.", +) +@click.option( + "--end-before", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.", +) +@click.option("--workers", default=1, show_default=True, type=int, help="Concurrent workflow runs to restore.") +@click.option("--limit", type=int, default=100, show_default=True, help="Maximum number of runs to restore.") +@click.option("--dry-run", is_flag=True, help="Preview without restoring.") +def restore_workflow_runs( + tenant_ids: str | None, + run_id: str | None, + start_from: datetime.datetime | None, + end_before: datetime.datetime | None, + workers: int, + limit: int, + dry_run: bool, +): + """ + Restore an archived workflow run from storage to the database. + + This restores the following tables: + - workflow_node_executions + - workflow_node_execution_offload + - workflow_pauses + - workflow_pause_reasons + - workflow_trigger_logs + """ + from services.retention.workflow_run.restore_archived_workflow_run import WorkflowRunRestore + + parsed_tenant_ids = None + if tenant_ids: + parsed_tenant_ids = [tid.strip() for tid in tenant_ids.split(",") if tid.strip()] + if not parsed_tenant_ids: + raise click.BadParameter("tenant-ids must not be empty") + + if (start_from is None) ^ (end_before is None): + raise click.UsageError("--start-from and --end-before must be provided together.") + if run_id is None and (start_from is None or end_before is None): + raise click.UsageError("--start-from and --end-before are required for batch restore.") + if workers < 1: + raise click.BadParameter("workers must be at least 1") + + start_time = datetime.datetime.now(datetime.UTC) + click.echo( + click.style( + f"Starting restore of workflow run {run_id} at {start_time.isoformat()}.", + fg="white", + ) + ) + + restorer = WorkflowRunRestore(dry_run=dry_run, workers=workers) + if run_id: + results = [restorer.restore_by_run_id(run_id)] + else: + assert start_from is not None + assert end_before is not None + results = restorer.restore_batch( + parsed_tenant_ids, + start_date=start_from, + end_date=end_before, + limit=limit, + ) + + end_time = datetime.datetime.now(datetime.UTC) + elapsed = end_time - start_time + + successes = sum(1 for result in results if result.success) + failures = len(results) - successes + + if failures == 0: + click.echo( + click.style( + f"Restore completed successfully. success={successes} duration={elapsed}", + fg="green", + ) + ) + else: + click.echo( + click.style( + f"Restore completed with failures. success={successes} failed={failures} duration={elapsed}", + fg="red", + ) + ) + + +@click.command( + "delete-archived-workflow-runs", + help="Delete archived workflow runs from the database.", +) +@click.option( + "--tenant-ids", + required=False, + help="Tenant IDs (comma-separated).", +) +@click.option("--run-id", required=False, help="Workflow run ID to delete.") +@click.option( + "--start-from", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Optional lower bound (inclusive) for created_at; must be paired with --end-before.", +) +@click.option( + "--end-before", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Optional upper bound (exclusive) for created_at; must be paired with --start-from.", +) +@click.option("--limit", type=int, default=100, show_default=True, help="Maximum number of runs to delete.") +@click.option("--dry-run", is_flag=True, help="Preview without deleting.") +def delete_archived_workflow_runs( + tenant_ids: str | None, + run_id: str | None, + start_from: datetime.datetime | None, + end_before: datetime.datetime | None, + limit: int, + dry_run: bool, +): + """ + Delete archived workflow runs from the database. + """ + from services.retention.workflow_run.delete_archived_workflow_run import ArchivedWorkflowRunDeletion + + parsed_tenant_ids = None + if tenant_ids: + parsed_tenant_ids = [tid.strip() for tid in tenant_ids.split(",") if tid.strip()] + if not parsed_tenant_ids: + raise click.BadParameter("tenant-ids must not be empty") + + if (start_from is None) ^ (end_before is None): + raise click.UsageError("--start-from and --end-before must be provided together.") + if run_id is None and (start_from is None or end_before is None): + raise click.UsageError("--start-from and --end-before are required for batch delete.") + + start_time = datetime.datetime.now(datetime.UTC) + target_desc = f"workflow run {run_id}" if run_id else "workflow runs" + click.echo( + click.style( + f"Starting delete of {target_desc} at {start_time.isoformat()}.", + fg="white", + ) + ) + + deleter = ArchivedWorkflowRunDeletion(dry_run=dry_run) + if run_id: + results = [deleter.delete_by_run_id(run_id)] + else: + assert start_from is not None + assert end_before is not None + results = deleter.delete_batch( + parsed_tenant_ids, + start_date=start_from, + end_date=end_before, + limit=limit, + ) + + for result in results: + if result.success: + click.echo( + click.style( + f"{'[DRY RUN] Would delete' if dry_run else 'Deleted'} " + f"workflow run {result.run_id} (tenant={result.tenant_id})", + fg="green", + ) + ) + else: + click.echo( + click.style( + f"Failed to delete workflow run {result.run_id}: {result.error}", + fg="red", + ) + ) + + end_time = datetime.datetime.now(datetime.UTC) + elapsed = end_time - start_time + + successes = sum(1 for result in results if result.success) + failures = len(results) - successes + + if failures == 0: + click.echo( + click.style( + f"Delete completed successfully. success={successes} duration={elapsed}", + fg="green", + ) + ) + else: + click.echo( + click.style( + f"Delete completed with failures. success={successes} failed={failures} duration={elapsed}", + fg="red", + ) + ) + + +def _find_orphaned_draft_variables(batch_size: int = 1000) -> list[str]: + """ + Find draft variables that reference non-existent apps. + + Args: + batch_size: Maximum number of orphaned app IDs to return + + Returns: + List of app IDs that have draft variables but don't exist in the apps table + """ + query = """ + SELECT DISTINCT wdv.app_id + FROM workflow_draft_variables AS wdv + WHERE NOT EXISTS( + SELECT 1 FROM apps WHERE apps.id = wdv.app_id + ) + LIMIT :batch_size + """ + + with db.engine.connect() as conn: + result = conn.execute(sa.text(query), {"batch_size": batch_size}) + return [row[0] for row in result] + + +def _count_orphaned_draft_variables() -> dict[str, Any]: + """ + Count orphaned draft variables by app, including associated file counts. + + Returns: + Dictionary with statistics about orphaned variables and files + """ + # Count orphaned variables by app + variables_query = """ + SELECT + wdv.app_id, + COUNT(*) as variable_count, + COUNT(wdv.file_id) as file_count + FROM workflow_draft_variables AS wdv + WHERE NOT EXISTS( + SELECT 1 FROM apps WHERE apps.id = wdv.app_id + ) + GROUP BY wdv.app_id + ORDER BY variable_count DESC + """ + + with db.engine.connect() as conn: + result = conn.execute(sa.text(variables_query)) + orphaned_by_app = {} + total_files = 0 + + for row in result: + app_id, variable_count, file_count = row + orphaned_by_app[app_id] = {"variables": variable_count, "files": file_count} + total_files += file_count + + total_orphaned = sum(app_data["variables"] for app_data in orphaned_by_app.values()) + app_count = len(orphaned_by_app) + + return { + "total_orphaned_variables": total_orphaned, + "total_orphaned_files": total_files, + "orphaned_app_count": app_count, + "orphaned_by_app": orphaned_by_app, + } + + +@click.command() +@click.option("--dry-run", is_flag=True, help="Show what would be deleted without actually deleting") +@click.option("--batch-size", default=1000, help="Number of records to process per batch (default 1000)") +@click.option("--max-apps", default=None, type=int, help="Maximum number of apps to process (default: no limit)") +@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") +def cleanup_orphaned_draft_variables( + dry_run: bool, + batch_size: int, + max_apps: int | None, + force: bool = False, +): + """ + Clean up orphaned draft variables from the database. + + This script finds and removes draft variables that belong to apps + that no longer exist in the database. + """ + logger = logging.getLogger(__name__) + + # Get statistics + stats = _count_orphaned_draft_variables() + + logger.info("Found %s orphaned draft variables", stats["total_orphaned_variables"]) + logger.info("Found %s associated offload files", stats["total_orphaned_files"]) + logger.info("Across %s non-existent apps", stats["orphaned_app_count"]) + + if stats["total_orphaned_variables"] == 0: + logger.info("No orphaned draft variables found. Exiting.") + return + + if dry_run: + logger.info("DRY RUN: Would delete the following:") + for app_id, data in sorted(stats["orphaned_by_app"].items(), key=lambda x: x[1]["variables"], reverse=True)[ + :10 + ]: # Show top 10 + logger.info(" App %s: %s variables, %s files", app_id, data["variables"], data["files"]) + if len(stats["orphaned_by_app"]) > 10: + logger.info(" ... and %s more apps", len(stats["orphaned_by_app"]) - 10) + return + + # Confirm deletion + if not force: + click.confirm( + f"Are you sure you want to delete {stats['total_orphaned_variables']} " + f"orphaned draft variables and {stats['total_orphaned_files']} associated files " + f"from {stats['orphaned_app_count']} apps?", + abort=True, + ) + + total_deleted = 0 + processed_apps = 0 + + while True: + if max_apps and processed_apps >= max_apps: + logger.info("Reached maximum app limit (%s). Stopping.", max_apps) + break + + orphaned_app_ids = _find_orphaned_draft_variables(batch_size=10) + if not orphaned_app_ids: + logger.info("No more orphaned draft variables found.") + break + + for app_id in orphaned_app_ids: + if max_apps and processed_apps >= max_apps: + break + + try: + deleted_count = delete_draft_variables_batch(app_id, batch_size) + total_deleted += deleted_count + processed_apps += 1 + + logger.info("Deleted %s variables for app %s", deleted_count, app_id) + + except Exception: + logger.exception("Error processing app %s", app_id) + continue + + logger.info("Cleanup completed. Total deleted: %s variables across %s apps", total_deleted, processed_apps) + + +@click.command("clean-expired-messages", help="Clean expired messages.") +@click.option( + "--start-from", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + required=False, + default=None, + help="Lower bound (inclusive) for created_at.", +) +@click.option( + "--end-before", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + required=False, + default=None, + help="Upper bound (exclusive) for created_at.", +) +@click.option( + "--from-days-ago", + type=int, + default=None, + help="Relative lower bound in days ago (inclusive). Must be used with --before-days.", +) +@click.option( + "--before-days", + type=int, + default=None, + help="Relative upper bound in days ago (exclusive). Required for relative mode.", +) +@click.option("--batch-size", default=1000, show_default=True, help="Batch size for selecting messages.") +@click.option( + "--graceful-period", + default=21, + show_default=True, + help="Graceful period in days after subscription expiration, will be ignored when billing is disabled.", +) +@click.option("--dry-run", is_flag=True, default=False, help="Show messages logs would be cleaned without deleting") +def clean_expired_messages( + batch_size: int, + graceful_period: int, + start_from: datetime.datetime | None, + end_before: datetime.datetime | None, + from_days_ago: int | None, + before_days: int | None, + dry_run: bool, +): + """ + Clean expired messages and related data for tenants based on clean policy. + """ + click.echo(click.style("clean_messages: start clean messages.", fg="green")) + + start_at = time.perf_counter() + + try: + abs_mode = start_from is not None and end_before is not None + rel_mode = before_days is not None + + if abs_mode and rel_mode: + raise click.UsageError( + "Options are mutually exclusive: use either (--start-from,--end-before) " + "or (--from-days-ago,--before-days)." + ) + + if from_days_ago is not None and before_days is None: + raise click.UsageError("--from-days-ago must be used together with --before-days.") + + if (start_from is None) ^ (end_before is None): + raise click.UsageError("Both --start-from and --end-before are required when using absolute time range.") + + if not abs_mode and not rel_mode: + raise click.UsageError( + "You must provide either (--start-from,--end-before) or (--before-days [--from-days-ago])." + ) + + if rel_mode: + assert before_days is not None + if before_days < 0: + raise click.UsageError("--before-days must be >= 0.") + if from_days_ago is not None: + if from_days_ago < 0: + raise click.UsageError("--from-days-ago must be >= 0.") + if from_days_ago <= before_days: + raise click.UsageError("--from-days-ago must be greater than --before-days.") + + # Create policy based on billing configuration + # NOTE: graceful_period will be ignored when billing is disabled. + policy = create_message_clean_policy(graceful_period_days=graceful_period) + + # Create and run the cleanup service + if abs_mode: + assert start_from is not None + assert end_before is not None + service = MessagesCleanService.from_time_range( + policy=policy, + start_from=start_from, + end_before=end_before, + batch_size=batch_size, + dry_run=dry_run, + ) + elif from_days_ago is None: + assert before_days is not None + service = MessagesCleanService.from_days( + policy=policy, + days=before_days, + batch_size=batch_size, + dry_run=dry_run, + ) + else: + assert before_days is not None + assert from_days_ago is not None + now = naive_utc_now() + service = MessagesCleanService.from_time_range( + policy=policy, + start_from=now - datetime.timedelta(days=from_days_ago), + end_before=now - datetime.timedelta(days=before_days), + batch_size=batch_size, + dry_run=dry_run, + ) + stats = service.run() + + end_at = time.perf_counter() + click.echo( + click.style( + f"clean_messages: completed successfully\n" + f" - Latency: {end_at - start_at:.2f}s\n" + f" - Batches processed: {stats['batches']}\n" + f" - Total messages scanned: {stats['total_messages']}\n" + f" - Messages filtered: {stats['filtered_messages']}\n" + f" - Messages deleted: {stats['total_deleted']}", + fg="green", + ) + ) + except Exception as e: + end_at = time.perf_counter() + logger.exception("clean_messages failed") + click.echo( + click.style( + f"clean_messages: failed after {end_at - start_at:.2f}s - {str(e)}", + fg="red", + ) + ) + raise + + click.echo(click.style("messages cleanup completed.", fg="green")) + + +@click.command("export-app-messages", help="Export messages for an app to JSONL.GZ.") +@click.option("--app-id", required=True, help="Application ID to export messages for.") +@click.option( + "--start-from", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + default=None, + help="Optional lower bound (inclusive) for created_at.", +) +@click.option( + "--end-before", + type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]), + required=True, + help="Upper bound (exclusive) for created_at.", +) +@click.option( + "--filename", + required=True, + help="Base filename (relative path). Do not include suffix like .jsonl.gz.", +) +@click.option("--use-cloud-storage", is_flag=True, default=False, help="Upload to cloud storage instead of local file.") +@click.option("--batch-size", default=1000, show_default=True, help="Batch size for cursor pagination.") +@click.option("--dry-run", is_flag=True, default=False, help="Scan only, print stats without writing any file.") +def export_app_messages( + app_id: str, + start_from: datetime.datetime | None, + end_before: datetime.datetime, + filename: str, + use_cloud_storage: bool, + batch_size: int, + dry_run: bool, +): + if start_from and start_from >= end_before: + raise click.UsageError("--start-from must be before --end-before.") + + from services.retention.conversation.message_export_service import AppMessageExportService + + try: + validated_filename = AppMessageExportService.validate_export_filename(filename) + except ValueError as e: + raise click.BadParameter(str(e), param_hint="--filename") from e + + click.echo(click.style(f"export_app_messages: starting export for app {app_id}.", fg="green")) + start_at = time.perf_counter() + + try: + service = AppMessageExportService( + app_id=app_id, + end_before=end_before, + filename=validated_filename, + start_from=start_from, + batch_size=batch_size, + use_cloud_storage=use_cloud_storage, + dry_run=dry_run, + ) + stats = service.run() + + elapsed = time.perf_counter() - start_at + click.echo( + click.style( + f"export_app_messages: completed in {elapsed:.2f}s\n" + f" - Batches: {stats.batches}\n" + f" - Total messages: {stats.total_messages}\n" + f" - Messages with feedback: {stats.messages_with_feedback}\n" + f" - Total feedbacks: {stats.total_feedbacks}", + fg="green", + ) + ) + except Exception as e: + elapsed = time.perf_counter() - start_at + logger.exception("export_app_messages failed") + click.echo(click.style(f"export_app_messages: failed after {elapsed:.2f}s - {e}", fg="red")) + raise diff --git a/api/commands/storage.py b/api/commands/storage.py new file mode 100644 index 0000000000..fa890a855a --- /dev/null +++ b/api/commands/storage.py @@ -0,0 +1,755 @@ +import json + +import click +import sqlalchemy as sa + +from configs import dify_config +from extensions.ext_database import db +from extensions.ext_storage import storage +from extensions.storage.opendal_storage import OpenDALStorage +from extensions.storage.storage_type import StorageType +from models.model import UploadFile + + +@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") +@click.command("clear-orphaned-file-records", help="Clear orphaned file records.") +def clear_orphaned_file_records(force: bool): + """ + Clear orphaned file records in the database. + """ + + # define tables and columns to process + files_tables = [ + {"table": "upload_files", "id_column": "id", "key_column": "key"}, + {"table": "tool_files", "id_column": "id", "key_column": "file_key"}, + ] + ids_tables = [ + {"type": "uuid", "table": "message_files", "column": "upload_file_id"}, + {"type": "text", "table": "documents", "column": "data_source_info"}, + {"type": "text", "table": "document_segments", "column": "content"}, + {"type": "text", "table": "messages", "column": "answer"}, + {"type": "text", "table": "workflow_node_executions", "column": "inputs"}, + {"type": "text", "table": "workflow_node_executions", "column": "process_data"}, + {"type": "text", "table": "workflow_node_executions", "column": "outputs"}, + {"type": "text", "table": "conversations", "column": "introduction"}, + {"type": "text", "table": "conversations", "column": "system_instruction"}, + {"type": "text", "table": "accounts", "column": "avatar"}, + {"type": "text", "table": "apps", "column": "icon"}, + {"type": "text", "table": "sites", "column": "icon"}, + {"type": "json", "table": "messages", "column": "inputs"}, + {"type": "json", "table": "messages", "column": "message"}, + ] + + # notify user and ask for confirmation + click.echo( + click.style( + "This command will first find and delete orphaned file records from the message_files table,", fg="yellow" + ) + ) + click.echo( + click.style( + "and then it will find and delete orphaned file records in the following tables:", + fg="yellow", + ) + ) + for files_table in files_tables: + click.echo(click.style(f"- {files_table['table']}", fg="yellow")) + click.echo( + click.style("The following tables and columns will be scanned to find orphaned file records:", fg="yellow") + ) + for ids_table in ids_tables: + click.echo(click.style(f"- {ids_table['table']} ({ids_table['column']})", fg="yellow")) + click.echo("") + + click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red")) + click.echo( + click.style( + ( + "Since not all patterns have been fully tested, " + "please note that this command may delete unintended file records." + ), + fg="yellow", + ) + ) + click.echo( + click.style("This cannot be undone. Please make sure to back up your database before proceeding.", fg="yellow") + ) + click.echo( + click.style( + ( + "It is also recommended to run this during the maintenance window, " + "as this may cause high load on your instance." + ), + fg="yellow", + ) + ) + if not force: + click.confirm("Do you want to proceed?", abort=True) + + # start the cleanup process + click.echo(click.style("Starting orphaned file records cleanup.", fg="white")) + + # clean up the orphaned records in the message_files table where message_id doesn't exist in messages table + try: + click.echo( + click.style("- Listing message_files records where message_id doesn't exist in messages table", fg="white") + ) + query = ( + "SELECT mf.id, mf.message_id " + "FROM message_files mf LEFT JOIN messages m ON mf.message_id = m.id " + "WHERE m.id IS NULL" + ) + orphaned_message_files = [] + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for i in rs: + orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])}) + + if orphaned_message_files: + click.echo(click.style(f"Found {len(orphaned_message_files)} orphaned message_files records:", fg="white")) + for record in orphaned_message_files: + click.echo(click.style(f" - id: {record['id']}, message_id: {record['message_id']}", fg="black")) + + if not force: + click.confirm( + ( + f"Do you want to proceed " + f"to delete all {len(orphaned_message_files)} orphaned message_files records?" + ), + abort=True, + ) + + click.echo(click.style("- Deleting orphaned message_files records", fg="white")) + query = "DELETE FROM message_files WHERE id IN :ids" + with db.engine.begin() as conn: + conn.execute(sa.text(query), {"ids": tuple(record["id"] for record in orphaned_message_files)}) + click.echo( + click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green") + ) + else: + click.echo(click.style("No orphaned message_files records found. There is nothing to delete.", fg="green")) + except Exception as e: + click.echo(click.style(f"Error deleting orphaned message_files records: {str(e)}", fg="red")) + + # clean up the orphaned records in the rest of the *_files tables + try: + # fetch file id and keys from each table + all_files_in_tables = [] + for files_table in files_tables: + click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white")) + query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}" + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for i in rs: + all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]}) + click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) + + # fetch referred table and columns + guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" + all_ids_in_tables = [] + for ids_table in ids_tables: + query = "" + match ids_table["type"]: + case "uuid": + click.echo( + click.style( + f"- Listing file ids in column {ids_table['column']} in table {ids_table['table']}", + fg="white", + ) + ) + c = ids_table["column"] + query = f"SELECT {c} FROM {ids_table['table']} WHERE {c} IS NOT NULL" + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for i in rs: + all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])}) + case "text": + t = ids_table["table"] + click.echo( + click.style( + f"- Listing file-id-like strings in column {ids_table['column']} in table {t}", + fg="white", + ) + ) + query = ( + f"SELECT regexp_matches({ids_table['column']}, '{guid_regexp}', 'g') AS extracted_id " + f"FROM {ids_table['table']}" + ) + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for i in rs: + for j in i[0]: + all_ids_in_tables.append({"table": ids_table["table"], "id": j}) + case "json": + click.echo( + click.style( + ( + f"- Listing file-id-like JSON string in column {ids_table['column']} " + f"in table {ids_table['table']}" + ), + fg="white", + ) + ) + query = ( + f"SELECT regexp_matches({ids_table['column']}::text, '{guid_regexp}', 'g') AS extracted_id " + f"FROM {ids_table['table']}" + ) + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for i in rs: + for j in i[0]: + all_ids_in_tables.append({"table": ids_table["table"], "id": j}) + case _: + pass + click.echo(click.style(f"Found {len(all_ids_in_tables)} file ids in tables.", fg="white")) + + except Exception as e: + click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) + return + + # find orphaned files + all_files = [file["id"] for file in all_files_in_tables] + all_ids = [file["id"] for file in all_ids_in_tables] + orphaned_files = list(set(all_files) - set(all_ids)) + if not orphaned_files: + click.echo(click.style("No orphaned file records found. There is nothing to delete.", fg="green")) + return + click.echo(click.style(f"Found {len(orphaned_files)} orphaned file records.", fg="white")) + for file in orphaned_files: + click.echo(click.style(f"- orphaned file id: {file}", fg="black")) + if not force: + click.confirm(f"Do you want to proceed to delete all {len(orphaned_files)} orphaned file records?", abort=True) + + # delete orphaned records for each file + try: + for files_table in files_tables: + click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white")) + query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids" + with db.engine.begin() as conn: + conn.execute(sa.text(query), {"ids": tuple(orphaned_files)}) + except Exception as e: + click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red")) + return + click.echo(click.style(f"Removed {len(orphaned_files)} orphaned file records.", fg="green")) + + +@click.option("-f", "--force", is_flag=True, help="Skip user confirmation and force the command to execute.") +@click.command("remove-orphaned-files-on-storage", help="Remove orphaned files on the storage.") +def remove_orphaned_files_on_storage(force: bool): + """ + Remove orphaned files on the storage. + """ + + # define tables and columns to process + files_tables = [ + {"table": "upload_files", "key_column": "key"}, + {"table": "tool_files", "key_column": "file_key"}, + ] + storage_paths = ["image_files", "tools", "upload_files"] + + # notify user and ask for confirmation + click.echo(click.style("This command will find and remove orphaned files on the storage,", fg="yellow")) + click.echo( + click.style("by comparing the files on the storage with the records in the following tables:", fg="yellow") + ) + for files_table in files_tables: + click.echo(click.style(f"- {files_table['table']}", fg="yellow")) + click.echo(click.style("The following paths on the storage will be scanned to find orphaned files:", fg="yellow")) + for storage_path in storage_paths: + click.echo(click.style(f"- {storage_path}", fg="yellow")) + click.echo("") + + click.echo(click.style("!!! USE WITH CAUTION !!!", fg="red")) + click.echo( + click.style( + "Currently, this command will work only for opendal based storage (STORAGE_TYPE=opendal).", fg="yellow" + ) + ) + click.echo( + click.style( + "Since not all patterns have been fully tested, please note that this command may delete unintended files.", + fg="yellow", + ) + ) + click.echo( + click.style("This cannot be undone. Please make sure to back up your storage before proceeding.", fg="yellow") + ) + click.echo( + click.style( + ( + "It is also recommended to run this during the maintenance window, " + "as this may cause high load on your instance." + ), + fg="yellow", + ) + ) + if not force: + click.confirm("Do you want to proceed?", abort=True) + + # start the cleanup process + click.echo(click.style("Starting orphaned files cleanup.", fg="white")) + + # fetch file id and keys from each table + all_files_in_tables = [] + try: + for files_table in files_tables: + click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white")) + query = f"SELECT {files_table['key_column']} FROM {files_table['table']}" + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for i in rs: + all_files_in_tables.append(str(i[0])) + click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white")) + except Exception as e: + click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red")) + return + + all_files_on_storage = [] + for storage_path in storage_paths: + try: + click.echo(click.style(f"- Scanning files on storage path {storage_path}", fg="white")) + files = storage.scan(path=storage_path, files=True, directories=False) + all_files_on_storage.extend(files) + except FileNotFoundError: + click.echo(click.style(f" -> Skipping path {storage_path} as it does not exist.", fg="yellow")) + continue + except Exception as e: + click.echo(click.style(f" -> Error scanning files on storage path {storage_path}: {str(e)}", fg="red")) + continue + click.echo(click.style(f"Found {len(all_files_on_storage)} files on storage.", fg="white")) + + # find orphaned files + orphaned_files = list(set(all_files_on_storage) - set(all_files_in_tables)) + if not orphaned_files: + click.echo(click.style("No orphaned files found. There is nothing to remove.", fg="green")) + return + click.echo(click.style(f"Found {len(orphaned_files)} orphaned files.", fg="white")) + for file in orphaned_files: + click.echo(click.style(f"- orphaned file: {file}", fg="black")) + if not force: + click.confirm(f"Do you want to proceed to remove all {len(orphaned_files)} orphaned files?", abort=True) + + # delete orphaned files + removed_files = 0 + error_files = 0 + for file in orphaned_files: + try: + storage.delete(file) + removed_files += 1 + click.echo(click.style(f"- Removing orphaned file: {file}", fg="white")) + except Exception as e: + error_files += 1 + click.echo(click.style(f"- Error deleting orphaned file {file}: {str(e)}", fg="red")) + continue + if error_files == 0: + click.echo(click.style(f"Removed {removed_files} orphaned files without errors.", fg="green")) + else: + click.echo(click.style(f"Removed {removed_files} orphaned files, with {error_files} errors.", fg="yellow")) + + +@click.command("file-usage", help="Query file usages and show where files are referenced.") +@click.option("--file-id", type=str, default=None, help="Filter by file UUID.") +@click.option("--key", type=str, default=None, help="Filter by storage key.") +@click.option("--src", type=str, default=None, help="Filter by table.column pattern (e.g., 'documents.%' or '%.icon').") +@click.option("--limit", type=int, default=100, help="Limit number of results (default: 100).") +@click.option("--offset", type=int, default=0, help="Offset for pagination (default: 0).") +@click.option("--json", "output_json", is_flag=True, help="Output results in JSON format.") +def file_usage( + file_id: str | None, + key: str | None, + src: str | None, + limit: int, + offset: int, + output_json: bool, +): + """ + Query file usages and show where files are referenced in the database. + + This command reuses the same reference checking logic as clear-orphaned-file-records + and displays detailed information about where each file is referenced. + """ + # define tables and columns to process + files_tables = [ + {"table": "upload_files", "id_column": "id", "key_column": "key"}, + {"table": "tool_files", "id_column": "id", "key_column": "file_key"}, + ] + ids_tables = [ + {"type": "uuid", "table": "message_files", "column": "upload_file_id", "pk_column": "id"}, + {"type": "text", "table": "documents", "column": "data_source_info", "pk_column": "id"}, + {"type": "text", "table": "document_segments", "column": "content", "pk_column": "id"}, + {"type": "text", "table": "messages", "column": "answer", "pk_column": "id"}, + {"type": "text", "table": "workflow_node_executions", "column": "inputs", "pk_column": "id"}, + {"type": "text", "table": "workflow_node_executions", "column": "process_data", "pk_column": "id"}, + {"type": "text", "table": "workflow_node_executions", "column": "outputs", "pk_column": "id"}, + {"type": "text", "table": "conversations", "column": "introduction", "pk_column": "id"}, + {"type": "text", "table": "conversations", "column": "system_instruction", "pk_column": "id"}, + {"type": "text", "table": "accounts", "column": "avatar", "pk_column": "id"}, + {"type": "text", "table": "apps", "column": "icon", "pk_column": "id"}, + {"type": "text", "table": "sites", "column": "icon", "pk_column": "id"}, + {"type": "json", "table": "messages", "column": "inputs", "pk_column": "id"}, + {"type": "json", "table": "messages", "column": "message", "pk_column": "id"}, + ] + + # Stream file usages with pagination to avoid holding all results in memory + paginated_usages = [] + total_count = 0 + + # First, build a mapping of file_id -> storage_key from the base tables + file_key_map = {} + for files_table in files_tables: + query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}" + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for row in rs: + file_key_map[str(row[0])] = f"{files_table['table']}:{row[1]}" + + # If filtering by key or file_id, verify it exists + if file_id and file_id not in file_key_map: + if output_json: + click.echo(json.dumps({"error": f"File ID {file_id} not found in base tables"})) + else: + click.echo(click.style(f"File ID {file_id} not found in base tables.", fg="red")) + return + + if key: + valid_prefixes = {f"upload_files:{key}", f"tool_files:{key}"} + matching_file_ids = [fid for fid, fkey in file_key_map.items() if fkey in valid_prefixes] + if not matching_file_ids: + if output_json: + click.echo(json.dumps({"error": f"Key {key} not found in base tables"})) + else: + click.echo(click.style(f"Key {key} not found in base tables.", fg="red")) + return + + guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" + + # For each reference table/column, find matching file IDs and record the references + for ids_table in ids_tables: + src_filter = f"{ids_table['table']}.{ids_table['column']}" + + # Skip if src filter doesn't match (use fnmatch for wildcard patterns) + if src: + if "%" in src or "_" in src: + import fnmatch + + # Convert SQL LIKE wildcards to fnmatch wildcards (% -> *, _ -> ?) + pattern = src.replace("%", "*").replace("_", "?") + if not fnmatch.fnmatch(src_filter, pattern): + continue + else: + if src_filter != src: + continue + + match ids_table["type"]: + case "uuid": + # Direct UUID match + query = ( + f"SELECT {ids_table['pk_column']}, {ids_table['column']} " + f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" + ) + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for row in rs: + record_id = str(row[0]) + ref_file_id = str(row[1]) + if ref_file_id not in file_key_map: + continue + storage_key = file_key_map[ref_file_id] + + # Apply filters + if file_id and ref_file_id != file_id: + continue + if key and not storage_key.endswith(key): + continue + + # Only collect items within the requested page range + if offset <= total_count < offset + limit: + paginated_usages.append( + { + "src": f"{ids_table['table']}.{ids_table['column']}", + "record_id": record_id, + "file_id": ref_file_id, + "key": storage_key, + } + ) + total_count += 1 + + case "text" | "json": + # Extract UUIDs from text/json content + column_cast = f"{ids_table['column']}::text" if ids_table["type"] == "json" else ids_table["column"] + query = ( + f"SELECT {ids_table['pk_column']}, {column_cast} " + f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" + ) + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for row in rs: + record_id = str(row[0]) + content = str(row[1]) + + # Find all UUIDs in the content + import re + + uuid_pattern = re.compile(guid_regexp, re.IGNORECASE) + matches = uuid_pattern.findall(content) + + for ref_file_id in matches: + if ref_file_id not in file_key_map: + continue + storage_key = file_key_map[ref_file_id] + + # Apply filters + if file_id and ref_file_id != file_id: + continue + if key and not storage_key.endswith(key): + continue + + # Only collect items within the requested page range + if offset <= total_count < offset + limit: + paginated_usages.append( + { + "src": f"{ids_table['table']}.{ids_table['column']}", + "record_id": record_id, + "file_id": ref_file_id, + "key": storage_key, + } + ) + total_count += 1 + case _: + pass + + # Output results + if output_json: + result = { + "total": total_count, + "offset": offset, + "limit": limit, + "usages": paginated_usages, + } + click.echo(json.dumps(result, indent=2)) + else: + click.echo( + click.style(f"Found {total_count} file usages (showing {len(paginated_usages)} results)", fg="white") + ) + click.echo("") + + if not paginated_usages: + click.echo(click.style("No file usages found matching the specified criteria.", fg="yellow")) + return + + # Print table header + click.echo( + click.style( + f"{'Src (Table.Column)':<50} {'Record ID':<40} {'File ID':<40} {'Storage Key':<60}", + fg="cyan", + ) + ) + click.echo(click.style("-" * 190, fg="white")) + + # Print each usage + for usage in paginated_usages: + click.echo(f"{usage['src']:<50} {usage['record_id']:<40} {usage['file_id']:<40} {usage['key']:<60}") + + # Show pagination info + if offset + limit < total_count: + click.echo("") + click.echo( + click.style( + f"Showing {offset + 1}-{offset + len(paginated_usages)} of {total_count} results", fg="white" + ) + ) + click.echo(click.style(f"Use --offset {offset + limit} to see next page", fg="white")) + + +@click.command( + "migrate-oss", + help="Migrate files from Local or OpenDAL source to a cloud OSS storage (destination must NOT be local/opendal).", +) +@click.option( + "--path", + "paths", + multiple=True, + help="Storage path prefixes to migrate (repeatable). Defaults: privkeys, upload_files, image_files," + " tools, website_files, keyword_files, ops_trace", +) +@click.option( + "--source", + type=click.Choice(["local", "opendal"], case_sensitive=False), + default="opendal", + show_default=True, + help="Source storage type to read from", +) +@click.option("--overwrite", is_flag=True, default=False, help="Overwrite destination if file already exists") +@click.option("--dry-run", is_flag=True, default=False, help="Show what would be migrated without uploading") +@click.option("-f", "--force", is_flag=True, help="Skip confirmation and run without prompts") +@click.option( + "--update-db/--no-update-db", + default=True, + help="Update upload_files.storage_type from source type to current storage after migration", +) +def migrate_oss( + paths: tuple[str, ...], + source: str, + overwrite: bool, + dry_run: bool, + force: bool, + update_db: bool, +): + """ + Copy all files under selected prefixes from a source storage + (Local filesystem or OpenDAL-backed) into the currently configured + destination storage backend, then optionally update DB records. + + Expected usage: set STORAGE_TYPE (and its credentials) to your target backend. + """ + # Ensure target storage is not local/opendal + if dify_config.STORAGE_TYPE in (StorageType.LOCAL, StorageType.OPENDAL): + click.echo( + click.style( + "Target STORAGE_TYPE must be a cloud OSS (not 'local' or 'opendal').\n" + "Please set STORAGE_TYPE to one of: s3, aliyun-oss, azure-blob, google-storage, tencent-cos, \n" + "volcengine-tos, supabase, oci-storage, huawei-obs, baidu-obs, clickzetta-volume.", + fg="red", + ) + ) + return + + # Default paths if none specified + default_paths = ("privkeys", "upload_files", "image_files", "tools", "website_files", "keyword_files", "ops_trace") + path_list = list(paths) if paths else list(default_paths) + is_source_local = source.lower() == "local" + + click.echo(click.style("Preparing migration to target storage.", fg="yellow")) + click.echo(click.style(f"Target storage type: {dify_config.STORAGE_TYPE}", fg="white")) + if is_source_local: + src_root = dify_config.STORAGE_LOCAL_PATH + click.echo(click.style(f"Source: local fs, root: {src_root}", fg="white")) + else: + click.echo(click.style(f"Source: opendal scheme={dify_config.OPENDAL_SCHEME}", fg="white")) + click.echo(click.style(f"Paths to migrate: {', '.join(path_list)}", fg="white")) + click.echo("") + + if not force: + click.confirm("Proceed with migration?", abort=True) + + # Instantiate source storage + try: + if is_source_local: + src_root = dify_config.STORAGE_LOCAL_PATH + source_storage = OpenDALStorage(scheme="fs", root=src_root) + else: + source_storage = OpenDALStorage(scheme=dify_config.OPENDAL_SCHEME) + except Exception as e: + click.echo(click.style(f"Failed to initialize source storage: {str(e)}", fg="red")) + return + + total_files = 0 + copied_files = 0 + skipped_files = 0 + errored_files = 0 + copied_upload_file_keys: list[str] = [] + + for prefix in path_list: + click.echo(click.style(f"Scanning source path: {prefix}", fg="white")) + try: + keys = source_storage.scan(path=prefix, files=True, directories=False) + except FileNotFoundError: + click.echo(click.style(f" -> Skipping missing path: {prefix}", fg="yellow")) + continue + except NotImplementedError: + click.echo(click.style(" -> Source storage does not support scanning.", fg="red")) + return + except Exception as e: + click.echo(click.style(f" -> Error scanning '{prefix}': {str(e)}", fg="red")) + continue + + click.echo(click.style(f"Found {len(keys)} files under {prefix}", fg="white")) + + for key in keys: + total_files += 1 + + # check destination existence + if not overwrite: + try: + if storage.exists(key): + skipped_files += 1 + continue + except Exception as e: + # existence check failures should not block migration attempt + # but should be surfaced to user as a warning for visibility + click.echo( + click.style( + f" -> Warning: failed target existence check for {key}: {str(e)}", + fg="yellow", + ) + ) + + if dry_run: + copied_files += 1 + continue + + # read from source and write to destination + try: + data = source_storage.load_once(key) + except FileNotFoundError: + errored_files += 1 + click.echo(click.style(f" -> Missing on source: {key}", fg="yellow")) + continue + except Exception as e: + errored_files += 1 + click.echo(click.style(f" -> Error reading {key}: {str(e)}", fg="red")) + continue + + try: + storage.save(key, data) + copied_files += 1 + if prefix == "upload_files": + copied_upload_file_keys.append(key) + except Exception as e: + errored_files += 1 + click.echo(click.style(f" -> Error writing {key} to target: {str(e)}", fg="red")) + continue + + click.echo("") + click.echo(click.style("Migration summary:", fg="yellow")) + click.echo(click.style(f" Total: {total_files}", fg="white")) + click.echo(click.style(f" Copied: {copied_files}", fg="green")) + click.echo(click.style(f" Skipped: {skipped_files}", fg="white")) + if errored_files: + click.echo(click.style(f" Errors: {errored_files}", fg="red")) + + if dry_run: + click.echo(click.style("Dry-run complete. No changes were made.", fg="green")) + return + + if errored_files: + click.echo( + click.style( + "Some files failed to migrate. Review errors above before updating DB records.", + fg="yellow", + ) + ) + if update_db and not force: + if not click.confirm("Proceed to update DB storage_type despite errors?", default=False): + update_db = False + + # Optionally update DB records for upload_files.storage_type (only for successfully copied upload_files) + if update_db: + if not copied_upload_file_keys: + click.echo(click.style("No upload_files copied. Skipping DB storage_type update.", fg="yellow")) + else: + try: + source_storage_type = StorageType.LOCAL if is_source_local else StorageType.OPENDAL + updated = ( + db.session.query(UploadFile) + .where( + UploadFile.storage_type == source_storage_type, + UploadFile.key.in_(copied_upload_file_keys), + ) + .update({UploadFile.storage_type: dify_config.STORAGE_TYPE}, synchronize_session=False) + ) + db.session.commit() + click.echo(click.style(f"Updated storage_type for {updated} upload_files records.", fg="green")) + except Exception as e: + db.session.rollback() + click.echo(click.style(f"Failed to update DB storage_type: {str(e)}", fg="red")) diff --git a/api/commands/system.py b/api/commands/system.py new file mode 100644 index 0000000000..604f0e34d0 --- /dev/null +++ b/api/commands/system.py @@ -0,0 +1,204 @@ +import logging + +import click +import sqlalchemy as sa +from sqlalchemy.orm import sessionmaker + +from configs import dify_config +from events.app_event import app_was_created +from extensions.ext_database import db +from extensions.ext_redis import redis_client +from libs.db_migration_lock import DbMigrationAutoRenewLock +from libs.rsa import generate_key_pair +from models import Tenant +from models.model import App, AppMode, Conversation +from models.provider import Provider, ProviderModel + +logger = logging.getLogger(__name__) + +DB_UPGRADE_LOCK_TTL_SECONDS = 60 + + +@click.command( + "reset-encrypt-key-pair", + help="Reset the asymmetric key pair of workspace for encrypt LLM credentials. " + "After the reset, all LLM credentials will become invalid, " + "requiring re-entry." + "Only support SELF_HOSTED mode.", +) +@click.confirmation_option( + prompt=click.style( + "Are you sure you want to reset encrypt key pair? This operation cannot be rolled back!", fg="red" + ) +) +def reset_encrypt_key_pair(): + """ + Reset the encrypted key pair of workspace for encrypt LLM credentials. + After the reset, all LLM credentials will become invalid, requiring re-entry. + Only support SELF_HOSTED mode. + """ + if dify_config.EDITION != "SELF_HOSTED": + click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red")) + return + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + tenants = session.query(Tenant).all() + for tenant in tenants: + if not tenant: + click.echo(click.style("No workspaces found. Run /install first.", fg="red")) + return + + tenant.encrypt_public_key = generate_key_pair(tenant.id) + + session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete() + session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete() + + click.echo( + click.style( + f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.", + fg="green", + ) + ) + + +@click.command("convert-to-agent-apps", help="Convert Agent Assistant to Agent App.") +def convert_to_agent_apps(): + """ + Convert Agent Assistant to Agent App. + """ + click.echo(click.style("Starting convert to agent apps.", fg="green")) + + proceeded_app_ids = [] + + while True: + # fetch first 1000 apps + sql_query = """SELECT a.id AS id FROM apps a + INNER JOIN app_model_configs am ON a.app_model_config_id=am.id + WHERE a.mode = 'chat' + AND am.agent_mode is not null + AND ( + am.agent_mode like '%"strategy": "function_call"%' + OR am.agent_mode like '%"strategy": "react"%' + ) + AND ( + am.agent_mode like '{"enabled": true%' + OR am.agent_mode like '{"max_iteration": %' + ) ORDER BY a.created_at DESC LIMIT 1000 + """ + + with db.engine.begin() as conn: + rs = conn.execute(sa.text(sql_query)) + + apps = [] + for i in rs: + app_id = str(i.id) + if app_id not in proceeded_app_ids: + proceeded_app_ids.append(app_id) + app = db.session.query(App).where(App.id == app_id).first() + if app is not None: + apps.append(app) + + if len(apps) == 0: + break + + for app in apps: + click.echo(f"Converting app: {app.id}") + + try: + app.mode = AppMode.AGENT_CHAT + db.session.commit() + + # update conversation mode to agent + db.session.query(Conversation).where(Conversation.app_id == app.id).update( + {Conversation.mode: AppMode.AGENT_CHAT} + ) + + db.session.commit() + click.echo(click.style(f"Converted app: {app.id}", fg="green")) + except Exception as e: + click.echo(click.style(f"Convert app error: {e.__class__.__name__} {str(e)}", fg="red")) + + click.echo(click.style(f"Conversion complete. Converted {len(proceeded_app_ids)} agent apps.", fg="green")) + + +@click.command("upgrade-db", help="Upgrade the database") +def upgrade_db(): + click.echo("Preparing database migration...") + lock = DbMigrationAutoRenewLock( + redis_client=redis_client, + name="db_upgrade_lock", + ttl_seconds=DB_UPGRADE_LOCK_TTL_SECONDS, + logger=logger, + log_context="db_migration", + ) + if lock.acquire(blocking=False): + migration_succeeded = False + try: + click.echo(click.style("Starting database migration.", fg="green")) + + # run db migration + import flask_migrate + + flask_migrate.upgrade() + + migration_succeeded = True + click.echo(click.style("Database migration successful!", fg="green")) + + except Exception as e: + logger.exception("Failed to execute database migration") + click.echo(click.style(f"Database migration failed: {e}", fg="red")) + raise SystemExit(1) + finally: + status = "successful" if migration_succeeded else "failed" + lock.release_safely(status=status) + else: + click.echo("Database migration skipped") + + +@click.command("fix-app-site-missing", help="Fix app related site missing issue.") +def fix_app_site_missing(): + """ + Fix app related site missing issue. + """ + click.echo(click.style("Starting fix for missing app-related sites.", fg="green")) + + failed_app_ids = [] + while True: + sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id +where sites.id is null limit 1000""" + with db.engine.begin() as conn: + rs = conn.execute(sa.text(sql)) + + processed_count = 0 + for i in rs: + processed_count += 1 + app_id = str(i.id) + + if app_id in failed_app_ids: + continue + + try: + app = db.session.query(App).where(App.id == app_id).first() + if not app: + logger.info("App %s not found", app_id) + continue + + tenant = app.tenant + if tenant: + accounts = tenant.get_accounts() + if not accounts: + logger.info("Fix failed for app %s", app.id) + continue + + account = accounts[0] + logger.info("Fixing missing site for app %s", app.id) + app_was_created.send(app, account=account) + except Exception: + failed_app_ids.append(app_id) + click.echo(click.style(f"Failed to fix missing site for app {app_id}", fg="red")) + logger.exception("Failed to fix app related site missing issue, app_id: %s", app_id) + continue + + if not processed_count: + break + + click.echo(click.style("Fix for missing app-related sites completed successfully!", fg="green")) diff --git a/api/commands/vector.py b/api/commands/vector.py new file mode 100644 index 0000000000..4df194026b --- /dev/null +++ b/api/commands/vector.py @@ -0,0 +1,466 @@ +import json + +import click +from flask import current_app +from sqlalchemy import select +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import sessionmaker + +from configs import dify_config +from core.rag.datasource.vdb.vector_factory import Vector +from core.rag.datasource.vdb.vector_type import VectorType +from core.rag.index_processor.constant.built_in_field import BuiltInField +from core.rag.models.document import ChildDocument, Document +from extensions.ext_database import db +from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment +from models.dataset import Document as DatasetDocument +from models.model import App, AppAnnotationSetting, MessageAnnotation + + +@click.command("vdb-migrate", help="Migrate vector db.") +@click.option("--scope", default="all", prompt=False, help="The scope of vector database to migrate, Default is All.") +def vdb_migrate(scope: str): + if scope in {"knowledge", "all"}: + migrate_knowledge_vector_database() + if scope in {"annotation", "all"}: + migrate_annotation_vector_database() + + +def migrate_annotation_vector_database(): + """ + Migrate annotation datas to target vector database . + """ + click.echo(click.style("Starting annotation data migration.", fg="green")) + create_count = 0 + skipped_count = 0 + total_count = 0 + page = 1 + while True: + try: + # get apps info + per_page = 50 + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + apps = ( + session.query(App) + .where(App.status == "normal") + .order_by(App.created_at.desc()) + .limit(per_page) + .offset((page - 1) * per_page) + .all() + ) + if not apps: + break + except SQLAlchemyError: + raise + + page += 1 + for app in apps: + total_count = total_count + 1 + click.echo( + f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped." + ) + try: + click.echo(f"Creating app annotation index: {app.id}") + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + app_annotation_setting = ( + session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first() + ) + + if not app_annotation_setting: + skipped_count = skipped_count + 1 + click.echo(f"App annotation setting disabled: {app.id}") + continue + # get dataset_collection_binding info + dataset_collection_binding = ( + session.query(DatasetCollectionBinding) + .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id) + .first() + ) + if not dataset_collection_binding: + click.echo(f"App annotation collection binding not found: {app.id}") + continue + annotations = session.scalars( + select(MessageAnnotation).where(MessageAnnotation.app_id == app.id) + ).all() + dataset = Dataset( + id=app.id, + tenant_id=app.tenant_id, + indexing_technique="high_quality", + embedding_model_provider=dataset_collection_binding.provider_name, + embedding_model=dataset_collection_binding.model_name, + collection_binding_id=dataset_collection_binding.id, + ) + documents = [] + if annotations: + for annotation in annotations: + document = Document( + page_content=annotation.question_text, + metadata={"annotation_id": annotation.id, "app_id": app.id, "doc_id": annotation.id}, + ) + documents.append(document) + + vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"]) + click.echo(f"Migrating annotations for app: {app.id}.") + + try: + vector.delete() + click.echo(click.style(f"Deleted vector index for app {app.id}.", fg="green")) + except Exception as e: + click.echo(click.style(f"Failed to delete vector index for app {app.id}.", fg="red")) + raise e + if documents: + try: + click.echo( + click.style( + f"Creating vector index with {len(documents)} annotations for app {app.id}.", + fg="green", + ) + ) + vector.create(documents) + click.echo(click.style(f"Created vector index for app {app.id}.", fg="green")) + except Exception as e: + click.echo(click.style(f"Failed to created vector index for app {app.id}.", fg="red")) + raise e + click.echo(f"Successfully migrated app annotation {app.id}.") + create_count += 1 + except Exception as e: + click.echo( + click.style(f"Error creating app annotation index: {e.__class__.__name__} {str(e)}", fg="red") + ) + continue + + click.echo( + click.style( + f"Migration complete. Created {create_count} app annotation indexes. Skipped {skipped_count} apps.", + fg="green", + ) + ) + + +def migrate_knowledge_vector_database(): + """ + Migrate vector database datas to target vector database . + """ + click.echo(click.style("Starting vector database migration.", fg="green")) + create_count = 0 + skipped_count = 0 + total_count = 0 + vector_type = dify_config.VECTOR_STORE + upper_collection_vector_types = { + VectorType.MILVUS, + VectorType.PGVECTOR, + VectorType.VASTBASE, + VectorType.RELYT, + VectorType.WEAVIATE, + VectorType.ORACLE, + VectorType.ELASTICSEARCH, + VectorType.OPENGAUSS, + VectorType.TABLESTORE, + VectorType.MATRIXONE, + } + lower_collection_vector_types = { + VectorType.ANALYTICDB, + VectorType.CHROMA, + VectorType.MYSCALE, + VectorType.PGVECTO_RS, + VectorType.TIDB_VECTOR, + VectorType.OPENSEARCH, + VectorType.TENCENT, + VectorType.BAIDU, + VectorType.VIKINGDB, + VectorType.UPSTASH, + VectorType.COUCHBASE, + VectorType.OCEANBASE, + } + page = 1 + while True: + try: + stmt = ( + select(Dataset).where(Dataset.indexing_technique == "high_quality").order_by(Dataset.created_at.desc()) + ) + + datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False) + if not datasets.items: + break + except SQLAlchemyError: + raise + + page += 1 + for dataset in datasets: + total_count = total_count + 1 + click.echo( + f"Processing the {total_count} dataset {dataset.id}. {create_count} created, {skipped_count} skipped." + ) + try: + click.echo(f"Creating dataset vector database index: {dataset.id}") + if dataset.index_struct_dict: + if dataset.index_struct_dict["type"] == vector_type: + skipped_count = skipped_count + 1 + continue + collection_name = "" + dataset_id = dataset.id + if vector_type in upper_collection_vector_types: + collection_name = Dataset.gen_collection_name_by_id(dataset_id) + elif vector_type == VectorType.QDRANT: + if dataset.collection_binding_id: + dataset_collection_binding = ( + db.session.query(DatasetCollectionBinding) + .where(DatasetCollectionBinding.id == dataset.collection_binding_id) + .one_or_none() + ) + if dataset_collection_binding: + collection_name = dataset_collection_binding.collection_name + else: + raise ValueError("Dataset Collection Binding not found") + else: + collection_name = Dataset.gen_collection_name_by_id(dataset_id) + + elif vector_type in lower_collection_vector_types: + collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower() + else: + raise ValueError(f"Vector store {vector_type} is not supported.") + + index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}} + dataset.index_struct = json.dumps(index_struct_dict) + vector = Vector(dataset) + click.echo(f"Migrating dataset {dataset.id}.") + + try: + vector.delete() + click.echo( + click.style(f"Deleted vector index {collection_name} for dataset {dataset.id}.", fg="green") + ) + except Exception as e: + click.echo( + click.style( + f"Failed to delete vector index {collection_name} for dataset {dataset.id}.", fg="red" + ) + ) + raise e + + dataset_documents = db.session.scalars( + select(DatasetDocument).where( + DatasetDocument.dataset_id == dataset.id, + DatasetDocument.indexing_status == "completed", + DatasetDocument.enabled == True, + DatasetDocument.archived == False, + ) + ).all() + + documents = [] + segments_count = 0 + for dataset_document in dataset_documents: + segments = db.session.scalars( + select(DocumentSegment).where( + DocumentSegment.document_id == dataset_document.id, + DocumentSegment.status == "completed", + DocumentSegment.enabled == True, + ) + ).all() + + for segment in segments: + document = Document( + page_content=segment.content, + metadata={ + "doc_id": segment.index_node_id, + "doc_hash": segment.index_node_hash, + "document_id": segment.document_id, + "dataset_id": segment.dataset_id, + }, + ) + if dataset_document.doc_form == "hierarchical_model": + child_chunks = segment.get_child_chunks() + if child_chunks: + child_documents = [] + for child_chunk in child_chunks: + child_document = ChildDocument( + page_content=child_chunk.content, + metadata={ + "doc_id": child_chunk.index_node_id, + "doc_hash": child_chunk.index_node_hash, + "document_id": segment.document_id, + "dataset_id": segment.dataset_id, + }, + ) + child_documents.append(child_document) + document.children = child_documents + + documents.append(document) + segments_count = segments_count + 1 + + if documents: + try: + click.echo( + click.style( + f"Creating vector index with {len(documents)} documents of {segments_count}" + f" segments for dataset {dataset.id}.", + fg="green", + ) + ) + all_child_documents = [] + for doc in documents: + if doc.children: + all_child_documents.extend(doc.children) + vector.create(documents) + if all_child_documents: + vector.create(all_child_documents) + click.echo(click.style(f"Created vector index for dataset {dataset.id}.", fg="green")) + except Exception as e: + click.echo(click.style(f"Failed to created vector index for dataset {dataset.id}.", fg="red")) + raise e + db.session.add(dataset) + db.session.commit() + click.echo(f"Successfully migrated dataset {dataset.id}.") + create_count += 1 + except Exception as e: + db.session.rollback() + click.echo(click.style(f"Error creating dataset index: {e.__class__.__name__} {str(e)}", fg="red")) + continue + + click.echo( + click.style( + f"Migration complete. Created {create_count} dataset indexes. Skipped {skipped_count} datasets.", fg="green" + ) + ) + + +@click.command("add-qdrant-index", help="Add Qdrant index.") +@click.option("--field", default="metadata.doc_id", prompt=False, help="Index field , default is metadata.doc_id.") +def add_qdrant_index(field: str): + click.echo(click.style("Starting Qdrant index creation.", fg="green")) + + create_count = 0 + + try: + bindings = db.session.query(DatasetCollectionBinding).all() + if not bindings: + click.echo(click.style("No dataset collection bindings found.", fg="red")) + return + import qdrant_client + from qdrant_client.http.exceptions import UnexpectedResponse + from qdrant_client.http.models import PayloadSchemaType + + from core.rag.datasource.vdb.qdrant.qdrant_vector import PathQdrantParams, QdrantConfig + + for binding in bindings: + if dify_config.QDRANT_URL is None: + raise ValueError("Qdrant URL is required.") + qdrant_config = QdrantConfig( + endpoint=dify_config.QDRANT_URL, + api_key=dify_config.QDRANT_API_KEY, + root_path=current_app.root_path, + timeout=dify_config.QDRANT_CLIENT_TIMEOUT, + grpc_port=dify_config.QDRANT_GRPC_PORT, + prefer_grpc=dify_config.QDRANT_GRPC_ENABLED, + ) + try: + params = qdrant_config.to_qdrant_params() + # Check the type before using + if isinstance(params, PathQdrantParams): + # PathQdrantParams case + client = qdrant_client.QdrantClient(path=params.path) + else: + # UrlQdrantParams case - params is UrlQdrantParams + client = qdrant_client.QdrantClient( + url=params.url, + api_key=params.api_key, + timeout=int(params.timeout), + verify=params.verify, + grpc_port=params.grpc_port, + prefer_grpc=params.prefer_grpc, + ) + # create payload index + client.create_payload_index(binding.collection_name, field, field_schema=PayloadSchemaType.KEYWORD) + create_count += 1 + except UnexpectedResponse as e: + # Collection does not exist, so return + if e.status_code == 404: + click.echo(click.style(f"Collection not found: {binding.collection_name}.", fg="red")) + continue + # Some other error occurred, so re-raise the exception + else: + click.echo( + click.style( + f"Failed to create Qdrant index for collection: {binding.collection_name}.", fg="red" + ) + ) + + except Exception: + click.echo(click.style("Failed to create Qdrant client.", fg="red")) + + click.echo(click.style(f"Index creation complete. Created {create_count} collection indexes.", fg="green")) + + +@click.command("old-metadata-migration", help="Old metadata migration.") +def old_metadata_migration(): + """ + Old metadata migration. + """ + click.echo(click.style("Starting old metadata migration.", fg="green")) + + page = 1 + while True: + try: + stmt = ( + select(DatasetDocument) + .where(DatasetDocument.doc_metadata.is_not(None)) + .order_by(DatasetDocument.created_at.desc()) + ) + documents = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False) + except SQLAlchemyError: + raise + if not documents: + break + for document in documents: + if document.doc_metadata: + doc_metadata = document.doc_metadata + for key in doc_metadata: + for field in BuiltInField: + if field.value == key: + break + else: + dataset_metadata = ( + db.session.query(DatasetMetadata) + .where(DatasetMetadata.dataset_id == document.dataset_id, DatasetMetadata.name == key) + .first() + ) + if not dataset_metadata: + dataset_metadata = DatasetMetadata( + tenant_id=document.tenant_id, + dataset_id=document.dataset_id, + name=key, + type="string", + created_by=document.created_by, + ) + db.session.add(dataset_metadata) + db.session.flush() + dataset_metadata_binding = DatasetMetadataBinding( + tenant_id=document.tenant_id, + dataset_id=document.dataset_id, + metadata_id=dataset_metadata.id, + document_id=document.id, + created_by=document.created_by, + ) + db.session.add(dataset_metadata_binding) + else: + dataset_metadata_binding = ( + db.session.query(DatasetMetadataBinding) # type: ignore + .where( + DatasetMetadataBinding.dataset_id == document.dataset_id, + DatasetMetadataBinding.document_id == document.id, + DatasetMetadataBinding.metadata_id == dataset_metadata.id, + ) + .first() + ) + if not dataset_metadata_binding: + dataset_metadata_binding = DatasetMetadataBinding( + tenant_id=document.tenant_id, + dataset_id=document.dataset_id, + metadata_id=dataset_metadata.id, + document_id=document.id, + created_by=document.created_by, + ) + db.session.add(dataset_metadata_binding) + db.session.commit() + page += 1 + click.echo(click.style("Old metadata migration completed.", fg="green")) diff --git a/api/configs/enterprise/__init__.py b/api/configs/enterprise/__init__.py index eda6345e14..f8447c6979 100644 --- a/api/configs/enterprise/__init__.py +++ b/api/configs/enterprise/__init__.py @@ -18,3 +18,7 @@ class EnterpriseFeatureConfig(BaseSettings): description="Allow customization of the enterprise logo.", default=False, ) + + ENTERPRISE_REQUEST_TIMEOUT: int = Field( + ge=1, description="Maximum timeout in seconds for enterprise requests", default=5 + ) diff --git a/api/configs/middleware/vdb/weaviate_config.py b/api/configs/middleware/vdb/weaviate_config.py index 6f4fccaa7f..2d1216c0d1 100644 --- a/api/configs/middleware/vdb/weaviate_config.py +++ b/api/configs/middleware/vdb/weaviate_config.py @@ -17,11 +17,6 @@ class WeaviateConfig(BaseSettings): default=None, ) - WEAVIATE_GRPC_ENABLED: bool = Field( - description="Whether to enable gRPC for Weaviate connection (True for gRPC, False for HTTP)", - default=True, - ) - WEAVIATE_GRPC_ENDPOINT: str | None = Field( description="URL of the Weaviate gRPC server (e.g., 'grpc://localhost:50051' or 'grpcs://weaviate.example.com:443')", default=None, diff --git a/api/controllers/console/__init__.py b/api/controllers/console/__init__.py index 862ad94b70..bd75491518 100644 --- a/api/controllers/console/__init__.py +++ b/api/controllers/console/__init__.py @@ -40,6 +40,7 @@ from . import ( feature, human_input_form, init_validate, + notification, ping, sandbox_files, setup, @@ -192,6 +193,7 @@ __all__ = [ "model_config", "model_providers", "models", + "notification", "oauth", "oauth_server", "ops_trace", diff --git a/api/controllers/console/admin.py b/api/controllers/console/admin.py index 03b602f6e8..6c3a6a8c1f 100644 --- a/api/controllers/console/admin.py +++ b/api/controllers/console/admin.py @@ -1,3 +1,5 @@ +import csv +import io from collections.abc import Callable from functools import wraps from typing import ParamSpec, TypeVar @@ -6,7 +8,7 @@ from flask import request from flask_restx import Resource from pydantic import BaseModel, Field, field_validator from sqlalchemy import select -from werkzeug.exceptions import NotFound, Unauthorized +from werkzeug.exceptions import BadRequest, NotFound, Unauthorized from configs import dify_config from constants.languages import supported_language @@ -16,6 +18,7 @@ from core.db.session_factory import session_factory from extensions.ext_database import db from libs.token import extract_access_token from models.model import App, ExporleBanner, InstalledApp, RecommendedApp, TrialApp +from services.billing_service import BillingService P = ParamSpec("P") R = TypeVar("R") @@ -277,3 +280,168 @@ class DeleteExploreBannerApi(Resource): db.session.commit() return {"result": "success"}, 204 + + +class LangContentPayload(BaseModel): + lang: str = Field(..., description="Language tag: 'zh' | 'en' | 'jp'") + title: str = Field(...) + subtitle: str | None = Field(default=None) + body: str = Field(...) + title_pic_url: str | None = Field(default=None) + + +class UpsertNotificationPayload(BaseModel): + notification_id: str | None = Field(default=None, description="Omit to create; supply UUID to update") + contents: list[LangContentPayload] = Field(..., min_length=1) + start_time: str | None = Field(default=None, description="RFC3339, e.g. 2026-03-01T00:00:00Z") + end_time: str | None = Field(default=None, description="RFC3339, e.g. 2026-03-20T23:59:59Z") + frequency: str = Field(default="once", description="'once' | 'every_page_load'") + status: str = Field(default="active", description="'active' | 'inactive'") + + +class BatchAddNotificationAccountsPayload(BaseModel): + notification_id: str = Field(...) + user_email: list[str] = Field(..., description="List of account email addresses") + + +console_ns.schema_model( + UpsertNotificationPayload.__name__, + UpsertNotificationPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0), +) + +console_ns.schema_model( + BatchAddNotificationAccountsPayload.__name__, + BatchAddNotificationAccountsPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0), +) + + +@console_ns.route("/admin/upsert_notification") +class UpsertNotificationApi(Resource): + @console_ns.doc("upsert_notification") + @console_ns.doc( + description=( + "Create or update an in-product notification. " + "Supply notification_id to update an existing one; omit it to create a new one. " + "Pass at least one language variant in contents (zh / en / jp)." + ) + ) + @console_ns.expect(console_ns.models[UpsertNotificationPayload.__name__]) + @console_ns.response(200, "Notification upserted successfully") + @only_edition_cloud + @admin_required + def post(self): + payload = UpsertNotificationPayload.model_validate(console_ns.payload) + result = BillingService.upsert_notification( + contents=[c.model_dump() for c in payload.contents], + frequency=payload.frequency, + status=payload.status, + notification_id=payload.notification_id, + start_time=payload.start_time, + end_time=payload.end_time, + ) + return {"result": "success", "notification_id": result.get("notificationId")}, 200 + + +@console_ns.route("/admin/batch_add_notification_accounts") +class BatchAddNotificationAccountsApi(Resource): + @console_ns.doc("batch_add_notification_accounts") + @console_ns.doc( + description=( + "Register target accounts for a notification by email address. " + 'JSON body: {"notification_id": "...", "user_email": ["a@example.com", ...]}. ' + "File upload: multipart/form-data with a 'file' field (CSV or TXT, one email per line) " + "plus a 'notification_id' field. " + "Emails that do not match any account are silently skipped." + ) + ) + @console_ns.response(200, "Accounts added successfully") + @only_edition_cloud + @admin_required + def post(self): + from models.account import Account + + if "file" in request.files: + notification_id = request.form.get("notification_id", "").strip() + if not notification_id: + raise BadRequest("notification_id is required.") + emails = self._parse_emails_from_file() + else: + payload = BatchAddNotificationAccountsPayload.model_validate(console_ns.payload) + notification_id = payload.notification_id + emails = payload.user_email + + if not emails: + raise BadRequest("No valid email addresses provided.") + + # Resolve emails → account IDs in chunks to avoid large IN-clause + account_ids: list[str] = [] + chunk_size = 500 + for i in range(0, len(emails), chunk_size): + chunk = emails[i : i + chunk_size] + rows = db.session.execute(select(Account.id, Account.email).where(Account.email.in_(chunk))).all() + account_ids.extend(str(row.id) for row in rows) + + if not account_ids: + raise BadRequest("None of the provided emails matched an existing account.") + + # Send to dify-saas in batches of 1000 + total_count = 0 + batch_size = 1000 + for i in range(0, len(account_ids), batch_size): + batch = account_ids[i : i + batch_size] + result = BillingService.batch_add_notification_accounts( + notification_id=notification_id, + account_ids=batch, + ) + total_count += result.get("count", 0) + + return { + "result": "success", + "emails_provided": len(emails), + "accounts_matched": len(account_ids), + "count": total_count, + }, 200 + + @staticmethod + def _parse_emails_from_file() -> list[str]: + """Parse email addresses from an uploaded CSV or TXT file.""" + file = request.files["file"] + if not file.filename: + raise BadRequest("Uploaded file has no filename.") + + filename_lower = file.filename.lower() + if not filename_lower.endswith((".csv", ".txt")): + raise BadRequest("Invalid file type. Only CSV (.csv) and TXT (.txt) files are allowed.") + + try: + content = file.read().decode("utf-8") + except UnicodeDecodeError: + try: + file.seek(0) + content = file.read().decode("gbk") + except UnicodeDecodeError: + raise BadRequest("Unable to decode the file. Please use UTF-8 or GBK encoding.") + + emails: list[str] = [] + if filename_lower.endswith(".csv"): + reader = csv.reader(io.StringIO(content)) + for row in reader: + for cell in row: + cell = cell.strip() + if cell: + emails.append(cell) + else: + for line in content.splitlines(): + line = line.strip() + if line: + emails.append(line) + + # Deduplicate while preserving order + seen: set[str] = set() + unique_emails: list[str] = [] + for email in emails: + if email.lower() not in seen: + seen.add(email.lower()) + unique_emails.append(email) + + return unique_emails diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py index a268c9e9ff..b4bf216fef 100644 --- a/api/controllers/console/app/app.py +++ b/api/controllers/console/app/app.py @@ -26,7 +26,8 @@ from controllers.console.wraps import ( ) from core.ops.ops_trace_manager import OpsTraceManager from core.rag.retrieval.retrieval_methods import RetrievalMethod -from dify_graph.enums import NodeType, WorkflowExecutionStatus +from core.trigger.constants import TRIGGER_NODE_TYPES +from dify_graph.enums import WorkflowExecutionStatus from dify_graph.file import helpers as file_helpers from extensions.ext_database import db from libs.login import current_account_with_tenant, login_required @@ -522,11 +523,7 @@ class AppListApi(Resource): .scalars() .all() ) - trigger_node_types = { - NodeType.TRIGGER_WEBHOOK, - NodeType.TRIGGER_SCHEDULE, - NodeType.TRIGGER_PLUGIN, - } + trigger_node_types = TRIGGER_NODE_TYPES for workflow in draft_workflows: # Check sandbox feature if workflow.get_feature(WorkflowFeatures.SANDBOX).enabled: diff --git a/api/controllers/console/app/mcp_server.py b/api/controllers/console/app/mcp_server.py index dd982b6d7b..2025048e09 100644 --- a/api/controllers/console/app/mcp_server.py +++ b/api/controllers/console/app/mcp_server.py @@ -1,5 +1,4 @@ import json -from enum import StrEnum from flask_restx import Resource, marshal_with from pydantic import BaseModel, Field @@ -11,6 +10,7 @@ from controllers.console.wraps import account_initialization_required, edit_perm from extensions.ext_database import db from fields.app_fields import app_server_fields from libs.login import current_account_with_tenant, login_required +from models.enums import AppMCPServerStatus from models.model import AppMCPServer DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" @@ -19,11 +19,6 @@ DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" app_server_model = console_ns.model("AppServer", app_server_fields) -class AppMCPServerStatus(StrEnum): - ACTIVE = "active" - INACTIVE = "inactive" - - class MCPServerCreatePayload(BaseModel): description: str | None = Field(default=None, description="Server description") parameters: dict = Field(..., description="Server parameters configuration") @@ -117,9 +112,10 @@ class AppMCPServerController(Resource): server.parameters = json.dumps(payload.parameters, ensure_ascii=False) if payload.status: - if payload.status not in [status.value for status in AppMCPServerStatus]: + try: + server.status = AppMCPServerStatus(payload.status) + except ValueError: raise ValueError("Invalid status") - server.status = payload.status db.session.commit() return server diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py index e15b72f272..3f2284976d 100644 --- a/api/controllers/console/app/workflow.py +++ b/api/controllers/console/app/workflow.py @@ -22,6 +22,7 @@ from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY from core.app.entities.app_invoke_entities import InvokeFrom from core.helper.trace_id_helper import get_external_trace_id from core.plugin.impl.exc import PluginInvokeError +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE from core.trigger.debug.event_selectors import ( TriggerDebugEvent, TriggerDebugEventPoller, @@ -1259,7 +1260,7 @@ class DraftWorkflowTriggerNodeApi(Resource): node_type: NodeType = draft_workflow.get_node_type_from_node_config(node_config) event: TriggerDebugEvent | None = None # for schedule trigger, when run single node, just execute directly - if node_type == NodeType.TRIGGER_SCHEDULE: + if node_type == TRIGGER_SCHEDULE_NODE_TYPE: event = TriggerDebugEvent( workflow_args={}, node_id=node_id, diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 54303b2482..ddad7f40ca 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -807,7 +807,7 @@ class DatasetApiKeyApi(Resource): console_ns.abort( 400, message=f"Cannot create more than {self.max_keys} API keys for this resource type.", - code="max_keys_exceeded", + custom="max_keys_exceeded", ) key = ApiToken.generate_api_key(self.token_prefix, 24) diff --git a/api/controllers/console/explore/parameter.py b/api/controllers/console/explore/parameter.py index 660a4d5aea..0f29627746 100644 --- a/api/controllers/console/explore/parameter.py +++ b/api/controllers/console/explore/parameter.py @@ -1,3 +1,5 @@ +from typing import Any, cast + from controllers.common import fields from controllers.console import console_ns from controllers.console.app.error import AppUnavailableError @@ -23,14 +25,14 @@ class AppParameterApi(InstalledAppResource): if workflow is None: raise AppUnavailableError() - features_dict = workflow.features_dict + features_dict: dict[str, Any] = workflow.features_dict user_input_form = workflow.user_input_form(to_old_structure=True) else: app_model_config = app_model.app_model_config if app_model_config is None: raise AppUnavailableError() - features_dict = app_model_config.to_dict() + features_dict = cast(dict[str, Any], app_model_config.to_dict()) user_input_form = features_dict.get("user_input_form", []) diff --git a/api/controllers/console/notification.py b/api/controllers/console/notification.py new file mode 100644 index 0000000000..53e4aa3d86 --- /dev/null +++ b/api/controllers/console/notification.py @@ -0,0 +1,90 @@ +from flask import request +from flask_restx import Resource +from pydantic import BaseModel, Field + +from controllers.console import console_ns +from controllers.console.wraps import account_initialization_required, only_edition_cloud, setup_required +from libs.login import current_account_with_tenant, login_required +from services.billing_service import BillingService + +# Notification content is stored under three lang tags. +_FALLBACK_LANG = "en-US" + + +def _pick_lang_content(contents: dict, lang: str) -> dict: + """Return the single LangContent for *lang*, falling back to English.""" + return contents.get(lang) or contents.get(_FALLBACK_LANG) or next(iter(contents.values()), {}) + + +class DismissNotificationPayload(BaseModel): + notification_id: str = Field(...) + + +@console_ns.route("/notification") +class NotificationApi(Resource): + @console_ns.doc("get_notification") + @console_ns.doc( + description=( + "Return the active in-product notification for the current user " + "in their interface language (falls back to English if unavailable). " + "The notification is NOT marked as seen here; call POST /notification/dismiss " + "when the user explicitly closes the modal." + ), + responses={ + 200: "Success — inspect should_show to decide whether to render the modal", + 401: "Unauthorized", + }, + ) + @setup_required + @login_required + @account_initialization_required + @only_edition_cloud + def get(self): + current_user, _ = current_account_with_tenant() + + result = BillingService.get_account_notification(str(current_user.id)) + + # Proto JSON uses camelCase field names (Kratos default marshaling). + if not result.get("shouldShow"): + return {"should_show": False, "notifications": []}, 200 + + lang = current_user.interface_language or _FALLBACK_LANG + + notifications = [] + for notification in result.get("notifications") or []: + contents: dict = notification.get("contents") or {} + lang_content = _pick_lang_content(contents, lang) + notifications.append( + { + "notification_id": notification.get("notificationId"), + "frequency": notification.get("frequency"), + "lang": lang_content.get("lang", lang), + "title": lang_content.get("title", ""), + "subtitle": lang_content.get("subtitle", ""), + "body": lang_content.get("body", ""), + "title_pic_url": lang_content.get("titlePicUrl", ""), + } + ) + + return {"should_show": bool(notifications), "notifications": notifications}, 200 + + +@console_ns.route("/notification/dismiss") +class NotificationDismissApi(Resource): + @console_ns.doc("dismiss_notification") + @console_ns.doc( + description="Mark a notification as dismissed for the current user.", + responses={200: "Success", 401: "Unauthorized"}, + ) + @setup_required + @login_required + @account_initialization_required + @only_edition_cloud + def post(self): + current_user, _ = current_account_with_tenant() + payload = DismissNotificationPayload.model_validate(request.get_json()) + BillingService.dismiss_notification( + notification_id=payload.notification_id, + account_id=str(current_user.id), + ) + return {"result": "success"}, 200 diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py index 6d94ea2ba0..b926676e16 100644 --- a/api/controllers/console/workspace/account.py +++ b/api/controllers/console/workspace/account.py @@ -44,6 +44,7 @@ from libs.datetime_utils import naive_utc_now from libs.helper import EmailStr, TimestampField, extract_remote_ip, timezone from libs.login import current_account_with_tenant, login_required from models import AccountIntegrate, InvitationCode +from models.account import AccountStatus, InvitationCodeStatus from services.account_service import AccountService from services.billing_service import BillingService from services.errors.account import CurrentPasswordIncorrectError as ServiceCurrentPasswordIncorrectError @@ -221,7 +222,7 @@ class AccountInitApi(Resource): db.session.query(InvitationCode) .where( InvitationCode.code == args.invitation_code, - InvitationCode.status == "unused", + InvitationCode.status == InvitationCodeStatus.UNUSED, ) .first() ) @@ -229,7 +230,7 @@ class AccountInitApi(Resource): if not invitation_code: raise InvalidInvitationCodeError() - invitation_code.status = "used" + invitation_code.status = InvitationCodeStatus.USED invitation_code.used_at = naive_utc_now() invitation_code.used_by_tenant_id = account.current_tenant_id invitation_code.used_by_account_id = account.id @@ -237,7 +238,7 @@ class AccountInitApi(Resource): account.interface_language = args.interface_language account.timezone = args.timezone account.interface_theme = "light" - account.status = "active" + account.status = AccountStatus.ACTIVE account.initialized_at = naive_utc_now() db.session.commit() diff --git a/api/controllers/console/workspace/plugin.py b/api/controllers/console/workspace/plugin.py index 2f06f72f29..ee537367c7 100644 --- a/api/controllers/console/workspace/plugin.py +++ b/api/controllers/console/workspace/plugin.py @@ -5,6 +5,7 @@ from typing import Any, Literal from flask import request, send_file from flask_restx import Resource from pydantic import BaseModel, Field +from werkzeug.datastructures import FileStorage from werkzeug.exceptions import Forbidden from configs import dify_config @@ -169,6 +170,20 @@ register_enum_models( ) +def _read_upload_content(file: FileStorage, max_size: int) -> bytes: + """ + Read the uploaded file and validate its actual size before delegating to the plugin service. + + FileStorage.content_length is not reliable for multipart test uploads and may be zero even when + content exists, so the controllers validate against the loaded bytes instead. + """ + content = file.read() + if len(content) > max_size: + raise ValueError("File size exceeds the maximum allowed size") + + return content + + @console_ns.route("/workspaces/current/plugin/debugging-key") class PluginDebuggingKeyApi(Resource): @setup_required @@ -284,12 +299,7 @@ class PluginUploadFromPkgApi(Resource): _, tenant_id = current_account_with_tenant() file = request.files["pkg"] - - # check file size - if file.content_length > dify_config.PLUGIN_MAX_PACKAGE_SIZE: - raise ValueError("File size exceeds the maximum allowed size") - - content = file.read() + content = _read_upload_content(file, dify_config.PLUGIN_MAX_PACKAGE_SIZE) try: response = PluginService.upload_pkg(tenant_id, content) except PluginDaemonClientSideError as e: @@ -328,12 +338,7 @@ class PluginUploadFromBundleApi(Resource): _, tenant_id = current_account_with_tenant() file = request.files["bundle"] - - # check file size - if file.content_length > dify_config.PLUGIN_MAX_BUNDLE_SIZE: - raise ValueError("File size exceeds the maximum allowed size") - - content = file.read() + content = _read_upload_content(file, dify_config.PLUGIN_MAX_BUNDLE_SIZE) try: response = PluginService.upload_bundle(tenant_id, content) except PluginDaemonClientSideError as e: diff --git a/api/controllers/files/tool_files.py b/api/controllers/files/tool_files.py index f6032a8e49..9e3fb3a90b 100644 --- a/api/controllers/files/tool_files.py +++ b/api/controllers/files/tool_files.py @@ -10,7 +10,6 @@ from controllers.common.file_response import enforce_download_for_html from controllers.files import files_ns from core.tools.signature import verify_tool_file_signature from core.tools.tool_file_manager import ToolFileManager -from extensions.ext_database import db as global_db DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" @@ -57,7 +56,7 @@ class ToolFileApi(Resource): raise Forbidden("Invalid request.") try: - tool_file_manager = ToolFileManager(engine=global_db.engine) + tool_file_manager = ToolFileManager() stream, tool_file = tool_file_manager.get_file_generator_by_tool_file_id( file_id, ) diff --git a/api/controllers/inner_api/plugin/wraps.py b/api/controllers/inner_api/plugin/wraps.py index 4b9574fe4a..fd2f49db1e 100644 --- a/api/controllers/inner_api/plugin/wraps.py +++ b/api/controllers/inner_api/plugin/wraps.py @@ -113,6 +113,7 @@ def get_user_tenant(view_func: Callable[P, R]): def plugin_data(view: Callable[P, R] | None = None, *, payload_type: type[BaseModel]): def decorator(view_func: Callable[P, R]): + @wraps(view_func) def decorated_view(*args: P.args, **kwargs: P.kwargs): try: data = request.get_json() diff --git a/api/controllers/mcp/mcp.py b/api/controllers/mcp/mcp.py index 2bc6640807..9ddaaa315b 100644 --- a/api/controllers/mcp/mcp.py +++ b/api/controllers/mcp/mcp.py @@ -6,13 +6,13 @@ from pydantic import BaseModel, Field, ValidationError from sqlalchemy.orm import Session from controllers.common.schema import register_schema_model -from controllers.console.app.mcp_server import AppMCPServerStatus from controllers.mcp import mcp_ns from core.mcp import types as mcp_types from core.mcp.server.streamable_http import handle_mcp_request from dify_graph.variables.input_entities import VariableEntity from extensions.ext_database import db from libs import helper +from models.enums import AppMCPServerStatus from models.model import App, AppMCPServer, AppMode, EndUser diff --git a/api/controllers/service_api/app/app.py b/api/controllers/service_api/app/app.py index 562f5e33cc..abcaa0e240 100644 --- a/api/controllers/service_api/app/app.py +++ b/api/controllers/service_api/app/app.py @@ -1,3 +1,5 @@ +from typing import Any, cast + from flask_restx import Resource from controllers.common.fields import Parameters @@ -33,14 +35,14 @@ class AppParameterApi(Resource): if workflow is None: raise AppUnavailableError() - features_dict = workflow.features_dict + features_dict: dict[str, Any] = workflow.features_dict user_input_form = workflow.user_input_form(to_old_structure=True) else: app_model_config = app_model.app_model_config if app_model_config is None: raise AppUnavailableError() - features_dict = app_model_config.to_dict() + features_dict = cast(dict[str, Any], app_model_config.to_dict()) user_input_form = features_dict.get("user_input_form", []) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index dc8da025d4..5a1d28ea1d 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -1,8 +1,9 @@ import json +from contextlib import ExitStack from typing import Self from uuid import UUID -from flask import request +from flask import request, send_file from flask_restx import marshal from pydantic import BaseModel, Field, field_validator, model_validator from sqlalchemy import desc, select @@ -100,6 +101,15 @@ class DocumentListQuery(BaseModel): status: str | None = Field(default=None, description="Document status filter") +DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS = 100 + + +class DocumentBatchDownloadZipPayload(BaseModel): + """Request payload for bulk downloading uploaded documents as a ZIP archive.""" + + document_ids: list[UUID] = Field(..., min_length=1, max_length=DOCUMENT_BATCH_DOWNLOAD_ZIP_MAX_DOCS) + + register_enum_models(service_api_ns, RetrievalMethod) register_schema_models( @@ -109,6 +119,7 @@ register_schema_models( DocumentTextCreatePayload, DocumentTextUpdate, DocumentListQuery, + DocumentBatchDownloadZipPayload, Rule, PreProcessingRule, Segmentation, @@ -540,6 +551,46 @@ class DocumentListApi(DatasetApiResource): return response +@service_api_ns.route("/datasets//documents/download-zip") +class DocumentBatchDownloadZipApi(DatasetApiResource): + """Download multiple uploaded-file documents as a single ZIP archive.""" + + @service_api_ns.expect(service_api_ns.models[DocumentBatchDownloadZipPayload.__name__]) + @service_api_ns.doc("download_documents_as_zip") + @service_api_ns.doc(description="Download selected uploaded documents as a single ZIP archive") + @service_api_ns.doc(params={"dataset_id": "Dataset ID"}) + @service_api_ns.doc( + responses={ + 200: "ZIP archive generated successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Document or dataset not found", + } + ) + @cloud_edition_billing_rate_limit_check("knowledge", "dataset") + def post(self, tenant_id, dataset_id): + payload = DocumentBatchDownloadZipPayload.model_validate(service_api_ns.payload or {}) + + upload_files, download_name = DocumentService.prepare_document_batch_download_zip( + dataset_id=str(dataset_id), + document_ids=[str(document_id) for document_id in payload.document_ids], + tenant_id=str(tenant_id), + current_user=current_user, + ) + + with ExitStack() as stack: + zip_path = stack.enter_context(FileService.build_upload_files_zip_tempfile(upload_files=upload_files)) + response = send_file( + zip_path, + mimetype="application/zip", + as_attachment=True, + download_name=download_name, + ) + cleanup = stack.pop_all() + response.call_on_close(cleanup.close) + return response + + @service_api_ns.route("/datasets//documents//indexing-status") class DocumentIndexingStatusApi(DatasetApiResource): @service_api_ns.doc("get_document_indexing_status") @@ -600,6 +651,35 @@ class DocumentIndexingStatusApi(DatasetApiResource): return data +@service_api_ns.route("/datasets//documents//download") +class DocumentDownloadApi(DatasetApiResource): + """Return a signed download URL for a document's original uploaded file.""" + + @service_api_ns.doc("get_document_download_url") + @service_api_ns.doc(description="Get a signed download URL for a document's original uploaded file") + @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"}) + @service_api_ns.doc( + responses={ + 200: "Download URL generated successfully", + 401: "Unauthorized - invalid API token", + 403: "Forbidden - insufficient permissions", + 404: "Document or upload file not found", + } + ) + @cloud_edition_billing_rate_limit_check("knowledge", "dataset") + def get(self, tenant_id, dataset_id, document_id): + dataset = self.get_dataset(str(dataset_id), str(tenant_id)) + document = DocumentService.get_document(dataset.id, str(document_id)) + + if not document: + raise NotFound("Document not found.") + + if document.tenant_id != str(tenant_id): + raise Forbidden("No permission.") + + return {"url": DocumentService.get_document_download_url(document)} + + @service_api_ns.route("/datasets//documents/") class DocumentApi(DatasetApiResource): METADATA_CHOICES = {"all", "only", "without"} diff --git a/api/controllers/web/app.py b/api/controllers/web/app.py index 62ea532eac..25bbedce54 100644 --- a/api/controllers/web/app.py +++ b/api/controllers/web/app.py @@ -1,4 +1,5 @@ import logging +from typing import Any, cast from flask import request from flask_restx import Resource @@ -57,14 +58,14 @@ class AppParameterApi(WebApiResource): if workflow is None: raise AppUnavailableError() - features_dict = workflow.features_dict + features_dict: dict[str, Any] = workflow.features_dict user_input_form = workflow.user_input_form(to_old_structure=True) else: app_model_config = app_model.app_model_config if app_model_config is None: raise AppUnavailableError() - features_dict = app_model_config.to_dict() + features_dict = cast(dict[str, Any], app_model_config.to_dict()) user_input_form = features_dict.get("user_input_form", []) diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index bbae1ce266..2b60691949 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -239,7 +239,7 @@ class MessageSuggestedQuestionApi(WebApiResource): def get(self, app_model, end_user, message_id): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: - raise NotCompletionAppError() + raise NotChatAppError() message_id = str(message_id) diff --git a/api/core/agent/errors.py b/api/core/agent/errors.py new file mode 100644 index 0000000000..ed504d500a --- /dev/null +++ b/api/core/agent/errors.py @@ -0,0 +1,9 @@ +class AgentMaxIterationError(Exception): + """Raised when an agent runner exceeds the configured max iteration count.""" + + def __init__(self, max_iteration: int): + self.max_iteration = max_iteration + super().__init__( + f"Agent exceeded the maximum iteration limit of {max_iteration}. " + f"The agent was unable to complete the task within the allowed number of iterations." + ) diff --git a/api/core/app/app_config/common/sensitive_word_avoidance/manager.py b/api/core/app/app_config/common/sensitive_word_avoidance/manager.py index e925d6dd52..7d1b11c008 100644 --- a/api/core/app/app_config/common/sensitive_word_avoidance/manager.py +++ b/api/core/app/app_config/common/sensitive_word_avoidance/manager.py @@ -1,10 +1,13 @@ +from collections.abc import Mapping +from typing import Any + from core.app.app_config.entities import SensitiveWordAvoidanceEntity from core.moderation.factory import ModerationFactory class SensitiveWordAvoidanceConfigManager: @classmethod - def convert(cls, config: dict) -> SensitiveWordAvoidanceEntity | None: + def convert(cls, config: Mapping[str, Any]) -> SensitiveWordAvoidanceEntity | None: sensitive_word_avoidance_dict = config.get("sensitive_word_avoidance") if not sensitive_word_avoidance_dict: return None @@ -12,7 +15,7 @@ class SensitiveWordAvoidanceConfigManager: if sensitive_word_avoidance_dict.get("enabled"): return SensitiveWordAvoidanceEntity( type=sensitive_word_avoidance_dict.get("type"), - config=sensitive_word_avoidance_dict.get("config"), + config=sensitive_word_avoidance_dict.get("config", {}), ) else: return None diff --git a/api/core/app/app_config/easy_ui_based_app/agent/manager.py b/api/core/app/app_config/easy_ui_based_app/agent/manager.py index 9b981dfc09..10db380d1f 100644 --- a/api/core/app/app_config/easy_ui_based_app/agent/manager.py +++ b/api/core/app/app_config/easy_ui_based_app/agent/manager.py @@ -1,10 +1,13 @@ +from typing import Any, cast + from core.agent.entities import AgentEntity, AgentPromptEntity, AgentToolEntity from core.agent.prompt.template import REACT_PROMPT_TEMPLATES +from models.model import AppModelConfigDict class AgentConfigManager: @classmethod - def convert(cls, config: dict) -> AgentEntity | None: + def convert(cls, config: AppModelConfigDict) -> AgentEntity | None: """ Convert model config to model config @@ -28,17 +31,17 @@ class AgentConfigManager: agent_tools = [] for tool in agent_dict.get("tools", []): - keys = tool.keys() - if len(keys) >= 4: - if "enabled" not in tool or not tool["enabled"]: + tool_dict = cast(dict[str, Any], tool) + if len(tool_dict) >= 4: + if "enabled" not in tool_dict or not tool_dict["enabled"]: continue agent_tool_properties = { - "provider_type": tool["provider_type"], - "provider_id": tool["provider_id"], - "tool_name": tool["tool_name"], - "tool_parameters": tool.get("tool_parameters", {}), - "credential_id": tool.get("credential_id", None), + "provider_type": tool_dict["provider_type"], + "provider_id": tool_dict["provider_id"], + "tool_name": tool_dict["tool_name"], + "tool_parameters": tool_dict.get("tool_parameters", {}), + "credential_id": tool_dict.get("credential_id", None), } agent_tools.append(AgentToolEntity.model_validate(agent_tool_properties)) @@ -47,7 +50,8 @@ class AgentConfigManager: "react_router", "router", }: - agent_prompt = agent_dict.get("prompt", None) or {} + agent_prompt_raw = agent_dict.get("prompt", None) + agent_prompt: dict[str, Any] = agent_prompt_raw if isinstance(agent_prompt_raw, dict) else {} # check model mode model_mode = config.get("model", {}).get("mode", "completion") if model_mode == "completion": @@ -75,7 +79,7 @@ class AgentConfigManager: strategy=strategy, prompt=agent_prompt_entity, tools=agent_tools, - max_iteration=agent_dict.get("max_iteration", 10), + max_iteration=cast(int, agent_dict.get("max_iteration", 10)), ) return None diff --git a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py index aacafb2dad..70f43b2c83 100644 --- a/api/core/app/app_config/easy_ui_based_app/dataset/manager.py +++ b/api/core/app/app_config/easy_ui_based_app/dataset/manager.py @@ -1,5 +1,5 @@ import uuid -from typing import Literal, cast +from typing import Any, Literal, cast from core.app.app_config.entities import ( DatasetEntity, @@ -8,13 +8,13 @@ from core.app.app_config.entities import ( ModelConfig, ) from core.entities.agent_entities import PlanningStrategy -from models.model import AppMode +from models.model import AppMode, AppModelConfigDict from services.dataset_service import DatasetService class DatasetConfigManager: @classmethod - def convert(cls, config: dict) -> DatasetEntity | None: + def convert(cls, config: AppModelConfigDict) -> DatasetEntity | None: """ Convert model config to model config @@ -25,11 +25,15 @@ class DatasetConfigManager: datasets = config.get("dataset_configs", {}).get("datasets", {"strategy": "router", "datasets": []}) for dataset in datasets.get("datasets", []): + if not isinstance(dataset, dict): + continue keys = list(dataset.keys()) if len(keys) == 0 or keys[0] != "dataset": continue dataset = dataset["dataset"] + if not isinstance(dataset, dict): + continue if "enabled" not in dataset or not dataset["enabled"]: continue @@ -47,15 +51,14 @@ class DatasetConfigManager: agent_dict = config.get("agent_mode", {}) for tool in agent_dict.get("tools", []): - keys = tool.keys() - if len(keys) == 1: + if len(tool) == 1: # old standard key = list(tool.keys())[0] if key != "dataset": continue - tool_item = tool[key] + tool_item = cast(dict[str, Any], tool)[key] if "enabled" not in tool_item or not tool_item["enabled"]: continue diff --git a/api/core/app/app_config/easy_ui_based_app/model_config/manager.py b/api/core/app/app_config/easy_ui_based_app/model_config/manager.py index e4e750c735..0929f52e33 100644 --- a/api/core/app/app_config/easy_ui_based_app/model_config/manager.py +++ b/api/core/app/app_config/easy_ui_based_app/model_config/manager.py @@ -5,12 +5,13 @@ from core.app.app_config.entities import ModelConfigEntity from core.provider_manager import ProviderManager from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey, ModelType from dify_graph.model_runtime.model_providers.model_provider_factory import ModelProviderFactory +from models.model import AppModelConfigDict from models.provider_ids import ModelProviderID class ModelConfigManager: @classmethod - def convert(cls, config: dict) -> ModelConfigEntity: + def convert(cls, config: AppModelConfigDict) -> ModelConfigEntity: """ Convert model config to model config @@ -22,7 +23,7 @@ class ModelConfigManager: if not model_config: raise ValueError("model is required") - completion_params = model_config.get("completion_params") + completion_params = model_config.get("completion_params") or {} stop = [] if "stop" in completion_params: stop = completion_params["stop"] diff --git a/api/core/app/app_config/easy_ui_based_app/prompt_template/manager.py b/api/core/app/app_config/easy_ui_based_app/prompt_template/manager.py index 01b9601965..b7073898d6 100644 --- a/api/core/app/app_config/easy_ui_based_app/prompt_template/manager.py +++ b/api/core/app/app_config/easy_ui_based_app/prompt_template/manager.py @@ -1,3 +1,5 @@ +from typing import Any + from core.app.app_config.entities import ( AdvancedChatMessageEntity, AdvancedChatPromptTemplateEntity, @@ -6,12 +8,12 @@ from core.app.app_config.entities import ( ) from core.prompt.simple_prompt_transform import ModelMode from dify_graph.model_runtime.entities.message_entities import PromptMessageRole -from models.model import AppMode +from models.model import AppMode, AppModelConfigDict class PromptTemplateConfigManager: @classmethod - def convert(cls, config: dict) -> PromptTemplateEntity: + def convert(cls, config: AppModelConfigDict) -> PromptTemplateEntity: if not config.get("prompt_type"): raise ValueError("prompt_type is required") @@ -40,14 +42,15 @@ class PromptTemplateConfigManager: advanced_completion_prompt_template = None completion_prompt_config = config.get("completion_prompt_config", {}) if completion_prompt_config: - completion_prompt_template_params = { + completion_prompt_template_params: dict[str, Any] = { "prompt": completion_prompt_config["prompt"]["text"], } - if "conversation_histories_role" in completion_prompt_config: + conv_role = completion_prompt_config.get("conversation_histories_role") + if conv_role: completion_prompt_template_params["role_prefix"] = { - "user": completion_prompt_config["conversation_histories_role"]["user_prefix"], - "assistant": completion_prompt_config["conversation_histories_role"]["assistant_prefix"], + "user": conv_role["user_prefix"], + "assistant": conv_role["assistant_prefix"], } advanced_completion_prompt_template = AdvancedCompletionPromptTemplateEntity( diff --git a/api/core/app/app_config/easy_ui_based_app/variables/manager.py b/api/core/app/app_config/easy_ui_based_app/variables/manager.py index 157e5d8bc0..8de1224a89 100644 --- a/api/core/app/app_config/easy_ui_based_app/variables/manager.py +++ b/api/core/app/app_config/easy_ui_based_app/variables/manager.py @@ -1,8 +1,10 @@ import re +from typing import cast from core.app.app_config.entities import ExternalDataVariableEntity from core.external_data_tool.factory import ExternalDataToolFactory from dify_graph.variables.input_entities import VariableEntity, VariableEntityType +from models.model import AppModelConfigDict _ALLOWED_VARIABLE_ENTITY_TYPE = frozenset( [ @@ -18,7 +20,7 @@ _ALLOWED_VARIABLE_ENTITY_TYPE = frozenset( class BasicVariablesConfigManager: @classmethod - def convert(cls, config: dict) -> tuple[list[VariableEntity], list[ExternalDataVariableEntity]]: + def convert(cls, config: AppModelConfigDict) -> tuple[list[VariableEntity], list[ExternalDataVariableEntity]]: """ Convert model config to model config @@ -51,7 +53,9 @@ class BasicVariablesConfigManager: external_data_variables.append( ExternalDataVariableEntity( - variable=variable["variable"], type=variable["type"], config=variable["config"] + variable=variable["variable"], + type=variable.get("type", ""), + config=variable.get("config", {}), ) ) elif variable_type in { @@ -64,10 +68,10 @@ class BasicVariablesConfigManager: variable = variables[variable_type] variable_entities.append( VariableEntity( - type=variable_type, - variable=variable.get("variable"), + type=cast(VariableEntityType, variable_type), + variable=variable["variable"], description=variable.get("description") or "", - label=variable.get("label"), + label=variable["label"], required=variable.get("required", False), max_length=variable.get("max_length"), options=variable.get("options") or [], diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py index f26351d93e..ac21577d57 100644 --- a/api/core/app/app_config/entities.py +++ b/api/core/app/app_config/entities.py @@ -281,7 +281,7 @@ class EasyUIBasedAppConfig(AppConfig): app_model_config_from: EasyUIBasedAppModelConfigFrom app_model_config_id: str - app_model_config_dict: dict + app_model_config_dict: dict[str, Any] model: ModelConfigEntity prompt_template: PromptTemplateEntity dataset: DatasetEntity | None = None diff --git a/api/core/app/apps/advanced_chat/app_runner.py b/api/core/app/apps/advanced_chat/app_runner.py index 3c416d22b3..037e760962 100644 --- a/api/core/app/apps/advanced_chat/app_runner.py +++ b/api/core/app/apps/advanced_chat/app_runner.py @@ -142,20 +142,25 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner): query = self.application_generate_entity.query # moderation - if self.handle_input_moderation( + stop, new_inputs, new_query = self.handle_input_moderation( app_record=self._app, app_generate_entity=self.application_generate_entity, inputs=inputs, query=query, message_id=self.message.id, - ): + ) + if stop: return + self.application_generate_entity.inputs = new_inputs + self.application_generate_entity.query = new_query + system_inputs.query = new_query + # annotation reply if self.handle_annotation_reply( app_record=self._app, message=self.message, - query=query, + query=new_query, app_generate_entity=self.application_generate_entity, ): return @@ -167,7 +172,7 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner): # init variable pool variable_pool = VariablePool( system_variables=system_inputs, - user_inputs=inputs, + user_inputs=new_inputs, environment_variables=self._workflow.environment_variables, # Based on the definition of `Variable`, # `VariableBase` instances can be safely used as `Variable` since they are compatible. @@ -246,10 +251,10 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner): inputs: Mapping[str, Any], query: str, message_id: str, - ) -> bool: + ) -> tuple[bool, Mapping[str, Any], str]: try: # process sensitive_word_avoidance - _, inputs, query = self.moderation_for_inputs( + _, new_inputs, new_query = self.moderation_for_inputs( app_id=app_record.id, tenant_id=app_generate_entity.app_config.tenant_id, app_generate_entity=app_generate_entity, @@ -259,9 +264,9 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner): ) except ModerationError as e: self._complete_with_stream_output(text=str(e), stopped_by=QueueStopEvent.StopBy.INPUT_MODERATION) - return True + return True, inputs, query - return False + return False, new_inputs, new_query def handle_annotation_reply( self, app_record: App, message: Message, query: str, app_generate_entity: AdvancedChatAppGenerateEntity diff --git a/api/core/app/apps/advanced_chat/generate_response_converter.py b/api/core/app/apps/advanced_chat/generate_response_converter.py index 2c3df9e910..4693ed1b16 100644 --- a/api/core/app/apps/advanced_chat/generate_response_converter.py +++ b/api/core/app/apps/advanced_chat/generate_response_converter.py @@ -114,7 +114,7 @@ class AdvancedChatAppGenerateResponseConverter(AppGenerateResponseConverter): metadata = sub_stream_response_dict.get("metadata", {}) sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata) response_chunk.update(sub_stream_response_dict) - if isinstance(sub_stream_response, ErrorStreamResponse): + elif isinstance(sub_stream_response, ErrorStreamResponse): data = cls._error_to_stream_response(sub_stream_response.err) response_chunk.update(data) elif isinstance(sub_stream_response, NodeStartStreamResponse | NodeFinishStreamResponse): diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index 8754644857..a5f4b91fd7 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -71,7 +71,7 @@ from dify_graph.entities.pause_reason import HumanInputRequired from dify_graph.enums import WorkflowExecutionStatus from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.repositories.draft_variable_repository import DraftVariableSaverFactory from dify_graph.runtime import GraphRuntimeState from dify_graph.system_variable import SystemVariable @@ -482,7 +482,7 @@ class AdvancedChatAppGenerateTaskPipeline(GraphRuntimeStateSupport): ) -> Generator[StreamResponse, None, None]: """Handle node succeeded events.""" # Record files if it's an answer node or end node - if event.node_type in [NodeType.ANSWER, NodeType.END, NodeType.LLM]: + if event.node_type in [BuiltinNodeTypes.ANSWER, BuiltinNodeTypes.END, BuiltinNodeTypes.LLM]: self._recorded_files.extend( self._workflow_response_converter.fetch_files_from_node_outputs(event.outputs or {}) ) diff --git a/api/core/app/apps/agent_chat/app_config_manager.py b/api/core/app/apps/agent_chat/app_config_manager.py index 801619ddbc..f0d81e0c59 100644 --- a/api/core/app/apps/agent_chat/app_config_manager.py +++ b/api/core/app/apps/agent_chat/app_config_manager.py @@ -20,7 +20,7 @@ from core.app.app_config.features.suggested_questions_after_answer.manager impor ) from core.app.app_config.features.text_to_speech.manager import TextToSpeechConfigManager from core.entities.agent_entities import PlanningStrategy -from models.model import App, AppMode, AppModelConfig, Conversation +from models.model import App, AppMode, AppModelConfig, AppModelConfigDict, Conversation OLD_TOOLS = ["dataset", "google_search", "web_reader", "wikipedia", "current_datetime"] @@ -40,7 +40,7 @@ class AgentChatAppConfigManager(BaseAppConfigManager): app_model: App, app_model_config: AppModelConfig, conversation: Conversation | None = None, - override_config_dict: dict | None = None, + override_config_dict: AppModelConfigDict | None = None, ) -> AgentChatAppConfig: """ Convert app model config to agent chat app config @@ -61,7 +61,9 @@ class AgentChatAppConfigManager(BaseAppConfigManager): app_model_config_dict = app_model_config.to_dict() config_dict = app_model_config_dict.copy() else: - config_dict = override_config_dict or {} + if not override_config_dict: + raise Exception("override_config_dict is required when config_from is ARGS") + config_dict = override_config_dict app_mode = AppMode.value_of(app_model.mode) app_config = AgentChatAppConfig( @@ -70,7 +72,7 @@ class AgentChatAppConfigManager(BaseAppConfigManager): app_mode=app_mode, app_model_config_from=config_from, app_model_config_id=app_model_config.id, - app_model_config_dict=config_dict, + app_model_config_dict=cast(dict[str, Any], config_dict), model=ModelConfigManager.convert(config=config_dict), prompt_template=PromptTemplateConfigManager.convert(config=config_dict), sensitive_word_avoidance=SensitiveWordAvoidanceConfigManager.convert(config=config_dict), @@ -86,7 +88,7 @@ class AgentChatAppConfigManager(BaseAppConfigManager): return app_config @classmethod - def config_validate(cls, tenant_id: str, config: Mapping[str, Any]): + def config_validate(cls, tenant_id: str, config: Mapping[str, Any]) -> AppModelConfigDict: """ Validate for agent chat app model config @@ -157,7 +159,7 @@ class AgentChatAppConfigManager(BaseAppConfigManager): # Filter out extra parameters filtered_config = {key: config.get(key) for key in related_config_keys} - return filtered_config + return cast(AppModelConfigDict, filtered_config) @classmethod def validate_agent_mode_and_set_defaults( diff --git a/api/core/app/apps/agent_chat/generate_response_converter.py b/api/core/app/apps/agent_chat/generate_response_converter.py index f328425fb7..bf4ada483f 100644 --- a/api/core/app/apps/agent_chat/generate_response_converter.py +++ b/api/core/app/apps/agent_chat/generate_response_converter.py @@ -113,7 +113,7 @@ class AgentChatAppGenerateResponseConverter(AppGenerateResponseConverter): metadata = sub_stream_response_dict.get("metadata", {}) sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata) response_chunk.update(sub_stream_response_dict) - if isinstance(sub_stream_response, ErrorStreamResponse): + elif isinstance(sub_stream_response, ErrorStreamResponse): data = cls._error_to_stream_response(sub_stream_response.err) response_chunk.update(data) else: diff --git a/api/core/app/apps/chat/app_config_manager.py b/api/core/app/apps/chat/app_config_manager.py index 4b6720a3c3..5f087f6066 100644 --- a/api/core/app/apps/chat/app_config_manager.py +++ b/api/core/app/apps/chat/app_config_manager.py @@ -1,3 +1,5 @@ +from typing import Any, cast + from core.app.app_config.base_app_config_manager import BaseAppConfigManager from core.app.app_config.common.sensitive_word_avoidance.manager import SensitiveWordAvoidanceConfigManager from core.app.app_config.easy_ui_based_app.dataset.manager import DatasetConfigManager @@ -13,7 +15,7 @@ from core.app.app_config.features.suggested_questions_after_answer.manager impor SuggestedQuestionsAfterAnswerConfigManager, ) from core.app.app_config.features.text_to_speech.manager import TextToSpeechConfigManager -from models.model import App, AppMode, AppModelConfig, Conversation +from models.model import App, AppMode, AppModelConfig, AppModelConfigDict, Conversation class ChatAppConfig(EasyUIBasedAppConfig): @@ -31,7 +33,7 @@ class ChatAppConfigManager(BaseAppConfigManager): app_model: App, app_model_config: AppModelConfig, conversation: Conversation | None = None, - override_config_dict: dict | None = None, + override_config_dict: AppModelConfigDict | None = None, ) -> ChatAppConfig: """ Convert app model config to chat app config @@ -64,7 +66,7 @@ class ChatAppConfigManager(BaseAppConfigManager): app_mode=app_mode, app_model_config_from=config_from, app_model_config_id=app_model_config.id, - app_model_config_dict=config_dict, + app_model_config_dict=cast(dict[str, Any], config_dict), model=ModelConfigManager.convert(config=config_dict), prompt_template=PromptTemplateConfigManager.convert(config=config_dict), sensitive_word_avoidance=SensitiveWordAvoidanceConfigManager.convert(config=config_dict), @@ -79,7 +81,7 @@ class ChatAppConfigManager(BaseAppConfigManager): return app_config @classmethod - def config_validate(cls, tenant_id: str, config: dict): + def config_validate(cls, tenant_id: str, config: dict) -> AppModelConfigDict: """ Validate for chat app model config @@ -145,4 +147,4 @@ class ChatAppConfigManager(BaseAppConfigManager): # Filter out extra parameters filtered_config = {key: config.get(key) for key in related_config_keys} - return filtered_config + return cast(AppModelConfigDict, filtered_config) diff --git a/api/core/app/apps/chat/app_runner.py b/api/core/app/apps/chat/app_runner.py index 23546a47bb..f63b38fc86 100644 --- a/api/core/app/apps/chat/app_runner.py +++ b/api/core/app/apps/chat/app_runner.py @@ -173,8 +173,10 @@ class ChatAppRunner(AppRunner): memory=memory, message_id=message.id, inputs=inputs, - vision_enabled=application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}).get( - "enabled", False + vision_enabled=bool( + application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}) + .get("image", {}) + .get("enabled", False) ), ) context_files = retrieved_files or [] diff --git a/api/core/app/apps/chat/generate_response_converter.py b/api/core/app/apps/chat/generate_response_converter.py index da02f6b750..dec957e68d 100644 --- a/api/core/app/apps/chat/generate_response_converter.py +++ b/api/core/app/apps/chat/generate_response_converter.py @@ -113,7 +113,7 @@ class ChatAppGenerateResponseConverter(AppGenerateResponseConverter): metadata = sub_stream_response_dict.get("metadata", {}) sub_stream_response_dict["metadata"] = cls._get_simple_metadata(metadata) response_chunk.update(sub_stream_response_dict) - if isinstance(sub_stream_response, ErrorStreamResponse): + elif isinstance(sub_stream_response, ErrorStreamResponse): data = cls._error_to_stream_response(sub_stream_response.err) response_chunk.update(data) else: diff --git a/api/core/app/apps/common/workflow_response_converter.py b/api/core/app/apps/common/workflow_response_converter.py index 102cf66aff..fac81c1145 100644 --- a/api/core/app/apps/common/workflow_response_converter.py +++ b/api/core/app/apps/common/workflow_response_converter.py @@ -48,12 +48,13 @@ from core.app.entities.task_entities import ( from core.plugin.impl.datasource import PluginDatasourceManager from core.tools.entities.tool_entities import ToolProviderType from core.tools.tool_manager import ToolManager +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.trigger_manager import TriggerManager from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities.pause_reason import HumanInputRequired from dify_graph.entities.workflow_start_reason import WorkflowStartReason from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, SystemVariableKey, WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, @@ -445,7 +446,7 @@ class WorkflowResponseConverter: event: QueueNodeStartedEvent, task_id: str, ) -> NodeStartStreamResponse | None: - if event.node_type in {NodeType.ITERATION, NodeType.LOOP}: + if event.node_type in {BuiltinNodeTypes.ITERATION, BuiltinNodeTypes.LOOP}: return None run_id = self._ensure_workflow_run_id() snapshot = self._store_snapshot(event) @@ -468,13 +469,13 @@ class WorkflowResponseConverter: ) try: - if event.node_type == NodeType.TOOL: + if event.node_type == BuiltinNodeTypes.TOOL: response.data.extras["icon"] = ToolManager.get_tool_icon( tenant_id=self._application_generate_entity.app_config.tenant_id, provider_type=ToolProviderType(event.provider_type), provider_id=event.provider_id, ) - elif event.node_type == NodeType.DATASOURCE: + elif event.node_type == BuiltinNodeTypes.DATASOURCE: manager = PluginDatasourceManager() provider_entity = manager.fetch_datasource_provider( self._application_generate_entity.app_config.tenant_id, @@ -483,7 +484,7 @@ class WorkflowResponseConverter: response.data.extras["icon"] = provider_entity.declaration.identity.generate_datasource_icon_url( self._application_generate_entity.app_config.tenant_id ) - elif event.node_type == NodeType.TRIGGER_PLUGIN: + elif event.node_type == TRIGGER_PLUGIN_NODE_TYPE: response.data.extras["icon"] = TriggerManager.get_trigger_plugin_icon( self._application_generate_entity.app_config.tenant_id, event.provider_id, @@ -500,7 +501,7 @@ class WorkflowResponseConverter: event: QueueNodeSucceededEvent | QueueNodeFailedEvent | QueueNodeExceptionEvent, task_id: str, ) -> NodeFinishStreamResponse | None: - if event.node_type in {NodeType.ITERATION, NodeType.LOOP}: + if event.node_type in {BuiltinNodeTypes.ITERATION, BuiltinNodeTypes.LOOP}: return None run_id = self._ensure_workflow_run_id() snapshot = self._pop_snapshot(event.node_execution_id) @@ -559,7 +560,7 @@ class WorkflowResponseConverter: event: QueueNodeRetryEvent, task_id: str, ) -> NodeRetryStreamResponse | None: - if event.node_type in {NodeType.ITERATION, NodeType.LOOP}: + if event.node_type in {BuiltinNodeTypes.ITERATION, BuiltinNodeTypes.LOOP}: return None run_id = self._ensure_workflow_run_id() @@ -618,7 +619,7 @@ class WorkflowResponseConverter: data=IterationNodeStartStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, created_at=int(time.time()), extras={}, @@ -641,7 +642,7 @@ class WorkflowResponseConverter: data=IterationNodeNextStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, index=event.index, created_at=int(time.time()), @@ -668,7 +669,7 @@ class WorkflowResponseConverter: data=IterationNodeCompletedStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, outputs=new_outputs, outputs_truncated=outputs_truncated, @@ -698,7 +699,7 @@ class WorkflowResponseConverter: data=LoopNodeStartStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, created_at=int(time.time()), extras={}, @@ -721,7 +722,7 @@ class WorkflowResponseConverter: data=LoopNodeNextStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, index=event.index, # The `pre_loop_output` field is not utilized by the frontend. @@ -750,7 +751,7 @@ class WorkflowResponseConverter: data=LoopNodeCompletedStreamResponse.Data( id=event.node_id, node_id=event.node_id, - node_type=event.node_type.value, + node_type=event.node_type, title=event.node_title, outputs=new_outputs, outputs_truncated=outputs_truncated, diff --git a/api/core/app/apps/completion/app_config_manager.py b/api/core/app/apps/completion/app_config_manager.py index eb1902f12e..f49e7b8b5e 100644 --- a/api/core/app/apps/completion/app_config_manager.py +++ b/api/core/app/apps/completion/app_config_manager.py @@ -1,3 +1,5 @@ +from typing import Any, cast + from core.app.app_config.base_app_config_manager import BaseAppConfigManager from core.app.app_config.common.sensitive_word_avoidance.manager import SensitiveWordAvoidanceConfigManager from core.app.app_config.easy_ui_based_app.dataset.manager import DatasetConfigManager @@ -8,7 +10,7 @@ from core.app.app_config.entities import EasyUIBasedAppConfig, EasyUIBasedAppMod from core.app.app_config.features.file_upload.manager import FileUploadConfigManager from core.app.app_config.features.more_like_this.manager import MoreLikeThisConfigManager from core.app.app_config.features.text_to_speech.manager import TextToSpeechConfigManager -from models.model import App, AppMode, AppModelConfig +from models.model import App, AppMode, AppModelConfig, AppModelConfigDict class CompletionAppConfig(EasyUIBasedAppConfig): @@ -22,7 +24,7 @@ class CompletionAppConfig(EasyUIBasedAppConfig): class CompletionAppConfigManager(BaseAppConfigManager): @classmethod def get_app_config( - cls, app_model: App, app_model_config: AppModelConfig, override_config_dict: dict | None = None + cls, app_model: App, app_model_config: AppModelConfig, override_config_dict: AppModelConfigDict | None = None ) -> CompletionAppConfig: """ Convert app model config to completion app config @@ -40,7 +42,9 @@ class CompletionAppConfigManager(BaseAppConfigManager): app_model_config_dict = app_model_config.to_dict() config_dict = app_model_config_dict.copy() else: - config_dict = override_config_dict or {} + if not override_config_dict: + raise Exception("override_config_dict is required when config_from is ARGS") + config_dict = override_config_dict app_mode = AppMode.value_of(app_model.mode) app_config = CompletionAppConfig( @@ -49,7 +53,7 @@ class CompletionAppConfigManager(BaseAppConfigManager): app_mode=app_mode, app_model_config_from=config_from, app_model_config_id=app_model_config.id, - app_model_config_dict=config_dict, + app_model_config_dict=cast(dict[str, Any], config_dict), model=ModelConfigManager.convert(config=config_dict), prompt_template=PromptTemplateConfigManager.convert(config=config_dict), sensitive_word_avoidance=SensitiveWordAvoidanceConfigManager.convert(config=config_dict), @@ -64,7 +68,7 @@ class CompletionAppConfigManager(BaseAppConfigManager): return app_config @classmethod - def config_validate(cls, tenant_id: str, config: dict): + def config_validate(cls, tenant_id: str, config: dict) -> AppModelConfigDict: """ Validate for completion app model config @@ -116,4 +120,4 @@ class CompletionAppConfigManager(BaseAppConfigManager): # Filter out extra parameters filtered_config = {key: config.get(key) for key in related_config_keys} - return filtered_config + return cast(AppModelConfigDict, filtered_config) diff --git a/api/core/app/apps/completion/app_generator.py b/api/core/app/apps/completion/app_generator.py index e8b0e4f179..002b914ef1 100644 --- a/api/core/app/apps/completion/app_generator.py +++ b/api/core/app/apps/completion/app_generator.py @@ -275,7 +275,7 @@ class CompletionAppGenerator(MessageBasedAppGenerator): raise ValueError("Message app_model_config is None") override_model_config_dict = app_model_config.to_dict() model_dict = override_model_config_dict["model"] - completion_params = model_dict.get("completion_params") + completion_params = model_dict.get("completion_params", {}) completion_params["temperature"] = 0.9 model_dict["completion_params"] = completion_params override_model_config_dict["model"] = model_dict diff --git a/api/core/app/apps/completion/app_runner.py b/api/core/app/apps/completion/app_runner.py index ac05172945..56a4519879 100644 --- a/api/core/app/apps/completion/app_runner.py +++ b/api/core/app/apps/completion/app_runner.py @@ -132,8 +132,10 @@ class CompletionAppRunner(AppRunner): hit_callback=hit_callback, message_id=message.id, inputs=inputs, - vision_enabled=application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}).get( - "enabled", False + vision_enabled=bool( + application_generate_entity.app_config.app_model_config_dict.get("file_upload", {}) + .get("image", {}) + .get("enabled", False) ), ) context_files = retrieved_files or [] diff --git a/api/core/app/apps/pipeline/pipeline_runner.py b/api/core/app/apps/pipeline/pipeline_runner.py index 4222aae809..e767766bdb 100644 --- a/api/core/app/apps/pipeline/pipeline_runner.py +++ b/api/core/app/apps/pipeline/pipeline_runner.py @@ -12,7 +12,7 @@ from core.app.entities.app_invoke_entities import ( build_dify_run_context, ) from core.app.workflow.layers.persistence import PersistenceWorkflowInfo, WorkflowPersistenceLayer -from core.workflow.node_factory import DifyNodeFactory +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities.graph_init_params import GraphInitParams from dify_graph.enums import WorkflowType @@ -274,6 +274,8 @@ class PipelineRunner(WorkflowBasedAppRunner): graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, ) + if start_node_id is None: + start_node_id = get_default_root_node_id(graph_config) graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=start_node_id) if not graph: diff --git a/api/core/app/apps/workflow_app_runner.py b/api/core/app/apps/workflow_app_runner.py index 7eb2a25e70..2461a7ac09 100644 --- a/api/core/app/apps/workflow_app_runner.py +++ b/api/core/app/apps/workflow_app_runner.py @@ -3,7 +3,10 @@ import time from collections.abc import Mapping, Sequence from typing import Any, cast +from pydantic import ValidationError + from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom +from core.app.entities.agent_strategy import AgentStrategyInfo from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_dify_run_context from core.app.entities.queue_entities import ( AppQueueEvent, @@ -29,9 +32,11 @@ from core.app.entities.queue_entities import ( QueueWorkflowStartedEvent, QueueWorkflowSucceededEvent, ) -from core.workflow.node_factory import DifyNodeFactory +from core.rag.entities.citation_metadata import RetrievalSourceMetadata +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id, resolve_workflow_node_class from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities import GraphInitParams +from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.entities.pause_reason import HumanInputRequired from dify_graph.graph import Graph from dify_graph.graph_engine.layers.base import GraphEngineLayer @@ -62,8 +67,6 @@ from dify_graph.graph_events import ( NodeRunSucceededEvent, ) from dify_graph.graph_events.graph import GraphRunAbortedEvent -from dify_graph.nodes import NodeType -from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from dify_graph.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool @@ -137,6 +140,9 @@ class WorkflowBasedAppRunner: graph_runtime_state=graph_runtime_state, ) + if root_node_id is None: + root_node_id = get_default_root_node_id(graph_config) + # init graph graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=root_node_id) @@ -303,10 +309,12 @@ class WorkflowBasedAppRunner: if not target_node_config: raise ValueError(f"{node_type_label} node id not found in workflow graph") + target_node_config = NodeConfigDictAdapter.validate_python(target_node_config) + # Get node class - node_type = NodeType(target_node_config.get("data", {}).get("type")) - node_version = target_node_config.get("data", {}).get("version", "1") - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version] + node_type = target_node_config["data"].type + node_version = str(target_node_config["data"].version) + node_cls = resolve_workflow_node_class(node_type=node_type, node_version=node_version) # Use the variable pool from graph_runtime_state instead of creating a new one variable_pool = graph_runtime_state.variable_pool @@ -334,6 +342,18 @@ class WorkflowBasedAppRunner: return graph, variable_pool + @staticmethod + def _build_agent_strategy_info(event: NodeRunStartedEvent) -> AgentStrategyInfo | None: + raw_agent_strategy = event.extras.get("agent_strategy") + if raw_agent_strategy is None: + return None + + try: + return AgentStrategyInfo.model_validate(raw_agent_strategy) + except ValidationError: + logger.warning("Invalid agent strategy payload for node %s", event.node_id, exc_info=True) + return None + def _handle_event(self, workflow_entry: WorkflowEntry, event: GraphEngineEvent): """ Handle event @@ -421,7 +441,7 @@ class WorkflowBasedAppRunner: in_iteration_id=event.in_iteration_id, in_loop_id=event.in_loop_id, in_parent_node_id=event.in_parent_node_id, - agent_strategy=event.agent_strategy, + agent_strategy=self._build_agent_strategy_info(event), provider_type=event.provider_type, provider_id=event.provider_id, ) @@ -509,7 +529,9 @@ class WorkflowBasedAppRunner: elif isinstance(event, NodeRunRetrieverResourceEvent): self._publish_event( QueueRetrieverResourcesEvent( - retriever_resources=event.retriever_resources, + retriever_resources=[ + RetrievalSourceMetadata.model_validate(resource) for resource in event.retriever_resources + ], in_iteration_id=event.in_iteration_id, in_loop_id=event.in_loop_id, in_parent_node_id=event.in_parent_node_id, diff --git a/api/core/app/entities/__init__.py b/api/core/app/entities/__init__.py index e69de29bb2..8e41acee32 100644 --- a/api/core/app/entities/__init__.py +++ b/api/core/app/entities/__init__.py @@ -0,0 +1,3 @@ +from .agent_strategy import AgentStrategyInfo + +__all__ = ["AgentStrategyInfo"] diff --git a/api/core/app/entities/agent_strategy.py b/api/core/app/entities/agent_strategy.py new file mode 100644 index 0000000000..b063a12f4f --- /dev/null +++ b/api/core/app/entities/agent_strategy.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel, ConfigDict + + +class AgentStrategyInfo(BaseModel): + name: str + icon: str | None = None + + model_config = ConfigDict(extra="forbid") diff --git a/api/core/app/entities/queue_entities.py b/api/core/app/entities/queue_entities.py index fa000fa13e..cb01b14097 100644 --- a/api/core/app/entities/queue_entities.py +++ b/api/core/app/entities/queue_entities.py @@ -5,13 +5,13 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field +from core.app.entities.agent_strategy import AgentStrategyInfo from core.rag.entities.citation_metadata import RetrievalSourceMetadata -from dify_graph.entities import AgentNodeStrategyInit, ToolCall, ToolResult +from dify_graph.entities import ToolCall, ToolResult from dify_graph.entities.pause_reason import PauseReason from dify_graph.entities.workflow_start_reason import WorkflowStartReason -from dify_graph.enums import WorkflowNodeExecutionMetadataKey +from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk -from dify_graph.nodes import NodeType class QueueEvent(StrEnum): @@ -354,7 +354,7 @@ class QueueNodeStartedEvent(AppQueueEvent): in_parent_node_id: str | None = None """parent node id if this is an extractor node event""" start_at: datetime - agent_strategy: AgentNodeStrategyInit | None = None + agent_strategy: AgentStrategyInfo | None = None # FIXME(-LAN-): only for ToolNode, need to refactor provider_type: str # should be a core.tools.entities.tool_entities.ToolProviderType diff --git a/api/core/app/entities/task_entities.py b/api/core/app/entities/task_entities.py index 5c0383d3a7..db6d8666de 100644 --- a/api/core/app/entities/task_entities.py +++ b/api/core/app/entities/task_entities.py @@ -4,8 +4,8 @@ from typing import Any from pydantic import BaseModel, ConfigDict, Field +from core.app.entities.agent_strategy import AgentStrategyInfo from core.rag.entities.citation_metadata import RetrievalSourceMetadata -from dify_graph.entities import AgentNodeStrategyInit from dify_graph.entities.workflow_start_reason import WorkflowStartReason from dify_graph.enums import WorkflowExecutionStatus, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage @@ -393,7 +393,7 @@ class NodeStartStreamResponse(StreamResponse): iteration_id: str | None = None loop_id: str | None = None parent_node_id: str | None = None - agent_strategy: AgentNodeStrategyInit | None = None + agent_strategy: AgentStrategyInfo | None = None event: StreamEvent = StreamEvent.NODE_STARTED workflow_run_id: str diff --git a/api/core/app/layers/conversation_variable_persist_layer.py b/api/core/app/layers/conversation_variable_persist_layer.py index e495abf855..d227e4e904 100644 --- a/api/core/app/layers/conversation_variable_persist_layer.py +++ b/api/core/app/layers/conversation_variable_persist_layer.py @@ -2,7 +2,7 @@ import logging from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID from dify_graph.conversation_variable_updater import ConversationVariableUpdater -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_events import GraphEngineEvent, NodeRunSucceededEvent from dify_graph.nodes.variable_assigner.common import helpers as common_helpers @@ -22,7 +22,7 @@ class ConversationVariablePersistenceLayer(GraphEngineLayer): def on_event(self, event: GraphEngineEvent) -> None: if not isinstance(event, NodeRunSucceededEvent): return - if event.node_type != NodeType.VARIABLE_ASSIGNER: + if event.node_type != BuiltinNodeTypes.VARIABLE_ASSIGNER: return if self.graph_runtime_state is None: return diff --git a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py index 6093413c5d..bb1b9a7804 100644 --- a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py +++ b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py @@ -3,7 +3,7 @@ import re import time from collections.abc import Generator from threading import Thread -from typing import Union, cast +from typing import Any, Union, cast from sqlalchemy import select from sqlalchemy.orm import Session @@ -45,14 +45,13 @@ from core.app.entities.task_entities import ( ) from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline from core.app.task_pipeline.message_cycle_manager import MessageCycleManager +from core.app.task_pipeline.message_file_utils import prepare_file_dict from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk from core.model_manager import ModelInstance from core.ops.entities.trace_entity import TraceTaskName from core.ops.ops_trace_manager import TraceQueueManager, TraceTask from core.prompt.utils.prompt_message_util import PromptMessageUtil from core.prompt.utils.prompt_template_parser import PromptTemplateParser -from core.tools.signature import sign_tool_file -from dify_graph.file import helpers as file_helpers from dify_graph.file.enums import FileTransferMethod from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage from dify_graph.model_runtime.entities.message_entities import ( @@ -230,14 +229,14 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline): tenant_id = self._application_generate_entity.app_config.tenant_id task_id = self._application_generate_entity.task_id publisher = None - text_to_speech_dict = self._app_config.app_model_config_dict.get("text_to_speech") + text_to_speech_dict = cast(dict[str, Any], self._app_config.app_model_config_dict.get("text_to_speech")) if ( text_to_speech_dict and text_to_speech_dict.get("autoPlay") == "enabled" and text_to_speech_dict.get("enabled") ): publisher = AppGeneratorTTSPublisher( - tenant_id, text_to_speech_dict.get("voice", None), text_to_speech_dict.get("language", None) + tenant_id, text_to_speech_dict.get("voice", ""), text_to_speech_dict.get("language", None) ) for response in self._process_stream_response(publisher=publisher, trace_manager=trace_manager): while True: @@ -596,91 +595,40 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline): """ self._task_state.metadata.usage = self._task_state.llm_result.usage metadata_dict = self._task_state.metadata.model_dump() + + # Fetch files associated with this message + files = None + with Session(db.engine, expire_on_commit=False) as session: + message_files = session.scalars(select(MessageFile).where(MessageFile.message_id == self._message_id)).all() + + if message_files: + # Fetch all required UploadFile objects in a single query to avoid N+1 problem + upload_file_ids = list( + dict.fromkeys( + mf.upload_file_id + for mf in message_files + if mf.transfer_method == FileTransferMethod.LOCAL_FILE and mf.upload_file_id + ) + ) + upload_files_map = {} + if upload_file_ids: + upload_files = session.scalars(select(UploadFile).where(UploadFile.id.in_(upload_file_ids))).all() + upload_files_map = {uf.id: uf for uf in upload_files} + + files_list = [] + for message_file in message_files: + file_dict = prepare_file_dict(message_file, upload_files_map) + files_list.append(file_dict) + + files = files_list or None + return MessageEndStreamResponse( task_id=self._application_generate_entity.task_id, id=self._message_id, metadata=metadata_dict, + files=files, ) - def _record_files(self): - with Session(db.engine, expire_on_commit=False) as session: - message_files = session.scalars(select(MessageFile).where(MessageFile.message_id == self._message_id)).all() - if not message_files: - return None - - files_list = [] - upload_file_ids = [ - mf.upload_file_id - for mf in message_files - if mf.transfer_method == FileTransferMethod.LOCAL_FILE and mf.upload_file_id - ] - upload_files_map = {} - if upload_file_ids: - upload_files = session.scalars(select(UploadFile).where(UploadFile.id.in_(upload_file_ids))).all() - upload_files_map = {uf.id: uf for uf in upload_files} - - for message_file in message_files: - upload_file = None - if message_file.transfer_method == FileTransferMethod.LOCAL_FILE and message_file.upload_file_id: - upload_file = upload_files_map.get(message_file.upload_file_id) - - url = None - filename = "file" - mime_type = "application/octet-stream" - size = 0 - extension = "" - - if message_file.transfer_method == FileTransferMethod.REMOTE_URL: - url = message_file.url - if message_file.url: - filename = message_file.url.split("/")[-1].split("?")[0] # Remove query params - elif message_file.transfer_method == FileTransferMethod.LOCAL_FILE: - if upload_file: - url = file_helpers.get_signed_file_url(upload_file_id=str(upload_file.id)) - filename = upload_file.name - mime_type = upload_file.mime_type or "application/octet-stream" - size = upload_file.size or 0 - extension = f".{upload_file.extension}" if upload_file.extension else "" - elif message_file.upload_file_id: - # Fallback: generate URL even if upload_file not found - url = file_helpers.get_signed_file_url(upload_file_id=str(message_file.upload_file_id)) - elif message_file.transfer_method == FileTransferMethod.TOOL_FILE and message_file.url: - # For tool files, use URL directly if it's HTTP, otherwise sign it - if message_file.url.startswith("http"): - url = message_file.url - filename = message_file.url.split("/")[-1].split("?")[0] - else: - # Extract tool file id and extension from URL - url_parts = message_file.url.split("/") - if url_parts: - file_part = url_parts[-1].split("?")[0] # Remove query params first - # Use rsplit to correctly handle filenames with multiple dots - if "." in file_part: - tool_file_id, ext = file_part.rsplit(".", 1) - extension = f".{ext}" - else: - tool_file_id = file_part - extension = ".bin" - url = sign_tool_file(tool_file_id=tool_file_id, extension=extension) - filename = file_part - - transfer_method_value = message_file.transfer_method - remote_url = message_file.url if message_file.transfer_method == FileTransferMethod.REMOTE_URL else "" - file_dict = { - "related_id": message_file.id, - "extension": extension, - "filename": filename, - "size": size, - "mime_type": mime_type, - "transfer_method": transfer_method_value, - "type": message_file.type, - "url": url or "", - "upload_file_id": message_file.upload_file_id or message_file.id, - "remote_url": remote_url, - } - files_list.append(file_dict) - return files_list or None - def _agent_message_to_stream_response(self, answer: str, message_id: str) -> AgentMessageStreamResponse: """ Agent message to stream response. diff --git a/api/core/app/task_pipeline/message_cycle_manager.py b/api/core/app/task_pipeline/message_cycle_manager.py index 7d62da9e75..01c60572a2 100644 --- a/api/core/app/task_pipeline/message_cycle_manager.py +++ b/api/core/app/task_pipeline/message_cycle_manager.py @@ -1,7 +1,6 @@ import hashlib import logging -import time -from threading import Thread +from threading import Thread, Timer from typing import Union from flask import Flask, current_app @@ -96,9 +95,9 @@ class MessageCycleManager: if auto_generate_conversation_name and is_first_message: # start generate thread # time.sleep not block other logic - time.sleep(1) - thread = Thread( - target=self._generate_conversation_name_worker, + thread = Timer( + 1, + self._generate_conversation_name_worker, kwargs={ "flask_app": current_app._get_current_object(), # type: ignore "conversation_id": conversation_id, diff --git a/api/core/app/task_pipeline/message_file_utils.py b/api/core/app/task_pipeline/message_file_utils.py new file mode 100644 index 0000000000..843e9eea30 --- /dev/null +++ b/api/core/app/task_pipeline/message_file_utils.py @@ -0,0 +1,76 @@ +from core.tools.signature import sign_tool_file +from dify_graph.file import helpers as file_helpers +from dify_graph.file.enums import FileTransferMethod +from models.model import MessageFile, UploadFile + +MAX_TOOL_FILE_EXTENSION_LENGTH = 10 + + +def prepare_file_dict(message_file: MessageFile, upload_files_map: dict[str, UploadFile]) -> dict: + """ + Prepare file dictionary for message end stream response. + + :param message_file: MessageFile instance + :param upload_files_map: Dictionary mapping upload_file_id to UploadFile + :return: Dictionary containing file information + """ + upload_file = None + if message_file.transfer_method == FileTransferMethod.LOCAL_FILE and message_file.upload_file_id: + upload_file = upload_files_map.get(message_file.upload_file_id) + + url = None + filename = "file" + mime_type = "application/octet-stream" + size = 0 + extension = "" + + if message_file.transfer_method == FileTransferMethod.REMOTE_URL: + url = message_file.url + if message_file.url: + filename = message_file.url.split("/")[-1].split("?")[0] + if "." in filename: + extension = "." + filename.rsplit(".", 1)[1] + elif message_file.transfer_method == FileTransferMethod.LOCAL_FILE: + if upload_file: + url = file_helpers.get_signed_file_url(upload_file_id=str(upload_file.id)) + filename = upload_file.name + mime_type = upload_file.mime_type or "application/octet-stream" + size = upload_file.size or 0 + extension = f".{upload_file.extension}" if upload_file.extension else "" + elif message_file.upload_file_id: + url = file_helpers.get_signed_file_url(upload_file_id=str(message_file.upload_file_id)) + elif message_file.transfer_method == FileTransferMethod.TOOL_FILE and message_file.url: + if message_file.url.startswith(("http://", "https://")): + url = message_file.url + filename = message_file.url.split("/")[-1].split("?")[0] + if "." in filename: + extension = "." + filename.rsplit(".", 1)[1] + else: + url_parts = message_file.url.split("/") + if url_parts: + file_part = url_parts[-1].split("?")[0] + if "." in file_part: + tool_file_id, ext = file_part.rsplit(".", 1) + extension = f".{ext}" + if len(extension) > MAX_TOOL_FILE_EXTENSION_LENGTH: + extension = ".bin" + else: + tool_file_id = file_part + extension = ".bin" + url = sign_tool_file(tool_file_id=tool_file_id, extension=extension) + filename = file_part + + transfer_method_value = message_file.transfer_method.value + remote_url = message_file.url if message_file.transfer_method == FileTransferMethod.REMOTE_URL else "" + return { + "related_id": message_file.id, + "extension": extension, + "filename": filename, + "size": size, + "mime_type": mime_type, + "transfer_method": transfer_method_value, + "type": message_file.type, + "url": url or "", + "upload_file_id": message_file.upload_file_id or message_file.id, + "remote_url": remote_url, + } diff --git a/api/core/app/workflow/layers/llm_quota.py b/api/core/app/workflow/layers/llm_quota.py index 2e930a1f58..faf1516c40 100644 --- a/api/core/app/workflow/layers/llm_quota.py +++ b/api/core/app/workflow/layers/llm_quota.py @@ -12,7 +12,7 @@ from typing_extensions import override from core.app.llm import deduct_llm_quota, ensure_llm_quota_available from core.errors.error import QuotaExceededError from core.model_manager import ModelInstance -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.graph_engine.entities.commands import AbortCommand, CommandType from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_events import GraphEngineEvent, GraphNodeEventBase @@ -113,11 +113,11 @@ class LLMQuotaLayer(GraphEngineLayer): def _extract_model_instance(node: Node) -> ModelInstance | None: try: match node.node_type: - case NodeType.LLM: + case BuiltinNodeTypes.LLM: return cast("LLMNode", node).model_instance - case NodeType.PARAMETER_EXTRACTOR: + case BuiltinNodeTypes.PARAMETER_EXTRACTOR: return cast("ParameterExtractorNode", node).model_instance - case NodeType.QUESTION_CLASSIFIER: + case BuiltinNodeTypes.QUESTION_CLASSIFIER: return cast("QuestionClassifierNode", node).model_instance case _: return None diff --git a/api/core/app/workflow/layers/observability.py b/api/core/app/workflow/layers/observability.py index ab73db59f1..4b20477a7f 100644 --- a/api/core/app/workflow/layers/observability.py +++ b/api/core/app/workflow/layers/observability.py @@ -16,7 +16,7 @@ from opentelemetry.trace import Span, SpanKind, Tracer, get_tracer, set_span_in_ from typing_extensions import override from configs import dify_config -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_events import GraphNodeEventBase from dify_graph.nodes.base.node import Node @@ -74,16 +74,13 @@ class ObservabilityLayer(GraphEngineLayer): def _build_parser_registry(self) -> None: """Initialize parser registry for node types.""" self._parsers = { - NodeType.TOOL: ToolNodeOTelParser(), - NodeType.LLM: LLMNodeOTelParser(), - NodeType.KNOWLEDGE_RETRIEVAL: RetrievalNodeOTelParser(), + BuiltinNodeTypes.TOOL: ToolNodeOTelParser(), + BuiltinNodeTypes.LLM: LLMNodeOTelParser(), + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: RetrievalNodeOTelParser(), } def _get_parser(self, node: Node) -> NodeOTelParser: - node_type = getattr(node, "node_type", None) - if isinstance(node_type, NodeType): - return self._parsers.get(node_type, self._default_parser) - return self._default_parser + return self._parsers.get(node.node_type, self._default_parser) @override def on_graph_start(self) -> None: diff --git a/api/core/callback_handler/index_tool_callback_handler.py b/api/core/callback_handler/index_tool_callback_handler.py index 5249fea8cd..35bba46b5c 100644 --- a/api/core/callback_handler/index_tool_callback_handler.py +++ b/api/core/callback_handler/index_tool_callback_handler.py @@ -11,6 +11,7 @@ from core.rag.models.document import Document from extensions.ext_database import db from models.dataset import ChildChunk, DatasetQuery, DocumentSegment from models.dataset import Document as DatasetDocument +from models.enums import CreatorUserRole _logger = logging.getLogger(__name__) @@ -37,7 +38,9 @@ class DatasetIndexToolCallbackHandler: source="app", source_app_id=self._app_id, created_by_role=( - "account" if self._invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} else "end_user" + CreatorUserRole.ACCOUNT + if self._invoke_from in {InvokeFrom.EXPLORE, InvokeFrom.DEBUGGER} + else CreatorUserRole.END_USER ), created_by=self._user_id, ) diff --git a/api/core/datasource/__base/datasource_provider.py b/api/core/datasource/__base/datasource_provider.py index bae39dc8c7..4b47777f0b 100644 --- a/api/core/datasource/__base/datasource_provider.py +++ b/api/core/datasource/__base/datasource_provider.py @@ -59,8 +59,6 @@ class DatasourcePluginProviderController(ABC): :param credentials: the credentials of the tool """ credentials_schema = dict[str, ProviderConfig]() - if credentials_schema is None: - return for credential in self.entity.credentials_schema: credentials_schema[credential.name] = credential diff --git a/api/core/datasource/datasource_manager.py b/api/core/datasource/datasource_manager.py index 15cd319750..4fa941ae16 100644 --- a/api/core/datasource/datasource_manager.py +++ b/api/core/datasource/datasource_manager.py @@ -24,12 +24,12 @@ from core.datasource.utils.message_transformer import DatasourceFileMessageTrans from core.datasource.website_crawl.website_crawl_provider import WebsiteCrawlDatasourcePluginProviderController from core.db.session_factory import session_factory from core.plugin.impl.datasource import PluginDatasourceManager +from core.workflow.nodes.datasource.entities import DatasourceParameter, OnlineDriveDownloadFileParam from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus from dify_graph.enums import WorkflowNodeExecutionMetadataKey from dify_graph.file import File from dify_graph.file.enums import FileTransferMethod, FileType from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent -from dify_graph.repositories.datasource_manager_protocol import DatasourceParameter, OnlineDriveDownloadFileParam from factories import file_factory from models.model import UploadFile from models.tools import ToolFile diff --git a/api/core/llm_generator/llm_generator.py b/api/core/llm_generator/llm_generator.py index 81c42c6269..fefa641bcb 100644 --- a/api/core/llm_generator/llm_generator.py +++ b/api/core/llm_generator/llm_generator.py @@ -201,7 +201,8 @@ class LLMGenerator: error_step = "generate rule config" except Exception as e: logger.exception("Failed to generate rule config, model: %s", args.model_config_data.name) - rule_config["error"] = str(e) + error = str(e) + error_step = "generate rule config" rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else "" @@ -287,7 +288,8 @@ class LLMGenerator: except Exception as e: logger.exception("Failed to generate rule config, model: %s", args.model_config_data.name) - rule_config["error"] = str(e) + error = str(e) + error_step = "handle unexpected exception" rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else "" diff --git a/api/core/ops/aliyun_trace/aliyun_trace.py b/api/core/ops/aliyun_trace/aliyun_trace.py index 19111cc917..18f35b5b9c 100644 --- a/api/core/ops/aliyun_trace/aliyun_trace.py +++ b/api/core/ops/aliyun_trace/aliyun_trace.py @@ -58,7 +58,7 @@ from core.ops.entities.trace_entity import ( ) from core.repositories import DifyCoreRepositoryFactory from dify_graph.entities import WorkflowNodeExecution -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from extensions.ext_database import db from models import WorkflowNodeExecutionTriggeredFrom @@ -302,11 +302,11 @@ class AliyunDataTrace(BaseTraceInstance): self, node_execution: WorkflowNodeExecution, trace_info: WorkflowTraceInfo, trace_metadata: TraceMetadata ): try: - if node_execution.node_type == NodeType.LLM: + if node_execution.node_type == BuiltinNodeTypes.LLM: node_span = self.build_workflow_llm_span(trace_info, node_execution, trace_metadata) - elif node_execution.node_type == NodeType.KNOWLEDGE_RETRIEVAL: + elif node_execution.node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: node_span = self.build_workflow_retrieval_span(trace_info, node_execution, trace_metadata) - elif node_execution.node_type == NodeType.TOOL: + elif node_execution.node_type == BuiltinNodeTypes.TOOL: node_span = self.build_workflow_tool_span(trace_info, node_execution, trace_metadata) else: node_span = self.build_workflow_task_span(trace_info, node_execution, trace_metadata) diff --git a/api/core/ops/aliyun_trace/data_exporter/traceclient.py b/api/core/ops/aliyun_trace/data_exporter/traceclient.py index 7624586367..0e00e90520 100644 --- a/api/core/ops/aliyun_trace/data_exporter/traceclient.py +++ b/api/core/ops/aliyun_trace/data_exporter/traceclient.py @@ -7,7 +7,7 @@ import uuid from collections import deque from collections.abc import Sequence from datetime import datetime -from typing import Final, cast +from typing import Final from urllib.parse import urljoin import httpx @@ -201,7 +201,7 @@ def convert_to_trace_id(uuid_v4: str | None) -> int: raise ValueError("UUID cannot be None") try: uuid_obj = uuid.UUID(uuid_v4) - return cast(int, uuid_obj.int) + return uuid_obj.int except ValueError as e: raise ValueError(f"Invalid UUID input: {uuid_v4}") from e diff --git a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py index 452255f69e..7cb54b2c88 100644 --- a/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py +++ b/api/core/ops/arize_phoenix_trace/arize_phoenix_trace.py @@ -155,8 +155,8 @@ def wrap_span_metadata(metadata, **kwargs): return metadata -# Mapping from NodeType string values to OpenInference span kinds. -# NodeType values not listed here default to CHAIN. +# Mapping from built-in node type strings to OpenInference span kinds. +# Node types not listed here default to CHAIN. _NODE_TYPE_TO_SPAN_KIND: dict[str, OpenInferenceSpanKindValues] = { "llm": OpenInferenceSpanKindValues.LLM, "knowledge-retrieval": OpenInferenceSpanKindValues.RETRIEVER, @@ -168,7 +168,7 @@ _NODE_TYPE_TO_SPAN_KIND: dict[str, OpenInferenceSpanKindValues] = { def _get_node_span_kind(node_type: str) -> OpenInferenceSpanKindValues: """Return the OpenInference span kind for a given workflow node type. - Covers every ``NodeType`` enum value. Nodes that do not have a + Covers every built-in node type string. Nodes that do not have a specialised span kind (e.g. ``start``, ``end``, ``if-else``, ``code``, ``loop``, ``iteration``, etc.) are mapped to ``CHAIN``. """ diff --git a/api/core/ops/langfuse_trace/langfuse_trace.py b/api/core/ops/langfuse_trace/langfuse_trace.py index 28e800e6c7..6e62387a1f 100644 --- a/api/core/ops/langfuse_trace/langfuse_trace.py +++ b/api/core/ops/langfuse_trace/langfuse_trace.py @@ -28,7 +28,7 @@ from core.ops.langfuse_trace.entities.langfuse_trace_entity import ( ) from core.ops.utils import filter_none_values from core.repositories import DifyCoreRepositoryFactory -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from extensions.ext_database import db from models import EndUser, WorkflowNodeExecutionTriggeredFrom from models.enums import MessageStatus @@ -141,7 +141,7 @@ class LangFuseDataTrace(BaseTraceInstance): node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == NodeType.LLM: + if node_type == BuiltinNodeTypes.LLM: inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: inputs = node_execution.inputs or {} diff --git a/api/core/ops/langsmith_trace/langsmith_trace.py b/api/core/ops/langsmith_trace/langsmith_trace.py index b40bc89b71..32a0c77fe2 100644 --- a/api/core/ops/langsmith_trace/langsmith_trace.py +++ b/api/core/ops/langsmith_trace/langsmith_trace.py @@ -28,7 +28,7 @@ from core.ops.langsmith_trace.entities.langsmith_trace_entity import ( ) from core.ops.utils import filter_none_values, generate_dotted_order from core.repositories import DifyCoreRepositoryFactory -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from extensions.ext_database import db from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom @@ -163,7 +163,7 @@ class LangSmithDataTrace(BaseTraceInstance): node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == NodeType.LLM: + if node_type == BuiltinNodeTypes.LLM: inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: inputs = node_execution.inputs or {} @@ -197,7 +197,7 @@ class LangSmithDataTrace(BaseTraceInstance): "ls_model_name": process_data.get("model_name", ""), } ) - elif node_type == NodeType.KNOWLEDGE_RETRIEVAL: + elif node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: run_type = LangSmithRunType.retriever else: run_type = LangSmithRunType.tool diff --git a/api/core/ops/mlflow_trace/mlflow_trace.py b/api/core/ops/mlflow_trace/mlflow_trace.py index ba2cb9e0c3..ab4a7650ec 100644 --- a/api/core/ops/mlflow_trace/mlflow_trace.py +++ b/api/core/ops/mlflow_trace/mlflow_trace.py @@ -23,7 +23,7 @@ from core.ops.entities.trace_entity import ( TraceTaskName, WorkflowTraceInfo, ) -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from extensions.ext_database import db from models import EndUser from models.workflow import WorkflowNodeExecutionModel @@ -145,10 +145,10 @@ class MLflowDataTrace(BaseTraceInstance): "app_name": node.title, } - if node.node_type in (NodeType.LLM, NodeType.QUESTION_CLASSIFIER): + if node.node_type in (BuiltinNodeTypes.LLM, BuiltinNodeTypes.QUESTION_CLASSIFIER): inputs, llm_attributes = self._parse_llm_inputs_and_attributes(node) attributes.update(llm_attributes) - elif node.node_type == NodeType.HTTP_REQUEST: + elif node.node_type == BuiltinNodeTypes.HTTP_REQUEST: inputs = node.process_data # contains request URL if not inputs: @@ -180,9 +180,9 @@ class MLflowDataTrace(BaseTraceInstance): # End node span finished_at = node.created_at + timedelta(seconds=node.elapsed_time) outputs = json.loads(node.outputs) if node.outputs else {} - if node.node_type == NodeType.KNOWLEDGE_RETRIEVAL: + if node.node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: outputs = self._parse_knowledge_retrieval_outputs(outputs) - elif node.node_type == NodeType.LLM: + elif node.node_type == BuiltinNodeTypes.LLM: outputs = outputs.get("text", outputs) node_span.end( outputs=outputs, @@ -471,13 +471,13 @@ class MLflowDataTrace(BaseTraceInstance): def _get_node_span_type(self, node_type: str) -> str: """Map Dify node types to MLflow span types""" node_type_mapping = { - NodeType.LLM: SpanType.LLM, - NodeType.QUESTION_CLASSIFIER: SpanType.LLM, - NodeType.KNOWLEDGE_RETRIEVAL: SpanType.RETRIEVER, - NodeType.TOOL: SpanType.TOOL, - NodeType.CODE: SpanType.TOOL, - NodeType.HTTP_REQUEST: SpanType.TOOL, - NodeType.AGENT: SpanType.AGENT, + BuiltinNodeTypes.LLM: SpanType.LLM, + BuiltinNodeTypes.QUESTION_CLASSIFIER: SpanType.LLM, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: SpanType.RETRIEVER, + BuiltinNodeTypes.TOOL: SpanType.TOOL, + BuiltinNodeTypes.CODE: SpanType.TOOL, + BuiltinNodeTypes.HTTP_REQUEST: SpanType.TOOL, + BuiltinNodeTypes.AGENT: SpanType.AGENT, } return node_type_mapping.get(node_type, "CHAIN") # type: ignore[arg-type,call-overload] diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index eeae489c68..fb72bc2381 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -1,3 +1,4 @@ +import hashlib import logging import os import uuid @@ -22,7 +23,7 @@ from core.ops.entities.trace_entity import ( WorkflowTraceInfo, ) from core.repositories import DifyCoreRepositoryFactory -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from extensions.ext_database import db from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom @@ -46,6 +47,22 @@ def wrap_metadata(metadata, **kwargs): return metadata +def _seed_to_uuid4(seed: str) -> str: + """Derive a deterministic UUID4-formatted string from an arbitrary seed. + + uuid4_to_uuid7 requires a valid UUID v4 string, but some Dify identifiers + are not UUIDs (e.g. a workflow_run_id with a "-root" suffix appended to + distinguish the root span from the trace). This helper hashes the seed + with MD5 and patches the version/variant bits so the result satisfies the + UUID v4 contract. + """ + raw = hashlib.md5(seed.encode()).digest() + ba = bytearray(raw) + ba[6] = (ba[6] & 0x0F) | 0x40 # version 4 + ba[8] = (ba[8] & 0x3F) | 0x80 # variant 1 + return str(uuid.UUID(bytes=bytes(ba))) + + def prepare_opik_uuid(user_datetime: datetime | None, user_uuid: str | None): """Opik needs UUIDv7 while Dify uses UUIDv4 for identifier of most messages and objects. The type-hints of BaseTraceInfo indicates that @@ -95,60 +112,52 @@ class OpikDataTrace(BaseTraceInstance): self.generate_name_trace(trace_info) def workflow_trace(self, trace_info: WorkflowTraceInfo): - dify_trace_id = trace_info.trace_id or trace_info.workflow_run_id - opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id) workflow_metadata = wrap_metadata( trace_info.metadata, message_id=trace_info.message_id, workflow_app_log_id=trace_info.workflow_app_log_id ) - root_span_id = None if trace_info.message_id: dify_trace_id = trace_info.trace_id or trace_info.message_id - opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id) - - trace_data = { - "id": opik_trace_id, - "name": TraceTaskName.MESSAGE_TRACE, - "start_time": trace_info.start_time, - "end_time": trace_info.end_time, - "metadata": workflow_metadata, - "input": wrap_dict("input", trace_info.workflow_run_inputs), - "output": wrap_dict("output", trace_info.workflow_run_outputs), - "thread_id": trace_info.conversation_id, - "tags": ["message", "workflow"], - "project_name": self.project, - } - self.add_trace(trace_data) - - root_span_id = prepare_opik_uuid(trace_info.start_time, trace_info.workflow_run_id) - span_data = { - "id": root_span_id, - "parent_span_id": None, - "trace_id": opik_trace_id, - "name": TraceTaskName.WORKFLOW_TRACE, - "input": wrap_dict("input", trace_info.workflow_run_inputs), - "output": wrap_dict("output", trace_info.workflow_run_outputs), - "start_time": trace_info.start_time, - "end_time": trace_info.end_time, - "metadata": workflow_metadata, - "tags": ["workflow"], - "project_name": self.project, - } - self.add_span(span_data) + trace_name = TraceTaskName.MESSAGE_TRACE + trace_tags = ["message", "workflow"] + root_span_seed = trace_info.workflow_run_id else: - trace_data = { - "id": opik_trace_id, - "name": TraceTaskName.MESSAGE_TRACE, - "start_time": trace_info.start_time, - "end_time": trace_info.end_time, - "metadata": workflow_metadata, - "input": wrap_dict("input", trace_info.workflow_run_inputs), - "output": wrap_dict("output", trace_info.workflow_run_outputs), - "thread_id": trace_info.conversation_id, - "tags": ["workflow"], - "project_name": self.project, - } - self.add_trace(trace_data) + dify_trace_id = trace_info.trace_id or trace_info.workflow_run_id + trace_name = TraceTaskName.WORKFLOW_TRACE + trace_tags = ["workflow"] + root_span_seed = _seed_to_uuid4(trace_info.workflow_run_id + "-root") + + opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id) + + trace_data = { + "id": opik_trace_id, + "name": trace_name, + "start_time": trace_info.start_time, + "end_time": trace_info.end_time, + "metadata": workflow_metadata, + "input": wrap_dict("input", trace_info.workflow_run_inputs), + "output": wrap_dict("output", trace_info.workflow_run_outputs), + "thread_id": trace_info.conversation_id, + "tags": trace_tags, + "project_name": self.project, + } + self.add_trace(trace_data) + + root_span_id = prepare_opik_uuid(trace_info.start_time, root_span_seed) + span_data = { + "id": root_span_id, + "parent_span_id": None, + "trace_id": opik_trace_id, + "name": TraceTaskName.WORKFLOW_TRACE, + "input": wrap_dict("input", trace_info.workflow_run_inputs), + "output": wrap_dict("output", trace_info.workflow_run_outputs), + "start_time": trace_info.start_time, + "end_time": trace_info.end_time, + "metadata": workflow_metadata, + "tags": ["workflow"], + "project_name": self.project, + } + self.add_span(span_data) # through workflow_run_id get all_nodes_execution using repository session_factory = sessionmaker(bind=db.engine) @@ -178,7 +187,7 @@ class OpikDataTrace(BaseTraceInstance): node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == NodeType.LLM: + if node_type == BuiltinNodeTypes.LLM: inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: inputs = node_execution.inputs or {} @@ -231,15 +240,13 @@ class OpikDataTrace(BaseTraceInstance): else: run_type = "tool" - parent_span_id = trace_info.workflow_app_log_id or trace_info.workflow_run_id - if not total_tokens: total_tokens = execution_metadata.get(WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS) or 0 span_data = { "trace_id": opik_trace_id, "id": prepare_opik_uuid(created_at, node_execution_id), - "parent_span_id": prepare_opik_uuid(trace_info.start_time, parent_span_id), + "parent_span_id": root_span_id, "name": node_name, "type": run_type, "start_time": created_at, diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index 33782e7949..9ac753240b 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -628,10 +628,10 @@ class TraceTask: if not message_data: return {} conversation_mode_stmt = select(Conversation.mode).where(Conversation.id == message_data.conversation_id) - conversation_mode = db.session.scalars(conversation_mode_stmt).all() - if not conversation_mode or len(conversation_mode) == 0: + conversation_modes = db.session.scalars(conversation_mode_stmt).all() + if not conversation_modes or len(conversation_modes) == 0: return {} - conversation_mode = conversation_mode[0] + conversation_mode = conversation_modes[0] created_at = message_data.created_at inputs = message_data.message diff --git a/api/core/ops/tencent_trace/tencent_trace.py b/api/core/ops/tencent_trace/tencent_trace.py index cbff1c9e1c..7e56b1effa 100644 --- a/api/core/ops/tencent_trace/tencent_trace.py +++ b/api/core/ops/tencent_trace/tencent_trace.py @@ -27,7 +27,7 @@ from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, ) -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from extensions.ext_database import db from models import Account, App, TenantAccountJoin, WorkflowNodeExecutionTriggeredFrom @@ -179,7 +179,7 @@ class TencentDataTrace(BaseTraceInstance): if node_span: self.trace_client.add_span(node_span) - if node_execution.node_type == NodeType.LLM: + if node_execution.node_type == BuiltinNodeTypes.LLM: self._record_llm_metrics(node_execution) except Exception: logger.exception("[Tencent APM] Failed to process node execution: %s", node_execution.id) @@ -192,15 +192,15 @@ class TencentDataTrace(BaseTraceInstance): ) -> SpanData | None: """Build span for different node types""" try: - if node_execution.node_type == NodeType.LLM: + if node_execution.node_type == BuiltinNodeTypes.LLM: return TencentSpanBuilder.build_workflow_llm_span( trace_id, workflow_span_id, trace_info, node_execution ) - elif node_execution.node_type == NodeType.KNOWLEDGE_RETRIEVAL: + elif node_execution.node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: return TencentSpanBuilder.build_workflow_retrieval_span( trace_id, workflow_span_id, trace_info, node_execution ) - elif node_execution.node_type == NodeType.TOOL: + elif node_execution.node_type == BuiltinNodeTypes.TOOL: return TencentSpanBuilder.build_workflow_tool_span( trace_id, workflow_span_id, trace_info, node_execution ) diff --git a/api/core/ops/tencent_trace/utils.py b/api/core/ops/tencent_trace/utils.py index 96087951ab..678287ae1d 100644 --- a/api/core/ops/tencent_trace/utils.py +++ b/api/core/ops/tencent_trace/utils.py @@ -6,7 +6,6 @@ import hashlib import random import uuid from datetime import datetime -from typing import cast from opentelemetry.trace import Link, SpanContext, TraceFlags @@ -23,7 +22,7 @@ class TencentTraceUtils: uuid_obj = uuid.UUID(uuid_v4) if uuid_v4 else uuid.uuid4() except Exception as e: raise ValueError(f"Invalid UUID input: {e}") - return cast(int, uuid_obj.int) + return uuid_obj.int @staticmethod def convert_to_span_id(uuid_v4: str | None, span_type: str) -> int: @@ -52,9 +51,9 @@ class TencentTraceUtils: @staticmethod def create_link(trace_id_str: str) -> Link: try: - trace_id = int(trace_id_str, 16) if len(trace_id_str) == 32 else cast(int, uuid.UUID(trace_id_str).int) + trace_id = int(trace_id_str, 16) if len(trace_id_str) == 32 else uuid.UUID(trace_id_str).int except (ValueError, TypeError): - trace_id = cast(int, uuid.uuid4().int) + trace_id = uuid.uuid4().int span_context = SpanContext( trace_id=trace_id, diff --git a/api/core/ops/weave_trace/weave_trace.py b/api/core/ops/weave_trace/weave_trace.py index 7b62207366..2a657b672c 100644 --- a/api/core/ops/weave_trace/weave_trace.py +++ b/api/core/ops/weave_trace/weave_trace.py @@ -31,7 +31,7 @@ from core.ops.entities.trace_entity import ( ) from core.ops.weave_trace.entities.weave_trace_entity import WeaveTraceModel from core.repositories import DifyCoreRepositoryFactory -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey from extensions.ext_database import db from models import EndUser, MessageFile, WorkflowNodeExecutionTriggeredFrom @@ -175,7 +175,7 @@ class WeaveDataTrace(BaseTraceInstance): node_name = node_execution.title node_type = node_execution.node_type status = node_execution.status - if node_type == NodeType.LLM: + if node_type == BuiltinNodeTypes.LLM: inputs = node_execution.process_data.get("prompts", {}) if node_execution.process_data else {} else: inputs = node_execution.inputs or {} diff --git a/api/core/plugin/backwards_invocation/app.py b/api/core/plugin/backwards_invocation/app.py index 3c5df2b905..60d08b26c9 100644 --- a/api/core/plugin/backwards_invocation/app.py +++ b/api/core/plugin/backwards_invocation/app.py @@ -1,6 +1,6 @@ import uuid from collections.abc import Generator, Mapping -from typing import Union +from typing import Any, Union, cast from sqlalchemy import select from sqlalchemy.orm import Session @@ -34,14 +34,14 @@ class PluginAppBackwardsInvocation(BaseBackwardsInvocation): if workflow is None: raise ValueError("unexpected app type") - features_dict = workflow.features_dict + features_dict: dict[str, Any] = workflow.features_dict user_input_form = workflow.user_input_form(to_old_structure=True) else: app_model_config = app.app_model_config if app_model_config is None: raise ValueError("unexpected app type") - features_dict = app_model_config.to_dict() + features_dict = cast(dict[str, Any], app_model_config.to_dict()) user_input_form = features_dict.get("user_input_form", []) diff --git a/api/core/plugin/backwards_invocation/node.py b/api/core/plugin/backwards_invocation/node.py index 33c45c0007..d6aef93fc4 100644 --- a/api/core/plugin/backwards_invocation/node.py +++ b/api/core/plugin/backwards_invocation/node.py @@ -1,5 +1,5 @@ from core.plugin.backwards_invocation.base import BaseBackwardsInvocation -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.parameter_extractor.entities import ( ModelConfig as ParameterExtractorModelConfig, ) @@ -52,7 +52,7 @@ class PluginNodeBackwardsInvocation(BaseBackwardsInvocation): instruction=instruction, # instruct with variables are not supported ) node_data_dict = node_data.model_dump() - node_data_dict["type"] = NodeType.PARAMETER_EXTRACTOR + node_data_dict["type"] = BuiltinNodeTypes.PARAMETER_EXTRACTOR execution = workflow_service.run_free_workflow_node( node_data_dict, tenant_id=tenant_id, diff --git a/api/core/plugin/entities/parameters.py b/api/core/plugin/entities/parameters.py index bfa662b9f6..ce5813a294 100644 --- a/api/core/plugin/entities/parameters.py +++ b/api/core/plugin/entities/parameters.py @@ -191,7 +191,7 @@ def cast_parameter_value(typ: StrEnum, value: Any, /): except ValueError: raise except Exception: - raise ValueError(f"The tool parameter value {value} is not in correct type of {as_normal_type(typ)}.") + raise ValueError(f"The tool parameter value {repr(value)} is not in correct type of {as_normal_type(typ)}.") def init_frontend_parameter(rule: PluginParameter, type: StrEnum, value: Any): diff --git a/api/core/plugin/entities/plugin_daemon.py b/api/core/plugin/entities/plugin_daemon.py index 2dc540e6a8..416e0f6b4d 100644 --- a/api/core/plugin/entities/plugin_daemon.py +++ b/api/core/plugin/entities/plugin_daemon.py @@ -157,6 +157,7 @@ class PluginInstallTaskPluginStatus(BaseModel): message: str = Field(description="The message of the install task.") icon: str = Field(description="The icon of the plugin.") labels: I18nObject = Field(description="The labels of the plugin.") + source: str | None = Field(default=None, description="The installation source of the plugin") class PluginInstallTask(BasePluginEntity): diff --git a/api/core/provider_manager.py b/api/core/provider_manager.py index f82c3a846b..c29a463bb6 100644 --- a/api/core/provider_manager.py +++ b/api/core/provider_manager.py @@ -627,7 +627,7 @@ class ProviderManager: tenant_id=tenant_id, # TODO: Use provider name with prefix after the data migration. provider_name=ModelProviderID(provider_name).provider_name, - provider_type=ProviderType.SYSTEM.value, + provider_type=ProviderType.SYSTEM, quota_type=quota.quota_type, quota_limit=0, # type: ignore quota_used=0, diff --git a/api/core/rag/datasource/vdb/chroma/chroma_vector.py b/api/core/rag/datasource/vdb/chroma/chroma_vector.py index de1572410c..cbc846f716 100644 --- a/api/core/rag/datasource/vdb/chroma/chroma_vector.py +++ b/api/core/rag/datasource/vdb/chroma/chroma_vector.py @@ -65,7 +65,7 @@ class ChromaVector(BaseVector): self._client.get_or_create_collection(collection_name) redis_client.set(collection_exist_cache_key, 1, ex=3600) - def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): + def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs) -> list[str]: uuids = self._get_uuids(documents) texts = [d.page_content for d in documents] metadatas = [d.metadata for d in documents] @@ -73,6 +73,7 @@ class ChromaVector(BaseVector): collection = self._client.get_or_create_collection(self._collection_name) # FIXME: chromadb using numpy array, fix the type error later collection.upsert(ids=uuids, documents=texts, embeddings=embeddings, metadatas=metadatas) # type: ignore + return uuids def delete_by_metadata_field(self, key: str, value: str): collection = self._client.get_or_create_collection(self._collection_name) diff --git a/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py b/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py index 91bb71bfa6..8e8120fc10 100644 --- a/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py +++ b/api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py @@ -605,25 +605,36 @@ class ClickzettaVector(BaseVector): logger.warning("Failed to create inverted index: %s", e) # Continue without inverted index - full-text search will fall back to LIKE - def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs): + def add_texts(self, documents: list[Document], embeddings: list[list[float]], **kwargs) -> list[str]: """Add documents with embeddings to the collection.""" if not documents: - return + return [] batch_size = self._config.batch_size total_batches = (len(documents) + batch_size - 1) // batch_size + added_ids = [] for i in range(0, len(documents), batch_size): batch_docs = documents[i : i + batch_size] batch_embeddings = embeddings[i : i + batch_size] + batch_doc_ids = [] + for doc in batch_docs: + metadata = doc.metadata if isinstance(doc.metadata, dict) else {} + batch_doc_ids.append(self._safe_doc_id(metadata.get("doc_id", str(uuid.uuid4())))) + added_ids.extend(batch_doc_ids) # Execute batch insert through write queue - self._execute_write(self._insert_batch, batch_docs, batch_embeddings, i, batch_size, total_batches) + self._execute_write( + self._insert_batch, batch_docs, batch_embeddings, batch_doc_ids, i, batch_size, total_batches + ) + + return added_ids def _insert_batch( self, batch_docs: list[Document], batch_embeddings: list[list[float]], + batch_doc_ids: list[str], batch_index: int, batch_size: int, total_batches: int, @@ -641,14 +652,9 @@ class ClickzettaVector(BaseVector): data_rows = [] vector_dimension = len(batch_embeddings[0]) if batch_embeddings and batch_embeddings[0] else 768 - for doc, embedding in zip(batch_docs, batch_embeddings): + for doc, embedding, doc_id in zip(batch_docs, batch_embeddings, batch_doc_ids): # Optimized: minimal checks for common case, fallback for edge cases - metadata = doc.metadata or {} - - if not isinstance(metadata, dict): - metadata = {} - - doc_id = self._safe_doc_id(metadata.get("doc_id", str(uuid.uuid4()))) + metadata = doc.metadata if isinstance(doc.metadata, dict) else {} # Fast path for JSON serialization try: diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py index 6d28ce25bc..449be6a448 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -74,7 +74,8 @@ class ExtractProcessor: else: suffix = "" # https://stackoverflow.com/questions/26541416/generate-temporary-file-names-without-creating-actual-file-in-python#comment90414256_26541521 - file_path = f"{temp_dir}/{tempfile.gettempdir()}{suffix}" + # Generate a temporary filename under the created temp_dir and ensure the directory exists + file_path = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}" # type: ignore Path(file_path).write_bytes(response.content) extract_setting = ExtractSetting(datasource_type=DatasourceType.FILE, document_model="text_model") if return_text: diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index 1ddbfc5864..d6b6ca35be 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -204,26 +204,61 @@ class WordExtractor(BaseExtractor): return " ".join(unique_content) def _parse_cell_paragraph(self, paragraph, image_map): - paragraph_content = [] - for run in paragraph.runs: - if run.element.xpath(".//a:blip"): - for blip in run.element.xpath(".//a:blip"): - image_id = blip.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed") - if not image_id: - continue - rel = paragraph.part.rels.get(image_id) - if rel is None: - continue - # For external images, use image_id as key; for internal, use target_part - if rel.is_external: - if image_id in image_map: - paragraph_content.append(image_map[image_id]) - else: - image_part = rel.target_part - if image_part in image_map: - paragraph_content.append(image_map[image_part]) - else: - paragraph_content.append(run.text) + paragraph_content: list[str] = [] + + for child in paragraph._element: + tag = child.tag + if tag == qn("w:hyperlink"): + # Note: w:hyperlink elements may also use w:anchor for internal bookmarks. + # This extractor intentionally only converts external links (HTTP/mailto, etc.) + # that are backed by a relationship id (r:id) with rel.is_external == True. + # Hyperlinks without such an external rel (including anchor-only bookmarks) + # are left as plain text link_text. + r_id = child.get(qn("r:id")) + link_text_parts: list[str] = [] + for run_elem in child.findall(qn("w:r")): + run = Run(run_elem, paragraph) + if run.text: + link_text_parts.append(run.text) + link_text = "".join(link_text_parts).strip() + if r_id: + try: + rel = paragraph.part.rels.get(r_id) + if rel: + target_ref = getattr(rel, "target_ref", None) + if target_ref: + parsed_target = urlparse(str(target_ref)) + if rel.is_external or parsed_target.scheme in ("http", "https", "mailto"): + display_text = link_text or str(target_ref) + link_text = f"[{display_text}]({target_ref})" + except Exception: + logger.exception("Failed to resolve URL for hyperlink with r:id: %s", r_id) + if link_text: + paragraph_content.append(link_text) + + elif tag == qn("w:r"): + run = Run(child, paragraph) + if run.element.xpath(".//a:blip"): + for blip in run.element.xpath(".//a:blip"): + image_id = blip.get( + "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed" + ) + if not image_id: + continue + rel = paragraph.part.rels.get(image_id) + if rel is None: + continue + if rel.is_external: + if image_id in image_map: + paragraph_content.append(image_map[image_id]) + else: + image_part = rel.target_part + if image_part in image_map: + paragraph_content.append(image_map[image_part]) + else: + if run.text: + paragraph_content.append(run.text) + return "".join(paragraph_content).strip() def parse_docx(self, docx_path): diff --git a/api/core/rag/index_processor/index_processor.py b/api/core/rag/index_processor/index_processor.py index c8f9d29012..a7c42c5a4e 100644 --- a/api/core/rag/index_processor/index_processor.py +++ b/api/core/rag/index_processor/index_processor.py @@ -9,8 +9,8 @@ from flask import current_app from sqlalchemy import delete, func, select from core.db.session_factory import session_factory -from dify_graph.nodes.knowledge_index.exc import KnowledgeIndexNodeError -from dify_graph.repositories.index_processor_protocol import Preview, PreviewItem, QaPreview +from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError +from core.workflow.nodes.knowledge_index.protocols import Preview, PreviewItem, QaPreview from models.dataset import Dataset, Document, DocumentSegment from .index_processor_factory import IndexProcessorFactory diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index b56ff9edef..4c96b63f25 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -56,18 +56,18 @@ from core.rag.retrieval.template_prompts import ( ) from core.tools.signature import sign_upload_file from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool -from dify_graph.file import File, FileTransferMethod, FileType -from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage -from dify_graph.model_runtime.entities.message_entities import PromptMessage, PromptMessageRole, PromptMessageTool -from dify_graph.model_runtime.entities.model_entities import ModelFeature, ModelType -from dify_graph.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel -from dify_graph.nodes.knowledge_retrieval import exc -from dify_graph.repositories.rag_retrieval_protocol import ( +from core.workflow.nodes.knowledge_retrieval import exc +from core.workflow.nodes.knowledge_retrieval.retrieval import ( KnowledgeRetrievalRequest, Source, SourceChildChunk, SourceMetadata, ) +from dify_graph.file import File, FileTransferMethod, FileType +from dify_graph.model_runtime.entities.llm_entities import LLMMode, LLMResult, LLMUsage +from dify_graph.model_runtime.entities.message_entities import PromptMessage, PromptMessageRole, PromptMessageTool +from dify_graph.model_runtime.entities.model_entities import ModelFeature, ModelType +from dify_graph.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from extensions.ext_database import db from extensions.ext_redis import redis_client from libs.json_in_md_parser import parse_and_check_json_markdown @@ -83,6 +83,7 @@ from models.dataset import ( ) from models.dataset import Document as DatasetDocument from models.dataset import Document as DocumentModel +from models.enums import CreatorUserRole from services.external_knowledge_service import ExternalDatasetService from services.feature_service import FeatureService @@ -127,11 +128,12 @@ class DatasetRetrieval: metadata_filter_document_ids, metadata_condition = None, None if request.metadata_filtering_mode != "disabled": - # Convert workflow layer types to app_config layer types - if not request.metadata_model_config: - raise ValueError("metadata_model_config is required for this method") + app_metadata_model_config = ModelConfig(provider="", name="", mode=LLMMode.CHAT, completion_params={}) + if request.metadata_filtering_mode == "automatic": + if not request.metadata_model_config: + raise ValueError("metadata_model_config is required for this method") - app_metadata_model_config = ModelConfig.model_validate(request.metadata_model_config.model_dump()) + app_metadata_model_config = ModelConfig.model_validate(request.metadata_model_config.model_dump()) app_metadata_filtering_conditions = None if request.metadata_filtering_conditions is not None: @@ -1008,7 +1010,7 @@ class DatasetRetrieval: content=json.dumps(contents), source="app", source_app_id=app_id, - created_by_role=user_from, + created_by_role=CreatorUserRole(user_from), created_by=user_id, ) dataset_queries.append(dataset_query) diff --git a/api/core/repositories/sqlalchemy_workflow_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_execution_repository.py index 649e2f7358..55e96515ac 100644 --- a/api/core/repositories/sqlalchemy_workflow_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_execution_repository.py @@ -146,7 +146,9 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository): # No sequence number generation needed anymore - db_model.type = domain_model.workflow_type + from models.workflow import WorkflowType as ModelWorkflowType + + db_model.type = ModelWorkflowType(domain_model.workflow_type.value) db_model.version = domain_model.workflow_version db_model.graph = json.dumps(domain_model.graph) if domain_model.graph else None db_model.inputs = json.dumps(domain_model.inputs) if domain_model.inputs else None @@ -194,6 +196,13 @@ class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository): # Create a new database session with self._session_factory() as session: + existing_model = session.get(WorkflowRun, db_model.id) + if existing_model: + if existing_model.tenant_id != self._tenant_id: + raise ValueError("Unauthorized access to workflow run") + # Preserve the original start time for pause/resume flows. + db_model.created_at = existing_model.created_at + # SQLAlchemy merge intelligently handles both insert and update operations # based on the presence of the primary key session.merge(db_model) diff --git a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py index d1da1d8bd4..92310c543d 100644 --- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py @@ -18,7 +18,7 @@ from tenacity import before_sleep_log, retry, retry_if_exception, stop_after_att from configs import dify_config from dify_graph.entities import WorkflowNodeExecution -from dify_graph.enums import NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from dify_graph.model_runtime.utils.encoders import jsonable_encoder from dify_graph.repositories.workflow_node_execution_repository import OrderConfig, WorkflowNodeExecutionRepository from dify_graph.workflow_type_encoder import WorkflowRuntimeTypeConverter @@ -147,7 +147,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) index=db_model.index, predecessor_node_id=db_model.predecessor_node_id, node_id=db_model.node_id, - node_type=NodeType(db_model.node_type), + node_type=db_model.node_type, title=db_model.title, inputs=inputs, process_data=process_data, diff --git a/api/core/tools/builtin_tool/provider.py b/api/core/tools/builtin_tool/provider.py index 50105bd707..20cdb3e57f 100644 --- a/api/core/tools/builtin_tool/provider.py +++ b/api/core/tools/builtin_tool/provider.py @@ -113,17 +113,26 @@ class BuiltinToolProviderController(ToolProviderController): """ return self.get_credentials_schema_by_type(CredentialType.API_KEY) - def get_credentials_schema_by_type(self, credential_type: str) -> list[ProviderConfig]: + def get_credentials_schema_by_type(self, credential_type: CredentialType | str) -> list[ProviderConfig]: """ returns the credentials schema of the provider - :param credential_type: the type of the credential - :return: the credentials schema of the provider + :param credential_type: the type of the credential, as CredentialType or str; str values + are normalized via CredentialType.of and may raise ValueError for invalid values. + :return: list[ProviderConfig] for CredentialType.OAUTH2 or CredentialType.API_KEY, an + empty list for CredentialType.UNAUTHORIZED or missing schemas. + + Reads from self.entity.oauth_schema and self.entity.credentials_schema. + Raises ValueError for invalid credential types. """ - if credential_type == CredentialType.OAUTH2.value: + if isinstance(credential_type, str): + credential_type = CredentialType.of(credential_type) + if credential_type == CredentialType.OAUTH2: return self.entity.oauth_schema.credentials_schema.copy() if self.entity.oauth_schema else [] if credential_type == CredentialType.API_KEY: return self.entity.credentials_schema.copy() if self.entity.credentials_schema else [] + if credential_type == CredentialType.UNAUTHORIZED: + return [] raise ValueError(f"Invalid credential type: {credential_type}") def get_oauth_client_schema(self) -> list[ProviderConfig]: diff --git a/api/core/tools/tool_file_manager.py b/api/core/tools/tool_file_manager.py index 83e4e53418..210f488afc 100644 --- a/api/core/tools/tool_file_manager.py +++ b/api/core/tools/tool_file_manager.py @@ -10,28 +10,19 @@ from typing import Union from uuid import uuid4 import httpx -from sqlalchemy.orm import Session from configs import dify_config +from core.db.session_factory import session_factory from core.helper import ssrf_proxy -from extensions.ext_database import db as global_db +from dify_graph.file.models import ToolFile as ToolFilePydanticModel from extensions.ext_storage import storage from models.model import MessageFile from models.tools import ToolFile logger = logging.getLogger(__name__) -from sqlalchemy.engine import Engine - class ToolFileManager: - _engine: Engine - - def __init__(self, engine: Engine | None = None): - if engine is None: - engine = global_db.engine - self._engine = engine - @staticmethod def sign_file(tool_file_id: str, extension: str) -> str: """ @@ -89,7 +80,7 @@ class ToolFileManager: filepath = f"tools/{tenant_id}/{unique_filename}" storage.save(filepath, file_binary) - with Session(self._engine, expire_on_commit=False) as session: + with session_factory.create_session() as session: tool_file = ToolFile( user_id=user_id, tenant_id=tenant_id, @@ -132,7 +123,7 @@ class ToolFileManager: filename = f"{unique_name}{extension}" filepath = f"tools/{tenant_id}/{filename}" storage.save(filepath, blob) - with Session(self._engine, expire_on_commit=False) as session: + with session_factory.create_session() as session: tool_file = ToolFile( user_id=user_id, tenant_id=tenant_id, @@ -146,6 +137,7 @@ class ToolFileManager: session.add(tool_file) session.commit() + session.refresh(tool_file) return tool_file @@ -157,7 +149,7 @@ class ToolFileManager: :return: the binary of the file, mime type """ - with Session(self._engine, expire_on_commit=False) as session: + with session_factory.create_session() as session: tool_file: ToolFile | None = ( session.query(ToolFile) .where( @@ -181,7 +173,7 @@ class ToolFileManager: :return: the binary of the file, mime type """ - with Session(self._engine, expire_on_commit=False) as session: + with session_factory.create_session() as session: message_file: MessageFile | None = ( session.query(MessageFile) .where( @@ -217,7 +209,9 @@ class ToolFileManager: return blob, tool_file.mimetype - def get_file_generator_by_tool_file_id(self, tool_file_id: str) -> tuple[Generator | None, ToolFile | None]: + def get_file_generator_by_tool_file_id( + self, tool_file_id: str + ) -> tuple[Generator | None, ToolFilePydanticModel | None]: """ get file binary @@ -225,7 +219,7 @@ class ToolFileManager: :return: the binary of the file, mime type """ - with Session(self._engine, expire_on_commit=False) as session: + with session_factory.create_session() as session: tool_file: ToolFile | None = ( session.query(ToolFile) .where( @@ -239,7 +233,7 @@ class ToolFileManager: stream = storage.load_stream(tool_file.file_key) - return stream, tool_file + return stream, ToolFilePydanticModel.model_validate(tool_file) # init tool_file_parser diff --git a/api/core/tools/utils/workflow_configuration_sync.py b/api/core/tools/utils/workflow_configuration_sync.py index d8ce53083b..28f1376655 100644 --- a/api/core/tools/utils/workflow_configuration_sync.py +++ b/api/core/tools/utils/workflow_configuration_sync.py @@ -3,7 +3,7 @@ from typing import Any from core.tools.entities.tool_entities import WorkflowToolParameterConfiguration from core.tools.errors import WorkflowToolHumanInputNotSupportedError -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.base.entities import OutputVariableEntity from dify_graph.variables.input_entities import VariableEntity @@ -51,7 +51,7 @@ class WorkflowToolConfigurationUtils: def ensure_no_human_input_nodes(cls, graph: Mapping[str, Any]) -> None: nodes = graph.get("nodes", []) for node in nodes: - if node.get("data", {}).get("type") == NodeType.HUMAN_INPUT: + if node.get("data", {}).get("type") == BuiltinNodeTypes.HUMAN_INPUT: raise WorkflowToolHumanInputNotSupportedError() @classmethod diff --git a/api/core/tools/workflow_as_tool/provider.py b/api/core/tools/workflow_as_tool/provider.py index d73012375d..aef8b3f779 100644 --- a/api/core/tools/workflow_as_tool/provider.py +++ b/api/core/tools/workflow_as_tool/provider.py @@ -37,6 +37,7 @@ VARIABLE_TO_PARAMETER_TYPE_MAPPING = { VariableEntityType.CHECKBOX: ToolParameter.ToolParameterType.BOOLEAN, VariableEntityType.FILE: ToolParameter.ToolParameterType.FILE, VariableEntityType.FILE_LIST: ToolParameter.ToolParameterType.FILES, + VariableEntityType.JSON_OBJECT: ToolParameter.ToolParameterType.OBJECT, } diff --git a/api/core/trigger/constants.py b/api/core/trigger/constants.py new file mode 100644 index 0000000000..bfa45c3f2b --- /dev/null +++ b/api/core/trigger/constants.py @@ -0,0 +1,18 @@ +from typing import Final + +TRIGGER_WEBHOOK_NODE_TYPE: Final[str] = "trigger-webhook" +TRIGGER_SCHEDULE_NODE_TYPE: Final[str] = "trigger-schedule" +TRIGGER_PLUGIN_NODE_TYPE: Final[str] = "trigger-plugin" +TRIGGER_INFO_METADATA_KEY: Final[str] = "trigger_info" + +TRIGGER_NODE_TYPES: Final[frozenset[str]] = frozenset( + { + TRIGGER_WEBHOOK_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_PLUGIN_NODE_TYPE, + } +) + + +def is_trigger_node_type(node_type: str) -> bool: + return node_type in TRIGGER_NODE_TYPES diff --git a/api/core/trigger/debug/event_selectors.py b/api/core/trigger/debug/event_selectors.py index 9b7b3de614..2a133b2b94 100644 --- a/api/core/trigger/debug/event_selectors.py +++ b/api/core/trigger/debug/event_selectors.py @@ -11,6 +11,11 @@ from typing import Any from pydantic import BaseModel from core.plugin.entities.request import TriggerInvokeEventResponse +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) from core.trigger.debug.event_bus import TriggerDebugEventBus from core.trigger.debug.events import ( PluginTriggerDebugEvent, @@ -19,9 +24,9 @@ from core.trigger.debug.events import ( build_plugin_pool_key, build_webhook_pool_key, ) -from dify_graph.enums import NodeType -from dify_graph.nodes.trigger_plugin.entities import TriggerEventNodeData -from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig +from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData +from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig +from dify_graph.entities.graph_config import NodeConfigDict from extensions.ext_redis import redis_client from libs.datetime_utils import ensure_naive_utc, naive_utc_now from libs.schedule_utils import calculate_next_run_at @@ -41,10 +46,10 @@ class TriggerDebugEventPoller(ABC): app_id: str user_id: str tenant_id: str - node_config: Mapping[str, Any] + node_config: NodeConfigDict node_id: str - def __init__(self, tenant_id: str, user_id: str, app_id: str, node_config: Mapping[str, Any], node_id: str): + def __init__(self, tenant_id: str, user_id: str, app_id: str, node_config: NodeConfigDict, node_id: str): self.tenant_id = tenant_id self.user_id = user_id self.app_id = app_id @@ -60,7 +65,7 @@ class PluginTriggerDebugEventPoller(TriggerDebugEventPoller): def poll(self) -> TriggerDebugEvent | None: from services.trigger.trigger_service import TriggerService - plugin_trigger_data = TriggerEventNodeData.model_validate(self.node_config.get("data", {})) + plugin_trigger_data = TriggerEventNodeData.model_validate(self.node_config["data"], from_attributes=True) provider_id = TriggerProviderID(plugin_trigger_data.provider_id) pool_key: str = build_plugin_pool_key( name=plugin_trigger_data.event_name, @@ -205,21 +210,19 @@ def create_event_poller( if not node_config: raise ValueError("Node data not found for node %s", node_id) node_type = draft_workflow.get_node_type_from_node_config(node_config) - match node_type: - case NodeType.TRIGGER_PLUGIN: - return PluginTriggerDebugEventPoller( - tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id - ) - case NodeType.TRIGGER_WEBHOOK: - return WebhookTriggerDebugEventPoller( - tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id - ) - case NodeType.TRIGGER_SCHEDULE: - return ScheduleTriggerDebugEventPoller( - tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id - ) - case _: - raise ValueError("unable to create event poller for node type %s", node_type) + if node_type == TRIGGER_PLUGIN_NODE_TYPE: + return PluginTriggerDebugEventPoller( + tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id + ) + if node_type == TRIGGER_WEBHOOK_NODE_TYPE: + return WebhookTriggerDebugEventPoller( + tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id + ) + if node_type == TRIGGER_SCHEDULE_NODE_TYPE: + return ScheduleTriggerDebugEventPoller( + tenant_id=tenant_id, user_id=user_id, app_id=app_id, node_config=node_config, node_id=node_id + ) + raise ValueError("unable to create event poller for node type %s", node_type) def select_trigger_debug_events( diff --git a/api/core/workflow/__init__.py b/api/core/workflow/__init__.py index 57c2ef3d10..937012dcee 100644 --- a/api/core/workflow/__init__.py +++ b/api/core/workflow/__init__.py @@ -1,4 +1 @@ -from .node_factory import DifyNodeFactory -from .workflow_entry import WorkflowEntry - -__all__ = ["DifyNodeFactory", "WorkflowEntry"] +"""Core workflow package.""" diff --git a/api/core/workflow/node_factory.py b/api/core/workflow/node_factory.py index 4cbee08a65..ee3b322636 100644 --- a/api/core/workflow/node_factory.py +++ b/api/core/workflow/node_factory.py @@ -1,5 +1,8 @@ -from collections.abc import Mapping -from typing import TYPE_CHECKING, Any, cast, final +import importlib +import pkgutil +from collections.abc import Callable, Iterator, Mapping, MutableMapping +from functools import lru_cache +from typing import TYPE_CHECKING, Any, TypeAlias, cast, final from sqlalchemy import select from sqlalchemy.orm import Session @@ -8,7 +11,6 @@ from typing_extensions import override from configs import dify_config from core.app.entities.app_invoke_entities import DifyRunContext from core.app.llm.model_access import build_dify_model_access -from core.datasource.datasource_manager import DatasourceManager from core.helper.code_executor.code_executor import ( CodeExecutionError, CodeExecutor, @@ -17,39 +19,37 @@ from core.helper.ssrf_proxy import ssrf_proxy from core.memory.token_buffer_memory import TokenBufferMemory from core.model_manager import ModelInstance from core.prompt.entities.advanced_prompt_entities import MemoryConfig -from core.rag.index_processor.index_processor import IndexProcessor -from core.rag.retrieval.dataset_retrieval import DatasetRetrieval -from core.rag.summary_index.summary_index import SummaryIndex from core.repositories.human_input_repository import HumanInputFormRepositoryImpl from core.tools.tool_file_manager import ToolFileManager -from dify_graph.entities.graph_config import NodeConfigDict +from core.trigger.constants import TRIGGER_NODE_TYPES +from core.workflow.nodes.agent.message_transformer import AgentMessageTransformer +from core.workflow.nodes.agent.plugin_strategy_adapter import ( + PluginAgentStrategyPresentationProvider, + PluginAgentStrategyResolver, +) +from core.workflow.nodes.agent.runtime_support import AgentRuntimeSupport +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY -from dify_graph.enums import NodeType, SystemVariableKey +from dify_graph.enums import BuiltinNodeTypes, NodeType, SystemVariableKey from dify_graph.file.file_manager import file_manager from dify_graph.graph.graph import NodeFactory from dify_graph.model_runtime.entities.model_entities import ModelType from dify_graph.model_runtime.memory import PromptMessageMemory from dify_graph.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel from dify_graph.nodes.base.node import Node -from dify_graph.nodes.code.code_node import CodeNode, WorkflowCodeExecutor +from dify_graph.nodes.code.code_node import WorkflowCodeExecutor from dify_graph.nodes.code.entities import CodeLanguage from dify_graph.nodes.code.limits import CodeNodeLimits -from dify_graph.nodes.datasource import DatasourceNode -from dify_graph.nodes.document_extractor import DocumentExtractorNode, UnstructuredApiConfig -from dify_graph.nodes.http_request import HttpRequestNode, build_http_request_config -from dify_graph.nodes.human_input.human_input_node import HumanInputNode -from dify_graph.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode -from dify_graph.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode -from dify_graph.nodes.llm.entities import ModelConfig +from dify_graph.nodes.document_extractor import UnstructuredApiConfig +from dify_graph.nodes.http_request import build_http_request_config +from dify_graph.nodes.llm.entities import LLMNodeData from dify_graph.nodes.llm.exc import LLMModeRequiredError, ModelNotExistError -from dify_graph.nodes.llm.node import LLMNode -from dify_graph.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING -from dify_graph.nodes.parameter_extractor.parameter_extractor_node import ParameterExtractorNode -from dify_graph.nodes.question_classifier.question_classifier_node import QuestionClassifierNode +from dify_graph.nodes.parameter_extractor.entities import ParameterExtractorNodeData +from dify_graph.nodes.question_classifier.entities import QuestionClassifierNodeData from dify_graph.nodes.template_transform.template_renderer import ( CodeExecutorJinja2TemplateRenderer, ) -from dify_graph.nodes.template_transform.template_transform_node import TemplateTransformNode from dify_graph.variables.segments import StringSegment from extensions.ext_database import db from models.model import Conversation @@ -58,6 +58,138 @@ if TYPE_CHECKING: from dify_graph.entities import GraphInitParams from dify_graph.runtime import GraphRuntimeState +LATEST_VERSION = "latest" +_START_NODE_TYPES: frozenset[NodeType] = frozenset( + (BuiltinNodeTypes.START, BuiltinNodeTypes.DATASOURCE, *TRIGGER_NODE_TYPES) +) + + +def _import_node_package(package_name: str, *, excluded_modules: frozenset[str] = frozenset()) -> None: + package = importlib.import_module(package_name) + for _, module_name, _ in pkgutil.walk_packages(package.__path__, package.__name__ + "."): + if module_name in excluded_modules: + continue + importlib.import_module(module_name) + + +@lru_cache(maxsize=1) +def register_nodes() -> None: + """Import production node modules so they self-register with ``Node``.""" + _import_node_package("dify_graph.nodes") + _import_node_package("core.workflow.nodes") + + +def get_node_type_classes_mapping() -> Mapping[NodeType, Mapping[str, type[Node]]]: + """Return a read-only snapshot of the current production node registry. + + The workflow layer owns node bootstrap because it must compose built-in + `dify_graph.nodes.*` implementations with workflow-local nodes under + `core.workflow.nodes.*`. Keeping this import side effect here avoids + reintroducing registry bootstrapping into lower-level graph primitives. + """ + register_nodes() + return Node.get_node_type_classes_mapping() + + +def resolve_workflow_node_class(*, node_type: NodeType, node_version: str) -> type[Node]: + node_mapping = get_node_type_classes_mapping().get(node_type) + if not node_mapping: + raise ValueError(f"No class mapping found for node type: {node_type}") + + latest_node_class = node_mapping.get(LATEST_VERSION) + matched_node_class = node_mapping.get(node_version) + node_class = matched_node_class or latest_node_class + if not node_class: + raise ValueError(f"No latest version class found for node type: {node_type}") + return node_class + + +def is_start_node_type(node_type: NodeType) -> bool: + """Return True when the node type can serve as a workflow entry point.""" + return node_type in _START_NODE_TYPES + + +def get_default_root_node_id(graph_config: Mapping[str, Any]) -> str: + """Resolve the default entry node for a persisted top-level workflow graph. + + This workflow-layer helper depends on start-node semantics defined by + `is_start_node_type`, so it intentionally lives next to the node registry + instead of in the raw `dify_graph.entities.graph_config` schema module. + """ + nodes = graph_config.get("nodes") + if not isinstance(nodes, list): + raise ValueError("nodes in workflow graph must be a list") + + for node in nodes: + if not isinstance(node, Mapping): + continue + + if node.get("type") == "custom-note": + continue + + node_id = node.get("id") + data = node.get("data") + if not isinstance(node_id, str) or not isinstance(data, Mapping): + continue + + node_type = data.get("type") + if isinstance(node_type, str) and is_start_node_type(node_type): + return node_id + + raise ValueError("Unable to determine default root node ID from workflow graph") + + +class _LazyNodeTypeClassesMapping(MutableMapping[NodeType, Mapping[str, type[Node]]]): + """Mutable dict-like view over the current node registry.""" + + def __init__(self) -> None: + self._cached_snapshot: dict[NodeType, Mapping[str, type[Node]]] = {} + self._cached_version = -1 + self._deleted: set[NodeType] = set() + self._overrides: dict[NodeType, Mapping[str, type[Node]]] = {} + + def _snapshot(self) -> dict[NodeType, Mapping[str, type[Node]]]: + current_version = Node.get_registry_version() + if self._cached_version != current_version: + self._cached_snapshot = dict(get_node_type_classes_mapping()) + self._cached_version = current_version + if not self._deleted and not self._overrides: + return self._cached_snapshot + + snapshot = {key: value for key, value in self._cached_snapshot.items() if key not in self._deleted} + snapshot.update(self._overrides) + return snapshot + + def __getitem__(self, key: NodeType) -> Mapping[str, type[Node]]: + return self._snapshot()[key] + + def __setitem__(self, key: NodeType, value: Mapping[str, type[Node]]) -> None: + self._deleted.discard(key) + self._overrides[key] = value + + def __delitem__(self, key: NodeType) -> None: + if key in self._overrides: + del self._overrides[key] + return + if key in self._cached_snapshot: + self._deleted.add(key) + return + raise KeyError(key) + + def __iter__(self) -> Iterator[NodeType]: + return iter(self._snapshot()) + + def __len__(self) -> int: + return len(self._snapshot()) + + +# Keep the canonical node-class mapping in the workflow layer that also bootstraps +# legacy `core.workflow.nodes.*` registrations. +NODE_TYPE_CLASSES_MAPPING: MutableMapping[NodeType, Mapping[str, type[Node]]] = _LazyNodeTypeClassesMapping() + + +LLMCompatibleNodeData: TypeAlias = LLMNodeData | QuestionClassifierNodeData | ParameterExtractorNodeData + def fetch_memory( *, @@ -99,10 +231,7 @@ class DefaultWorkflowCodeExecutor: @final class DifyNodeFactory(NodeFactory): """ - Default implementation of NodeFactory that uses the traditional node mapping. - - This factory creates nodes by looking up their types in NODE_TYPE_CLASSES_MAPPING - and instantiating the appropriate node class. + Default implementation of NodeFactory that resolves node classes from the live registry. """ def __init__( @@ -129,7 +258,6 @@ class DifyNodeFactory(NodeFactory): self._http_request_http_client = ssrf_proxy self._http_request_tool_file_manager_factory = ToolFileManager self._http_request_file_manager = file_manager - self._rag_retrieval = DatasetRetrieval() self._document_extractor_unstructured_api_config = UnstructuredApiConfig( api_url=dify_config.UNSTRUCTURED_API_URL, api_key=dify_config.UNSTRUCTURED_API_KEY or "", @@ -145,6 +273,10 @@ class DifyNodeFactory(NodeFactory): ) self._llm_credentials_provider, self._llm_model_factory = build_dify_model_access(self._dify_context.tenant_id) + self._agent_strategy_resolver = PluginAgentStrategyResolver() + self._agent_strategy_presentation_provider = PluginAgentStrategyPresentationProvider() + self._agent_runtime_support = AgentRuntimeSupport() + self._agent_message_transformer = AgentMessageTransformer() @staticmethod def _resolve_dify_context(run_context: Mapping[str, Any]) -> DifyRunContext: @@ -156,167 +288,115 @@ class DifyNodeFactory(NodeFactory): return DifyRunContext.model_validate(raw_ctx) @override - def create_node(self, node_config: NodeConfigDict) -> Node: + def create_node(self, node_config: dict[str, Any] | NodeConfigDict) -> Node: """ Create a Node instance from node configuration data using the traditional mapping. :param node_config: node configuration dictionary containing type and other data :return: initialized Node instance - :raises ValueError: if node type is unknown or configuration is invalid + :raises ValueError: if node_config fails NodeConfigDict/BaseNodeData validation + (including pydantic ValidationError, which subclasses ValueError), + if node type is unknown, or if no implementation exists for the resolved version """ - # Get node_id from config - node_id = node_config["id"] - - # Get node type from config - node_data = node_config["data"] - try: - node_type = NodeType(node_data["type"]) - except ValueError: - raise ValueError(f"Unknown node type: {node_data['type']}") - - # Get node class - node_mapping = NODE_TYPE_CLASSES_MAPPING.get(node_type) - if not node_mapping: - raise ValueError(f"No class mapping found for node type: {node_type}") - - latest_node_class = node_mapping.get(LATEST_VERSION) - node_version = str(node_data.get("version", "1")) - matched_node_class = node_mapping.get(node_version) - node_class = matched_node_class or latest_node_class - if not node_class: - raise ValueError(f"No latest version class found for node type: {node_type}") - - # Create node instance - if node_type == NodeType.CODE: - return CodeNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - code_executor=self._code_executor, - code_limits=self._code_limits, - ) - - if node_type == NodeType.TEMPLATE_TRANSFORM: - return TemplateTransformNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - template_renderer=self._template_renderer, - max_output_length=self._template_transform_max_output_length, - ) - - if node_type == NodeType.HTTP_REQUEST: - return HttpRequestNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - http_request_config=self._http_request_config, - http_client=self._http_request_http_client, - tool_file_manager_factory=self._http_request_tool_file_manager_factory, - file_manager=self._http_request_file_manager, - ) - - if node_type == NodeType.HUMAN_INPUT: - return HumanInputNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - form_repository=HumanInputFormRepositoryImpl(tenant_id=self._dify_context.tenant_id), - ) - - if node_type == NodeType.KNOWLEDGE_INDEX: - return KnowledgeIndexNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - index_processor=IndexProcessor(), - summary_index_service=SummaryIndex(), - ) - - if node_type == NodeType.LLM: - model_instance = self._build_model_instance_for_llm_node(node_data) - memory = self._build_memory_for_llm_node(node_data=node_data, model_instance=model_instance) - return LLMNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - credentials_provider=self._llm_credentials_provider, - model_factory=self._llm_model_factory, - model_instance=model_instance, - memory=memory, - ) - - if node_type == NodeType.DATASOURCE: - return DatasourceNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - datasource_manager=DatasourceManager, - ) - - if node_type == NodeType.KNOWLEDGE_RETRIEVAL: - return KnowledgeRetrievalNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - rag_retrieval=self._rag_retrieval, - ) - - if node_type == NodeType.DOCUMENT_EXTRACTOR: - return DocumentExtractorNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - unstructured_api_config=self._document_extractor_unstructured_api_config, - http_client=self._http_request_http_client, - ) - - if node_type == NodeType.QUESTION_CLASSIFIER: - model_instance = self._build_model_instance_for_llm_node(node_data) - memory = self._build_memory_for_llm_node(node_data=node_data, model_instance=model_instance) - return QuestionClassifierNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - credentials_provider=self._llm_credentials_provider, - model_factory=self._llm_model_factory, - model_instance=model_instance, - memory=memory, - ) - - if node_type == NodeType.PARAMETER_EXTRACTOR: - model_instance = self._build_model_instance_for_llm_node(node_data) - memory = self._build_memory_for_llm_node(node_data=node_data, model_instance=model_instance) - return ParameterExtractorNode( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - credentials_provider=self._llm_credentials_provider, - model_factory=self._llm_model_factory, - model_instance=model_instance, - memory=memory, - ) - + typed_node_config = NodeConfigDictAdapter.validate_python(node_config) + node_id = typed_node_config["id"] + node_data = typed_node_config["data"] + node_class = self._resolve_node_class(node_type=node_data.type, node_version=str(node_data.version)) + node_type = node_data.type + node_init_kwargs_factories: Mapping[NodeType, Callable[[], dict[str, object]]] = { + BuiltinNodeTypes.CODE: lambda: { + "code_executor": self._code_executor, + "code_limits": self._code_limits, + }, + BuiltinNodeTypes.TEMPLATE_TRANSFORM: lambda: { + "template_renderer": self._template_renderer, + "max_output_length": self._template_transform_max_output_length, + }, + BuiltinNodeTypes.HTTP_REQUEST: lambda: { + "http_request_config": self._http_request_config, + "http_client": self._http_request_http_client, + "tool_file_manager_factory": self._http_request_tool_file_manager_factory, + "file_manager": self._http_request_file_manager, + }, + BuiltinNodeTypes.HUMAN_INPUT: lambda: { + "form_repository": HumanInputFormRepositoryImpl(tenant_id=self._dify_context.tenant_id), + }, + BuiltinNodeTypes.LLM: lambda: self._build_llm_compatible_node_init_kwargs( + node_class=node_class, + node_data=node_data, + include_http_client=True, + ), + BuiltinNodeTypes.DOCUMENT_EXTRACTOR: lambda: { + "unstructured_api_config": self._document_extractor_unstructured_api_config, + "http_client": self._http_request_http_client, + }, + BuiltinNodeTypes.QUESTION_CLASSIFIER: lambda: self._build_llm_compatible_node_init_kwargs( + node_class=node_class, + node_data=node_data, + include_http_client=True, + ), + BuiltinNodeTypes.PARAMETER_EXTRACTOR: lambda: self._build_llm_compatible_node_init_kwargs( + node_class=node_class, + node_data=node_data, + include_http_client=False, + ), + BuiltinNodeTypes.TOOL: lambda: { + "tool_file_manager_factory": self._http_request_tool_file_manager_factory(), + }, + BuiltinNodeTypes.AGENT: lambda: { + "strategy_resolver": self._agent_strategy_resolver, + "presentation_provider": self._agent_strategy_presentation_provider, + "runtime_support": self._agent_runtime_support, + "message_transformer": self._agent_message_transformer, + }, + } + node_init_kwargs = node_init_kwargs_factories.get(node_type, lambda: {})() return node_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, + **node_init_kwargs, ) - def _build_model_instance_for_llm_node(self, node_data: Mapping[str, Any]) -> ModelInstance: - node_data_model = ModelConfig.model_validate(node_data["model"]) + @staticmethod + def _validate_resolved_node_data(node_class: type[Node], node_data: BaseNodeData) -> BaseNodeData: + """ + Re-validate the permissive graph payload with the concrete NodeData model declared by the resolved node class. + """ + return node_class.validate_node_data(node_data) + + @staticmethod + def _resolve_node_class(*, node_type: NodeType, node_version: str) -> type[Node]: + return resolve_workflow_node_class(node_type=node_type, node_version=node_version) + + def _build_llm_compatible_node_init_kwargs( + self, + *, + node_class: type[Node], + node_data: BaseNodeData, + include_http_client: bool, + ) -> dict[str, object]: + validated_node_data = cast( + LLMCompatibleNodeData, + self._validate_resolved_node_data(node_class=node_class, node_data=node_data), + ) + model_instance = self._build_model_instance_for_llm_node(validated_node_data) + node_init_kwargs: dict[str, object] = { + "credentials_provider": self._llm_credentials_provider, + "model_factory": self._llm_model_factory, + "model_instance": model_instance, + "memory": self._build_memory_for_llm_node( + node_data=validated_node_data, + model_instance=model_instance, + ), + } + if include_http_client: + node_init_kwargs["http_client"] = self._http_request_http_client + return node_init_kwargs + + def _build_model_instance_for_llm_node(self, node_data: LLMCompatibleNodeData) -> ModelInstance: + node_data_model = node_data.model if not node_data_model.mode: raise LLMModeRequiredError("LLM mode is required.") @@ -352,14 +432,12 @@ class DifyNodeFactory(NodeFactory): def _build_memory_for_llm_node( self, *, - node_data: Mapping[str, Any], + node_data: LLMCompatibleNodeData, model_instance: ModelInstance, ) -> PromptMessageMemory | None: - raw_memory_config = node_data.get("memory") - if raw_memory_config is None: + if node_data.memory is None: return None - node_memory = MemoryConfig.model_validate(raw_memory_config) conversation_id_variable = self.graph_runtime_state.variable_pool.get( ["sys", SystemVariableKey.CONVERSATION_ID] ) @@ -369,6 +447,6 @@ class DifyNodeFactory(NodeFactory): return fetch_memory( conversation_id=conversation_id, app_id=self._dify_context.app_id, - node_data_memory=node_memory, + node_data_memory=node_data.memory, model_instance=model_instance, ) diff --git a/api/core/workflow/nodes/__init__.py b/api/core/workflow/nodes/__init__.py new file mode 100644 index 0000000000..d23f80be59 --- /dev/null +++ b/api/core/workflow/nodes/__init__.py @@ -0,0 +1 @@ +"""Workflow node implementations that remain under the legacy core.workflow namespace.""" diff --git a/api/core/workflow/nodes/agent/__init__.py b/api/core/workflow/nodes/agent/__init__.py new file mode 100644 index 0000000000..ba6c667194 --- /dev/null +++ b/api/core/workflow/nodes/agent/__init__.py @@ -0,0 +1,4 @@ +from .agent_node import AgentNode +from .entities import AgentNodeData + +__all__ = ["AgentNode", "AgentNodeData"] diff --git a/api/core/workflow/nodes/agent/agent_node.py b/api/core/workflow/nodes/agent/agent_node.py new file mode 100644 index 0000000000..5699ccf404 --- /dev/null +++ b/api/core/workflow/nodes/agent/agent_node.py @@ -0,0 +1,188 @@ +from __future__ import annotations + +from collections.abc import Generator, Mapping, Sequence +from typing import TYPE_CHECKING, Any + +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, SystemVariableKey, WorkflowNodeExecutionStatus +from dify_graph.node_events import NodeEventBase, NodeRunResult, StreamCompletedEvent +from dify_graph.nodes.base.node import Node +from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser + +from .entities import AgentNodeData +from .exceptions import ( + AgentInvocationError, + AgentMessageTransformError, +) +from .message_transformer import AgentMessageTransformer +from .runtime_support import AgentRuntimeSupport +from .strategy_protocols import AgentStrategyPresentationProvider, AgentStrategyResolver + +if TYPE_CHECKING: + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState + + +class AgentNode(Node[AgentNodeData]): + node_type = BuiltinNodeTypes.AGENT + + _strategy_resolver: AgentStrategyResolver + _presentation_provider: AgentStrategyPresentationProvider + _runtime_support: AgentRuntimeSupport + _message_transformer: AgentMessageTransformer + + def __init__( + self, + id: str, + config: NodeConfigDict, + graph_init_params: GraphInitParams, + graph_runtime_state: GraphRuntimeState, + *, + strategy_resolver: AgentStrategyResolver, + presentation_provider: AgentStrategyPresentationProvider, + runtime_support: AgentRuntimeSupport, + message_transformer: AgentMessageTransformer, + ) -> None: + super().__init__( + id=id, + config=config, + graph_init_params=graph_init_params, + graph_runtime_state=graph_runtime_state, + ) + self._strategy_resolver = strategy_resolver + self._presentation_provider = presentation_provider + self._runtime_support = runtime_support + self._message_transformer = message_transformer + + @classmethod + def version(cls) -> str: + return "1" + + def populate_start_event(self, event) -> None: + dify_ctx = self.require_dify_context() + event.extras["agent_strategy"] = { + "name": self.node_data.agent_strategy_name, + "icon": self._presentation_provider.get_icon( + tenant_id=dify_ctx.tenant_id, + agent_strategy_provider_name=self.node_data.agent_strategy_provider_name, + ), + } + + def _run(self) -> Generator[NodeEventBase, None, None]: + from core.plugin.impl.exc import PluginDaemonClientSideError + + dify_ctx = self.require_dify_context() + + try: + strategy = self._strategy_resolver.resolve( + tenant_id=dify_ctx.tenant_id, + agent_strategy_provider_name=self.node_data.agent_strategy_provider_name, + agent_strategy_name=self.node_data.agent_strategy_name, + ) + except Exception as e: + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs={}, + error=f"Failed to get agent strategy: {str(e)}", + ), + ) + return + + agent_parameters = strategy.get_parameters() + + parameters = self._runtime_support.build_parameters( + agent_parameters=agent_parameters, + variable_pool=self.graph_runtime_state.variable_pool, + node_data=self.node_data, + strategy=strategy, + tenant_id=dify_ctx.tenant_id, + app_id=dify_ctx.app_id, + invoke_from=dify_ctx.invoke_from, + ) + parameters_for_log = self._runtime_support.build_parameters( + agent_parameters=agent_parameters, + variable_pool=self.graph_runtime_state.variable_pool, + node_data=self.node_data, + strategy=strategy, + tenant_id=dify_ctx.tenant_id, + app_id=dify_ctx.app_id, + invoke_from=dify_ctx.invoke_from, + for_log=True, + ) + credentials = self._runtime_support.build_credentials(parameters=parameters) + + conversation_id = self.graph_runtime_state.variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID]) + + try: + message_stream = strategy.invoke( + params=parameters, + user_id=dify_ctx.user_id, + app_id=dify_ctx.app_id, + conversation_id=conversation_id.text if conversation_id else None, + credentials=credentials, + ) + except Exception as e: + error = AgentInvocationError(f"Failed to invoke agent: {str(e)}", original_error=e) + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + error=str(error), + ) + ) + return + + try: + yield from self._message_transformer.transform( + messages=message_stream, + tool_info={ + "icon": self._presentation_provider.get_icon( + tenant_id=dify_ctx.tenant_id, + agent_strategy_provider_name=self.node_data.agent_strategy_provider_name, + ), + "agent_strategy": self.node_data.agent_strategy_name, + }, + parameters_for_log=parameters_for_log, + user_id=dify_ctx.user_id, + tenant_id=dify_ctx.tenant_id, + node_type=self.node_type, + node_id=self._node_id, + node_execution_id=self.id, + ) + except PluginDaemonClientSideError as e: + transform_error = AgentMessageTransformError( + f"Failed to transform agent message: {str(e)}", original_error=e + ) + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.FAILED, + inputs=parameters_for_log, + error=str(transform_error), + ) + ) + + @classmethod + def _extract_variable_selector_to_variable_mapping( + cls, + *, + graph_config: Mapping[str, Any], + node_id: str, + node_data: AgentNodeData, + ) -> Mapping[str, Sequence[str]]: + _ = graph_config # Explicitly mark as unused + result: dict[str, Any] = {} + typed_node_data = node_data + for parameter_name in typed_node_data.agent_parameters: + input = typed_node_data.agent_parameters[parameter_name] + match input.type: + case "mixed" | "constant": + selectors = VariableTemplateParser(str(input.value)).extract_variable_selectors() + for selector in selectors: + result[selector.variable] = selector.value_selector + case "variable": + result[parameter_name] = input.value + + result = {node_id + "." + key: value for key, value in result.items()} + + return result diff --git a/api/dify_graph/nodes/agent/entities.py b/api/core/workflow/nodes/agent/entities.py similarity index 83% rename from api/dify_graph/nodes/agent/entities.py rename to api/core/workflow/nodes/agent/entities.py index 9124420f01..91fed39795 100644 --- a/api/dify_graph/nodes/agent/entities.py +++ b/api/core/workflow/nodes/agent/entities.py @@ -5,13 +5,15 @@ from pydantic import BaseModel from core.prompt.entities.advanced_prompt_entities import MemoryConfig from core.tools.entities.tool_entities import ToolSelector -from dify_graph.nodes.base.entities import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType class AgentNodeData(BaseNodeData): - agent_strategy_provider_name: str # redundancy + type: NodeType = BuiltinNodeTypes.AGENT + agent_strategy_provider_name: str agent_strategy_name: str - agent_strategy_label: str # redundancy + agent_strategy_label: str memory: MemoryConfig | None = None # The version of the tool parameter. # If this value is None, it indicates this is a previous version diff --git a/api/dify_graph/nodes/agent/exc.py b/api/core/workflow/nodes/agent/exceptions.py similarity index 90% rename from api/dify_graph/nodes/agent/exc.py rename to api/core/workflow/nodes/agent/exceptions.py index ba2c83d8a6..944f5f0b20 100644 --- a/api/dify_graph/nodes/agent/exc.py +++ b/api/core/workflow/nodes/agent/exceptions.py @@ -119,14 +119,3 @@ class AgentVariableTypeError(AgentNodeError): self.expected_type = expected_type self.actual_type = actual_type super().__init__(message) - - -class AgentMaxIterationError(AgentNodeError): - """Exception raised when the agent exceeds the maximum iteration limit.""" - - def __init__(self, max_iteration: int): - self.max_iteration = max_iteration - super().__init__( - f"Agent exceeded the maximum iteration limit of {max_iteration}. " - f"The agent was unable to complete the task within the allowed number of iterations." - ) diff --git a/api/core/workflow/nodes/agent/message_transformer.py b/api/core/workflow/nodes/agent/message_transformer.py new file mode 100644 index 0000000000..f58a5665f4 --- /dev/null +++ b/api/core/workflow/nodes/agent/message_transformer.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +from collections.abc import Generator, Mapping +from typing import Any, cast + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.utils.message_transformer import ToolFileMessageTransformer +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from dify_graph.file import File, FileTransferMethod +from dify_graph.model_runtime.entities.llm_entities import LLMUsage, LLMUsageMetadata +from dify_graph.model_runtime.utils.encoders import jsonable_encoder +from dify_graph.node_events import ( + AgentLogEvent, + NodeEventBase, + NodeRunResult, + StreamChunkEvent, + StreamCompletedEvent, +) +from dify_graph.variables.segments import ArrayFileSegment +from extensions.ext_database import db +from factories import file_factory +from models import ToolFile +from services.tools.builtin_tools_manage_service import BuiltinToolManageService + +from .exceptions import AgentNodeError, AgentVariableTypeError, ToolFileNotFoundError + + +class AgentMessageTransformer: + def transform( + self, + *, + messages: Generator[ToolInvokeMessage, None, None], + tool_info: Mapping[str, Any], + parameters_for_log: dict[str, Any], + user_id: str, + tenant_id: str, + node_type: NodeType, + node_id: str, + node_execution_id: str, + ) -> Generator[NodeEventBase, None, None]: + from core.plugin.impl.plugin import PluginInstaller + + message_stream = ToolFileMessageTransformer.transform_tool_invoke_messages( + messages=messages, + user_id=user_id, + tenant_id=tenant_id, + conversation_id=None, + ) + + text = "" + files: list[File] = [] + json_list: list[dict | list] = [] + + agent_logs: list[AgentLogEvent] = [] + agent_execution_metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] = {} + llm_usage = LLMUsage.empty_usage() + variables: dict[str, Any] = {} + + for message in message_stream: + if message.type in { + ToolInvokeMessage.MessageType.IMAGE_LINK, + ToolInvokeMessage.MessageType.BINARY_LINK, + ToolInvokeMessage.MessageType.IMAGE, + }: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + + url = message.message.text + if message.meta: + transfer_method = message.meta.get("transfer_method", FileTransferMethod.TOOL_FILE) + else: + transfer_method = FileTransferMethod.TOOL_FILE + + tool_file_id = str(url).split("/")[-1].split(".")[0] + + with Session(db.engine) as session: + stmt = select(ToolFile).where(ToolFile.id == tool_file_id) + tool_file = session.scalar(stmt) + if tool_file is None: + raise ToolFileNotFoundError(tool_file_id) + + mapping = { + "tool_file_id": tool_file_id, + "type": file_factory.get_file_type_by_mime_type(tool_file.mimetype), + "transfer_method": transfer_method, + "url": url, + } + file = file_factory.build_from_mapping( + mapping=mapping, + tenant_id=tenant_id, + ) + files.append(file) + elif message.type == ToolInvokeMessage.MessageType.BLOB: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + assert message.meta + + tool_file_id = message.message.text.split("/")[-1].split(".")[0] + with Session(db.engine) as session: + stmt = select(ToolFile).where(ToolFile.id == tool_file_id) + tool_file = session.scalar(stmt) + if tool_file is None: + raise ToolFileNotFoundError(tool_file_id) + + mapping = { + "tool_file_id": tool_file_id, + "transfer_method": FileTransferMethod.TOOL_FILE, + } + files.append( + file_factory.build_from_mapping( + mapping=mapping, + tenant_id=tenant_id, + ) + ) + elif message.type == ToolInvokeMessage.MessageType.TEXT: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + text += message.message.text + yield StreamChunkEvent( + selector=[node_id, "text"], + chunk=message.message.text, + is_final=False, + ) + elif message.type == ToolInvokeMessage.MessageType.JSON: + assert isinstance(message.message, ToolInvokeMessage.JsonMessage) + if node_type == BuiltinNodeTypes.AGENT: + if isinstance(message.message.json_object, dict): + msg_metadata: dict[str, Any] = message.message.json_object.pop("execution_metadata", {}) + llm_usage = LLMUsage.from_metadata(cast(LLMUsageMetadata, msg_metadata)) + agent_execution_metadata = { + WorkflowNodeExecutionMetadataKey(key): value + for key, value in msg_metadata.items() + if key in WorkflowNodeExecutionMetadataKey.__members__.values() + } + else: + llm_usage = LLMUsage.empty_usage() + agent_execution_metadata = {} + if message.message.json_object: + json_list.append(message.message.json_object) + elif message.type == ToolInvokeMessage.MessageType.LINK: + assert isinstance(message.message, ToolInvokeMessage.TextMessage) + stream_text = f"Link: {message.message.text}\n" + text += stream_text + yield StreamChunkEvent( + selector=[node_id, "text"], + chunk=stream_text, + is_final=False, + ) + elif message.type == ToolInvokeMessage.MessageType.VARIABLE: + assert isinstance(message.message, ToolInvokeMessage.VariableMessage) + variable_name = message.message.variable_name + variable_value = message.message.variable_value + if message.message.stream: + if not isinstance(variable_value, str): + raise AgentVariableTypeError( + "When 'stream' is True, 'variable_value' must be a string.", + variable_name=variable_name, + expected_type="str", + actual_type=type(variable_value).__name__, + ) + if variable_name not in variables: + variables[variable_name] = "" + variables[variable_name] += variable_value + + yield StreamChunkEvent( + selector=[node_id, variable_name], + chunk=variable_value, + is_final=False, + ) + else: + variables[variable_name] = variable_value + elif message.type == ToolInvokeMessage.MessageType.FILE: + assert message.meta is not None + assert isinstance(message.meta, dict) + if "file" not in message.meta: + raise AgentNodeError("File message is missing 'file' key in meta") + + if not isinstance(message.meta["file"], File): + raise AgentNodeError(f"Expected File object but got {type(message.meta['file']).__name__}") + files.append(message.meta["file"]) + elif message.type == ToolInvokeMessage.MessageType.LOG: + assert isinstance(message.message, ToolInvokeMessage.LogMessage) + if message.message.metadata: + icon = tool_info.get("icon", "") + dict_metadata = dict(message.message.metadata) + if dict_metadata.get("provider"): + manager = PluginInstaller() + plugins = manager.list_plugins(tenant_id) + try: + current_plugin = next( + plugin + for plugin in plugins + if f"{plugin.plugin_id}/{plugin.name}" == dict_metadata["provider"] + ) + icon = current_plugin.declaration.icon + except StopIteration: + pass + icon_dark = None + try: + builtin_tool = next( + provider + for provider in BuiltinToolManageService.list_builtin_tools( + user_id, + tenant_id, + ) + if provider.name == dict_metadata["provider"] + ) + icon = builtin_tool.icon + icon_dark = builtin_tool.icon_dark + except StopIteration: + pass + + dict_metadata["icon"] = icon + dict_metadata["icon_dark"] = icon_dark + message.message.metadata = dict_metadata + agent_log = AgentLogEvent( + message_id=message.message.id, + node_execution_id=node_execution_id, + parent_id=message.message.parent_id, + error=message.message.error, + status=message.message.status.value, + data=message.message.data, + label=message.message.label, + metadata=message.message.metadata, + node_id=node_id, + ) + + for log in agent_logs: + if log.message_id == agent_log.message_id: + log.data = agent_log.data + log.status = agent_log.status + log.error = agent_log.error + log.label = agent_log.label + log.metadata = agent_log.metadata + break + else: + agent_logs.append(agent_log) + + yield agent_log + + json_output: list[dict[str, Any] | list[Any]] = [] + if agent_logs: + for log in agent_logs: + json_output.append( + { + "id": log.message_id, + "parent_id": log.parent_id, + "error": log.error, + "status": log.status, + "data": log.data, + "label": log.label, + "metadata": log.metadata, + "node_id": log.node_id, + } + ) + if json_list: + json_output.extend(json_list) + else: + json_output.append({"data": []}) + + yield StreamChunkEvent( + selector=[node_id, "text"], + chunk="", + is_final=True, + ) + + for var_name in variables: + yield StreamChunkEvent( + selector=[node_id, var_name], + chunk="", + is_final=True, + ) + + yield StreamCompletedEvent( + node_run_result=NodeRunResult( + status=WorkflowNodeExecutionStatus.SUCCEEDED, + outputs={ + "text": text, + "usage": jsonable_encoder(llm_usage), + "files": ArrayFileSegment(value=files), + "json": json_output, + **variables, + }, + metadata={ + **agent_execution_metadata, + WorkflowNodeExecutionMetadataKey.TOOL_INFO: tool_info, + WorkflowNodeExecutionMetadataKey.AGENT_LOG: agent_logs, + }, + inputs=parameters_for_log, + llm_usage=llm_usage, + ) + ) diff --git a/api/core/workflow/nodes/agent/plugin_strategy_adapter.py b/api/core/workflow/nodes/agent/plugin_strategy_adapter.py new file mode 100644 index 0000000000..1fc427ad6c --- /dev/null +++ b/api/core/workflow/nodes/agent/plugin_strategy_adapter.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from factories.agent_factory import get_plugin_agent_strategy + +from .strategy_protocols import AgentStrategyPresentationProvider, AgentStrategyResolver, ResolvedAgentStrategy + + +class PluginAgentStrategyResolver(AgentStrategyResolver): + def resolve( + self, + *, + tenant_id: str, + agent_strategy_provider_name: str, + agent_strategy_name: str, + ) -> ResolvedAgentStrategy: + return get_plugin_agent_strategy( + tenant_id=tenant_id, + agent_strategy_provider_name=agent_strategy_provider_name, + agent_strategy_name=agent_strategy_name, + ) + + +class PluginAgentStrategyPresentationProvider(AgentStrategyPresentationProvider): + def get_icon(self, *, tenant_id: str, agent_strategy_provider_name: str) -> str | None: + from core.plugin.impl.plugin import PluginInstaller + + manager = PluginInstaller() + try: + plugins = manager.list_plugins(tenant_id) + except Exception: + return None + + try: + current_plugin = next( + plugin for plugin in plugins if f"{plugin.plugin_id}/{plugin.name}" == agent_strategy_provider_name + ) + except StopIteration: + return None + + return current_plugin.declaration.icon diff --git a/api/core/workflow/nodes/agent/runtime_support.py b/api/core/workflow/nodes/agent/runtime_support.py new file mode 100644 index 0000000000..2ff7c964b9 --- /dev/null +++ b/api/core/workflow/nodes/agent/runtime_support.py @@ -0,0 +1,276 @@ +from __future__ import annotations + +import json +from collections.abc import Sequence +from typing import Any, cast + +from packaging.version import Version +from pydantic import ValidationError +from sqlalchemy import select +from sqlalchemy.orm import Session + +from core.agent.entities import AgentToolEntity +from core.agent.plugin_entities import AgentStrategyParameter +from core.memory.token_buffer_memory import TokenBufferMemory +from core.model_manager import ModelInstance, ModelManager +from core.plugin.entities.request import InvokeCredentials +from core.provider_manager import ProviderManager +from core.tools.entities.tool_entities import ToolIdentity, ToolParameter, ToolProviderType +from core.tools.tool_manager import ToolManager +from dify_graph.enums import SystemVariableKey +from dify_graph.model_runtime.entities.model_entities import AIModelEntity, ModelType +from dify_graph.runtime import VariablePool +from dify_graph.variables.segments import StringSegment +from extensions.ext_database import db +from models.model import Conversation + +from .entities import AgentNodeData, AgentOldVersionModelFeatures, ParamsAutoGenerated +from .exceptions import AgentInputTypeError, AgentVariableNotFoundError +from .strategy_protocols import ResolvedAgentStrategy + + +class AgentRuntimeSupport: + def build_parameters( + self, + *, + agent_parameters: Sequence[AgentStrategyParameter], + variable_pool: VariablePool, + node_data: AgentNodeData, + strategy: ResolvedAgentStrategy, + tenant_id: str, + app_id: str, + invoke_from: Any, + for_log: bool = False, + ) -> dict[str, Any]: + agent_parameters_dictionary = {parameter.name: parameter for parameter in agent_parameters} + + result: dict[str, Any] = {} + for parameter_name in node_data.agent_parameters: + parameter = agent_parameters_dictionary.get(parameter_name) + if not parameter: + result[parameter_name] = None + continue + + agent_input = node_data.agent_parameters[parameter_name] + match agent_input.type: + case "variable": + variable = variable_pool.get(agent_input.value) # type: ignore[arg-type] + if variable is None: + raise AgentVariableNotFoundError(str(agent_input.value)) + parameter_value = variable.value + case "mixed" | "constant": + try: + if not isinstance(agent_input.value, str): + parameter_value = json.dumps(agent_input.value, ensure_ascii=False) + else: + parameter_value = str(agent_input.value) + except TypeError: + parameter_value = str(agent_input.value) + + segment_group = variable_pool.convert_template(parameter_value) + parameter_value = segment_group.log if for_log else segment_group.text + try: + if not isinstance(agent_input.value, str): + parameter_value = json.loads(parameter_value) + except json.JSONDecodeError: + parameter_value = parameter_value + case _: + raise AgentInputTypeError(agent_input.type) + + value = parameter_value + if parameter.type == "array[tools]": + value = cast(list[dict[str, Any]], value) + value = [tool for tool in value if tool.get("enabled", False)] + value = self._filter_mcp_type_tool(strategy, value) + for tool in value: + if "schemas" in tool: + tool.pop("schemas") + parameters = tool.get("parameters", {}) + if all(isinstance(v, dict) for _, v in parameters.items()): + params = {} + for key, param in parameters.items(): + if param.get("auto", ParamsAutoGenerated.OPEN) in ( + ParamsAutoGenerated.CLOSE, + 0, + ): + value_param = param.get("value", {}) + if value_param and value_param.get("type", "") == "variable": + variable_selector = value_param.get("value") + if not variable_selector: + raise ValueError("Variable selector is missing for a variable-type parameter.") + + variable = variable_pool.get(variable_selector) + if variable is None: + raise AgentVariableNotFoundError(str(variable_selector)) + + params[key] = variable.value + else: + params[key] = value_param.get("value", "") if value_param is not None else None + else: + params[key] = None + parameters = params + tool["settings"] = {k: v.get("value", None) for k, v in tool.get("settings", {}).items()} + tool["parameters"] = parameters + + if not for_log: + if parameter.type == "array[tools]": + value = cast(list[dict[str, Any]], value) + tool_value = [] + for tool in value: + provider_type = ToolProviderType(tool.get("type", ToolProviderType.BUILT_IN)) + setting_params = tool.get("settings", {}) + parameters = tool.get("parameters", {}) + manual_input_params = [key for key, value in parameters.items() if value is not None] + + parameters = {**parameters, **setting_params} + entity = AgentToolEntity( + provider_id=tool.get("provider_name", ""), + provider_type=provider_type, + tool_name=tool.get("tool_name", ""), + tool_parameters=parameters, + plugin_unique_identifier=tool.get("plugin_unique_identifier", None), + credential_id=tool.get("credential_id", None), + ) + + extra = tool.get("extra", {}) + + runtime_variable_pool: VariablePool | None = None + if node_data.version != "1" or node_data.tool_node_version is not None: + runtime_variable_pool = variable_pool + tool_runtime = ToolManager.get_agent_tool_runtime( + tenant_id, + app_id, + entity, + invoke_from, + runtime_variable_pool, + ) + if tool_runtime.entity.description: + tool_runtime.entity.description.llm = ( + extra.get("description", "") or tool_runtime.entity.description.llm + ) + for tool_runtime_params in tool_runtime.entity.parameters: + tool_runtime_params.form = ( + ToolParameter.ToolParameterForm.FORM + if tool_runtime_params.name in manual_input_params + else tool_runtime_params.form + ) + manual_input_value = {} + if tool_runtime.entity.parameters: + manual_input_value = { + key: value for key, value in parameters.items() if key in manual_input_params + } + runtime_parameters = { + **tool_runtime.runtime.runtime_parameters, + **manual_input_value, + } + tool_value.append( + { + **tool_runtime.entity.model_dump(mode="json"), + "runtime_parameters": runtime_parameters, + "credential_id": tool.get("credential_id", None), + "provider_type": provider_type.value, + } + ) + value = tool_value + if parameter.type == AgentStrategyParameter.AgentStrategyParameterType.MODEL_SELECTOR: + value = cast(dict[str, Any], value) + model_instance, model_schema = self.fetch_model(tenant_id=tenant_id, value=value) + history_prompt_messages = [] + if node_data.memory: + memory = self.fetch_memory( + variable_pool=variable_pool, + app_id=app_id, + model_instance=model_instance, + ) + if memory: + prompt_messages = memory.get_history_prompt_messages( + message_limit=node_data.memory.window.size or None + ) + history_prompt_messages = [ + prompt_message.model_dump(mode="json") for prompt_message in prompt_messages + ] + value["history_prompt_messages"] = history_prompt_messages + if model_schema: + model_schema = self._remove_unsupported_model_features_for_old_version(model_schema) + value["entity"] = model_schema.model_dump(mode="json") + else: + value["entity"] = None + result[parameter_name] = value + + return result + + def build_credentials(self, *, parameters: dict[str, Any]) -> InvokeCredentials: + credentials = InvokeCredentials() + credentials.tool_credentials = {} + for tool in parameters.get("tools", []): + if not tool.get("credential_id"): + continue + try: + identity = ToolIdentity.model_validate(tool.get("identity", {})) + except ValidationError: + continue + credentials.tool_credentials[identity.provider] = tool.get("credential_id", None) + return credentials + + def fetch_memory( + self, + *, + variable_pool: VariablePool, + app_id: str, + model_instance: ModelInstance, + ) -> TokenBufferMemory | None: + conversation_id_variable = variable_pool.get(["sys", SystemVariableKey.CONVERSATION_ID]) + if not isinstance(conversation_id_variable, StringSegment): + return None + conversation_id = conversation_id_variable.value + + with Session(db.engine, expire_on_commit=False) as session: + stmt = select(Conversation).where(Conversation.app_id == app_id, Conversation.id == conversation_id) + conversation = session.scalar(stmt) + if not conversation: + return None + + return TokenBufferMemory(conversation=conversation, model_instance=model_instance) + + def fetch_model(self, *, tenant_id: str, value: dict[str, Any]) -> tuple[ModelInstance, AIModelEntity | None]: + provider_manager = ProviderManager() + provider_model_bundle = provider_manager.get_provider_model_bundle( + tenant_id=tenant_id, + provider=value.get("provider", ""), + model_type=ModelType.LLM, + ) + model_name = value.get("model", "") + model_credentials = provider_model_bundle.configuration.get_current_credentials( + model_type=ModelType.LLM, + model=model_name, + ) + provider_name = provider_model_bundle.configuration.provider.provider + model_type_instance = provider_model_bundle.model_type_instance + model_instance = ModelManager().get_model_instance( + tenant_id=tenant_id, + provider=provider_name, + model_type=ModelType(value.get("model_type", "")), + model=model_name, + ) + model_schema = model_type_instance.get_model_schema(model_name, model_credentials) + return model_instance, model_schema + + @staticmethod + def _remove_unsupported_model_features_for_old_version(model_schema: AIModelEntity) -> AIModelEntity: + if model_schema.features: + for feature in model_schema.features[:]: + try: + AgentOldVersionModelFeatures(feature.value) + except ValueError: + model_schema.features.remove(feature) + return model_schema + + @staticmethod + def _filter_mcp_type_tool( + strategy: ResolvedAgentStrategy, + tools: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + meta_version = strategy.meta_version + if meta_version and Version(meta_version) > Version("0.0.1"): + return tools + return [tool for tool in tools if tool.get("type") != ToolProviderType.MCP] diff --git a/api/core/workflow/nodes/agent/strategy_protocols.py b/api/core/workflow/nodes/agent/strategy_protocols.py new file mode 100644 index 0000000000..643d916d15 --- /dev/null +++ b/api/core/workflow/nodes/agent/strategy_protocols.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from collections.abc import Generator, Sequence +from typing import Any, Protocol + +from core.agent.plugin_entities import AgentStrategyParameter +from core.plugin.entities.request import InvokeCredentials +from core.tools.entities.tool_entities import ToolInvokeMessage + + +class ResolvedAgentStrategy(Protocol): + meta_version: str | None + + def get_parameters(self) -> Sequence[AgentStrategyParameter]: ... + + def invoke( + self, + *, + params: dict[str, Any], + user_id: str, + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + credentials: InvokeCredentials | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: ... + + +class AgentStrategyResolver(Protocol): + def resolve( + self, + *, + tenant_id: str, + agent_strategy_provider_name: str, + agent_strategy_name: str, + ) -> ResolvedAgentStrategy: ... + + +class AgentStrategyPresentationProvider(Protocol): + def get_icon(self, *, tenant_id: str, agent_strategy_provider_name: str) -> str | None: ... diff --git a/api/core/workflow/nodes/datasource/__init__.py b/api/core/workflow/nodes/datasource/__init__.py new file mode 100644 index 0000000000..2e9bed5e00 --- /dev/null +++ b/api/core/workflow/nodes/datasource/__init__.py @@ -0,0 +1 @@ +"""Datasource workflow node package.""" diff --git a/api/dify_graph/nodes/datasource/datasource_node.py b/api/core/workflow/nodes/datasource/datasource_node.py similarity index 90% rename from api/dify_graph/nodes/datasource/datasource_node.py rename to api/core/workflow/nodes/datasource/datasource_node.py index b97394744e..44f4a23a5a 100644 --- a/api/dify_graph/nodes/datasource/datasource_node.py +++ b/api/core/workflow/nodes/datasource/datasource_node.py @@ -1,21 +1,17 @@ from collections.abc import Generator, Mapping, Sequence from typing import TYPE_CHECKING, Any +from core.datasource.datasource_manager import DatasourceManager from core.datasource.entities.datasource_entities import DatasourceProviderType from core.plugin.impl.exc import PluginDaemonClientSideError +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType, SystemVariableKey +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, SystemVariableKey, WorkflowNodeExecutionMetadataKey from dify_graph.node_events import NodeRunResult, StreamCompletedEvent from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser -from dify_graph.repositories.datasource_manager_protocol import ( - DatasourceManagerProtocol, - DatasourceParameter, - OnlineDriveDownloadFileParam, -) -from ...entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey -from .entities import DatasourceNodeData +from .entities import DatasourceNodeData, DatasourceParameter, OnlineDriveDownloadFileParam from .exc import DatasourceNodeError if TYPE_CHECKING: @@ -28,16 +24,15 @@ class DatasourceNode(Node[DatasourceNodeData]): Datasource Node """ - node_type = NodeType.DATASOURCE + node_type = BuiltinNodeTypes.DATASOURCE execution_type = NodeExecutionType.ROOT def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", - datasource_manager: DatasourceManagerProtocol, ): super().__init__( id=id, @@ -45,7 +40,11 @@ class DatasourceNode(Node[DatasourceNodeData]): graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, ) - self.datasource_manager = datasource_manager + self.datasource_manager = DatasourceManager + + def populate_start_event(self, event) -> None: + event.provider_id = f"{self.node_data.plugin_id}/{self.node_data.provider_name}" + event.provider_type = self.node_data.provider_type def _run(self) -> Generator: """ @@ -181,7 +180,7 @@ class DatasourceNode(Node[DatasourceNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: DatasourceNodeData, ) -> Mapping[str, Sequence[str]]: """ Extract variable selector to variable mapping @@ -190,11 +189,10 @@ class DatasourceNode(Node[DatasourceNodeData]): :param node_data: node data :return: """ - typed_node_data = DatasourceNodeData.model_validate(node_data) result = {} - if typed_node_data.datasource_parameters: - for parameter_name in typed_node_data.datasource_parameters: - input = typed_node_data.datasource_parameters[parameter_name] + if node_data.datasource_parameters: + for parameter_name in node_data.datasource_parameters: + input = node_data.datasource_parameters[parameter_name] match input.type: case "mixed": assert isinstance(input.value, str) diff --git a/api/dify_graph/nodes/datasource/entities.py b/api/core/workflow/nodes/datasource/entities.py similarity index 82% rename from api/dify_graph/nodes/datasource/entities.py rename to api/core/workflow/nodes/datasource/entities.py index ba49e65f31..65864474b0 100644 --- a/api/dify_graph/nodes/datasource/entities.py +++ b/api/core/workflow/nodes/datasource/entities.py @@ -3,7 +3,8 @@ from typing import Any, Literal, Union from pydantic import BaseModel, field_validator from pydantic_core.core_schema import ValidationInfo -from dify_graph.nodes.base.entities import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType class DatasourceEntity(BaseModel): @@ -16,6 +17,8 @@ class DatasourceEntity(BaseModel): class DatasourceNodeData(BaseNodeData, DatasourceEntity): + type: NodeType = BuiltinNodeTypes.DATASOURCE + class DatasourceInput(BaseModel): # TODO: check this type value: Union[Any, list[str]] @@ -39,3 +42,14 @@ class DatasourceNodeData(BaseNodeData, DatasourceEntity): return typ datasource_parameters: dict[str, DatasourceInput] | None = None + + +class DatasourceParameter(BaseModel): + workspace_id: str + page_id: str + type: str + + +class OnlineDriveDownloadFileParam(BaseModel): + id: str + bucket: str diff --git a/api/dify_graph/nodes/datasource/exc.py b/api/core/workflow/nodes/datasource/exc.py similarity index 100% rename from api/dify_graph/nodes/datasource/exc.py rename to api/core/workflow/nodes/datasource/exc.py diff --git a/api/dify_graph/repositories/datasource_manager_protocol.py b/api/core/workflow/nodes/datasource/protocols.py similarity index 79% rename from api/dify_graph/repositories/datasource_manager_protocol.py rename to api/core/workflow/nodes/datasource/protocols.py index fbe2016d3c..c006e0885c 100644 --- a/api/dify_graph/repositories/datasource_manager_protocol.py +++ b/api/core/workflow/nodes/datasource/protocols.py @@ -1,25 +1,10 @@ from collections.abc import Generator from typing import Any, Protocol -from pydantic import BaseModel - from dify_graph.file import File from dify_graph.node_events import StreamChunkEvent, StreamCompletedEvent - -class DatasourceParameter(BaseModel): - workspace_id: str - page_id: str - type: str - - -class OnlineDriveDownloadFileParam(BaseModel): - id: str - bucket: str - - -class DatasourceFinal(BaseModel): - data: dict[str, Any] | None = None +from .entities import DatasourceParameter, OnlineDriveDownloadFileParam class DatasourceManagerProtocol(Protocol): diff --git a/api/core/workflow/nodes/knowledge_index/__init__.py b/api/core/workflow/nodes/knowledge_index/__init__.py new file mode 100644 index 0000000000..efc6a57b3d --- /dev/null +++ b/api/core/workflow/nodes/knowledge_index/__init__.py @@ -0,0 +1,5 @@ +"""Knowledge index workflow node package.""" + +KNOWLEDGE_INDEX_NODE_TYPE = "knowledge-index" + +__all__ = ["KNOWLEDGE_INDEX_NODE_TYPE"] diff --git a/api/dify_graph/nodes/knowledge_index/entities.py b/api/core/workflow/nodes/knowledge_index/entities.py similarity index 92% rename from api/dify_graph/nodes/knowledge_index/entities.py rename to api/core/workflow/nodes/knowledge_index/entities.py index 493b5eadd8..8b00746268 100644 --- a/api/dify_graph/nodes/knowledge_index/entities.py +++ b/api/core/workflow/nodes/knowledge_index/entities.py @@ -3,7 +3,9 @@ from typing import Literal, Union from pydantic import BaseModel from core.rag.retrieval.retrieval_methods import RetrievalMethod -from dify_graph.nodes.base import BaseNodeData +from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import NodeType class RerankingModelConfig(BaseModel): @@ -155,7 +157,7 @@ class KnowledgeIndexNodeData(BaseNodeData): Knowledge index Node Data. """ - type: str = "knowledge-index" + type: NodeType = KNOWLEDGE_INDEX_NODE_TYPE chunk_structure: str index_chunk_variable_selector: list[str] indexing_technique: str | None = None diff --git a/api/dify_graph/nodes/knowledge_index/exc.py b/api/core/workflow/nodes/knowledge_index/exc.py similarity index 100% rename from api/dify_graph/nodes/knowledge_index/exc.py rename to api/core/workflow/nodes/knowledge_index/exc.py diff --git a/api/dify_graph/nodes/knowledge_index/knowledge_index_node.py b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py similarity index 91% rename from api/dify_graph/nodes/knowledge_index/knowledge_index_node.py rename to api/core/workflow/nodes/knowledge_index/knowledge_index_node.py index eeb4f3c229..0a74847bc1 100644 --- a/api/dify_graph/nodes/knowledge_index/knowledge_index_node.py +++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py @@ -2,13 +2,15 @@ import logging from collections.abc import Mapping from typing import TYPE_CHECKING, Any +from core.rag.index_processor.index_processor import IndexProcessor +from core.rag.summary_index.summary_index import SummaryIndex +from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType, SystemVariableKey +from dify_graph.enums import NodeExecutionType, SystemVariableKey from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.template import Template -from dify_graph.repositories.index_processor_protocol import IndexProcessorProtocol -from dify_graph.repositories.summary_index_service_protocol import SummaryIndexServiceProtocol from .entities import KnowledgeIndexNodeData from .exc import ( @@ -24,21 +26,19 @@ _INVOKE_FROM_DEBUGGER = "debugger" class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): - node_type = NodeType.KNOWLEDGE_INDEX + node_type = KNOWLEDGE_INDEX_NODE_TYPE execution_type = NodeExecutionType.RESPONSE def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", - index_processor: IndexProcessorProtocol, - summary_index_service: SummaryIndexServiceProtocol, ) -> None: super().__init__(id, config, graph_init_params, graph_runtime_state) - self.index_processor = index_processor - self.summary_index_service = summary_index_service + self.index_processor = IndexProcessor() + self.summary_index_service = SummaryIndex() def _run(self) -> NodeRunResult: # type: ignore node_data = self.node_data diff --git a/api/dify_graph/repositories/index_processor_protocol.py b/api/core/workflow/nodes/knowledge_index/protocols.py similarity index 55% rename from api/dify_graph/repositories/index_processor_protocol.py rename to api/core/workflow/nodes/knowledge_index/protocols.py index feaa4ab5de..bb52123082 100644 --- a/api/dify_graph/repositories/index_processor_protocol.py +++ b/api/core/workflow/nodes/knowledge_index/protocols.py @@ -5,21 +5,21 @@ from pydantic import BaseModel, Field class PreviewItem(BaseModel): - content: str | None = Field(None) - child_chunks: list[str] | None = Field(None) - summary: str | None = Field(None) + content: str | None = Field(default=None) + child_chunks: list[str] | None = Field(default=None) + summary: str | None = Field(default=None) class QaPreview(BaseModel): - answer: str | None = Field(None) - question: str | None = Field(None) + answer: str | None = Field(default=None) + question: str | None = Field(default=None) class Preview(BaseModel): chunk_structure: str - parent_mode: str | None = Field(None) - preview: list[PreviewItem] = Field([]) - qa_preview: list[QaPreview] = Field([]) + parent_mode: str | None = Field(default=None) + preview: list[PreviewItem] = Field(default_factory=list) + qa_preview: list[QaPreview] = Field(default_factory=list) total_segments: int @@ -39,3 +39,9 @@ class IndexProcessorProtocol(Protocol): def get_preview_output( self, chunks: Any, dataset_id: str, document_id: str, chunk_structure: str, summary_index_setting: dict | None ) -> Preview: ... + + +class SummaryIndexServiceProtocol(Protocol): + def generate_and_vectorize_summary( + self, dataset_id: str, document_id: str, is_preview: bool, summary_index_setting: dict | None = None + ) -> None: ... diff --git a/api/core/workflow/nodes/knowledge_retrieval/__init__.py b/api/core/workflow/nodes/knowledge_retrieval/__init__.py new file mode 100644 index 0000000000..33ea4277b4 --- /dev/null +++ b/api/core/workflow/nodes/knowledge_retrieval/__init__.py @@ -0,0 +1 @@ +"""Knowledge retrieval workflow node package.""" diff --git a/api/dify_graph/nodes/knowledge_retrieval/entities.py b/api/core/workflow/nodes/knowledge_retrieval/entities.py similarity index 94% rename from api/dify_graph/nodes/knowledge_retrieval/entities.py rename to api/core/workflow/nodes/knowledge_retrieval/entities.py index c3059897c7..bc5618685a 100644 --- a/api/dify_graph/nodes/knowledge_retrieval/entities.py +++ b/api/core/workflow/nodes/knowledge_retrieval/entities.py @@ -3,7 +3,8 @@ from typing import Literal from pydantic import BaseModel, Field -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.llm.entities import ModelConfig, VisionConfig @@ -113,7 +114,7 @@ class KnowledgeRetrievalNodeData(BaseNodeData): Knowledge retrieval Node Data. """ - type: str = "knowledge-retrieval" + type: NodeType = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL query_variable_selector: list[str] | None | str = None query_attachment_selector: list[str] | None | str = None dataset_ids: list[str] diff --git a/api/dify_graph/nodes/knowledge_retrieval/exc.py b/api/core/workflow/nodes/knowledge_retrieval/exc.py similarity index 100% rename from api/dify_graph/nodes/knowledge_retrieval/exc.py rename to api/core/workflow/nodes/knowledge_retrieval/exc.py diff --git a/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py similarity index 77% rename from api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py rename to api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index d84dda42d6..9c3b9aacbf 100644 --- a/api/dify_graph/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -1,11 +1,19 @@ +"""Knowledge retrieval workflow node implementation. + +This node now lives under ``core.workflow.nodes`` and is discovered directly by +the workflow node registry. +""" + import logging from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any, Literal from core.app.app_config.entities import DatasetRetrieveConfigEntity +from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from dify_graph.entities import GraphInitParams +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -14,8 +22,6 @@ from dify_graph.model_runtime.utils.encoders import jsonable_encoder from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base import LLMUsageTrackingMixin from dify_graph.nodes.base.node import Node -from dify_graph.nodes.llm.file_saver import FileSaverImpl, LLMFileSaver -from dify_graph.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest, RAGRetrievalProtocol, Source from dify_graph.variables import ( ArrayFileSegment, FileSegment, @@ -23,11 +29,16 @@ from dify_graph.variables import ( ) from dify_graph.variables.segments import ArrayObjectSegment -from .entities import KnowledgeRetrievalNodeData +from .entities import ( + Condition, + KnowledgeRetrievalNodeData, + MetadataFilteringCondition, +) from .exc import ( KnowledgeRetrievalNodeError, RateLimitExceededError, ) +from .retrieval import KnowledgeRetrievalRequest, Source if TYPE_CHECKING: from dify_graph.file.models import File @@ -37,23 +48,18 @@ logger = logging.getLogger(__name__) class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeData]): - node_type = NodeType.KNOWLEDGE_RETRIEVAL + node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL # Instance attributes specific to LLMNode. # Output variable for file _file_outputs: list["File"] - _llm_file_saver: LLMFileSaver - def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", - rag_retrieval: RAGRetrievalProtocol, - *, - llm_file_saver: LLMFileSaver | None = None, ): super().__init__( id=id, @@ -63,15 +69,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD ) # LLM file outputs, used for MultiModal outputs. self._file_outputs = [] - self._rag_retrieval = rag_retrieval - - if llm_file_saver is None: - dify_ctx = self.require_dify_context() - llm_file_saver = FileSaverImpl( - user_id=dify_ctx.user_id, - tenant_id=dify_ctx.tenant_id, - ) - self._llm_file_saver = llm_file_saver + self._rag_retrieval = DatasetRetrieval() @classmethod def version(cls): @@ -116,7 +114,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD try: results, usage = self._fetch_dataset_retriever(node_data=self._node_data, variables=variables) - outputs = {"result": ArrayObjectSegment(value=[item.model_dump() for item in results])} + outputs = {"result": ArrayObjectSegment(value=[item.model_dump(by_alias=True) for item in results])} return NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, @@ -171,6 +169,12 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD if node_data.metadata_filtering_mode is not None: metadata_filtering_mode = node_data.metadata_filtering_mode + resolved_metadata_conditions = ( + self._resolve_metadata_filtering_conditions(node_data.metadata_filtering_conditions) + if node_data.metadata_filtering_conditions + else None + ) + if str(node_data.retrieval_mode) == DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE and query: # fetch model config if node_data.single_retrieval_config is None: @@ -189,7 +193,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD model_mode=model.mode, model_name=model.name, metadata_model_config=node_data.metadata_model_config, - metadata_filtering_conditions=node_data.metadata_filtering_conditions, + metadata_filtering_conditions=resolved_metadata_conditions, metadata_filtering_mode=metadata_filtering_mode, query=query, ) @@ -247,7 +251,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD weights=weights, reranking_enable=node_data.multiple_retrieval_config.reranking_enable, metadata_model_config=node_data.metadata_model_config, - metadata_filtering_conditions=node_data.metadata_filtering_conditions, + metadata_filtering_conditions=resolved_metadata_conditions, metadata_filtering_mode=metadata_filtering_mode, attachment_ids=[attachment.related_id for attachment in attachments] if attachments else None, ) @@ -256,21 +260,60 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeD usage = self._rag_retrieval.llm_usage return retrieval_resource_list, usage + def _resolve_metadata_filtering_conditions( + self, conditions: MetadataFilteringCondition + ) -> MetadataFilteringCondition: + if conditions.conditions is None: + return MetadataFilteringCondition( + logical_operator=conditions.logical_operator, + conditions=None, + ) + + variable_pool = self.graph_runtime_state.variable_pool + resolved_conditions: list[Condition] = [] + for cond in conditions.conditions or []: + value = cond.value + if isinstance(value, str): + segment_group = variable_pool.convert_template(value) + if len(segment_group.value) == 1: + resolved_value = segment_group.value[0].to_object() + else: + resolved_value = segment_group.text + elif isinstance(value, Sequence) and all(isinstance(v, str) for v in value): + resolved_values = [] + for v in value: # type: ignore + segment_group = variable_pool.convert_template(v) + if len(segment_group.value) == 1: + resolved_values.append(segment_group.value[0].to_object()) + else: + resolved_values.append(segment_group.text) + resolved_value = resolved_values + else: + resolved_value = value + resolved_conditions.append( + Condition( + name=cond.name, + comparison_operator=cond.comparison_operator, + value=resolved_value, + ) + ) + return MetadataFilteringCondition( + logical_operator=conditions.logical_operator or "and", + conditions=resolved_conditions, + ) + @classmethod def _extract_variable_selector_to_variable_mapping( cls, *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: KnowledgeRetrievalNodeData, ) -> Mapping[str, Sequence[str]]: # graph_config is not used in this node type - # Create typed NodeData from dict - typed_node_data = KnowledgeRetrievalNodeData.model_validate(node_data) - variable_mapping = {} - if typed_node_data.query_variable_selector: - variable_mapping[node_id + ".query"] = typed_node_data.query_variable_selector - if typed_node_data.query_attachment_selector: - variable_mapping[node_id + ".queryAttachment"] = typed_node_data.query_attachment_selector + if node_data.query_variable_selector: + variable_mapping[node_id + ".query"] = node_data.query_variable_selector + if node_data.query_attachment_selector: + variable_mapping[node_id + ".queryAttachment"] = node_data.query_attachment_selector return variable_mapping diff --git a/api/dify_graph/repositories/rag_retrieval_protocol.py b/api/core/workflow/nodes/knowledge_retrieval/retrieval.py similarity index 83% rename from api/dify_graph/repositories/rag_retrieval_protocol.py rename to api/core/workflow/nodes/knowledge_retrieval/retrieval.py index 5f3d38167e..f964f79582 100644 --- a/api/dify_graph/repositories/rag_retrieval_protocol.py +++ b/api/core/workflow/nodes/knowledge_retrieval/retrieval.py @@ -3,9 +3,10 @@ from typing import Any, Literal, Protocol from pydantic import BaseModel, Field from dify_graph.model_runtime.entities import LLMUsage -from dify_graph.nodes.knowledge_retrieval.entities import MetadataFilteringCondition from dify_graph.nodes.llm.entities import ModelConfig +from .entities import MetadataFilteringCondition + class SourceChildChunk(BaseModel): id: str = Field(default="", description="Child chunk ID") @@ -28,7 +29,7 @@ class SourceMetadata(BaseModel): segment_id: str | None = Field(default=None, description="Segment unique identifier") retriever_from: str = Field(default="workflow", description="Retriever source context") score: float = Field(default=0.0, description="Retrieval relevance score") - child_chunks: list[SourceChildChunk] = Field(default=[], description="List of child chunks") + child_chunks: list[SourceChildChunk] = Field(default_factory=list, description="List of child chunks") segment_hit_count: int | None = Field(default=0, description="Number of times segment was retrieved") segment_word_count: int | None = Field(default=0, description="Word count of the segment") segment_position: int | None = Field(default=0, description="Position of segment in document") @@ -81,28 +82,7 @@ class KnowledgeRetrievalRequest(BaseModel): class RAGRetrievalProtocol(Protocol): - """Protocol for RAG-based knowledge retrieval implementations. - - Implementations of this protocol handle knowledge retrieval from datasets - including rate limiting, dataset filtering, and document retrieval. - """ - @property - def llm_usage(self) -> LLMUsage: - """Return accumulated LLM usage for retrieval operations.""" - ... + def llm_usage(self) -> LLMUsage: ... - def knowledge_retrieval(self, request: KnowledgeRetrievalRequest) -> list[Source]: - """Retrieve knowledge from datasets based on the provided request. - - Args: - request: Knowledge retrieval request with search parameters - - Returns: - List of sources matching the search criteria - - Raises: - RateLimitExceededError: If rate limit is exceeded - ModelNotExistError: If specified model doesn't exist - """ - ... + def knowledge_retrieval(self, request: KnowledgeRetrievalRequest) -> list[Source]: ... diff --git a/api/dify_graph/nodes/knowledge_retrieval/template_prompts.py b/api/core/workflow/nodes/knowledge_retrieval/template_prompts.py similarity index 100% rename from api/dify_graph/nodes/knowledge_retrieval/template_prompts.py rename to api/core/workflow/nodes/knowledge_retrieval/template_prompts.py diff --git a/api/dify_graph/nodes/trigger_plugin/__init__.py b/api/core/workflow/nodes/trigger_plugin/__init__.py similarity index 100% rename from api/dify_graph/nodes/trigger_plugin/__init__.py rename to api/core/workflow/nodes/trigger_plugin/__init__.py diff --git a/api/dify_graph/nodes/trigger_plugin/entities.py b/api/core/workflow/nodes/trigger_plugin/entities.py similarity index 91% rename from api/dify_graph/nodes/trigger_plugin/entities.py rename to api/core/workflow/nodes/trigger_plugin/entities.py index 75d10ecaa4..ea7d20befe 100644 --- a/api/dify_graph/nodes/trigger_plugin/entities.py +++ b/api/core/workflow/nodes/trigger_plugin/entities.py @@ -3,14 +3,19 @@ from typing import Any, Literal, Union from pydantic import BaseModel, Field, ValidationInfo, field_validator +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.entities.entities import EventParameter -from dify_graph.nodes.base.entities import BaseNodeData -from dify_graph.nodes.trigger_plugin.exc import TriggerEventParameterError +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import NodeType + +from .exc import TriggerEventParameterError class TriggerEventNodeData(BaseNodeData): """Plugin trigger node data""" + type: NodeType = TRIGGER_PLUGIN_NODE_TYPE + class TriggerEventInput(BaseModel): value: Union[Any, list[str]] type: Literal["mixed", "variable", "constant"] @@ -38,8 +43,6 @@ class TriggerEventNodeData(BaseNodeData): raise ValueError("value must be a string, int, float, bool or dict") return type - title: str - desc: str | None = None plugin_id: str = Field(..., description="Plugin ID") provider_id: str = Field(..., description="Provider ID") event_name: str = Field(..., description="Event name") diff --git a/api/dify_graph/nodes/trigger_plugin/exc.py b/api/core/workflow/nodes/trigger_plugin/exc.py similarity index 100% rename from api/dify_graph/nodes/trigger_plugin/exc.py rename to api/core/workflow/nodes/trigger_plugin/exc.py diff --git a/api/dify_graph/nodes/trigger_plugin/trigger_event_node.py b/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py similarity index 80% rename from api/dify_graph/nodes/trigger_plugin/trigger_event_node.py rename to api/core/workflow/nodes/trigger_plugin/trigger_event_node.py index b4f1116f7e..2048a53064 100644 --- a/api/dify_graph/nodes/trigger_plugin/trigger_event_node.py +++ b/api/core/workflow/nodes/trigger_plugin/trigger_event_node.py @@ -1,8 +1,10 @@ from collections.abc import Mapping +from typing import Any, cast +from core.trigger.constants import TRIGGER_INFO_METADATA_KEY, TRIGGER_PLUGIN_NODE_TYPE from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID -from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus +from dify_graph.enums import NodeExecutionType, WorkflowNodeExecutionMetadataKey from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -10,7 +12,7 @@ from .entities import TriggerEventNodeData class TriggerEventNode(Node[TriggerEventNodeData]): - node_type = NodeType.TRIGGER_PLUGIN + node_type = TRIGGER_PLUGIN_NODE_TYPE execution_type = NodeExecutionType.ROOT @classmethod @@ -32,6 +34,9 @@ class TriggerEventNode(Node[TriggerEventNodeData]): def version(cls) -> str: return "1" + def populate_start_event(self, event) -> None: + event.provider_id = self.node_data.provider_id + def _run(self) -> NodeRunResult: """ Run the plugin trigger node. @@ -41,8 +46,8 @@ class TriggerEventNode(Node[TriggerEventNodeData]): """ # Get trigger data passed when workflow was triggered - metadata = { - WorkflowNodeExecutionMetadataKey.TRIGGER_INFO: { + metadata: dict[WorkflowNodeExecutionMetadataKey, Any] = { + cast(WorkflowNodeExecutionMetadataKey, TRIGGER_INFO_METADATA_KEY): { "provider_id": self.node_data.provider_id, "event_name": self.node_data.event_name, "plugin_unique_identifier": self.node_data.plugin_unique_identifier, diff --git a/api/core/workflow/nodes/trigger_schedule/__init__.py b/api/core/workflow/nodes/trigger_schedule/__init__.py new file mode 100644 index 0000000000..07b711a0fd --- /dev/null +++ b/api/core/workflow/nodes/trigger_schedule/__init__.py @@ -0,0 +1,3 @@ +from .trigger_schedule_node import TriggerScheduleNode + +__all__ = ["TriggerScheduleNode"] diff --git a/api/dify_graph/nodes/trigger_schedule/entities.py b/api/core/workflow/nodes/trigger_schedule/entities.py similarity index 88% rename from api/dify_graph/nodes/trigger_schedule/entities.py rename to api/core/workflow/nodes/trigger_schedule/entities.py index 6daadc7666..95a2548678 100644 --- a/api/dify_graph/nodes/trigger_schedule/entities.py +++ b/api/core/workflow/nodes/trigger_schedule/entities.py @@ -2,7 +2,9 @@ from typing import Literal, Union from pydantic import BaseModel, Field -from dify_graph.nodes.base import BaseNodeData +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import NodeType class TriggerScheduleNodeData(BaseNodeData): @@ -10,6 +12,7 @@ class TriggerScheduleNodeData(BaseNodeData): Trigger Schedule Node Data """ + type: NodeType = TRIGGER_SCHEDULE_NODE_TYPE mode: str = Field(default="visual", description="Schedule mode: visual or cron") frequency: str | None = Field(default=None, description="Frequency for visual mode: hourly, daily, weekly, monthly") cron_expression: str | None = Field(default=None, description="Cron expression for cron mode") diff --git a/api/dify_graph/nodes/trigger_schedule/exc.py b/api/core/workflow/nodes/trigger_schedule/exc.py similarity index 90% rename from api/dify_graph/nodes/trigger_schedule/exc.py rename to api/core/workflow/nodes/trigger_schedule/exc.py index caea6241e4..336d64d58f 100644 --- a/api/dify_graph/nodes/trigger_schedule/exc.py +++ b/api/core/workflow/nodes/trigger_schedule/exc.py @@ -1,4 +1,4 @@ -from dify_graph.nodes.base.exc import BaseNodeError +from dify_graph.entities.exc import BaseNodeError class ScheduleNodeError(BaseNodeError): diff --git a/api/dify_graph/nodes/trigger_schedule/trigger_schedule_node.py b/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py similarity index 85% rename from api/dify_graph/nodes/trigger_schedule/trigger_schedule_node.py rename to api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py index 7e92eb3f4f..b9580e6ab1 100644 --- a/api/dify_graph/nodes/trigger_schedule/trigger_schedule_node.py +++ b/api/core/workflow/nodes/trigger_schedule/trigger_schedule_node.py @@ -1,15 +1,17 @@ from collections.abc import Mapping +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.enums import NodeExecutionType from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node -from dify_graph.nodes.trigger_schedule.entities import TriggerScheduleNodeData + +from .entities import TriggerScheduleNodeData class TriggerScheduleNode(Node[TriggerScheduleNodeData]): - node_type = NodeType.TRIGGER_SCHEDULE + node_type = TRIGGER_SCHEDULE_NODE_TYPE execution_type = NodeExecutionType.ROOT @classmethod @@ -19,7 +21,7 @@ class TriggerScheduleNode(Node[TriggerScheduleNodeData]): @classmethod def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]: return { - "type": "trigger-schedule", + "type": TRIGGER_SCHEDULE_NODE_TYPE, "config": { "mode": "visual", "frequency": "daily", diff --git a/api/dify_graph/nodes/trigger_webhook/__init__.py b/api/core/workflow/nodes/trigger_webhook/__init__.py similarity index 100% rename from api/dify_graph/nodes/trigger_webhook/__init__.py rename to api/core/workflow/nodes/trigger_webhook/__init__.py diff --git a/api/core/workflow/nodes/trigger_webhook/entities.py b/api/core/workflow/nodes/trigger_webhook/entities.py new file mode 100644 index 0000000000..242bf5ef6a --- /dev/null +++ b/api/core/workflow/nodes/trigger_webhook/entities.py @@ -0,0 +1,133 @@ +from collections.abc import Sequence +from enum import StrEnum + +from pydantic import BaseModel, Field, field_validator + +from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import NodeType +from dify_graph.variables.types import SegmentType + +_WEBHOOK_HEADER_ALLOWED_TYPES = frozenset( + { + SegmentType.STRING, + } +) + +_WEBHOOK_QUERY_PARAMETER_ALLOWED_TYPES = frozenset( + { + SegmentType.STRING, + SegmentType.NUMBER, + SegmentType.BOOLEAN, + } +) + +_WEBHOOK_PARAMETER_ALLOWED_TYPES = _WEBHOOK_HEADER_ALLOWED_TYPES | _WEBHOOK_QUERY_PARAMETER_ALLOWED_TYPES + +_WEBHOOK_BODY_ALLOWED_TYPES = frozenset( + { + SegmentType.STRING, + SegmentType.NUMBER, + SegmentType.BOOLEAN, + SegmentType.OBJECT, + SegmentType.ARRAY_STRING, + SegmentType.ARRAY_NUMBER, + SegmentType.ARRAY_BOOLEAN, + SegmentType.ARRAY_OBJECT, + SegmentType.FILE, + } +) + + +class Method(StrEnum): + GET = "get" + POST = "post" + HEAD = "head" + PATCH = "patch" + PUT = "put" + DELETE = "delete" + + +class ContentType(StrEnum): + JSON = "application/json" + FORM_DATA = "multipart/form-data" + FORM_URLENCODED = "application/x-www-form-urlencoded" + TEXT = "text/plain" + BINARY = "application/octet-stream" + + +class WebhookParameter(BaseModel): + """Parameter definition for headers or query params.""" + + name: str + type: SegmentType = SegmentType.STRING + required: bool = False + + @field_validator("type", mode="after") + @classmethod + def validate_type(cls, v: SegmentType) -> SegmentType: + if v not in _WEBHOOK_PARAMETER_ALLOWED_TYPES: + raise ValueError(f"Unsupported webhook parameter type: {v}") + return v + + +class WebhookBodyParameter(BaseModel): + """Body parameter with type information.""" + + name: str + type: SegmentType = SegmentType.STRING + required: bool = False + + @field_validator("type", mode="after") + @classmethod + def validate_type(cls, v: SegmentType) -> SegmentType: + if v not in _WEBHOOK_BODY_ALLOWED_TYPES: + raise ValueError(f"Unsupported webhook body parameter type: {v}") + return v + + +class WebhookData(BaseNodeData): + """ + Webhook Node Data. + """ + + class SyncMode(StrEnum): + SYNC = "async" # only support + + type: NodeType = TRIGGER_WEBHOOK_NODE_TYPE + method: Method = Method.GET + content_type: ContentType = Field(default=ContentType.JSON) + headers: Sequence[WebhookParameter] = Field(default_factory=list) + params: Sequence[WebhookParameter] = Field(default_factory=list) # query parameters + body: Sequence[WebhookBodyParameter] = Field(default_factory=list) + + @field_validator("method", mode="before") + @classmethod + def normalize_method(cls, v) -> str: + """Normalize HTTP method to lowercase to support both uppercase and lowercase input.""" + if isinstance(v, str): + return v.lower() + return v + + @field_validator("headers", mode="after") + @classmethod + def validate_header_types(cls, v: Sequence[WebhookParameter]) -> Sequence[WebhookParameter]: + for param in v: + if param.type not in _WEBHOOK_HEADER_ALLOWED_TYPES: + raise ValueError(f"Unsupported webhook header parameter type: {param.type}") + return v + + @field_validator("params", mode="after") + @classmethod + def validate_query_parameter_types(cls, v: Sequence[WebhookParameter]) -> Sequence[WebhookParameter]: + for param in v: + if param.type not in _WEBHOOK_QUERY_PARAMETER_ALLOWED_TYPES: + raise ValueError(f"Unsupported webhook query parameter type: {param.type}") + return v + + status_code: int = 200 # Expected status code for response + response_body: str = "" # Template for response body + + # Webhook specific fields (not from client data, set internally) + webhook_id: str | None = None # Set when webhook trigger is created + timeout: int = 30 # Timeout in seconds to wait for webhook response diff --git a/api/dify_graph/nodes/trigger_webhook/exc.py b/api/core/workflow/nodes/trigger_webhook/exc.py similarity index 87% rename from api/dify_graph/nodes/trigger_webhook/exc.py rename to api/core/workflow/nodes/trigger_webhook/exc.py index 853b2456c5..4d87f2a069 100644 --- a/api/dify_graph/nodes/trigger_webhook/exc.py +++ b/api/core/workflow/nodes/trigger_webhook/exc.py @@ -1,4 +1,4 @@ -from dify_graph.nodes.base.exc import BaseNodeError +from dify_graph.entities.exc import BaseNodeError class WebhookNodeError(BaseNodeError): diff --git a/api/dify_graph/nodes/trigger_webhook/node.py b/api/core/workflow/nodes/trigger_webhook/node.py similarity index 97% rename from api/dify_graph/nodes/trigger_webhook/node.py rename to api/core/workflow/nodes/trigger_webhook/node.py index e466541908..317844cbda 100644 --- a/api/dify_graph/nodes/trigger_webhook/node.py +++ b/api/core/workflow/nodes/trigger_webhook/node.py @@ -2,9 +2,10 @@ import logging from collections.abc import Mapping from typing import Any +from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.enums import NodeExecutionType from dify_graph.file import FileTransferMethod from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -19,7 +20,7 @@ logger = logging.getLogger(__name__) class TriggerWebhookNode(Node[WebhookData]): - node_type = NodeType.TRIGGER_WEBHOOK + node_type = TRIGGER_WEBHOOK_NODE_TYPE execution_type = NodeExecutionType.ROOT @classmethod @@ -152,7 +153,7 @@ class TriggerWebhookNode(Node[WebhookData]): outputs[param_name] = raw_data continue - if param_type == "file": + if param_type == SegmentType.FILE: # Get File object (already processed by webhook controller) files = webhook_data.get("files", {}) if files and isinstance(files, dict): diff --git a/api/core/workflow/workflow_entry.py b/api/core/workflow/workflow_entry.py index 284c0619f0..fcd8fc08de 100644 --- a/api/core/workflow/workflow_entry.py +++ b/api/core/workflow/workflow_entry.py @@ -9,10 +9,10 @@ from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_di from core.app.workflow.layers.llm_quota import LLMQuotaLayer from core.app.workflow.layers.observability import ObservabilityLayer from core.sandbox import Sandbox -from core.workflow.node_factory import DifyNodeFactory +from core.workflow.node_factory import DifyNodeFactory, resolve_workflow_node_class from dify_graph.constants import ENVIRONMENT_VARIABLE_NODE_ID from dify_graph.entities import GraphInitParams -from dify_graph.entities.graph_config import NodeConfigData, NodeConfigDict +from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.errors import WorkflowNodeRunFailedError from dify_graph.file.models import File from dify_graph.graph import Graph @@ -22,9 +22,8 @@ from dify_graph.graph_engine.layers import DebugLoggingLayer, ExecutionLimitsLay from dify_graph.graph_engine.layers.base import GraphEngineLayer from dify_graph.graph_engine.protocols.command_channel import CommandChannel from dify_graph.graph_events import GraphEngineEvent, GraphNodeEventBase, GraphRunFailedEvent -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.base.node import Node -from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING from dify_graph.runtime import ChildGraphNotFoundError, GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from dify_graph.variable_loader import DUMMY_VARIABLE_LOADER, VariableLoader, load_into_variable_pool @@ -214,7 +213,7 @@ class WorkflowEntry: node_config_data = node_config["data"] # Get node type - node_type = NodeType(node_config_data["type"]) + node_type = node_config_data.type # init graph init params and runtime state graph_init_params = GraphInitParams( @@ -239,8 +238,7 @@ class WorkflowEntry: graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, ) - typed_node_config = cast(dict[str, object], node_config) - node = cast(Any, node_factory).create_node(typed_node_config) + node = node_factory.create_node(node_config) node_cls = type(node) try: @@ -259,7 +257,7 @@ class WorkflowEntry: variable_mapping=variable_mapping, user_inputs=user_inputs, ) - if node_type != NodeType.DATASOURCE: + if node_type != BuiltinNodeTypes.DATASOURCE: cls.mapping_user_inputs_to_variable_pool( variable_mapping=variable_mapping, user_inputs=user_inputs, @@ -309,7 +307,7 @@ class WorkflowEntry: "height": node_height, "type": "custom", "data": { - "type": NodeType.START, + "type": BuiltinNodeTypes.START, "title": "Start", "desc": "Start", }, @@ -345,11 +343,11 @@ class WorkflowEntry: # Create a minimal graph for single node execution graph_dict = cls._create_single_node_graph(node_id, node_data) - node_type = NodeType(node_data.get("type", "")) - if node_type not in {NodeType.PARAMETER_EXTRACTOR, NodeType.QUESTION_CLASSIFIER}: + node_type = node_data.get("type", "") + if node_type not in {BuiltinNodeTypes.PARAMETER_EXTRACTOR, BuiltinNodeTypes.QUESTION_CLASSIFIER}: raise ValueError(f"Node type {node_type} not supported") - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type]["1"] + node_cls = resolve_workflow_node_class(node_type=node_type, node_version="1") if not node_cls: raise ValueError(f"Node class not found for node type {node_type}") @@ -376,10 +374,7 @@ class WorkflowEntry: graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=time.perf_counter()) # init workflow run state - node_config: NodeConfigDict = { - "id": node_id, - "data": cast(NodeConfigData, node_data), - } + node_config = NodeConfigDictAdapter.validate_python({"id": node_id, "data": node_data}) node_factory = DifyNodeFactory( graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, diff --git a/api/dify_graph/README.md b/api/dify_graph/README.md index 09c4f5afdc..2fc5b8b890 100644 --- a/api/dify_graph/README.md +++ b/api/dify_graph/README.md @@ -113,7 +113,7 @@ The codebase enforces strict layering via import-linter: 1. Create node class in `nodes//` 1. Inherit from `BaseNode` or appropriate base class 1. Implement `_run()` method -1. Register in `nodes/node_mapping.py` +1. Ensure the node module is importable under `nodes//` 1. Add tests in `tests/unit_tests/dify_graph/nodes/` ### Implementing a Custom Layer diff --git a/api/dify_graph/entities/__init__.py b/api/dify_graph/entities/__init__.py index aeb38d240d..c695625e4d 100644 --- a/api/dify_graph/entities/__init__.py +++ b/api/dify_graph/entities/__init__.py @@ -1,4 +1,3 @@ -from .agent import AgentNodeStrategyInit from .graph_init_params import GraphInitParams from .tool_entities import ToolCall, ToolCallResult, ToolResult, ToolResultStatus from .workflow_execution import WorkflowExecution @@ -6,7 +5,6 @@ from .workflow_node_execution import WorkflowNodeExecution from .workflow_start_reason import WorkflowStartReason __all__ = [ - "AgentNodeStrategyInit", "GraphInitParams", "ToolCall", "ToolCallResult", diff --git a/api/dify_graph/entities/agent.py b/api/dify_graph/entities/agent.py deleted file mode 100644 index 2b4d6db76f..0000000000 --- a/api/dify_graph/entities/agent.py +++ /dev/null @@ -1,8 +0,0 @@ -from pydantic import BaseModel - - -class AgentNodeStrategyInit(BaseModel): - """Agent node strategy initialization data.""" - - name: str - icon: str | None = None diff --git a/api/dify_graph/entities/base_node_data.py b/api/dify_graph/entities/base_node_data.py new file mode 100644 index 0000000000..8228ddda80 --- /dev/null +++ b/api/dify_graph/entities/base_node_data.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +import json +from abc import ABC +from builtins import type as type_ +from enum import StrEnum +from typing import Any, Union + +from pydantic import BaseModel, ConfigDict, Field, model_validator + +from dify_graph.entities.exc import DefaultValueTypeError +from dify_graph.enums import ErrorStrategy, NodeType + +# Project supports Python 3.11+, where `typing.Union[...]` is valid in `isinstance`. +_NumberType = Union[int, float] + + +class RetryConfig(BaseModel): + """node retry config""" + + max_retries: int = 0 # max retry times + retry_interval: int = 0 # retry interval in milliseconds + retry_enabled: bool = False # whether retry is enabled + + @property + def retry_interval_seconds(self) -> float: + return self.retry_interval / 1000 + + +class DefaultValueType(StrEnum): + STRING = "string" + NUMBER = "number" + OBJECT = "object" + ARRAY_NUMBER = "array[number]" + ARRAY_STRING = "array[string]" + ARRAY_OBJECT = "array[object]" + ARRAY_FILES = "array[file]" + + +class DefaultValue(BaseModel): + value: Any = None + type: DefaultValueType + key: str + + @staticmethod + def _parse_json(value: str): + """Unified JSON parsing handler""" + try: + return json.loads(value) + except json.JSONDecodeError: + raise DefaultValueTypeError(f"Invalid JSON format for value: {value}") + + @staticmethod + def _validate_array(value: Any, element_type: type_ | tuple[type_, ...]) -> bool: + """Unified array type validation""" + return isinstance(value, list) and all(isinstance(x, element_type) for x in value) + + @staticmethod + def _convert_number(value: str) -> float: + """Unified number conversion handler""" + try: + return float(value) + except ValueError: + raise DefaultValueTypeError(f"Cannot convert to number: {value}") + + @model_validator(mode="after") + def validate_value_type(self) -> DefaultValue: + # Type validation configuration + type_validators: dict[DefaultValueType, dict[str, Any]] = { + DefaultValueType.STRING: { + "type": str, + "converter": lambda x: x, + }, + DefaultValueType.NUMBER: { + "type": _NumberType, + "converter": self._convert_number, + }, + DefaultValueType.OBJECT: { + "type": dict, + "converter": self._parse_json, + }, + DefaultValueType.ARRAY_NUMBER: { + "type": list, + "element_type": _NumberType, + "converter": self._parse_json, + }, + DefaultValueType.ARRAY_STRING: { + "type": list, + "element_type": str, + "converter": self._parse_json, + }, + DefaultValueType.ARRAY_OBJECT: { + "type": list, + "element_type": dict, + "converter": self._parse_json, + }, + } + + validator: dict[str, Any] = type_validators.get(self.type, {}) + if not validator: + if self.type == DefaultValueType.ARRAY_FILES: + # Handle files type + return self + raise DefaultValueTypeError(f"Unsupported type: {self.type}") + + # Handle string input cases + if isinstance(self.value, str) and self.type != DefaultValueType.STRING: + self.value = validator["converter"](self.value) + + # Validate base type + if not isinstance(self.value, validator["type"]): + raise DefaultValueTypeError(f"Value must be {validator['type'].__name__} type for {self.value}") + + # Validate array element types + if validator["type"] == list and not self._validate_array(self.value, validator["element_type"]): + raise DefaultValueTypeError(f"All elements must be {validator['element_type'].__name__} for {self.value}") + + return self + + +class BaseNodeData(ABC, BaseModel): + # Raw graph payloads are first validated through `NodeConfigDictAdapter`, where + # `node["data"]` is typed as `BaseNodeData` before the concrete node class is known. + # `type` therefore accepts downstream string node kinds; unknown node implementations + # are rejected later when the node factory resolves the node registry. + # At that boundary, node-specific fields are still "extra" relative to this shared DTO, + # and persisted templates/workflows also carry undeclared compatibility keys such as + # `selected`, `params`, `paramSchemas`, and `datasource_label`. Keep extras permissive + # here until graph parsing becomes discriminated by node type or those legacy payloads + # are normalized. + model_config = ConfigDict(extra="allow") + + type: NodeType + title: str = "" + desc: str | None = None + version: str = "1" + error_strategy: ErrorStrategy | None = None + default_value: list[DefaultValue] | None = None + retry_config: RetryConfig = Field(default_factory=RetryConfig) + + parent_node_id: str | None = None + + @property + def is_extractor_node(self) -> bool: + return self.parent_node_id is not None + + @property + def default_value_dict(self) -> dict[str, Any]: + if self.default_value: + return {item.key: item.value for item in self.default_value} + return {} + + def __getitem__(self, key: str) -> Any: + """ + Dict-style access without calling model_dump() on every lookup. + Prefer using model fields and Pydantic's extra storage. + """ + # First, check declared model fields + if key in self.__class__.model_fields: + return getattr(self, key) + + # Then, check undeclared compatibility fields stored in Pydantic's extra dict. + extras = getattr(self, "__pydantic_extra__", None) + if extras is None: + extras = getattr(self, "model_extra", None) + if extras is not None and key in extras: + return extras[key] + + raise KeyError(key) + + def get(self, key: str, default: Any = None) -> Any: + """ + Dict-style .get() without calling model_dump() on every lookup. + """ + if key in self.__class__.model_fields: + return getattr(self, key) + + extras = getattr(self, "__pydantic_extra__", None) + if extras is None: + extras = getattr(self, "model_extra", None) + if extras is not None and key in extras: + return extras.get(key, default) + + return default diff --git a/api/dify_graph/nodes/base/exc.py b/api/dify_graph/entities/exc.py similarity index 100% rename from api/dify_graph/nodes/base/exc.py rename to api/dify_graph/entities/exc.py diff --git a/api/dify_graph/entities/graph_config.py b/api/dify_graph/entities/graph_config.py index 209dcfe6bc..36f7b94e82 100644 --- a/api/dify_graph/entities/graph_config.py +++ b/api/dify_graph/entities/graph_config.py @@ -4,21 +4,20 @@ import sys from pydantic import TypeAdapter, with_config +from dify_graph.entities.base_node_data import BaseNodeData + if sys.version_info >= (3, 12): from typing import TypedDict else: from typing_extensions import TypedDict -@with_config(extra="allow") -class NodeConfigData(TypedDict): - type: str - - @with_config(extra="allow") class NodeConfigDict(TypedDict): id: str - data: NodeConfigData + # This is the permissive raw graph boundary. Node factories re-validate `data` + # with the concrete `NodeData` subtype after resolving the node implementation. + data: BaseNodeData NodeConfigDictAdapter = TypeAdapter(NodeConfigDict) diff --git a/api/dify_graph/entities/workflow_node_execution.py b/api/dify_graph/entities/workflow_node_execution.py index 9dd04e331b..bc7e0d02e5 100644 --- a/api/dify_graph/entities/workflow_node_execution.py +++ b/api/dify_graph/entities/workflow_node_execution.py @@ -48,7 +48,7 @@ class WorkflowNodeExecution(BaseModel): index: int # Sequence number for ordering in trace visualization predecessor_node_id: str | None = None # ID of the node that executed before this one node_id: str # ID of the node being executed - node_type: NodeType # Type of node (e.g., start, llm, knowledge) + node_type: NodeType # Type of node (e.g., start, llm, downstream response node) title: str # Display title of the node # Execution data diff --git a/api/dify_graph/enums.py b/api/dify_graph/enums.py index 37bc339f44..9d515c9c43 100644 --- a/api/dify_graph/enums.py +++ b/api/dify_graph/enums.py @@ -1,4 +1,5 @@ from enum import StrEnum +from typing import ClassVar, TypeAlias class NodeState(StrEnum): @@ -33,59 +34,85 @@ class SystemVariableKey(StrEnum): INVOKE_FROM = "invoke_from" -class NodeType(StrEnum): - START = "start" - END = "end" - ANSWER = "answer" - LLM = "llm" - KNOWLEDGE_RETRIEVAL = "knowledge-retrieval" - KNOWLEDGE_INDEX = "knowledge-index" - IF_ELSE = "if-else" - CODE = "code" - TEMPLATE_TRANSFORM = "template-transform" - QUESTION_CLASSIFIER = "question-classifier" - HTTP_REQUEST = "http-request" - TOOL = "tool" - DATASOURCE = "datasource" - VARIABLE_AGGREGATOR = "variable-aggregator" - LEGACY_VARIABLE_AGGREGATOR = "variable-assigner" # TODO: Merge this into VARIABLE_AGGREGATOR in the database. - LOOP = "loop" - LOOP_START = "loop-start" - LOOP_END = "loop-end" - ITERATION = "iteration" - ITERATION_START = "iteration-start" # Fake start node for iteration. - PARAMETER_EXTRACTOR = "parameter-extractor" - VARIABLE_ASSIGNER = "assigner" - DOCUMENT_EXTRACTOR = "document-extractor" - LIST_OPERATOR = "list-operator" - AGENT = "agent" - TRIGGER_WEBHOOK = "trigger-webhook" - TRIGGER_SCHEDULE = "trigger-schedule" - TRIGGER_PLUGIN = "trigger-plugin" - HUMAN_INPUT = "human-input" - COMMAND = "command" - FILE_UPLOAD = "file-upload" - GROUP = "group" +NodeType: TypeAlias = str - @property - def is_trigger_node(self) -> bool: - """Check if this node type is a trigger node.""" - return self in [ - NodeType.TRIGGER_WEBHOOK, - NodeType.TRIGGER_SCHEDULE, - NodeType.TRIGGER_PLUGIN, - ] - @property - def is_start_node(self) -> bool: - """Check if this node type can serve as a workflow entry point.""" - return self in [ - NodeType.START, - NodeType.DATASOURCE, - NodeType.TRIGGER_WEBHOOK, - NodeType.TRIGGER_SCHEDULE, - NodeType.TRIGGER_PLUGIN, - ] +class BuiltinNodeTypes: + """Built-in node type string constants. + + `node_type` values are plain strings throughout the graph runtime. This namespace + only exposes the built-in values shipped by `dify_graph`; downstream packages can + use additional strings without extending this class. + """ + + START: ClassVar[NodeType] = "start" + END: ClassVar[NodeType] = "end" + ANSWER: ClassVar[NodeType] = "answer" + LLM: ClassVar[NodeType] = "llm" + KNOWLEDGE_RETRIEVAL: ClassVar[NodeType] = "knowledge-retrieval" + IF_ELSE: ClassVar[NodeType] = "if-else" + CODE: ClassVar[NodeType] = "code" + TEMPLATE_TRANSFORM: ClassVar[NodeType] = "template-transform" + QUESTION_CLASSIFIER: ClassVar[NodeType] = "question-classifier" + HTTP_REQUEST: ClassVar[NodeType] = "http-request" + TOOL: ClassVar[NodeType] = "tool" + DATASOURCE: ClassVar[NodeType] = "datasource" + VARIABLE_AGGREGATOR: ClassVar[NodeType] = "variable-aggregator" + LEGACY_VARIABLE_AGGREGATOR: ClassVar[NodeType] = "variable-assigner" + LOOP: ClassVar[NodeType] = "loop" + LOOP_START: ClassVar[NodeType] = "loop-start" + LOOP_END: ClassVar[NodeType] = "loop-end" + ITERATION: ClassVar[NodeType] = "iteration" + ITERATION_START: ClassVar[NodeType] = "iteration-start" + PARAMETER_EXTRACTOR: ClassVar[NodeType] = "parameter-extractor" + VARIABLE_ASSIGNER: ClassVar[NodeType] = "assigner" + DOCUMENT_EXTRACTOR: ClassVar[NodeType] = "document-extractor" + LIST_OPERATOR: ClassVar[NodeType] = "list-operator" + AGENT: ClassVar[NodeType] = "agent" + KNOWLEDGE_INDEX: ClassVar[NodeType] = "knowledge-index" + TRIGGER_WEBHOOK: ClassVar[NodeType] = "trigger-webhook" + TRIGGER_SCHEDULE: ClassVar[NodeType] = "trigger-schedule" + TRIGGER_PLUGIN: ClassVar[NodeType] = "trigger-plugin" + HUMAN_INPUT: ClassVar[NodeType] = "human-input" + COMMAND: ClassVar[NodeType] = "command" + FILE_UPLOAD: ClassVar[NodeType] = "file-upload" + GROUP: ClassVar[NodeType] = "group" + + +BUILT_IN_NODE_TYPES: tuple[NodeType, ...] = ( + BuiltinNodeTypes.START, + BuiltinNodeTypes.END, + BuiltinNodeTypes.ANSWER, + BuiltinNodeTypes.LLM, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, + BuiltinNodeTypes.IF_ELSE, + BuiltinNodeTypes.CODE, + BuiltinNodeTypes.TEMPLATE_TRANSFORM, + BuiltinNodeTypes.QUESTION_CLASSIFIER, + BuiltinNodeTypes.HTTP_REQUEST, + BuiltinNodeTypes.TOOL, + BuiltinNodeTypes.DATASOURCE, + BuiltinNodeTypes.VARIABLE_AGGREGATOR, + BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR, + BuiltinNodeTypes.LOOP, + BuiltinNodeTypes.LOOP_START, + BuiltinNodeTypes.LOOP_END, + BuiltinNodeTypes.ITERATION, + BuiltinNodeTypes.ITERATION_START, + BuiltinNodeTypes.PARAMETER_EXTRACTOR, + BuiltinNodeTypes.VARIABLE_ASSIGNER, + BuiltinNodeTypes.DOCUMENT_EXTRACTOR, + BuiltinNodeTypes.LIST_OPERATOR, + BuiltinNodeTypes.AGENT, + BuiltinNodeTypes.KNOWLEDGE_INDEX, + BuiltinNodeTypes.TRIGGER_WEBHOOK, + BuiltinNodeTypes.TRIGGER_SCHEDULE, + BuiltinNodeTypes.TRIGGER_PLUGIN, + BuiltinNodeTypes.HUMAN_INPUT, + BuiltinNodeTypes.COMMAND, + BuiltinNodeTypes.FILE_UPLOAD, + BuiltinNodeTypes.GROUP, +) class NodeExecutionType(StrEnum): @@ -239,7 +266,6 @@ class WorkflowNodeExecutionMetadataKey(StrEnum): CURRENCY = "currency" TOOL_INFO = "tool_info" AGENT_LOG = "agent_log" - TRIGGER_INFO = "trigger_info" ITERATION_ID = "iteration_id" ITERATION_INDEX = "iteration_index" LOOP_ID = "loop_id" diff --git a/api/dify_graph/file/models.py b/api/dify_graph/file/models.py index db12d4f57a..dcba00978e 100644 --- a/api/dify_graph/file/models.py +++ b/api/dify_graph/file/models.py @@ -2,6 +2,7 @@ from __future__ import annotations from collections.abc import Mapping, Sequence from typing import Any +from uuid import UUID, uuid4 from pydantic import BaseModel, Field, model_validator @@ -43,6 +44,24 @@ class FileUploadConfig(BaseModel): number_limits: int = 0 +class ToolFile(BaseModel): + id: UUID = Field(default_factory=uuid4, description="Unique identifier for the file") + user_id: UUID = Field(..., description="ID of the user who owns this file") + tenant_id: UUID = Field(..., description="ID of the tenant/organization") + conversation_id: UUID | None = Field(None, description="ID of the associated conversation") + file_key: str = Field(..., max_length=255, description="Storage key for the file") + mimetype: str = Field(..., max_length=255, description="MIME type of the file") + original_url: str | None = Field( + None, max_length=2048, description="Original URL if file was fetched from external source" + ) + name: str = Field(default="", max_length=255, description="Display name of the file") + size: int = Field(default=-1, ge=-1, description="File size in bytes (-1 if unknown)") + + class Config: + from_attributes = True # Enable ORM mode for SQLAlchemy compatibility + populate_by_name = True + + class File(BaseModel): # NOTE: dify_model_identity is a special identifier used to distinguish between # new and old data formats during serialization and deserialization. diff --git a/api/dify_graph/graph/graph.py b/api/dify_graph/graph/graph.py index 747f8d9e30..dda6fa3efe 100644 --- a/api/dify_graph/graph/graph.py +++ b/api/dify_graph/graph/graph.py @@ -8,7 +8,7 @@ from typing import Protocol, cast, final from pydantic import TypeAdapter from dify_graph.entities.graph_config import NodeConfigDict -from dify_graph.enums import ErrorStrategy, NodeExecutionType, NodeState, NodeType +from dify_graph.enums import ErrorStrategy, NodeExecutionType, NodeState from dify_graph.nodes.base.node import Node from libs.typing import is_str @@ -34,7 +34,8 @@ class NodeFactory(Protocol): :param node_config: node configuration dictionary containing type and other data :return: initialized Node instance - :raises ValueError: if node type is unknown or configuration is invalid + :raises ValueError: if node type is unknown or no implementation exists for the resolved version + :raises ValidationError: if node_config does not satisfy NodeConfigDict/BaseNodeData validation """ ... @@ -82,53 +83,6 @@ class Graph: return node_configs_map - @classmethod - def _find_root_node_id( - cls, - node_configs_map: Mapping[str, NodeConfigDict], - edge_configs: Sequence[Mapping[str, object]], - root_node_id: str | None = None, - ) -> str: - """ - Find the root node ID if not specified. - - :param node_configs_map: mapping of node ID to node config - :param edge_configs: list of edge configurations - :param root_node_id: explicitly specified root node ID - :return: determined root node ID - """ - if root_node_id: - if root_node_id not in node_configs_map: - raise ValueError(f"Root node id {root_node_id} not found in the graph") - return root_node_id - - # Find nodes with no incoming edges - nodes_with_incoming: set[str] = set() - for edge_config in edge_configs: - target = edge_config.get("target") - if isinstance(target, str): - nodes_with_incoming.add(target) - - root_candidates = [nid for nid in node_configs_map if nid not in nodes_with_incoming] - - # Prefer START node if available - start_node_id = None - for nid in root_candidates: - node_data = node_configs_map[nid]["data"] - node_type = node_data["type"] - if not isinstance(node_type, str): - continue - if NodeType(node_type).is_start_node: - start_node_id = nid - break - - root_node_id = start_node_id or (root_candidates[0] if root_candidates else None) - - if not root_node_id: - raise ValueError("Unable to determine root node ID") - - return root_node_id - @classmethod def _build_edges( cls, edge_configs: list[dict[str, object]] @@ -203,6 +157,23 @@ class Graph: return GraphBuilder(graph_cls=cls) + @staticmethod + def _filter_canvas_only_nodes(node_configs: Sequence[Mapping[str, object]]) -> list[dict[str, object]]: + """ + Remove editor-only nodes before `NodeConfigDict` validation. + + Persisted note widgets use a top-level `type == "custom-note"` but leave + `data.type` empty because they are never executable graph nodes. Filter + them while configs are still raw dicts so Pydantic does not validate + their placeholder payloads against `BaseNodeData.type: NodeType`. + """ + filtered_node_configs: list[dict[str, object]] = [] + for node_config in node_configs: + if node_config.get("type", "") == "custom-note": + continue + filtered_node_configs.append(dict(node_config)) + return filtered_node_configs + @classmethod def _promote_fail_branch_nodes(cls, nodes: dict[str, Node]) -> None: """ @@ -286,15 +257,15 @@ class Graph: *, graph_config: Mapping[str, object], node_factory: NodeFactory, - root_node_id: str | None = None, + root_node_id: str, skip_validation: bool = False, ) -> Graph: """ - Initialize graph + Initialize a graph with an explicit execution entry point. :param graph_config: graph config containing nodes and edges :param node_factory: factory for creating node instances from config data - :param root_node_id: root node id + :param root_node_id: active root node id :return: graph instance """ # Parse configs @@ -302,25 +273,25 @@ class Graph: node_configs = graph_config.get("nodes", []) edge_configs = cast(list[dict[str, object]], edge_configs) + node_configs = cast(list[dict[str, object]], node_configs) + node_configs = cls._filter_canvas_only_nodes(node_configs) node_configs = _ListNodeConfigDict.validate_python(node_configs) if not node_configs: raise ValueError("Graph must have at least one node") - # Filter out UI-only node types: - # - custom-note: top-level type (node_config.type == "custom-note") - # - group: data-level type (node_config.data.type == "group") node_configs = [ node_config for node_config in node_configs - if node_config.get("type", "") != "custom-note" and node_config.get("data", {}).get("type", "") != "group" + if node_config.get("data", {}).get("type", "") != "group" ] + # Parse node configurations node_configs_map = cls._parse_node_configs(node_configs) - # Find root node - root_node_id = cls._find_root_node_id(node_configs_map, edge_configs, root_node_id) + if root_node_id not in node_configs_map: + raise ValueError(f"Root node id {root_node_id} not found in the graph") # Build edges edges, in_edges, out_edges = cls._build_edges(edge_configs) diff --git a/api/dify_graph/graph/validation.py b/api/dify_graph/graph/validation.py index 6840bcfed2..50d1440b04 100644 --- a/api/dify_graph/graph/validation.py +++ b/api/dify_graph/graph/validation.py @@ -4,7 +4,7 @@ from collections.abc import Sequence from dataclasses import dataclass from typing import TYPE_CHECKING, Protocol -from dify_graph.enums import NodeExecutionType, NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeType if TYPE_CHECKING: from .graph import Graph @@ -71,7 +71,7 @@ class _RootNodeValidator: """Validates root node invariants.""" invalid_root_code: str = "INVALID_ROOT" - container_entry_types: tuple[NodeType, ...] = (NodeType.ITERATION_START, NodeType.LOOP_START) + container_entry_types: tuple[NodeType, ...] = (BuiltinNodeTypes.ITERATION_START, BuiltinNodeTypes.LOOP_START) def validate(self, graph: Graph) -> Sequence[GraphValidationIssue]: root_node = graph.root_node @@ -86,7 +86,7 @@ class _RootNodeValidator: ) return issues - node_type = getattr(root_node, "node_type", None) + node_type = root_node.node_type if root_node.execution_type != NodeExecutionType.ROOT and node_type not in self.container_entry_types: issues.append( GraphValidationIssue( @@ -114,45 +114,9 @@ class GraphValidator: raise GraphValidationError(issues) -@dataclass(frozen=True, slots=True) -class _TriggerStartExclusivityValidator: - """Ensures trigger nodes do not coexist with UserInput (start) nodes.""" - - conflict_code: str = "TRIGGER_START_NODE_CONFLICT" - - def validate(self, graph: Graph) -> Sequence[GraphValidationIssue]: - start_node_id: str | None = None - trigger_node_ids: list[str] = [] - - for node in graph.nodes.values(): - node_type = getattr(node, "node_type", None) - if not isinstance(node_type, NodeType): - continue - - if node_type == NodeType.START: - start_node_id = node.id - elif node_type.is_trigger_node: - trigger_node_ids.append(node.id) - - if start_node_id and trigger_node_ids: - trigger_list = ", ".join(trigger_node_ids) - return [ - GraphValidationIssue( - code=self.conflict_code, - message=( - f"UserInput (start) node '{start_node_id}' cannot coexist with trigger nodes: {trigger_list}." - ), - node_id=start_node_id, - ) - ] - - return [] - - _DEFAULT_RULES: tuple[GraphValidationRule, ...] = ( _EdgeEndpointValidator(), _RootNodeValidator(), - _TriggerStartExclusivityValidator(), ) diff --git a/api/dify_graph/graph_engine/response_coordinator/__init__.py b/api/dify_graph/graph_engine/response_coordinator/__init__.py index e11d31199c..2a80d316e8 100644 --- a/api/dify_graph/graph_engine/response_coordinator/__init__.py +++ b/api/dify_graph/graph_engine/response_coordinator/__init__.py @@ -6,5 +6,6 @@ of responses based on upstream node outputs and constants. """ from .coordinator import ResponseStreamCoordinator +from .session import RESPONSE_SESSION_NODE_TYPES -__all__ = ["ResponseStreamCoordinator"] +__all__ = ["RESPONSE_SESSION_NODE_TYPES", "ResponseStreamCoordinator"] diff --git a/api/dify_graph/graph_engine/response_coordinator/session.py b/api/dify_graph/graph_engine/response_coordinator/session.py index 0548e88d93..99ac1b5edf 100644 --- a/api/dify_graph/graph_engine/response_coordinator/session.py +++ b/api/dify_graph/graph_engine/response_coordinator/session.py @@ -3,19 +3,34 @@ Internal response session management for response coordinator. This module contains the private ResponseSession class used internally by ResponseStreamCoordinator to manage streaming sessions. + +`RESPONSE_SESSION_NODE_TYPES` is intentionally mutable so downstream applications +can opt additional response-capable node types into session creation without +patching the coordinator. """ from __future__ import annotations from dataclasses import dataclass +from typing import Protocol, cast -from dify_graph.nodes.answer.answer_node import AnswerNode +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.template import Template -from dify_graph.nodes.end.end_node import EndNode -from dify_graph.nodes.knowledge_index import KnowledgeIndexNode from dify_graph.runtime.graph_runtime_state import NodeProtocol +class _ResponseSessionNodeProtocol(NodeProtocol, Protocol): + """Structural contract required from nodes that can open a response session.""" + + def get_streaming_template(self) -> Template: ... + + +RESPONSE_SESSION_NODE_TYPES: list[NodeType] = [ + BuiltinNodeTypes.ANSWER, + BuiltinNodeTypes.END, +] + + @dataclass class ResponseSession: """ @@ -33,10 +48,9 @@ class ResponseSession: """ Create a ResponseSession from a response-capable node. - The parameter is typed as `NodeProtocol` because the graph is exposed behind a protocol at the runtime layer, - but at runtime this must be an `AnswerNode`, `EndNode`, or `KnowledgeIndexNode` that provides: - - `id: str` - - `get_streaming_template() -> Template` + The parameter is typed as `NodeProtocol` because the graph is exposed behind a protocol at the runtime layer. + At runtime this must be a node whose `node_type` is listed in `RESPONSE_SESSION_NODE_TYPES` + and which implements `get_streaming_template()`. Args: node: Node from the materialized workflow graph. @@ -47,11 +61,22 @@ class ResponseSession: Raises: TypeError: If node is not a supported response node type. """ - if not isinstance(node, AnswerNode | EndNode | KnowledgeIndexNode): - raise TypeError("ResponseSession.from_node only supports AnswerNode, EndNode, or KnowledgeIndexNode") + if node.node_type not in RESPONSE_SESSION_NODE_TYPES: + supported_node_types = ", ".join(RESPONSE_SESSION_NODE_TYPES) + raise TypeError( + "ResponseSession.from_node only supports node types in " + f"RESPONSE_SESSION_NODE_TYPES: {supported_node_types}" + ) + + response_node = cast(_ResponseSessionNodeProtocol, node) + try: + template = response_node.get_streaming_template() + except AttributeError as exc: + raise TypeError("ResponseSession.from_node requires get_streaming_template() on response nodes") from exc + return cls( node_id=node.id, - template=node.get_streaming_template(), + template=template, ) def is_complete(self) -> bool: diff --git a/api/dify_graph/graph_events/node.py b/api/dify_graph/graph_events/node.py index e09bf5c706..093a6e617a 100644 --- a/api/dify_graph/graph_events/node.py +++ b/api/dify_graph/graph_events/node.py @@ -5,7 +5,7 @@ from enum import StrEnum from pydantic import Field from core.rag.entities.citation_metadata import RetrievalSourceMetadata -from dify_graph.entities import AgentNodeStrategyInit, ToolCall, ToolResult +from dify_graph.entities import ToolCall, ToolResult from dify_graph.entities.pause_reason import PauseReason from .base import GraphNodeEventBase @@ -14,8 +14,8 @@ from .base import GraphNodeEventBase class NodeRunStartedEvent(GraphNodeEventBase): node_title: str predecessor_node_id: str | None = None - agent_strategy: AgentNodeStrategyInit | None = None start_at: datetime = Field(..., description="node start time") + extras: dict[str, object] = Field(default_factory=dict) # FIXME(-LAN-): only for ToolNode provider_type: str = "" diff --git a/api/dify_graph/model_runtime/entities/message_entities.py b/api/dify_graph/model_runtime/entities/message_entities.py index 284f4dba01..09c5ba5da5 100644 --- a/api/dify_graph/model_runtime/entities/message_entities.py +++ b/api/dify_graph/model_runtime/entities/message_entities.py @@ -279,5 +279,4 @@ class ToolPromptMessage(PromptMessage): :return: True if prompt message is empty, False otherwise """ - # ToolPromptMessage is not empty if it has content OR has a tool_call_id return super().is_empty() and not self.tool_call_id diff --git a/api/dify_graph/model_runtime/errors/invoke.py b/api/dify_graph/model_runtime/errors/invoke.py index 80cf01fb6c..1a57078b98 100644 --- a/api/dify_graph/model_runtime/errors/invoke.py +++ b/api/dify_graph/model_runtime/errors/invoke.py @@ -4,7 +4,8 @@ class InvokeError(ValueError): description: str | None = None def __init__(self, description: str | None = None): - self.description = description + if description is not None: + self.description = description def __str__(self): return self.description or self.__class__.__name__ diff --git a/api/dify_graph/model_runtime/model_providers/model_provider_factory.py b/api/dify_graph/model_runtime/model_providers/model_provider_factory.py index e168fc11d1..de0677a348 100644 --- a/api/dify_graph/model_runtime/model_providers/model_provider_factory.py +++ b/api/dify_graph/model_runtime/model_providers/model_provider_factory.py @@ -282,7 +282,8 @@ class ModelProviderFactory: all_model_type_models.append(model_schema) simple_provider_schema = provider_schema.to_simple_provider() - simple_provider_schema.models.extend(all_model_type_models) + if model_type: + simple_provider_schema.models = all_model_type_models providers.append(simple_provider_schema) diff --git a/api/dify_graph/node_events/node.py b/api/dify_graph/node_events/node.py index f4432cacab..afaf8fe710 100644 --- a/api/dify_graph/node_events/node.py +++ b/api/dify_graph/node_events/node.py @@ -1,10 +1,10 @@ -from collections.abc import Sequence +from collections.abc import Mapping, Sequence from datetime import datetime from enum import StrEnum +from typing import Any from pydantic import Field -from core.rag.entities.citation_metadata import RetrievalSourceMetadata from dify_graph.entities import ToolCall, ToolResult from dify_graph.entities.pause_reason import PauseReason from dify_graph.file import File @@ -15,7 +15,7 @@ from .base import NodeEventBase class RunRetrieverResourceEvent(NodeEventBase): - retriever_resources: Sequence[RetrievalSourceMetadata] = Field(..., description="retriever resources") + retriever_resources: Sequence[Mapping[str, Any]] = Field(..., description="retriever resources") context: str = Field(..., description="context") context_files: list[File] | None = Field(default=None, description="context files") diff --git a/api/dify_graph/nodes/__init__.py b/api/dify_graph/nodes/__init__.py index d113ad5e70..0223149bb8 100644 --- a/api/dify_graph/nodes/__init__.py +++ b/api/dify_graph/nodes/__init__.py @@ -1,3 +1,3 @@ -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes -__all__ = ["NodeType"] +__all__ = ["BuiltinNodeTypes"] diff --git a/api/dify_graph/nodes/agent/__init__.py b/api/dify_graph/nodes/agent/__init__.py deleted file mode 100644 index 95e7cf895b..0000000000 --- a/api/dify_graph/nodes/agent/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .agent_node import AgentNode - -__all__ = ["AgentNode"] diff --git a/api/dify_graph/nodes/answer/answer_node.py b/api/dify_graph/nodes/answer/answer_node.py index d07b9c8062..4286e1a492 100644 --- a/api/dify_graph/nodes/answer/answer_node.py +++ b/api/dify_graph/nodes/answer/answer_node.py @@ -1,7 +1,7 @@ from collections.abc import Mapping, Sequence from typing import Any -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.answer.entities import AnswerNodeData from dify_graph.nodes.base.node import Node @@ -11,7 +11,7 @@ from dify_graph.variables import ArrayFileSegment, FileSegment, Segment class AnswerNode(Node[AnswerNodeData]): - node_type = NodeType.ANSWER + node_type = BuiltinNodeTypes.ANSWER execution_type = NodeExecutionType.RESPONSE @classmethod @@ -48,12 +48,10 @@ class AnswerNode(Node[AnswerNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: AnswerNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = AnswerNodeData.model_validate(node_data) - - variable_template_parser = VariableTemplateParser(template=typed_node_data.answer) + _ = graph_config # Explicitly mark as unused + variable_template_parser = VariableTemplateParser(template=node_data.answer) variable_selectors = variable_template_parser.extract_variable_selectors() variable_mapping = {} diff --git a/api/dify_graph/nodes/answer/entities.py b/api/dify_graph/nodes/answer/entities.py index 06927cd71e..cd82df1ac4 100644 --- a/api/dify_graph/nodes/answer/entities.py +++ b/api/dify_graph/nodes/answer/entities.py @@ -3,7 +3,8 @@ from enum import StrEnum, auto from pydantic import BaseModel, Field -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType class AnswerNodeData(BaseNodeData): @@ -11,6 +12,7 @@ class AnswerNodeData(BaseNodeData): Answer Node Data. """ + type: NodeType = BuiltinNodeTypes.ANSWER answer: str = Field(..., description="answer template string") diff --git a/api/dify_graph/nodes/base/__init__.py b/api/dify_graph/nodes/base/__init__.py index 87fd6c5b32..036e25895d 100644 --- a/api/dify_graph/nodes/base/__init__.py +++ b/api/dify_graph/nodes/base/__init__.py @@ -1,10 +1,4 @@ -from .entities import ( - BaseIterationNodeData, - BaseIterationState, - BaseLoopNodeData, - BaseLoopState, - BaseNodeData, -) +from .entities import BaseIterationNodeData, BaseIterationState, BaseLoopNodeData, BaseLoopState from .usage_tracking_mixin import LLMUsageTrackingMixin __all__ = [ @@ -12,6 +6,5 @@ __all__ = [ "BaseIterationState", "BaseLoopNodeData", "BaseLoopState", - "BaseNodeData", "LLMUsageTrackingMixin", ] diff --git a/api/dify_graph/nodes/base/entities.py b/api/dify_graph/nodes/base/entities.py index 7d6dffe9e2..fa9457315f 100644 --- a/api/dify_graph/nodes/base/entities.py +++ b/api/dify_graph/nodes/base/entities.py @@ -1,31 +1,12 @@ from __future__ import annotations -import json -from abc import ABC -from builtins import type as type_ from collections.abc import Sequence from enum import StrEnum -from typing import Any, Union +from typing import Any -from pydantic import BaseModel, field_validator, model_validator +from pydantic import BaseModel, field_validator -from dify_graph.enums import ErrorStrategy - -from .exc import DefaultValueTypeError - -_NumberType = Union[int, float] - - -class RetryConfig(BaseModel): - """node retry config""" - - max_retries: int = 0 # max retry times - retry_interval: int = 0 # retry interval in milliseconds - retry_enabled: bool = False # whether retry is enabled - - @property - def retry_interval_seconds(self) -> float: - return self.retry_interval / 1000 +from dify_graph.entities.base_node_data import BaseNodeData class VariableSelector(BaseModel): @@ -76,120 +57,6 @@ class OutputVariableEntity(BaseModel): return v -class DefaultValueType(StrEnum): - STRING = "string" - NUMBER = "number" - OBJECT = "object" - ARRAY_NUMBER = "array[number]" - ARRAY_STRING = "array[string]" - ARRAY_OBJECT = "array[object]" - ARRAY_FILES = "array[file]" - - -class DefaultValue(BaseModel): - value: Any = None - type: DefaultValueType - key: str - - @staticmethod - def _parse_json(value: str): - """Unified JSON parsing handler""" - try: - return json.loads(value) - except json.JSONDecodeError: - raise DefaultValueTypeError(f"Invalid JSON format for value: {value}") - - @staticmethod - def _validate_array(value: Any, element_type: type_ | tuple[type_, ...]) -> bool: - """Unified array type validation""" - return isinstance(value, list) and all(isinstance(x, element_type) for x in value) - - @staticmethod - def _convert_number(value: str) -> float: - """Unified number conversion handler""" - try: - return float(value) - except ValueError: - raise DefaultValueTypeError(f"Cannot convert to number: {value}") - - @model_validator(mode="after") - def validate_value_type(self) -> DefaultValue: - # Type validation configuration - type_validators: dict[DefaultValueType, dict[str, Any]] = { - DefaultValueType.STRING: { - "type": str, - "converter": lambda x: x, - }, - DefaultValueType.NUMBER: { - "type": _NumberType, - "converter": self._convert_number, - }, - DefaultValueType.OBJECT: { - "type": dict, - "converter": self._parse_json, - }, - DefaultValueType.ARRAY_NUMBER: { - "type": list, - "element_type": _NumberType, - "converter": self._parse_json, - }, - DefaultValueType.ARRAY_STRING: { - "type": list, - "element_type": str, - "converter": self._parse_json, - }, - DefaultValueType.ARRAY_OBJECT: { - "type": list, - "element_type": dict, - "converter": self._parse_json, - }, - } - - validator: dict[str, Any] = type_validators.get(self.type, {}) - if not validator: - if self.type == DefaultValueType.ARRAY_FILES: - # Handle files type - return self - raise DefaultValueTypeError(f"Unsupported type: {self.type}") - - # Handle string input cases - if isinstance(self.value, str) and self.type != DefaultValueType.STRING: - self.value = validator["converter"](self.value) - - # Validate base type - if not isinstance(self.value, validator["type"]): - raise DefaultValueTypeError(f"Value must be {validator['type'].__name__} type for {self.value}") - - # Validate array element types - if validator["type"] == list and not self._validate_array(self.value, validator["element_type"]): - raise DefaultValueTypeError(f"All elements must be {validator['element_type'].__name__} for {self.value}") - - return self - - -class BaseNodeData(ABC, BaseModel): - title: str - desc: str | None = None - version: str = "1" - error_strategy: ErrorStrategy | None = None - default_value: list[DefaultValue] | None = None - retry_config: RetryConfig = RetryConfig() - - # Parent node ID when this node is used as an extractor. - # If set, this node is an "attached" extractor node that extracts values - # from list[PromptMessage] for the parent node's parameters. - parent_node_id: str | None = None - - @property - def is_extractor_node(self) -> bool: - """Check if this node is an extractor node (has parent_node_id).""" - return self.parent_node_id is not None - - @property - def default_value_dict(self) -> dict[str, Any]: - if self.default_value: - return {item.key: item.value for item in self.default_value} - return {} class BaseIterationNodeData(BaseNodeData): diff --git a/api/dify_graph/nodes/base/node.py b/api/dify_graph/nodes/base/node.py index 7efb4315f3..a1784ba23d 100644 --- a/api/dify_graph/nodes/base/node.py +++ b/api/dify_graph/nodes/base/node.py @@ -1,9 +1,7 @@ from __future__ import annotations -import importlib import logging import operator -import pkgutil from abc import abstractmethod from collections.abc import Generator, Mapping, Sequence from functools import singledispatchmethod @@ -11,7 +9,9 @@ from types import MappingProxyType from typing import Any, ClassVar, Generic, Protocol, TypeVar, cast, get_args, get_origin from uuid import uuid4 -from dify_graph.entities import AgentNodeStrategyInit, GraphInitParams +from dify_graph.entities import GraphInitParams +from dify_graph.entities.base_node_data import BaseNodeData, RetryConfig +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY from dify_graph.enums import ( ErrorStrategy, @@ -65,8 +65,6 @@ from dify_graph.node_events import ( from dify_graph.runtime import GraphRuntimeState from libs.datetime_utils import naive_utc_now -from .entities import BaseNodeData, RetryConfig - NodeDataT = TypeVar("NodeDataT", bound=BaseNodeData) _MISSING_RUN_CONTEXT_VALUE = object() @@ -156,15 +154,15 @@ class Node(Generic[NodeDataT]): Later, in __init__: :: - config["data"] ──► _hydrate_node_data() ──► _node_data_type.model_validate() - │ - ▼ - CodeNodeData instance - (stored in self._node_data) + config["data"] ──► _node_data_type.model_validate(..., from_attributes=True) + │ + ▼ + CodeNodeData instance + (stored in self._node_data) Example: class CodeNode(Node[CodeNodeData]): # CodeNodeData is auto-extracted - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE # No need to implement _get_title, _get_error_strategy, etc. """ super().__init_subclass__(**kwargs) @@ -182,7 +180,8 @@ class Node(Generic[NodeDataT]): # Skip base class itself if cls is Node: return - # Only register production node implementations defined under dify_graph.nodes.* + # Only register production node implementations defined under the + # canonical workflow namespaces. # This prevents test helper subclasses from polluting the global registry and # accidentally overriding real node types (e.g., a test Answer node). module_name = getattr(cls, "__module__", "") @@ -190,7 +189,7 @@ class Node(Generic[NodeDataT]): node_type = cls.node_type version = cls.version() bucket = Node._registry.setdefault(node_type, {}) - if module_name.startswith("dify_graph.nodes."): + if module_name.startswith(("dify_graph.nodes.", "core.workflow.nodes.")): # Production node definitions take precedence and may override bucket[version] = cls # type: ignore[index] else: @@ -206,6 +205,7 @@ class Node(Generic[NodeDataT]): else: latest_key = max(version_keys) if version_keys else version bucket["latest"] = bucket[latest_key] + Node._registry_version += 1 @classmethod def _extract_node_data_type_from_generic(cls) -> type[BaseNodeData] | None: @@ -240,11 +240,16 @@ class Node(Generic[NodeDataT]): # Global registry populated via __init_subclass__ _registry: ClassVar[dict[NodeType, dict[str, type[Node]]]] = {} + _registry_version: ClassVar[int] = 0 + + @classmethod + def get_registry_version(cls) -> int: + return cls._registry_version def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: GraphInitParams, graph_runtime_state: GraphRuntimeState, ) -> None: @@ -257,22 +262,25 @@ class Node(Generic[NodeDataT]): self.graph_runtime_state = graph_runtime_state self.state: NodeState = NodeState.UNKNOWN # node execution state - node_id = config.get("id") - if not node_id: - raise ValueError("Node ID is required.") + node_id = config["id"] self._node_id = node_id self._node_execution_id: str = "" self._start_at = naive_utc_now() - raw_node_data = config.get("data") or {} - if not isinstance(raw_node_data, Mapping): - raise ValueError("Node config data must be a mapping.") - - self._node_data: NodeDataT = self._hydrate_node_data(raw_node_data) + self._node_data = self.validate_node_data(config["data"]) self.post_init() + @classmethod + def validate_node_data(cls, node_data: BaseNodeData) -> NodeDataT: + """Validate shared graph node payloads against the subclass-declared NodeData model.""" + return cast(NodeDataT, cls._node_data_type.model_validate(node_data, from_attributes=True)) + + def init_node_data(self, data: BaseNodeData | Mapping[str, Any]) -> None: + """Hydrate `_node_data` for legacy callers that bypass `__init__`.""" + self._node_data = self.validate_node_data(cast(BaseNodeData, data)) + def post_init(self) -> None: """Optional hook for subclasses requiring extra initialization.""" return @@ -345,9 +353,6 @@ class Node(Generic[NodeDataT]): return None return str(execution_id) - def _hydrate_node_data(self, data: Mapping[str, Any]) -> NodeDataT: - return cast(NodeDataT, self._node_data_type.model_validate(data)) - @abstractmethod def _run(self) -> NodeRunResult | Generator[NodeEventBase, None, None]: """ @@ -357,12 +362,6 @@ class Node(Generic[NodeDataT]): raise NotImplementedError def _find_extractor_node_configs(self) -> list[dict[str, Any]]: - """ - Find all extractor node configurations that have parent_node_id == self._node_id. - - Returns: - List of node configuration dicts for extractor nodes - """ nodes = self.graph_config.get("nodes", []) extractor_configs = [] for node_config in nodes: @@ -372,12 +371,6 @@ class Node(Generic[NodeDataT]): return extractor_configs def _execute_nested_nodes(self) -> Generator[GraphNodeEventBase, None, None]: - """ - Execute all nested nodes associated with this node. - - Nested nodes are nodes with parent_node_id == self._node_id. - They are executed before the main node to extract values from list[PromptMessage]. - """ from core.workflow.node_factory import DifyNodeFactory extractor_configs = self._find_extractor_node_configs() @@ -411,6 +404,10 @@ class Node(Generic[NodeDataT]): if not isinstance(event, NodeRunStreamChunkEvent): yield event + def populate_start_event(self, event: NodeRunStartedEvent) -> None: + """Allow subclasses to enrich the started event without cross-node imports in the base class.""" + _ = event + def run(self) -> Generator[GraphNodeEventBase, None, None]: execution_id = self.ensure_execution_id() self._start_at = naive_utc_now() @@ -427,41 +424,10 @@ class Node(Generic[NodeDataT]): in_iteration_id=None, start_at=self._start_at, ) - - # === FIXME(-LAN-): Needs to refactor. - from dify_graph.nodes.tool.tool_node import ToolNode - - if isinstance(self, ToolNode): - start_event.provider_id = getattr(self.node_data, "provider_id", "") - start_event.provider_type = getattr(self.node_data, "provider_type", "") - - from dify_graph.nodes.datasource.datasource_node import DatasourceNode - - if isinstance(self, DatasourceNode): - plugin_id = getattr(self.node_data, "plugin_id", "") - provider_name = getattr(self.node_data, "provider_name", "") - - start_event.provider_id = f"{plugin_id}/{provider_name}" - start_event.provider_type = getattr(self.node_data, "provider_type", "") - - from dify_graph.nodes.trigger_plugin.trigger_event_node import TriggerEventNode - - if isinstance(self, TriggerEventNode): - start_event.provider_id = getattr(self.node_data, "provider_id", "") - start_event.provider_type = getattr(self.node_data, "provider_type", "") - - from typing import cast - - from dify_graph.nodes.agent.agent_node import AgentNode - from dify_graph.nodes.agent.entities import AgentNodeData - - if isinstance(self, AgentNode): - start_event.agent_strategy = AgentNodeStrategyInit( - name=cast(AgentNodeData, self.node_data).agent_strategy_name, - icon=self.agent_strategy_icon, - ) - - # === + try: + self.populate_start_event(start_event) + except Exception: + logger.warning("Failed to populate start event for node %s", self._node_id, exc_info=True) yield start_event try: @@ -503,7 +469,7 @@ class Node(Generic[NodeDataT]): cls, *, graph_config: Mapping[str, Any], - config: Mapping[str, Any], + config: NodeConfigDict, ) -> Mapping[str, Sequence[str]]: """Extracts references variable selectors from node configuration. @@ -541,13 +507,12 @@ class Node(Generic[NodeDataT]): :param config: node config :return: """ - node_id = config.get("id") - if not node_id: - raise ValueError("Node ID is required when extracting variable selector to variable mapping.") - - # Pass raw dict data instead of creating NodeData instance + node_id = config["id"] + node_data = cls.validate_node_data(config["data"]) data = cls._extract_variable_selector_to_variable_mapping( - graph_config=graph_config, node_id=node_id, node_data=config.get("data", {}) + graph_config=graph_config, + node_id=node_id, + node_data=node_data, ) return data @@ -557,7 +522,7 @@ class Node(Generic[NodeDataT]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: NodeDataT, ) -> Mapping[str, Sequence[str]]: return {} @@ -581,30 +546,20 @@ class Node(Generic[NodeDataT]): @abstractmethod def version(cls) -> str: """`node_version` returns the version of current node type.""" - # NOTE(QuantumGhost): This should be in sync with `NODE_TYPE_CLASSES_MAPPING`. - # - # If you have introduced a new node type, please add it to `NODE_TYPE_CLASSES_MAPPING` - # in `api/dify_graph/nodes/__init__.py`. + # NOTE(QuantumGhost): Node versions must remain unique per `NodeType` so + # registry lookups can resolve numeric versions and `latest`. raise NotImplementedError("subclasses of BaseNode must implement `version` method.") @classmethod def get_node_type_classes_mapping(cls) -> Mapping[NodeType, Mapping[str, type[Node]]]: - """Return mapping of NodeType -> {version -> Node subclass} using __init_subclass__ registry. + """Return a read-only view of the currently registered node classes. - Import all modules under dify_graph.nodes so subclasses register themselves on import. - Then we return a readonly view of the registry to avoid accidental mutation. + This accessor intentionally performs no imports. The embedding layer that + owns bootstrap (for example `core.workflow.node_factory`) must import any + extension node packages before calling it so their subclasses register via + `__init_subclass__`. """ - # Import all node modules to ensure they are loaded (thus registered) - import dify_graph.nodes as _nodes_pkg - - for _, _modname, _ in pkgutil.walk_packages(_nodes_pkg.__path__, _nodes_pkg.__name__ + "."): - # Avoid importing modules that depend on the registry to prevent circular imports. - if _modname == "dify_graph.nodes.node_mapping": - continue - importlib.import_module(_modname) - - # Return a readonly view so callers can't mutate the registry by accident - return {nt: MappingProxyType(ver_map) for nt, ver_map in cls._registry.items()} + return {node_type: MappingProxyType(version_map) for node_type, version_map in cls._registry.items()} @property def retry(self) -> bool: @@ -941,11 +896,16 @@ class Node(Generic[NodeDataT]): @_dispatch.register def _(self, event: RunRetrieverResourceEvent) -> NodeRunRetrieverResourceEvent: + from core.rag.entities.citation_metadata import RetrievalSourceMetadata + + retriever_resources = [ + RetrievalSourceMetadata.model_validate(resource) for resource in event.retriever_resources + ] return NodeRunRetrieverResourceEvent( id=self.execution_id, node_id=self._node_id, node_type=self.node_type, - retriever_resources=event.retriever_resources, + retriever_resources=retriever_resources, context=event.context, node_version=self.version(), ) diff --git a/api/dify_graph/nodes/code/code_node.py b/api/dify_graph/nodes/code/code_node.py index 83e72deea9..82d5fced62 100644 --- a/api/dify_graph/nodes/code/code_node.py +++ b/api/dify_graph/nodes/code/code_node.py @@ -3,7 +3,8 @@ from decimal import Decimal from textwrap import dedent from typing import TYPE_CHECKING, Any, Protocol, cast -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.code.entities import CodeLanguage, CodeNodeData @@ -71,13 +72,13 @@ _DEFAULT_CODE_BY_LANGUAGE: Mapping[CodeLanguage, str] = { class CodeNode(Node[CodeNodeData]): - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE _limits: CodeNodeLimits def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", *, @@ -466,15 +467,12 @@ class CodeNode(Node[CodeNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: CodeNodeData, ) -> Mapping[str, Sequence[str]]: _ = graph_config # Explicitly mark as unused - # Create typed NodeData from dict - typed_node_data = CodeNodeData.model_validate(node_data) - return { node_id + "." + variable_selector.variable: variable_selector.value_selector - for variable_selector in typed_node_data.variables + for variable_selector in node_data.variables } @property diff --git a/api/dify_graph/nodes/code/entities.py b/api/dify_graph/nodes/code/entities.py index 9e161c29d0..55b4ee4862 100644 --- a/api/dify_graph/nodes/code/entities.py +++ b/api/dify_graph/nodes/code/entities.py @@ -3,7 +3,8 @@ from typing import Annotated, Literal from pydantic import AfterValidator, BaseModel -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.entities import VariableSelector from dify_graph.variables.types import SegmentType @@ -39,6 +40,8 @@ class CodeNodeData(BaseNodeData): Code Node Data. """ + type: NodeType = BuiltinNodeTypes.CODE + class Output(BaseModel): type: Annotated[SegmentType, AfterValidator(_validate_type)] children: dict[str, "CodeNodeData.Output"] | None = None diff --git a/api/dify_graph/nodes/datasource/__init__.py b/api/dify_graph/nodes/datasource/__init__.py deleted file mode 100644 index f6ec44cb77..0000000000 --- a/api/dify_graph/nodes/datasource/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .datasource_node import DatasourceNode - -__all__ = ["DatasourceNode"] diff --git a/api/dify_graph/nodes/document_extractor/entities.py b/api/dify_graph/nodes/document_extractor/entities.py index f4949d0df8..1110cc2710 100644 --- a/api/dify_graph/nodes/document_extractor/entities.py +++ b/api/dify_graph/nodes/document_extractor/entities.py @@ -1,10 +1,12 @@ from collections.abc import Sequence from dataclasses import dataclass -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType class DocumentExtractorNodeData(BaseNodeData): + type: NodeType = BuiltinNodeTypes.DOCUMENT_EXTRACTOR variable_selector: Sequence[str] diff --git a/api/dify_graph/nodes/document_extractor/node.py b/api/dify_graph/nodes/document_extractor/node.py index 5945e57926..27196f1aca 100644 --- a/api/dify_graph/nodes/document_extractor/node.py +++ b/api/dify_graph/nodes/document_extractor/node.py @@ -4,6 +4,7 @@ import json import logging import os import tempfile +import zipfile from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any @@ -20,7 +21,8 @@ from docx.oxml.text.paragraph import CT_P from docx.table import Table from docx.text.paragraph import Paragraph -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.file import File, FileTransferMethod, file_manager from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -44,7 +46,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]): Supports plain text, PDF, and DOC/DOCX files. """ - node_type = NodeType.DOCUMENT_EXTRACTOR + node_type = BuiltinNodeTypes.DOCUMENT_EXTRACTOR @classmethod def version(cls) -> str: @@ -53,7 +55,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]): def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", *, @@ -82,8 +84,18 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]): value = variable.value inputs = {"variable_selector": variable_selector} + if isinstance(value, list): + value = list(filter(lambda x: x, value)) process_data = {"documents": value if isinstance(value, list) else [value]} + if not value: + return NodeRunResult( + status=WorkflowNodeExecutionStatus.SUCCEEDED, + inputs=inputs, + process_data=process_data, + outputs={"text": ArrayStringSegment(value=[])}, + ) + try: if isinstance(value, list): extracted_text_list = [ @@ -111,6 +123,7 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]): else: raise DocumentExtractorError(f"Unsupported variable type: {type(value)}") except DocumentExtractorError as e: + logger.warning(e, exc_info=True) return NodeRunResult( status=WorkflowNodeExecutionStatus.FAILED, error=str(e), @@ -124,12 +137,10 @@ class DocumentExtractorNode(Node[DocumentExtractorNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: DocumentExtractorNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = DocumentExtractorNodeData.model_validate(node_data) - - return {node_id + ".files": typed_node_data.variable_selector} + _ = graph_config # Explicitly mark as unused + return {node_id + ".files": node_data.variable_selector} def _extract_text_by_mime_type( @@ -385,6 +396,32 @@ def parser_docx_part(block, doc: Document, content_items, i): content_items.append((i, "table", Table(block, doc))) +def _normalize_docx_zip(file_content: bytes) -> bytes: + """ + Some DOCX files (e.g. exported by Evernote on Windows) are malformed: + ZIP entry names use backslash (\\) as path separator instead of the forward + slash (/) required by both the ZIP spec and OOXML. On Linux/Mac the entry + "word\\document.xml" is never found when python-docx looks for + "word/document.xml", which triggers a KeyError about a missing relationship. + + This function rewrites the ZIP in-memory, normalizing all entry names to + use forward slashes without touching any actual document content. + """ + try: + with zipfile.ZipFile(io.BytesIO(file_content), "r") as zin: + out_buf = io.BytesIO() + with zipfile.ZipFile(out_buf, "w", compression=zipfile.ZIP_DEFLATED) as zout: + for item in zin.infolist(): + data = zin.read(item.filename) + # Normalize backslash path separators to forward slash + item.filename = item.filename.replace("\\", "/") + zout.writestr(item, data) + return out_buf.getvalue() + except zipfile.BadZipFile: + # Not a valid zip — return as-is and let python-docx report the real error + return file_content + + def _extract_text_from_docx(file_content: bytes) -> str: """ Extract text from a DOCX file. @@ -392,7 +429,15 @@ def _extract_text_from_docx(file_content: bytes) -> str: """ try: doc_file = io.BytesIO(file_content) - doc = docx.Document(doc_file) + try: + doc = docx.Document(doc_file) + except Exception as e: + logger.warning("Failed to parse DOCX, attempting to normalize ZIP entry paths: %s", e) + # Some DOCX files exported by tools like Evernote on Windows use + # backslash path separators in ZIP entries and/or single-quoted XML + # attributes, both of which break python-docx on Linux. Normalize and retry. + file_content = _normalize_docx_zip(file_content) + doc = docx.Document(io.BytesIO(file_content)) text = [] # Keep track of paragraph and table positions diff --git a/api/dify_graph/nodes/end/end_node.py b/api/dify_graph/nodes/end/end_node.py index 7aa526b85b..1f5cfab22b 100644 --- a/api/dify_graph/nodes/end/end_node.py +++ b/api/dify_graph/nodes/end/end_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.template import Template @@ -6,7 +6,7 @@ from dify_graph.nodes.end.entities import EndNodeData class EndNode(Node[EndNodeData]): - node_type = NodeType.END + node_type = BuiltinNodeTypes.END execution_type = NodeExecutionType.RESPONSE @classmethod diff --git a/api/dify_graph/nodes/end/entities.py b/api/dify_graph/nodes/end/entities.py index a410087214..be7f0c8de8 100644 --- a/api/dify_graph/nodes/end/entities.py +++ b/api/dify_graph/nodes/end/entities.py @@ -1,6 +1,8 @@ from pydantic import BaseModel, Field -from dify_graph.nodes.base.entities import BaseNodeData, OutputVariableEntity +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType +from dify_graph.nodes.base.entities import OutputVariableEntity class EndNodeData(BaseNodeData): @@ -8,6 +10,7 @@ class EndNodeData(BaseNodeData): END Node Data. """ + type: NodeType = BuiltinNodeTypes.END outputs: list[OutputVariableEntity] diff --git a/api/dify_graph/nodes/http_request/entities.py b/api/dify_graph/nodes/http_request/entities.py index a5564689f8..f594d58ae6 100644 --- a/api/dify_graph/nodes/http_request/entities.py +++ b/api/dify_graph/nodes/http_request/entities.py @@ -8,7 +8,8 @@ import charset_normalizer import httpx from pydantic import BaseModel, Field, ValidationInfo, field_validator -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType HTTP_REQUEST_CONFIG_FILTER_KEY = "http_request_config" @@ -89,6 +90,7 @@ class HttpRequestNodeData(BaseNodeData): Code Node Data. """ + type: NodeType = BuiltinNodeTypes.HTTP_REQUEST method: Literal[ "get", "post", diff --git a/api/dify_graph/nodes/http_request/node.py b/api/dify_graph/nodes/http_request/node.py index 2e48d5502a..b17c820a80 100644 --- a/api/dify_graph/nodes/http_request/node.py +++ b/api/dify_graph/nodes/http_request/node.py @@ -3,7 +3,8 @@ import mimetypes from collections.abc import Callable, Mapping, Sequence from typing import TYPE_CHECKING, Any -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.file import File, FileTransferMethod from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base import variable_template_parser @@ -32,12 +33,12 @@ if TYPE_CHECKING: class HttpRequestNode(Node[HttpRequestNodeData]): - node_type = NodeType.HTTP_REQUEST + node_type = BuiltinNodeTypes.HTTP_REQUEST def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", *, @@ -163,18 +164,15 @@ class HttpRequestNode(Node[HttpRequestNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: HttpRequestNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = HttpRequestNodeData.model_validate(node_data) - selectors: list[VariableSelector] = [] - selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.url) - selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.headers) - selectors += variable_template_parser.extract_selectors_from_template(typed_node_data.params) - if typed_node_data.body: - body_type = typed_node_data.body.type - data = typed_node_data.body.data + selectors += variable_template_parser.extract_selectors_from_template(node_data.url) + selectors += variable_template_parser.extract_selectors_from_template(node_data.headers) + selectors += variable_template_parser.extract_selectors_from_template(node_data.params) + if node_data.body: + body_type = node_data.body.type + data = node_data.body.data match body_type: case "none": pass diff --git a/api/dify_graph/nodes/human_input/entities.py b/api/dify_graph/nodes/human_input/entities.py index 5616949dcc..7936e47213 100644 --- a/api/dify_graph/nodes/human_input/entities.py +++ b/api/dify_graph/nodes/human_input/entities.py @@ -10,7 +10,8 @@ from typing import Annotated, Any, ClassVar, Literal, Self from pydantic import BaseModel, Field, field_validator, model_validator -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser from dify_graph.runtime import VariablePool from dify_graph.variables.consts import SELECTORS_LENGTH @@ -71,8 +72,8 @@ class EmailDeliveryConfig(BaseModel): body: str debug_mode: bool = False - def with_debug_recipient(self, user_id: str) -> "EmailDeliveryConfig": - if not user_id: + def with_debug_recipient(self, user_id: str | None) -> "EmailDeliveryConfig": + if user_id is None: debug_recipients = EmailRecipients(whole_workspace=False, items=[]) return self.model_copy(update={"recipients": debug_recipients}) debug_recipients = EmailRecipients(whole_workspace=False, items=[MemberRecipient(user_id=user_id)]) @@ -140,7 +141,7 @@ def apply_debug_email_recipient( method: DeliveryChannelConfig, *, enabled: bool, - user_id: str, + user_id: str | None, ) -> DeliveryChannelConfig: if not enabled: return method @@ -148,7 +149,7 @@ def apply_debug_email_recipient( return method if not method.config.debug_mode: return method - debug_config = method.config.with_debug_recipient(user_id or "") + debug_config = method.config.with_debug_recipient(user_id) return method.model_copy(update={"config": debug_config}) @@ -214,6 +215,7 @@ class UserAction(BaseModel): class HumanInputNodeData(BaseNodeData): """Human Input node data.""" + type: NodeType = BuiltinNodeTypes.HUMAN_INPUT delivery_methods: list[DeliveryChannelConfig] = Field(default_factory=list) form_content: str = "" inputs: list[FormInput] = Field(default_factory=list) diff --git a/api/dify_graph/nodes/human_input/human_input_node.py b/api/dify_graph/nodes/human_input/human_input_node.py index 03c2d17b1d..794e33d92e 100644 --- a/api/dify_graph/nodes/human_input/human_input_node.py +++ b/api/dify_graph/nodes/human_input/human_input_node.py @@ -3,8 +3,9 @@ import logging from collections.abc import Generator, Mapping, Sequence from typing import TYPE_CHECKING, Any +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.pause_reason import HumanInputRequired -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import ( HumanInputFormFilledEvent, HumanInputFormTimeoutEvent, @@ -39,7 +40,7 @@ logger = logging.getLogger(__name__) class HumanInputNode(Node[HumanInputNodeData]): - node_type = NodeType.HUMAN_INPUT + node_type = BuiltinNodeTypes.HUMAN_INPUT execution_type = NodeExecutionType.BRANCH _BRANCH_SELECTION_KEYS: tuple[str, ...] = ( @@ -63,7 +64,7 @@ class HumanInputNode(Node[HumanInputNodeData]): def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", form_repository: HumanInputFormRepository, @@ -348,7 +349,7 @@ class HumanInputNode(Node[HumanInputNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: HumanInputNodeData, ) -> Mapping[str, Sequence[str]]: """ Extract variable selectors referenced in form content and input default values. @@ -357,5 +358,4 @@ class HumanInputNode(Node[HumanInputNodeData]): 1. Variables referenced in form_content ({{#node_name.var_name#}}) 2. Variables referenced in input default values """ - validated_node_data = HumanInputNodeData.model_validate(node_data) - return validated_node_data.extract_variable_selector_to_variable_mapping(node_id) + return node_data.extract_variable_selector_to_variable_mapping(node_id) diff --git a/api/dify_graph/nodes/if_else/entities.py b/api/dify_graph/nodes/if_else/entities.py index 4733944039..ff09f3c023 100644 --- a/api/dify_graph/nodes/if_else/entities.py +++ b/api/dify_graph/nodes/if_else/entities.py @@ -2,7 +2,8 @@ from typing import Literal from pydantic import BaseModel, Field -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.utils.condition.entities import Condition @@ -11,6 +12,8 @@ class IfElseNodeData(BaseNodeData): If Else Node Data. """ + type: NodeType = BuiltinNodeTypes.IF_ELSE + class Case(BaseModel): """ Case entity representing a single logical condition group diff --git a/api/dify_graph/nodes/if_else/if_else_node.py b/api/dify_graph/nodes/if_else/if_else_node.py index 3c5a33e2b7..7c0370e48c 100644 --- a/api/dify_graph/nodes/if_else/if_else_node.py +++ b/api/dify_graph/nodes/if_else/if_else_node.py @@ -3,7 +3,7 @@ from typing import Any, Literal from typing_extensions import deprecated -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.if_else.entities import IfElseNodeData @@ -13,7 +13,7 @@ from dify_graph.utils.condition.processor import ConditionProcessor class IfElseNode(Node[IfElseNodeData]): - node_type = NodeType.IF_ELSE + node_type = BuiltinNodeTypes.IF_ELSE execution_type = NodeExecutionType.BRANCH @classmethod @@ -97,13 +97,11 @@ class IfElseNode(Node[IfElseNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: IfElseNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = IfElseNodeData.model_validate(node_data) - var_mapping: dict[str, list[str]] = {} - for case in typed_node_data.cases or []: + _ = graph_config # Explicitly mark as unused + for case in node_data.cases or []: for condition in case.conditions: key = f"{node_id}.#{'.'.join(condition.variable_selector)}#" var_mapping[key] = condition.variable_selector diff --git a/api/dify_graph/nodes/iteration/entities.py b/api/dify_graph/nodes/iteration/entities.py index a31b05463e..58fd112b12 100644 --- a/api/dify_graph/nodes/iteration/entities.py +++ b/api/dify_graph/nodes/iteration/entities.py @@ -3,7 +3,9 @@ from typing import Any from pydantic import Field -from dify_graph.nodes.base import BaseIterationNodeData, BaseIterationState, BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType +from dify_graph.nodes.base import BaseIterationNodeData, BaseIterationState class ErrorHandleMode(StrEnum): @@ -17,6 +19,7 @@ class IterationNodeData(BaseIterationNodeData): Iteration Node Data. """ + type: NodeType = BuiltinNodeTypes.ITERATION parent_loop_id: str | None = None # redundant field, not used currently iterator_selector: list[str] # variable selector output_selector: list[str] # output selector @@ -31,7 +34,7 @@ class IterationStartNodeData(BaseNodeData): Iteration Start Node Data. """ - pass + type: NodeType = BuiltinNodeTypes.ITERATION_START class IterationState(BaseIterationState): diff --git a/api/dify_graph/nodes/iteration/iteration_node.py b/api/dify_graph/nodes/iteration/iteration_node.py index 6d26cbfce4..f63ba0bc48 100644 --- a/api/dify_graph/nodes/iteration/iteration_node.py +++ b/api/dify_graph/nodes/iteration/iteration_node.py @@ -7,9 +7,10 @@ from typing import TYPE_CHECKING, Any, NewType, cast from typing_extensions import TypeIs from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID +from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.enums import ( + BuiltinNodeTypes, NodeExecutionType, - NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -61,7 +62,7 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]): Iteration Node. """ - node_type = NodeType.ITERATION + node_type = BuiltinNodeTypes.ITERATION execution_type = NodeExecutionType.CONTAINER @classmethod @@ -460,21 +461,18 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: IterationNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = IterationNodeData.model_validate(node_data) - variable_mapping: dict[str, Sequence[str]] = { - f"{node_id}.input_selector": typed_node_data.iterator_selector, + f"{node_id}.input_selector": node_data.iterator_selector, } iteration_node_ids = set() # Find all nodes that belong to this loop nodes = graph_config.get("nodes", []) for node in nodes: - node_data = node.get("data", {}) - if node_data.get("iteration_id") == node_id: + node_config_data = node.get("data", {}) + if node_config_data.get("iteration_id") == node_id: in_iteration_node_id = node.get("id") if in_iteration_node_id: iteration_node_ids.add(in_iteration_node_id) @@ -487,17 +485,16 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]): # variable selector to variable mapping try: - # Get node class - from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING - - node_type = NodeType(sub_node_config.get("data", {}).get("type")) - if node_type not in NODE_TYPE_CLASSES_MAPPING: + typed_sub_node_config = NodeConfigDictAdapter.validate_python(sub_node_config) + node_type = typed_sub_node_config["data"].type + node_mapping = Node.get_node_type_classes_mapping() + if node_type not in node_mapping: continue - node_version = sub_node_config.get("data", {}).get("version", "1") - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version] + node_version = str(typed_sub_node_config["data"].version) + node_cls = node_mapping[node_type][node_version] sub_node_variable_mapping = node_cls.extract_variable_selector_to_variable_mapping( - graph_config=graph_config, config=sub_node_config + graph_config=graph_config, config=typed_sub_node_config ) sub_node_variable_mapping = cast(dict[str, Sequence[str]], sub_node_variable_mapping) except NotImplementedError: @@ -563,7 +560,7 @@ class IterationNode(LLMUsageTrackingMixin, Node[IterationNodeData]): raise IterationIndexNotFoundError(f"iteration {self._node_id} current index not found") current_index = index_variable.value for event in rst: - if isinstance(event, GraphNodeEventBase) and event.node_type == NodeType.ITERATION_START: + if isinstance(event, GraphNodeEventBase) and event.node_type == BuiltinNodeTypes.ITERATION_START: continue if isinstance(event, GraphNodeEventBase): diff --git a/api/dify_graph/nodes/iteration/iteration_start_node.py b/api/dify_graph/nodes/iteration/iteration_start_node.py index 2e1f555ed2..a8ecf3d83b 100644 --- a/api/dify_graph/nodes/iteration/iteration_start_node.py +++ b/api/dify_graph/nodes/iteration/iteration_start_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.iteration.entities import IterationStartNodeData @@ -9,7 +9,7 @@ class IterationStartNode(Node[IterationStartNodeData]): Iteration Start Node. """ - node_type = NodeType.ITERATION_START + node_type = BuiltinNodeTypes.ITERATION_START @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/knowledge_index/__init__.py b/api/dify_graph/nodes/knowledge_index/__init__.py deleted file mode 100644 index 23897a1e42..0000000000 --- a/api/dify_graph/nodes/knowledge_index/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .knowledge_index_node import KnowledgeIndexNode - -__all__ = ["KnowledgeIndexNode"] diff --git a/api/dify_graph/nodes/knowledge_retrieval/__init__.py b/api/dify_graph/nodes/knowledge_retrieval/__init__.py deleted file mode 100644 index 4d4a4cbd9f..0000000000 --- a/api/dify_graph/nodes/knowledge_retrieval/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .knowledge_retrieval_node import KnowledgeRetrievalNode - -__all__ = ["KnowledgeRetrievalNode"] diff --git a/api/dify_graph/nodes/list_operator/entities.py b/api/dify_graph/nodes/list_operator/entities.py index 0fdd85f210..41b3a40b78 100644 --- a/api/dify_graph/nodes/list_operator/entities.py +++ b/api/dify_graph/nodes/list_operator/entities.py @@ -3,7 +3,8 @@ from enum import StrEnum from pydantic import BaseModel, Field -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType class FilterOperator(StrEnum): @@ -62,6 +63,7 @@ class ExtractConfig(BaseModel): class ListOperatorNodeData(BaseNodeData): + type: NodeType = BuiltinNodeTypes.LIST_OPERATOR variable: Sequence[str] = Field(default_factory=list) filter_by: FilterBy order_by: OrderByConfig diff --git a/api/dify_graph/nodes/list_operator/node.py b/api/dify_graph/nodes/list_operator/node.py index d2fdadc29c..dc8b8904f7 100644 --- a/api/dify_graph/nodes/list_operator/node.py +++ b/api/dify_graph/nodes/list_operator/node.py @@ -1,7 +1,7 @@ from collections.abc import Callable, Sequence from typing import Any, TypeAlias, TypeVar -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.file import File from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node @@ -35,7 +35,7 @@ def _negation(filter_: Callable[[_T], bool]) -> Callable[[_T], bool]: class ListOperatorNode(Node[ListOperatorNodeData]): - node_type = NodeType.LIST_OPERATOR + node_type = BuiltinNodeTypes.LIST_OPERATOR @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/llm/entities.py b/api/dify_graph/nodes/llm/entities.py index f4212f04d7..ba47c6ac36 100644 --- a/api/dify_graph/nodes/llm/entities.py +++ b/api/dify_graph/nodes/llm/entities.py @@ -8,11 +8,12 @@ from core.agent.entities import AgentLog, AgentResult from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate, MemoryConfig from core.tools.entities.tool_entities import ToolProviderType from dify_graph.entities import ToolCall, ToolCallResult +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.file import File from dify_graph.model_runtime.entities import ImagePromptMessageContent, LLMMode from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.node_events import AgentLogEvent -from dify_graph.nodes.base import BaseNodeData from dify_graph.nodes.base.entities import VariableSelector @@ -367,6 +368,7 @@ class ToolSetting(BaseModel): class LLMNodeData(BaseNodeData): + type: NodeType = BuiltinNodeTypes.LLM model: ModelConfig prompt_template: Sequence[PromptTemplateItem] | LLMNodeCompletionModelPromptTemplate prompt_config: PromptConfig = Field(default_factory=PromptConfig) diff --git a/api/dify_graph/nodes/llm/file_saver.py b/api/dify_graph/nodes/llm/file_saver.py index b4f64f4093..50e52a3b6f 100644 --- a/api/dify_graph/nodes/llm/file_saver.py +++ b/api/dify_graph/nodes/llm/file_saver.py @@ -1,14 +1,11 @@ import mimetypes import typing as tp -from sqlalchemy import Engine - from constants.mimetypes import DEFAULT_EXTENSION, DEFAULT_MIME_TYPE -from core.helper import ssrf_proxy from core.tools.signature import sign_tool_file from core.tools.tool_file_manager import ToolFileManager from dify_graph.file import File, FileTransferMethod, FileType -from extensions.ext_database import db as global_db +from dify_graph.nodes.protocols import HttpClientProtocol class LLMFileSaver(tp.Protocol): @@ -59,30 +56,20 @@ class LLMFileSaver(tp.Protocol): raise NotImplementedError() -EngineFactory: tp.TypeAlias = tp.Callable[[], Engine] - - class FileSaverImpl(LLMFileSaver): - _engine_factory: EngineFactory _tenant_id: str _user_id: str - def __init__(self, user_id: str, tenant_id: str, engine_factory: EngineFactory | None = None): - if engine_factory is None: - - def _factory(): - return global_db.engine - - engine_factory = _factory - self._engine_factory = engine_factory + def __init__(self, user_id: str, tenant_id: str, http_client: HttpClientProtocol): self._user_id = user_id self._tenant_id = tenant_id + self._http_client = http_client def _get_tool_file_manager(self): - return ToolFileManager(engine=self._engine_factory()) + return ToolFileManager() def save_remote_url(self, url: str, file_type: FileType) -> File: - http_response = ssrf_proxy.get(url) + http_response = self._http_client.get(url) http_response.raise_for_status() data = http_response.content mime_type_from_header = http_response.headers.get("Content-Type") diff --git a/api/dify_graph/nodes/llm/node.py b/api/dify_graph/nodes/llm/node.py index 979b9aef69..a161bada30 100644 --- a/api/dify_graph/nodes/llm/node.py +++ b/api/dify_graph/nodes/llm/node.py @@ -46,8 +46,10 @@ from core.tools.tool_file_manager import ToolFileManager from core.tools.tool_manager import ToolManager from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID from dify_graph.entities import GraphInitParams, ToolCall, ToolResult, ToolResultStatus +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.tool_entities import ToolCallResult from dify_graph.enums import ( + BuiltinNodeTypes, NodeType, SystemVariableKey, WorkflowNodeExecutionMetadataKey, @@ -95,6 +97,7 @@ from dify_graph.nodes.base.entities import VariableSelector from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory +from dify_graph.nodes.protocols import HttpClientProtocol from dify_graph.runtime import VariablePool from dify_graph.variables import ( ArrayFileSegment, @@ -146,7 +149,7 @@ logger = logging.getLogger(__name__) class LLMNode(Node[LLMNodeData]): - node_type = NodeType.LLM + node_type = BuiltinNodeTypes.LLM # Compiled regex for extracting blocks (with compatibility for attributes) _THINK_PATTERN = re.compile(r"]*>(.*?)", re.IGNORECASE | re.DOTALL) @@ -164,13 +167,14 @@ class LLMNode(Node[LLMNodeData]): def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: GraphInitParams, graph_runtime_state: GraphRuntimeState, *, credentials_provider: CredentialsProvider, model_factory: ModelFactory, model_instance: ModelInstance, + http_client: HttpClientProtocol, memory: PromptMessageMemory | None = None, llm_file_saver: LLMFileSaver | None = None, ): @@ -193,6 +197,7 @@ class LLMNode(Node[LLMNodeData]): llm_file_saver = FileSaverImpl( user_id=dify_ctx.user_id, tenant_id=dify_ctx.tenant_id, + http_client=http_client, ) self._llm_file_saver = llm_file_saver @@ -1220,7 +1225,7 @@ class LLMNode(Node[LLMNodeData]): ) elif isinstance(context_value_variable, ArraySegment): context_str = "" - original_retriever_resource: list[RetrievalSourceMetadata] = [] + original_retriever_resource: list[dict[str, Any]] = [] context_files: list[File] = [] for item in context_value_variable.value: if isinstance(item, str): @@ -1236,11 +1241,14 @@ class LLMNode(Node[LLMNodeData]): retriever_resource = self._convert_to_original_retriever_resource(item) if retriever_resource: original_retriever_resource.append(retriever_resource) + segment_id = retriever_resource.get("segment_id") + if not segment_id: + continue attachments_with_bindings = db.session.execute( select(SegmentAttachmentBinding, UploadFile) .join(UploadFile, UploadFile.id == SegmentAttachmentBinding.attachment_id) .where( - SegmentAttachmentBinding.segment_id == retriever_resource.segment_id, + SegmentAttachmentBinding.segment_id == segment_id, ) ).all() if attachments_with_bindings: @@ -1266,7 +1274,7 @@ class LLMNode(Node[LLMNodeData]): context_files=context_files, ) - def _convert_to_original_retriever_resource(self, context_dict: dict) -> RetrievalSourceMetadata | None: + def _convert_to_original_retriever_resource(self, context_dict: dict) -> dict[str, Any] | None: if ( "metadata" in context_dict and "_source" in context_dict["metadata"] @@ -1274,28 +1282,26 @@ class LLMNode(Node[LLMNodeData]): ): metadata = context_dict.get("metadata", {}) - source = RetrievalSourceMetadata( - position=metadata.get("position"), - dataset_id=metadata.get("dataset_id"), - dataset_name=metadata.get("dataset_name"), - document_id=metadata.get("document_id"), - document_name=metadata.get("document_name"), - data_source_type=metadata.get("data_source_type"), - segment_id=metadata.get("segment_id"), - retriever_from=metadata.get("retriever_from"), - score=metadata.get("score"), - hit_count=metadata.get("segment_hit_count"), - word_count=metadata.get("segment_word_count"), - segment_position=metadata.get("segment_position"), - index_node_hash=metadata.get("segment_index_node_hash"), - content=context_dict.get("content"), - page=metadata.get("page"), - doc_metadata=metadata.get("doc_metadata"), - files=context_dict.get("files"), - summary=context_dict.get("summary"), - ) - - return source + return { + "position": metadata.get("position"), + "dataset_id": metadata.get("dataset_id"), + "dataset_name": metadata.get("dataset_name"), + "document_id": metadata.get("document_id"), + "document_name": metadata.get("document_name"), + "data_source_type": metadata.get("data_source_type"), + "segment_id": metadata.get("segment_id"), + "retriever_from": metadata.get("retriever_from"), + "score": metadata.get("score"), + "hit_count": metadata.get("segment_hit_count"), + "word_count": metadata.get("segment_word_count"), + "segment_position": metadata.get("segment_position"), + "index_node_hash": metadata.get("segment_index_node_hash"), + "content": context_dict.get("content"), + "page": metadata.get("page"), + "doc_metadata": metadata.get("doc_metadata"), + "files": context_dict.get("files"), + "summary": context_dict.get("summary"), + } return None @@ -1503,14 +1509,11 @@ class LLMNode(Node[LLMNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: LLMNodeData, ) -> Mapping[str, Sequence[str]]: # graph_config is not used in this node type _ = graph_config # Explicitly mark as unused - # Create typed NodeData from dict - typed_node_data = LLMNodeData.model_validate(node_data) - - prompt_template = typed_node_data.prompt_template + prompt_template = node_data.prompt_template variable_selectors = [] prompt_context_selectors: list[Sequence[str]] = [] if isinstance(prompt_template, list): @@ -1538,7 +1541,7 @@ class LLMNode(Node[LLMNodeData]): variable_key = f"#{'.'.join(context_selector)}#" variable_mapping[variable_key] = list(context_selector) - memory = typed_node_data.memory + memory = node_data.memory if memory and memory.query_prompt_template: query_variable_selectors = VariableTemplateParser( template=memory.query_prompt_template @@ -1546,16 +1549,16 @@ class LLMNode(Node[LLMNodeData]): for variable_selector in query_variable_selectors: variable_mapping[variable_selector.variable] = variable_selector.value_selector - if typed_node_data.context.enabled: - variable_mapping["#context#"] = typed_node_data.context.variable_selector + if node_data.context.enabled: + variable_mapping["#context#"] = node_data.context.variable_selector - if typed_node_data.vision.enabled: - variable_mapping["#files#"] = typed_node_data.vision.configs.variable_selector + if node_data.vision.enabled: + variable_mapping["#files#"] = node_data.vision.configs.variable_selector - if typed_node_data.memory: + if node_data.memory: variable_mapping["#sys.query#"] = ["sys", SystemVariableKey.QUERY] - if typed_node_data.prompt_config: + if node_data.prompt_config: enable_jinja = False if isinstance(prompt_template, list): @@ -1567,7 +1570,7 @@ class LLMNode(Node[LLMNodeData]): enable_jinja = True if enable_jinja: - for variable_selector in typed_node_data.prompt_config.jinja2_variables or []: + for variable_selector in node_data.prompt_config.jinja2_variables or []: variable_mapping[variable_selector.variable] = variable_selector.value_selector variable_mapping = {node_id + "." + key: value for key, value in variable_mapping.items()} diff --git a/api/dify_graph/nodes/loop/entities.py b/api/dify_graph/nodes/loop/entities.py index b4a8518048..f0bfad5a0f 100644 --- a/api/dify_graph/nodes/loop/entities.py +++ b/api/dify_graph/nodes/loop/entities.py @@ -3,7 +3,9 @@ from typing import Annotated, Any, Literal from pydantic import AfterValidator, BaseModel, Field, field_validator -from dify_graph.nodes.base import BaseLoopNodeData, BaseLoopState, BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType +from dify_graph.nodes.base import BaseLoopNodeData, BaseLoopState from dify_graph.utils.condition.entities import Condition from dify_graph.variables.types import SegmentType @@ -39,6 +41,7 @@ class LoopVariableData(BaseModel): class LoopNodeData(BaseLoopNodeData): + type: NodeType = BuiltinNodeTypes.LOOP loop_count: int # Maximum number of loops break_conditions: list[Condition] # Conditions to break the loop logical_operator: Literal["and", "or"] @@ -58,7 +61,7 @@ class LoopStartNodeData(BaseNodeData): Loop Start Node Data. """ - pass + type: NodeType = BuiltinNodeTypes.LOOP_START class LoopEndNodeData(BaseNodeData): @@ -66,7 +69,7 @@ class LoopEndNodeData(BaseNodeData): Loop End Node Data. """ - pass + type: NodeType = BuiltinNodeTypes.LOOP_END class LoopState(BaseLoopState): diff --git a/api/dify_graph/nodes/loop/loop_end_node.py b/api/dify_graph/nodes/loop/loop_end_node.py index 73ac5da927..0287708fb3 100644 --- a/api/dify_graph/nodes/loop/loop_end_node.py +++ b/api/dify_graph/nodes/loop/loop_end_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.loop.entities import LoopEndNodeData @@ -9,7 +9,7 @@ class LoopEndNode(Node[LoopEndNodeData]): Loop End Node. """ - node_type = NodeType.LOOP_END + node_type = BuiltinNodeTypes.LOOP_END @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/loop/loop_node.py b/api/dify_graph/nodes/loop/loop_node.py index 8279f0fc66..3c546ffa23 100644 --- a/api/dify_graph/nodes/loop/loop_node.py +++ b/api/dify_graph/nodes/loop/loop_node.py @@ -5,9 +5,10 @@ from collections.abc import Callable, Generator, Mapping, Sequence from datetime import datetime from typing import TYPE_CHECKING, Any, Literal, cast +from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.enums import ( + BuiltinNodeTypes, NodeExecutionType, - NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -45,7 +46,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): Loop Node. """ - node_type = NodeType.LOOP + node_type = BuiltinNodeTypes.LOOP execution_type = NodeExecutionType.CONTAINER @classmethod @@ -249,11 +250,11 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): if isinstance(event, GraphNodeEventBase): self._append_loop_info_to_event(event=event, loop_run_index=current_index) - if isinstance(event, GraphNodeEventBase) and event.node_type == NodeType.LOOP_START: + if isinstance(event, GraphNodeEventBase) and event.node_type == BuiltinNodeTypes.LOOP_START: continue if isinstance(event, GraphNodeEventBase): yield event - if isinstance(event, NodeRunSucceededEvent) and event.node_type == NodeType.LOOP_END: + if isinstance(event, NodeRunSucceededEvent) and event.node_type == BuiltinNodeTypes.LOOP_END: reach_break_node = True if isinstance(event, GraphRunFailedEvent): raise Exception(event.error) @@ -298,11 +299,8 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: LoopNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = LoopNodeData.model_validate(node_data) - variable_mapping = {} # Extract loop node IDs statically from graph_config @@ -317,17 +315,16 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): # variable selector to variable mapping try: - # Get node class - from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING - - node_type = NodeType(sub_node_config.get("data", {}).get("type")) - if node_type not in NODE_TYPE_CLASSES_MAPPING: + typed_sub_node_config = NodeConfigDictAdapter.validate_python(sub_node_config) + node_type = typed_sub_node_config["data"].type + node_mapping = Node.get_node_type_classes_mapping() + if node_type not in node_mapping: continue - node_version = sub_node_config.get("data", {}).get("version", "1") - node_cls = NODE_TYPE_CLASSES_MAPPING[node_type][node_version] + node_version = str(typed_sub_node_config["data"].version) + node_cls = node_mapping[node_type][node_version] sub_node_variable_mapping = node_cls.extract_variable_selector_to_variable_mapping( - graph_config=graph_config, config=sub_node_config + graph_config=graph_config, config=typed_sub_node_config ) sub_node_variable_mapping = cast(dict[str, Sequence[str]], sub_node_variable_mapping) except NotImplementedError: @@ -342,7 +339,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]): variable_mapping.update(sub_node_variable_mapping) - for loop_variable in typed_node_data.loop_variables or []: + for loop_variable in node_data.loop_variables or []: if loop_variable.value_type == "variable": assert loop_variable.value is not None, "Loop variable value must be provided for variable type" # add loop variable to variable mapping diff --git a/api/dify_graph/nodes/loop/loop_start_node.py b/api/dify_graph/nodes/loop/loop_start_node.py index f469c8286e..e171b4df2f 100644 --- a/api/dify_graph/nodes/loop/loop_start_node.py +++ b/api/dify_graph/nodes/loop/loop_start_node.py @@ -1,4 +1,4 @@ -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.loop.entities import LoopStartNodeData @@ -9,7 +9,7 @@ class LoopStartNode(Node[LoopStartNodeData]): Loop Start Node. """ - node_type = NodeType.LOOP_START + node_type = BuiltinNodeTypes.LOOP_START @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/node_mapping.py b/api/dify_graph/nodes/node_mapping.py deleted file mode 100644 index 8e5405f1aa..0000000000 --- a/api/dify_graph/nodes/node_mapping.py +++ /dev/null @@ -1,9 +0,0 @@ -from collections.abc import Mapping - -from dify_graph.enums import NodeType -from dify_graph.nodes.base.node import Node - -LATEST_VERSION = "latest" - -# Mapping is built by Node.get_node_type_classes_mapping(), which imports and walks dify_graph.nodes -NODE_TYPE_CLASSES_MAPPING: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping() diff --git a/api/dify_graph/nodes/parameter_extractor/entities.py b/api/dify_graph/nodes/parameter_extractor/entities.py index 3b042710f9..2fb042c16c 100644 --- a/api/dify_graph/nodes/parameter_extractor/entities.py +++ b/api/dify_graph/nodes/parameter_extractor/entities.py @@ -8,7 +8,8 @@ from pydantic import ( ) from core.prompt.entities.advanced_prompt_entities import MemoryConfig -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.llm.entities import ModelConfig, VisionConfig from dify_graph.variables.types import SegmentType @@ -83,6 +84,7 @@ class ParameterExtractorNodeData(BaseNodeData): Parameter Extractor Node Data. """ + type: NodeType = BuiltinNodeTypes.PARAMETER_EXTRACTOR model: ModelConfig query: list[str] parameters: list[ParameterConfig] diff --git a/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py b/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py index 0e6cc772ca..2dedd5e162 100644 --- a/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py +++ b/api/dify_graph/nodes/parameter_extractor/parameter_extractor_node.py @@ -10,8 +10,9 @@ from core.prompt.advanced_prompt_transform import AdvancedPromptTransform from core.prompt.entities.advanced_prompt_entities import ChatModelMessage, CompletionModelPromptTemplate from core.prompt.simple_prompt_transform import ModelMode from core.prompt.utils.prompt_message_util import PromptMessageUtil +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -96,7 +97,7 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]): Parameter Extractor Node. """ - node_type = NodeType.PARAMETER_EXTRACTOR + node_type = BuiltinNodeTypes.PARAMETER_EXTRACTOR _model_instance: ModelInstance _credentials_provider: "CredentialsProvider" @@ -106,7 +107,7 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]): def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", *, @@ -842,15 +843,13 @@ class ParameterExtractorNode(Node[ParameterExtractorNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: ParameterExtractorNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = ParameterExtractorNodeData.model_validate(node_data) + _ = graph_config # Explicitly mark as unused + variable_mapping: dict[str, Sequence[str]] = {"query": node_data.query} - variable_mapping: dict[str, Sequence[str]] = {"query": typed_node_data.query} - - if typed_node_data.instruction: - selectors = variable_template_parser.extract_selectors_from_template(typed_node_data.instruction) + if node_data.instruction: + selectors = variable_template_parser.extract_selectors_from_template(node_data.instruction) for selector in selectors: variable_mapping[selector.variable] = selector.value_selector diff --git a/api/dify_graph/nodes/protocols.py b/api/dify_graph/nodes/protocols.py index cc007150f1..62d3bcdca1 100644 --- a/api/dify_graph/nodes/protocols.py +++ b/api/dify_graph/nodes/protocols.py @@ -1,8 +1,10 @@ +from collections.abc import Generator from typing import Any, Protocol import httpx from dify_graph.file import File +from dify_graph.file.models import ToolFile class HttpClientProtocol(Protocol): @@ -40,3 +42,5 @@ class ToolFileManagerProtocol(Protocol): mimetype: str, filename: str | None = None, ) -> Any: ... + + def get_file_generator_by_tool_file_id(self, tool_file_id: str) -> tuple[Generator | None, ToolFile | None]: ... diff --git a/api/dify_graph/nodes/question_classifier/entities.py b/api/dify_graph/nodes/question_classifier/entities.py index 03e0a0ac53..0c1601d439 100644 --- a/api/dify_graph/nodes/question_classifier/entities.py +++ b/api/dify_graph/nodes/question_classifier/entities.py @@ -1,7 +1,8 @@ from pydantic import BaseModel, Field from core.prompt.entities.advanced_prompt_entities import MemoryConfig -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.llm import ModelConfig, VisionConfig @@ -11,6 +12,7 @@ class ClassConfig(BaseModel): class QuestionClassifierNodeData(BaseNodeData): + type: NodeType = BuiltinNodeTypes.QUESTION_CLASSIFIER query_variable_selector: list[str] model: ModelConfig classes: list[ClassConfig] diff --git a/api/dify_graph/nodes/question_classifier/question_classifier_node.py b/api/dify_graph/nodes/question_classifier/question_classifier_node.py index 860db05c84..c2f9fb5a6b 100644 --- a/api/dify_graph/nodes/question_classifier/question_classifier_node.py +++ b/api/dify_graph/nodes/question_classifier/question_classifier_node.py @@ -7,9 +7,10 @@ from core.model_manager import ModelInstance from core.prompt.simple_prompt_transform import ModelMode from core.prompt.utils.prompt_message_util import PromptMessageUtil from dify_graph.entities import GraphInitParams +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( + BuiltinNodeTypes, NodeExecutionType, - NodeType, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -28,6 +29,7 @@ from dify_graph.nodes.llm import ( ) from dify_graph.nodes.llm.file_saver import FileSaverImpl, LLMFileSaver from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory +from dify_graph.nodes.protocols import HttpClientProtocol from libs.json_in_md_parser import parse_and_check_json_markdown from .entities import QuestionClassifierNodeData @@ -48,7 +50,7 @@ if TYPE_CHECKING: class QuestionClassifierNode(Node[QuestionClassifierNodeData]): - node_type = NodeType.QUESTION_CLASSIFIER + node_type = BuiltinNodeTypes.QUESTION_CLASSIFIER execution_type = NodeExecutionType.BRANCH _file_outputs: list["File"] @@ -61,13 +63,14 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", *, credentials_provider: "CredentialsProvider", model_factory: "ModelFactory", model_instance: ModelInstance, + http_client: HttpClientProtocol, memory: PromptMessageMemory | None = None, llm_file_saver: LLMFileSaver | None = None, ): @@ -90,6 +93,7 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): llm_file_saver = FileSaverImpl( user_id=dify_ctx.user_id, tenant_id=dify_ctx.tenant_id, + http_client=http_client, ) self._llm_file_saver = llm_file_saver @@ -252,16 +256,13 @@ class QuestionClassifierNode(Node[QuestionClassifierNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: QuestionClassifierNodeData, ) -> Mapping[str, Sequence[str]]: # graph_config is not used in this node type - # Create typed NodeData from dict - typed_node_data = QuestionClassifierNodeData.model_validate(node_data) - - variable_mapping = {"query": typed_node_data.query_variable_selector} + variable_mapping = {"query": node_data.query_variable_selector} variable_selectors: list[VariableSelector] = [] - if typed_node_data.instruction: - variable_template_parser = VariableTemplateParser(template=typed_node_data.instruction) + if node_data.instruction: + variable_template_parser = VariableTemplateParser(template=node_data.instruction) variable_selectors.extend(variable_template_parser.extract_variable_selectors()) for variable_selector in variable_selectors: variable_mapping[variable_selector.variable] = list(variable_selector.value_selector) diff --git a/api/dify_graph/nodes/start/entities.py b/api/dify_graph/nodes/start/entities.py index 0df832740e..92ebd1a2ec 100644 --- a/api/dify_graph/nodes/start/entities.py +++ b/api/dify_graph/nodes/start/entities.py @@ -2,7 +2,8 @@ from collections.abc import Sequence from pydantic import Field -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.variables.input_entities import VariableEntity @@ -11,4 +12,5 @@ class StartNodeData(BaseNodeData): Start Node Data """ + type: NodeType = BuiltinNodeTypes.START variables: Sequence[VariableEntity] = Field(default_factory=list) diff --git a/api/dify_graph/nodes/start/start_node.py b/api/dify_graph/nodes/start/start_node.py index c09ead0124..5e6055ea34 100644 --- a/api/dify_graph/nodes/start/start_node.py +++ b/api/dify_graph/nodes/start/start_node.py @@ -3,7 +3,7 @@ from typing import Any from jsonschema import Draft7Validator, ValidationError from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID -from dify_graph.enums import NodeExecutionType, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.start.entities import StartNodeData @@ -11,7 +11,7 @@ from dify_graph.variables.input_entities import VariableEntityType class StartNode(Node[StartNodeData]): - node_type = NodeType.START + node_type = BuiltinNodeTypes.START execution_type = NodeExecutionType.ROOT @classmethod diff --git a/api/dify_graph/nodes/template_transform/entities.py b/api/dify_graph/nodes/template_transform/entities.py index 123fd41f81..ac29239958 100644 --- a/api/dify_graph/nodes/template_transform/entities.py +++ b/api/dify_graph/nodes/template_transform/entities.py @@ -1,4 +1,5 @@ -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.entities import VariableSelector @@ -7,5 +8,6 @@ class TemplateTransformNodeData(BaseNodeData): Template Transform Node Data. """ + type: NodeType = BuiltinNodeTypes.TEMPLATE_TRANSFORM variables: list[VariableSelector] template: str diff --git a/api/dify_graph/nodes/template_transform/template_transform_node.py b/api/dify_graph/nodes/template_transform/template_transform_node.py index 367442e997..dc6fce2b0a 100644 --- a/api/dify_graph/nodes/template_transform/template_transform_node.py +++ b/api/dify_graph/nodes/template_transform/template_transform_node.py @@ -1,7 +1,8 @@ from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.template_transform.entities import TemplateTransformNodeData @@ -18,14 +19,14 @@ DEFAULT_TEMPLATE_TRANSFORM_MAX_OUTPUT_LENGTH = 400_000 class TemplateTransformNode(Node[TemplateTransformNodeData]): - node_type = NodeType.TEMPLATE_TRANSFORM + node_type = BuiltinNodeTypes.TEMPLATE_TRANSFORM _template_renderer: Jinja2TemplateRenderer _max_output_length: int def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", *, @@ -86,12 +87,9 @@ class TemplateTransformNode(Node[TemplateTransformNodeData]): @classmethod def _extract_variable_selector_to_variable_mapping( - cls, *, graph_config: Mapping[str, Any], node_id: str, node_data: Mapping[str, Any] + cls, *, graph_config: Mapping[str, Any], node_id: str, node_data: TemplateTransformNodeData ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = TemplateTransformNodeData.model_validate(node_data) - return { node_id + "." + variable_selector.variable: variable_selector.value_selector - for variable_selector in typed_node_data.variables + for variable_selector in node_data.variables } diff --git a/api/dify_graph/nodes/tool/entities.py b/api/dify_graph/nodes/tool/entities.py index cd690fff04..2c0faaf4bb 100644 --- a/api/dify_graph/nodes/tool/entities.py +++ b/api/dify_graph/nodes/tool/entities.py @@ -6,7 +6,8 @@ from pydantic import BaseModel, field_validator, model_validator from pydantic_core.core_schema import ValidationInfo from core.tools.entities.tool_entities import ToolProviderType -from dify_graph.nodes.base.entities import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType # Pattern to match mention format: {{@node.context@}}instruction MENTION_VALUE_PATTERN = re.compile(r"^\{\{@([a-zA-Z0-9_]+)\.context@\}\}(.*)$", re.DOTALL) @@ -69,6 +70,8 @@ class ToolEntity(BaseModel): class ToolNodeData(BaseNodeData, ToolEntity): + type: NodeType = BuiltinNodeTypes.TOOL + class ToolInput(BaseModel): # TODO: check this type value: Union[Any, list[str]] diff --git a/api/dify_graph/nodes/tool/tool_node.py b/api/dify_graph/nodes/tool/tool_node.py index 06fe788682..6d67526f35 100644 --- a/api/dify_graph/nodes/tool/tool_node.py +++ b/api/dify_graph/nodes/tool/tool_node.py @@ -2,19 +2,15 @@ import logging from collections.abc import Generator, Mapping, Sequence from typing import TYPE_CHECKING, Any -from sqlalchemy import select - -logger = logging.getLogger(__name__) -from sqlalchemy.orm import Session - from core.callback_handler.workflow_tool_callback_handler import DifyWorkflowCallbackHandler from core.tools.__base.tool import Tool from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter from core.tools.errors import ToolInvokeError from core.tools.tool_engine import ToolEngine from core.tools.utils.message_transformer import ToolFileMessageTransformer +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, SystemVariableKey, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, @@ -24,11 +20,10 @@ from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.node_events import NodeEventBase, NodeRunResult, StreamChunkEvent, StreamCompletedEvent from dify_graph.nodes.base.node import Node from dify_graph.nodes.base.variable_template_parser import VariableTemplateParser +from dify_graph.nodes.protocols import ToolFileManagerProtocol from dify_graph.variables.segments import ArrayAnySegment, ArrayFileSegment from dify_graph.variables.variables import ArrayAnyVariable -from extensions.ext_database import db from factories import file_factory -from models import ToolFile from services.tools.builtin_tools_manage_service import BuiltinToolManageService from .entities import ToolNodeData, is_variable_format @@ -39,7 +34,8 @@ from .exc import ( ) if TYPE_CHECKING: - from dify_graph.runtime import VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool class ToolNode(Node[ToolNodeData]): @@ -47,12 +43,33 @@ class ToolNode(Node[ToolNodeData]): Tool Node """ - node_type = NodeType.TOOL + node_type = BuiltinNodeTypes.TOOL + + def __init__( + self, + id: str, + config: NodeConfigDict, + graph_init_params: "GraphInitParams", + graph_runtime_state: "GraphRuntimeState", + *, + tool_file_manager_factory: ToolFileManagerProtocol, + ): + super().__init__( + id=id, + config=config, + graph_init_params=graph_init_params, + graph_runtime_state=graph_runtime_state, + ) + self._tool_file_manager_factory = tool_file_manager_factory @classmethod def version(cls) -> str: return "1" + def populate_start_event(self, event) -> None: + event.provider_id = self.node_data.provider_id + event.provider_type = self.node_data.provider_type + def _run(self) -> Generator[NodeEventBase, None, None]: """ Run the tool node @@ -296,11 +313,9 @@ class ToolNode(Node[ToolNodeData]): tool_file_id = str(url).split("/")[-1].split(".")[0] - with Session(db.engine) as session: - stmt = select(ToolFile).where(ToolFile.id == tool_file_id) - tool_file = session.scalar(stmt) - if tool_file is None: - raise ToolFileError(f"Tool file {tool_file_id} does not exist") + _, tool_file = self._tool_file_manager_factory.get_file_generator_by_tool_file_id(tool_file_id) + if not tool_file: + raise ToolFileError(f"tool file {tool_file_id} not found") mapping = { "tool_file_id": tool_file_id, @@ -319,11 +334,9 @@ class ToolNode(Node[ToolNodeData]): assert message.meta tool_file_id = message.message.text.split("/")[-1].split(".")[0] - with Session(db.engine) as session: - stmt = select(ToolFile).where(ToolFile.id == tool_file_id) - tool_file = session.scalar(stmt) - if tool_file is None: - raise ToolFileError(f"tool file {tool_file_id} not exists") + _, tool_file = self._tool_file_manager_factory.get_file_generator_by_tool_file_id(tool_file_id) + if not tool_file: + raise ToolFileError(f"tool file {tool_file_id} not exists") mapping = { "tool_file_id": tool_file_id, @@ -499,7 +512,7 @@ class ToolNode(Node[ToolNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: ToolNodeData, ) -> Mapping[str, Sequence[str]]: """ Extract variable selector to variable mapping. @@ -514,9 +527,7 @@ class ToolNode(Node[ToolNodeData]): :param node_data: node data :return: mapping of variable key to variable selector """ - # Create typed NodeData from dict - typed_node_data = ToolNodeData.model_validate(node_data) - + typed_node_data = node_data result: dict[str, Sequence[str]] = {} for parameter_name in typed_node_data.tool_parameters: input = typed_node_data.tool_parameters[parameter_name] diff --git a/api/dify_graph/nodes/trigger_schedule/__init__.py b/api/dify_graph/nodes/trigger_schedule/__init__.py deleted file mode 100644 index c9b3ae6a0d..0000000000 --- a/api/dify_graph/nodes/trigger_schedule/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from dify_graph.nodes.trigger_schedule.trigger_schedule_node import TriggerScheduleNode - -__all__ = ["TriggerScheduleNode"] diff --git a/api/dify_graph/nodes/trigger_webhook/entities.py b/api/dify_graph/nodes/trigger_webhook/entities.py deleted file mode 100644 index fa36aeabd3..0000000000 --- a/api/dify_graph/nodes/trigger_webhook/entities.py +++ /dev/null @@ -1,79 +0,0 @@ -from collections.abc import Sequence -from enum import StrEnum -from typing import Literal - -from pydantic import BaseModel, Field, field_validator - -from dify_graph.nodes.base import BaseNodeData - - -class Method(StrEnum): - GET = "get" - POST = "post" - HEAD = "head" - PATCH = "patch" - PUT = "put" - DELETE = "delete" - - -class ContentType(StrEnum): - JSON = "application/json" - FORM_DATA = "multipart/form-data" - FORM_URLENCODED = "application/x-www-form-urlencoded" - TEXT = "text/plain" - BINARY = "application/octet-stream" - - -class WebhookParameter(BaseModel): - """Parameter definition for headers, query params, or body.""" - - name: str - required: bool = False - - -class WebhookBodyParameter(BaseModel): - """Body parameter with type information.""" - - name: str - type: Literal[ - "string", - "number", - "boolean", - "object", - "array[string]", - "array[number]", - "array[boolean]", - "array[object]", - "file", - ] = "string" - required: bool = False - - -class WebhookData(BaseNodeData): - """ - Webhook Node Data. - """ - - class SyncMode(StrEnum): - SYNC = "async" # only support - - method: Method = Method.GET - content_type: ContentType = Field(default=ContentType.JSON) - headers: Sequence[WebhookParameter] = Field(default_factory=list) - params: Sequence[WebhookParameter] = Field(default_factory=list) # query parameters - body: Sequence[WebhookBodyParameter] = Field(default_factory=list) - - @field_validator("method", mode="before") - @classmethod - def normalize_method(cls, v) -> str: - """Normalize HTTP method to lowercase to support both uppercase and lowercase input.""" - if isinstance(v, str): - return v.lower() - return v - - status_code: int = 200 # Expected status code for response - response_body: str = "" # Template for response body - - # Webhook specific fields (not from client data, set internally) - webhook_id: str | None = None # Set when webhook trigger is created - timeout: int = 30 # Timeout in seconds to wait for webhook response diff --git a/api/dify_graph/nodes/variable_aggregator/entities.py b/api/dify_graph/nodes/variable_aggregator/entities.py index 5f7c1dbe93..4779ebd9a9 100644 --- a/api/dify_graph/nodes/variable_aggregator/entities.py +++ b/api/dify_graph/nodes/variable_aggregator/entities.py @@ -1,6 +1,7 @@ from pydantic import BaseModel -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.variables.types import SegmentType @@ -28,6 +29,7 @@ class VariableAggregatorNodeData(BaseNodeData): Variable Aggregator Node Data. """ + type: NodeType = BuiltinNodeTypes.VARIABLE_AGGREGATOR output_type: str variables: list[list[str]] advanced_settings: AdvancedSettings | None = None diff --git a/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py b/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py index 98ab8105fe..7d26de6232 100644 --- a/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py +++ b/api/dify_graph/nodes/variable_aggregator/variable_aggregator_node.py @@ -1,6 +1,6 @@ from collections.abc import Mapping -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.variable_aggregator.entities import VariableAggregatorNodeData @@ -8,7 +8,7 @@ from dify_graph.variables.segments import Segment class VariableAggregatorNode(Node[VariableAggregatorNodeData]): - node_type = NodeType.VARIABLE_AGGREGATOR + node_type = BuiltinNodeTypes.VARIABLE_AGGREGATOR @classmethod def version(cls) -> str: diff --git a/api/dify_graph/nodes/variable_assigner/v1/node.py b/api/dify_graph/nodes/variable_assigner/v1/node.py index 1aa7042b02..f9b261b191 100644 --- a/api/dify_graph/nodes/variable_assigner/v1/node.py +++ b/api/dify_graph/nodes/variable_assigner/v1/node.py @@ -3,7 +3,8 @@ from typing import TYPE_CHECKING, Any from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID from dify_graph.entities import GraphInitParams -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.variable_assigner.common import helpers as common_helpers @@ -17,12 +18,12 @@ if TYPE_CHECKING: class VariableAssignerNode(Node[VariableAssignerData]): - node_type = NodeType.VARIABLE_ASSIGNER + node_type = BuiltinNodeTypes.VARIABLE_ASSIGNER def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", ): @@ -52,21 +53,18 @@ class VariableAssignerNode(Node[VariableAssignerData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: VariableAssignerData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = VariableAssignerData.model_validate(node_data) - mapping = {} - assigned_variable_node_id = typed_node_data.assigned_variable_selector[0] + assigned_variable_node_id = node_data.assigned_variable_selector[0] if assigned_variable_node_id == CONVERSATION_VARIABLE_NODE_ID: - selector_key = ".".join(typed_node_data.assigned_variable_selector) + selector_key = ".".join(node_data.assigned_variable_selector) key = f"{node_id}.#{selector_key}#" - mapping[key] = typed_node_data.assigned_variable_selector + mapping[key] = node_data.assigned_variable_selector - selector_key = ".".join(typed_node_data.input_variable_selector) + selector_key = ".".join(node_data.input_variable_selector) key = f"{node_id}.#{selector_key}#" - mapping[key] = typed_node_data.input_variable_selector + mapping[key] = node_data.input_variable_selector return mapping def _run(self) -> NodeRunResult: diff --git a/api/dify_graph/nodes/variable_assigner/v1/node_data.py b/api/dify_graph/nodes/variable_assigner/v1/node_data.py index 11e8f93f35..57acb29535 100644 --- a/api/dify_graph/nodes/variable_assigner/v1/node_data.py +++ b/api/dify_graph/nodes/variable_assigner/v1/node_data.py @@ -1,7 +1,8 @@ from collections.abc import Sequence from enum import StrEnum -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType class WriteMode(StrEnum): @@ -11,6 +12,7 @@ class WriteMode(StrEnum): class VariableAssignerData(BaseNodeData): + type: NodeType = BuiltinNodeTypes.VARIABLE_ASSIGNER assigned_variable_selector: Sequence[str] write_mode: WriteMode input_variable_selector: Sequence[str] diff --git a/api/dify_graph/nodes/variable_assigner/v2/entities.py b/api/dify_graph/nodes/variable_assigner/v2/entities.py index 5f9211d600..2b2bbe85de 100644 --- a/api/dify_graph/nodes/variable_assigner/v2/entities.py +++ b/api/dify_graph/nodes/variable_assigner/v2/entities.py @@ -3,7 +3,8 @@ from typing import Any from pydantic import BaseModel, Field -from dify_graph.nodes.base import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from .enums import InputType, Operation @@ -22,5 +23,6 @@ class VariableOperationItem(BaseModel): class VariableAssignerNodeData(BaseNodeData): + type: NodeType = BuiltinNodeTypes.VARIABLE_ASSIGNER version: str = "2" items: Sequence[VariableOperationItem] = Field(default_factory=list) diff --git a/api/dify_graph/nodes/variable_assigner/v2/node.py b/api/dify_graph/nodes/variable_assigner/v2/node.py index 7753382cd0..f04a6b3b80 100644 --- a/api/dify_graph/nodes/variable_assigner/v2/node.py +++ b/api/dify_graph/nodes/variable_assigner/v2/node.py @@ -3,7 +3,8 @@ from collections.abc import Mapping, MutableMapping, Sequence from typing import TYPE_CHECKING, Any from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.entities.graph_config import NodeConfigDict +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.variable_assigner.common import helpers as common_helpers @@ -51,12 +52,12 @@ def _source_mapping_from_item(mapping: MutableMapping[str, Sequence[str]], node_ class VariableAssignerNode(Node[VariableAssignerNodeData]): - node_type = NodeType.VARIABLE_ASSIGNER + node_type = BuiltinNodeTypes.VARIABLE_ASSIGNER def __init__( self, id: str, - config: Mapping[str, Any], + config: NodeConfigDict, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", ): @@ -94,13 +95,10 @@ class VariableAssignerNode(Node[VariableAssignerNodeData]): *, graph_config: Mapping[str, Any], node_id: str, - node_data: Mapping[str, Any], + node_data: VariableAssignerNodeData, ) -> Mapping[str, Sequence[str]]: - # Create typed NodeData from dict - typed_node_data = VariableAssignerNodeData.model_validate(node_data) - var_mapping: dict[str, Sequence[str]] = {} - for item in typed_node_data.items: + for item in node_data.items: _target_mapping_from_item(var_mapping, node_id, item) _source_mapping_from_item(var_mapping, node_id, item) return var_mapping diff --git a/api/dify_graph/repositories/summary_index_service_protocol.py b/api/dify_graph/repositories/summary_index_service_protocol.py deleted file mode 100644 index cbcfdd2a77..0000000000 --- a/api/dify_graph/repositories/summary_index_service_protocol.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import Protocol - - -class SummaryIndexServiceProtocol(Protocol): - def generate_and_vectorize_summary( - self, dataset_id: str, document_id: str, is_preview: bool, summary_index_setting: dict | None = None - ): ... diff --git a/api/dify_graph/runtime/variable_pool.py b/api/dify_graph/runtime/variable_pool.py index 4602d690e2..b8c65ebbb6 100644 --- a/api/dify_graph/runtime/variable_pool.py +++ b/api/dify_graph/runtime/variable_pool.py @@ -65,9 +65,15 @@ class VariablePool(BaseModel): # Add environment variables to the variable pool for var in self.environment_variables: self.add((ENVIRONMENT_VARIABLE_NODE_ID, var.name), var) - # Add conversation variables to the variable pool + # Add conversation variables to the variable pool. When restoring from a serialized + # snapshot, `variable_dictionary` already carries the latest runtime values. + # In that case, keep existing entries instead of overwriting them with the + # bootstrap list. for var in self.conversation_variables: - self.add((CONVERSATION_VARIABLE_NODE_ID, var.name), var) + selector = (CONVERSATION_VARIABLE_NODE_ID, var.name) + if self._has(selector): + continue + self.add(selector, var) # Add rag pipeline variables to the variable pool if self.rag_pipeline_variables: rag_pipeline_variables_map: defaultdict[Any, dict[Any, Any]] = defaultdict(dict) diff --git a/api/docker/entrypoint.sh b/api/docker/entrypoint.sh index 196fd3921c..48533efe66 100755 --- a/api/docker/entrypoint.sh +++ b/api/docker/entrypoint.sh @@ -35,10 +35,10 @@ if [[ "${MODE}" == "worker" ]]; then if [[ -z "${CELERY_QUEUES}" ]]; then if [[ "${EDITION}" == "CLOUD" ]]; then # Cloud edition: separate queues for dataset and trigger tasks - DEFAULT_QUEUES="api_token,dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" + DEFAULT_QUEUES="api_token,dataset,dataset_summary,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" else # Community edition (SELF_HOSTED): dataset, pipeline and workflow have separate queues - DEFAULT_QUEUES="api_token,dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" + DEFAULT_QUEUES="api_token,dataset,dataset_summary,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" fi else DEFAULT_QUEUES="${CELERY_QUEUES}" diff --git a/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py b/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py index 5c02a16a7d..c43e99f0f4 100644 --- a/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py +++ b/api/events/event_handlers/delete_tool_parameters_cache_when_sync_draft_workflow.py @@ -2,7 +2,7 @@ import logging from core.tools.tool_manager import ToolManager from core.tools.utils.configuration import ToolParameterConfigurationManager -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.tool.entities import ToolEntity from events.app_event import app_draft_workflow_was_synced @@ -16,7 +16,7 @@ def handle(sender, **kwargs): if synced_draft_workflow is None: return for node_data in synced_draft_workflow.graph_dict.get("nodes", []): - if node_data.get("data", {}).get("type") == NodeType.TOOL: + if node_data.get("data", {}).get("type") == BuiltinNodeTypes.TOOL: try: tool_entity = ToolEntity.model_validate(node_data["data"]) tool_runtime = ToolManager.get_tool_runtime( diff --git a/api/events/event_handlers/sync_workflow_schedule_when_app_published.py b/api/events/event_handlers/sync_workflow_schedule_when_app_published.py index 90f562d167..168513fc04 100644 --- a/api/events/event_handlers/sync_workflow_schedule_when_app_published.py +++ b/api/events/event_handlers/sync_workflow_schedule_when_app_published.py @@ -4,7 +4,7 @@ from typing import cast from sqlalchemy import select from sqlalchemy.orm import Session -from dify_graph.nodes.trigger_schedule.entities import SchedulePlanUpdate +from core.workflow.nodes.trigger_schedule.entities import SchedulePlanUpdate from events.app_event import app_published_workflow_was_updated from extensions.ext_database import db from models import AppMode, Workflow, WorkflowSchedulePlan diff --git a/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py b/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py index 69959acd19..b70c2183d2 100644 --- a/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py +++ b/api/events/event_handlers/update_app_dataset_join_when_app_model_config_updated.py @@ -1,3 +1,5 @@ +from typing import Any, cast + from sqlalchemy import select from events.app_event import app_model_config_was_updated @@ -54,9 +56,11 @@ def get_dataset_ids_from_model_config(app_model_config: AppModelConfig) -> set[s continue tool_type = list(tool.keys())[0] - tool_config = list(tool.values())[0] + tool_config = cast(dict[str, Any], list(tool.values())[0]) if tool_type == "dataset": - dataset_ids.add(tool_config.get("id")) + dataset_id = tool_config.get("id") + if isinstance(dataset_id, str): + dataset_ids.add(dataset_id) # get dataset from dataset_configs dataset_configs = app_model_config.dataset_configs_dict diff --git a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py index 8da33d03b9..92bc9db075 100644 --- a/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py +++ b/api/events/event_handlers/update_app_dataset_join_when_app_published_workflow_updated.py @@ -2,8 +2,8 @@ from typing import cast from sqlalchemy import select -from dify_graph.nodes import NodeType -from dify_graph.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from dify_graph.nodes import BuiltinNodeTypes from events.app_event import app_published_workflow_was_updated from extensions.ext_database import db from models.dataset import AppDatasetJoin @@ -53,7 +53,7 @@ def get_dataset_ids_from_workflow(published_workflow: Workflow) -> set[str]: # fetch all knowledge retrieval nodes knowledge_retrieval_nodes = [ - node for node in nodes if node.get("data", {}).get("type") == NodeType.KNOWLEDGE_RETRIEVAL + node for node in nodes if node.get("data", {}).get("type") == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL ] if not knowledge_retrieval_nodes: diff --git a/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py b/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py index fd211a3e55..b3917d5622 100644 --- a/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py +++ b/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py @@ -3,7 +3,7 @@ from typing import cast from sqlalchemy import select from sqlalchemy.orm import Session -from dify_graph.nodes import NodeType +from core.trigger.constants import TRIGGER_NODE_TYPES from events.app_event import app_published_workflow_was_updated from extensions.ext_database import db from models import AppMode @@ -98,7 +98,7 @@ def get_trigger_infos_from_workflow(published_workflow: Workflow) -> list[dict]: return [] nodes = graph.get("nodes", []) - trigger_types = {NodeType.TRIGGER_WEBHOOK.value, NodeType.TRIGGER_SCHEDULE.value, NodeType.TRIGGER_PLUGIN.value} + trigger_types = TRIGGER_NODE_TYPES trigger_infos = [ { diff --git a/api/extensions/ext_commands.py b/api/extensions/ext_commands.py index 2b3cb97930..e3aba840de 100644 --- a/api/extensions/ext_commands.py +++ b/api/extensions/ext_commands.py @@ -13,6 +13,7 @@ def init_app(app: DifyApp): convert_to_agent_apps, create_tenant, delete_archived_workflow_runs, + export_app_messages, extract_plugins, extract_unique_plugins, file_usage, @@ -68,6 +69,7 @@ def init_app(app: DifyApp): restore_workflow_runs, clean_workflow_runs, clean_expired_messages, + export_app_messages, ] for cmd in cmds_to_register: app.cli.add_command(cmd) diff --git a/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py b/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py index 7ee4638e77..a94d75ec76 100644 --- a/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py +++ b/api/extensions/logstore/repositories/logstore_api_workflow_node_execution_repository.py @@ -17,7 +17,8 @@ from dify_graph.enums import WorkflowNodeExecutionStatus from extensions.logstore.aliyun_logstore import AliyunLogStore from extensions.logstore.repositories import safe_float, safe_int from extensions.logstore.sql_escape import escape_identifier, escape_logstore_query_value -from models.workflow import WorkflowNodeExecutionModel +from models.enums import CreatorUserRole +from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository logger = logging.getLogger(__name__) @@ -47,12 +48,28 @@ def _dict_to_workflow_node_execution_model(data: dict[str, Any]) -> WorkflowNode model.tenant_id = data.get("tenant_id") or "" model.app_id = data.get("app_id") or "" model.workflow_id = data.get("workflow_id") or "" - model.triggered_from = data.get("triggered_from") or "" + triggered_from_val = data.get("triggered_from") + try: + model.triggered_from = ( + WorkflowNodeExecutionTriggeredFrom(str(triggered_from_val)) + if triggered_from_val + else WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN + ) + except ValueError: + logger.warning("Invalid triggered_from value: %s, falling back to WORKFLOW_RUN", triggered_from_val) + model.triggered_from = WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN model.node_id = data.get("node_id") or "" model.node_type = data.get("node_type") or "" model.status = data.get("status") or "running" # Default status if missing model.title = data.get("title") or "" - model.created_by_role = data.get("created_by_role") or "" + created_by_role_val = data.get("created_by_role") + try: + model.created_by_role = ( + CreatorUserRole(str(created_by_role_val)) if created_by_role_val else CreatorUserRole.ACCOUNT + ) + except ValueError: + logger.warning("Invalid created_by_role value: %s, falling back to ACCOUNT", created_by_role_val) + model.created_by_role = CreatorUserRole.ACCOUNT model.created_by = data.get("created_by") or "" model.index = safe_int(data.get("index", 0)) diff --git a/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py b/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py index 14382ed876..bdfc81bd1c 100644 --- a/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py +++ b/api/extensions/logstore/repositories/logstore_api_workflow_run_repository.py @@ -22,12 +22,13 @@ from typing import Any, cast from sqlalchemy.orm import sessionmaker +from dify_graph.enums import WorkflowExecutionStatus from extensions.logstore.aliyun_logstore import AliyunLogStore from extensions.logstore.repositories import safe_float, safe_int from extensions.logstore.sql_escape import escape_identifier, escape_logstore_query_value, escape_sql_string from libs.infinite_scroll_pagination import InfiniteScrollPagination -from models.enums import WorkflowRunTriggeredFrom -from models.workflow import WorkflowRun +from models.enums import CreatorUserRole, WorkflowRunTriggeredFrom +from models.workflow import WorkflowRun, WorkflowType from repositories.api_workflow_run_repository import APIWorkflowRunRepository from repositories.types import ( AverageInteractionStats, @@ -59,11 +60,37 @@ def _dict_to_workflow_run(data: dict[str, Any]) -> WorkflowRun: model.tenant_id = data.get("tenant_id") or "" model.app_id = data.get("app_id") or "" model.workflow_id = data.get("workflow_id") or "" - model.type = data.get("type") or "" - model.triggered_from = data.get("triggered_from") or "" + type_val = data.get("type") + try: + model.type = WorkflowType(str(type_val)) if type_val else WorkflowType.WORKFLOW + except ValueError: + logger.warning("Invalid type value: %s, falling back to WORKFLOW", type_val) + model.type = WorkflowType.WORKFLOW + triggered_from_val = data.get("triggered_from") + try: + model.triggered_from = ( + WorkflowRunTriggeredFrom(str(triggered_from_val)) + if triggered_from_val + else WorkflowRunTriggeredFrom.APP_RUN + ) + except ValueError: + logger.warning("Invalid triggered_from value: %s, falling back to APP_RUN", triggered_from_val) + model.triggered_from = WorkflowRunTriggeredFrom.APP_RUN model.version = data.get("version") or "" - model.status = data.get("status") or "running" # Default status if missing - model.created_by_role = data.get("created_by_role") or "" + status_val = data.get("status") + try: + model.status = WorkflowExecutionStatus(str(status_val)) if status_val else WorkflowExecutionStatus.RUNNING + except ValueError: + logger.warning("Invalid status value: %s, falling back to RUNNING", status_val) + model.status = WorkflowExecutionStatus.RUNNING + created_by_role_val = data.get("created_by_role") + try: + model.created_by_role = ( + CreatorUserRole(str(created_by_role_val)) if created_by_role_val else CreatorUserRole.ACCOUNT + ) + except ValueError: + logger.warning("Invalid created_by_role value: %s, falling back to ACCOUNT", created_by_role_val) + model.created_by_role = CreatorUserRole.ACCOUNT model.created_by = data.get("created_by") or "" model.total_tokens = safe_int(data.get("total_tokens", 0)) diff --git a/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py b/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py index bd1c08d96e..d84c0bc432 100644 --- a/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py +++ b/api/extensions/logstore/repositories/logstore_workflow_node_execution_repository.py @@ -19,7 +19,6 @@ from sqlalchemy.orm import sessionmaker from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository from dify_graph.entities import WorkflowNodeExecution from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus -from dify_graph.enums import NodeType from dify_graph.model_runtime.utils.encoders import jsonable_encoder from dify_graph.repositories.workflow_node_execution_repository import OrderConfig, WorkflowNodeExecutionRepository from dify_graph.workflow_type_encoder import WorkflowRuntimeTypeConverter @@ -78,7 +77,7 @@ def _dict_to_workflow_node_execution(data: dict[str, Any]) -> WorkflowNodeExecut index=safe_int(data.get("index", 0)), predecessor_node_id=data.get("predecessor_node_id"), node_id=data.get("node_id", ""), - node_type=NodeType(data.get("node_type", "start")), + node_type=data.get("node_type", "start"), title=data.get("title", ""), inputs=inputs, process_data=process_data, @@ -185,7 +184,7 @@ class LogstoreWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository): ("predecessor_node_id", domain_model.predecessor_node_id or ""), ("node_execution_id", domain_model.node_execution_id or ""), ("node_id", domain_model.node_id), - ("node_type", domain_model.node_type.value), + ("node_type", domain_model.node_type), ("title", domain_model.title), ( "inputs", diff --git a/api/extensions/otel/celery_sqlcommenter.py b/api/extensions/otel/celery_sqlcommenter.py new file mode 100644 index 0000000000..8abb1ce15a --- /dev/null +++ b/api/extensions/otel/celery_sqlcommenter.py @@ -0,0 +1,114 @@ +""" +Celery SQL comment context for OpenTelemetry SQLCommenter. + +Injects Celery-specific metadata (framework, task_name, traceparent, celery_retries, +routing_key) into SQL comments for queries executed by Celery workers. This improves +trace-to-SQL correlation and debugging in production. + +Uses the OpenTelemetry context key SQLCOMMENTER_ORM_TAGS_AND_VALUES, which is read +by opentelemetry.instrumentation.sqlcommenter_utils._add_framework_tags() when the +SQLAlchemy instrumentor appends comments to SQL statements. +""" + +import logging +from typing import Any + +from celery.signals import task_postrun, task_prerun +from opentelemetry import context +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator + +logger = logging.getLogger(__name__) +_TRACE_PROPAGATOR = TraceContextTextMapPropagator() + +_SQLCOMMENTER_CONTEXT_KEY = "SQLCOMMENTER_ORM_TAGS_AND_VALUES" +_TOKEN_ATTR = "_dify_sqlcommenter_context_token" + + +def _build_celery_sqlcommenter_tags(task: Any) -> dict[str, str | int]: + """Build SQL commenter tags from the current Celery task and OpenTelemetry context.""" + tags: dict[str, str | int] = {} + + try: + tags["framework"] = f"celery:{_get_celery_version()}" + except Exception: + tags["framework"] = "celery:unknown" + + if task and getattr(task, "name", None): + tags["task_name"] = str(task.name) + + traceparent = _get_traceparent() + if traceparent: + tags["traceparent"] = traceparent + + if task and hasattr(task, "request"): + request = task.request + retries = getattr(request, "retries", None) + if retries is not None and retries > 0: + tags["celery_retries"] = int(retries) + + delivery_info = getattr(request, "delivery_info", None) or {} + if isinstance(delivery_info, dict): + routing_key = delivery_info.get("routing_key") + if routing_key: + tags["routing_key"] = str(routing_key) + + return tags + + +def _get_celery_version() -> str: + import celery + + return getattr(celery, "__version__", "unknown") + + +def _get_traceparent() -> str | None: + """Extract traceparent from the current OpenTelemetry context.""" + carrier: dict[str, str] = {} + _TRACE_PROPAGATOR.inject(carrier) + return carrier.get("traceparent") + + +def _on_task_prerun(*args: object, **kwargs: object) -> None: + task = kwargs.get("task") + if not task: + return + + tags = _build_celery_sqlcommenter_tags(task) + if not tags: + return + + current = context.get_current() + new_ctx = context.set_value(_SQLCOMMENTER_CONTEXT_KEY, tags, current) + token = context.attach(new_ctx) + setattr(task, _TOKEN_ATTR, token) + + +def _on_task_postrun(*args: object, **kwargs: object) -> None: + task = kwargs.get("task") + if not task: + return + + token = getattr(task, _TOKEN_ATTR, None) + if token is None: + return + + try: + context.detach(token) + except Exception: + logger.debug("Failed to detach SQL commenter context", exc_info=True) + finally: + try: + delattr(task, _TOKEN_ATTR) + except AttributeError: + pass + + +def setup_celery_sqlcommenter() -> None: + """ + Connect Celery task_prerun and task_postrun handlers to inject SQL comment + context for worker queries. Call this from init_celery_worker after + CeleryInstrumentor().instrument() so our handlers run after the OTEL + instrumentor's and the trace context is already attached. + """ + task_prerun.connect(_on_task_prerun, weak=False) + task_postrun.connect(_on_task_postrun, weak=False) diff --git a/api/extensions/otel/parser/base.py b/api/extensions/otel/parser/base.py index fc84147e01..544ef3fe18 100644 --- a/api/extensions/otel/parser/base.py +++ b/api/extensions/otel/parser/base.py @@ -9,7 +9,7 @@ from opentelemetry.trace import Span from opentelemetry.trace.status import Status, StatusCode from pydantic import BaseModel -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.file.models import File from dify_graph.graph_events import GraphNodeEventBase from dify_graph.nodes.base.node import Node @@ -84,21 +84,17 @@ class DefaultNodeOTelParser: span.set_attribute("node.id", node.id) if node.execution_id: span.set_attribute("node.execution_id", node.execution_id) - if hasattr(node, "node_type") and node.node_type: - span.set_attribute("node.type", node.node_type.value) + span.set_attribute("node.type", node.node_type) span.set_attribute(GenAIAttributes.FRAMEWORK, "dify") - node_type = getattr(node, "node_type", None) - if isinstance(node_type, NodeType): - if node_type == NodeType.LLM: - span.set_attribute(GenAIAttributes.SPAN_KIND, "LLM") - elif node_type == NodeType.KNOWLEDGE_RETRIEVAL: - span.set_attribute(GenAIAttributes.SPAN_KIND, "RETRIEVER") - elif node_type == NodeType.TOOL: - span.set_attribute(GenAIAttributes.SPAN_KIND, "TOOL") - else: - span.set_attribute(GenAIAttributes.SPAN_KIND, "TASK") + node_type = node.node_type + if node_type == BuiltinNodeTypes.LLM: + span.set_attribute(GenAIAttributes.SPAN_KIND, "LLM") + elif node_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: + span.set_attribute(GenAIAttributes.SPAN_KIND, "RETRIEVER") + elif node_type == BuiltinNodeTypes.TOOL: + span.set_attribute(GenAIAttributes.SPAN_KIND, "TOOL") else: span.set_attribute(GenAIAttributes.SPAN_KIND, "TASK") diff --git a/api/extensions/otel/runtime.py b/api/extensions/otel/runtime.py index a7181d2683..b1c703f944 100644 --- a/api/extensions/otel/runtime.py +++ b/api/extensions/otel/runtime.py @@ -7,7 +7,7 @@ from celery.signals import worker_init from flask_login import user_loaded_from_request, user_logged_in from opentelemetry import trace from opentelemetry.propagate import set_global_textmap -from opentelemetry.propagators.b3 import B3Format +from opentelemetry.propagators.b3 import B3MultiFormat from opentelemetry.propagators.composite import CompositePropagator from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator @@ -24,7 +24,7 @@ def setup_context_propagation() -> None: CompositePropagator( [ TraceContextTextMapPropagator(), - B3Format(), + B3MultiFormat(), ] ) ) @@ -67,11 +67,14 @@ def init_celery_worker(*args, **kwargs): from opentelemetry.metrics import get_meter_provider from opentelemetry.trace import get_tracer_provider + from extensions.otel.celery_sqlcommenter import setup_celery_sqlcommenter + tracer_provider = get_tracer_provider() metric_provider = get_meter_provider() if dify_config.DEBUG: logger.info("Initializing OpenTelemetry for Celery worker") CeleryInstrumentor(tracer_provider=tracer_provider, meter_provider=metric_provider).instrument() + setup_celery_sqlcommenter() def is_instrument_flag_enabled() -> bool: diff --git a/api/migrations/env.py b/api/migrations/env.py index 66a4614e80..3b1fa7bb89 100644 --- a/api/migrations/env.py +++ b/api/migrations/env.py @@ -66,6 +66,7 @@ def run_migrations_offline(): context.configure( url=url, target_metadata=get_metadata(), literal_binds=True ) + logger.info("Generating offline migration SQL with url: %s", url) with context.begin_transaction(): context.run_migrations() diff --git a/api/models/account.py b/api/models/account.py index f7a9c20026..1a43c9ca17 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -8,12 +8,12 @@ from uuid import uuid4 import sqlalchemy as sa from flask_login import UserMixin from sqlalchemy import DateTime, String, func, select -from sqlalchemy.orm import Mapped, Session, mapped_column, validates +from sqlalchemy.orm import Mapped, Session, mapped_column from typing_extensions import deprecated from .base import TypeBase from .engine import db -from .types import LongText, StringUUID +from .types import EnumText, LongText, StringUUID class TenantAccountRole(enum.StrEnum): @@ -104,7 +104,9 @@ class Account(UserMixin, TypeBase): last_active_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False ) - status: Mapped[str] = mapped_column(String(16), server_default=sa.text("'active'"), default="active") + status: Mapped[AccountStatus] = mapped_column( + EnumText(AccountStatus, length=16), server_default=sa.text("'active'"), default=AccountStatus.ACTIVE + ) initialized_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False @@ -116,12 +118,6 @@ class Account(UserMixin, TypeBase): role: TenantAccountRole | None = field(default=None, init=False) _current_tenant: "Tenant | None" = field(default=None, init=False) - @validates("status") - def _normalize_status(self, _key: str, value: str | AccountStatus) -> str: - if isinstance(value, AccountStatus): - return value.value - return value - @property def is_password_set(self): return self.password is not None @@ -177,8 +173,7 @@ class Account(UserMixin, TypeBase): return self.role def get_status(self) -> AccountStatus: - status_str = self.status - return AccountStatus(status_str) + return self.status @classmethod def get_by_openid(cls, provider: str, open_id: str): @@ -249,7 +244,9 @@ class Tenant(TypeBase): name: Mapped[str] = mapped_column(String(255)) encrypt_public_key: Mapped[str | None] = mapped_column(LongText, default=None) plan: Mapped[str] = mapped_column(String(255), server_default=sa.text("'basic'"), default="basic") - status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'normal'"), default="normal") + status: Mapped[TenantStatus] = mapped_column( + EnumText(TenantStatus, length=255), server_default=sa.text("'normal'"), default=TenantStatus.NORMAL + ) custom_config: Mapped[str | None] = mapped_column(LongText, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False @@ -291,7 +288,9 @@ class TenantAccountJoin(TypeBase): tenant_id: Mapped[str] = mapped_column(StringUUID) account_id: Mapped[str] = mapped_column(StringUUID) current: Mapped[bool] = mapped_column(sa.Boolean, server_default=sa.text("false"), default=False) - role: Mapped[str] = mapped_column(String(16), server_default="normal", default="normal") + role: Mapped[TenantAccountRole] = mapped_column( + EnumText(TenantAccountRole, length=16), server_default="normal", default=TenantAccountRole.NORMAL + ) invited_by: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) created_at: Mapped[datetime] = mapped_column( DateTime, server_default=func.current_timestamp(), nullable=False, init=False @@ -324,6 +323,11 @@ class AccountIntegrate(TypeBase): ) +class InvitationCodeStatus(enum.StrEnum): + UNUSED = "unused" + USED = "used" + + class InvitationCode(TypeBase): __tablename__ = "invitation_codes" __table_args__ = ( @@ -335,7 +339,11 @@ class InvitationCode(TypeBase): id: Mapped[int] = mapped_column(sa.Integer, init=False) batch: Mapped[str] = mapped_column(String(255)) code: Mapped[str] = mapped_column(String(32)) - status: Mapped[str] = mapped_column(String(16), server_default=sa.text("'unused'"), default="unused") + status: Mapped[InvitationCodeStatus] = mapped_column( + EnumText(InvitationCodeStatus, length=16), + server_default=sa.text("'unused'"), + default=InvitationCodeStatus.UNUSED, + ) used_at: Mapped[datetime | None] = mapped_column(DateTime, default=None) used_by_tenant_id: Mapped[str | None] = mapped_column(StringUUID, default=None) used_by_account_id: Mapped[str | None] = mapped_column(StringUUID, default=None) @@ -367,10 +375,13 @@ class TenantPluginPermission(TypeBase): ) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) install_permission: Mapped[InstallPermission] = mapped_column( - String(16), nullable=False, server_default="everyone", default=InstallPermission.EVERYONE + EnumText(InstallPermission, length=16), + nullable=False, + server_default="everyone", + default=InstallPermission.EVERYONE, ) debug_permission: Mapped[DebugPermission] = mapped_column( - String(16), nullable=False, server_default="noone", default=DebugPermission.NOBODY + EnumText(DebugPermission, length=16), nullable=False, server_default="noone", default=DebugPermission.NOBODY ) @@ -396,10 +407,13 @@ class TenantPluginAutoUpgradeStrategy(TypeBase): ) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) strategy_setting: Mapped[StrategySetting] = mapped_column( - String(16), nullable=False, server_default="fix_only", default=StrategySetting.FIX_ONLY + EnumText(StrategySetting, length=16), + nullable=False, + server_default="fix_only", + default=StrategySetting.FIX_ONLY, ) upgrade_mode: Mapped[UpgradeMode] = mapped_column( - String(16), nullable=False, server_default="exclude", default=UpgradeMode.EXCLUDE + EnumText(UpgradeMode, length=16), nullable=False, server_default="exclude", default=UpgradeMode.EXCLUDE ) exclude_plugins: Mapped[list[str]] = mapped_column(sa.JSON, nullable=False, default_factory=list) include_plugins: Mapped[list[str]] = mapped_column(sa.JSON, nullable=False, default_factory=list) diff --git a/api/models/dataset.py b/api/models/dataset.py index 4ef39fcde1..b3fa11a58c 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -30,8 +30,9 @@ from services.entities.knowledge_entities.knowledge_entities import ParentMode, from .account import Account from .base import Base, TypeBase from .engine import db +from .enums import CreatorUserRole from .model import App, Tag, TagBinding, UploadFile -from .types import AdjustedJSON, BinaryData, LongText, StringUUID, adjusted_json_index +from .types import AdjustedJSON, BinaryData, EnumText, LongText, StringUUID, adjusted_json_index logger = logging.getLogger(__name__) @@ -59,7 +60,11 @@ class Dataset(Base): name: Mapped[str] = mapped_column(String(255)) description = mapped_column(LongText, nullable=True) provider: Mapped[str] = mapped_column(String(255), server_default=sa.text("'vendor'")) - permission: Mapped[str] = mapped_column(String(255), server_default=sa.text("'only_me'")) + permission: Mapped[DatasetPermissionEnum] = mapped_column( + EnumText(DatasetPermissionEnum, length=255), + server_default=sa.text("'only_me'"), + default=DatasetPermissionEnum.ONLY_ME, + ) data_source_type = mapped_column(String(255)) indexing_technique: Mapped[str | None] = mapped_column(String(255)) index_struct = mapped_column(LongText, nullable=True) @@ -1003,7 +1008,7 @@ class DatasetQuery(TypeBase): content: Mapped[str] = mapped_column(LongText, nullable=False) source: Mapped[str] = mapped_column(String(255), nullable=False) source_app_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=sa.func.current_timestamp(), init=False diff --git a/api/models/enums.py b/api/models/enums.py index ed6236209f..eb478fe02c 100644 --- a/api/models/enums.py +++ b/api/models/enums.py @@ -1,6 +1,10 @@ from enum import StrEnum -from dify_graph.enums import NodeType +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) class CreatorUserRole(StrEnum): @@ -66,9 +70,29 @@ class AppTriggerStatus(StrEnum): class AppTriggerType(StrEnum): """App Trigger Type Enum""" - TRIGGER_WEBHOOK = NodeType.TRIGGER_WEBHOOK.value - TRIGGER_SCHEDULE = NodeType.TRIGGER_SCHEDULE.value - TRIGGER_PLUGIN = NodeType.TRIGGER_PLUGIN.value + TRIGGER_WEBHOOK = TRIGGER_WEBHOOK_NODE_TYPE + TRIGGER_SCHEDULE = TRIGGER_SCHEDULE_NODE_TYPE + TRIGGER_PLUGIN = TRIGGER_PLUGIN_NODE_TYPE # for backward compatibility UNKNOWN = "unknown" + + +class AppStatus(StrEnum): + """App Status Enum""" + + NORMAL = "normal" + + +class AppMCPServerStatus(StrEnum): + """AppMCPServer Status Enum""" + + NORMAL = "normal" + ACTIVE = "active" + INACTIVE = "inactive" + + +class ConversationStatus(StrEnum): + """Conversation Status Enum""" + + NORMAL = "normal" diff --git a/api/models/model.py b/api/models/model.py index 30a09d7e73..93dd862591 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -7,7 +7,7 @@ from collections.abc import Mapping, Sequence from datetime import datetime from decimal import Decimal from enum import StrEnum, auto -from typing import TYPE_CHECKING, Any, Literal, cast +from typing import TYPE_CHECKING, Any, Literal, NotRequired, cast from uuid import uuid4 import sqlalchemy as sa @@ -15,6 +15,7 @@ from flask import request from flask_login import UserMixin # type: ignore[import-untyped] from sqlalchemy import BigInteger, Float, Index, PrimaryKeyConstraint, String, exists, func, select, text from sqlalchemy.orm import Mapped, Session, mapped_column +from typing_extensions import TypedDict from configs import dify_config from constants import DEFAULT_FILE_NUMBER_LIMITS @@ -28,9 +29,9 @@ from libs.uuid_utils import uuidv7 from .account import Account, Tenant from .base import Base, TypeBase, gen_uuidv4_string from .engine import db -from .enums import CreatorUserRole +from .enums import AppMCPServerStatus, AppStatus, ConversationStatus, CreatorUserRole, MessageStatus from .provider_ids import GenericProviderID -from .types import LongText, StringUUID +from .types import EnumText, LongText, StringUUID if TYPE_CHECKING: from core.app.entities.llm_generation_entities import LLMGenerationDetailData @@ -38,6 +39,259 @@ if TYPE_CHECKING: from .workflow import Workflow +# --- TypedDict definitions for structured dict return types --- + + +class EnabledConfig(TypedDict): + enabled: bool + + +class EmbeddingModelInfo(TypedDict): + embedding_provider_name: str + embedding_model_name: str + + +class AnnotationReplyDisabledConfig(TypedDict): + enabled: Literal[False] + + +class AnnotationReplyEnabledConfig(TypedDict): + id: str + enabled: Literal[True] + score_threshold: float + embedding_model: EmbeddingModelInfo + + +AnnotationReplyConfig = AnnotationReplyEnabledConfig | AnnotationReplyDisabledConfig + + +class SensitiveWordAvoidanceConfig(TypedDict): + enabled: bool + type: str + config: dict[str, Any] + + +class AgentToolConfig(TypedDict): + provider_type: str + provider_id: str + tool_name: str + tool_parameters: dict[str, Any] + plugin_unique_identifier: NotRequired[str | None] + credential_id: NotRequired[str | None] + + +class AgentModeConfig(TypedDict): + enabled: bool + strategy: str | None + tools: list[AgentToolConfig | dict[str, Any]] + prompt: str | None + + +class ImageUploadConfig(TypedDict): + enabled: bool + number_limits: int + detail: str + transfer_methods: list[str] + + +class FileUploadConfig(TypedDict): + image: ImageUploadConfig + + +class DeletedToolInfo(TypedDict): + type: str + tool_name: str + provider_id: str + + +class ExternalDataToolConfig(TypedDict): + enabled: bool + variable: str + type: str + config: dict[str, Any] + + +class UserInputFormItemConfig(TypedDict): + variable: str + label: str + description: NotRequired[str] + required: NotRequired[bool] + max_length: NotRequired[int] + options: NotRequired[list[str]] + default: NotRequired[str] + type: NotRequired[str] + config: NotRequired[dict[str, Any]] + + +# Each item is a single-key dict, e.g. {"text-input": UserInputFormItemConfig} +UserInputFormItem = dict[str, UserInputFormItemConfig] + + +class DatasetConfigs(TypedDict): + retrieval_model: str + datasets: NotRequired[dict[str, Any]] + top_k: NotRequired[int] + score_threshold: NotRequired[float] + score_threshold_enabled: NotRequired[bool] + reranking_model: NotRequired[dict[str, Any] | None] + weights: NotRequired[dict[str, Any] | None] + reranking_enabled: NotRequired[bool] + reranking_mode: NotRequired[str] + metadata_filtering_mode: NotRequired[str] + metadata_model_config: NotRequired[dict[str, Any] | None] + metadata_filtering_conditions: NotRequired[dict[str, Any] | None] + + +class ChatPromptMessage(TypedDict): + text: str + role: str + + +class ChatPromptConfig(TypedDict, total=False): + prompt: list[ChatPromptMessage] + + +class CompletionPromptText(TypedDict): + text: str + + +class ConversationHistoriesRole(TypedDict): + user_prefix: str + assistant_prefix: str + + +class CompletionPromptConfig(TypedDict): + prompt: CompletionPromptText + conversation_histories_role: NotRequired[ConversationHistoriesRole] + + +class ModelConfig(TypedDict): + provider: str + name: str + mode: str + completion_params: NotRequired[dict[str, Any]] + + +class AppModelConfigDict(TypedDict): + opening_statement: str | None + suggested_questions: list[str] + suggested_questions_after_answer: EnabledConfig + speech_to_text: EnabledConfig + text_to_speech: EnabledConfig + retriever_resource: EnabledConfig + annotation_reply: AnnotationReplyConfig + more_like_this: EnabledConfig + sensitive_word_avoidance: SensitiveWordAvoidanceConfig + external_data_tools: list[ExternalDataToolConfig] + model: ModelConfig + user_input_form: list[UserInputFormItem] + dataset_query_variable: str | None + pre_prompt: str | None + agent_mode: AgentModeConfig + prompt_type: str + chat_prompt_config: ChatPromptConfig + completion_prompt_config: CompletionPromptConfig + dataset_configs: DatasetConfigs + file_upload: FileUploadConfig + # Added dynamically in Conversation.model_config + model_id: NotRequired[str | None] + provider: NotRequired[str | None] + + +class ConversationDict(TypedDict): + id: str + app_id: str + app_model_config_id: str | None + model_provider: str | None + override_model_configs: str | None + model_id: str | None + mode: str + name: str + summary: str | None + inputs: dict[str, Any] + introduction: str | None + system_instruction: str | None + system_instruction_tokens: int + status: str + invoke_from: str | None + from_source: str + from_end_user_id: str | None + from_account_id: str | None + read_at: datetime | None + read_account_id: str | None + dialogue_count: int + created_at: datetime + updated_at: datetime + + +class MessageDict(TypedDict): + id: str + app_id: str + conversation_id: str + model_id: str | None + inputs: dict[str, Any] + query: str + total_price: Decimal | None + message: dict[str, Any] + answer: str + status: str + error: str | None + message_metadata: dict[str, Any] + from_source: str + from_end_user_id: str | None + from_account_id: str | None + created_at: str + updated_at: str + agent_based: bool + workflow_run_id: str | None + + +class MessageFeedbackDict(TypedDict): + id: str + app_id: str + conversation_id: str + message_id: str + rating: str + content: str | None + from_source: str + from_end_user_id: str | None + from_account_id: str | None + created_at: str + updated_at: str + + +class MessageFileInfo(TypedDict, total=False): + belongs_to: str | None + upload_file_id: str | None + id: str + tenant_id: str + type: str + transfer_method: str + remote_url: str | None + related_id: str | None + filename: str | None + extension: str | None + mime_type: str | None + size: int + dify_model_identity: str + url: str | None + + +class ExtraContentDict(TypedDict, total=False): + type: str + workflow_run_id: str + + +class TraceAppConfigDict(TypedDict): + id: str + app_id: str + tracing_provider: str | None + tracing_config: dict[str, Any] + is_active: bool + created_at: str | None + updated_at: str | None + + class DifySetup(TypeBase): __tablename__ = "dify_setups" __table_args__ = (sa.PrimaryKeyConstraint("version", name="dify_setup_pkey"),) @@ -85,13 +339,15 @@ class App(Base): tenant_id: Mapped[str] = mapped_column(StringUUID) name: Mapped[str] = mapped_column(String(255)) description: Mapped[str] = mapped_column(LongText, default=sa.text("''")) - mode: Mapped[str] = mapped_column(String(255)) - icon_type: Mapped[str | None] = mapped_column(String(255)) # image, emoji, link + mode: Mapped[AppMode] = mapped_column(EnumText(AppMode, length=255)) + icon_type: Mapped[IconType | None] = mapped_column(EnumText(IconType, length=255)) icon = mapped_column(String(255)) icon_background: Mapped[str | None] = mapped_column(String(255)) app_model_config_id = mapped_column(StringUUID, nullable=True) workflow_id = mapped_column(StringUUID, nullable=True) - status: Mapped[str] = mapped_column(String(255), server_default=sa.text("'normal'")) + status: Mapped[AppStatus] = mapped_column( + EnumText(AppStatus, length=255), server_default=sa.text("'normal'"), default=AppStatus.NORMAL + ) enable_site: Mapped[bool] = mapped_column(sa.Boolean) enable_api: Mapped[bool] = mapped_column(sa.Boolean) api_rpm: Mapped[int] = mapped_column(sa.Integer, server_default=sa.text("0")) @@ -178,7 +434,7 @@ class App(Base): return str(self.mode) @property - def deleted_tools(self) -> list[dict[str, str]]: + def deleted_tools(self) -> list[DeletedToolInfo]: from core.tools.tool_manager import ToolManager, ToolProviderType from services.plugin.plugin_service import PluginService @@ -259,7 +515,7 @@ class App(Base): provider_id.provider_name: existence[i] for i, provider_id in enumerate(builtin_provider_ids) } - deleted_tools: list[dict[str, str]] = [] + deleted_tools: list[DeletedToolInfo] = [] for tool in tools: keys = list(tool.keys()) @@ -366,35 +622,38 @@ class AppModelConfig(TypeBase): return app @property - def model_dict(self) -> dict[str, Any]: - return json.loads(self.model) if self.model else {} + def model_dict(self) -> ModelConfig: + return cast(ModelConfig, json.loads(self.model) if self.model else {}) @property def suggested_questions_list(self) -> list[str]: return json.loads(self.suggested_questions) if self.suggested_questions else [] @property - def suggested_questions_after_answer_dict(self) -> dict[str, Any]: - return ( + def suggested_questions_after_answer_dict(self) -> EnabledConfig: + return cast( + EnabledConfig, json.loads(self.suggested_questions_after_answer) if self.suggested_questions_after_answer - else {"enabled": False} + else {"enabled": False}, ) @property - def speech_to_text_dict(self) -> dict[str, Any]: - return json.loads(self.speech_to_text) if self.speech_to_text else {"enabled": False} + def speech_to_text_dict(self) -> EnabledConfig: + return cast(EnabledConfig, json.loads(self.speech_to_text) if self.speech_to_text else {"enabled": False}) @property - def text_to_speech_dict(self) -> dict[str, Any]: - return json.loads(self.text_to_speech) if self.text_to_speech else {"enabled": False} + def text_to_speech_dict(self) -> EnabledConfig: + return cast(EnabledConfig, json.loads(self.text_to_speech) if self.text_to_speech else {"enabled": False}) @property - def retriever_resource_dict(self) -> dict[str, Any]: - return json.loads(self.retriever_resource) if self.retriever_resource else {"enabled": True} + def retriever_resource_dict(self) -> EnabledConfig: + return cast( + EnabledConfig, json.loads(self.retriever_resource) if self.retriever_resource else {"enabled": True} + ) @property - def annotation_reply_dict(self) -> dict[str, Any]: + def annotation_reply_dict(self) -> AnnotationReplyConfig: annotation_setting = ( db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == self.app_id).first() ) @@ -417,56 +676,62 @@ class AppModelConfig(TypeBase): return {"enabled": False} @property - def more_like_this_dict(self) -> dict[str, Any]: - return json.loads(self.more_like_this) if self.more_like_this else {"enabled": False} + def more_like_this_dict(self) -> EnabledConfig: + return cast(EnabledConfig, json.loads(self.more_like_this) if self.more_like_this else {"enabled": False}) @property - def sensitive_word_avoidance_dict(self) -> dict[str, Any]: - return ( + def sensitive_word_avoidance_dict(self) -> SensitiveWordAvoidanceConfig: + return cast( + SensitiveWordAvoidanceConfig, json.loads(self.sensitive_word_avoidance) if self.sensitive_word_avoidance - else {"enabled": False, "type": "", "configs": []} + else {"enabled": False, "type": "", "config": {}}, ) @property - def external_data_tools_list(self) -> list[dict[str, Any]]: + def external_data_tools_list(self) -> list[ExternalDataToolConfig]: return json.loads(self.external_data_tools) if self.external_data_tools else [] @property - def user_input_form_list(self) -> list[dict[str, Any]]: + def user_input_form_list(self) -> list[UserInputFormItem]: return json.loads(self.user_input_form) if self.user_input_form else [] @property - def agent_mode_dict(self) -> dict[str, Any]: - return ( + def agent_mode_dict(self) -> AgentModeConfig: + return cast( + AgentModeConfig, json.loads(self.agent_mode) if self.agent_mode - else {"enabled": False, "strategy": None, "tools": [], "prompt": None} + else {"enabled": False, "strategy": None, "tools": [], "prompt": None}, ) @property - def chat_prompt_config_dict(self) -> dict[str, Any]: - return json.loads(self.chat_prompt_config) if self.chat_prompt_config else {} + def chat_prompt_config_dict(self) -> ChatPromptConfig: + return cast(ChatPromptConfig, json.loads(self.chat_prompt_config) if self.chat_prompt_config else {}) @property - def completion_prompt_config_dict(self) -> dict[str, Any]: - return json.loads(self.completion_prompt_config) if self.completion_prompt_config else {} + def completion_prompt_config_dict(self) -> CompletionPromptConfig: + return cast( + CompletionPromptConfig, + json.loads(self.completion_prompt_config) if self.completion_prompt_config else {}, + ) @property - def dataset_configs_dict(self) -> dict[str, Any]: + def dataset_configs_dict(self) -> DatasetConfigs: if self.dataset_configs: - dataset_configs: dict[str, Any] = json.loads(self.dataset_configs) + dataset_configs = json.loads(self.dataset_configs) if "retrieval_model" not in dataset_configs: return {"retrieval_model": "single"} else: - return dataset_configs + return cast(DatasetConfigs, dataset_configs) return { "retrieval_model": "multiple", } @property - def file_upload_dict(self) -> dict[str, Any]: - return ( + def file_upload_dict(self) -> FileUploadConfig: + return cast( + FileUploadConfig, json.loads(self.file_upload) if self.file_upload else { @@ -476,10 +741,10 @@ class AppModelConfig(TypeBase): "detail": "high", "transfer_methods": ["remote_url", "local_file"], } - } + }, ) - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> AppModelConfigDict: return { "opening_statement": self.opening_statement, "suggested_questions": self.suggested_questions_list, @@ -503,36 +768,42 @@ class AppModelConfig(TypeBase): "file_upload": self.file_upload_dict, } - def from_model_config_dict(self, model_config: Mapping[str, Any]): + def from_model_config_dict(self, model_config: AppModelConfigDict): self.opening_statement = model_config.get("opening_statement") self.suggested_questions = ( - json.dumps(model_config["suggested_questions"]) if model_config.get("suggested_questions") else None + json.dumps(model_config.get("suggested_questions")) if model_config.get("suggested_questions") else None ) self.suggested_questions_after_answer = ( - json.dumps(model_config["suggested_questions_after_answer"]) + json.dumps(model_config.get("suggested_questions_after_answer")) if model_config.get("suggested_questions_after_answer") else None ) - self.speech_to_text = json.dumps(model_config["speech_to_text"]) if model_config.get("speech_to_text") else None - self.text_to_speech = json.dumps(model_config["text_to_speech"]) if model_config.get("text_to_speech") else None - self.more_like_this = json.dumps(model_config["more_like_this"]) if model_config.get("more_like_this") else None + self.speech_to_text = ( + json.dumps(model_config.get("speech_to_text")) if model_config.get("speech_to_text") else None + ) + self.text_to_speech = ( + json.dumps(model_config.get("text_to_speech")) if model_config.get("text_to_speech") else None + ) + self.more_like_this = ( + json.dumps(model_config.get("more_like_this")) if model_config.get("more_like_this") else None + ) self.sensitive_word_avoidance = ( - json.dumps(model_config["sensitive_word_avoidance"]) + json.dumps(model_config.get("sensitive_word_avoidance")) if model_config.get("sensitive_word_avoidance") else None ) self.external_data_tools = ( - json.dumps(model_config["external_data_tools"]) if model_config.get("external_data_tools") else None + json.dumps(model_config.get("external_data_tools")) if model_config.get("external_data_tools") else None ) - self.model = json.dumps(model_config["model"]) if model_config.get("model") else None + self.model = json.dumps(model_config.get("model")) if model_config.get("model") else None self.user_input_form = ( - json.dumps(model_config["user_input_form"]) if model_config.get("user_input_form") else None + json.dumps(model_config.get("user_input_form")) if model_config.get("user_input_form") else None ) self.dataset_query_variable = model_config.get("dataset_query_variable") - self.pre_prompt = model_config["pre_prompt"] - self.agent_mode = json.dumps(model_config["agent_mode"]) if model_config.get("agent_mode") else None + self.pre_prompt = model_config.get("pre_prompt") + self.agent_mode = json.dumps(model_config.get("agent_mode")) if model_config.get("agent_mode") else None self.retriever_resource = ( - json.dumps(model_config["retriever_resource"]) if model_config.get("retriever_resource") else None + json.dumps(model_config.get("retriever_resource")) if model_config.get("retriever_resource") else None ) self.prompt_type = model_config.get("prompt_type", "simple") self.chat_prompt_config = ( @@ -733,14 +1004,16 @@ class Conversation(Base): model_provider = mapped_column(String(255), nullable=True) override_model_configs = mapped_column(LongText) model_id = mapped_column(String(255), nullable=True) - mode: Mapped[str] = mapped_column(String(255)) + mode: Mapped[AppMode] = mapped_column(EnumText(AppMode, length=255)) name: Mapped[str] = mapped_column(String(255), nullable=False) summary = mapped_column(LongText) _inputs: Mapped[dict[str, Any]] = mapped_column("inputs", sa.JSON) introduction = mapped_column(LongText) system_instruction = mapped_column(LongText) system_instruction_tokens: Mapped[int] = mapped_column(sa.Integer, nullable=False, server_default=sa.text("0")) - status: Mapped[str] = mapped_column(String(255), nullable=False) + status: Mapped[ConversationStatus] = mapped_column( + EnumText(ConversationStatus, length=255), nullable=False, default=ConversationStatus.NORMAL + ) # The `invoke_from` records how the conversation is created. # @@ -825,24 +1098,26 @@ class Conversation(Base): self._inputs = inputs @property - def model_config(self): - model_config = {} + def model_config(self) -> AppModelConfigDict: + model_config = cast(AppModelConfigDict, {}) app_model_config: AppModelConfig | None = None if self.mode == AppMode.ADVANCED_CHAT: if self.override_model_configs: override_model_configs = json.loads(self.override_model_configs) - model_config = override_model_configs + model_config = cast(AppModelConfigDict, override_model_configs) else: if self.override_model_configs: override_model_configs = json.loads(self.override_model_configs) if "model" in override_model_configs: # where is app_id? - app_model_config = AppModelConfig(app_id=self.app_id).from_model_config_dict(override_model_configs) + app_model_config = AppModelConfig(app_id=self.app_id).from_model_config_dict( + cast(AppModelConfigDict, override_model_configs) + ) model_config = app_model_config.to_dict() else: - model_config["configs"] = override_model_configs + model_config["configs"] = override_model_configs # type: ignore[typeddict-unknown-key] else: app_model_config = ( db.session.query(AppModelConfig).where(AppModelConfig.id == self.app_model_config_id).first() @@ -1017,7 +1292,7 @@ class Conversation(Base): def in_debug_mode(self) -> bool: return self.override_model_configs is not None - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> ConversationDict: return { "id": self.id, "app_id": self.app_id, @@ -1082,7 +1357,12 @@ class Message(Base): provider_response_latency: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0")) total_price: Mapped[Decimal | None] = mapped_column(sa.Numeric(10, 7)) currency: Mapped[str] = mapped_column(String(255), nullable=False) - status: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + status: Mapped[MessageStatus] = mapped_column( + EnumText(MessageStatus, length=255), + nullable=False, + server_default=sa.text("'normal'"), + default=MessageStatus.NORMAL, + ) error: Mapped[str | None] = mapped_column(LongText) message_metadata: Mapped[str | None] = mapped_column(LongText) invoke_from: Mapped[str | None] = mapped_column(String(255), nullable=True) @@ -1095,7 +1375,7 @@ class Message(Base): ) agent_based: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) workflow_run_id: Mapped[str | None] = mapped_column(StringUUID) - app_mode: Mapped[str | None] = mapped_column(String(255), nullable=True) + app_mode: Mapped[AppMode | None] = mapped_column(EnumText(AppMode, length=255), nullable=True) @property def inputs(self) -> dict[str, Any]: @@ -1309,7 +1589,7 @@ class Message(Base): return self.message_metadata_dict.get("retriever_resources") if self.message_metadata else [] @property - def message_files(self) -> list[dict[str, Any]]: + def message_files(self) -> list[MessageFileInfo]: from factories import file_factory message_files = db.session.scalars(select(MessageFile).where(MessageFile.message_id == self.id)).all() @@ -1364,10 +1644,13 @@ class Message(Base): ) files.append(file) - result: list[dict[str, Any]] = [ - {"belongs_to": message_file.belongs_to, "upload_file_id": message_file.upload_file_id, **file.to_dict()} - for (file, message_file) in zip(files, message_files) - ] + result = cast( + list[MessageFileInfo], + [ + {"belongs_to": message_file.belongs_to, "upload_file_id": message_file.upload_file_id, **file.to_dict()} + for (file, message_file) in zip(files, message_files) + ], + ) db.session.commit() return result @@ -1377,7 +1660,7 @@ class Message(Base): self._extra_contents = list(contents) @property - def extra_contents(self) -> list[dict[str, Any]]: + def extra_contents(self) -> list[ExtraContentDict]: return getattr(self, "_extra_contents", []) @property @@ -1393,7 +1676,7 @@ class Message(Base): return None - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> MessageDict: return { "id": self.id, "app_id": self.app_id, @@ -1417,7 +1700,7 @@ class Message(Base): } @classmethod - def from_dict(cls, data: dict[str, Any]) -> Message: + def from_dict(cls, data: MessageDict) -> Message: return cls( id=data["id"], app_id=data["app_id"], @@ -1477,7 +1760,7 @@ class MessageFeedback(TypeBase): account = db.session.query(Account).where(Account.id == self.from_account_id).first() return account - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> MessageFeedbackDict: return { "id": str(self.id), "app_id": str(self.app_id), @@ -1506,8 +1789,10 @@ class MessageFile(TypeBase): ) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) type: Mapped[str] = mapped_column(String(255), nullable=False) - transfer_method: Mapped[FileTransferMethod] = mapped_column(String(255), nullable=False) - created_by_role: Mapped[CreatorUserRole] = mapped_column(String(255), nullable=False) + transfer_method: Mapped[FileTransferMethod] = mapped_column( + EnumText(FileTransferMethod, length=255), nullable=False + ) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) belongs_to: Mapped[Literal["user", "assistant"] | None] = mapped_column(String(255), nullable=True, default=None) url: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) @@ -1716,7 +2001,9 @@ class AppMCPServer(TypeBase): name: Mapped[str] = mapped_column(String(255), nullable=False) description: Mapped[str] = mapped_column(String(255), nullable=False) server_code: Mapped[str] = mapped_column(String(255), nullable=False) - status: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + status: Mapped[AppMCPServerStatus] = mapped_column( + EnumText(AppMCPServerStatus, length=255), nullable=False, server_default=sa.text("'normal'") + ) parameters: Mapped[str] = mapped_column(LongText, nullable=False) created_at: Mapped[datetime] = mapped_column( @@ -1740,8 +2027,8 @@ class AppMCPServer(TypeBase): return result @property - def parameters_dict(self) -> dict[str, Any]: - return cast(dict[str, Any], json.loads(self.parameters)) + def parameters_dict(self) -> dict[str, str]: + return cast(dict[str, str], json.loads(self.parameters)) class Site(Base): @@ -1755,7 +2042,7 @@ class Site(Base): id = mapped_column(StringUUID, default=lambda: str(uuid4())) app_id = mapped_column(StringUUID, nullable=False) title: Mapped[str] = mapped_column(String(255), nullable=False) - icon_type = mapped_column(String(255), nullable=True) + icon_type: Mapped[IconType | None] = mapped_column(EnumText(IconType, length=255), nullable=True) icon = mapped_column(String(255)) icon_background = mapped_column(String(255)) description = mapped_column(LongText) @@ -1770,7 +2057,9 @@ class Site(Base): customize_domain = mapped_column(String(255)) customize_token_strategy: Mapped[str] = mapped_column(String(255), nullable=False) prompt_public: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("false")) - status = mapped_column(String(255), nullable=False, server_default=sa.text("'normal'")) + status: Mapped[AppStatus] = mapped_column( + EnumText(AppStatus, length=255), nullable=False, server_default=sa.text("'normal'"), default=AppStatus.NORMAL + ) created_by = mapped_column(StringUUID, nullable=True) created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp()) updated_by = mapped_column(StringUUID, nullable=True) @@ -1850,7 +2139,12 @@ class UploadFile(Base): # The `created_by_role` field indicates whether the file was created by an `Account` or an `EndUser`. # Its value is derived from the `CreatorUserRole` enumeration. - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'account'")) + created_by_role: Mapped[CreatorUserRole] = mapped_column( + EnumText(CreatorUserRole, length=255), + nullable=False, + server_default=sa.text("'account'"), + default=CreatorUserRole.ACCOUNT, + ) # The `created_by` field stores the ID of the entity that created this upload file. # @@ -1903,7 +2197,7 @@ class UploadFile(Base): self.size = size self.extension = extension self.mime_type = mime_type - self.created_by_role = created_by_role.value + self.created_by_role = created_by_role self.created_by = created_by self.created_at = created_at self.used = used @@ -1966,7 +2260,7 @@ class MessageAgentThought(TypeBase): ) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) position: Mapped[int] = mapped_column(sa.Integer, nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) message_chain_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True, default=None) thought: Mapped[str | None] = mapped_column(LongText, nullable=True, default=None) @@ -2181,7 +2475,7 @@ class TraceAppConfig(TypeBase): def tracing_config_str(self) -> str: return json.dumps(self.tracing_config_dict) - def to_dict(self) -> dict[str, Any]: + def to_dict(self) -> TraceAppConfigDict: return { "id": self.id, "app_id": self.app_id, diff --git a/api/models/provider.py b/api/models/provider.py index 6175a3ae88..18a0fe92c8 100644 --- a/api/models/provider.py +++ b/api/models/provider.py @@ -13,7 +13,7 @@ from libs.uuid_utils import uuidv7 from .base import TypeBase from .engine import db -from .types import LongText, StringUUID +from .types import EnumText, LongText, StringUUID class ProviderType(StrEnum): @@ -69,8 +69,8 @@ class Provider(TypeBase): ) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) provider_name: Mapped[str] = mapped_column(String(255), nullable=False) - provider_type: Mapped[str] = mapped_column( - String(40), nullable=False, server_default=text("'custom'"), default="custom" + provider_type: Mapped[ProviderType] = mapped_column( + EnumText(ProviderType, length=40), nullable=False, server_default=text("'custom'"), default=ProviderType.CUSTOM ) is_valid: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=text("false"), default=False) last_used: Mapped[datetime | None] = mapped_column(DateTime, nullable=True, init=False) diff --git a/api/models/trigger.py b/api/models/trigger.py index 209345eb84..43d7fc5b24 100644 --- a/api/models/trigger.py +++ b/api/models/trigger.py @@ -227,7 +227,7 @@ class WorkflowTriggerLog(TypeBase): queue_name: Mapped[str] = mapped_column(String(100), nullable=False) celery_task_id: Mapped[str | None] = mapped_column(String(255), nullable=True) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(String(255), nullable=False) retry_count: Mapped[int] = mapped_column(sa.Integer, nullable=False, default=0) elapsed_time: Mapped[float | None] = mapped_column(sa.Float, nullable=True, default=None) diff --git a/api/models/web.py b/api/models/web.py index 5f6a7b40bf..a1cc11c375 100644 --- a/api/models/web.py +++ b/api/models/web.py @@ -2,13 +2,14 @@ from datetime import datetime from uuid import uuid4 import sqlalchemy as sa -from sqlalchemy import DateTime, String, func +from sqlalchemy import DateTime, func from sqlalchemy.orm import Mapped, mapped_column from .base import TypeBase from .engine import db +from .enums import CreatorUserRole from .model import Message -from .types import StringUUID +from .types import EnumText, StringUUID class SavedMessage(TypeBase): @@ -24,7 +25,9 @@ class SavedMessage(TypeBase): ) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) message_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False, server_default=sa.text("'end_user'")) + created_by_role: Mapped[CreatorUserRole] = mapped_column( + EnumText(CreatorUserRole, length=255), nullable=False, server_default=sa.text("'end_user'") + ) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, @@ -50,8 +53,8 @@ class PinnedConversation(TypeBase): ) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) conversation_id: Mapped[str] = mapped_column(StringUUID) - created_by_role: Mapped[str] = mapped_column( - String(255), + created_by_role: Mapped[CreatorUserRole] = mapped_column( + EnumText(CreatorUserRole, length=255), nullable=False, server_default=sa.text("'end_user'"), ) diff --git a/api/models/workflow.py b/api/models/workflow.py index b445f0eee2..6d94858df2 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -22,13 +22,14 @@ from sqlalchemy import ( from sqlalchemy.orm import Mapped, declared_attr, mapped_column from typing_extensions import deprecated +from core.trigger.constants import TRIGGER_INFO_METADATA_KEY, TRIGGER_PLUGIN_NODE_TYPE from dify_graph.constants import ( CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID, ) from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter from dify_graph.entities.pause_reason import HumanInputRequired, PauseReason, PauseReasonType, SchedulingPause -from dify_graph.enums import NodeType, WorkflowExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowExecutionStatus from dify_graph.file.constants import maybe_file_object from dify_graph.file.models import File from dify_graph.variables import utils as variable_utils @@ -53,7 +54,7 @@ from libs import helper from .account import Account from .base import Base, DefaultFieldsMixin, TypeBase from .engine import db -from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType +from .enums import CreatorUserRole, DraftVariableType, ExecutionOffLoadType, WorkflowRunTriggeredFrom from .types import EnumText, LongText, StringUUID logger = logging.getLogger(__name__) @@ -172,7 +173,7 @@ class Workflow(Base): # bug id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4())) tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False) app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - type: Mapped[str] = mapped_column(String(255), nullable=False) + type: Mapped[WorkflowType] = mapped_column(EnumText(WorkflowType, length=255), nullable=False) version: Mapped[str] = mapped_column(String(255), nullable=False) marked_name: Mapped[str] = mapped_column(String(255), default="", server_default="") marked_comment: Mapped[str] = mapped_column(String(255), default="", server_default="") @@ -219,7 +220,7 @@ class Workflow(Base): # bug workflow.id = str(uuid4()) workflow.tenant_id = tenant_id workflow.app_id = app_id - workflow.type = type + workflow.type = WorkflowType(type) workflow.version = version workflow.graph = graph workflow.features = features @@ -264,8 +265,11 @@ class Workflow(Base): # bug def get_node_config_by_id(self, node_id: str) -> NodeConfigDict: """Extract a node configuration from the workflow graph by node ID. - A node configuration is a dictionary containing the node's properties, including - the node's id, title, and its data as a dict. + + A node configuration includes the node id and a typed `BaseNodeData` for `data`. + `BaseNodeData` keeps a dict-like `get`/`__getitem__` compatibility layer backed by + model fields plus Pydantic extra storage for legacy consumers, but callers should + prefer attribute access. """ workflow_graph = self.graph_dict @@ -283,12 +287,9 @@ class Workflow(Base): # bug return NodeConfigDictAdapter.validate_python(node_config) @staticmethod - def get_node_type_from_node_config(node_config: Mapping[str, Any]) -> NodeType: + def get_node_type_from_node_config(node_config: NodeConfigDict) -> NodeType: """Extract type of a node from the node configuration returned by `get_node_config_by_id`.""" - node_config_data = node_config.get("data", {}) - # Get node class - node_type = NodeType(node_config_data.get("type")) - return node_type + return node_config["data"].type @staticmethod def get_enclosing_node_type_and_id( @@ -300,12 +301,12 @@ class Workflow(Base): # bug loop_id = node_config.get("loop_id") if loop_id is None: raise _InvalidGraphDefinitionError("invalid graph") - return NodeType.LOOP, loop_id + return BuiltinNodeTypes.LOOP, loop_id elif in_iteration: iteration_id = node_config.get("iteration_id") if iteration_id is None: raise _InvalidGraphDefinitionError("invalid graph") - return NodeType.ITERATION, iteration_id + return BuiltinNodeTypes.ITERATION, iteration_id else: return None @@ -387,9 +388,7 @@ class Workflow(Base): # bug if specific_node_type: yield from ( - (node["id"], node["data"]) - for node in graph_dict["nodes"] - if node["data"]["type"] == specific_node_type.value + (node["id"], node["data"]) for node in graph_dict["nodes"] if node["data"]["type"] == specific_node_type ) else: yield from ((node["id"], node["data"]) for node in graph_dict["nodes"]) @@ -642,8 +641,8 @@ class WorkflowRun(Base): app_id: Mapped[str] = mapped_column(StringUUID) workflow_id: Mapped[str] = mapped_column(StringUUID) - type: Mapped[str] = mapped_column(String(255)) - triggered_from: Mapped[str] = mapped_column(String(255)) + type: Mapped[WorkflowType] = mapped_column(EnumText(WorkflowType, length=255)) + triggered_from: Mapped[WorkflowRunTriggeredFrom] = mapped_column(EnumText(WorkflowRunTriggeredFrom, length=255)) version: Mapped[str] = mapped_column(String(255)) graph: Mapped[str | None] = mapped_column(LongText) inputs: Mapped[str | None] = mapped_column(LongText) @@ -869,7 +868,9 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo tenant_id: Mapped[str] = mapped_column(StringUUID) app_id: Mapped[str] = mapped_column(StringUUID) workflow_id: Mapped[str] = mapped_column(StringUUID) - triggered_from: Mapped[str] = mapped_column(String(255)) + triggered_from: Mapped[WorkflowNodeExecutionTriggeredFrom] = mapped_column( + EnumText(WorkflowNodeExecutionTriggeredFrom, length=255) + ) workflow_run_id: Mapped[str | None] = mapped_column(StringUUID) index: Mapped[int] = mapped_column(sa.Integer) predecessor_node_id: Mapped[str | None] = mapped_column(String(255)) @@ -885,7 +886,7 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo elapsed_time: Mapped[float] = mapped_column(sa.Float, server_default=sa.text("0")) execution_metadata: Mapped[str | None] = mapped_column(LongText) created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.current_timestamp()) - created_by_role: Mapped[str] = mapped_column(String(255)) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255)) created_by: Mapped[str] = mapped_column(StringUUID) finished_at: Mapped[datetime | None] = mapped_column(DateTime) @@ -960,18 +961,18 @@ class WorkflowNodeExecutionModel(Base): # This model is expected to have `offlo extras: dict[str, Any] = {} execution_metadata = self.execution_metadata_dict if execution_metadata: - if self.node_type == NodeType.TOOL and "tool_info" in execution_metadata: + if self.node_type == BuiltinNodeTypes.TOOL and "tool_info" in execution_metadata: tool_info: dict[str, Any] = execution_metadata["tool_info"] extras["icon"] = ToolManager.get_tool_icon( tenant_id=self.tenant_id, provider_type=tool_info["provider_type"], provider_id=tool_info["provider_id"], ) - elif self.node_type == NodeType.DATASOURCE and "datasource_info" in execution_metadata: + elif self.node_type == BuiltinNodeTypes.DATASOURCE and "datasource_info" in execution_metadata: datasource_info = execution_metadata["datasource_info"] extras["icon"] = datasource_info.get("icon") - elif self.node_type == NodeType.TRIGGER_PLUGIN and "trigger_info" in execution_metadata: - trigger_info = execution_metadata["trigger_info"] or {} + elif self.node_type == TRIGGER_PLUGIN_NODE_TYPE and TRIGGER_INFO_METADATA_KEY in execution_metadata: + trigger_info = execution_metadata[TRIGGER_INFO_METADATA_KEY] or {} provider_id = trigger_info.get("provider_id") if provider_id: extras["icon"] = TriggerManager.get_trigger_plugin_icon( @@ -1169,7 +1170,7 @@ class WorkflowAppLog(TypeBase): workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_run_id: Mapped[str] = mapped_column(StringUUID) created_from: Mapped[str] = mapped_column(String(255), nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) created_at: Mapped[datetime] = mapped_column( DateTime, nullable=False, server_default=func.current_timestamp(), init=False @@ -1243,7 +1244,7 @@ class WorkflowArchiveLog(TypeBase): app_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False) workflow_run_id: Mapped[str] = mapped_column(StringUUID, nullable=False) - created_by_role: Mapped[str] = mapped_column(String(255), nullable=False) + created_by_role: Mapped[CreatorUserRole] = mapped_column(EnumText(CreatorUserRole, length=255), nullable=False) created_by: Mapped[str] = mapped_column(StringUUID, nullable=False) log_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True) @@ -1252,7 +1253,9 @@ class WorkflowArchiveLog(TypeBase): run_version: Mapped[str] = mapped_column(String(255), nullable=False) run_status: Mapped[str] = mapped_column(String(255), nullable=False) - run_triggered_from: Mapped[str] = mapped_column(String(255), nullable=False) + run_triggered_from: Mapped[WorkflowRunTriggeredFrom] = mapped_column( + EnumText(WorkflowRunTriggeredFrom, length=255), nullable=False + ) run_error: Mapped[str | None] = mapped_column(LongText, nullable=True) run_elapsed_time: Mapped[float] = mapped_column(sa.Float, nullable=False, server_default=sa.text("0")) run_total_tokens: Mapped[int] = mapped_column(sa.BigInteger, server_default=sa.text("0")) diff --git a/api/pyproject.toml b/api/pyproject.toml index d90a9b7db9..64df4d1e77 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,48 +1,46 @@ [project] name = "dify-api" -version = "1.14.0-rc1" +version = "1.13.0" requires-python = ">=3.11,<3.13" dependencies = [ "aliyun-log-python-sdk~=0.9.37", - "arize-phoenix-otel~=0.9.2", - "azure-identity==1.16.1", + "arize-phoenix-otel~=0.15.0", + "azure-identity==1.25.2", "beautifulsoup4==4.12.2", - "boto3==1.35.99", + "boto3==1.42.65", "bs4~=0.0.1", "cachetools~=5.3.0", "celery~=5.5.2", "charset-normalizer>=3.4.4", - "daytona==0.128.1", "flask~=3.1.2", - "flask-compress>=1.17,<1.18", + "flask-compress>=1.17,<1.24", "flask-cors~=6.0.0", "flask-login~=0.6.3", - "flask-migrate~=4.0.7", + "flask-migrate~=4.1.0", "flask-orjson~=2.0.0", "flask-sqlalchemy~=3.1.1", "gevent~=25.9.1", - "gevent-websocket~=0.10.1", "gmpy2~=2.3.0", "google-api-core>=2.19.1", - "google-api-python-client==2.189.0", + "google-api-python-client==2.192.0", "google-auth>=2.47.0", - "google-auth-httplib2==0.2.0", + "google-auth-httplib2==0.3.0", "google-cloud-aiplatform>=1.123.0", "googleapis-common-protos>=1.65.0", - "gunicorn~=23.0.0", - "httpx[socks]~=0.28.1", + "gunicorn~=25.1.0", + "httpx[socks]~=0.28.0", "jieba==0.42.1", "json-repair>=0.55.1", "jsonschema>=4.25.1", "langfuse~=2.51.3", - "langsmith~=0.1.77", - "markdown~=3.5.1", + "langsmith~=0.7.16", + "markdown~=3.8.1", "mlflow-skinny>=3.0.0", "numpy~=1.26.4", "openpyxl~=3.1.5", - "opik~=1.8.72", - "litellm==1.77.1", # Pinned to avoid madoka dependency issue + "opik~=1.10.37", + "litellm==1.82.1", # Pinned to avoid madoka dependency issue "opentelemetry-api==1.28.0", "opentelemetry-distro==0.49b0", "opentelemetry-exporter-otlp==1.28.0", @@ -55,35 +53,31 @@ dependencies = [ "opentelemetry-instrumentation-httpx==0.49b0", "opentelemetry-instrumentation-redis==0.49b0", "opentelemetry-instrumentation-sqlalchemy==0.49b0", - "opentelemetry-propagator-b3==1.28.0", + "opentelemetry-propagator-b3==1.40.0", "opentelemetry-proto==1.28.0", "opentelemetry-sdk==1.28.0", "opentelemetry-semantic-conventions==0.49b0", "opentelemetry-util-http==0.49b0", "pandas[excel,output-formatting,performance]~=2.2.2", - "paramiko>=3.5.1", "psycogreen~=1.0.2", "psycopg2-binary~=2.9.6", "pycryptodome==3.23.0", "pydantic~=2.12.5", - "pydantic-extra-types~=2.10.3", - "pydantic-settings~=2.12.0", - "pyjwt~=2.11.0", + "pydantic-extra-types~=2.11.0", + "pydantic-settings~=2.13.1", + "pyjwt~=2.12.0", "pypdfium2==5.2.0", "python-docx~=1.2.0", "python-dotenv==1.0.1", - "python-socketio~=5.13.0", "pyyaml~=6.0.1", "readabilipy~=0.3.0", - "redis[hiredis]~=7.2.0", + "redis[hiredis]~=7.3.0", "resend~=2.9.0", "sentry-sdk[flask]~=2.28.0", - # opentelemetry-instrumentation==0.48b0 imports pkg_resources, removed for setuptools>=81. - "setuptools<81", "sqlalchemy~=2.0.29", "starlette==0.49.1", - "tiktoken~=0.9.0", - "transformers~=4.56.1", + "tiktoken~=0.12.0", + "transformers~=5.3.0", "unstructured[docx,epub,md,ppt,pptx]~=0.18.18", "yarl~=1.18.3", "webvtt-py~=0.5.1", @@ -91,14 +85,12 @@ dependencies = [ "httpx-sse~=0.4.0", "sendgrid~=6.12.3", "flask-restx~=1.3.2", - "packaging==24.1", + "packaging~=23.2", "croniter>=6.0.0", "weaviate-client==4.17.0", "apscheduler>=3.11.0", "weave>=0.52.16", "fastopenapi[flask]>=0.7.0", - "docker>=7.1.0", - "e2b-code-interpreter>=2.4.1", ] # Before adding new dependency, consider place it in # alphabet order (a-z) and suitable group. @@ -117,46 +109,46 @@ package = false # Required for development and running tests ############################################################ dev = [ - "coverage~=7.2.4", - "dotenv-linter~=0.5.0", - "faker~=38.2.0", + "coverage~=7.13.4", + "dotenv-linter~=0.7.0", + "faker~=40.8.0", "lxml-stubs~=0.5.1", - "basedpyright~=1.31.0", - "ruff~=0.14.0", - "pytest~=8.3.2", - "pytest-benchmark~=4.0.0", - "pytest-cov~=4.1.0", + "basedpyright~=1.38.2", + "ruff~=0.15.5", + "pytest~=9.0.2", + "pytest-benchmark~=5.2.3", + "pytest-cov~=7.0.0", "pytest-env~=1.1.3", - "pytest-mock~=3.14.0", + "pytest-mock~=3.15.1", "testcontainers~=4.13.2", "types-aiofiles~=25.1.0", "types-beautifulsoup4~=4.12.0", - "types-cachetools~=5.5.0", + "types-cachetools~=6.2.0", "types-colorama~=0.4.15", "types-defusedxml~=0.7.0", - "types-deprecated~=1.2.15", - "types-docutils~=0.21.0", - "types-jsonschema~=4.23.0", - "types-flask-cors~=5.0.0", + "types-deprecated~=1.3.1", + "types-docutils~=0.22.3", + "types-jsonschema~=4.26.0", + "types-flask-cors~=6.0.0", "types-flask-migrate~=4.1.0", "types-gevent~=25.9.0", "types-greenlet~=3.3.0", "types-html5lib~=1.1.11", "types-markdown~=3.10.2", - "types-oauthlib~=3.2.0", + "types-oauthlib~=3.3.0", "types-objgraph~=3.6.0", "types-olefile~=0.47.0", "types-openpyxl~=3.1.5", "types-pexpect~=4.9.0", - "types-protobuf~=5.29.1", + "types-protobuf~=6.32.1", "types-psutil~=7.2.2", "types-psycopg2~=2.9.21", "types-pygments~=2.19.0", "types-pymysql~=1.1.0", "types-python-dateutil~=2.9.0", - "types-pywin32~=310.0.0", + "types-pywin32~=311.0.0", "types-pyyaml~=6.0.12", - "types-regex~=2024.11.6", + "types-regex~=2026.2.28", "types-shapely~=2.1.0", "types-simplejson>=3.20.0", "types-six>=1.17.0", @@ -169,18 +161,18 @@ dev = [ "types_pyOpenSSL>=24.1.0", "types_cffi>=1.17.0", "types_setuptools>=80.9.0", - "pandas-stubs~=2.2.3", + "pandas-stubs~=3.0.0", "scipy-stubs>=1.15.3.0", "types-python-http-client>=3.3.7.20240910", "import-linter>=2.3", "types-redis>=4.6.0.20241004", "celery-types>=0.23.0", - "mypy~=1.17.1", + "mypy~=1.19.1", # "locust>=2.40.4", # Temporarily removed due to compatibility issues. Uncomment when resolved. "sseclient-py>=1.8.0", "pytest-timeout>=2.4.0", "pytest-xdist>=3.8.0", - "pyrefly>=0.54.0", + "pyrefly>=0.55.0", ] ############################################################ @@ -188,13 +180,13 @@ dev = [ # Required for storage clients ############################################################ storage = [ - "azure-storage-blob==12.26.0", + "azure-storage-blob==12.28.0", "bce-python-sdk~=0.9.23", - "cos-python-sdk-v5==1.9.38", - "esdk-obs-python==3.25.8", + "cos-python-sdk-v5==1.9.41", + "esdk-obs-python==3.26.2", "google-cloud-storage>=3.0.0", "opendal~=0.46.0", - "oss2==2.18.5", + "oss2==2.19.1", "supabase~=2.18.1", "tos~=2.9.0", ] @@ -233,7 +225,7 @@ vdb = [ "weaviate-client==4.17.0", "xinference-client~=1.2.2", "mo-vector~=0.1.13", - "mysql-connector-python==9.5.0", + "mysql-connector-python>=9.3.0", ] [tool.mypy] @@ -255,3 +247,13 @@ module = [ "extensions.logstore.repositories.logstore_api_workflow_run_repository", ] ignore_errors = true + +[tool.pyrefly] +project-includes = ["."] +project-excludes = [ + ".venv", + "migrations/", +] +python-platform = "linux" +python-version = "3.11.0" +infer-with-first-use = false diff --git a/api/pyrefly-local-excludes.txt b/api/pyrefly-local-excludes.txt new file mode 100644 index 0000000000..c044824a82 --- /dev/null +++ b/api/pyrefly-local-excludes.txt @@ -0,0 +1,200 @@ +configs/middleware/cache/redis_pubsub_config.py +controllers/console/app/annotation.py +controllers/console/app/app.py +controllers/console/app/app_import.py +controllers/console/app/mcp_server.py +controllers/console/app/site.py +controllers/console/auth/email_register.py +controllers/console/human_input_form.py +controllers/console/init_validate.py +controllers/console/ping.py +controllers/console/setup.py +controllers/console/version.py +controllers/console/workspace/trigger_providers.py +controllers/service_api/app/annotation.py +controllers/web/workflow_events.py +core/agent/fc_agent_runner.py +core/app/apps/advanced_chat/app_generator.py +core/app/apps/advanced_chat/app_runner.py +core/app/apps/advanced_chat/generate_task_pipeline.py +core/app/apps/agent_chat/app_generator.py +core/app/apps/base_app_generate_response_converter.py +core/app/apps/base_app_generator.py +core/app/apps/chat/app_generator.py +core/app/apps/common/workflow_response_converter.py +core/app/apps/completion/app_generator.py +core/app/apps/pipeline/pipeline_generator.py +core/app/apps/pipeline/pipeline_runner.py +core/app/apps/workflow/app_generator.py +core/app/apps/workflow/app_runner.py +core/app/apps/workflow/generate_task_pipeline.py +core/app/apps/workflow_app_runner.py +core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py +core/datasource/datasource_manager.py +core/external_data_tool/api/api.py +core/llm_generator/llm_generator.py +core/llm_generator/output_parser/structured_output.py +core/mcp/mcp_client.py +core/ops/aliyun_trace/data_exporter/traceclient.py +core/ops/arize_phoenix_trace/arize_phoenix_trace.py +core/ops/mlflow_trace/mlflow_trace.py +core/ops/ops_trace_manager.py +core/ops/tencent_trace/client.py +core/ops/tencent_trace/utils.py +core/plugin/backwards_invocation/base.py +core/plugin/backwards_invocation/model.py +core/prompt/utils/extract_thread_messages.py +core/rag/datasource/keyword/jieba/jieba.py +core/rag/datasource/keyword/jieba/jieba_keyword_table_handler.py +core/rag/datasource/vdb/analyticdb/analyticdb_vector.py +core/rag/datasource/vdb/analyticdb/analyticdb_vector_openapi.py +core/rag/datasource/vdb/baidu/baidu_vector.py +core/rag/datasource/vdb/chroma/chroma_vector.py +core/rag/datasource/vdb/clickzetta/clickzetta_vector.py +core/rag/datasource/vdb/couchbase/couchbase_vector.py +core/rag/datasource/vdb/elasticsearch/elasticsearch_vector.py +core/rag/datasource/vdb/huawei/huawei_cloud_vector.py +core/rag/datasource/vdb/lindorm/lindorm_vector.py +core/rag/datasource/vdb/matrixone/matrixone_vector.py +core/rag/datasource/vdb/milvus/milvus_vector.py +core/rag/datasource/vdb/myscale/myscale_vector.py +core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +core/rag/datasource/vdb/opensearch/opensearch_vector.py +core/rag/datasource/vdb/oracle/oraclevector.py +core/rag/datasource/vdb/pgvecto_rs/pgvecto_rs.py +core/rag/datasource/vdb/relyt/relyt_vector.py +core/rag/datasource/vdb/tablestore/tablestore_vector.py +core/rag/datasource/vdb/tencent/tencent_vector.py +core/rag/datasource/vdb/tidb_on_qdrant/tidb_on_qdrant_vector.py +core/rag/datasource/vdb/tidb_on_qdrant/tidb_service.py +core/rag/datasource/vdb/tidb_vector/tidb_vector.py +core/rag/datasource/vdb/upstash/upstash_vector.py +core/rag/datasource/vdb/vikingdb/vikingdb_vector.py +core/rag/datasource/vdb/weaviate/weaviate_vector.py +core/rag/extractor/csv_extractor.py +core/rag/extractor/excel_extractor.py +core/rag/extractor/firecrawl/firecrawl_app.py +core/rag/extractor/firecrawl/firecrawl_web_extractor.py +core/rag/extractor/html_extractor.py +core/rag/extractor/jina_reader_extractor.py +core/rag/extractor/markdown_extractor.py +core/rag/extractor/notion_extractor.py +core/rag/extractor/pdf_extractor.py +core/rag/extractor/text_extractor.py +core/rag/extractor/unstructured/unstructured_doc_extractor.py +core/rag/extractor/unstructured/unstructured_eml_extractor.py +core/rag/extractor/unstructured/unstructured_epub_extractor.py +core/rag/extractor/unstructured/unstructured_markdown_extractor.py +core/rag/extractor/unstructured/unstructured_msg_extractor.py +core/rag/extractor/unstructured/unstructured_ppt_extractor.py +core/rag/extractor/unstructured/unstructured_pptx_extractor.py +core/rag/extractor/unstructured/unstructured_xml_extractor.py +core/rag/extractor/watercrawl/client.py +core/rag/extractor/watercrawl/extractor.py +core/rag/extractor/watercrawl/provider.py +core/rag/extractor/word_extractor.py +core/rag/index_processor/processor/paragraph_index_processor.py +core/rag/index_processor/processor/parent_child_index_processor.py +core/rag/index_processor/processor/qa_index_processor.py +core/rag/retrieval/router/multi_dataset_function_call_router.py +core/rag/summary_index/summary_index.py +core/repositories/sqlalchemy_workflow_execution_repository.py +core/repositories/sqlalchemy_workflow_node_execution_repository.py +core/tools/__base/tool.py +core/tools/mcp_tool/provider.py +core/tools/plugin_tool/provider.py +core/tools/utils/message_transformer.py +core/tools/utils/web_reader_tool.py +core/tools/workflow_as_tool/provider.py +core/trigger/debug/event_selectors.py +core/trigger/entities/entities.py +core/trigger/provider.py +core/workflow/workflow_entry.py +dify_graph/entities/workflow_execution.py +dify_graph/file/file_manager.py +dify_graph/graph_engine/error_handler.py +dify_graph/graph_engine/layers/execution_limits.py +dify_graph/nodes/agent/agent_node.py +dify_graph/nodes/base/node.py +dify_graph/nodes/code/code_node.py +dify_graph/nodes/datasource/datasource_node.py +dify_graph/nodes/document_extractor/node.py +dify_graph/nodes/human_input/human_input_node.py +dify_graph/nodes/if_else/if_else_node.py +dify_graph/nodes/iteration/iteration_node.py +dify_graph/nodes/knowledge_index/knowledge_index_node.py +core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +dify_graph/nodes/list_operator/node.py +dify_graph/nodes/llm/node.py +dify_graph/nodes/loop/loop_node.py +dify_graph/nodes/parameter_extractor/parameter_extractor_node.py +dify_graph/nodes/question_classifier/question_classifier_node.py +dify_graph/nodes/start/start_node.py +dify_graph/nodes/template_transform/template_transform_node.py +dify_graph/nodes/tool/tool_node.py +dify_graph/nodes/trigger_plugin/trigger_event_node.py +dify_graph/nodes/trigger_schedule/trigger_schedule_node.py +dify_graph/nodes/trigger_webhook/node.py +dify_graph/nodes/variable_aggregator/variable_aggregator_node.py +dify_graph/nodes/variable_assigner/v1/node.py +dify_graph/nodes/variable_assigner/v2/node.py +dify_graph/variables/types.py +extensions/ext_fastopenapi.py +extensions/logstore/repositories/logstore_api_workflow_run_repository.py +extensions/otel/instrumentation.py +extensions/otel/runtime.py +extensions/storage/aliyun_oss_storage.py +extensions/storage/aws_s3_storage.py +extensions/storage/azure_blob_storage.py +extensions/storage/baidu_obs_storage.py +extensions/storage/clickzetta_volume/clickzetta_volume_storage.py +extensions/storage/clickzetta_volume/file_lifecycle.py +extensions/storage/google_cloud_storage.py +extensions/storage/huawei_obs_storage.py +extensions/storage/opendal_storage.py +extensions/storage/oracle_oci_storage.py +extensions/storage/supabase_storage.py +extensions/storage/tencent_cos_storage.py +extensions/storage/volcengine_tos_storage.py +factories/variable_factory.py +libs/external_api.py +libs/gmpy2_pkcs10aep_cipher.py +libs/helper.py +libs/login.py +libs/module_loading.py +libs/oauth.py +libs/oauth_data_source.py +models/trigger.py +models/workflow.py +repositories/sqlalchemy_api_workflow_node_execution_repository.py +repositories/sqlalchemy_api_workflow_run_repository.py +repositories/sqlalchemy_execution_extra_content_repository.py +schedule/queue_monitor_task.py +services/account_service.py +services/audio_service.py +services/auth/firecrawl/firecrawl.py +services/auth/jina.py +services/auth/jina/jina.py +services/auth/watercrawl/watercrawl.py +services/conversation_service.py +services/dataset_service.py +services/document_indexing_proxy/document_indexing_task_proxy.py +services/document_indexing_proxy/duplicate_document_indexing_task_proxy.py +services/external_knowledge_service.py +services/plugin/plugin_migration.py +services/recommend_app/buildin/buildin_retrieval.py +services/recommend_app/database/database_retrieval.py +services/recommend_app/remote/remote_retrieval.py +services/summary_index_service.py +services/tools/tools_transform_service.py +services/trigger/trigger_provider_service.py +services/trigger/trigger_subscription_builder_service.py +services/trigger/webhook_service.py +services/workflow_draft_variable_service.py +services/workflow_event_snapshot_service.py +services/workflow_service.py +tasks/app_generate/workflow_execute_task.py +tasks/regenerate_summary_index_task.py +tasks/trigger_processing_tasks.py +tasks/workflow_cfs_scheduler/cfs_scheduler.py +tasks/workflow_execution_tasks.py diff --git a/api/pyrefly.toml b/api/pyrefly.toml deleted file mode 100644 index 01f4c5a529..0000000000 --- a/api/pyrefly.toml +++ /dev/null @@ -1,8 +0,0 @@ -project-includes = ["."] -project-excludes = [ - ".venv", - "migrations/", -] -python-platform = "linux" -python-version = "3.11.0" -infer-with-first-use = false diff --git a/api/pytest.ini b/api/pytest.ini index 4a9470fa0c..588dafe7eb 100644 --- a/api/pytest.ini +++ b/api/pytest.ini @@ -1,5 +1,6 @@ [pytest] -addopts = --cov=./api --cov-report=json +pythonpath = . +addopts = --cov=./api --cov-report=json --import-mode=importlib env = ANTHROPIC_API_KEY = sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz AZURE_OPENAI_API_BASE = https://difyai-openai.openai.azure.com @@ -19,7 +20,7 @@ env = GOOGLE_API_KEY = abcdefghijklmnopqrstuvwxyz HUGGINGFACE_API_KEY = hf-awuwuwuwuwuwuwuwuwuwuwuwuwuwuwuwuwu HUGGINGFACE_EMBEDDINGS_ENDPOINT_URL = c - HUGGINGFACE_TEXT2TEXT_GEN_ENDPOINT_URL = b + HUGGINGFACE_TEXT2TEXT_GEN_ENDPOINT_URL = b HUGGINGFACE_TEXT_GEN_ENDPOINT_URL = a MIXEDBREAD_API_KEY = mk-aaaaaaaaaaaaaaaaaaaa MOCK_SWITCH = true diff --git a/api/schedule/queue_monitor_task.py b/api/schedule/queue_monitor_task.py index 77d6b5a138..01642e397e 100644 --- a/api/schedule/queue_monitor_task.py +++ b/api/schedule/queue_monitor_task.py @@ -21,6 +21,10 @@ celery_redis = Redis( ssl_cert_reqs=getattr(dify_config, "REDIS_SSL_CERT_REQS", None) if dify_config.BROKER_USE_SSL else None, ssl_certfile=getattr(dify_config, "REDIS_SSL_CERTFILE", None) if dify_config.BROKER_USE_SSL else None, ssl_keyfile=getattr(dify_config, "REDIS_SSL_KEYFILE", None) if dify_config.BROKER_USE_SSL else None, + # Add conservative socket timeouts and health checks to avoid long-lived half-open sockets + socket_timeout=5, + socket_connect_timeout=5, + health_check_interval=30, ) logger = logging.getLogger(__name__) diff --git a/api/schedule/trigger_provider_refresh_task.py b/api/schedule/trigger_provider_refresh_task.py index 3b3e478793..df5058d70a 100644 --- a/api/schedule/trigger_provider_refresh_task.py +++ b/api/schedule/trigger_provider_refresh_task.py @@ -3,6 +3,7 @@ import math import time from collections.abc import Iterable, Sequence +from celery import group from sqlalchemy import ColumnElement, and_, func, or_, select from sqlalchemy.engine.row import Row from sqlalchemy.orm import Session @@ -85,20 +86,25 @@ def trigger_provider_refresh() -> None: lock_keys: list[str] = build_trigger_refresh_lock_keys(subscriptions) acquired: list[bool] = _acquire_locks(keys=lock_keys, ttl_seconds=lock_ttl) - enqueued: int = 0 - for (tenant_id, subscription_id), is_locked in zip(subscriptions, acquired): - if not is_locked: - continue - trigger_subscription_refresh.delay(tenant_id=tenant_id, subscription_id=subscription_id) - enqueued += 1 + if not any(acquired): + continue + + jobs = [ + trigger_subscription_refresh.s(tenant_id=tenant_id, subscription_id=subscription_id) + for (tenant_id, subscription_id), is_locked in zip(subscriptions, acquired) + if is_locked + ] + result = group(jobs).apply_async() + enqueued = len(jobs) logger.info( - "Trigger refresh page %d/%d: scanned=%d locks_acquired=%d enqueued=%d", + "Trigger refresh page %d/%d: scanned=%d locks_acquired=%d enqueued=%d result=%s", page + 1, pages, len(subscriptions), sum(1 for x in acquired if x), enqueued, + result, ) logger.info("Trigger refresh scan done: due=%d", total_due) diff --git a/api/schedule/workflow_schedule_task.py b/api/schedule/workflow_schedule_task.py index d68b9565ec..2fee9e467d 100644 --- a/api/schedule/workflow_schedule_task.py +++ b/api/schedule/workflow_schedule_task.py @@ -1,6 +1,6 @@ import logging -from celery import group, shared_task +from celery import current_app, group, shared_task from sqlalchemy import and_, select from sqlalchemy.orm import Session, sessionmaker @@ -29,31 +29,27 @@ def poll_workflow_schedules() -> None: with session_factory() as session: total_dispatched = 0 - # Process in batches until we've handled all due schedules or hit the limit while True: due_schedules = _fetch_due_schedules(session) if not due_schedules: break - dispatched_count = _process_schedules(session, due_schedules) - total_dispatched += dispatched_count + with current_app.producer_or_acquire() as producer: # type: ignore + dispatched_count = _process_schedules(session, due_schedules, producer) + total_dispatched += dispatched_count - logger.debug("Batch processed: %d dispatched", dispatched_count) - - # Circuit breaker: check if we've hit the per-tick limit (if enabled) - if ( - dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK > 0 - and total_dispatched >= dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK - ): - logger.warning( - "Circuit breaker activated: reached dispatch limit (%d), will continue next tick", - dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK, - ) - break + logger.debug("Batch processed: %d dispatched", dispatched_count) + # Circuit breaker: check if we've hit the per-tick limit (if enabled) + if 0 < dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK <= total_dispatched: + logger.warning( + "Circuit breaker activated: reached dispatch limit (%d), will continue next tick", + dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK, + ) + break if total_dispatched > 0: - logger.info("Total processed: %d dispatched", total_dispatched) + logger.info("Total processed: %d workflow schedule(s) dispatched", total_dispatched) def _fetch_due_schedules(session: Session) -> list[WorkflowSchedulePlan]: @@ -90,7 +86,7 @@ def _fetch_due_schedules(session: Session) -> list[WorkflowSchedulePlan]: return list(due_schedules) -def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan]) -> int: +def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan], producer=None) -> int: """Process schedules: check quota, update next run time and dispatch to Celery in parallel.""" if not schedules: return 0 @@ -107,7 +103,7 @@ def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan]) if tasks_to_dispatch: job = group(run_schedule_trigger.s(schedule_id) for schedule_id in tasks_to_dispatch) - job.apply_async() + job.apply_async(producer=producer) logger.debug("Dispatched %d tasks in parallel", len(tasks_to_dispatch)) diff --git a/api/services/account_service.py b/api/services/account_service.py index f0eac2a522..bd520f54cf 100644 --- a/api/services/account_service.py +++ b/api/services/account_service.py @@ -1089,9 +1089,9 @@ class TenantService: ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=account.id).first() if ta: - ta.role = role + ta.role = TenantAccountRole(role) else: - ta = TenantAccountJoin(tenant_id=tenant.id, account_id=account.id, role=role) + ta = TenantAccountJoin(tenant_id=tenant.id, account_id=account.id, role=TenantAccountRole(role)) db.session.add(ta) db.session.commit() @@ -1319,10 +1319,10 @@ class TenantService: db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, role="owner").first() ) if current_owner_join: - current_owner_join.role = "admin" + current_owner_join.role = TenantAccountRole.ADMIN # Update the role of the target member - target_member_join.role = new_role + target_member_join.role = TenantAccountRole(new_role) db.session.commit() @staticmethod diff --git a/api/services/app_dsl_service.py b/api/services/app_dsl_service.py index 5790c8b9ec..49e8b3cd60 100644 --- a/api/services/app_dsl_service.py +++ b/api/services/app_dsl_service.py @@ -4,6 +4,7 @@ import logging import uuid from collections.abc import Mapping from enum import StrEnum +from typing import cast from urllib.parse import urlparse from uuid import uuid4 @@ -19,20 +20,25 @@ from sqlalchemy.orm import Session from configs import dify_config from core.helper import ssrf_proxy from core.plugin.entities.plugin import PluginDependency -from dify_graph.enums import NodeType +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) +from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from core.workflow.nodes.trigger_schedule.trigger_schedule_node import TriggerScheduleNode +from dify_graph.enums import BuiltinNodeTypes from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData from dify_graph.nodes.llm.entities import LLMNodeData from dify_graph.nodes.parameter_extractor.entities import ParameterExtractorNodeData from dify_graph.nodes.question_classifier.entities import QuestionClassifierNodeData from dify_graph.nodes.tool.entities import ToolNodeData -from dify_graph.nodes.trigger_schedule.trigger_schedule_node import TriggerScheduleNode from events.app_event import app_model_config_was_updated, app_was_created from extensions.ext_redis import redis_client from factories import variable_factory from libs.datetime_utils import naive_utc_now from models import Account, App, AppMode -from models.model import AppModelConfig, IconType +from models.model import AppModelConfig, AppModelConfigDict, IconType from models.workflow import Workflow from services.plugin.dependencies_analysis import DependenciesAnalysisService from services.workflow_draft_variable_service import WorkflowDraftVariableService @@ -428,17 +434,18 @@ class AppDslService: # Set icon type icon_type_value = icon_type or app_data.get("icon_type") + resolved_icon_type: IconType if icon_type_value in [IconType.EMOJI, IconType.IMAGE, IconType.LINK]: - icon_type = icon_type_value + resolved_icon_type = IconType(icon_type_value) else: - icon_type = IconType.EMOJI + resolved_icon_type = IconType.EMOJI icon = icon or str(app_data.get("icon", "")) if app: # Update existing app app.name = name or app_data.get("name", app.name) app.description = description or app_data.get("description", app.description) - app.icon_type = icon_type + app.icon_type = resolved_icon_type app.icon = icon app.icon_background = icon_background or app_data.get("icon_background", app.icon_background) app.updated_by = account.id @@ -451,10 +458,10 @@ class AppDslService: app = App() app.id = str(uuid4()) app.tenant_id = account.current_tenant_id - app.mode = app_mode.value + app.mode = app_mode app.name = name or app_data.get("name", "") app.description = description or app_data.get("description", "") - app.icon_type = icon_type + app.icon_type = resolved_icon_type app.icon = icon app.icon_background = icon_background or app_data.get("icon_background", "#FFFFFF") app.enable_site = True @@ -498,7 +505,7 @@ class AppDslService: unique_hash = None graph = workflow_data.get("graph", {}) for node in graph.get("nodes", []): - if node.get("data", {}).get("type", "") == NodeType.KNOWLEDGE_RETRIEVAL: + if node.get("data", {}).get("type", "") == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node["data"].get("dataset_ids", []) node["data"]["dataset_ids"] = [ decrypted_id @@ -523,7 +530,7 @@ class AppDslService: if not app.app_model_config: app_model_config = AppModelConfig( app_id=app.id, created_by=account.id, updated_by=account.id - ).from_model_config_dict(model_config) + ).from_model_config_dict(cast(AppModelConfigDict, model_config)) app_model_config.id = str(uuid4()) app.app_model_config_id = app_model_config.id @@ -548,7 +555,7 @@ class AppDslService: "kind": "app", "app": { "name": app_model.name, - "mode": app_model.mode, + "mode": app_model.mode.value if isinstance(app_model.mode, AppMode) else app_model.mode, "icon": app_model.icon if app_model.icon_type == "image" else "🤖", "icon_background": "#FFEAD5" if app_model.icon_type == "image" else app_model.icon_background, "description": app_model.description, @@ -586,27 +593,27 @@ class AppDslService: if not node_data: continue data_type = node_data.get("type", "") - if data_type == NodeType.KNOWLEDGE_RETRIEVAL: + if data_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node_data.get("dataset_ids", []) node_data["dataset_ids"] = [ cls.encrypt_dataset_id(dataset_id=dataset_id, tenant_id=app_model.tenant_id) for dataset_id in dataset_ids ] # filter credential id from tool node - if not include_secret and data_type == NodeType.TOOL: + if not include_secret and data_type == BuiltinNodeTypes.TOOL: node_data.pop("credential_id", None) # filter credential id from agent node - if not include_secret and data_type == NodeType.AGENT: + if not include_secret and data_type == BuiltinNodeTypes.AGENT: for tool in node_data.get("agent_parameters", {}).get("tools", {}).get("value", []): tool.pop("credential_id", None) - if data_type == NodeType.TRIGGER_SCHEDULE.value: + if data_type == TRIGGER_SCHEDULE_NODE_TYPE: # override the config with the default config node_data["config"] = TriggerScheduleNode.get_default_config()["config"] - if data_type == NodeType.TRIGGER_WEBHOOK.value: + if data_type == TRIGGER_WEBHOOK_NODE_TYPE: # clear the webhook_url node_data["webhook_url"] = "" node_data["webhook_debug_url"] = "" - if data_type == NodeType.TRIGGER_PLUGIN.value: + if data_type == TRIGGER_PLUGIN_NODE_TYPE: # clear the subscription_id node_data["subscription_id"] = "" @@ -670,31 +677,31 @@ class AppDslService: try: typ = node.get("data", {}).get("type") match typ: - case NodeType.TOOL: + case BuiltinNodeTypes.TOOL: tool_entity = ToolNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_tool_dependency(tool_entity.provider_id), ) - case NodeType.LLM: + case BuiltinNodeTypes.LLM: llm_entity = LLMNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency(llm_entity.model.provider), ) - case NodeType.QUESTION_CLASSIFIER: + case BuiltinNodeTypes.QUESTION_CLASSIFIER: question_classifier_entity = QuestionClassifierNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency( question_classifier_entity.model.provider ), ) - case NodeType.PARAMETER_EXTRACTOR: + case BuiltinNodeTypes.PARAMETER_EXTRACTOR: parameter_extractor_entity = ParameterExtractorNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency( parameter_extractor_entity.model.provider ), ) - case NodeType.KNOWLEDGE_RETRIEVAL: + case BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: knowledge_retrieval_entity = KnowledgeRetrievalNodeData.model_validate(node["data"]) if knowledge_retrieval_entity.retrieval_mode == "multiple": if knowledge_retrieval_entity.multiple_retrieval_config: diff --git a/api/services/app_model_config_service.py b/api/services/app_model_config_service.py index 6f54f90734..3bc30cb323 100644 --- a/api/services/app_model_config_service.py +++ b/api/services/app_model_config_service.py @@ -1,12 +1,12 @@ from core.app.apps.agent_chat.app_config_manager import AgentChatAppConfigManager from core.app.apps.chat.app_config_manager import ChatAppConfigManager from core.app.apps.completion.app_config_manager import CompletionAppConfigManager -from models.model import AppMode +from models.model import AppMode, AppModelConfigDict class AppModelConfigService: @classmethod - def validate_configuration(cls, tenant_id: str, config: dict, app_mode: AppMode): + def validate_configuration(cls, tenant_id: str, config: dict, app_mode: AppMode) -> AppModelConfigDict: if app_mode == AppMode.CHAT: return ChatAppConfigManager.config_validate(tenant_id, config) elif app_mode == AppMode.AGENT_CHAT: diff --git a/api/services/app_service.py b/api/services/app_service.py index ce6826ef5c..c5d1479a20 100644 --- a/api/services/app_service.py +++ b/api/services/app_service.py @@ -1,6 +1,6 @@ import json import logging -from typing import TypedDict, cast +from typing import Any, TypedDict, cast import sqlalchemy as sa from flask_sqlalchemy.pagination import Pagination @@ -19,7 +19,7 @@ from extensions.ext_database import db from libs.datetime_utils import naive_utc_now from libs.login import current_user from models import Account -from models.model import App, AppMode, AppModelConfig, Site +from models.model import App, AppMode, AppModelConfig, IconType, Site from models.tools import ApiToolProvider from services.billing_service import BillingService from services.enterprise.enterprise_service import EnterpriseService @@ -187,7 +187,10 @@ class AppService: for tool in agent_mode.get("tools") or []: if not isinstance(tool, dict) or len(tool.keys()) <= 3: continue - agent_tool_entity = AgentToolEntity(**tool) + typed_tool = {key: value for key, value in tool.items() if isinstance(key, str)} + if len(typed_tool) != len(tool): + continue + agent_tool_entity = AgentToolEntity.model_validate(typed_tool) # get tool try: tool_runtime = ToolManager.get_agent_tool_runtime( @@ -254,7 +257,7 @@ class AppService: assert current_user is not None app.name = args["name"] app.description = args["description"] - app.icon_type = args["icon_type"] + app.icon_type = IconType(args["icon_type"]) if args["icon_type"] else None app.icon = args["icon"] app.icon_background = args["icon_background"] app.use_icon_as_answer_icon = args.get("use_icon_as_answer_icon", False) @@ -388,7 +391,7 @@ class AppService: agent_config = app_model_config.agent_mode_dict # get all tools - tools = agent_config.get("tools", []) + tools = cast(list[dict[str, Any]], agent_config.get("tools", [])) url_prefix = dify_config.CONSOLE_API_URL + "/console/api/workspaces/current/tool-provider/builtin/" diff --git a/api/services/audio_service.py b/api/services/audio_service.py index 1b698fad17..1794ea9947 100644 --- a/api/services/audio_service.py +++ b/api/services/audio_service.py @@ -2,6 +2,7 @@ import io import logging import uuid from collections.abc import Generator +from typing import cast from flask import Response, stream_with_context from werkzeug.datastructures import FileStorage @@ -106,7 +107,7 @@ class AudioService: if not text_to_speech_dict.get("enabled"): raise ValueError("TTS is not enabled") - voice = text_to_speech_dict.get("voice") + voice = cast(str | None, text_to_speech_dict.get("voice")) model_manager = ModelManager() model_instance = model_manager.get_default_model_instance( diff --git a/api/services/billing_service.py b/api/services/billing_service.py index 946b8cdfdb..5ab47c799a 100644 --- a/api/services/billing_service.py +++ b/api/services/billing_service.py @@ -393,3 +393,78 @@ class BillingService: for item in data: tenant_whitelist.append(item["tenant_id"]) return tenant_whitelist + + @classmethod + def get_account_notification(cls, account_id: str) -> dict: + """Return the active in-product notification for account_id, if any. + + Calling this endpoint also marks the notification as seen; subsequent + calls will return should_show=false when frequency='once'. + + Response shape (mirrors GetAccountNotificationReply): + { + "should_show": bool, + "notification": { # present only when should_show=true + "notification_id": str, + "contents": { # lang -> LangContent + "en": {"lang": "en", "title": ..., "subtitle": ..., "body": ..., "title_pic_url": ...}, + ... + }, + "frequency": "once" | "every_page_load" + } + } + """ + return cls._send_request("GET", "/notifications/active", params={"account_id": account_id}) + + @classmethod + def upsert_notification( + cls, + contents: list[dict], + frequency: str = "once", + status: str = "active", + notification_id: str | None = None, + start_time: str | None = None, + end_time: str | None = None, + ) -> dict: + """Create or update a notification. + + contents: list of {"lang": str, "title": str, "subtitle": str, "body": str, "title_pic_url": str} + start_time / end_time: RFC3339 strings (e.g. "2026-03-01T00:00:00Z"), optional. + Returns {"notification_id": str}. + """ + payload: dict = { + "contents": contents, + "frequency": frequency, + "status": status, + } + if notification_id: + payload["notification_id"] = notification_id + if start_time: + payload["start_time"] = start_time + if end_time: + payload["end_time"] = end_time + return cls._send_request("POST", "/notifications", json=payload) + + @classmethod + def batch_add_notification_accounts(cls, notification_id: str, account_ids: list[str]) -> dict: + """Register target account IDs for a notification (max 1000 per call). + + Returns {"count": int}. + """ + return cls._send_request( + "POST", + f"/notifications/{notification_id}/accounts", + json={"account_ids": account_ids}, + ) + + @classmethod + def dismiss_notification(cls, notification_id: str, account_id: str) -> dict: + """Mark a notification as dismissed for an account. + + Returns {"success": bool}. + """ + return cls._send_request( + "POST", + f"/notifications/{notification_id}/dismiss", + json={"account_id": account_id}, + ) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 3a7d483a9d..c527c71d7b 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -254,7 +254,7 @@ class DatasetService: dataset.embedding_model_provider = embedding_model.provider if embedding_model else None dataset.embedding_model = embedding_model.model_name if embedding_model else None dataset.retrieval_model = retrieval_model.model_dump() if retrieval_model else None - dataset.permission = permission or DatasetPermissionEnum.ONLY_ME + dataset.permission = DatasetPermissionEnum(permission) if permission else DatasetPermissionEnum.ONLY_ME dataset.provider = provider if summary_index_setting is not None: dataset.summary_index_setting = summary_index_setting diff --git a/api/services/datasource_provider_service.py b/api/services/datasource_provider_service.py index 95a50f0512..f3b2adb965 100644 --- a/api/services/datasource_provider_service.py +++ b/api/services/datasource_provider_service.py @@ -824,6 +824,7 @@ class DatasourceProviderService: "langgenius/firecrawl_datasource", "langgenius/notion_datasource", "langgenius/jina_datasource", + "watercrawl/watercrawl_datasource", ]: datasource_provider_id = DatasourceProviderID(f"{datasource.plugin_id}/{datasource.provider}") credentials = self.list_datasource_credentials( diff --git a/api/services/enterprise/plugin_manager_service.py b/api/services/enterprise/plugin_manager_service.py index 817dbd95f8..598f9692eb 100644 --- a/api/services/enterprise/plugin_manager_service.py +++ b/api/services/enterprise/plugin_manager_service.py @@ -3,6 +3,7 @@ import logging from pydantic import BaseModel +from configs import dify_config from services.enterprise.base import EnterprisePluginManagerRequest from services.errors.base import BaseServiceError @@ -28,6 +29,11 @@ class CheckCredentialPolicyComplianceRequest(BaseModel): return data +class PreUninstallPluginRequest(BaseModel): + tenant_id: str + plugin_unique_identifier: str + + class CredentialPolicyViolationError(BaseServiceError): pass @@ -55,3 +61,21 @@ class PluginManagerService: body.dify_credential_id, ret.get("result", False), ) + + @classmethod + def try_pre_uninstall_plugin(cls, body: PreUninstallPluginRequest): + try: + # the invocation must be synchronous. + EnterprisePluginManagerRequest.send_request( + "POST", + "/pre-uninstall-plugin", + json=body.model_dump(), + raise_for_status=True, + timeout=dify_config.ENTERPRISE_REQUEST_TIMEOUT, + ) + except Exception: + logger.exception( + "failed to perform pre uninstall plugin hook. tenant_id: %s, plugin_unique_identifier: %s", + body.tenant_id, + body.plugin_unique_identifier, + ) diff --git a/api/services/file_service.py b/api/services/file_service.py index e08b78bf4c..ecb30faaa8 100644 --- a/api/services/file_service.py +++ b/api/services/file_service.py @@ -58,8 +58,9 @@ class FileService: # get file extension extension = os.path.splitext(filename)[1].lstrip(".").lower() - # check if filename contains invalid characters - if any(c in filename for c in ["/", "\\", ":", "*", "?", '"', "<", ">", "|"]): + # Only reject path separators here. The original filename is stored as metadata, + # while the storage key is UUID-based. + if any(c in filename for c in ["/", "\\"]): raise ValueError("Filename contains invalid characters") if len(filename) > 200: diff --git a/api/services/hit_testing_service.py b/api/services/hit_testing_service.py index c00c76a826..d85b290534 100644 --- a/api/services/hit_testing_service.py +++ b/api/services/hit_testing_service.py @@ -13,6 +13,7 @@ from dify_graph.model_runtime.entities import LLMMode from extensions.ext_database import db from models import Account from models.dataset import Dataset, DatasetQuery +from models.enums import CreatorUserRole logger = logging.getLogger(__name__) @@ -98,7 +99,7 @@ class HitTestingService: content=json.dumps(dataset_queries), source="hit_testing", source_app_id=None, - created_by_role="account", + created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, ) db.session.add(dataset_query) @@ -138,7 +139,7 @@ class HitTestingService: content=query, source="hit_testing", source_app_id=None, - created_by_role="account", + created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, ) diff --git a/api/services/human_input_delivery_test_service.py b/api/services/human_input_delivery_test_service.py index 7b43c49686..80deb37a56 100644 --- a/api/services/human_input_delivery_test_service.py +++ b/api/services/human_input_delivery_test_service.py @@ -245,5 +245,6 @@ class EmailDeliveryTestHandler: ) if token: substitutions["form_token"] = token - substitutions["form_link"] = _build_form_link(token) or "" + link = _build_form_link(token) + substitutions["form_link"] = link if link is not None else f"/form/{token}" return substitutions diff --git a/api/services/plugin/plugin_service.py b/api/services/plugin/plugin_service.py index 6eed3a6b38..55a3ffde78 100644 --- a/api/services/plugin/plugin_service.py +++ b/api/services/plugin/plugin_service.py @@ -32,6 +32,10 @@ from extensions.ext_database import db from extensions.ext_redis import redis_client from models.provider import Provider, ProviderCredential from models.provider_ids import GenericProviderID +from services.enterprise.plugin_manager_service import ( + PluginManagerService, + PreUninstallPluginRequest, +) from services.errors.plugin import PluginInstallationForbiddenError from services.feature_service import FeatureService, PluginInstallationScope @@ -519,6 +523,13 @@ class PluginService: if not plugin: return manager.uninstall(tenant_id, plugin_installation_id) + if dify_config.ENTERPRISE_ENABLED: + PluginManagerService.try_pre_uninstall_plugin( + PreUninstallPluginRequest( + tenant_id=tenant_id, + plugin_unique_identifier=plugin.plugin_unique_identifier, + ) + ) with Session(db.engine) as session, session.begin(): plugin_id = plugin.plugin_id logger.info("Deleting credentials for plugin: %s", plugin_id) diff --git a/api/services/rag_pipeline/rag_pipeline.py b/api/services/rag_pipeline/rag_pipeline.py index ce745a4679..899a6ba378 100644 --- a/api/services/rag_pipeline/rag_pipeline.py +++ b/api/services/rag_pipeline/rag_pipeline.py @@ -36,19 +36,19 @@ from core.rag.entities.event import ( ) from core.repositories.factory import DifyCoreRepositoryFactory from core.repositories.sqlalchemy_workflow_node_execution_repository import SQLAlchemyWorkflowNodeExecutionRepository +from core.workflow.node_factory import LATEST_VERSION, get_node_type_classes_mapping from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from dify_graph.enums import ErrorStrategy, NodeType, SystemVariableKey +from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, NodeType, SystemVariableKey from dify_graph.errors import WorkflowNodeRunFailedError from dify_graph.graph_events import NodeRunFailedEvent, NodeRunSucceededEvent from dify_graph.graph_events.base import GraphNodeEventBase from dify_graph.node_events.base import NodeRunResult from dify_graph.nodes.base.node import Node from dify_graph.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, build_http_request_config -from dify_graph.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING from dify_graph.repositories.workflow_node_execution_repository import OrderConfig from dify_graph.runtime import VariablePool from dify_graph.system_variable import SystemVariable @@ -381,10 +381,10 @@ class RagPipelineService: """ # return default block config default_block_configs: list[dict[str, Any]] = [] - for node_type, node_class_mapping in NODE_TYPE_CLASSES_MAPPING.items(): + for node_type, node_class_mapping in get_node_type_classes_mapping().items(): node_class = node_class_mapping[LATEST_VERSION] filters = None - if node_type is NodeType.HTTP_REQUEST: + if node_type == BuiltinNodeTypes.HTTP_REQUEST: filters = { HTTP_REQUEST_CONFIG_FILTER_KEY: build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, @@ -410,14 +410,15 @@ class RagPipelineService: :return: """ node_type_enum = NodeType(node_type) + node_mapping = get_node_type_classes_mapping() # return default block config - if node_type_enum not in NODE_TYPE_CLASSES_MAPPING: + if node_type_enum not in node_mapping: return None - node_class = NODE_TYPE_CLASSES_MAPPING[node_type_enum][LATEST_VERSION] + node_class = node_mapping[node_type_enum][LATEST_VERSION] final_filters = dict(filters) if filters else {} - if node_type_enum is NodeType.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in final_filters: + if node_type_enum == BuiltinNodeTypes.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in final_filters: final_filters[HTTP_REQUEST_CONFIG_FILTER_KEY] = build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, max_read_timeout=dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT, @@ -499,7 +500,7 @@ class RagPipelineService: session=session, app_id=pipeline.id, node_id=workflow_node_execution.node_id, - node_type=NodeType(workflow_node_execution.node_type), + node_type=workflow_node_execution.node_type, enclosing_node_id=enclosing_node_id, node_execution_id=workflow_node_execution.id, user=account, @@ -1261,7 +1262,7 @@ class RagPipelineService: session=session, app_id=pipeline.id, node_id=workflow_node_execution_db_model.node_id, - node_type=NodeType(workflow_node_execution_db_model.node_type), + node_type=workflow_node_execution_db_model.node_type, enclosing_node_id=enclosing_node_id, node_execution_id=workflow_node_execution.id, user=current_user, diff --git a/api/services/rag_pipeline/rag_pipeline_dsl_service.py b/api/services/rag_pipeline/rag_pipeline_dsl_service.py index 58bb4b7c90..c7da1afe1b 100644 --- a/api/services/rag_pipeline/rag_pipeline_dsl_service.py +++ b/api/services/rag_pipeline/rag_pipeline_dsl_service.py @@ -22,10 +22,11 @@ from sqlalchemy.orm import Session from core.helper import ssrf_proxy from core.helper.name_generator import generate_incremental_name from core.plugin.entities.plugin import PluginDependency -from dify_graph.enums import NodeType +from core.workflow.nodes.datasource.entities import DatasourceNodeData +from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE +from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData +from dify_graph.enums import BuiltinNodeTypes from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes.datasource.entities import DatasourceNodeData -from dify_graph.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData from dify_graph.nodes.llm.entities import LLMNodeData from dify_graph.nodes.parameter_extractor.entities import ParameterExtractorNodeData from dify_graph.nodes.question_classifier.entities import QuestionClassifierNodeData @@ -287,7 +288,7 @@ class RagPipelineDslService: nodes = graph.get("nodes", []) dataset_id = None for node in nodes: - if node.get("data", {}).get("type") == "knowledge-index": + if node.get("data", {}).get("type") == KNOWLEDGE_INDEX_NODE_TYPE: knowledge_configuration = KnowledgeConfiguration.model_validate(node.get("data", {})) if ( dataset @@ -428,7 +429,7 @@ class RagPipelineDslService: nodes = graph.get("nodes", []) dataset_id = None for node in nodes: - if node.get("data", {}).get("type") == "knowledge-index": + if node.get("data", {}).get("type") == KNOWLEDGE_INDEX_NODE_TYPE: knowledge_configuration = KnowledgeConfiguration.model_validate(node.get("data", {})) if not dataset: dataset = Dataset( @@ -562,7 +563,7 @@ class RagPipelineDslService: graph = workflow_data.get("graph", {}) for node in graph.get("nodes", []): - if node.get("data", {}).get("type", "") == NodeType.KNOWLEDGE_RETRIEVAL: + if node.get("data", {}).get("type", "") == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node["data"].get("dataset_ids", []) node["data"]["dataset_ids"] = [ decrypted_id @@ -696,17 +697,17 @@ class RagPipelineDslService: if not node_data: continue data_type = node_data.get("type", "") - if data_type == NodeType.KNOWLEDGE_RETRIEVAL: + if data_type == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: dataset_ids = node_data.get("dataset_ids", []) node["data"]["dataset_ids"] = [ self.encrypt_dataset_id(dataset_id=dataset_id, tenant_id=pipeline.tenant_id) for dataset_id in dataset_ids ] # filter credential id from tool node - if not include_secret and data_type == NodeType.TOOL: + if not include_secret and data_type == BuiltinNodeTypes.TOOL: node_data.pop("credential_id", None) # filter credential id from agent node - if not include_secret and data_type == NodeType.AGENT: + if not include_secret and data_type == BuiltinNodeTypes.AGENT: for tool in node_data.get("agent_parameters", {}).get("tools", {}).get("value", []): tool.pop("credential_id", None) @@ -740,35 +741,35 @@ class RagPipelineDslService: try: typ = node.get("data", {}).get("type") match typ: - case NodeType.TOOL: + case BuiltinNodeTypes.TOOL: tool_entity = ToolNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_tool_dependency(tool_entity.provider_id), ) - case NodeType.DATASOURCE: + case BuiltinNodeTypes.DATASOURCE: datasource_entity = DatasourceNodeData.model_validate(node["data"]) if datasource_entity.provider_type != "local_file": dependencies.append(datasource_entity.plugin_id) - case NodeType.LLM: + case BuiltinNodeTypes.LLM: llm_entity = LLMNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency(llm_entity.model.provider), ) - case NodeType.QUESTION_CLASSIFIER: + case BuiltinNodeTypes.QUESTION_CLASSIFIER: question_classifier_entity = QuestionClassifierNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency( question_classifier_entity.model.provider ), ) - case NodeType.PARAMETER_EXTRACTOR: + case BuiltinNodeTypes.PARAMETER_EXTRACTOR: parameter_extractor_entity = ParameterExtractorNodeData.model_validate(node["data"]) dependencies.append( DependenciesAnalysisService.analyze_model_provider_dependency( parameter_extractor_entity.model.provider ), ) - case NodeType.KNOWLEDGE_INDEX: + case _ if typ == KNOWLEDGE_INDEX_NODE_TYPE: knowledge_index_entity = KnowledgeConfiguration.model_validate(node["data"]) if knowledge_index_entity.indexing_technique == "high_quality": if knowledge_index_entity.embedding_model_provider: @@ -789,7 +790,7 @@ class RagPipelineDslService: knowledge_index_entity.retrieval_model.reranking_model.reranking_provider_name ), ) - case NodeType.KNOWLEDGE_RETRIEVAL: + case BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: knowledge_retrieval_entity = KnowledgeRetrievalNodeData.model_validate(node["data"]) if knowledge_retrieval_entity.retrieval_mode == "multiple": if knowledge_retrieval_entity.multiple_retrieval_config: diff --git a/api/services/rag_pipeline/rag_pipeline_transform_service.py b/api/services/rag_pipeline/rag_pipeline_transform_service.py index d0dfbc1070..cee18387b3 100644 --- a/api/services/rag_pipeline/rag_pipeline_transform_service.py +++ b/api/services/rag_pipeline/rag_pipeline_transform_service.py @@ -63,7 +63,12 @@ class RagPipelineTransformService: ): node = self._deal_file_extensions(node) if node.get("data", {}).get("type") == "knowledge-index": - node = self._deal_knowledge_index(dataset, doc_form, indexing_technique, retrieval_model, node) + knowledge_configuration = KnowledgeConfiguration.model_validate(node.get("data", {})) + if dataset.tenant_id != current_user.current_tenant_id: + raise ValueError("Unauthorized") + node = self._deal_knowledge_index( + knowledge_configuration, dataset, indexing_technique, retrieval_model, node + ) new_nodes.append(node) if new_nodes: graph["nodes"] = new_nodes @@ -155,14 +160,13 @@ class RagPipelineTransformService: def _deal_knowledge_index( self, + knowledge_configuration: KnowledgeConfiguration, dataset: Dataset, - doc_form: str, indexing_technique: str | None, retrieval_model: RetrievalSetting | None, node: dict, ): knowledge_configuration_dict = node.get("data", {}) - knowledge_configuration = KnowledgeConfiguration.model_validate(knowledge_configuration_dict) if indexing_technique == "high_quality": knowledge_configuration.embedding_model = dataset.embedding_model diff --git a/api/services/retention/conversation/message_export_service.py b/api/services/retention/conversation/message_export_service.py new file mode 100644 index 0000000000..fbe0d2795d --- /dev/null +++ b/api/services/retention/conversation/message_export_service.py @@ -0,0 +1,304 @@ +""" +Export app messages to JSONL.GZ format. + +Outputs: conversation_id, message_id, query, answer, inputs (raw JSON), +retriever_resources (from message_metadata), feedback (user feedbacks array). + +Uses (created_at, id) cursor pagination and batch-loads feedbacks to avoid N+1. +Does NOT touch Message.inputs / Message.user_feedback properties. +""" + +import datetime +import gzip +import json +import logging +import tempfile +from collections import defaultdict +from collections.abc import Generator, Iterable +from pathlib import Path, PurePosixPath +from typing import Any, BinaryIO, cast + +import orjson +import sqlalchemy as sa +from pydantic import BaseModel, ConfigDict, Field +from sqlalchemy import select, tuple_ +from sqlalchemy.orm import Session + +from extensions.ext_database import db +from extensions.ext_storage import storage +from models.model import Message, MessageFeedback + +logger = logging.getLogger(__name__) + +MAX_FILENAME_BASE_LENGTH = 1024 +FORBIDDEN_FILENAME_SUFFIXES = (".jsonl.gz", ".jsonl", ".gz") + + +class AppMessageExportFeedback(BaseModel): + id: str + app_id: str + conversation_id: str + message_id: str + rating: str + content: str | None = None + from_source: str + from_end_user_id: str | None = None + from_account_id: str | None = None + created_at: str + updated_at: str + + model_config = ConfigDict(extra="forbid") + + +class AppMessageExportRecord(BaseModel): + conversation_id: str + message_id: str + query: str + answer: str + inputs: dict[str, Any] + retriever_resources: list[Any] = Field(default_factory=list) + feedback: list[AppMessageExportFeedback] = Field(default_factory=list) + + model_config = ConfigDict(extra="forbid") + + +class AppMessageExportStats(BaseModel): + batches: int = 0 + total_messages: int = 0 + messages_with_feedback: int = 0 + total_feedbacks: int = 0 + + model_config = ConfigDict(extra="forbid") + + +class AppMessageExportService: + @staticmethod + def validate_export_filename(filename: str) -> str: + normalized = filename.strip() + if not normalized: + raise ValueError("--filename must not be empty.") + + normalized_lower = normalized.lower() + if normalized_lower.endswith(FORBIDDEN_FILENAME_SUFFIXES): + raise ValueError("--filename must not include .jsonl.gz/.jsonl/.gz suffix; pass base filename only.") + + if normalized.startswith("/"): + raise ValueError("--filename must be a relative path; absolute paths are not allowed.") + + if "\\" in normalized: + raise ValueError("--filename must use '/' as path separator; '\\' is not allowed.") + + if "//" in normalized: + raise ValueError("--filename must not contain empty path segments ('//').") + + if len(normalized) > MAX_FILENAME_BASE_LENGTH: + raise ValueError(f"--filename is too long; max length is {MAX_FILENAME_BASE_LENGTH}.") + + for ch in normalized: + if ch == "\x00" or ord(ch) < 32 or ord(ch) == 127: + raise ValueError("--filename must not contain control characters or NUL.") + + parts = PurePosixPath(normalized).parts + if not parts: + raise ValueError("--filename must include a file name.") + + if any(part in (".", "..") for part in parts): + raise ValueError("--filename must not contain '.' or '..' path segments.") + + return normalized + + @property + def output_gz_name(self) -> str: + return f"{self._filename_base}.jsonl.gz" + + @property + def output_jsonl_name(self) -> str: + return f"{self._filename_base}.jsonl" + + def __init__( + self, + app_id: str, + end_before: datetime.datetime, + filename: str, + *, + start_from: datetime.datetime | None = None, + batch_size: int = 1000, + use_cloud_storage: bool = False, + dry_run: bool = False, + ) -> None: + if start_from and start_from >= end_before: + raise ValueError(f"start_from ({start_from}) must be before end_before ({end_before})") + + self._app_id = app_id + self._end_before = end_before + self._start_from = start_from + self._filename_base = self.validate_export_filename(filename) + self._batch_size = batch_size + self._use_cloud_storage = use_cloud_storage + self._dry_run = dry_run + + def run(self) -> AppMessageExportStats: + stats = AppMessageExportStats() + + logger.info( + "export_app_messages: app_id=%s, start_from=%s, end_before=%s, dry_run=%s, cloud=%s, output_gz=%s", + self._app_id, + self._start_from, + self._end_before, + self._dry_run, + self._use_cloud_storage, + self.output_gz_name, + ) + + if self._dry_run: + for _ in self._iter_records_with_stats(stats): + pass + self._finalize_stats(stats) + return stats + + if self._use_cloud_storage: + self._export_to_cloud(stats) + else: + self._export_to_local(stats) + + self._finalize_stats(stats) + return stats + + def iter_records(self) -> Generator[AppMessageExportRecord, None, None]: + for batch in self._iter_record_batches(): + yield from batch + + @staticmethod + def write_jsonl_gz(records: Iterable[AppMessageExportRecord], fileobj: BinaryIO) -> None: + with gzip.GzipFile(fileobj=fileobj, mode="wb") as gz: + for record in records: + gz.write(orjson.dumps(record.model_dump(mode="json")) + b"\n") + + def _export_to_local(self, stats: AppMessageExportStats) -> None: + output_path = Path.cwd() / self.output_gz_name + output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open("wb") as output_file: + self.write_jsonl_gz(self._iter_records_with_stats(stats), output_file) + + def _export_to_cloud(self, stats: AppMessageExportStats) -> None: + with tempfile.SpooledTemporaryFile(max_size=64 * 1024 * 1024) as tmp: + self.write_jsonl_gz(self._iter_records_with_stats(stats), cast(BinaryIO, tmp)) + tmp.seek(0) + data = tmp.read() + + storage.save(self.output_gz_name, data) + logger.info("export_app_messages: uploaded %d bytes to cloud key=%s", len(data), self.output_gz_name) + + def _iter_records_with_stats(self, stats: AppMessageExportStats) -> Generator[AppMessageExportRecord, None, None]: + for record in self.iter_records(): + self._update_stats(stats, record) + yield record + + @staticmethod + def _update_stats(stats: AppMessageExportStats, record: AppMessageExportRecord) -> None: + stats.total_messages += 1 + if record.feedback: + stats.messages_with_feedback += 1 + stats.total_feedbacks += len(record.feedback) + + def _finalize_stats(self, stats: AppMessageExportStats) -> None: + if stats.total_messages == 0: + stats.batches = 0 + return + stats.batches = (stats.total_messages + self._batch_size - 1) // self._batch_size + + def _iter_record_batches(self) -> Generator[list[AppMessageExportRecord], None, None]: + cursor: tuple[datetime.datetime, str] | None = None + while True: + rows, cursor = self._fetch_batch(cursor) + if not rows: + break + + message_ids = [str(row.id) for row in rows] + feedbacks_map = self._fetch_feedbacks(message_ids) + yield [self._build_record(row, feedbacks_map) for row in rows] + + def _fetch_batch( + self, cursor: tuple[datetime.datetime, str] | None + ) -> tuple[list[Any], tuple[datetime.datetime, str] | None]: + with Session(db.engine, expire_on_commit=False) as session: + stmt = ( + select( + Message.id, + Message.conversation_id, + Message.query, + Message.answer, + Message._inputs, # pyright: ignore[reportPrivateUsage] + Message.message_metadata, + Message.created_at, + ) + .where( + Message.app_id == self._app_id, + Message.created_at < self._end_before, + ) + .order_by(Message.created_at, Message.id) + .limit(self._batch_size) + ) + + if self._start_from: + stmt = stmt.where(Message.created_at >= self._start_from) + + if cursor: + stmt = stmt.where( + tuple_(Message.created_at, Message.id) + > tuple_( + sa.literal(cursor[0], type_=sa.DateTime()), + sa.literal(cursor[1], type_=Message.id.type), + ) + ) + + rows = list(session.execute(stmt).all()) + + if not rows: + return [], cursor + + last = rows[-1] + return rows, (last.created_at, last.id) + + def _fetch_feedbacks(self, message_ids: list[str]) -> dict[str, list[AppMessageExportFeedback]]: + if not message_ids: + return {} + + with Session(db.engine, expire_on_commit=False) as session: + stmt = ( + select(MessageFeedback) + .where( + MessageFeedback.message_id.in_(message_ids), + MessageFeedback.from_source == "user", + ) + .order_by(MessageFeedback.message_id, MessageFeedback.created_at) + ) + feedbacks = list(session.scalars(stmt).all()) + + result: dict[str, list[AppMessageExportFeedback]] = defaultdict(list) + for feedback in feedbacks: + result[str(feedback.message_id)].append(AppMessageExportFeedback.model_validate(feedback.to_dict())) + return result + + @staticmethod + def _build_record(row: Any, feedbacks_map: dict[str, list[AppMessageExportFeedback]]) -> AppMessageExportRecord: + retriever_resources: list[Any] = [] + if row.message_metadata: + try: + metadata = json.loads(row.message_metadata) + value = metadata.get("retriever_resources", []) + if isinstance(value, list): + retriever_resources = value + except (json.JSONDecodeError, TypeError): + pass + + message_id = str(row.id) + return AppMessageExportRecord( + conversation_id=str(row.conversation_id), + message_id=message_id, + query=row.query, + answer=row.answer, + inputs=row._inputs if isinstance(row._inputs, dict) else {}, + retriever_resources=retriever_resources, + feedback=feedbacks_map.get(message_id, []), + ) diff --git a/api/services/retention/conversation/messages_clean_service.py b/api/services/retention/conversation/messages_clean_service.py index f7836a2b14..04265817d7 100644 --- a/api/services/retention/conversation/messages_clean_service.py +++ b/api/services/retention/conversation/messages_clean_service.py @@ -12,6 +12,7 @@ from sqlalchemy.engine import CursorResult from sqlalchemy.orm import Session from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now from models.model import ( App, AppAnnotationHitHistory, @@ -142,7 +143,7 @@ class MessagesCleanService: if batch_size <= 0: raise ValueError(f"batch_size ({batch_size}) must be greater than 0") - end_before = datetime.datetime.now() - datetime.timedelta(days=days) + end_before = naive_utc_now() - datetime.timedelta(days=days) logger.info( "clean_messages: days=%s, end_before=%s, batch_size=%s, policy=%s", diff --git a/api/services/retention/workflow_run/restore_archived_workflow_run.py b/api/services/retention/workflow_run/restore_archived_workflow_run.py index d4a6e87585..64dad7ba52 100644 --- a/api/services/retention/workflow_run/restore_archived_workflow_run.py +++ b/api/services/retention/workflow_run/restore_archived_workflow_run.py @@ -358,21 +358,19 @@ class WorkflowRunRestore: self, model: type[DeclarativeBase] | Any, ) -> tuple[set[str], set[str], set[str]]: - columns = list(model.__table__.columns) + table = model.__table__ + columns = list(table.columns) + autoincrement_column = getattr(table, "autoincrement_column", None) + + def has_insert_default(column: Any) -> bool: + # SQLAlchemy may set column.autoincrement to "auto" on non-PK columns. + # Only treat the resolved autoincrement column as DB-generated. + return column.default is not None or column.server_default is not None or column is autoincrement_column + column_names = {column.key for column in columns} - required_columns = { - column.key - for column in columns - if not column.nullable - and column.default is None - and column.server_default is None - and not column.autoincrement - } + required_columns = {column.key for column in columns if not column.nullable and not has_insert_default(column)} non_nullable_with_default = { - column.key - for column in columns - if not column.nullable - and (column.default is not None or column.server_default is not None or column.autoincrement) + column.key for column in columns if not column.nullable and has_insert_default(column) } return column_names, required_columns, non_nullable_with_default diff --git a/api/services/saved_message_service.py b/api/services/saved_message_service.py index 4dd6c8107b..d0f4f27968 100644 --- a/api/services/saved_message_service.py +++ b/api/services/saved_message_service.py @@ -3,6 +3,7 @@ from typing import Union from extensions.ext_database import db from libs.infinite_scroll_pagination import InfiniteScrollPagination from models import Account +from models.enums import CreatorUserRole from models.model import App, EndUser from models.web import SavedMessage from services.message_service import MessageService @@ -54,7 +55,7 @@ class SavedMessageService: saved_message = SavedMessage( app_id=app_model.id, message_id=message.id, - created_by_role="account" if isinstance(user, Account) else "end_user", + created_by_role=CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER, created_by=user.id, ) diff --git a/api/services/tools/tools_transform_service.py b/api/services/tools/tools_transform_service.py index e323b3cda9..b6e5367023 100644 --- a/api/services/tools/tools_transform_service.py +++ b/api/services/tools/tools_transform_service.py @@ -33,6 +33,8 @@ logger = logging.getLogger(__name__) class ToolTransformService: + _MCP_SCHEMA_TYPE_RESOLUTION_MAX_DEPTH = 10 + @classmethod def get_tool_provider_icon_url( cls, provider_type: str, provider_name: str, icon: str | Mapping[str, str] @@ -435,6 +437,46 @@ class ToolTransformService: :return: list of ToolParameter instances """ + def resolve_property_type(prop: dict[str, Any], depth: int = 0) -> str: + """ + Resolve a JSON schema property type while guarding against cyclic or deeply nested unions. + """ + if depth >= ToolTransformService._MCP_SCHEMA_TYPE_RESOLUTION_MAX_DEPTH: + return "string" + prop_type = prop.get("type") + if isinstance(prop_type, list): + non_null_types = [type_name for type_name in prop_type if type_name != "null"] + if non_null_types: + return non_null_types[0] + if prop_type: + return "string" + elif isinstance(prop_type, str): + if prop_type == "null": + return "string" + return prop_type + + for union_key in ("anyOf", "oneOf"): + union_schemas = prop.get(union_key) + if not isinstance(union_schemas, list): + continue + + for union_schema in union_schemas: + if not isinstance(union_schema, dict): + continue + union_type = resolve_property_type(union_schema, depth + 1) + if union_type != "null": + return union_type + + all_of_schemas = prop.get("allOf") + if isinstance(all_of_schemas, list): + for all_of_schema in all_of_schemas: + if not isinstance(all_of_schema, dict): + continue + all_of_type = resolve_property_type(all_of_schema, depth + 1) + if all_of_type != "null": + return all_of_type + return "string" + def create_parameter( name: str, description: str, param_type: str, required: bool, input_schema: dict[str, Any] | None = None ) -> ToolParameter: @@ -461,10 +503,7 @@ class ToolTransformService: parameters = [] for name, prop in props.items(): current_description = prop.get("description", "") - prop_type = prop.get("type", "string") - - if isinstance(prop_type, list): - prop_type = prop_type[0] + prop_type = resolve_property_type(prop) if prop_type in TYPE_MAPPING: prop_type = TYPE_MAPPING[prop_type] input_schema = prop if prop_type in COMPLEX_TYPES else None diff --git a/api/services/trigger/schedule_service.py b/api/services/trigger/schedule_service.py index 8389ccbb34..7e9d010d2f 100644 --- a/api/services/trigger/schedule_service.py +++ b/api/services/trigger/schedule_service.py @@ -1,15 +1,19 @@ import json import logging -from collections.abc import Mapping from datetime import datetime -from typing import Any from sqlalchemy import select from sqlalchemy.orm import Session -from dify_graph.nodes import NodeType -from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig, SchedulePlanUpdate, VisualConfig -from dify_graph.nodes.trigger_schedule.exc import ScheduleConfigError, ScheduleNotFoundError +from core.trigger.constants import TRIGGER_SCHEDULE_NODE_TYPE +from core.workflow.nodes.trigger_schedule.entities import ( + ScheduleConfig, + SchedulePlanUpdate, + TriggerScheduleNodeData, + VisualConfig, +) +from core.workflow.nodes.trigger_schedule.exc import ScheduleConfigError, ScheduleNotFoundError +from dify_graph.entities.graph_config import NodeConfigDict from libs.schedule_utils import calculate_next_run_at, convert_12h_to_24h from models.account import Account, TenantAccountJoin from models.trigger import WorkflowSchedulePlan @@ -176,26 +180,26 @@ class ScheduleService: return next_run_at @staticmethod - def to_schedule_config(node_config: Mapping[str, Any]) -> ScheduleConfig: + def to_schedule_config(node_config: NodeConfigDict) -> ScheduleConfig: """ Converts user-friendly visual schedule settings to cron expression. Maintains consistency with frontend UI expectations while supporting croniter's extended syntax. """ - node_data = node_config.get("data", {}) - mode = node_data.get("mode", "visual") - timezone = node_data.get("timezone", "UTC") - node_id = node_config.get("id", "start") + node_data = TriggerScheduleNodeData.model_validate(node_config["data"], from_attributes=True) + mode = node_data.mode + timezone = node_data.timezone + node_id = node_config["id"] cron_expression = None if mode == "cron": - cron_expression = node_data.get("cron_expression") + cron_expression = node_data.cron_expression if not cron_expression: raise ScheduleConfigError("Cron expression is required for cron mode") elif mode == "visual": - frequency = str(node_data.get("frequency")) + frequency = str(node_data.frequency or "") if not frequency: raise ScheduleConfigError("Frequency is required for visual mode") - visual_config = VisualConfig(**node_data.get("visual_config", {})) + visual_config = VisualConfig.model_validate(node_data.visual_config or {}) cron_expression = ScheduleService.visual_to_cron(frequency=frequency, visual_config=visual_config) if not cron_expression: raise ScheduleConfigError("Cron expression is required for visual mode") @@ -236,22 +240,24 @@ class ScheduleService: for node in nodes: node_data = node.get("data", {}) - if node_data.get("type") != NodeType.TRIGGER_SCHEDULE.value: + if node_data.get("type") != TRIGGER_SCHEDULE_NODE_TYPE: continue - mode = node_data.get("mode", "visual") - timezone = node_data.get("timezone", "UTC") node_id = node.get("id", "start") + trigger_data = TriggerScheduleNodeData.model_validate(node_data) + mode = trigger_data.mode + timezone = trigger_data.timezone cron_expression = None if mode == "cron": - cron_expression = node_data.get("cron_expression") + cron_expression = trigger_data.cron_expression if not cron_expression: raise ScheduleConfigError("Cron expression is required for cron mode") elif mode == "visual": - frequency = node_data.get("frequency") - visual_config_dict = node_data.get("visual_config", {}) - visual_config = VisualConfig(**visual_config_dict) + frequency = trigger_data.frequency + if not frequency: + raise ScheduleConfigError("Frequency is required for visual mode") + visual_config = VisualConfig.model_validate(trigger_data.visual_config or {}) cron_expression = ScheduleService.visual_to_cron(frequency, visual_config) else: raise ScheduleConfigError(f"Invalid schedule mode: {mode}") diff --git a/api/services/trigger/trigger_service.py b/api/services/trigger/trigger_service.py index f1f0d0ea84..24bbeda329 100644 --- a/api/services/trigger/trigger_service.py +++ b/api/services/trigger/trigger_service.py @@ -12,12 +12,13 @@ from sqlalchemy.orm import Session from core.plugin.entities.plugin_daemon import CredentialType from core.plugin.entities.request import TriggerDispatchResponse, TriggerInvokeEventResponse from core.plugin.impl.exc import PluginNotFoundError +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.debug.events import PluginTriggerDebugEvent from core.trigger.provider import PluginTriggerProviderController from core.trigger.trigger_manager import TriggerManager from core.trigger.utils.encryption import create_trigger_provider_encrypter_for_subscription -from dify_graph.enums import NodeType -from dify_graph.nodes.trigger_plugin.entities import TriggerEventNodeData +from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData +from dify_graph.entities.graph_config import NodeConfigDict from extensions.ext_database import db from extensions.ext_redis import redis_client from models.model import App @@ -41,7 +42,7 @@ class TriggerService: @classmethod def invoke_trigger_event( - cls, tenant_id: str, user_id: str, node_config: Mapping[str, Any], event: PluginTriggerDebugEvent + cls, tenant_id: str, user_id: str, node_config: NodeConfigDict, event: PluginTriggerDebugEvent ) -> TriggerInvokeEventResponse: """Invoke a trigger event.""" subscription: TriggerSubscription | None = TriggerProviderService.get_subscription_by_id( @@ -50,7 +51,7 @@ class TriggerService: ) if not subscription: raise ValueError("Subscription not found") - node_data: TriggerEventNodeData = TriggerEventNodeData.model_validate(node_config.get("data", {})) + node_data = TriggerEventNodeData.model_validate(node_config["data"], from_attributes=True) request = TriggerHttpRequestCachingService.get_request(event.request_id) payload = TriggerHttpRequestCachingService.get_payload(event.request_id) # invoke triger @@ -178,7 +179,7 @@ class TriggerService: # Walk nodes to find plugin triggers nodes_in_graph: list[Mapping[str, Any]] = [] - for node_id, node_config in workflow.walk_nodes(NodeType.TRIGGER_PLUGIN): + for node_id, node_config in workflow.walk_nodes(TRIGGER_PLUGIN_NODE_TYPE): # Extract plugin trigger configuration from node plugin_id = node_config.get("plugin_id", "") provider_id = node_config.get("provider_id", "") diff --git a/api/services/trigger/webhook_service.py b/api/services/trigger/webhook_service.py index 285645edce..3c1a4cc747 100644 --- a/api/services/trigger/webhook_service.py +++ b/api/services/trigger/webhook_service.py @@ -2,7 +2,7 @@ import json import logging import mimetypes import secrets -from collections.abc import Mapping +from collections.abc import Callable, Mapping, Sequence from typing import Any import orjson @@ -16,9 +16,16 @@ from werkzeug.exceptions import RequestEntityTooLarge from configs import dify_config from core.app.entities.app_invoke_entities import InvokeFrom from core.tools.tool_file_manager import ToolFileManager -from dify_graph.enums import NodeType +from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE +from core.workflow.nodes.trigger_webhook.entities import ( + ContentType, + WebhookBodyParameter, + WebhookData, + WebhookParameter, +) +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.file.models import FileTransferMethod -from dify_graph.variables.types import SegmentType +from dify_graph.variables.types import ArrayValidation, SegmentType from enums.quota_type import QuotaType from extensions.ext_database import db from extensions.ext_redis import redis_client @@ -57,7 +64,7 @@ class WebhookService: @classmethod def get_webhook_trigger_and_workflow( cls, webhook_id: str, is_debug: bool = False - ) -> tuple[WorkflowWebhookTrigger, Workflow, Mapping[str, Any]]: + ) -> tuple[WorkflowWebhookTrigger, Workflow, NodeConfigDict]: """Get webhook trigger, workflow, and node configuration. Args: @@ -135,7 +142,7 @@ class WebhookService: @classmethod def extract_and_validate_webhook_data( - cls, webhook_trigger: WorkflowWebhookTrigger, node_config: Mapping[str, Any] + cls, webhook_trigger: WorkflowWebhookTrigger, node_config: NodeConfigDict ) -> dict[str, Any]: """Extract and validate webhook data in a single unified process. @@ -153,7 +160,7 @@ class WebhookService: raw_data = cls.extract_webhook_data(webhook_trigger) # Validate HTTP metadata (method, content-type) - node_data = node_config.get("data", {}) + node_data = WebhookData.model_validate(node_config["data"], from_attributes=True) validation_result = cls._validate_http_metadata(raw_data, node_data) if not validation_result["valid"]: raise ValueError(validation_result["error"]) @@ -192,7 +199,7 @@ class WebhookService: content_type = cls._extract_content_type(dict(request.headers)) # Route to appropriate extractor based on content type - extractors = { + extractors: dict[str, Callable[[], tuple[dict[str, Any], dict[str, Any]]]] = { "application/json": cls._extract_json_body, "application/x-www-form-urlencoded": cls._extract_form_body, "multipart/form-data": lambda: cls._extract_multipart_body(webhook_trigger), @@ -214,7 +221,7 @@ class WebhookService: return data @classmethod - def _process_and_validate_data(cls, raw_data: dict[str, Any], node_data: dict[str, Any]) -> dict[str, Any]: + def _process_and_validate_data(cls, raw_data: dict[str, Any], node_data: WebhookData) -> dict[str, Any]: """Process and validate webhook data according to node configuration. Args: @@ -230,18 +237,13 @@ class WebhookService: result = raw_data.copy() # Validate and process headers - cls._validate_required_headers(raw_data["headers"], node_data.get("headers", [])) + cls._validate_required_headers(raw_data["headers"], node_data.headers) # Process query parameters with type conversion and validation - result["query_params"] = cls._process_parameters( - raw_data["query_params"], node_data.get("params", []), is_form_data=True - ) + result["query_params"] = cls._process_parameters(raw_data["query_params"], node_data.params, is_form_data=True) # Process body parameters based on content type - configured_content_type = node_data.get("content_type", "application/json").lower() - result["body"] = cls._process_body_parameters( - raw_data["body"], node_data.get("body", []), configured_content_type - ) + result["body"] = cls._process_body_parameters(raw_data["body"], node_data.body, node_data.content_type) return result @@ -424,7 +426,11 @@ class WebhookService: @classmethod def _process_parameters( - cls, raw_params: dict[str, str], param_configs: list, is_form_data: bool = False + cls, + raw_params: dict[str, str], + param_configs: Sequence[WebhookParameter], + *, + is_form_data: bool = False, ) -> dict[str, Any]: """Process parameters with unified validation and type conversion. @@ -440,13 +446,13 @@ class WebhookService: ValueError: If required parameters are missing or validation fails """ processed = {} - configured_params = {config.get("name", ""): config for config in param_configs} + configured_params = {config.name: config for config in param_configs} # Process configured parameters for param_config in param_configs: - name = param_config.get("name", "") - param_type = param_config.get("type", SegmentType.STRING) - required = param_config.get("required", False) + name = param_config.name + param_type = param_config.type + required = param_config.required # Check required parameters if required and name not in raw_params: @@ -465,7 +471,10 @@ class WebhookService: @classmethod def _process_body_parameters( - cls, raw_body: dict[str, Any], body_configs: list, content_type: str + cls, + raw_body: dict[str, Any], + body_configs: Sequence[WebhookBodyParameter], + content_type: ContentType, ) -> dict[str, Any]: """Process body parameters based on content type and configuration. @@ -480,25 +489,28 @@ class WebhookService: Raises: ValueError: If required body parameters are missing or validation fails """ - if content_type in ["text/plain", "application/octet-stream"]: - # For text/plain and octet-stream, validate required content exists - if body_configs and any(config.get("required", False) for config in body_configs): - raw_content = raw_body.get("raw") - if not raw_content: - raise ValueError(f"Required body content missing for {content_type} request") - return raw_body + match content_type: + case ContentType.TEXT | ContentType.BINARY: + # For text/plain and octet-stream, validate required content exists + if body_configs and any(config.required for config in body_configs): + raw_content = raw_body.get("raw") + if not raw_content: + raise ValueError(f"Required body content missing for {content_type} request") + return raw_body + case _: + pass # For structured data (JSON, form-data, etc.) processed = {} - configured_params = {config.get("name", ""): config for config in body_configs} + configured_params: dict[str, WebhookBodyParameter] = {config.name: config for config in body_configs} for body_config in body_configs: - name = body_config.get("name", "") - param_type = body_config.get("type", SegmentType.STRING) - required = body_config.get("required", False) + name = body_config.name + param_type = body_config.type + required = body_config.required # Handle file parameters for multipart data - if param_type == SegmentType.FILE and content_type == "multipart/form-data": + if param_type == SegmentType.FILE and content_type == ContentType.FORM_DATA: # File validation is handled separately in extract phase continue @@ -508,7 +520,7 @@ class WebhookService: if name in raw_body: raw_value = raw_body[name] - is_form_data = content_type in ["application/x-www-form-urlencoded", "multipart/form-data"] + is_form_data = content_type in [ContentType.FORM_URLENCODED, ContentType.FORM_DATA] processed[name] = cls._validate_and_convert_value(name, raw_value, param_type, is_form_data) # Include unconfigured parameters @@ -519,7 +531,9 @@ class WebhookService: return processed @classmethod - def _validate_and_convert_value(cls, param_name: str, value: Any, param_type: str, is_form_data: bool) -> Any: + def _validate_and_convert_value( + cls, param_name: str, value: Any, param_type: SegmentType | str, is_form_data: bool + ) -> Any: """Unified validation and type conversion for parameter values. Args: @@ -532,7 +546,8 @@ class WebhookService: Any: The validated and converted value Raises: - ValueError: If validation or conversion fails + ValueError: If validation or conversion fails. The original validation + error is preserved as ``__cause__`` for debugging. """ try: if is_form_data: @@ -542,10 +557,10 @@ class WebhookService: # JSON data should already be in correct types, just validate return cls._validate_json_value(param_name, value, param_type) except Exception as e: - raise ValueError(f"Parameter '{param_name}' validation failed: {str(e)}") + raise ValueError(f"Parameter '{param_name}' validation failed: {str(e)}") from e @classmethod - def _convert_form_value(cls, param_name: str, value: str, param_type: str) -> Any: + def _convert_form_value(cls, param_name: str, value: str, param_type: SegmentType | str) -> Any: """Convert form data string values to specified types. Args: @@ -576,7 +591,7 @@ class WebhookService: raise ValueError(f"Unsupported type '{param_type}' for form data parameter '{param_name}'") @classmethod - def _validate_json_value(cls, param_name: str, value: Any, param_type: str) -> Any: + def _validate_json_value(cls, param_name: str, value: Any, param_type: SegmentType | str) -> Any: """Validate JSON values against expected types. Args: @@ -590,43 +605,43 @@ class WebhookService: Raises: ValueError: If the value type doesn't match the expected type """ - type_validators = { - SegmentType.STRING: (lambda v: isinstance(v, str), "string"), - SegmentType.NUMBER: (lambda v: isinstance(v, (int, float)), "number"), - SegmentType.BOOLEAN: (lambda v: isinstance(v, bool), "boolean"), - SegmentType.OBJECT: (lambda v: isinstance(v, dict), "object"), - SegmentType.ARRAY_STRING: ( - lambda v: isinstance(v, list) and all(isinstance(item, str) for item in v), - "array of strings", - ), - SegmentType.ARRAY_NUMBER: ( - lambda v: isinstance(v, list) and all(isinstance(item, (int, float)) for item in v), - "array of numbers", - ), - SegmentType.ARRAY_BOOLEAN: ( - lambda v: isinstance(v, list) and all(isinstance(item, bool) for item in v), - "array of booleans", - ), - SegmentType.ARRAY_OBJECT: ( - lambda v: isinstance(v, list) and all(isinstance(item, dict) for item in v), - "array of objects", - ), - } - - validator_info = type_validators.get(SegmentType(param_type)) - if not validator_info: - logger.warning("Unknown parameter type: %s for parameter %s", param_type, param_name) + param_type_enum = cls._coerce_segment_type(param_type, param_name=param_name) + if param_type_enum is None: return value - validator, expected_type = validator_info - if not validator(value): + if not param_type_enum.is_valid(value, array_validation=ArrayValidation.ALL): actual_type = type(value).__name__ + expected_type = cls._expected_type_label(param_type_enum) raise ValueError(f"Expected {expected_type}, got {actual_type}") return value @classmethod - def _validate_required_headers(cls, headers: dict[str, Any], header_configs: list) -> None: + def _coerce_segment_type(cls, param_type: SegmentType | str, *, param_name: str) -> SegmentType | None: + if isinstance(param_type, SegmentType): + return param_type + try: + return SegmentType(param_type) + except Exception: + logger.warning("Unknown parameter type: %s for parameter %s", param_type, param_name) + return None + + @staticmethod + def _expected_type_label(param_type: SegmentType) -> str: + match param_type: + case SegmentType.ARRAY_STRING: + return "array of strings" + case SegmentType.ARRAY_NUMBER: + return "array of numbers" + case SegmentType.ARRAY_BOOLEAN: + return "array of booleans" + case SegmentType.ARRAY_OBJECT: + return "array of objects" + case _: + return param_type.value + + @classmethod + def _validate_required_headers(cls, headers: dict[str, Any], header_configs: Sequence[WebhookParameter]) -> None: """Validate required headers are present. Args: @@ -639,14 +654,14 @@ class WebhookService: headers_lower = {k.lower(): v for k, v in headers.items()} headers_sanitized = {cls._sanitize_key(k).lower(): v for k, v in headers.items()} for header_config in header_configs: - if header_config.get("required", False): - header_name = header_config.get("name", "") + if header_config.required: + header_name = header_config.name sanitized_name = cls._sanitize_key(header_name).lower() if header_name.lower() not in headers_lower and sanitized_name not in headers_sanitized: raise ValueError(f"Required header missing: {header_name}") @classmethod - def _validate_http_metadata(cls, webhook_data: dict[str, Any], node_data: dict[str, Any]) -> dict[str, Any]: + def _validate_http_metadata(cls, webhook_data: dict[str, Any], node_data: WebhookData) -> dict[str, Any]: """Validate HTTP method and content-type. Args: @@ -657,13 +672,13 @@ class WebhookService: dict[str, Any]: Validation result with 'valid' key and optional 'error' key """ # Validate HTTP method - configured_method = node_data.get("method", "get").upper() + configured_method = node_data.method.value.upper() request_method = webhook_data["method"].upper() if configured_method != request_method: return cls._validation_error(f"HTTP method mismatch. Expected {configured_method}, got {request_method}") # Validate Content-type - configured_content_type = node_data.get("content_type", "application/json").lower() + configured_content_type = node_data.content_type.value.lower() request_content_type = cls._extract_content_type(webhook_data["headers"]) if configured_content_type != request_content_type: @@ -788,7 +803,7 @@ class WebhookService: raise @classmethod - def generate_webhook_response(cls, node_config: Mapping[str, Any]) -> tuple[dict[str, Any], int]: + def generate_webhook_response(cls, node_config: NodeConfigDict) -> tuple[dict[str, Any], int]: """Generate HTTP response based on node configuration. Args: @@ -797,11 +812,11 @@ class WebhookService: Returns: tuple[dict[str, Any], int]: Response data and HTTP status code """ - node_data = node_config.get("data", {}) + node_data = WebhookData.model_validate(node_config["data"], from_attributes=True) # Get configured status code and response body - status_code = node_data.get("status_code", 200) - response_body = node_data.get("response_body", "") + status_code = node_data.status_code + response_body = node_data.response_body # Parse response body as JSON if it's valid JSON, otherwise return as text try: @@ -847,7 +862,7 @@ class WebhookService: node_id: str webhook_id: str - nodes_id_in_graph = [node_id for node_id, _ in workflow.walk_nodes(NodeType.TRIGGER_WEBHOOK)] + nodes_id_in_graph = [node_id for node_id, _ in workflow.walk_nodes(TRIGGER_WEBHOOK_NODE_TYPE)] # Check webhook node limit if len(nodes_id_in_graph) > cls.MAX_WEBHOOK_NODES_PER_WORKFLOW: diff --git a/api/services/web_conversation_service.py b/api/services/web_conversation_service.py index 560aec2330..e028e3e5e3 100644 --- a/api/services/web_conversation_service.py +++ b/api/services/web_conversation_service.py @@ -7,6 +7,7 @@ from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db from libs.infinite_scroll_pagination import InfiniteScrollPagination from models import Account +from models.enums import CreatorUserRole from models.model import App, EndUser from models.web import PinnedConversation from services.conversation_service import ConversationService @@ -84,7 +85,7 @@ class WebConversationService: pinned_conversation = PinnedConversation( app_id=app_model.id, conversation_id=conversation.id, - created_by_role="account" if isinstance(user, Account) else "end_user", + created_by_role=CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER, created_by=user.id, ) diff --git a/api/services/website_service.py b/api/services/website_service.py index fe48c3b08e..15ec4657d9 100644 --- a/api/services/website_service.py +++ b/api/services/website_service.py @@ -124,7 +124,7 @@ class WebsiteService: if provider == "firecrawl": plugin_id = "langgenius/firecrawl_datasource" elif provider == "watercrawl": - plugin_id = "langgenius/watercrawl_datasource" + plugin_id = "watercrawl/watercrawl_datasource" elif provider == "jinareader": plugin_id = "langgenius/jina_datasource" else: diff --git a/api/services/workflow/workflow_converter.py b/api/services/workflow/workflow_converter.py index 0153046acc..006483fe97 100644 --- a/api/services/workflow/workflow_converter.py +++ b/api/services/workflow/workflow_converter.py @@ -18,13 +18,13 @@ from core.prompt.utils.prompt_template_parser import PromptTemplateParser from dify_graph.file.models import FileUploadConfig from dify_graph.model_runtime.entities.llm_entities import LLMMode from dify_graph.model_runtime.utils.encoders import jsonable_encoder -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.variables.input_entities import VariableEntity from events.app_event import app_was_created from extensions.ext_database import db from models import Account from models.api_based_extension import APIBasedExtension, APIBasedExtensionPoint -from models.model import App, AppMode, AppModelConfig +from models.model import App, AppMode, AppModelConfig, IconType from models.workflow import Workflow, WorkflowType @@ -72,7 +72,7 @@ class WorkflowConverter: new_app.tenant_id = app_model.tenant_id new_app.name = name or app_model.name + "(workflow)" new_app.mode = AppMode.ADVANCED_CHAT if app_model.mode == AppMode.CHAT else AppMode.WORKFLOW - new_app.icon_type = icon_type or app_model.icon_type + new_app.icon_type = IconType(icon_type) if icon_type else app_model.icon_type new_app.icon = icon or app_model.icon new_app.icon_background = icon_background or app_model.icon_background new_app.enable_site = app_model.enable_site @@ -234,7 +234,7 @@ class WorkflowConverter: "position": None, "data": { "title": "START", - "type": NodeType.START, + "type": BuiltinNodeTypes.START, "variables": [jsonable_encoder(v) for v in variables], }, } @@ -296,7 +296,7 @@ class WorkflowConverter: "position": None, "data": { "title": f"HTTP REQUEST {api_based_extension.name}", - "type": NodeType.HTTP_REQUEST, + "type": BuiltinNodeTypes.HTTP_REQUEST, "method": "post", "url": api_based_extension.api_endpoint, "authorization": {"type": "api-key", "config": {"type": "bearer", "api_key": api_key}}, @@ -314,7 +314,7 @@ class WorkflowConverter: "position": None, "data": { "title": f"Parse {api_based_extension.name} Response", - "type": NodeType.CODE, + "type": BuiltinNodeTypes.CODE, "variables": [{"variable": "response_json", "value_selector": [http_request_node["id"], "body"]}], "code_language": "python3", "code": "import json\n\ndef main(response_json: str) -> str:\n response_body = json.loads(" @@ -354,7 +354,7 @@ class WorkflowConverter: "position": None, "data": { "title": "KNOWLEDGE RETRIEVAL", - "type": NodeType.KNOWLEDGE_RETRIEVAL, + "type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "query_variable_selector": query_variable_selector, "dataset_ids": dataset_config.dataset_ids, "retrieval_mode": retrieve_config.retrieve_strategy.value, @@ -402,9 +402,9 @@ class WorkflowConverter: :param external_data_variable_node_mapping: external data variable node mapping """ # fetch start and knowledge retrieval node - start_node = next(filter(lambda n: n["data"]["type"] == NodeType.START, graph["nodes"])) + start_node = next(filter(lambda n: n["data"]["type"] == BuiltinNodeTypes.START, graph["nodes"])) knowledge_retrieval_node = next( - filter(lambda n: n["data"]["type"] == NodeType.KNOWLEDGE_RETRIEVAL, graph["nodes"]), None + filter(lambda n: n["data"]["type"] == BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, graph["nodes"]), None ) role_prefix = None @@ -523,7 +523,7 @@ class WorkflowConverter: "position": None, "data": { "title": "LLM", - "type": NodeType.LLM, + "type": BuiltinNodeTypes.LLM, "model": { "provider": model_config.provider, "name": model_config.model, @@ -578,7 +578,7 @@ class WorkflowConverter: "position": None, "data": { "title": "END", - "type": NodeType.END, + "type": BuiltinNodeTypes.END, "outputs": [{"variable": "result", "value_selector": ["llm", "text"]}], }, } @@ -592,7 +592,7 @@ class WorkflowConverter: return { "id": "answer", "position": None, - "data": {"title": "ANSWER", "type": NodeType.ANSWER, "answer": "{{#llm.text#}}"}, + "data": {"title": "ANSWER", "type": BuiltinNodeTypes.ANSWER, "answer": "{{#llm.text#}}"}, } def _create_edge(self, source: str, target: str): diff --git a/api/services/workflow_draft_variable_service.py b/api/services/workflow_draft_variable_service.py index b6f6fc5490..804bf28b66 100644 --- a/api/services/workflow_draft_variable_service.py +++ b/api/services/workflow_draft_variable_service.py @@ -14,10 +14,11 @@ from sqlalchemy.sql.expression import and_, or_ from configs import dify_config from core.app.entities.app_invoke_entities import InvokeFrom +from core.trigger.constants import is_trigger_node_type from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID -from dify_graph.enums import SystemVariableKey +from dify_graph.enums import NodeType, SystemVariableKey from dify_graph.file.models import File -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.variable_assigner.common.helpers import get_updated_variables from dify_graph.variable_loader import VariableLoader from dify_graph.variables import Segment, StringSegment, VariableBase @@ -386,7 +387,7 @@ class WorkflowDraftVariableService: # # This implementation must remain synchronized with the `_build_from_variable_assigner_mapping` # and `save` methods. - if node_type == NodeType.VARIABLE_ASSIGNER: + if node_type == BuiltinNodeTypes.VARIABLE_ASSIGNER: return variable output_value = outputs_dict.get(variable.name, absent) else: @@ -753,8 +754,8 @@ class DraftVariableSaver: # technical variables from being exposed in the draft environment, particularly those # that aren't meant to be directly edited or viewed by users. _EXCLUDE_VARIABLE_NAMES_MAPPING: dict[NodeType, frozenset[str]] = { - NodeType.LLM: frozenset(["finish_reason"]), - NodeType.LOOP: frozenset(["loop_round"]), + BuiltinNodeTypes.LLM: frozenset(["finish_reason"]), + BuiltinNodeTypes.LOOP: frozenset(["loop_round"]), } # Database session used for persisting draft variables. @@ -815,7 +816,7 @@ class DraftVariableSaver: ) def _should_save_output_variables_for_draft(self) -> bool: - if self._enclosing_node_id is not None and self._node_type != NodeType.VARIABLE_ASSIGNER: + if self._enclosing_node_id is not None and self._node_type != BuiltinNodeTypes.VARIABLE_ASSIGNER: # Currently we do not save output variables for nodes inside loop or iteration. return False return True @@ -1053,9 +1054,9 @@ class DraftVariableSaver: process_data = {} if not self._should_save_output_variables_for_draft(): return - if self._node_type == NodeType.VARIABLE_ASSIGNER: + if self._node_type == BuiltinNodeTypes.VARIABLE_ASSIGNER: draft_vars = self._build_from_variable_assigner_mapping(process_data=process_data) - elif self._node_type == NodeType.START or self._node_type.is_trigger_node: + elif self._node_type == BuiltinNodeTypes.START or is_trigger_node_type(self._node_type): draft_vars = self._build_variables_from_start_mapping(outputs) else: draft_vars = self._build_variables_from_mapping(outputs) @@ -1071,7 +1072,7 @@ class DraftVariableSaver: @staticmethod def _should_variable_be_visible(node_id: str, node_type: NodeType, name: str) -> bool: - if node_type in NodeType.IF_ELSE: + if node_type == BuiltinNodeTypes.IF_ELSE: return False if node_id == SYSTEM_VARIABLE_NODE_ID and not is_system_variable_editable(name): return False diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index 9bc1371895..455ed42fda 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -14,15 +14,23 @@ from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom, build_dify_run_context from core.repositories import DifyCoreRepositoryFactory from core.repositories.human_input_repository import HumanInputFormRepositoryImpl +from core.trigger.constants import is_trigger_node_type +from core.workflow.node_factory import LATEST_VERSION, get_node_type_classes_mapping, is_start_node_type from core.workflow.workflow_entry import WorkflowEntry from dify_graph.entities import GraphInitParams, WorkflowNodeExecution +from dify_graph.entities.graph_config import NodeConfigDict from dify_graph.entities.pause_reason import HumanInputRequired -from dify_graph.enums import ErrorStrategy, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from dify_graph.enums import ( + ErrorStrategy, + NodeType, + WorkflowNodeExecutionMetadataKey, + WorkflowNodeExecutionStatus, +) from dify_graph.errors import WorkflowNodeRunFailedError from dify_graph.file import File from dify_graph.graph_events import GraphNodeEventBase, NodeRunFailedEvent, NodeRunSucceededEvent from dify_graph.node_events import NodeRunResult -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.nodes.base.node import Node from dify_graph.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, build_http_request_config from dify_graph.nodes.human_input.entities import ( @@ -33,7 +41,6 @@ from dify_graph.nodes.human_input.entities import ( ) from dify_graph.nodes.human_input.enums import HumanInputFormKind from dify_graph.nodes.human_input.human_input_node import HumanInputNode -from dify_graph.nodes.node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING from dify_graph.nodes.start.entities import StartNodeData from dify_graph.repositories.human_input_form_repository import FormCreateParams from dify_graph.runtime import GraphRuntimeState, VariablePool @@ -386,7 +393,7 @@ class WorkflowService: for _, node_data in draft_workflow.walk_nodes() if (node_type_str := node_data.get("type")) and isinstance(node_type_str, str) - and NodeType(node_type_str).is_trigger_node + and is_trigger_node_type(node_type_str) ) if trigger_node_count > 2: raise TriggerNodeLimitExceededError(count=trigger_node_count, limit=2) @@ -695,10 +702,10 @@ class WorkflowService: """ # return default block config default_block_configs: list[Mapping[str, object]] = [] - for node_type, node_class_mapping in NODE_TYPE_CLASSES_MAPPING.items(): + for node_type, node_class_mapping in get_node_type_classes_mapping().items(): node_class = node_class_mapping[LATEST_VERSION] filters = None - if node_type is NodeType.HTTP_REQUEST: + if node_type == BuiltinNodeTypes.HTTP_REQUEST: filters = { HTTP_REQUEST_CONFIG_FILTER_KEY: build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, @@ -726,14 +733,15 @@ class WorkflowService: :return: """ node_type_enum = NodeType(node_type) + node_mapping = get_node_type_classes_mapping() # return default block config - if node_type_enum not in NODE_TYPE_CLASSES_MAPPING: + if node_type_enum not in node_mapping: return {} - node_class = NODE_TYPE_CLASSES_MAPPING[node_type_enum][LATEST_VERSION] + node_class = node_mapping[node_type_enum][LATEST_VERSION] resolved_filters = dict(filters) if filters else {} - if node_type_enum is NodeType.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in resolved_filters: + if node_type_enum == BuiltinNodeTypes.HTTP_REQUEST and HTTP_REQUEST_CONFIG_FILTER_KEY not in resolved_filters: resolved_filters[HTTP_REQUEST_CONFIG_FILTER_KEY] = build_http_request_config( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, max_read_timeout=dify_config.HTTP_REQUEST_MAX_READ_TIMEOUT, @@ -770,8 +778,8 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) - node_data = node_config.get("data", {}) - if node_type.is_start_node: + node_data = node_config["data"] + if is_start_node_type(node_type): with Session(bind=db.engine) as session, session.begin(): draft_var_srv = WorkflowDraftVariableService(session) conversation_id = draft_var_srv.get_or_create_conversation( @@ -779,8 +787,8 @@ class WorkflowService: app=app_model, workflow=draft_workflow, ) - if node_type is NodeType.START: - start_data = StartNodeData.model_validate(node_data) + if node_type == BuiltinNodeTypes.START: + start_data = StartNodeData.model_validate(node_data, from_attributes=True) user_inputs = _rebuild_file_for_user_inputs_in_start_node( tenant_id=draft_workflow.tenant_id, start_node_data=start_data, user_inputs=user_inputs ) @@ -873,7 +881,7 @@ class WorkflowService: session=session, app_id=app_model.id, node_id=workflow_node_execution.node_id, - node_type=NodeType(workflow_node_execution.node_type), + node_type=workflow_node_execution.node_type, enclosing_node_id=enclosing_node_id, node_execution_id=node_execution.id, user=account, @@ -906,7 +914,7 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) - if node_type is not NodeType.HUMAN_INPUT: + if node_type != BuiltinNodeTypes.HUMAN_INPUT: raise ValueError("Node type must be human-input.") # inputs: values used to fill missing upstream variables referenced in form_content. @@ -965,7 +973,7 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) - if node_type is not NodeType.HUMAN_INPUT: + if node_type != BuiltinNodeTypes.HUMAN_INPUT: raise ValueError("Node type must be human-input.") # inputs: values used to fill missing upstream variables referenced in form_content. @@ -1005,7 +1013,7 @@ class WorkflowService: session=session, app_id=app_model.id, node_id=node_id, - node_type=NodeType.HUMAN_INPUT, + node_type=BuiltinNodeTypes.HUMAN_INPUT, node_execution_id=str(uuid.uuid4()), user=account, enclosing_node_id=enclosing_node_id, @@ -1030,10 +1038,10 @@ class WorkflowService: node_config = draft_workflow.get_node_config_by_id(node_id) node_type = Workflow.get_node_type_from_node_config(node_config) - if node_type is not NodeType.HUMAN_INPUT: + if node_type != BuiltinNodeTypes.HUMAN_INPUT: raise ValueError("Node type must be human-input.") - node_data = HumanInputNodeData.model_validate(node_config.get("data", {})) + node_data = HumanInputNodeData.model_validate(node_config["data"], from_attributes=True) delivery_method = self._resolve_human_input_delivery_method( node_data=node_data, delivery_method_id=delivery_method_id, @@ -1043,7 +1051,7 @@ class WorkflowService: delivery_method = apply_debug_email_recipient( delivery_method, enabled=True, - user_id=account.id or "", + user_id=account.id, ) variable_pool = self._build_human_input_variable_pool( @@ -1151,7 +1159,7 @@ class WorkflowService: *, workflow: Workflow, account: Account, - node_config: Mapping[str, Any], + node_config: NodeConfigDict, variable_pool: VariablePool, ) -> HumanInputNode: graph_init_params = GraphInitParams( @@ -1171,7 +1179,7 @@ class WorkflowService: start_at=time.perf_counter(), ) node = HumanInputNode( - id=node_config.get("id", str(uuid.uuid4())), + id=node_config["id"], config=node_config, graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, @@ -1184,7 +1192,7 @@ class WorkflowService: *, app_model: App, workflow: Workflow, - node_config: Mapping[str, Any], + node_config: NodeConfigDict, manual_inputs: Mapping[str, Any], ) -> VariablePool: with Session(bind=db.engine, expire_on_commit=False) as session, session.begin(): @@ -1423,18 +1431,18 @@ class WorkflowService: for node in node_configs: node_type = node.get("data", {}).get("type") if node_type: - node_types.add(NodeType(node_type)) + node_types.add(node_type) # start node and trigger node cannot coexist - if NodeType.START in node_types: - if any(nt.is_trigger_node for nt in node_types): + if BuiltinNodeTypes.START in node_types: + if any(is_trigger_node_type(nt) for nt in node_types): raise ValueError("Start node and trigger nodes cannot coexist in the same workflow") for node in node_configs: node_data = node.get("data", {}) node_type = node_data.get("type") - if node_type == NodeType.HUMAN_INPUT: + if node_type == BuiltinNodeTypes.HUMAN_INPUT: self._validate_human_input_node_data(node_data) def validate_features_structure(self, app_model: App, features: dict): @@ -1556,7 +1564,7 @@ def _setup_variable_pool( conversation_variables: list[VariableBase], ): # Only inject system variables for START node type. - if node_type == NodeType.START or node_type.is_trigger_node: + if is_start_node_type(node_type): system_variable = SystemVariable( user_id=user_id, app_id=workflow.app_id, diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py index 11edcf151f..b3f36d8f44 100644 --- a/api/tasks/document_indexing_task.py +++ b/api/tasks/document_indexing_task.py @@ -1,9 +1,10 @@ import logging import time -from collections.abc import Callable, Sequence +from collections.abc import Sequence +from typing import Any, Protocol import click -from celery import shared_task +from celery import current_app, shared_task from configs import dify_config from core.db.session_factory import session_factory @@ -19,6 +20,12 @@ from tasks.generate_summary_index_task import generate_summary_index_task logger = logging.getLogger(__name__) +class CeleryTaskLike(Protocol): + def delay(self, *args: Any, **kwargs: Any) -> Any: ... + + def apply_async(self, *args: Any, **kwargs: Any) -> Any: ... + + @shared_task(queue="dataset") def document_indexing_task(dataset_id: str, document_ids: list): """ @@ -179,8 +186,8 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]): def _document_indexing_with_tenant_queue( - tenant_id: str, dataset_id: str, document_ids: Sequence[str], task_func: Callable[[str, str, Sequence[str]], None] -): + tenant_id: str, dataset_id: str, document_ids: Sequence[str], task_func: CeleryTaskLike +) -> None: try: _document_indexing(dataset_id, document_ids) except Exception: @@ -201,16 +208,20 @@ def _document_indexing_with_tenant_queue( logger.info("document indexing tenant isolation queue %s next tasks: %s", tenant_id, next_tasks) if next_tasks: - for next_task in next_tasks: - document_task = DocumentTask(**next_task) - # Process the next waiting task - # Keep the flag set to indicate a task is running - tenant_isolated_task_queue.set_task_waiting_time() - task_func.delay( # type: ignore - tenant_id=document_task.tenant_id, - dataset_id=document_task.dataset_id, - document_ids=document_task.document_ids, - ) + with current_app.producer_or_acquire() as producer: # type: ignore + for next_task in next_tasks: + document_task = DocumentTask(**next_task) + # Keep the flag set to indicate a task is running + tenant_isolated_task_queue.set_task_waiting_time() + task_func.apply_async( + kwargs={ + "tenant_id": document_task.tenant_id, + "dataset_id": document_task.dataset_id, + "document_ids": document_task.document_ids, + }, + producer=producer, + ) + else: # No more waiting tasks, clear the flag tenant_isolated_task_queue.delete_task_key() diff --git a/api/tasks/generate_summary_index_task.py b/api/tasks/generate_summary_index_task.py index e4273e16b5..6493833edc 100644 --- a/api/tasks/generate_summary_index_task.py +++ b/api/tasks/generate_summary_index_task.py @@ -14,7 +14,7 @@ from services.summary_index_service import SummaryIndexService logger = logging.getLogger(__name__) -@shared_task(queue="dataset") +@shared_task(queue="dataset_summary") def generate_summary_index_task(dataset_id: str, document_id: str, segment_ids: list[str] | None = None): """ Async generate summary index for document segments. diff --git a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py index 6ad04aab0d..5d201bd801 100644 --- a/api/tasks/process_tenant_plugin_autoupgrade_check_task.py +++ b/api/tasks/process_tenant_plugin_autoupgrade_check_task.py @@ -6,7 +6,6 @@ import typing import click from celery import shared_task -from core.helper.marketplace import record_install_plugin_event from core.plugin.entities.marketplace import MarketplacePluginSnapshot from core.plugin.entities.plugin import PluginInstallationSource from core.plugin.impl.plugin import PluginInstaller @@ -166,7 +165,6 @@ def process_tenant_plugin_autoupgrade_check_task( # execute upgrade new_unique_identifier = manifest.latest_package_identifier - record_install_plugin_event(new_unique_identifier) click.echo( click.style( f"Upgrade plugin: {original_unique_identifier} -> {new_unique_identifier}", diff --git a/api/tasks/rag_pipeline/rag_pipeline_run_task.py b/api/tasks/rag_pipeline/rag_pipeline_run_task.py index 093342d1a3..52f66dddb8 100644 --- a/api/tasks/rag_pipeline/rag_pipeline_run_task.py +++ b/api/tasks/rag_pipeline/rag_pipeline_run_task.py @@ -3,12 +3,13 @@ import json import logging import time import uuid -from collections.abc import Mapping +from collections.abc import Mapping, Sequence from concurrent.futures import ThreadPoolExecutor +from itertools import islice from typing import Any import click -from celery import shared_task # type: ignore +from celery import group, shared_task from flask import current_app, g from sqlalchemy.orm import Session, sessionmaker @@ -27,6 +28,11 @@ from services.file_service import FileService logger = logging.getLogger(__name__) +def chunked(iterable: Sequence, size: int): + it = iter(iterable) + return iter(lambda: list(islice(it, size)), []) + + @shared_task(queue="pipeline") def rag_pipeline_run_task( rag_pipeline_invoke_entities_file_id: str, @@ -83,16 +89,24 @@ def rag_pipeline_run_task( logger.info("rag pipeline tenant isolation queue %s next files: %s", tenant_id, next_file_ids) if next_file_ids: - for next_file_id in next_file_ids: - # Process the next waiting task - # Keep the flag set to indicate a task is running - tenant_isolated_task_queue.set_task_waiting_time() - rag_pipeline_run_task.delay( # type: ignore - rag_pipeline_invoke_entities_file_id=next_file_id.decode("utf-8") - if isinstance(next_file_id, bytes) - else next_file_id, - tenant_id=tenant_id, - ) + for batch in chunked(next_file_ids, 100): + jobs = [] + for next_file_id in batch: + tenant_isolated_task_queue.set_task_waiting_time() + + file_id = ( + next_file_id.decode("utf-8") if isinstance(next_file_id, (bytes, bytearray)) else next_file_id + ) + + jobs.append( + rag_pipeline_run_task.s( + rag_pipeline_invoke_entities_file_id=file_id, + tenant_id=tenant_id, + ) + ) + + if jobs: + group(jobs).apply_async() else: # No more waiting tasks, clear the flag tenant_isolated_task_queue.delete_task_key() diff --git a/api/tasks/regenerate_summary_index_task.py b/api/tasks/regenerate_summary_index_task.py index cf8988d13e..39c2f4103e 100644 --- a/api/tasks/regenerate_summary_index_task.py +++ b/api/tasks/regenerate_summary_index_task.py @@ -16,7 +16,7 @@ from services.summary_index_service import SummaryIndexService logger = logging.getLogger(__name__) -@shared_task(queue="dataset") +@shared_task(queue="dataset_summary") def regenerate_summary_index_task( dataset_id: str, regenerate_reason: str = "summary_model_changed", diff --git a/api/tasks/trigger_processing_tasks.py b/api/tasks/trigger_processing_tasks.py index d06b8c980b..75ae1f6316 100644 --- a/api/tasks/trigger_processing_tasks.py +++ b/api/tasks/trigger_processing_tasks.py @@ -20,13 +20,14 @@ from core.db.session_factory import session_factory from core.plugin.entities.plugin_daemon import CredentialType from core.plugin.entities.request import TriggerInvokeEventResponse from core.plugin.impl.exc import PluginInvokeError +from core.trigger.constants import TRIGGER_PLUGIN_NODE_TYPE from core.trigger.debug.event_bus import TriggerDebugEventBus from core.trigger.debug.events import PluginTriggerDebugEvent, build_plugin_pool_key from core.trigger.entities.entities import TriggerProviderEntity from core.trigger.provider import PluginTriggerProviderController from core.trigger.trigger_manager import TriggerManager -from dify_graph.enums import NodeType, WorkflowExecutionStatus -from dify_graph.nodes.trigger_plugin.entities import TriggerEventNodeData +from core.workflow.nodes.trigger_plugin.entities import TriggerEventNodeData +from dify_graph.enums import WorkflowExecutionStatus from enums.quota_type import QuotaType, unlimited from models.enums import ( AppTriggerType, @@ -164,7 +165,7 @@ def _record_trigger_failure_log( elapsed_time=0.0, total_tokens=0, total_steps=0, - created_by_role=created_by_role.value, + created_by_role=created_by_role, created_by=created_by, created_at=now, finished_at=now, @@ -179,7 +180,7 @@ def _record_trigger_failure_log( workflow_id=workflow.id, workflow_run_id=workflow_run.id, created_from=WorkflowAppLogCreatedFrom.SERVICE_API.value, - created_by_role=created_by_role.value, + created_by_role=created_by_role, created_by=created_by, ) session.add(workflow_app_log) @@ -212,7 +213,7 @@ def _record_trigger_failure_log( error=error_message, queue_name=queue_name, retry_count=0, - created_by_role=created_by_role.value, + created_by_role=created_by_role, created_by=created_by, triggered_at=now, finished_at=now, @@ -278,7 +279,7 @@ def dispatch_triggered_workflow( # Find the trigger node in the workflow event_node = None - for node_id, node_config in workflow.walk_nodes(NodeType.TRIGGER_PLUGIN): + for node_id, node_config in workflow.walk_nodes(TRIGGER_PLUGIN_NODE_TYPE): if node_id == plugin_trigger.node_id: event_node = node_config break diff --git a/api/tasks/workflow_execution_tasks.py b/api/tasks/workflow_execution_tasks.py index db8721e90b..f41118e592 100644 --- a/api/tasks/workflow_execution_tasks.py +++ b/api/tasks/workflow_execution_tasks.py @@ -94,13 +94,15 @@ def _create_workflow_run_from_execution( workflow_run.tenant_id = tenant_id workflow_run.app_id = app_id workflow_run.workflow_id = execution.workflow_id - workflow_run.type = execution.workflow_type.value - workflow_run.triggered_from = triggered_from.value + from models.workflow import WorkflowType as ModelWorkflowType + + workflow_run.type = ModelWorkflowType(execution.workflow_type.value) + workflow_run.triggered_from = triggered_from workflow_run.version = execution.workflow_version json_converter = WorkflowRuntimeTypeConverter() workflow_run.graph = json.dumps(json_converter.to_json_encodable(execution.graph)) workflow_run.inputs = json.dumps(json_converter.to_json_encodable(execution.inputs)) - workflow_run.status = execution.status.value + workflow_run.status = execution.status workflow_run.outputs = ( json.dumps(json_converter.to_json_encodable(execution.outputs)) if execution.outputs else "{}" ) @@ -108,7 +110,7 @@ def _create_workflow_run_from_execution( workflow_run.elapsed_time = execution.elapsed_time workflow_run.total_tokens = execution.total_tokens workflow_run.total_steps = execution.total_steps - workflow_run.created_by_role = creator_user_role.value + workflow_run.created_by_role = creator_user_role workflow_run.created_by = creator_user_id workflow_run.created_at = execution.started_at workflow_run.finished_at = execution.finished_at @@ -121,7 +123,7 @@ def _update_workflow_run_from_execution(workflow_run: WorkflowRun, execution: Wo Update a WorkflowRun database model from a WorkflowExecution domain entity. """ json_converter = WorkflowRuntimeTypeConverter() - workflow_run.status = execution.status.value + workflow_run.status = execution.status workflow_run.outputs = ( json.dumps(json_converter.to_json_encodable(execution.outputs)) if execution.outputs else "{}" ) diff --git a/api/tasks/workflow_node_execution_tasks.py b/api/tasks/workflow_node_execution_tasks.py index 3f607dc55e..466ef6c858 100644 --- a/api/tasks/workflow_node_execution_tasks.py +++ b/api/tasks/workflow_node_execution_tasks.py @@ -98,12 +98,12 @@ def _create_node_execution_from_domain( node_execution.tenant_id = tenant_id node_execution.app_id = app_id node_execution.workflow_id = execution.workflow_id - node_execution.triggered_from = triggered_from.value + node_execution.triggered_from = triggered_from node_execution.workflow_run_id = execution.workflow_execution_id node_execution.index = execution.index node_execution.predecessor_node_id = execution.predecessor_node_id node_execution.node_id = execution.node_id - node_execution.node_type = execution.node_type.value + node_execution.node_type = execution.node_type node_execution.title = execution.title node_execution.node_execution_id = execution.node_execution_id @@ -128,7 +128,7 @@ def _create_node_execution_from_domain( node_execution.status = execution.status.value node_execution.error = execution.error node_execution.elapsed_time = execution.elapsed_time - node_execution.created_by_role = creator_user_role.value + node_execution.created_by_role = creator_user_role node_execution.created_by = creator_user_id node_execution.created_at = execution.created_at node_execution.finished_at = execution.finished_at diff --git a/api/tasks/workflow_schedule_tasks.py b/api/tasks/workflow_schedule_tasks.py index ced7ef973b..8c64d3ab27 100644 --- a/api/tasks/workflow_schedule_tasks.py +++ b/api/tasks/workflow_schedule_tasks.py @@ -3,7 +3,7 @@ import logging from celery import shared_task from core.db.session_factory import session_factory -from dify_graph.nodes.trigger_schedule.exc import ( +from core.workflow.nodes.trigger_schedule.exc import ( ScheduleExecutionError, ScheduleNotFoundError, TenantOwnerNotFoundError, diff --git a/api/tests/integration_tests/.env.example b/api/tests/integration_tests/.env.example index 39effbab58..37f8830482 100644 --- a/api/tests/integration_tests/.env.example +++ b/api/tests/integration_tests/.env.example @@ -60,7 +60,6 @@ VECTOR_STORE=weaviate # Weaviate configuration WEAVIATE_ENDPOINT=http://localhost:8080 WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih -WEAVIATE_GRPC_ENABLED=false WEAVIATE_BATCH_SIZE=100 WEAVIATE_TOKENIZATION=word diff --git a/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py b/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py index 498ac56d5d..afb6938baa 100644 --- a/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py +++ b/api/tests/integration_tests/controllers/console/app/test_chat_message_permissions.py @@ -165,7 +165,7 @@ class TestChatMessageApiPermissions: agent_thoughts=[], message_files=[], message_metadata_dict={}, - status="success", + status="normal", error="", parent_message_id=None, ) diff --git a/api/tests/integration_tests/controllers/console/app/test_description_validation.py b/api/tests/integration_tests/controllers/console/app/test_description_validation.py index 8160807e48..f36c596eb8 100644 --- a/api/tests/integration_tests/controllers/console/app/test_description_validation.py +++ b/api/tests/integration_tests/controllers/console/app/test_description_validation.py @@ -5,14 +5,10 @@ This test module validates the 400-character limit enforcement for App descriptions across all creation and editing endpoints. """ -import os import sys import pytest -# Add the API root to Python path for imports -sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) - class TestAppDescriptionValidationUnit: """Unit tests for description validation function""" diff --git a/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py b/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py index c043c7dc10..3e79792b5b 100644 --- a/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py +++ b/api/tests/integration_tests/core/workflow/nodes/datasource/test_datasource_node_integration.py @@ -1,6 +1,7 @@ +from core.workflow.nodes.datasource.datasource_node import DatasourceNode +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult, StreamCompletedEvent -from dify_graph.nodes.datasource.datasource_node import DatasourceNode class _Seg: @@ -28,13 +29,17 @@ class _GS: class _GP: - tenant_id = "t1" - app_id = "app-1" workflow_id = "wf-1" graph_config = {} - user_id = "u1" - user_from = "account" - invoke_from = "debugger" + run_context = { + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "t1", + "app_id": "app-1", + "user_id": "u1", + "user_from": "account", + "invoke_from": "debugger", + } + } call_depth = 0 @@ -61,6 +66,8 @@ def test_node_integration_minimal_stream(mocker): def get_upload_file_by_id(cls, **_): raise AssertionError + mocker.patch("core.workflow.nodes.datasource.datasource_node.DatasourceManager", new=_Mgr) + node = DatasourceNode( id="n", config={ @@ -77,7 +84,6 @@ def test_node_integration_minimal_stream(mocker): }, graph_init_params=_GP(), graph_runtime_state=_GS(vp), - datasource_manager=_Mgr, ) out = list(node._run()) diff --git a/api/tests/integration_tests/services/test_workflow_draft_variable_service.py b/api/tests/integration_tests/services/test_workflow_draft_variable_service.py index 7c4dcda2dc..b19b4ebdad 100644 --- a/api/tests/integration_tests/services/test_workflow_draft_variable_service.py +++ b/api/tests/integration_tests/services/test_workflow_draft_variable_service.py @@ -7,7 +7,7 @@ from sqlalchemy import delete from sqlalchemy.orm import Session from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIABLE_NODE_ID -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.variables.segments import StringSegment from dify_graph.variables.types import SegmentType from dify_graph.variables.variables import StringVariable @@ -286,7 +286,7 @@ class TestDraftVariableLoader(unittest.TestCase): session=session, app_id=self._test_app_id, node_id="test_offload_node", - node_type=NodeType.LLM, # Use a real node type + node_type=BuiltinNodeTypes.LLM, # Use a real node type node_execution_id=node_execution_id, user=setup_account, ) @@ -542,7 +542,7 @@ class TestWorkflowDraftVariableServiceResetVariable(unittest.TestCase): index=1, node_execution_id=str(uuid.uuid4()), node_id=self._node_id, - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", inputs='{"input": "test input"}', process_data='{"test_var": "process_value", "other_var": "other_process"}', diff --git a/api/tests/integration_tests/workflow/nodes/test_code.py b/api/tests/integration_tests/workflow/nodes/test_code.py index f8b7f95493..e3a2b6b866 100644 --- a/api/tests/integration_tests/workflow/nodes/test_code.py +++ b/api/tests/integration_tests/workflow/nodes/test_code.py @@ -60,7 +60,7 @@ def init_code_node(code_config: dict): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node = CodeNode( id=str(uuid.uuid4()), diff --git a/api/tests/integration_tests/workflow/nodes/test_http.py b/api/tests/integration_tests/workflow/nodes/test_http.py index f691113511..f885f69e55 100644 --- a/api/tests/integration_tests/workflow/nodes/test_http.py +++ b/api/tests/integration_tests/workflow/nodes/test_http.py @@ -70,7 +70,7 @@ def init_http_node(config: dict): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node = HttpRequestNode( id=str(uuid.uuid4()), @@ -189,6 +189,7 @@ def test_custom_authorization_header(setup_http_mock): @pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True) def test_custom_auth_with_empty_api_key_raises_error(setup_http_mock): """Test: In custom authentication mode, when the api_key is empty, AuthorizationConfigError should be raised.""" + from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.http_request.entities import ( HttpRequestNodeAuthorization, HttpRequestNodeData, @@ -209,6 +210,7 @@ def test_custom_auth_with_empty_api_key_raises_error(setup_http_mock): # Create node data with custom auth and empty api_key node_data = HttpRequestNodeData( + type=BuiltinNodeTypes.HTTP_REQUEST, title="http", desc="", url="http://example.com", @@ -715,7 +717,7 @@ def test_nested_object_variable_selector(setup_http_mock): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node = HttpRequestNode( id=str(uuid.uuid4()), diff --git a/api/tests/integration_tests/workflow/nodes/test_llm.py b/api/tests/integration_tests/workflow/nodes/test_llm.py index b4779ebcdd..2aca9f5157 100644 --- a/api/tests/integration_tests/workflow/nodes/test_llm.py +++ b/api/tests/integration_tests/workflow/nodes/test_llm.py @@ -11,6 +11,7 @@ from dify_graph.enums import WorkflowNodeExecutionStatus from dify_graph.node_events import StreamCompletedEvent from dify_graph.nodes.llm.node import LLMNode from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory +from dify_graph.nodes.protocols import HttpClientProtocol from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from extensions.ext_database import db @@ -74,6 +75,7 @@ def init_llm_node(config: dict) -> LLMNode: credentials_provider=MagicMock(spec=CredentialsProvider), model_factory=MagicMock(spec=ModelFactory), model_instance=MagicMock(spec=ModelInstance), + http_client=MagicMock(spec=HttpClientProtocol), ) return node diff --git a/api/tests/integration_tests/workflow/nodes/test_template_transform.py b/api/tests/integration_tests/workflow/nodes/test_template_transform.py index 970e2cae00..7bb4f905c3 100644 --- a/api/tests/integration_tests/workflow/nodes/test_template_transform.py +++ b/api/tests/integration_tests/workflow/nodes/test_template_transform.py @@ -82,7 +82,7 @@ def test_execute_template_transform(): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert graph is not None node = TemplateTransformNode( diff --git a/api/tests/integration_tests/workflow/nodes/test_tool.py b/api/tests/integration_tests/workflow/nodes/test_tool.py index f70bf46979..a6717ada31 100644 --- a/api/tests/integration_tests/workflow/nodes/test_tool.py +++ b/api/tests/integration_tests/workflow/nodes/test_tool.py @@ -1,6 +1,6 @@ import time import uuid -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.tools.utils.configuration import ToolParameterConfigurationManager @@ -8,6 +8,7 @@ from core.workflow.node_factory import DifyNodeFactory from dify_graph.enums import WorkflowNodeExecutionStatus from dify_graph.graph import Graph from dify_graph.node_events import StreamCompletedEvent +from dify_graph.nodes.protocols import ToolFileManagerProtocol from dify_graph.nodes.tool.tool_node import ToolNode from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable @@ -53,13 +54,16 @@ def init_tool_node(config: dict): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") + + tool_file_manager_factory = MagicMock(spec=ToolFileManagerProtocol) node = ToolNode( id=str(uuid.uuid4()), config=config, graph_init_params=init_params, graph_runtime_state=graph_runtime_state, + tool_file_manager_factory=tool_file_manager_factory, ) return node @@ -83,17 +87,20 @@ def test_tool_variable_invoke(): } ) - ToolParameterConfigurationManager.decrypt_tool_parameters = MagicMock(return_value={"format": "%Y-%m-%d %H:%M:%S"}) + with patch.object( + ToolParameterConfigurationManager, + "decrypt_tool_parameters", + return_value={"format": "%Y-%m-%d %H:%M:%S"}, + ): + node.graph_runtime_state.variable_pool.add(["1", "args1"], "1+1") - node.graph_runtime_state.variable_pool.add(["1", "args1"], "1+1") - - # execute node - result = node._run() - for item in result: - if isinstance(item, StreamCompletedEvent): - assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED - assert item.node_run_result.outputs is not None - assert item.node_run_result.outputs.get("text") is not None + # execute node + result = node._run() + for item in result: + if isinstance(item, StreamCompletedEvent): + assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED + assert item.node_run_result.outputs is not None + assert item.node_run_result.outputs.get("text") is not None def test_tool_mixed_invoke(): @@ -117,12 +124,15 @@ def test_tool_mixed_invoke(): } ) - ToolParameterConfigurationManager.decrypt_tool_parameters = MagicMock(return_value={"format": "%Y-%m-%d %H:%M:%S"}) - - # execute node - result = node._run() - for item in result: - if isinstance(item, StreamCompletedEvent): - assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED - assert item.node_run_result.outputs is not None - assert item.node_run_result.outputs.get("text") is not None + with patch.object( + ToolParameterConfigurationManager, + "decrypt_tool_parameters", + return_value={"format": "%Y-%m-%d %H:%M:%S"}, + ): + # execute node + result = node._run() + for item in result: + if isinstance(item, StreamCompletedEvent): + assert item.node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED + assert item.node_run_result.outputs is not None + assert item.node_run_result.outputs.get("text") is not None diff --git a/api/tests/test_containers_integration_tests/conftest.py b/api/tests/test_containers_integration_tests/conftest.py index b67b48947c..3987e3a19d 100644 --- a/api/tests/test_containers_integration_tests/conftest.py +++ b/api/tests/test_containers_integration_tests/conftest.py @@ -10,8 +10,11 @@ more reliable and realistic test scenarios. import logging import os from collections.abc import Generator +from contextlib import contextmanager from pathlib import Path +from typing import Protocol, TypeVar +import psycopg2 import pytest from flask import Flask from flask.testing import FlaskClient @@ -31,6 +34,25 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(level logger = logging.getLogger(__name__) +class _CloserProtocol(Protocol): + """_Closer is any type which implement the close() method.""" + + def close(self): + """close the current object, release any external resouece (file, transaction, connection etc.) + associated with it. + """ + pass + + +_Closer = TypeVar("_Closer", bound=_CloserProtocol) + + +@contextmanager +def _auto_close(closer: _Closer) -> Generator[_Closer, None, None]: + yield closer + closer.close() + + class DifyTestContainers: """ Manages all test containers required for Dify integration tests. @@ -97,45 +119,28 @@ class DifyTestContainers: wait_for_logs(self.postgres, "is ready to accept connections", timeout=30) logger.info("PostgreSQL container is ready and accepting connections") - # Install uuid-ossp extension for UUID generation - logger.info("Installing uuid-ossp extension...") - try: - import psycopg2 - - conn = psycopg2.connect( - host=db_host, - port=db_port, - user=self.postgres.username, - password=self.postgres.password, - database=self.postgres.dbname, - ) - conn.autocommit = True - cursor = conn.cursor() - cursor.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') - cursor.close() - conn.close() + conn = psycopg2.connect( + host=db_host, + port=db_port, + user=self.postgres.username, + password=self.postgres.password, + database=self.postgres.dbname, + ) + conn.autocommit = True + with _auto_close(conn): + with conn.cursor() as cursor: + # Install uuid-ossp extension for UUID generation + logger.info("Installing uuid-ossp extension...") + cursor.execute('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";') logger.info("uuid-ossp extension installed successfully") - except Exception as e: - logger.warning("Failed to install uuid-ossp extension: %s", e) - # Create plugin database for dify-plugin-daemon - logger.info("Creating plugin database...") - try: - conn = psycopg2.connect( - host=db_host, - port=db_port, - user=self.postgres.username, - password=self.postgres.password, - database=self.postgres.dbname, - ) - conn.autocommit = True - cursor = conn.cursor() - cursor.execute("CREATE DATABASE dify_plugin;") - cursor.close() - conn.close() + # NOTE: We cannot use `with conn.cursor() as cursor:` as it will wrap the statement + # inside a transaction. However, the `CREATE DATABASE` statement cannot run inside a transaction block. + with _auto_close(conn.cursor()) as cursor: + # Create plugin database for dify-plugin-daemon + logger.info("Creating plugin database...") + cursor.execute("CREATE DATABASE dify_plugin;") logger.info("Plugin database created successfully") - except Exception as e: - logger.warning("Failed to create plugin database: %s", e) # Set up storage environment variables os.environ.setdefault("STORAGE_TYPE", "opendal") @@ -258,23 +263,16 @@ class DifyTestContainers: containers = [self.redis, self.postgres, self.dify_sandbox, self.dify_plugin_daemon] for container in containers: if container: - try: - container_name = container.image - logger.info("Stopping container: %s", container_name) - container.stop() - logger.info("Successfully stopped container: %s", container_name) - except Exception as e: - # Log error but don't fail the test cleanup - logger.warning("Failed to stop container %s: %s", container, e) + container_name = container.image + logger.info("Stopping container: %s", container_name) + container.stop() + logger.info("Successfully stopped container: %s", container_name) # Stop and remove the network if self.network: - try: - logger.info("Removing Docker network...") - self.network.remove() - logger.info("Successfully removed Docker network") - except Exception as e: - logger.warning("Failed to remove Docker network: %s", e) + logger.info("Removing Docker network...") + self.network.remove() + logger.info("Successfully removed Docker network") self._containers_started = False logger.info("All test containers stopped and cleaned up successfully") diff --git a/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py b/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py index cdf390b327..a60159c66a 100644 --- a/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py +++ b/api/tests/test_containers_integration_tests/core/rag/pipeline/test_queue_integration.py @@ -18,7 +18,7 @@ from faker import Faker from core.rag.pipeline.queue import TaskWrapper, TenantIsolatedTaskQueue from extensions.ext_redis import redis_client -from models import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAccountRole, TenantStatus @dataclass @@ -47,7 +47,7 @@ class TestTenantIsolatedTaskQueueIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - status="active", + status=AccountStatus.ACTIVE, ) db_session_with_containers.add(account) db_session_with_containers.commit() @@ -55,7 +55,7 @@ class TestTenantIsolatedTaskQueueIntegration: # Create tenant tenant = Tenant( name=fake.company(), - status="normal", + status=TenantStatus.NORMAL, ) db_session_with_containers.add(tenant) db_session_with_containers.commit() @@ -101,7 +101,7 @@ class TestTenantIsolatedTaskQueueIntegration: # Create second tenant tenant2 = Tenant( name=fake.company(), - status="normal", + status=TenantStatus.NORMAL, ) db_session_with_containers.add(tenant2) db_session_with_containers.commit() @@ -410,7 +410,7 @@ class TestTenantIsolatedTaskQueueCompatibility: email=fake.email(), name=fake.name(), interface_language="en-US", - status="active", + status=AccountStatus.ACTIVE, ) db_session_with_containers.add(account) db_session_with_containers.commit() @@ -418,7 +418,7 @@ class TestTenantIsolatedTaskQueueCompatibility: # Create tenant tenant = Tenant( name=fake.company(), - status="normal", + status=TenantStatus.NORMAL, ) db_session_with_containers.add(tenant) db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py b/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py index e5d3655771..75471afef8 100644 --- a/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py +++ b/api/tests/test_containers_integration_tests/core/rag/retrieval/test_dataset_retrieval_integration.py @@ -5,9 +5,10 @@ import pytest from faker import Faker from core.rag.retrieval.dataset_retrieval import DatasetRetrieval -from dify_graph.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest from models.dataset import Dataset, Document from services.account_service import AccountService, TenantService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestGetAvailableDatasetsIntegration: @@ -22,7 +23,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -83,7 +84,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -136,7 +137,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -189,7 +190,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -252,7 +253,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -286,7 +287,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account1, name=fake.company()) tenant1 = account1.current_tenant @@ -295,7 +296,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account2, name=fake.company()) tenant2 = account2.current_tenant @@ -362,7 +363,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -384,7 +385,7 @@ class TestGetAvailableDatasetsIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -445,7 +446,7 @@ class TestKnowledgeRetrievalIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -513,7 +514,7 @@ class TestKnowledgeRetrievalIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -561,7 +562,7 @@ class TestKnowledgeRetrievalIntegration: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/helpers/__init__.py b/api/tests/test_containers_integration_tests/helpers/__init__.py index 40d03889a9..0b753abd1f 100644 --- a/api/tests/test_containers_integration_tests/helpers/__init__.py +++ b/api/tests/test_containers_integration_tests/helpers/__init__.py @@ -1 +1,24 @@ """Helper utilities for integration tests.""" + +import re + + +def generate_valid_password(fake, length: int = 12) -> str: + """Generate a password that always satisfies the project's password validation rules. + + The password validation rule in ``api/libs/password.py`` requires passwords to + contain **both letters and digits** with a minimum length of 8: + + ``^(?=.*[a-zA-Z])(?=.*\\d).{8,}$`` + + ``Faker.password()`` does **not** guarantee that the generated password will + contain both character types, which can cause intermittent test failures. + + This helper re-generates until the result is valid (typically first attempt). + """ + for _ in range(100): + pwd = fake.password(length=length) + if re.search(r"[a-zA-Z]", pwd) and re.search(r"\d", pwd): + return pwd + # Fallback: should never be reached in practice + return fake.password(length=max(length - 2, 6)) + "a1" diff --git a/api/tests/test_containers_integration_tests/models/test_app_model_config.py b/api/tests/test_containers_integration_tests/models/test_app_model_config.py new file mode 100644 index 0000000000..e8b36097e1 --- /dev/null +++ b/api/tests/test_containers_integration_tests/models/test_app_model_config.py @@ -0,0 +1,32 @@ +""" +Integration tests for AppModelConfig using testcontainers. + +These tests validate database-backed model behavior without mocking SQLAlchemy queries. +""" + +from uuid import uuid4 + +from sqlalchemy.orm import Session + +from models.model import AppModelConfig + + +class TestAppModelConfig: + """Integration tests for AppModelConfig.""" + + def test_annotation_reply_dict_disabled_without_setting(self, db_session_with_containers: Session) -> None: + """Return disabled annotation reply dict when no AppAnnotationSetting exists.""" + # Arrange + config = AppModelConfig(app_id=str(uuid4())) + db_session_with_containers.add(config) + db_session_with_containers.commit() + + # Act + result = config.annotation_reply_dict + + # Assert + assert result == {"enabled": False} + + # Cleanup + db_session_with_containers.delete(config) + db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/services/test_account_service.py b/api/tests/test_containers_integration_tests/services/test_account_service.py index 8595f5bf14..cc9596d15f 100644 --- a/api/tests/test_containers_integration_tests/services/test_account_service.py +++ b/api/tests/test_containers_integration_tests/services/test_account_service.py @@ -20,6 +20,7 @@ from services.errors.account import ( TenantNotFoundError, ) from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestAccountService: @@ -53,7 +54,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -133,7 +134,7 @@ class TestAccountService: email=email, name=name, interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) def test_create_account_email_in_freeze( @@ -145,7 +146,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = True @@ -169,7 +170,7 @@ class TestAccountService: """ fake = Faker() email = fake.email() - password = fake.password(length=12) + password = generate_valid_password(fake) with pytest.raises(AccountPasswordError): AccountService.authenticate(email, password) @@ -180,7 +181,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -208,8 +209,8 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - correct_password = fake.password(length=12) - wrong_password = fake.password(length=12) + correct_password = generate_valid_password(fake) + wrong_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -234,7 +235,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - new_password = fake.password(length=12) + new_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -267,7 +268,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -297,8 +298,8 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - old_password = fake.password(length=12) - new_password = fake.password(length=12) + old_password = generate_valid_password(fake) + new_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -327,9 +328,9 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - old_password = fake.password(length=12) - wrong_password = fake.password(length=12) - new_password = fake.password(length=12) + old_password = generate_valid_password(fake) + wrong_password = generate_valid_password(fake) + new_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -354,7 +355,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - old_password = fake.password(length=12) + old_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -378,7 +379,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies[ @@ -412,7 +413,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies[ @@ -437,7 +438,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies[ @@ -535,7 +536,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -563,7 +564,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) updated_name = fake.name() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -592,7 +593,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -615,7 +616,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) ip_address = fake.ipv4() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -645,7 +646,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) ip_address = fake.ipv4() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -684,7 +685,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -714,7 +715,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -747,7 +748,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) tenant_name = fake.company() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -792,7 +793,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -825,7 +826,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) tenant_name = fake.company() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -864,7 +865,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -892,7 +893,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -926,7 +927,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) tenant_name = fake.company() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -957,7 +958,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -997,7 +998,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -1043,7 +1044,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -1080,7 +1081,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -1110,7 +1111,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -1139,7 +1140,7 @@ class TestAccountService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) wrong_code = fake.numerify(text="######") # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -1259,7 +1260,7 @@ class TestTenantService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1291,10 +1292,10 @@ class TestTenantService: tenant_name = fake.company() email1 = fake.email() name1 = fake.name() - password1 = fake.password(length=12) + password1 = generate_valid_password(fake) email2 = fake.email() name2 = fake.name() - password2 = fake.password(length=12) + password2 = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1332,7 +1333,7 @@ class TestTenantService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1364,7 +1365,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) tenant1_name = fake.company() tenant2_name = fake.company() # Setup mocks @@ -1403,7 +1404,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) tenant_name = fake.company() # Setup mocks mock_external_service_dependencies[ @@ -1441,7 +1442,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1466,7 +1467,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) tenant1_name = fake.company() tenant2_name = fake.company() # Setup mocks @@ -1507,7 +1508,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1534,7 +1535,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) tenant_name = fake.company() # Setup mocks mock_external_service_dependencies[ @@ -1562,10 +1563,10 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) admin_email = fake.email() admin_name = fake.name() - admin_password = fake.password(length=12) + admin_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1631,7 +1632,7 @@ class TestTenantService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1664,10 +1665,10 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) member_email = fake.email() member_name = fake.name() - member_password = fake.password(length=12) + member_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1705,7 +1706,7 @@ class TestTenantService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) invalid_action = "invalid_action_that_doesnt_exist" # Setup mocks mock_external_service_dependencies[ @@ -1738,7 +1739,7 @@ class TestTenantService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1770,10 +1771,10 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) member_email = fake.email() member_name = fake.name() - member_password = fake.password(length=12) + member_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1829,7 +1830,7 @@ class TestTenantService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1861,10 +1862,10 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) non_member_email = fake.email() non_member_name = fake.name() - non_member_password = fake.password(length=12) + non_member_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1900,10 +1901,10 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) member_email = fake.email() member_name = fake.name() - member_password = fake.password(length=12) + member_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -1949,10 +1950,10 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) member_email = fake.email() member_name = fake.name() - member_password = fake.password(length=12) + member_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -2006,10 +2007,10 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) member_email = fake.email() member_name = fake.name() - member_password = fake.password(length=12) + member_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -2071,7 +2072,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) workspace_name = fake.company() # Setup mocks mock_external_service_dependencies[ @@ -2110,7 +2111,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) existing_tenant_name = fake.company() new_workspace_name = fake.company() # Setup mocks @@ -2151,7 +2152,7 @@ class TestTenantService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) workspace_name = fake.company() # Setup mocks to disable workspace creation mock_external_service_dependencies[ @@ -2178,13 +2179,13 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) admin_email = fake.email() admin_name = fake.name() - admin_password = fake.password(length=12) + admin_password = generate_valid_password(fake) normal_email = fake.email() normal_name = fake.name() - normal_password = fake.password(length=12) + normal_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -2244,13 +2245,13 @@ class TestTenantService: tenant_name = fake.company() owner_email = fake.email() owner_name = fake.name() - owner_password = fake.password(length=12) + owner_password = generate_valid_password(fake) operator_email = fake.email() operator_name = fake.name() - operator_password = fake.password(length=12) + operator_password = generate_valid_password(fake) normal_email = fake.email() normal_name = fake.name() - normal_password = fake.password(length=12) + normal_password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies[ "feature_service" @@ -2351,7 +2352,7 @@ class TestRegisterService: fake = Faker() admin_email = fake.email() admin_name = fake.name() - admin_password = fake.password(length=12) + admin_password = generate_valid_password(fake) ip_address = fake.ipv4() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2399,7 +2400,7 @@ class TestRegisterService: fake = Faker() admin_email = fake.email() admin_name = fake.name() - admin_password = fake.password(length=12) + admin_password = generate_valid_password(fake) ip_address = fake.ipv4() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2440,7 +2441,7 @@ class TestRegisterService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2531,7 +2532,7 @@ class TestRegisterService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2576,7 +2577,7 @@ class TestRegisterService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2614,7 +2615,7 @@ class TestRegisterService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2653,7 +2654,7 @@ class TestRegisterService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2690,7 +2691,7 @@ class TestRegisterService: tenant_name = fake.company() inviter_email = fake.email() inviter_name = fake.name() - inviter_password = fake.password(length=12) + inviter_password = generate_valid_password(fake) new_member_email = fake.email() language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks @@ -2760,10 +2761,10 @@ class TestRegisterService: tenant_name = fake.company() inviter_email = fake.email() inviter_name = fake.name() - inviter_password = fake.password(length=12) + inviter_password = generate_valid_password(fake) existing_member_email = fake.email() existing_member_name = fake.name() - existing_member_password = fake.password(length=12) + existing_member_password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2824,10 +2825,10 @@ class TestRegisterService: tenant_name = fake.company() inviter_email = fake.email() inviter_name = fake.name() - inviter_password = fake.password(length=12) + inviter_password = generate_valid_password(fake) existing_pending_member_email = fake.email() existing_pending_member_name = fake.name() - existing_pending_member_password = fake.password(length=12) + existing_pending_member_password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2914,10 +2915,10 @@ class TestRegisterService: tenant_name = fake.company() inviter_email = fake.email() inviter_name = fake.name() - inviter_password = fake.password(length=12) + inviter_password = generate_valid_password(fake) already_in_tenant_email = fake.email() already_in_tenant_name = fake.name() - already_in_tenant_password = fake.password(length=12) + already_in_tenant_password = generate_valid_password(fake) language = fake.random_element(elements=("en-US", "zh-CN")) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -2967,7 +2968,7 @@ class TestRegisterService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -3011,7 +3012,7 @@ class TestRegisterService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -3058,7 +3059,7 @@ class TestRegisterService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -3101,7 +3102,7 @@ class TestRegisterService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -3144,7 +3145,7 @@ class TestRegisterService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True mock_external_service_dependencies["billing_service"].is_email_in_freeze.return_value = False @@ -3212,7 +3213,7 @@ class TestRegisterService: fake = Faker() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) invalid_tenant_id = fake.uuid4() token = fake.uuid4() # Setup mocks @@ -3263,7 +3264,7 @@ class TestRegisterService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) token = fake.uuid4() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -3313,7 +3314,7 @@ class TestRegisterService: tenant_name = fake.company() email = fake.email() name = fake.name() - password = fake.password(length=12) + password = generate_valid_password(fake) token = fake.uuid4() # Setup mocks mock_external_service_dependencies["feature_service"].get_system_features.return_value.is_allow_register = True @@ -3330,7 +3331,7 @@ class TestRegisterService: TenantService.create_tenant_member(tenant, account, role="normal") # Change tenant status to non-normal - tenant.status = "suspended" + tenant.status = "archive" db_session_with_containers.commit() diff --git a/api/tests/test_containers_integration_tests/services/test_agent_service.py b/api/tests/test_containers_integration_tests/services/test_agent_service.py index 45839fd463..4759d244fd 100644 --- a/api/tests/test_containers_integration_tests/services/test_agent_service.py +++ b/api/tests/test_containers_integration_tests/services/test_agent_service.py @@ -11,6 +11,7 @@ from models.model import AppModelConfig, Conversation, EndUser, Message, Message from services.account_service import AccountService, TenantService from services.agent_service import AgentService from services.app_service import AppService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestAgentService: @@ -111,7 +112,7 @@ class TestAgentService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_annotation_service.py b/api/tests/test_containers_integration_tests/services/test_annotation_service.py index 004d643955..a260d823a2 100644 --- a/api/tests/test_containers_integration_tests/services/test_annotation_service.py +++ b/api/tests/test_containers_integration_tests/services/test_annotation_service.py @@ -9,6 +9,7 @@ from models import Account from models.model import MessageAnnotation from services.annotation_service import AppAnnotationService from services.app_service import AppService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestAnnotationService: @@ -78,7 +79,7 @@ class TestAnnotationService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py b/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py index b8bf8543bc..7ce7357b41 100644 --- a/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py +++ b/api/tests/test_containers_integration_tests/services/test_api_based_extension_service.py @@ -7,6 +7,7 @@ from sqlalchemy.orm import Session from models.api_based_extension import APIBasedExtension from services.account_service import AccountService, TenantService from services.api_based_extension_service import APIBasedExtensionService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestAPIBasedExtensionService: @@ -55,7 +56,7 @@ class TestAPIBasedExtensionService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_app_dsl_service.py b/api/tests/test_containers_integration_tests/services/test_app_dsl_service.py index e2a450b90c..8a362e1f5e 100644 --- a/api/tests/test_containers_integration_tests/services/test_app_dsl_service.py +++ b/api/tests/test_containers_integration_tests/services/test_app_dsl_service.py @@ -9,6 +9,7 @@ from models.model import App, AppModelConfig from services.account_service import AccountService, TenantService from services.app_dsl_service import AppDslService, ImportMode, ImportStatus from services.app_service import AppService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestAppDslService: @@ -89,7 +90,7 @@ class TestAppDslService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_app_generate_service.py b/api/tests/test_containers_integration_tests/services/test_app_generate_service.py index 787a99f3e8..5b1a4790f5 100644 --- a/api/tests/test_containers_integration_tests/services/test_app_generate_service.py +++ b/api/tests/test_containers_integration_tests/services/test_app_generate_service.py @@ -2,6 +2,7 @@ import uuid from unittest.mock import ANY, MagicMock, patch import pytest +import sqlalchemy as sa from faker import Faker from sqlalchemy.orm import Session @@ -10,6 +11,7 @@ from models.model import EndUser from models.workflow import Workflow from services.app_generate_service import AppGenerateService from services.errors.app import WorkflowIdFormatError, WorkflowNotFoundError +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestAppGenerateService: @@ -147,7 +149,7 @@ class TestAppGenerateService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -491,20 +493,20 @@ class TestAppGenerateService: ) # Manually set invalid mode after creation + # With EnumText, invalid values are rejected at the DB level during flush, + # raising StatementError wrapping ValueError app.mode = "invalid_mode" # Setup test arguments args = {"inputs": {"query": fake.text(max_nb_chars=50)}, "response_mode": "streaming"} - # Execute the method under test and expect ValueError - with pytest.raises(ValueError) as exc_info: + # Execute the method under test and expect either ValueError (direct) or + # StatementError (from EnumText validation during autoflush) + with pytest.raises((ValueError, sa.exc.StatementError)): AppGenerateService.generate( app_model=app, user=account, args=args, invoke_from=InvokeFrom.SERVICE_API, streaming=True ) - # Verify error message - assert "Invalid app mode" in str(exc_info.value) - def test_generate_with_workflow_id_format_error( self, db_session_with_containers: Session, mock_external_service_dependencies ): diff --git a/api/tests/test_containers_integration_tests/services/test_app_service.py b/api/tests/test_containers_integration_tests/services/test_app_service.py index fc3b20aaae..d79f80c009 100644 --- a/api/tests/test_containers_integration_tests/services/test_app_service.py +++ b/api/tests/test_containers_integration_tests/services/test_app_service.py @@ -8,6 +8,7 @@ from constants.model_template import default_app_templates from models import Account from models.model import App, Site from services.account_service import AccountService, TenantService +from tests.test_containers_integration_tests.helpers import generate_valid_password # Delay import of AppService to avoid circular dependency # from services.app_service import AppService @@ -56,7 +57,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -112,7 +113,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -155,7 +156,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -203,7 +204,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -259,7 +260,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -334,7 +335,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -404,7 +405,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -473,7 +474,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -526,7 +527,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -585,7 +586,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -645,7 +646,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -705,7 +706,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -756,7 +757,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -808,7 +809,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -868,7 +869,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -907,7 +908,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -947,7 +948,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -997,7 +998,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -1039,7 +1040,7 @@ class TestAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_dataset_permission_service.py b/api/tests/test_containers_integration_tests/services/test_dataset_permission_service.py new file mode 100644 index 0000000000..44525e0036 --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_dataset_permission_service.py @@ -0,0 +1,497 @@ +""" +Container-backed integration tests for dataset permission services on the real SQL path. + +This module exercises persisted DatasetPermission rows and dataset permission +checks with testcontainers-backed infrastructure instead of database-chain mocks. +""" + +from uuid import uuid4 + +import pytest + +from extensions.ext_database import db +from models import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models.dataset import ( + Dataset, + DatasetPermission, + DatasetPermissionEnum, +) +from services.dataset_service import DatasetPermissionService, DatasetService +from services.errors.account import NoPermissionError + + +class DatasetPermissionTestDataFactory: + """Create persisted entities and request payloads for dataset permission integration tests.""" + + @staticmethod + def create_account_with_tenant( + role: TenantAccountRole = TenantAccountRole.NORMAL, + tenant: Tenant | None = None, + ) -> tuple[Account, Tenant]: + """Create a real account and tenant with specified role.""" + account = Account( + email=f"{uuid4()}@example.com", + name=f"user-{uuid4()}", + interface_language="en-US", + status="active", + ) + if tenant is None: + tenant = Tenant(name=f"tenant-{uuid4()}", status="normal") + db.session.add_all([account, tenant]) + else: + db.session.add(account) + + db.session.flush() + + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=role, + current=True, + ) + db.session.add(join) + db.session.commit() + + account.current_tenant = tenant + return account, tenant + + @staticmethod + def create_dataset( + tenant_id: str, + created_by: str, + permission: DatasetPermissionEnum = DatasetPermissionEnum.ONLY_ME, + name: str = "Test Dataset", + ) -> Dataset: + """Create a real dataset with specified attributes.""" + dataset = Dataset( + tenant_id=tenant_id, + name=name, + description="desc", + data_source_type="upload_file", + indexing_technique="high_quality", + created_by=created_by, + permission=permission, + provider="vendor", + retrieval_model={"top_k": 2}, + ) + db.session.add(dataset) + db.session.commit() + return dataset + + @staticmethod + def create_dataset_permission( + dataset_id: str, + account_id: str, + tenant_id: str, + has_permission: bool = True, + ) -> DatasetPermission: + """Create a real DatasetPermission instance.""" + permission = DatasetPermission( + dataset_id=dataset_id, + account_id=account_id, + tenant_id=tenant_id, + has_permission=has_permission, + ) + db.session.add(permission) + db.session.commit() + return permission + + @staticmethod + def build_user_list_payload(user_ids: list[str]) -> list[dict[str, str]]: + """Build the request payload shape used by partial-member list updates.""" + return [{"user_id": user_id} for user_id in user_ids] + + +class TestDatasetPermissionServiceGetPartialMemberList: + """Verify partial-member list reads against persisted DatasetPermission rows.""" + + def test_get_dataset_partial_member_list_with_members(self, db_session_with_containers): + """ + Test retrieving partial member list with multiple members. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + user_1, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + user_2, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + user_3, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + + expected_account_ids = [user_1.id, user_2.id, user_3.id] + for account_id in expected_account_ids: + DatasetPermissionTestDataFactory.create_dataset_permission(dataset.id, account_id, tenant.id) + + # Act + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + + # Assert + assert set(result) == set(expected_account_ids) + assert len(result) == 3 + + def test_get_dataset_partial_member_list_with_single_member(self, db_session_with_containers): + """ + Test retrieving partial member list with single member. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + user, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + + expected_account_ids = [user.id] + DatasetPermissionTestDataFactory.create_dataset_permission(dataset.id, user.id, tenant.id) + + # Act + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + + # Assert + assert set(result) == set(expected_account_ids) + assert len(result) == 1 + + def test_get_dataset_partial_member_list_empty(self, db_session_with_containers): + """ + Test retrieving partial member list when no members exist. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + + # Act + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + + # Assert + assert result == [] + assert len(result) == 0 + + +class TestDatasetPermissionServiceUpdatePartialMemberList: + """Verify partial-member list updates against persisted DatasetPermission rows.""" + + def test_update_partial_member_list_add_new_members(self, db_session_with_containers): + """ + Test adding new partial members to a dataset. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + member_1, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + member_2, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + user_list = DatasetPermissionTestDataFactory.build_user_list_payload([member_1.id, member_2.id]) + + # Act + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, user_list) + + # Assert + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert set(result) == {member_1.id, member_2.id} + + def test_update_partial_member_list_replace_existing(self, db_session_with_containers): + """ + Test replacing existing partial members with new ones. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + old_member_1, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + old_member_2, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + new_member_1, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + new_member_2, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + + old_users = DatasetPermissionTestDataFactory.build_user_list_payload([old_member_1.id, old_member_2.id]) + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, old_users) + + new_users = DatasetPermissionTestDataFactory.build_user_list_payload([new_member_1.id, new_member_2.id]) + + # Act + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, new_users) + + # Assert + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert set(result) == {new_member_1.id, new_member_2.id} + + def test_update_partial_member_list_empty_list(self, db_session_with_containers): + """ + Test updating with empty member list (clearing all members). + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + member_1, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + member_2, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + users = DatasetPermissionTestDataFactory.build_user_list_payload([member_1.id, member_2.id]) + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, users) + + # Act + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, []) + + # Assert + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert result == [] + + def test_update_partial_member_list_database_error_rollback(self, db_session_with_containers): + """ + Test error handling and rollback on database error. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + existing_member, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + replacement_member, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + DatasetPermissionService.update_partial_member_list( + tenant.id, + dataset.id, + DatasetPermissionTestDataFactory.build_user_list_payload([existing_member.id]), + ) + user_list = DatasetPermissionTestDataFactory.build_user_list_payload([replacement_member.id]) + rollback_called = {"count": 0} + original_rollback = db.session.rollback + + # Act / Assert + with pytest.MonkeyPatch.context() as mp: + + def _raise_commit(): + raise Exception("Database connection error") + + def _rollback_and_mark(): + rollback_called["count"] += 1 + original_rollback() + + mp.setattr("services.dataset_service.db.session.commit", _raise_commit) + mp.setattr("services.dataset_service.db.session.rollback", _rollback_and_mark) + with pytest.raises(Exception, match="Database connection error"): + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, user_list) + + # Assert + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert rollback_called["count"] == 1 + assert result == [existing_member.id] + assert db_session_with_containers.query(DatasetPermission).filter_by(dataset_id=dataset.id).count() == 1 + + +class TestDatasetPermissionServiceClearPartialMemberList: + """Verify partial-member clearing against persisted DatasetPermission rows.""" + + def test_clear_partial_member_list_success(self, db_session_with_containers): + """ + Test successful clearing of partial member list. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + member_1, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + member_2, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + users = DatasetPermissionTestDataFactory.build_user_list_payload([member_1.id, member_2.id]) + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, users) + + # Act + DatasetPermissionService.clear_partial_member_list(dataset.id) + + # Assert + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert result == [] + + def test_clear_partial_member_list_empty_list(self, db_session_with_containers): + """ + Test clearing partial member list when no members exist. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + + # Act + DatasetPermissionService.clear_partial_member_list(dataset.id) + + # Assert + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert result == [] + + def test_clear_partial_member_list_database_error_rollback(self, db_session_with_containers): + """ + Test error handling and rollback on database error. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + member_1, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + member_2, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + dataset = DatasetPermissionTestDataFactory.create_dataset(tenant.id, owner.id) + users = DatasetPermissionTestDataFactory.build_user_list_payload([member_1.id, member_2.id]) + DatasetPermissionService.update_partial_member_list(tenant.id, dataset.id, users) + rollback_called = {"count": 0} + original_rollback = db.session.rollback + + # Act / Assert + with pytest.MonkeyPatch.context() as mp: + + def _raise_commit(): + raise Exception("Database connection error") + + def _rollback_and_mark(): + rollback_called["count"] += 1 + original_rollback() + + mp.setattr("services.dataset_service.db.session.commit", _raise_commit) + mp.setattr("services.dataset_service.db.session.rollback", _rollback_and_mark) + with pytest.raises(Exception, match="Database connection error"): + DatasetPermissionService.clear_partial_member_list(dataset.id) + + # Assert + result = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert rollback_called["count"] == 1 + assert set(result) == {member_1.id, member_2.id} + assert db_session_with_containers.query(DatasetPermission).filter_by(dataset_id=dataset.id).count() == 2 + + +class TestDatasetServiceCheckDatasetPermission: + """Verify dataset access checks against persisted partial-member permissions.""" + + def test_check_dataset_permission_partial_members_with_permission_success(self, db_session_with_containers): + """ + Test that user with explicit permission can access partial_members dataset. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + user, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + + dataset = DatasetPermissionTestDataFactory.create_dataset( + tenant.id, + owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + DatasetPermissionTestDataFactory.create_dataset_permission(dataset.id, user.id, tenant.id) + + # Act (should not raise) + DatasetService.check_dataset_permission(dataset, user) + + # Assert + permissions = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert user.id in permissions + + def test_check_dataset_permission_partial_members_without_permission_error(self, db_session_with_containers): + """ + Test error when user without permission tries to access partial_members dataset. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + user, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + + dataset = DatasetPermissionTestDataFactory.create_dataset( + tenant.id, + owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + + # Act & Assert + with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): + DatasetService.check_dataset_permission(dataset, user) + + +class TestDatasetServiceCheckDatasetOperatorPermission: + """Verify operator permission checks against persisted partial-member permissions.""" + + def test_check_dataset_operator_permission_partial_members_with_permission_success( + self, db_session_with_containers + ): + """ + Test that user with explicit permission can access partial_members dataset. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + user, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + + dataset = DatasetPermissionTestDataFactory.create_dataset( + tenant.id, + owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + DatasetPermissionTestDataFactory.create_dataset_permission(dataset.id, user.id, tenant.id) + + # Act (should not raise) + DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) + + # Assert + permissions = DatasetPermissionService.get_dataset_partial_member_list(dataset.id) + assert user.id in permissions + + def test_check_dataset_operator_permission_partial_members_without_permission_error( + self, db_session_with_containers + ): + """ + Test error when user without permission tries to access partial_members dataset. + """ + # Arrange + owner, tenant = DatasetPermissionTestDataFactory.create_account_with_tenant(role=TenantAccountRole.OWNER) + user, _ = DatasetPermissionTestDataFactory.create_account_with_tenant( + role=TenantAccountRole.NORMAL, + tenant=tenant, + ) + + dataset = DatasetPermissionTestDataFactory.create_dataset( + tenant.id, + owner.id, + permission=DatasetPermissionEnum.PARTIAL_TEAM, + ) + + # Act & Assert + with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): + DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) diff --git a/api/tests/test_containers_integration_tests/services/test_dataset_service_delete_dataset.py b/api/tests/test_containers_integration_tests/services/test_dataset_service_delete_dataset.py new file mode 100644 index 0000000000..c47e35791d --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_dataset_service_delete_dataset.py @@ -0,0 +1,244 @@ +"""Container-backed integration tests for DatasetService.delete_dataset real SQL paths.""" + +from unittest.mock import patch +from uuid import uuid4 + +from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models.dataset import Dataset, Document +from services.dataset_service import DatasetService + + +class DatasetDeleteIntegrationDataFactory: + """Create persisted entities used by delete_dataset integration tests.""" + + @staticmethod + def create_account_with_tenant(db_session_with_containers) -> tuple[Account, Tenant]: + """Persist an owner account, tenant, and tenant join for dataset deletion tests.""" + account = Account( + email=f"owner-{uuid4()}@example.com", + name="Owner", + interface_language="en-US", + status="active", + ) + db_session_with_containers.add(account) + db_session_with_containers.commit() + + tenant = Tenant( + name=f"tenant-{uuid4()}", + status="normal", + ) + db_session_with_containers.add(tenant) + db_session_with_containers.commit() + + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + db_session_with_containers.add(join) + db_session_with_containers.commit() + + account.current_tenant = tenant + return account, tenant + + @staticmethod + def create_dataset( + db_session_with_containers, + tenant_id: str, + created_by: str, + *, + indexing_technique: str | None, + chunk_structure: str | None, + index_struct: str | None = '{"type": "paragraph"}', + collection_binding_id: str | None = None, + pipeline_id: str | None = None, + ) -> Dataset: + """Persist a dataset with delete_dataset-relevant fields configured.""" + dataset = Dataset( + tenant_id=tenant_id, + name=f"dataset-{uuid4()}", + data_source_type="upload_file", + indexing_technique=indexing_technique, + index_struct=index_struct, + created_by=created_by, + collection_binding_id=collection_binding_id, + pipeline_id=pipeline_id, + chunk_structure=chunk_structure, + ) + db_session_with_containers.add(dataset) + db_session_with_containers.commit() + return dataset + + @staticmethod + def create_document( + db_session_with_containers, + *, + tenant_id: str, + dataset_id: str, + created_by: str, + doc_form: str = "text_model", + ) -> Document: + """Persist a document so dataset.doc_form resolves through the real document path.""" + document = Document( + tenant_id=tenant_id, + dataset_id=dataset_id, + position=1, + data_source_type="upload_file", + batch=f"batch-{uuid4()}", + name="Document", + created_from="upload_file", + created_by=created_by, + doc_form=doc_form, + ) + db_session_with_containers.add(document) + db_session_with_containers.commit() + return document + + +class TestDatasetServiceDeleteDataset: + """Integration coverage for DatasetService.delete_dataset using testcontainers.""" + + def test_delete_dataset_with_documents_success(self, db_session_with_containers): + """Delete a dataset with documents and dispatch cleanup through the real signal handler.""" + # Arrange + owner, tenant = DatasetDeleteIntegrationDataFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetDeleteIntegrationDataFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + indexing_technique="high_quality", + chunk_structure=None, + index_struct='{"type": "paragraph"}', + collection_binding_id=str(uuid4()), + pipeline_id=str(uuid4()), + ) + DatasetDeleteIntegrationDataFactory.create_document( + db_session_with_containers, + tenant_id=tenant.id, + dataset_id=dataset.id, + created_by=owner.id, + doc_form="text_model", + ) + + # Act + with patch( + "events.event_handlers.clean_when_dataset_deleted.clean_dataset_task.delay", + autospec=True, + ) as clean_dataset_delay: + result = DatasetService.delete_dataset(dataset.id, owner) + + # Assert + db_session_with_containers.expire_all() + assert result is True + assert db_session_with_containers.get(Dataset, dataset.id) is None + clean_dataset_delay.assert_called_once_with( + dataset.id, + dataset.tenant_id, + dataset.indexing_technique, + dataset.index_struct, + dataset.collection_binding_id, + dataset.doc_form, + dataset.pipeline_id, + ) + + def test_delete_empty_dataset_success(self, db_session_with_containers): + """Delete an empty dataset without scheduling cleanup when both gating fields are absent.""" + # Arrange + owner, tenant = DatasetDeleteIntegrationDataFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetDeleteIntegrationDataFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + indexing_technique=None, + chunk_structure=None, + index_struct=None, + collection_binding_id=None, + pipeline_id=None, + ) + + # Act + with patch( + "events.event_handlers.clean_when_dataset_deleted.clean_dataset_task.delay", + autospec=True, + ) as clean_dataset_delay: + result = DatasetService.delete_dataset(dataset.id, owner) + + # Assert + db_session_with_containers.expire_all() + assert result is True + assert db_session_with_containers.get(Dataset, dataset.id) is None + clean_dataset_delay.assert_not_called() + + def test_delete_dataset_with_partial_none_values(self, db_session_with_containers): + """Delete a dataset without cleanup when indexing_technique is missing but doc_form resolves.""" + # Arrange + owner, tenant = DatasetDeleteIntegrationDataFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetDeleteIntegrationDataFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + indexing_technique=None, + chunk_structure="text_model", + index_struct='{"type": "paragraph"}', + collection_binding_id=str(uuid4()), + pipeline_id=str(uuid4()), + ) + + # Act + with patch( + "events.event_handlers.clean_when_dataset_deleted.clean_dataset_task.delay", + autospec=True, + ) as clean_dataset_delay: + result = DatasetService.delete_dataset(dataset.id, owner) + + # Assert + db_session_with_containers.expire_all() + assert result is True + assert db_session_with_containers.get(Dataset, dataset.id) is None + clean_dataset_delay.assert_not_called() + + def test_delete_dataset_with_doc_form_none_indexing_technique_exists(self, db_session_with_containers): + """Delete a dataset without cleanup when indexing exists but doc_form resolves to None.""" + # Arrange + owner, tenant = DatasetDeleteIntegrationDataFactory.create_account_with_tenant(db_session_with_containers) + dataset = DatasetDeleteIntegrationDataFactory.create_dataset( + db_session_with_containers, + tenant_id=tenant.id, + created_by=owner.id, + indexing_technique="high_quality", + chunk_structure=None, + index_struct='{"type": "paragraph"}', + collection_binding_id=str(uuid4()), + pipeline_id=str(uuid4()), + ) + + # Act + with patch( + "events.event_handlers.clean_when_dataset_deleted.clean_dataset_task.delay", + autospec=True, + ) as clean_dataset_delay: + result = DatasetService.delete_dataset(dataset.id, owner) + + # Assert + db_session_with_containers.expire_all() + assert result is True + assert db_session_with_containers.get(Dataset, dataset.id) is None + clean_dataset_delay.assert_not_called() + + def test_delete_dataset_not_found(self, db_session_with_containers): + """Return False without scheduling cleanup when the target dataset does not exist.""" + # Arrange + owner, _ = DatasetDeleteIntegrationDataFactory.create_account_with_tenant(db_session_with_containers) + missing_dataset_id = str(uuid4()) + + # Act + with patch( + "events.event_handlers.clean_when_dataset_deleted.clean_dataset_task.delay", + autospec=True, + ) as clean_dataset_delay: + result = DatasetService.delete_dataset(missing_dataset_id, owner) + + # Assert + assert result is False + clean_dataset_delay.assert_not_called() diff --git a/api/tests/test_containers_integration_tests/services/test_file_service.py b/api/tests/test_containers_integration_tests/services/test_file_service.py index 6712fe8454..50f5b7a8c0 100644 --- a/api/tests/test_containers_integration_tests/services/test_file_service.py +++ b/api/tests/test_containers_integration_tests/services/test_file_service.py @@ -263,6 +263,27 @@ class TestFileService: user=account, ) + def test_upload_file_allows_regular_punctuation_in_filename( + self, db_session_with_containers: Session, engine, mock_external_service_dependencies + ): + """ + Test file upload allows punctuation that is safe when stored as metadata. + """ + account = self._create_test_account(db_session_with_containers, mock_external_service_dependencies) + + filename = 'candidate?resume for "dify"|v2:.txt' + content = b"test content" + mimetype = "text/plain" + + upload_file = FileService(engine).upload_file( + filename=filename, + content=content, + mimetype=mimetype, + user=account, + ) + + assert upload_file.name == filename + def test_upload_file_filename_too_long( self, db_session_with_containers: Session, engine, mock_external_service_dependencies ): diff --git a/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py b/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py index 08f99cf55a..70d05792ce 100644 --- a/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py +++ b/api/tests/test_containers_integration_tests/services/test_human_input_delivery_test.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock import pytest -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.human_input.entities import ( EmailDeliveryConfig, EmailDeliveryMethod, @@ -68,7 +68,7 @@ def _create_app_with_draft_workflow(session, *, delivery_method_id: uuid.UUID) - inputs=[], user_actions=[], ).model_dump(mode="json") - node_data["type"] = NodeType.HUMAN_INPUT.value + node_data["type"] = BuiltinNodeTypes.HUMAN_INPUT graph = json.dumps({"nodes": [{"id": "human-node", "data": node_data}], "edges": []}) workflow = Workflow.new( diff --git a/api/tests/test_containers_integration_tests/services/test_message_export_service.py b/api/tests/test_containers_integration_tests/services/test_message_export_service.py new file mode 100644 index 0000000000..200f688ae9 --- /dev/null +++ b/api/tests/test_containers_integration_tests/services/test_message_export_service.py @@ -0,0 +1,233 @@ +import datetime +import json +import uuid +from decimal import Decimal + +import pytest +from sqlalchemy.orm import Session + +from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole +from models.model import ( + App, + AppAnnotationHitHistory, + Conversation, + DatasetRetrieverResource, + Message, + MessageAgentThought, + MessageAnnotation, + MessageChain, + MessageFeedback, + MessageFile, +) +from models.web import SavedMessage +from services.retention.conversation.message_export_service import AppMessageExportService, AppMessageExportStats + + +class TestAppMessageExportServiceIntegration: + @pytest.fixture(autouse=True) + def cleanup_database(self, db_session_with_containers: Session): + yield + db_session_with_containers.query(DatasetRetrieverResource).delete() + db_session_with_containers.query(AppAnnotationHitHistory).delete() + db_session_with_containers.query(SavedMessage).delete() + db_session_with_containers.query(MessageFile).delete() + db_session_with_containers.query(MessageAgentThought).delete() + db_session_with_containers.query(MessageChain).delete() + db_session_with_containers.query(MessageAnnotation).delete() + db_session_with_containers.query(MessageFeedback).delete() + db_session_with_containers.query(Message).delete() + db_session_with_containers.query(Conversation).delete() + db_session_with_containers.query(App).delete() + db_session_with_containers.query(TenantAccountJoin).delete() + db_session_with_containers.query(Tenant).delete() + db_session_with_containers.query(Account).delete() + db_session_with_containers.commit() + + @staticmethod + def _create_app_context(session: Session) -> tuple[App, Conversation]: + account = Account( + email=f"test-{uuid.uuid4()}@example.com", + name="tester", + interface_language="en-US", + status="active", + ) + session.add(account) + session.flush() + + tenant = Tenant(name=f"tenant-{uuid.uuid4()}", status="normal") + session.add(tenant) + session.flush() + + join = TenantAccountJoin( + tenant_id=tenant.id, + account_id=account.id, + role=TenantAccountRole.OWNER, + current=True, + ) + session.add(join) + session.flush() + + app = App( + tenant_id=tenant.id, + name="export-app", + description="integration test app", + mode="chat", + enable_site=True, + enable_api=True, + api_rpm=60, + api_rph=3600, + is_demo=False, + is_public=False, + created_by=account.id, + updated_by=account.id, + ) + session.add(app) + session.flush() + + conversation = Conversation( + app_id=app.id, + app_model_config_id=str(uuid.uuid4()), + model_provider="openai", + model_id="gpt-4o-mini", + mode="chat", + name="conv", + inputs={"seed": 1}, + status="normal", + from_source="api", + from_end_user_id=str(uuid.uuid4()), + ) + session.add(conversation) + session.commit() + return app, conversation + + @staticmethod + def _create_message( + session: Session, + app: App, + conversation: Conversation, + created_at: datetime.datetime, + *, + query: str, + answer: str, + inputs: dict, + message_metadata: str | None, + ) -> Message: + message = Message( + app_id=app.id, + conversation_id=conversation.id, + model_provider="openai", + model_id="gpt-4o-mini", + inputs=inputs, + query=query, + answer=answer, + message=[{"role": "assistant", "content": answer}], + message_tokens=10, + message_unit_price=Decimal("0.001"), + answer_tokens=20, + answer_unit_price=Decimal("0.002"), + total_price=Decimal("0.003"), + currency="USD", + message_metadata=message_metadata, + from_source="api", + from_end_user_id=conversation.from_end_user_id, + created_at=created_at, + ) + session.add(message) + session.flush() + return message + + def test_iter_records_with_stats(self, db_session_with_containers: Session): + app, conversation = self._create_app_context(db_session_with_containers) + + first_inputs = { + "plain": "v1", + "nested": {"a": 1, "b": [1, {"x": True}]}, + "list": ["x", 2, {"y": "z"}], + } + second_inputs = {"other": "value", "items": [1, 2, 3]} + + base_time = datetime.datetime(2026, 2, 25, 10, 0, 0) + first_message = self._create_message( + db_session_with_containers, + app, + conversation, + created_at=base_time, + query="q1", + answer="a1", + inputs=first_inputs, + message_metadata=json.dumps({"retriever_resources": [{"dataset_id": "ds-1"}]}), + ) + second_message = self._create_message( + db_session_with_containers, + app, + conversation, + created_at=base_time + datetime.timedelta(minutes=1), + query="q2", + answer="a2", + inputs=second_inputs, + message_metadata=None, + ) + + user_feedback_1 = MessageFeedback( + app_id=app.id, + conversation_id=conversation.id, + message_id=first_message.id, + rating="like", + from_source="user", + content="first", + from_end_user_id=conversation.from_end_user_id, + ) + user_feedback_2 = MessageFeedback( + app_id=app.id, + conversation_id=conversation.id, + message_id=first_message.id, + rating="dislike", + from_source="user", + content="second", + from_end_user_id=conversation.from_end_user_id, + ) + admin_feedback = MessageFeedback( + app_id=app.id, + conversation_id=conversation.id, + message_id=first_message.id, + rating="like", + from_source="admin", + content="should-be-filtered", + from_account_id=str(uuid.uuid4()), + ) + db_session_with_containers.add_all([user_feedback_1, user_feedback_2, admin_feedback]) + user_feedback_1.created_at = base_time + datetime.timedelta(minutes=2) + user_feedback_2.created_at = base_time + datetime.timedelta(minutes=3) + admin_feedback.created_at = base_time + datetime.timedelta(minutes=4) + db_session_with_containers.commit() + + service = AppMessageExportService( + app_id=app.id, + start_from=base_time - datetime.timedelta(minutes=1), + end_before=base_time + datetime.timedelta(minutes=10), + filename="unused", + batch_size=1, + dry_run=True, + ) + stats = AppMessageExportStats() + records = list(service._iter_records_with_stats(stats)) + service._finalize_stats(stats) + + assert len(records) == 2 + assert records[0].message_id == first_message.id + assert records[1].message_id == second_message.id + + assert records[0].inputs == first_inputs + assert records[1].inputs == second_inputs + + assert records[0].retriever_resources == [{"dataset_id": "ds-1"}] + assert records[1].retriever_resources == [] + + assert [feedback.rating for feedback in records[0].feedback] == ["like", "dislike"] + assert [feedback.content for feedback in records[0].feedback] == ["first", "second"] + assert records[1].feedback == [] + + assert stats.batches == 2 + assert stats.total_messages == 2 + assert stats.messages_with_feedback == 1 + assert stats.total_feedbacks == 2 diff --git a/api/tests/test_containers_integration_tests/services/test_message_service.py b/api/tests/test_containers_integration_tests/services/test_message_service.py index 19a684a58a..a6d7bf27fd 100644 --- a/api/tests/test_containers_integration_tests/services/test_message_service.py +++ b/api/tests/test_containers_integration_tests/services/test_message_service.py @@ -13,6 +13,7 @@ from services.errors.message import ( SuggestedQuestionsAfterAnswerDisabledError, ) from services.message_service import MessageService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestMessageService: @@ -95,7 +96,7 @@ class TestMessageService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -633,7 +634,7 @@ class TestMessageService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(other_account, name=fake.company()) diff --git a/api/tests/test_containers_integration_tests/services/test_saved_message_service.py b/api/tests/test_containers_integration_tests/services/test_saved_message_service.py index e3ec1d1df3..dd743d46c2 100644 --- a/api/tests/test_containers_integration_tests/services/test_saved_message_service.py +++ b/api/tests/test_containers_integration_tests/services/test_saved_message_service.py @@ -8,6 +8,7 @@ from models.model import EndUser, Message from models.web import SavedMessage from services.app_service import AppService from services.saved_message_service import SavedMessageService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestSavedMessageService: @@ -64,7 +65,7 @@ class TestSavedMessageService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -162,7 +163,7 @@ class TestSavedMessageService: answer_unit_price=0.002, total_price=0.003, currency="USD", - status="success", + status="normal", ) db_session_with_containers.add(message) diff --git a/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py b/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py index 912aa3dd2f..e0ea8211f6 100644 --- a/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py +++ b/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py @@ -10,6 +10,7 @@ from core.trigger.entities.entities import Subscription as TriggerSubscriptionEn from models.provider_ids import TriggerProviderID from models.trigger import TriggerSubscription from services.trigger.trigger_provider_service import TriggerProviderService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestTriggerProviderService: @@ -75,7 +76,7 @@ class TestTriggerProviderService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_web_conversation_service.py b/api/tests/test_containers_integration_tests/services/test_web_conversation_service.py index f1e8c152f1..425611744b 100644 --- a/api/tests/test_containers_integration_tests/services/test_web_conversation_service.py +++ b/api/tests/test_containers_integration_tests/services/test_web_conversation_service.py @@ -12,6 +12,7 @@ from models.web import PinnedConversation from services.account_service import AccountService, TenantService from services.app_service import AppService from services.web_conversation_service import WebConversationService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestWebConversationService: @@ -69,7 +70,7 @@ class TestWebConversationService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py b/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py index 9a1595d266..4fe65d5803 100644 --- a/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py +++ b/api/tests/test_containers_integration_tests/services/test_webapp_auth_service.py @@ -12,6 +12,7 @@ from models import Account, AccountStatus, Tenant, TenantAccountJoin, TenantAcco from models.model import App, Site from services.errors.account import AccountLoginError, AccountNotFoundError, AccountPasswordError from services.webapp_auth_service import WebAppAuthService, WebAppAuthType +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestWebAppAuthService: @@ -109,7 +110,7 @@ class TestWebAppAuthService: tuple: (account, tenant, password) - Created account, tenant and password """ fake = Faker() - password = fake.password(length=12) + password = generate_valid_password(fake) # Create account with password import uuid @@ -272,7 +273,7 @@ class TestWebAppAuthService: """ # Arrange: Create banned account fake = Faker() - password = fake.password(length=12) + password = generate_valid_password(fake) unique_email = f"test_{uuid.uuid4().hex[:8]}@example.com" account = Account( diff --git a/api/tests/test_containers_integration_tests/services/test_webhook_service.py b/api/tests/test_containers_integration_tests/services/test_webhook_service.py index 8f345b9cea..970da98c55 100644 --- a/api/tests/test_containers_integration_tests/services/test_webhook_service.py +++ b/api/tests/test_containers_integration_tests/services/test_webhook_service.py @@ -13,6 +13,7 @@ from models.trigger import AppTrigger, WorkflowWebhookTrigger from models.workflow import Workflow from services.account_service import AccountService, TenantService from services.trigger.webhook_service import WebhookService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestWebhookService: @@ -60,7 +61,7 @@ class TestWebhookService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -172,7 +173,7 @@ class TestWebhookService: assert workflow.app_id == test_data["app"].id assert node_config is not None assert node_config["id"] == "webhook_node" - assert node_config["data"]["title"] == "Test Webhook" + assert node_config["data"].title == "Test Webhook" def test_get_webhook_trigger_and_workflow_not_found(self, flask_app_with_containers): """Test webhook trigger not found scenario.""" diff --git a/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py b/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py index a3440b6b67..8ab8df2a5a 100644 --- a/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py +++ b/api/tests/test_containers_integration_tests/services/test_workflow_app_service.py @@ -15,6 +15,7 @@ from services.account_service import AccountService, TenantService # Delay import of AppService to avoid circular dependency # from services.app_service import AppService from services.workflow_app_service import WorkflowAppService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestWorkflowAppService: @@ -72,7 +73,7 @@ class TestWorkflowAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -120,7 +121,7 @@ class TestWorkflowAppService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_workflow_run_service.py b/api/tests/test_containers_integration_tests/services/test_workflow_run_service.py index 38ef3975b7..e080d6ef6b 100644 --- a/api/tests/test_containers_integration_tests/services/test_workflow_run_service.py +++ b/api/tests/test_containers_integration_tests/services/test_workflow_run_service.py @@ -15,6 +15,7 @@ from models.workflow import WorkflowRun from services.account_service import AccountService, TenantService from services.app_service import AppService from services.workflow_run_service import WorkflowRunService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestWorkflowRunService: @@ -72,7 +73,7 @@ class TestWorkflowRunService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/services/test_workflow_service.py b/api/tests/test_containers_integration_tests/services/test_workflow_service.py index bfb23bac68..056db41750 100644 --- a/api/tests/test_containers_integration_tests/services/test_workflow_service.py +++ b/api/tests/test_containers_integration_tests/services/test_workflow_service.py @@ -62,7 +62,7 @@ class TestWorkflowService: tenant = Tenant( name=f"Test Tenant {fake.company()}", plan="basic", - status="active", + status="normal", ) tenant.id = account.current_tenant_id tenant.created_at = fake.date_time_this_year() @@ -860,8 +860,8 @@ class TestWorkflowService: # Act try: result = workflow_service.get_default_block_config(node_type=invalid_node_type) - # If we get here, the service should return None for invalid types - assert result is None + # If we get here, the service should return an empty config for invalid types. + assert result == {} except ValueError: # It's also acceptable for the service to raise a ValueError for invalid types pass @@ -1090,20 +1090,19 @@ class TestWorkflowService: This test ensures that the service correctly handles feature validation for unsupported app modes, preventing invalid operations. + With EnumText, invalid values are rejected at the DB level during flush, + raising StatementError wrapping ValueError. """ # Arrange fake = Faker() app = self._create_test_app(db_session_with_containers, fake) app.mode = "invalid_mode" # Invalid mode - db_session_with_containers.commit() + # Act & Assert - EnumText validation rejects invalid values at DB flush + import sqlalchemy as sa - workflow_service = WorkflowService() - features = {"test": "value"} - - # Act & Assert - with pytest.raises(ValueError, match="Invalid app mode: invalid_mode"): - workflow_service.validate_features_structure(app_model=app, features=features) + with pytest.raises((ValueError, sa.exc.StatementError)): + db_session_with_containers.commit() def test_update_workflow_success(self, db_session_with_containers: Session): """ @@ -1429,14 +1428,14 @@ class TestWorkflowService: import uuid from datetime import datetime - from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus + from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph_events import NodeRunSucceededEvent from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node # Create mock node mock_node = MagicMock(spec=Node) - mock_node.node_type = NodeType.START + mock_node.node_type = BuiltinNodeTypes.START mock_node.title = "Test Node" mock_node.error_strategy = None @@ -1453,7 +1452,7 @@ class TestWorkflowService: mock_event = NodeRunSucceededEvent( id=str(uuid.uuid4()), node_id=node_id, - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, node_run_result=mock_result, start_at=datetime.now(), ) @@ -1474,9 +1473,9 @@ class TestWorkflowService: # Assert assert result is not None assert result.node_id == node_id - from dify_graph.enums import NodeType + from dify_graph.enums import BuiltinNodeTypes - assert result.node_type == NodeType.START # Should match the mock node type + assert result.node_type == BuiltinNodeTypes.START # Should match the mock node type assert result.title == "Test Node" # Import the enum for comparison from dify_graph.enums import WorkflowNodeExecutionStatus @@ -1504,14 +1503,14 @@ class TestWorkflowService: import uuid from datetime import datetime - from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus + from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph_events import NodeRunFailedEvent from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node # Create mock node mock_node = MagicMock(spec=Node) - mock_node.node_type = NodeType.LLM + mock_node.node_type = BuiltinNodeTypes.LLM mock_node.title = "Test Node" mock_node.error_strategy = None @@ -1526,7 +1525,7 @@ class TestWorkflowService: mock_event = NodeRunFailedEvent( id=str(uuid.uuid4()), node_id=node_id, - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_run_result=mock_result, error="Test error message", start_at=datetime.now(), @@ -1573,14 +1572,14 @@ class TestWorkflowService: import uuid from datetime import datetime - from dify_graph.enums import ErrorStrategy, NodeType, WorkflowNodeExecutionStatus + from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, WorkflowNodeExecutionStatus from dify_graph.graph_events import NodeRunFailedEvent from dify_graph.node_events import NodeRunResult from dify_graph.nodes.base.node import Node # Create mock node with continue_on_error mock_node = MagicMock(spec=Node) - mock_node.node_type = NodeType.TOOL + mock_node.node_type = BuiltinNodeTypes.TOOL mock_node.title = "Test Node" mock_node.error_strategy = ErrorStrategy.DEFAULT_VALUE mock_node.default_value_dict = {"default_output": "default_value"} @@ -1596,7 +1595,7 @@ class TestWorkflowService: mock_event = NodeRunFailedEvent( id=str(uuid.uuid4()), node_id=node_id, - node_type=NodeType.TOOL, + node_type=BuiltinNodeTypes.TOOL, node_run_result=mock_result, error="Test error message", start_at=datetime.now(), diff --git a/api/tests/test_containers_integration_tests/services/tools/test_workflow_tools_manage_service.py b/api/tests/test_containers_integration_tests/services/tools/test_workflow_tools_manage_service.py index 0b3c1112bd..34906a4e54 100644 --- a/api/tests/test_containers_integration_tests/services/tools/test_workflow_tools_manage_service.py +++ b/api/tests/test_containers_integration_tests/services/tools/test_workflow_tools_manage_service.py @@ -13,6 +13,7 @@ from models.workflow import Workflow as WorkflowModel from services.account_service import AccountService, TenantService from services.app_service import AppService from services.tools.workflow_tools_manage_service import WorkflowToolManageService +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestWorkflowToolManageService: @@ -87,7 +88,7 @@ class TestWorkflowToolManageService: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py index 8eb881258a..41d9fc8a29 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_dataset_task.py @@ -110,7 +110,7 @@ class TestCleanDatasetTask: tenant = Tenant( name=fake.company(), plan="basic", - status="active", + status="normal", ) db_session_with_containers.add(tenant) diff --git a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py index 379986c191..3ce199c602 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_clean_notion_document_task.py @@ -15,6 +15,7 @@ from faker import Faker from models.dataset import Dataset, Document, DocumentSegment from services.account_service import AccountService, TenantService from tasks.clean_notion_document_task import clean_notion_document_task +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestCleanNotionDocumentTask: @@ -76,7 +77,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -208,7 +209,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -252,7 +253,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -345,7 +346,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -431,7 +432,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -546,7 +547,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -642,7 +643,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -724,7 +725,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -834,7 +835,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -951,7 +952,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant @@ -1054,7 +1055,7 @@ class TestCleanNotionDocumentTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py index 207bdad751..4a62383590 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py @@ -322,11 +322,14 @@ class TestDatasetIndexingTaskIntegration: _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy) # Assert - task_dispatch_spy.delay.assert_called_once_with( - tenant_id=next_task["tenant_id"], - dataset_id=next_task["dataset_id"], - document_ids=next_task["document_ids"], - ) + # apply_async is used by implementation; assert it was called once with expected kwargs + assert task_dispatch_spy.apply_async.call_count == 1 + call_kwargs = task_dispatch_spy.apply_async.call_args.kwargs.get("kwargs", {}) + assert call_kwargs == { + "tenant_id": next_task["tenant_id"], + "dataset_id": next_task["dataset_id"], + "document_ids": next_task["document_ids"], + } set_waiting_spy.assert_called_once() delete_key_spy.assert_not_called() @@ -352,7 +355,7 @@ class TestDatasetIndexingTaskIntegration: _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy) # Assert - task_dispatch_spy.delay.assert_not_called() + task_dispatch_spy.apply_async.assert_not_called() delete_key_spy.assert_called_once() def test_validation_failure_sets_error_status_when_vector_space_at_limit( @@ -447,7 +450,7 @@ class TestDatasetIndexingTaskIntegration: _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy) # Assert - task_dispatch_spy.delay.assert_called_once() + task_dispatch_spy.apply_async.assert_called_once() def test_sessions_close_on_successful_indexing( self, @@ -534,7 +537,7 @@ class TestDatasetIndexingTaskIntegration: _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy) # Assert - assert task_dispatch_spy.delay.call_count == concurrency_limit + assert task_dispatch_spy.apply_async.call_count == concurrency_limit assert set_waiting_spy.call_count == concurrency_limit def test_task_queue_fifo_ordering(self, db_session_with_containers, patched_external_dependencies): @@ -565,9 +568,10 @@ class TestDatasetIndexingTaskIntegration: _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy) # Assert - assert task_dispatch_spy.delay.call_count == 3 + assert task_dispatch_spy.apply_async.call_count == 3 for index, expected_task in enumerate(ordered_tasks): - assert task_dispatch_spy.delay.call_args_list[index].kwargs["document_ids"] == expected_task["document_ids"] + call_kwargs = task_dispatch_spy.apply_async.call_args_list[index].kwargs.get("kwargs", {}) + assert call_kwargs.get("document_ids") == expected_task["document_ids"] def test_billing_disabled_skips_limit_checks(self, db_session_with_containers, patched_external_dependencies): """Skip limit checks when billing feature is disabled.""" diff --git a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py index 58c3ab5509..10c719fb6d 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_deal_dataset_vector_index_task.py @@ -15,6 +15,7 @@ from faker import Faker from models.dataset import Dataset, Document, DocumentSegment from services.account_service import AccountService, TenantService from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task +from tests.test_containers_integration_tests.helpers import generate_valid_password class TestDealDatasetVectorIndexTask: @@ -61,7 +62,7 @@ class TestDealDatasetVectorIndexTask: email=fake.email(), name=fake.name(), interface_language="en-US", - password=fake.password(length=12), + password=generate_valid_password(fake), ) TenantService.create_owner_tenant_if_not_exist(account, name=fake.company()) tenant = account.current_tenant diff --git a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py index bc0ed3bd2b..69ed5b632d 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_delete_segment_from_index_task.py @@ -48,7 +48,7 @@ class TestDeleteSegmentFromIndexTask: Tenant: Created test tenant instance """ fake = fake or Faker() - tenant = Tenant(name=f"Test Tenant {fake.company()}", plan="basic", status="active") + tenant = Tenant(name=f"Test Tenant {fake.company()}", plan="basic", status="normal") tenant.id = fake.uuid4() tenant.created_at = fake.date_time_this_year() tenant.updated_at = tenant.created_at diff --git a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py index 8f47b48ae2..6f7d2c28b5 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_disable_segments_from_index_task.py @@ -65,7 +65,7 @@ class TestDisableSegmentsFromIndexTask: tenant = Tenant( name=f"Test Tenant {fake.company()}", plan="basic", - status="active", + status="normal", ) tenant.id = account.tenant_id tenant.created_at = fake.date_time_this_year() diff --git a/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py index 4be1180c73..5dc1f6bee0 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py @@ -762,11 +762,12 @@ class TestDocumentIndexingTasks: mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify task function was called for each waiting task - assert mock_task_func.delay.call_count == 1 + assert mock_task_func.apply_async.call_count == 1 # Verify correct parameters for each call - calls = mock_task_func.delay.call_args_list - assert calls[0][1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]} + calls = mock_task_func.apply_async.call_args_list + sent_kwargs = calls[0][1]["kwargs"] + assert sent_kwargs == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]} # Verify queue is empty after processing (tasks were pulled) remaining_tasks = queue.pull_tasks(count=10) # Pull more than we added @@ -830,11 +831,15 @@ class TestDocumentIndexingTasks: assert updated_document.processing_started_at is not None # Verify waiting task was still processed despite core processing error - mock_task_func.delay.assert_called_once() + mock_task_func.apply_async.assert_called_once() # Verify correct parameters for the call - call = mock_task_func.delay.call_args - assert call[1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]} + call = mock_task_func.apply_async.call_args + assert call[1]["kwargs"] == { + "tenant_id": tenant_id, + "dataset_id": dataset_id, + "document_ids": ["waiting-doc-1"], + } # Verify queue is empty after processing (task was pulled) remaining_tasks = queue.pull_tasks(count=10) @@ -896,9 +901,13 @@ class TestDocumentIndexingTasks: mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once() # Verify only tenant1's waiting task was processed - mock_task_func.delay.assert_called_once() - call = mock_task_func.delay.call_args - assert call[1] == {"tenant_id": tenant1_id, "dataset_id": dataset1_id, "document_ids": ["tenant1-doc-1"]} + mock_task_func.apply_async.assert_called_once() + call = mock_task_func.apply_async.call_args + assert call[1]["kwargs"] == { + "tenant_id": tenant1_id, + "dataset_id": dataset1_id, + "document_ids": ["tenant1-doc-1"], + } # Verify tenant1's queue is empty remaining_tasks1 = queue1.pull_tasks(count=10) diff --git a/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py b/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py index 3cdec70df7..c0ddc27286 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_mail_email_code_login_task.py @@ -118,7 +118,7 @@ class TestSendEmailCodeLoginMailTask: tenant = Tenant( name=fake.company(), plan="basic", - status="active", + status="normal", ) db_session_with_containers.add(tenant) diff --git a/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py b/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py index ef7191299a..f01fcc1742 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py +++ b/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py @@ -1,6 +1,6 @@ import json import uuid -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest from faker import Faker @@ -388,8 +388,10 @@ class TestRagPipelineRunTasks: # Set the task key to indicate there are waiting tasks (legacy behavior) redis_client.set(legacy_task_key, 1, ex=60 * 60) - # Mock the task function calls - with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Mock the Celery group scheduling used by the implementation + with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group: + mock_group.return_value.apply_async = MagicMock() + # Act: Execute the priority task with new code but legacy queue data rag_pipeline_run_task(file_id, tenant.id) @@ -398,13 +400,14 @@ class TestRagPipelineRunTasks: mock_file_service["delete_file"].assert_called_once_with(file_id) assert mock_pipeline_generator.call_count == 1 - # Verify waiting tasks were processed, pull 1 task a time by default - assert mock_delay.call_count == 1 + # Verify waiting tasks were processed via group, pull 1 task a time by default + assert mock_group.return_value.apply_async.called - # Verify correct parameters for the call - call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} - assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0] - assert call_kwargs.get("tenant_id") == tenant.id + # Verify correct parameters for the first scheduled job signature + jobs = mock_group.call_args.args[0] if mock_group.call_args else [] + first_kwargs = jobs[0].kwargs if jobs else {} + assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0] + assert first_kwargs.get("tenant_id") == tenant.id # Verify that new code can process legacy queue entries # The new TenantIsolatedTaskQueue should be able to read from the legacy format @@ -446,8 +449,10 @@ class TestRagPipelineRunTasks: waiting_file_ids = [str(uuid.uuid4()) for _ in range(3)] queue.push_tasks(waiting_file_ids) - # Mock the task function calls - with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Mock the Celery group scheduling used by the implementation + with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group: + mock_group.return_value.apply_async = MagicMock() + # Act: Execute the regular task rag_pipeline_run_task(file_id, tenant.id) @@ -456,13 +461,14 @@ class TestRagPipelineRunTasks: mock_file_service["delete_file"].assert_called_once_with(file_id) assert mock_pipeline_generator.call_count == 1 - # Verify waiting tasks were processed, pull 1 task a time by default - assert mock_delay.call_count == 1 + # Verify waiting tasks were processed via group.apply_async + assert mock_group.return_value.apply_async.called - # Verify correct parameters for the call - call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} - assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0] - assert call_kwargs.get("tenant_id") == tenant.id + # Verify correct parameters for the first scheduled job signature + jobs = mock_group.call_args.args[0] if mock_group.call_args else [] + first_kwargs = jobs[0].kwargs if jobs else {} + assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0] + assert first_kwargs.get("tenant_id") == tenant.id # Verify queue still has remaining tasks (only 1 was pulled) remaining_tasks = queue.pull_tasks(count=10) @@ -557,8 +563,10 @@ class TestRagPipelineRunTasks: waiting_file_id = str(uuid.uuid4()) queue.push_tasks([waiting_file_id]) - # Mock the task function calls - with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Mock the Celery group scheduling used by the implementation + with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group: + mock_group.return_value.apply_async = MagicMock() + # Act: Execute the regular task (should not raise exception) rag_pipeline_run_task(file_id, tenant.id) @@ -569,12 +577,13 @@ class TestRagPipelineRunTasks: assert mock_pipeline_generator.call_count == 1 # Verify waiting task was still processed despite core processing error - mock_delay.assert_called_once() + assert mock_group.return_value.apply_async.called - # Verify correct parameters for the call - call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} - assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id - assert call_kwargs.get("tenant_id") == tenant.id + # Verify correct parameters for the first scheduled job signature + jobs = mock_group.call_args.args[0] if mock_group.call_args else [] + first_kwargs = jobs[0].kwargs if jobs else {} + assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id + assert first_kwargs.get("tenant_id") == tenant.id # Verify queue is empty after processing (task was pulled) remaining_tasks = queue.pull_tasks(count=10) @@ -684,8 +693,10 @@ class TestRagPipelineRunTasks: queue1.push_tasks([waiting_file_id1]) queue2.push_tasks([waiting_file_id2]) - # Mock the task function calls - with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Mock the Celery group scheduling used by the implementation + with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group: + mock_group.return_value.apply_async = MagicMock() + # Act: Execute the regular task for tenant1 only rag_pipeline_run_task(file_id1, tenant1.id) @@ -694,11 +705,12 @@ class TestRagPipelineRunTasks: assert mock_file_service["delete_file"].call_count == 1 assert mock_pipeline_generator.call_count == 1 - # Verify only tenant1's waiting task was processed - mock_delay.assert_called_once() - call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} - assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1 - assert call_kwargs.get("tenant_id") == tenant1.id + # Verify only tenant1's waiting task was processed (via group) + assert mock_group.return_value.apply_async.called + jobs = mock_group.call_args.args[0] if mock_group.call_args else [] + first_kwargs = jobs[0].kwargs if jobs else {} + assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1 + assert first_kwargs.get("tenant_id") == tenant1.id # Verify tenant1's queue is empty remaining_tasks1 = queue1.pull_tasks(count=10) @@ -913,8 +925,10 @@ class TestRagPipelineRunTasks: waiting_file_id = str(uuid.uuid4()) queue.push_tasks([waiting_file_id]) - # Mock the task function calls - with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay: + # Mock the Celery group scheduling used by the implementation + with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group: + mock_group.return_value.apply_async = MagicMock() + # Act & Assert: Execute the regular task (should raise Exception) with pytest.raises(Exception, match="File not found"): rag_pipeline_run_task(file_id, tenant.id) @@ -924,12 +938,13 @@ class TestRagPipelineRunTasks: mock_pipeline_generator.assert_not_called() # Verify waiting task was still processed despite file error - mock_delay.assert_called_once() + assert mock_group.return_value.apply_async.called - # Verify correct parameters for the call - call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {} - assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id - assert call_kwargs.get("tenant_id") == tenant.id + # Verify correct parameters for the first scheduled job signature + jobs = mock_group.call_args.args[0] if mock_group.call_args else [] + first_kwargs = jobs[0].kwargs if jobs else {} + assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id + assert first_kwargs.get("tenant_id") == tenant.id # Verify queue is empty after processing (task was pulled) remaining_tasks = queue.pull_tasks(count=10) diff --git a/api/tests/test_containers_integration_tests/trigger/conftest.py b/api/tests/test_containers_integration_tests/trigger/conftest.py index 9c1fd5e0ec..e3832fb2ef 100644 --- a/api/tests/test_containers_integration_tests/trigger/conftest.py +++ b/api/tests/test_containers_integration_tests/trigger/conftest.py @@ -105,18 +105,26 @@ def app_model( class MockCeleryGroup: - """Mock for celery group() function that collects dispatched tasks.""" + """Mock for celery group() function that collects dispatched tasks. + + Matches the Celery group API loosely, accepting arbitrary kwargs on apply_async + (e.g. producer) so production code can pass broker-related options without + breaking tests. + """ def __init__(self) -> None: self.collected: list[dict[str, Any]] = [] self._applied = False + self.last_apply_async_kwargs: dict[str, Any] | None = None def __call__(self, items: Any) -> MockCeleryGroup: self.collected = list(items) return self - def apply_async(self) -> None: + def apply_async(self, **kwargs: Any) -> None: + # Accept arbitrary kwargs like producer to be compatible with Celery self._applied = True + self.last_apply_async_kwargs = kwargs @property def applied(self) -> bool: diff --git a/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py b/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py index 7bfc6c9e13..4ea8d8c1c7 100644 --- a/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py +++ b/api/tests/test_containers_integration_tests/trigger/test_trigger_e2e.py @@ -14,11 +14,16 @@ from sqlalchemy.orm import Session from configs import dify_config from core.plugin.entities.request import TriggerInvokeEventResponse +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) from core.trigger.debug import event_selectors from core.trigger.debug.event_bus import TriggerDebugEventBus from core.trigger.debug.event_selectors import PluginTriggerDebugEventPoller, WebhookTriggerDebugEventPoller from core.trigger.debug.events import PluginTriggerDebugEvent, build_plugin_pool_key -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from libs.datetime_utils import naive_utc_now from models.account import Account, Tenant from models.enums import AppTriggerStatus, AppTriggerType, CreatorUserRole, WorkflowTriggerStatus @@ -48,10 +53,10 @@ WEBHOOK_ID_DEBUG = "whdebug1234567890123456" TEST_TRIGGER_URL = "https://trigger.example.com/base" -def _build_workflow_graph(root_node_id: str, trigger_type: NodeType) -> str: +def _build_workflow_graph(root_node_id: str, trigger_type: str) -> str: """Build a minimal workflow graph JSON for testing.""" - node_data: dict[str, Any] = {"type": trigger_type.value, "title": "trigger"} - if trigger_type == NodeType.TRIGGER_WEBHOOK: + node_data: dict[str, Any] = {"type": trigger_type, "title": "trigger"} + if trigger_type == TRIGGER_WEBHOOK_NODE_TYPE: node_data.update( { "method": "POST", @@ -64,7 +69,7 @@ def _build_workflow_graph(root_node_id: str, trigger_type: NodeType) -> str: graph = { "nodes": [ {"id": root_node_id, "data": node_data}, - {"id": "answer-1", "data": {"type": NodeType.ANSWER.value, "title": "answer"}}, + {"id": "answer-1", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "answer"}}, ], "edges": [{"source": root_node_id, "target": "answer-1", "sourceHandle": "success"}], } @@ -82,8 +87,8 @@ def test_publish_blocks_start_and_trigger_coexistence( graph = { "nodes": [ - {"id": "start", "data": {"type": NodeType.START.value}}, - {"id": "trig", "data": {"type": NodeType.TRIGGER_WEBHOOK.value}}, + {"id": "start", "data": {"type": BuiltinNodeTypes.START}}, + {"id": "trig", "data": {"type": TRIGGER_WEBHOOK_NODE_TYPE}}, ], "edges": [], } @@ -152,7 +157,7 @@ def test_webhook_trigger_creates_trigger_log( tenant, account = tenant_and_account webhook_node_id = "webhook-node" - graph_json = _build_workflow_graph(webhook_node_id, NodeType.TRIGGER_WEBHOOK) + graph_json = _build_workflow_graph(webhook_node_id, TRIGGER_WEBHOOK_NODE_TYPE) published_workflow = Workflow.new( tenant_id=tenant.id, app_id=app_model.id, @@ -282,7 +287,7 @@ def test_schedule_visual_debug_poll_generates_event(monkeypatch: pytest.MonkeyPa node_config = { "id": "schedule-visual", "data": { - "type": NodeType.TRIGGER_SCHEDULE.value, + "type": TRIGGER_SCHEDULE_NODE_TYPE, "mode": "visual", "frequency": "daily", "visual_config": {"time": "3:00 PM"}, @@ -372,7 +377,7 @@ def test_webhook_debug_dispatches_event( """Webhook single-step debug should dispatch debug event and be pollable.""" tenant, account = tenant_and_account webhook_node_id = "webhook-debug-node" - graph_json = _build_workflow_graph(webhook_node_id, NodeType.TRIGGER_WEBHOOK) + graph_json = _build_workflow_graph(webhook_node_id, TRIGGER_WEBHOOK_NODE_TYPE) draft_workflow = Workflow.new( tenant_id=tenant.id, app_id=app_model.id, @@ -443,7 +448,7 @@ def test_plugin_single_step_debug_flow( node_config = { "id": node_id, "data": { - "type": NodeType.TRIGGER_PLUGIN.value, + "type": TRIGGER_PLUGIN_NODE_TYPE, "title": "plugin", "plugin_id": "plugin-1", "plugin_unique_identifier": "plugin-1", @@ -519,14 +524,14 @@ def test_schedule_trigger_creates_trigger_log( { "id": schedule_node_id, "data": { - "type": NodeType.TRIGGER_SCHEDULE.value, + "type": TRIGGER_SCHEDULE_NODE_TYPE, "title": "schedule", "mode": "cron", "cron_expression": "0 9 * * *", "timezone": "UTC", }, }, - {"id": "answer-1", "data": {"type": NodeType.ANSWER.value, "title": "answer"}}, + {"id": "answer-1", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "answer"}}, ], "edges": [{"source": schedule_node_id, "target": "answer-1", "sourceHandle": "success"}], } @@ -639,7 +644,7 @@ def test_schedule_visual_cron_conversion( node_config: dict[str, Any] = { "id": "schedule-node", "data": { - "type": NodeType.TRIGGER_SCHEDULE.value, + "type": TRIGGER_SCHEDULE_NODE_TYPE, "mode": mode, "timezone": "UTC", }, @@ -680,7 +685,7 @@ def test_plugin_trigger_full_chain_with_db_verification( { "id": plugin_node_id, "data": { - "type": NodeType.TRIGGER_PLUGIN.value, + "type": TRIGGER_PLUGIN_NODE_TYPE, "title": "plugin", "plugin_id": "test-plugin", "plugin_unique_identifier": "test-plugin", @@ -690,7 +695,7 @@ def test_plugin_trigger_full_chain_with_db_verification( "parameters": {}, }, }, - {"id": "answer-1", "data": {"type": NodeType.ANSWER.value, "title": "answer"}}, + {"id": "answer-1", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "answer"}}, ], "edges": [{"source": plugin_node_id, "target": "answer-1", "sourceHandle": "success"}], } @@ -826,7 +831,7 @@ def test_plugin_debug_via_http_endpoint( node_config = { "id": node_id, "data": { - "type": NodeType.TRIGGER_PLUGIN.value, + "type": TRIGGER_PLUGIN_NODE_TYPE, "title": "plugin-debug", "plugin_id": "debug-plugin", "plugin_unique_identifier": "debug-plugin", diff --git a/api/tests/unit_tests/commands/test_clean_expired_messages.py b/api/tests/unit_tests/commands/test_clean_expired_messages.py new file mode 100644 index 0000000000..60173f723d --- /dev/null +++ b/api/tests/unit_tests/commands/test_clean_expired_messages.py @@ -0,0 +1,181 @@ +import datetime +import re +from unittest.mock import MagicMock, patch + +import click +import pytest + +from commands import clean_expired_messages + + +def _mock_service() -> MagicMock: + service = MagicMock() + service.run.return_value = { + "batches": 1, + "total_messages": 10, + "filtered_messages": 5, + "total_deleted": 5, + } + return service + + +def test_absolute_mode_calls_from_time_range(): + policy = object() + service = _mock_service() + start_from = datetime.datetime(2024, 1, 1, 0, 0, 0) + end_before = datetime.datetime(2024, 2, 1, 0, 0, 0) + + with ( + patch("commands.retention.create_message_clean_policy", return_value=policy), + patch("commands.retention.MessagesCleanService.from_time_range", return_value=service) as mock_from_time_range, + patch("commands.retention.MessagesCleanService.from_days") as mock_from_days, + ): + clean_expired_messages.callback( + batch_size=200, + graceful_period=21, + start_from=start_from, + end_before=end_before, + from_days_ago=None, + before_days=None, + dry_run=True, + ) + + mock_from_time_range.assert_called_once_with( + policy=policy, + start_from=start_from, + end_before=end_before, + batch_size=200, + dry_run=True, + ) + mock_from_days.assert_not_called() + + +def test_relative_mode_before_days_only_calls_from_days(): + policy = object() + service = _mock_service() + + with ( + patch("commands.retention.create_message_clean_policy", return_value=policy), + patch("commands.retention.MessagesCleanService.from_days", return_value=service) as mock_from_days, + patch("commands.retention.MessagesCleanService.from_time_range") as mock_from_time_range, + ): + clean_expired_messages.callback( + batch_size=500, + graceful_period=14, + start_from=None, + end_before=None, + from_days_ago=None, + before_days=30, + dry_run=False, + ) + + mock_from_days.assert_called_once_with( + policy=policy, + days=30, + batch_size=500, + dry_run=False, + ) + mock_from_time_range.assert_not_called() + + +def test_relative_mode_with_from_days_ago_calls_from_time_range(): + policy = object() + service = _mock_service() + fixed_now = datetime.datetime(2024, 8, 20, 12, 0, 0) + + with ( + patch("commands.retention.create_message_clean_policy", return_value=policy), + patch("commands.retention.MessagesCleanService.from_time_range", return_value=service) as mock_from_time_range, + patch("commands.retention.MessagesCleanService.from_days") as mock_from_days, + patch("commands.retention.naive_utc_now", return_value=fixed_now), + ): + clean_expired_messages.callback( + batch_size=1000, + graceful_period=21, + start_from=None, + end_before=None, + from_days_ago=60, + before_days=30, + dry_run=False, + ) + + mock_from_time_range.assert_called_once_with( + policy=policy, + start_from=fixed_now - datetime.timedelta(days=60), + end_before=fixed_now - datetime.timedelta(days=30), + batch_size=1000, + dry_run=False, + ) + mock_from_days.assert_not_called() + + +@pytest.mark.parametrize( + ("kwargs", "message"), + [ + ( + { + "start_from": datetime.datetime(2024, 1, 1), + "end_before": datetime.datetime(2024, 2, 1), + "from_days_ago": None, + "before_days": 30, + }, + "mutually exclusive", + ), + ( + { + "start_from": datetime.datetime(2024, 1, 1), + "end_before": None, + "from_days_ago": None, + "before_days": None, + }, + "Both --start-from and --end-before are required", + ), + ( + { + "start_from": None, + "end_before": None, + "from_days_ago": 10, + "before_days": None, + }, + "--from-days-ago must be used together with --before-days", + ), + ( + { + "start_from": None, + "end_before": None, + "from_days_ago": None, + "before_days": -1, + }, + "--before-days must be >= 0", + ), + ( + { + "start_from": None, + "end_before": None, + "from_days_ago": 30, + "before_days": 30, + }, + "--from-days-ago must be greater than --before-days", + ), + ( + { + "start_from": None, + "end_before": None, + "from_days_ago": None, + "before_days": None, + }, + "You must provide either (--start-from,--end-before) or (--before-days [--from-days-ago])", + ), + ], +) +def test_invalid_inputs_raise_usage_error(kwargs: dict, message: str): + with pytest.raises(click.UsageError, match=re.escape(message)): + clean_expired_messages.callback( + batch_size=1000, + graceful_period=21, + start_from=kwargs["start_from"], + end_before=kwargs["end_before"], + from_days_ago=kwargs["from_days_ago"], + before_days=kwargs["before_days"], + dry_run=False, + ) diff --git a/api/tests/unit_tests/commands/test_upgrade_db.py b/api/tests/unit_tests/commands/test_upgrade_db.py index 80173f5d46..5aa0313429 100644 --- a/api/tests/unit_tests/commands/test_upgrade_db.py +++ b/api/tests/unit_tests/commands/test_upgrade_db.py @@ -4,6 +4,7 @@ import types from unittest.mock import MagicMock import commands +from commands import system as system_commands from libs.db_migration_lock import LockNotOwnedError, RedisError HEARTBEAT_WAIT_TIMEOUT_SECONDS = 5.0 @@ -24,11 +25,11 @@ def _invoke_upgrade_db() -> int: def test_upgrade_db_skips_when_lock_not_acquired(monkeypatch, capsys): - monkeypatch.setattr(commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 1234) + monkeypatch.setattr(system_commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 1234) lock = MagicMock() lock.acquire.return_value = False - commands.redis_client.lock.return_value = lock + system_commands.redis_client.lock.return_value = lock exit_code = _invoke_upgrade_db() captured = capsys.readouterr() @@ -36,18 +37,18 @@ def test_upgrade_db_skips_when_lock_not_acquired(monkeypatch, capsys): assert exit_code == 0 assert "Database migration skipped" in captured.out - commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=1234, thread_local=False) + system_commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=1234, thread_local=False) lock.acquire.assert_called_once_with(blocking=False) lock.release.assert_not_called() def test_upgrade_db_failure_not_masked_by_lock_release(monkeypatch, capsys): - monkeypatch.setattr(commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 321) + monkeypatch.setattr(system_commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 321) lock = MagicMock() lock.acquire.return_value = True lock.release.side_effect = LockNotOwnedError("simulated") - commands.redis_client.lock.return_value = lock + system_commands.redis_client.lock.return_value = lock def _upgrade(): raise RuntimeError("boom") @@ -60,18 +61,18 @@ def test_upgrade_db_failure_not_masked_by_lock_release(monkeypatch, capsys): assert exit_code == 1 assert "Database migration failed: boom" in captured.out - commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=321, thread_local=False) + system_commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=321, thread_local=False) lock.acquire.assert_called_once_with(blocking=False) lock.release.assert_called_once() def test_upgrade_db_success_ignores_lock_not_owned_on_release(monkeypatch, capsys): - monkeypatch.setattr(commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 999) + monkeypatch.setattr(system_commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 999) lock = MagicMock() lock.acquire.return_value = True lock.release.side_effect = LockNotOwnedError("simulated") - commands.redis_client.lock.return_value = lock + system_commands.redis_client.lock.return_value = lock _install_fake_flask_migrate(monkeypatch, lambda: None) @@ -81,7 +82,7 @@ def test_upgrade_db_success_ignores_lock_not_owned_on_release(monkeypatch, capsy assert exit_code == 0 assert "Database migration successful!" in captured.out - commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=999, thread_local=False) + system_commands.redis_client.lock.assert_called_once_with(name="db_upgrade_lock", timeout=999, thread_local=False) lock.acquire.assert_called_once_with(blocking=False) lock.release.assert_called_once() @@ -92,11 +93,11 @@ def test_upgrade_db_renews_lock_during_migration(monkeypatch, capsys): """ # Use a small TTL so the heartbeat interval triggers quickly. - monkeypatch.setattr(commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 0.3) + monkeypatch.setattr(system_commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 0.3) lock = MagicMock() lock.acquire.return_value = True - commands.redis_client.lock.return_value = lock + system_commands.redis_client.lock.return_value = lock renewed = threading.Event() @@ -120,11 +121,11 @@ def test_upgrade_db_renews_lock_during_migration(monkeypatch, capsys): def test_upgrade_db_ignores_reacquire_errors(monkeypatch, capsys): # Use a small TTL so heartbeat runs during the upgrade call. - monkeypatch.setattr(commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 0.3) + monkeypatch.setattr(system_commands, "DB_UPGRADE_LOCK_TTL_SECONDS", 0.3) lock = MagicMock() lock.acquire.return_value = True - commands.redis_client.lock.return_value = lock + system_commands.redis_client.lock.return_value = lock attempted = threading.Event() diff --git a/api/tests/unit_tests/configs/test_dify_config.py b/api/tests/unit_tests/configs/test_dify_config.py index cf52980e57..d6933e2180 100644 --- a/api/tests/unit_tests/configs/test_dify_config.py +++ b/api/tests/unit_tests/configs/test_dify_config.py @@ -25,7 +25,8 @@ def test_dify_config(monkeypatch: pytest.MonkeyPatch): monkeypatch.setenv("HTTP_REQUEST_MAX_READ_TIMEOUT", "300") # Custom value for testing # load dotenv file with pydantic-settings - config = DifyConfig() + # Disable `.env` loading to ensure test stability across environments + config = DifyConfig(_env_file=None) # constant values assert config.COMMIT_SHA == "" @@ -59,7 +60,8 @@ def test_http_timeout_defaults(monkeypatch: pytest.MonkeyPatch): monkeypatch.setenv("DB_PORT", "5432") monkeypatch.setenv("DB_DATABASE", "dify") - config = DifyConfig() + # Disable `.env` loading to ensure test stability across environments + config = DifyConfig(_env_file=None) # Verify default timeout values assert config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT == 10 @@ -86,7 +88,8 @@ def test_flask_configs(monkeypatch: pytest.MonkeyPatch): monkeypatch.setenv("WEB_API_CORS_ALLOW_ORIGINS", "http://127.0.0.1:3000,*") monkeypatch.setenv("CODE_EXECUTION_ENDPOINT", "http://127.0.0.1:8194/") - flask_app.config.from_mapping(DifyConfig().model_dump()) # pyright: ignore + # Disable `.env` loading to ensure test stability across environments + flask_app.config.from_mapping(DifyConfig(_env_file=None).model_dump()) # pyright: ignore config = flask_app.config # configs read from pydantic-settings diff --git a/api/tests/unit_tests/conftest.py b/api/tests/unit_tests/conftest.py index d2111ebac8..3f75fd2851 100644 --- a/api/tests/unit_tests/conftest.py +++ b/api/tests/unit_tests/conftest.py @@ -32,11 +32,6 @@ os.environ.setdefault("OPENDAL_SCHEME", "fs") os.environ.setdefault("OPENDAL_FS_ROOT", "/tmp/dify-storage") os.environ.setdefault("STORAGE_TYPE", "opendal") -# Add the API directory to Python path to ensure proper imports -import sys - -sys.path.insert(0, PROJECT_DIR) - from core.db.session_factory import configure_session_factory, session_factory from extensions import ext_redis diff --git a/api/tests/unit_tests/controllers/common/test_errors.py b/api/tests/unit_tests/controllers/common/test_errors.py new file mode 100644 index 0000000000..25a9fe5b66 --- /dev/null +++ b/api/tests/unit_tests/controllers/common/test_errors.py @@ -0,0 +1,70 @@ +from controllers.common.errors import ( + BlockedFileExtensionError, + FilenameNotExistsError, + FileTooLargeError, + NoFileUploadedError, + RemoteFileUploadError, + TooManyFilesError, + UnsupportedFileTypeError, +) + + +class TestFilenameNotExistsError: + def test_defaults(self): + error = FilenameNotExistsError() + + assert error.code == 400 + assert error.description == "The specified filename does not exist." + + +class TestRemoteFileUploadError: + def test_defaults(self): + error = RemoteFileUploadError() + + assert error.code == 400 + assert error.description == "Error uploading remote file." + + +class TestFileTooLargeError: + def test_defaults(self): + error = FileTooLargeError() + + assert error.code == 413 + assert error.error_code == "file_too_large" + assert error.description == "File size exceeded. {message}" + + +class TestUnsupportedFileTypeError: + def test_defaults(self): + error = UnsupportedFileTypeError() + + assert error.code == 415 + assert error.error_code == "unsupported_file_type" + assert error.description == "File type not allowed." + + +class TestBlockedFileExtensionError: + def test_defaults(self): + error = BlockedFileExtensionError() + + assert error.code == 400 + assert error.error_code == "file_extension_blocked" + assert error.description == "The file extension is blocked for security reasons." + + +class TestTooManyFilesError: + def test_defaults(self): + error = TooManyFilesError() + + assert error.code == 400 + assert error.error_code == "too_many_files" + assert error.description == "Only one file is allowed." + + +class TestNoFileUploadedError: + def test_defaults(self): + error = NoFileUploadedError() + + assert error.code == 400 + assert error.error_code == "no_file_uploaded" + assert error.description == "Please upload your file." diff --git a/api/tests/unit_tests/controllers/common/test_file_response.py b/api/tests/unit_tests/controllers/common/test_file_response.py index 2487c362bd..b7500fb7f9 100644 --- a/api/tests/unit_tests/controllers/common/test_file_response.py +++ b/api/tests/unit_tests/controllers/common/test_file_response.py @@ -1,22 +1,95 @@ from flask import Response -from controllers.common.file_response import enforce_download_for_html, is_html_content +from controllers.common.file_response import ( + _normalize_mime_type, + enforce_download_for_html, + is_html_content, +) -class TestFileResponseHelpers: - def test_is_html_content_detects_mime_type(self): +class TestNormalizeMimeType: + def test_returns_empty_string_for_none(self): + assert _normalize_mime_type(None) == "" + + def test_returns_empty_string_for_empty_string(self): + assert _normalize_mime_type("") == "" + + def test_normalizes_mime_type(self): + assert _normalize_mime_type("Text/HTML; Charset=UTF-8") == "text/html" + + +class TestIsHtmlContent: + def test_detects_html_via_mime_type(self): mime_type = "text/html; charset=UTF-8" - result = is_html_content(mime_type, filename="file.txt", extension="txt") + result = is_html_content( + mime_type=mime_type, + filename="file.txt", + extension="txt", + ) assert result is True - def test_is_html_content_detects_extension(self): - result = is_html_content("text/plain", filename="report.html", extension=None) + def test_detects_html_via_extension_argument(self): + result = is_html_content( + mime_type="text/plain", + filename=None, + extension="html", + ) assert result is True - def test_enforce_download_for_html_sets_headers(self): + def test_detects_html_via_filename_extension(self): + result = is_html_content( + mime_type="text/plain", + filename="report.html", + extension=None, + ) + + assert result is True + + def test_returns_false_when_no_html_detected_anywhere(self): + """ + Missing negative test: + - MIME type is not HTML + - filename has no HTML extension + - extension argument is not HTML + """ + result = is_html_content( + mime_type="application/json", + filename="data.json", + extension="json", + ) + + assert result is False + + def test_returns_false_when_all_inputs_are_none(self): + result = is_html_content( + mime_type=None, + filename=None, + extension=None, + ) + + assert result is False + + +class TestEnforceDownloadForHtml: + def test_sets_attachment_when_filename_missing(self): + response = Response("payload", mimetype="text/html") + + updated = enforce_download_for_html( + response, + mime_type="text/html", + filename=None, + extension="html", + ) + + assert updated is True + assert response.headers["Content-Disposition"] == "attachment" + assert response.headers["Content-Type"] == "application/octet-stream" + assert response.headers["X-Content-Type-Options"] == "nosniff" + + def test_sets_headers_when_filename_present(self): response = Response("payload", mimetype="text/html") updated = enforce_download_for_html( @@ -27,11 +100,12 @@ class TestFileResponseHelpers: ) assert updated is True - assert "attachment" in response.headers["Content-Disposition"] + assert response.headers["Content-Disposition"].startswith("attachment") + assert "unsafe.html" in response.headers["Content-Disposition"] assert response.headers["Content-Type"] == "application/octet-stream" assert response.headers["X-Content-Type-Options"] == "nosniff" - def test_enforce_download_for_html_no_change_for_non_html(self): + def test_does_not_modify_response_for_non_html_content(self): response = Response("payload", mimetype="text/plain") updated = enforce_download_for_html( diff --git a/api/tests/unit_tests/controllers/common/test_helpers.py b/api/tests/unit_tests/controllers/common/test_helpers.py new file mode 100644 index 0000000000..59c463177c --- /dev/null +++ b/api/tests/unit_tests/controllers/common/test_helpers.py @@ -0,0 +1,188 @@ +from uuid import UUID + +import httpx +import pytest + +from controllers.common import helpers +from controllers.common.helpers import FileInfo, guess_file_info_from_response + + +def make_response( + url="https://example.com/file.txt", + headers=None, + content=None, +): + return httpx.Response( + 200, + request=httpx.Request("GET", url), + headers=headers or {}, + content=content or b"", + ) + + +class TestGuessFileInfoFromResponse: + def test_filename_from_url(self): + response = make_response( + url="https://example.com/test.pdf", + content=b"Hello World", + ) + + info = guess_file_info_from_response(response) + + assert info.filename == "test.pdf" + assert info.extension == ".pdf" + assert info.mimetype == "application/pdf" + + def test_filename_from_content_disposition(self): + headers = { + "Content-Disposition": "attachment; filename=myfile.csv", + "Content-Type": "text/csv", + } + response = make_response( + url="https://example.com/", + headers=headers, + content=b"Hello World", + ) + + info = guess_file_info_from_response(response) + + assert info.filename == "myfile.csv" + assert info.extension == ".csv" + assert info.mimetype == "text/csv" + + @pytest.mark.parametrize( + ("magic_available", "expected_ext"), + [ + (True, "txt"), + (False, "bin"), + ], + ) + def test_generated_filename_when_missing(self, monkeypatch, magic_available, expected_ext): + if magic_available: + if helpers.magic is None: + pytest.skip("python-magic is not installed, cannot run 'magic_available=True' test variant") + else: + monkeypatch.setattr(helpers, "magic", None) + + response = make_response( + url="https://example.com/", + content=b"Hello World", + ) + + info = guess_file_info_from_response(response) + + name, ext = info.filename.split(".") + UUID(name) + assert ext == expected_ext + + def test_mimetype_from_header_when_unknown(self): + headers = {"Content-Type": "application/json"} + response = make_response( + url="https://example.com/file.unknown", + headers=headers, + content=b'{"a": 1}', + ) + + info = guess_file_info_from_response(response) + + assert info.mimetype == "application/json" + + def test_extension_added_when_missing(self): + headers = {"Content-Type": "image/png"} + response = make_response( + url="https://example.com/image", + headers=headers, + content=b"fakepngdata", + ) + + info = guess_file_info_from_response(response) + + assert info.extension == ".png" + assert info.filename.endswith(".png") + + def test_content_length_used_as_size(self): + headers = { + "Content-Length": "1234", + "Content-Type": "text/plain", + } + response = make_response( + url="https://example.com/a.txt", + headers=headers, + content=b"a" * 1234, + ) + + info = guess_file_info_from_response(response) + + assert info.size == 1234 + + def test_size_minus_one_when_header_missing(self): + response = make_response(url="https://example.com/a.txt") + + info = guess_file_info_from_response(response) + + assert info.size == -1 + + def test_fallback_to_bin_extension(self): + headers = {"Content-Type": "application/octet-stream"} + response = make_response( + url="https://example.com/download", + headers=headers, + content=b"\x00\x01\x02\x03", + ) + + info = guess_file_info_from_response(response) + + assert info.extension == ".bin" + assert info.filename.endswith(".bin") + + def test_return_type(self): + response = make_response() + + info = guess_file_info_from_response(response) + + assert isinstance(info, FileInfo) + + +class TestMagicImportWarnings: + @pytest.mark.parametrize( + ("platform_name", "expected_message"), + [ + ("Windows", "pip install python-magic-bin"), + ("Darwin", "brew install libmagic"), + ("Linux", "sudo apt-get install libmagic1"), + ("Other", "install `libmagic`"), + ], + ) + def test_magic_import_warning_per_platform( + self, + monkeypatch, + platform_name, + expected_message, + ): + import builtins + import importlib + + # Force ImportError when "magic" is imported + real_import = builtins.__import__ + + def fake_import(name, *args, **kwargs): + if name == "magic": + raise ImportError("No module named magic") + return real_import(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", fake_import) + monkeypatch.setattr("platform.system", lambda: platform_name) + + # Remove helpers so it imports fresh + import sys + + original_helpers = sys.modules.get(helpers.__name__) + sys.modules.pop(helpers.__name__, None) + + try: + with pytest.warns(UserWarning, match="To use python-magic") as warning: + imported_helpers = importlib.import_module(helpers.__name__) + assert expected_message in str(warning[0].message) + finally: + if original_helpers is not None: + sys.modules[helpers.__name__] = original_helpers diff --git a/api/tests/unit_tests/controllers/common/test_schema.py b/api/tests/unit_tests/controllers/common/test_schema.py new file mode 100644 index 0000000000..56c8160f02 --- /dev/null +++ b/api/tests/unit_tests/controllers/common/test_schema.py @@ -0,0 +1,189 @@ +import sys +from enum import StrEnum +from unittest.mock import MagicMock, patch + +import pytest +from flask_restx import Namespace +from pydantic import BaseModel + + +class UserModel(BaseModel): + id: int + name: str + + +class ProductModel(BaseModel): + id: int + price: float + + +@pytest.fixture(autouse=True) +def mock_console_ns(): + """Mock the console_ns to avoid circular imports during test collection.""" + mock_ns = MagicMock(spec=Namespace) + mock_ns.models = {} + + # Inject mock before importing schema module + with patch.dict(sys.modules, {"controllers.console": MagicMock(console_ns=mock_ns)}): + yield mock_ns + + +def test_default_ref_template_value(): + from controllers.common.schema import DEFAULT_REF_TEMPLATE_SWAGGER_2_0 + + assert DEFAULT_REF_TEMPLATE_SWAGGER_2_0 == "#/definitions/{model}" + + +def test_register_schema_model_calls_namespace_schema_model(): + from controllers.common.schema import register_schema_model + + namespace = MagicMock(spec=Namespace) + + register_schema_model(namespace, UserModel) + + namespace.schema_model.assert_called_once() + + model_name, schema = namespace.schema_model.call_args.args + + assert model_name == "UserModel" + assert isinstance(schema, dict) + assert "properties" in schema + + +def test_register_schema_model_passes_schema_from_pydantic(): + from controllers.common.schema import DEFAULT_REF_TEMPLATE_SWAGGER_2_0, register_schema_model + + namespace = MagicMock(spec=Namespace) + + register_schema_model(namespace, UserModel) + + schema = namespace.schema_model.call_args.args[1] + + expected_schema = UserModel.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0) + + assert schema == expected_schema + + +def test_register_schema_models_registers_multiple_models(): + from controllers.common.schema import register_schema_models + + namespace = MagicMock(spec=Namespace) + + register_schema_models(namespace, UserModel, ProductModel) + + assert namespace.schema_model.call_count == 2 + + called_names = [call.args[0] for call in namespace.schema_model.call_args_list] + assert called_names == ["UserModel", "ProductModel"] + + +def test_register_schema_models_calls_register_schema_model(monkeypatch): + from controllers.common.schema import register_schema_models + + namespace = MagicMock(spec=Namespace) + + calls = [] + + def fake_register(ns, model): + calls.append((ns, model)) + + monkeypatch.setattr( + "controllers.common.schema.register_schema_model", + fake_register, + ) + + register_schema_models(namespace, UserModel, ProductModel) + + assert calls == [ + (namespace, UserModel), + (namespace, ProductModel), + ] + + +class StatusEnum(StrEnum): + ACTIVE = "active" + INACTIVE = "inactive" + + +class PriorityEnum(StrEnum): + HIGH = "high" + LOW = "low" + + +def test_get_or_create_model_returns_existing_model(mock_console_ns): + from controllers.common.schema import get_or_create_model + + existing_model = MagicMock() + mock_console_ns.models = {"TestModel": existing_model} + + result = get_or_create_model("TestModel", {"key": "value"}) + + assert result == existing_model + mock_console_ns.model.assert_not_called() + + +def test_get_or_create_model_creates_new_model_when_not_exists(mock_console_ns): + from controllers.common.schema import get_or_create_model + + mock_console_ns.models = {} + new_model = MagicMock() + mock_console_ns.model.return_value = new_model + field_def = {"name": {"type": "string"}} + + result = get_or_create_model("NewModel", field_def) + + assert result == new_model + mock_console_ns.model.assert_called_once_with("NewModel", field_def) + + +def test_get_or_create_model_does_not_call_model_if_exists(mock_console_ns): + from controllers.common.schema import get_or_create_model + + existing_model = MagicMock() + mock_console_ns.models = {"ExistingModel": existing_model} + + result = get_or_create_model("ExistingModel", {"key": "value"}) + + assert result == existing_model + mock_console_ns.model.assert_not_called() + + +def test_register_enum_models_registers_single_enum(): + from controllers.common.schema import register_enum_models + + namespace = MagicMock(spec=Namespace) + + register_enum_models(namespace, StatusEnum) + + namespace.schema_model.assert_called_once() + + model_name, schema = namespace.schema_model.call_args.args + + assert model_name == "StatusEnum" + assert isinstance(schema, dict) + + +def test_register_enum_models_registers_multiple_enums(): + from controllers.common.schema import register_enum_models + + namespace = MagicMock(spec=Namespace) + + register_enum_models(namespace, StatusEnum, PriorityEnum) + + assert namespace.schema_model.call_count == 2 + + called_names = [call.args[0] for call in namespace.schema_model.call_args_list] + assert called_names == ["StatusEnum", "PriorityEnum"] + + +def test_register_enum_models_uses_correct_ref_template(): + from controllers.common.schema import register_enum_models + + namespace = MagicMock(spec=Namespace) + + register_enum_models(namespace, StatusEnum) + + schema = namespace.schema_model.call_args.args[1] + + # Verify the schema contains enum values + assert "enum" in schema or "anyOf" in schema diff --git a/api/tests/unit_tests/core/model_runtime/__base/__init__.py b/api/tests/unit_tests/controllers/console/app/__init__.py similarity index 100% rename from api/tests/unit_tests/core/model_runtime/__base/__init__.py rename to api/tests/unit_tests/controllers/console/app/__init__.py diff --git a/api/tests/unit_tests/controllers/console/app/test_annotation_api.py b/api/tests/unit_tests/controllers/console/app/test_annotation_api.py new file mode 100644 index 0000000000..fecbd7f7b0 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_annotation_api.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +from controllers.console.app import annotation as annotation_module + + +def test_annotation_reply_payload_valid(): + """Test AnnotationReplyPayload with valid data.""" + payload = annotation_module.AnnotationReplyPayload( + score_threshold=0.5, + embedding_provider_name="openai", + embedding_model_name="text-embedding-3-small", + ) + assert payload.score_threshold == 0.5 + assert payload.embedding_provider_name == "openai" + assert payload.embedding_model_name == "text-embedding-3-small" + + +def test_annotation_setting_update_payload_valid(): + """Test AnnotationSettingUpdatePayload with valid data.""" + payload = annotation_module.AnnotationSettingUpdatePayload( + score_threshold=0.75, + ) + assert payload.score_threshold == 0.75 + + +def test_annotation_list_query_defaults(): + """Test AnnotationListQuery with default parameters.""" + query = annotation_module.AnnotationListQuery() + assert query.page == 1 + assert query.limit == 20 + assert query.keyword == "" + + +def test_annotation_list_query_custom_page(): + """Test AnnotationListQuery with custom page.""" + query = annotation_module.AnnotationListQuery(page=3, limit=50) + assert query.page == 3 + assert query.limit == 50 + + +def test_annotation_list_query_with_keyword(): + """Test AnnotationListQuery with keyword.""" + query = annotation_module.AnnotationListQuery(keyword="test") + assert query.keyword == "test" + + +def test_create_annotation_payload_with_message_id(): + """Test CreateAnnotationPayload with message ID.""" + payload = annotation_module.CreateAnnotationPayload( + message_id="550e8400-e29b-41d4-a716-446655440000", + question="What is AI?", + ) + assert payload.message_id == "550e8400-e29b-41d4-a716-446655440000" + assert payload.question == "What is AI?" + + +def test_create_annotation_payload_with_text(): + """Test CreateAnnotationPayload with text content.""" + payload = annotation_module.CreateAnnotationPayload( + question="What is ML?", + answer="Machine learning is...", + ) + assert payload.question == "What is ML?" + assert payload.answer == "Machine learning is..." + + +def test_update_annotation_payload(): + """Test UpdateAnnotationPayload.""" + payload = annotation_module.UpdateAnnotationPayload( + question="Updated question", + answer="Updated answer", + ) + assert payload.question == "Updated question" + assert payload.answer == "Updated answer" + + +def test_annotation_reply_status_query_enable(): + """Test AnnotationReplyStatusQuery with enable action.""" + query = annotation_module.AnnotationReplyStatusQuery(action="enable") + assert query.action == "enable" + + +def test_annotation_reply_status_query_disable(): + """Test AnnotationReplyStatusQuery with disable action.""" + query = annotation_module.AnnotationReplyStatusQuery(action="disable") + assert query.action == "disable" + + +def test_annotation_file_payload_valid(): + """Test AnnotationFilePayload with valid message ID.""" + payload = annotation_module.AnnotationFilePayload(message_id="550e8400-e29b-41d4-a716-446655440000") + assert payload.message_id == "550e8400-e29b-41d4-a716-446655440000" diff --git a/api/tests/unit_tests/controllers/console/app/test_annotation_security.py b/api/tests/unit_tests/controllers/console/app/test_annotation_security.py index 06a7b98baf..9f1ff9b40f 100644 --- a/api/tests/unit_tests/controllers/console/app/test_annotation_security.py +++ b/api/tests/unit_tests/controllers/console/app/test_annotation_security.py @@ -13,6 +13,9 @@ from pandas.errors import ParserError from werkzeug.datastructures import FileStorage from configs import dify_config +from controllers.console.wraps import annotation_import_concurrency_limit, annotation_import_rate_limit +from services.annotation_service import AppAnnotationService +from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task class TestAnnotationImportRateLimiting: @@ -33,8 +36,6 @@ class TestAnnotationImportRateLimiting: def test_rate_limit_per_minute_enforced(self, mock_redis, mock_current_account): """Test that per-minute rate limit is enforced.""" - from controllers.console.wraps import annotation_import_rate_limit - # Simulate exceeding per-minute limit mock_redis.zcard.side_effect = [ dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE + 1, # Minute check @@ -54,7 +55,6 @@ class TestAnnotationImportRateLimiting: def test_rate_limit_per_hour_enforced(self, mock_redis, mock_current_account): """Test that per-hour rate limit is enforced.""" - from controllers.console.wraps import annotation_import_rate_limit # Simulate exceeding per-hour limit mock_redis.zcard.side_effect = [ @@ -74,7 +74,6 @@ class TestAnnotationImportRateLimiting: def test_rate_limit_within_limits_passes(self, mock_redis, mock_current_account): """Test that requests within limits are allowed.""" - from controllers.console.wraps import annotation_import_rate_limit # Simulate being under both limits mock_redis.zcard.return_value = 2 @@ -110,7 +109,6 @@ class TestAnnotationImportConcurrencyControl: def test_concurrency_limit_enforced(self, mock_redis, mock_current_account): """Test that concurrent task limit is enforced.""" - from controllers.console.wraps import annotation_import_concurrency_limit # Simulate max concurrent tasks already running mock_redis.zcard.return_value = dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT @@ -127,7 +125,6 @@ class TestAnnotationImportConcurrencyControl: def test_concurrency_within_limit_passes(self, mock_redis, mock_current_account): """Test that requests within concurrency limits are allowed.""" - from controllers.console.wraps import annotation_import_concurrency_limit # Simulate being under concurrent task limit mock_redis.zcard.return_value = 1 @@ -142,7 +139,6 @@ class TestAnnotationImportConcurrencyControl: def test_stale_jobs_are_cleaned_up(self, mock_redis, mock_current_account): """Test that old/stale job entries are removed.""" - from controllers.console.wraps import annotation_import_concurrency_limit mock_redis.zcard.return_value = 0 @@ -203,7 +199,6 @@ class TestAnnotationImportServiceValidation: def test_max_records_limit_enforced(self, mock_app, mock_db_session): """Test that files with too many records are rejected.""" - from services.annotation_service import AppAnnotationService # Create CSV with too many records max_records = dify_config.ANNOTATION_IMPORT_MAX_RECORDS @@ -229,7 +224,6 @@ class TestAnnotationImportServiceValidation: def test_min_records_limit_enforced(self, mock_app, mock_db_session): """Test that files with too few valid records are rejected.""" - from services.annotation_service import AppAnnotationService # Create CSV with only header (no data rows) csv_content = "question,answer\n" @@ -249,7 +243,6 @@ class TestAnnotationImportServiceValidation: def test_invalid_csv_format_handled(self, mock_app, mock_db_session): """Test that invalid CSV format is handled gracefully.""" - from services.annotation_service import AppAnnotationService # Any content is fine once we force ParserError csv_content = 'invalid,csv,format\nwith,unbalanced,quotes,and"stuff' @@ -270,7 +263,6 @@ class TestAnnotationImportServiceValidation: def test_valid_import_succeeds(self, mock_app, mock_db_session): """Test that valid import request succeeds.""" - from services.annotation_service import AppAnnotationService # Create valid CSV csv_content = "question,answer\nWhat is AI?,Artificial Intelligence\nWhat is ML?,Machine Learning\n" @@ -300,18 +292,10 @@ class TestAnnotationImportServiceValidation: class TestAnnotationImportTaskOptimization: """Test optimizations in batch import task.""" - def test_task_has_timeout_configured(self): - """Test that task has proper timeout configuration.""" - from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task - - # Verify task configuration - assert hasattr(batch_import_annotations_task, "time_limit") - assert hasattr(batch_import_annotations_task, "soft_time_limit") - - # Check timeout values are reasonable - # Hard limit should be 6 minutes (360s) - # Soft limit should be 5 minutes (300s) - # Note: actual values depend on Celery configuration + def test_task_is_registered_with_queue(self): + """Test that task is registered with the correct queue.""" + assert hasattr(batch_import_annotations_task, "apply_async") + assert hasattr(batch_import_annotations_task, "delay") class TestConfigurationValues: diff --git a/api/tests/unit_tests/controllers/console/app/test_app_apis.py b/api/tests/unit_tests/controllers/console/app/test_app_apis.py new file mode 100644 index 0000000000..074bbfab78 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_app_apis.py @@ -0,0 +1,585 @@ +""" +Additional tests to improve coverage for low-coverage modules in controllers/console/app. +Target: increase coverage for files with <75% coverage. +""" + +from __future__ import annotations + +import uuid +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest +from werkzeug.exceptions import BadRequest, NotFound + +from controllers.console.app import ( + annotation as annotation_module, +) +from controllers.console.app import ( + completion as completion_module, +) +from controllers.console.app import ( + message as message_module, +) +from controllers.console.app import ( + ops_trace as ops_trace_module, +) +from controllers.console.app import ( + site as site_module, +) +from controllers.console.app import ( + statistic as statistic_module, +) +from controllers.console.app import ( + workflow_app_log as workflow_app_log_module, +) +from controllers.console.app import ( + workflow_draft_variable as workflow_draft_variable_module, +) +from controllers.console.app import ( + workflow_statistic as workflow_statistic_module, +) +from controllers.console.app import ( + workflow_trigger as workflow_trigger_module, +) +from controllers.console.app import ( + wraps as wraps_module, +) +from controllers.console.app.completion import ChatMessagePayload, CompletionMessagePayload +from controllers.console.app.mcp_server import MCPServerCreatePayload, MCPServerUpdatePayload +from controllers.console.app.ops_trace import TraceConfigPayload, TraceProviderQuery +from controllers.console.app.site import AppSiteUpdatePayload +from controllers.console.app.workflow import AdvancedChatWorkflowRunPayload, SyncDraftWorkflowPayload +from controllers.console.app.workflow_app_log import WorkflowAppLogQuery +from controllers.console.app.workflow_draft_variable import WorkflowDraftVariableUpdatePayload +from controllers.console.app.workflow_statistic import WorkflowStatisticQuery +from controllers.console.app.workflow_trigger import Parser, ParserEnable + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +class _ConnContext: + def __init__(self, rows): + self._rows = rows + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def execute(self, _query, _args): + return self._rows + + +# ========== Completion Tests ========== +class TestCompletionEndpoints: + """Tests for completion API endpoints.""" + + def test_completion_create_payload(self): + """Test completion creation payload.""" + payload = CompletionMessagePayload(inputs={"prompt": "test"}, model_config={}) + assert payload.inputs == {"prompt": "test"} + + def test_chat_message_payload_uuid_validation(self): + payload = ChatMessagePayload( + inputs={}, + model_config={}, + query="hi", + conversation_id=str(uuid.uuid4()), + parent_message_id=str(uuid.uuid4()), + ) + assert payload.query == "hi" + + def test_completion_api_success(self, app, monkeypatch): + api = completion_module.CompletionMessageApi() + method = _unwrap(api.post) + + class DummyAccount: + pass + + dummy_account = DummyAccount() + + monkeypatch.setattr(completion_module, "current_user", dummy_account) + monkeypatch.setattr(completion_module, "Account", DummyAccount) + monkeypatch.setattr( + completion_module.AppGenerateService, + "generate", + lambda **_kwargs: {"text": "ok"}, + ) + monkeypatch.setattr( + completion_module.helper, + "compact_generate_response", + lambda response: {"result": response}, + ) + + with app.test_request_context( + "/", + json={"inputs": {}, "model_config": {}, "query": "hi"}, + ): + resp = method(app_model=MagicMock(id="app-1")) + + assert resp == {"result": {"text": "ok"}} + + def test_completion_api_conversation_not_exists(self, app, monkeypatch): + api = completion_module.CompletionMessageApi() + method = _unwrap(api.post) + + class DummyAccount: + pass + + dummy_account = DummyAccount() + + monkeypatch.setattr(completion_module, "current_user", dummy_account) + monkeypatch.setattr(completion_module, "Account", DummyAccount) + monkeypatch.setattr( + completion_module.AppGenerateService, + "generate", + lambda **_kwargs: (_ for _ in ()).throw( + completion_module.services.errors.conversation.ConversationNotExistsError() + ), + ) + + with app.test_request_context( + "/", + json={"inputs": {}, "model_config": {}, "query": "hi"}, + ): + with pytest.raises(NotFound): + method(app_model=MagicMock(id="app-1")) + + def test_completion_api_provider_not_initialized(self, app, monkeypatch): + api = completion_module.CompletionMessageApi() + method = _unwrap(api.post) + + class DummyAccount: + pass + + dummy_account = DummyAccount() + + monkeypatch.setattr(completion_module, "current_user", dummy_account) + monkeypatch.setattr(completion_module, "Account", DummyAccount) + monkeypatch.setattr( + completion_module.AppGenerateService, + "generate", + lambda **_kwargs: (_ for _ in ()).throw(completion_module.ProviderTokenNotInitError("x")), + ) + + with app.test_request_context( + "/", + json={"inputs": {}, "model_config": {}, "query": "hi"}, + ): + with pytest.raises(completion_module.ProviderNotInitializeError): + method(app_model=MagicMock(id="app-1")) + + def test_completion_api_quota_exceeded(self, app, monkeypatch): + api = completion_module.CompletionMessageApi() + method = _unwrap(api.post) + + class DummyAccount: + pass + + dummy_account = DummyAccount() + + monkeypatch.setattr(completion_module, "current_user", dummy_account) + monkeypatch.setattr(completion_module, "Account", DummyAccount) + monkeypatch.setattr( + completion_module.AppGenerateService, + "generate", + lambda **_kwargs: (_ for _ in ()).throw(completion_module.QuotaExceededError()), + ) + + with app.test_request_context( + "/", + json={"inputs": {}, "model_config": {}, "query": "hi"}, + ): + with pytest.raises(completion_module.ProviderQuotaExceededError): + method(app_model=MagicMock(id="app-1")) + + +# ========== OpsTrace Tests ========== +class TestOpsTraceEndpoints: + """Tests for ops_trace endpoint.""" + + def test_ops_trace_query_basic(self): + """Test ops_trace query.""" + query = TraceProviderQuery(tracing_provider="langfuse") + assert query.tracing_provider == "langfuse" + + def test_ops_trace_config_payload(self): + payload = TraceConfigPayload(tracing_provider="langfuse", tracing_config={"api_key": "k"}) + assert payload.tracing_config["api_key"] == "k" + + def test_trace_app_config_get_empty(self, app, monkeypatch): + api = ops_trace_module.TraceAppConfigApi() + method = _unwrap(api.get) + + monkeypatch.setattr( + ops_trace_module.OpsService, + "get_tracing_app_config", + lambda **_kwargs: None, + ) + + with app.test_request_context("/?tracing_provider=langfuse"): + result = method(app_id="app-1") + + assert result == {"has_not_configured": True} + + def test_trace_app_config_post_invalid(self, app, monkeypatch): + api = ops_trace_module.TraceAppConfigApi() + method = _unwrap(api.post) + + monkeypatch.setattr( + ops_trace_module.OpsService, + "create_tracing_app_config", + lambda **_kwargs: {"error": True}, + ) + + with app.test_request_context( + "/", + json={"tracing_provider": "langfuse", "tracing_config": {"api_key": "k"}}, + ): + with pytest.raises(BadRequest): + method(app_id="app-1") + + def test_trace_app_config_delete_not_found(self, app, monkeypatch): + api = ops_trace_module.TraceAppConfigApi() + method = _unwrap(api.delete) + + monkeypatch.setattr( + ops_trace_module.OpsService, + "delete_tracing_app_config", + lambda **_kwargs: False, + ) + + with app.test_request_context("/?tracing_provider=langfuse"): + with pytest.raises(BadRequest): + method(app_id="app-1") + + +# ========== Site Tests ========== +class TestSiteEndpoints: + """Tests for site endpoint.""" + + def test_site_response_structure(self): + """Test site response structure.""" + payload = AppSiteUpdatePayload(title="My Site", description="Test site") + assert payload.title == "My Site" + + def test_site_default_language_validation(self): + payload = AppSiteUpdatePayload(default_language="en-US") + assert payload.default_language == "en-US" + + def test_app_site_update_post(self, app, monkeypatch): + api = site_module.AppSite() + method = _unwrap(api.post) + + site = MagicMock() + query = MagicMock() + query.where.return_value.first.return_value = site + monkeypatch.setattr( + site_module.db, + "session", + MagicMock(query=lambda *_args, **_kwargs: query, commit=lambda: None), + ) + monkeypatch.setattr( + site_module, + "current_account_with_tenant", + lambda: (SimpleNamespace(id="u1"), "t1"), + ) + monkeypatch.setattr(site_module, "naive_utc_now", lambda: "now") + + with app.test_request_context("/", json={"title": "My Site"}): + result = method(app_model=SimpleNamespace(id="app-1")) + + assert result is site + + def test_app_site_access_token_reset(self, app, monkeypatch): + api = site_module.AppSiteAccessTokenReset() + method = _unwrap(api.post) + + site = MagicMock() + query = MagicMock() + query.where.return_value.first.return_value = site + monkeypatch.setattr( + site_module.db, + "session", + MagicMock(query=lambda *_args, **_kwargs: query, commit=lambda: None), + ) + monkeypatch.setattr(site_module.Site, "generate_code", lambda *_args, **_kwargs: "code") + monkeypatch.setattr( + site_module, + "current_account_with_tenant", + lambda: (SimpleNamespace(id="u1"), "t1"), + ) + monkeypatch.setattr(site_module, "naive_utc_now", lambda: "now") + + with app.test_request_context("/"): + result = method(app_model=SimpleNamespace(id="app-1")) + + assert result is site + + +# ========== Workflow Tests ========== +class TestWorkflowEndpoints: + """Tests for workflow endpoints.""" + + def test_workflow_copy_payload(self): + """Test workflow copy payload.""" + payload = SyncDraftWorkflowPayload(graph={}, features={}) + assert payload.graph == {} + + def test_workflow_mode_query(self): + """Test workflow mode query.""" + payload = AdvancedChatWorkflowRunPayload(inputs={}, query="hi") + assert payload.query == "hi" + + +# ========== Workflow App Log Tests ========== +class TestWorkflowAppLogEndpoints: + """Tests for workflow app log endpoints.""" + + def test_workflow_app_log_query(self): + """Test workflow app log query.""" + query = WorkflowAppLogQuery(keyword="test", page=1, limit=20) + assert query.keyword == "test" + + def test_workflow_app_log_query_detail_bool(self): + query = WorkflowAppLogQuery(detail="true") + assert query.detail is True + + def test_workflow_app_log_api_get(self, app, monkeypatch): + api = workflow_app_log_module.WorkflowAppLogApi() + method = _unwrap(api.get) + + monkeypatch.setattr(workflow_app_log_module, "db", SimpleNamespace(engine=MagicMock())) + + class DummySession: + def __enter__(self): + return "session" + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr(workflow_app_log_module, "Session", lambda *args, **kwargs: DummySession()) + + def fake_get_paginate(self, **_kwargs): + return {"items": [], "total": 0} + + monkeypatch.setattr( + workflow_app_log_module.WorkflowAppService, + "get_paginate_workflow_app_logs", + fake_get_paginate, + ) + + with app.test_request_context("/?page=1&limit=20"): + result = method(app_model=SimpleNamespace(id="app-1")) + + assert result == {"items": [], "total": 0} + + +# ========== Workflow Draft Variable Tests ========== +class TestWorkflowDraftVariableEndpoints: + """Tests for workflow draft variable endpoints.""" + + def test_workflow_variable_creation(self): + """Test workflow variable creation.""" + payload = WorkflowDraftVariableUpdatePayload(name="var1", value="test") + assert payload.name == "var1" + + def test_workflow_variable_collection_get(self, app, monkeypatch): + api = workflow_draft_variable_module.WorkflowVariableCollectionApi() + method = _unwrap(api.get) + + monkeypatch.setattr(workflow_draft_variable_module, "db", SimpleNamespace(engine=MagicMock())) + + class DummySession: + def __enter__(self): + return "session" + + def __exit__(self, exc_type, exc, tb): + return False + + class DummyDraftService: + def __init__(self, session): + self.session = session + + def list_variables_without_values(self, **_kwargs): + return {"items": [], "total": 0} + + monkeypatch.setattr(workflow_draft_variable_module, "Session", lambda *args, **kwargs: DummySession()) + + class DummyWorkflowService: + def is_workflow_exist(self, *args, **kwargs): + return True + + monkeypatch.setattr(workflow_draft_variable_module, "WorkflowDraftVariableService", DummyDraftService) + monkeypatch.setattr(workflow_draft_variable_module, "WorkflowService", DummyWorkflowService) + + with app.test_request_context("/?page=1&limit=20"): + result = method(app_model=SimpleNamespace(id="app-1")) + + assert result == {"items": [], "total": 0} + + +# ========== Workflow Statistic Tests ========== +class TestWorkflowStatisticEndpoints: + """Tests for workflow statistic endpoints.""" + + def test_workflow_statistic_time_range(self): + """Test workflow statistic time range query.""" + query = WorkflowStatisticQuery(start="2024-01-01", end="2024-12-31") + assert query.start == "2024-01-01" + + def test_workflow_statistic_blank_to_none(self): + query = WorkflowStatisticQuery(start="", end="") + assert query.start is None + assert query.end is None + + def test_workflow_daily_runs_statistic(self, app, monkeypatch): + monkeypatch.setattr(workflow_statistic_module, "db", SimpleNamespace(engine=MagicMock())) + monkeypatch.setattr( + workflow_statistic_module.DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda *_args, **_kwargs: SimpleNamespace(get_daily_runs_statistics=lambda **_kw: [{"date": "2024-01-01"}]), + ) + monkeypatch.setattr( + workflow_statistic_module, + "current_account_with_tenant", + lambda: (SimpleNamespace(timezone="UTC"), "t1"), + ) + monkeypatch.setattr( + workflow_statistic_module, + "parse_time_range", + lambda *_args, **_kwargs: (None, None), + ) + + api = workflow_statistic_module.WorkflowDailyRunsStatistic() + method = _unwrap(api.get) + + with app.test_request_context("/"): + response = method(app_model=SimpleNamespace(tenant_id="t1", id="app-1")) + + assert response.get_json() == {"data": [{"date": "2024-01-01"}]} + + def test_workflow_daily_terminals_statistic(self, app, monkeypatch): + monkeypatch.setattr(workflow_statistic_module, "db", SimpleNamespace(engine=MagicMock())) + monkeypatch.setattr( + workflow_statistic_module.DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda *_args, **_kwargs: SimpleNamespace( + get_daily_terminals_statistics=lambda **_kw: [{"date": "2024-01-02"}] + ), + ) + monkeypatch.setattr( + workflow_statistic_module, + "current_account_with_tenant", + lambda: (SimpleNamespace(timezone="UTC"), "t1"), + ) + monkeypatch.setattr( + workflow_statistic_module, + "parse_time_range", + lambda *_args, **_kwargs: (None, None), + ) + + api = workflow_statistic_module.WorkflowDailyTerminalsStatistic() + method = _unwrap(api.get) + + with app.test_request_context("/"): + response = method(app_model=SimpleNamespace(tenant_id="t1", id="app-1")) + + assert response.get_json() == {"data": [{"date": "2024-01-02"}]} + + +# ========== Workflow Trigger Tests ========== +class TestWorkflowTriggerEndpoints: + """Tests for workflow trigger endpoints.""" + + def test_webhook_trigger_payload(self): + """Test webhook trigger payload.""" + payload = Parser(node_id="node-1") + assert payload.node_id == "node-1" + + enable_payload = ParserEnable(trigger_id="trigger-1", enable_trigger=True) + assert enable_payload.enable_trigger is True + + def test_webhook_trigger_api_get(self, app, monkeypatch): + api = workflow_trigger_module.WebhookTriggerApi() + method = _unwrap(api.get) + + monkeypatch.setattr(workflow_trigger_module, "db", SimpleNamespace(engine=MagicMock())) + + trigger = MagicMock() + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = trigger + + class DummySession: + def __enter__(self): + return session + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr(workflow_trigger_module, "Session", lambda *_args, **_kwargs: DummySession()) + + with app.test_request_context("/?node_id=node-1"): + result = method(app_model=SimpleNamespace(id="app-1")) + + assert result is trigger + + +# ========== Wraps Tests ========== +class TestWrapsEndpoints: + """Tests for wraps utility functions.""" + + def test_get_app_model_context(self): + """Test get_app_model wrapper context.""" + # These are decorator functions, so we test their availability + assert hasattr(wraps_module, "get_app_model") + + +# ========== MCP Server Tests ========== +class TestMCPServerEndpoints: + """Tests for MCP server endpoints.""" + + def test_mcp_server_connection(self): + """Test MCP server connection.""" + payload = MCPServerCreatePayload(parameters={"url": "http://localhost:3000"}) + assert payload.parameters["url"] == "http://localhost:3000" + + def test_mcp_server_update_payload(self): + payload = MCPServerUpdatePayload(id="server-1", parameters={"timeout": 30}, status="active") + assert payload.status == "active" + + +# ========== Error Handling Tests ========== +class TestErrorHandling: + """Tests for error handling in various endpoints.""" + + def test_annotation_list_query_validation(self): + """Test annotation list query validation.""" + with pytest.raises(ValueError): + annotation_module.AnnotationListQuery(page=0) + + +# ========== Integration-like Tests ========== +class TestPayloadIntegration: + """Integration tests for payload handling.""" + + def test_multiple_payload_types(self): + """Test handling of multiple payload types.""" + payloads = [ + annotation_module.AnnotationReplyPayload( + score_threshold=0.5, embedding_provider_name="openai", embedding_model_name="text-embedding-3-small" + ), + message_module.MessageFeedbackPayload(message_id=str(uuid.uuid4()), rating="like"), + statistic_module.StatisticTimeRangeQuery(start="2024-01-01"), + ] + assert len(payloads) == 3 + assert all(p is not None for p in payloads) diff --git a/api/tests/unit_tests/controllers/console/app/test_app_import_api.py b/api/tests/unit_tests/controllers/console/app/test_app_import_api.py new file mode 100644 index 0000000000..91f58460ac --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_app_import_api.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from controllers.console.app import app_import as app_import_module +from services.app_dsl_service import ImportStatus + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +class _Result: + def __init__(self, status: ImportStatus, app_id: str | None = "app-1"): + self.status = status + self.app_id = app_id + + def model_dump(self, mode: str = "json"): + return {"status": self.status, "app_id": self.app_id} + + +class _SessionContext: + def __init__(self, session): + self._session = session + + def __enter__(self): + return self._session + + def __exit__(self, exc_type, exc, tb): + return False + + +def _install_session(monkeypatch: pytest.MonkeyPatch, session: MagicMock) -> None: + monkeypatch.setattr(app_import_module, "Session", lambda *_: _SessionContext(session)) + monkeypatch.setattr(app_import_module, "db", SimpleNamespace(engine=object())) + + +def _install_features(monkeypatch: pytest.MonkeyPatch, enabled: bool) -> None: + features = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=enabled)) + monkeypatch.setattr(app_import_module.FeatureService, "get_system_features", lambda: features) + + +def test_import_post_returns_failed_status(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = app_import_module.AppImportApi() + method = _unwrap(api.post) + + session = MagicMock() + _install_session(monkeypatch, session) + _install_features(monkeypatch, enabled=False) + monkeypatch.setattr( + app_import_module.AppDslService, + "import_app", + lambda *_args, **_kwargs: _Result(ImportStatus.FAILED, app_id=None), + ) + monkeypatch.setattr(app_import_module, "current_account_with_tenant", lambda: (SimpleNamespace(id="u1"), "t1")) + + with app.test_request_context("/console/api/apps/imports", method="POST", json={"mode": "yaml-content"}): + response, status = method() + + session.commit.assert_called_once() + assert status == 400 + assert response["status"] == ImportStatus.FAILED + + +def test_import_post_returns_pending_status(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = app_import_module.AppImportApi() + method = _unwrap(api.post) + + session = MagicMock() + _install_session(monkeypatch, session) + _install_features(monkeypatch, enabled=False) + monkeypatch.setattr( + app_import_module.AppDslService, + "import_app", + lambda *_args, **_kwargs: _Result(ImportStatus.PENDING), + ) + monkeypatch.setattr(app_import_module, "current_account_with_tenant", lambda: (SimpleNamespace(id="u1"), "t1")) + + with app.test_request_context("/console/api/apps/imports", method="POST", json={"mode": "yaml-content"}): + response, status = method() + + session.commit.assert_called_once() + assert status == 202 + assert response["status"] == ImportStatus.PENDING + + +def test_import_post_updates_webapp_auth_when_enabled(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = app_import_module.AppImportApi() + method = _unwrap(api.post) + + session = MagicMock() + _install_session(monkeypatch, session) + _install_features(monkeypatch, enabled=True) + monkeypatch.setattr( + app_import_module.AppDslService, + "import_app", + lambda *_args, **_kwargs: _Result(ImportStatus.COMPLETED, app_id="app-123"), + ) + update_access = MagicMock() + monkeypatch.setattr(app_import_module.EnterpriseService.WebAppAuth, "update_app_access_mode", update_access) + monkeypatch.setattr(app_import_module, "current_account_with_tenant", lambda: (SimpleNamespace(id="u1"), "t1")) + + with app.test_request_context("/console/api/apps/imports", method="POST", json={"mode": "yaml-content"}): + response, status = method() + + session.commit.assert_called_once() + update_access.assert_called_once_with("app-123", "private") + assert status == 200 + assert response["status"] == ImportStatus.COMPLETED + + +def test_import_confirm_returns_failed_status(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = app_import_module.AppImportConfirmApi() + method = _unwrap(api.post) + + session = MagicMock() + _install_session(monkeypatch, session) + monkeypatch.setattr( + app_import_module.AppDslService, + "confirm_import", + lambda *_args, **_kwargs: _Result(ImportStatus.FAILED), + ) + monkeypatch.setattr(app_import_module, "current_account_with_tenant", lambda: (SimpleNamespace(id="u1"), "t1")) + + with app.test_request_context("/console/api/apps/imports/import-1/confirm", method="POST"): + response, status = method(import_id="import-1") + + session.commit.assert_called_once() + assert status == 400 + assert response["status"] == ImportStatus.FAILED + + +def test_import_check_dependencies_returns_result(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = app_import_module.AppImportCheckDependenciesApi() + method = _unwrap(api.get) + + session = MagicMock() + _install_session(monkeypatch, session) + monkeypatch.setattr( + app_import_module.AppDslService, + "check_dependencies", + lambda *_args, **_kwargs: SimpleNamespace(model_dump=lambda mode="json": {"leaked_dependencies": []}), + ) + + with app.test_request_context("/console/api/apps/imports/app-1/check-dependencies", method="GET"): + response, status = method(app_model=SimpleNamespace(id="app-1")) + + assert status == 200 + assert response["leaked_dependencies"] == [] diff --git a/api/tests/unit_tests/controllers/console/app/test_audio.py b/api/tests/unit_tests/controllers/console/app/test_audio.py new file mode 100644 index 0000000000..021e9a0784 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_audio.py @@ -0,0 +1,292 @@ +from __future__ import annotations + +import io +from types import SimpleNamespace + +import pytest +from werkzeug.datastructures import FileStorage +from werkzeug.exceptions import InternalServerError + +from controllers.console.app.audio import ChatMessageAudioApi, ChatMessageTextApi, TextModesApi +from controllers.console.app.error import ( + AppUnavailableError, + AudioTooLargeError, + CompletionRequestError, + NoAudioUploadedError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderNotSupportSpeechToTextError, + ProviderQuotaExceededError, + UnsupportedAudioTypeError, +) +from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError +from dify_graph.model_runtime.errors.invoke import InvokeError +from services.audio_service import AudioService +from services.errors.app_model_config import AppModelConfigBrokenError +from services.errors.audio import ( + AudioTooLargeServiceError, + NoAudioUploadedServiceError, + ProviderNotSupportSpeechToTextServiceError, + ProviderNotSupportTextToSpeechLanageServiceError, + UnsupportedAudioTypeServiceError, +) + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +def _file_data(): + return FileStorage(stream=io.BytesIO(b"audio"), filename="audio.wav", content_type="audio/wav") + + +def test_console_audio_api_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(AudioService, "transcript_asr", lambda **_kwargs: {"text": "ok"}) + api = ChatMessageAudioApi() + handler = _unwrap(api.post) + app_model = SimpleNamespace(id="a1") + + with app.test_request_context("/console/api/apps/app/audio-to-text", method="POST", data={"file": _file_data()}): + response = handler(app_model=app_model) + + assert response == {"text": "ok"} + + +@pytest.mark.parametrize( + ("exc", "expected"), + [ + (AppModelConfigBrokenError(), AppUnavailableError), + (NoAudioUploadedServiceError(), NoAudioUploadedError), + (AudioTooLargeServiceError("too big"), AudioTooLargeError), + (UnsupportedAudioTypeServiceError(), UnsupportedAudioTypeError), + (ProviderNotSupportSpeechToTextServiceError(), ProviderNotSupportSpeechToTextError), + (ProviderTokenNotInitError("token"), ProviderNotInitializeError), + (QuotaExceededError(), ProviderQuotaExceededError), + (ModelCurrentlyNotSupportError(), ProviderModelCurrentlyNotSupportError), + (InvokeError("invoke"), CompletionRequestError), + ], +) +def test_console_audio_api_error_mapping(app, monkeypatch: pytest.MonkeyPatch, exc, expected) -> None: + monkeypatch.setattr(AudioService, "transcript_asr", lambda **_kwargs: (_ for _ in ()).throw(exc)) + api = ChatMessageAudioApi() + handler = _unwrap(api.post) + app_model = SimpleNamespace(id="a1") + + with app.test_request_context("/console/api/apps/app/audio-to-text", method="POST", data={"file": _file_data()}): + with pytest.raises(expected): + handler(app_model=app_model) + + +def test_console_audio_api_unhandled_error(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(AudioService, "transcript_asr", lambda **_kwargs: (_ for _ in ()).throw(RuntimeError("boom"))) + api = ChatMessageAudioApi() + handler = _unwrap(api.post) + app_model = SimpleNamespace(id="a1") + + with app.test_request_context("/console/api/apps/app/audio-to-text", method="POST", data={"file": _file_data()}): + with pytest.raises(InternalServerError): + handler(app_model=app_model) + + +def test_console_text_api_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(AudioService, "transcript_tts", lambda **_kwargs: {"audio": "ok"}) + + api = ChatMessageTextApi() + handler = _unwrap(api.post) + app_model = SimpleNamespace(id="a1") + + with app.test_request_context( + "/console/api/apps/app/text-to-audio", + method="POST", + json={"text": "hello", "voice": "v"}, + ): + response = handler(app_model=app_model) + + assert response == {"audio": "ok"} + + +def test_console_text_api_error_mapping(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(AudioService, "transcript_tts", lambda **_kwargs: (_ for _ in ()).throw(QuotaExceededError())) + + api = ChatMessageTextApi() + handler = _unwrap(api.post) + app_model = SimpleNamespace(id="a1") + + with app.test_request_context( + "/console/api/apps/app/text-to-audio", + method="POST", + json={"text": "hello"}, + ): + with pytest.raises(ProviderQuotaExceededError): + handler(app_model=app_model) + + +def test_console_text_modes_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(AudioService, "transcript_tts_voices", lambda **_kwargs: ["voice-1"]) + + api = TextModesApi() + handler = _unwrap(api.get) + app_model = SimpleNamespace(tenant_id="t1") + + with app.test_request_context("/console/api/apps/app/text-to-audio/voices?language=en", method="GET"): + response = handler(app_model=app_model) + + assert response == ["voice-1"] + + +def test_console_text_modes_language_error(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + AudioService, + "transcript_tts_voices", + lambda **_kwargs: (_ for _ in ()).throw(ProviderNotSupportTextToSpeechLanageServiceError()), + ) + + api = TextModesApi() + handler = _unwrap(api.get) + app_model = SimpleNamespace(tenant_id="t1") + + with app.test_request_context("/console/api/apps/app/text-to-audio/voices?language=en", method="GET"): + with pytest.raises(AppUnavailableError): + handler(app_model=app_model) + + +def test_audio_to_text_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = ChatMessageAudioApi() + method = _unwrap(api.post) + + response_payload = {"text": "hello"} + monkeypatch.setattr(AudioService, "transcript_asr", lambda **_kwargs: response_payload) + + app_model = SimpleNamespace(id="app-1") + + data = {"file": (io.BytesIO(b"x"), "sample.wav")} + with app.test_request_context( + "/console/api/apps/app-1/audio-to-text", + method="POST", + data=data, + content_type="multipart/form-data", + ): + response = method(app_model=app_model) + + assert response == response_payload + + +def test_audio_to_text_maps_audio_too_large(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = ChatMessageAudioApi() + method = _unwrap(api.post) + + monkeypatch.setattr( + AudioService, + "transcript_asr", + lambda **_kwargs: (_ for _ in ()).throw(AudioTooLargeServiceError("too large")), + ) + + app_model = SimpleNamespace(id="app-1") + + data = {"file": (io.BytesIO(b"x"), "sample.wav")} + with app.test_request_context( + "/console/api/apps/app-1/audio-to-text", + method="POST", + data=data, + content_type="multipart/form-data", + ): + with pytest.raises(AudioTooLargeError): + method(app_model=app_model) + + +def test_text_to_audio_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = ChatMessageTextApi() + method = _unwrap(api.post) + + monkeypatch.setattr(AudioService, "transcript_tts", lambda **_kwargs: {"audio": "ok"}) + + app_model = SimpleNamespace(id="app-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio", + method="POST", + json={"text": "hello"}, + ): + response = method(app_model=app_model) + + assert response == {"audio": "ok"} + + +def test_text_to_audio_voices_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = TextModesApi() + method = _unwrap(api.get) + + monkeypatch.setattr(AudioService, "transcript_tts_voices", lambda **_kwargs: ["voice-1"]) + + app_model = SimpleNamespace(tenant_id="tenant-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio/voices", + method="GET", + query_string={"language": "en-US"}, + ): + response = method(app_model=app_model) + + assert response == ["voice-1"] + + +def test_audio_to_text_with_invalid_file(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = ChatMessageAudioApi() + method = _unwrap(api.post) + + monkeypatch.setattr(AudioService, "transcript_asr", lambda **_kwargs: {"text": "test"}) + + app_model = SimpleNamespace(id="app-1") + + data = {"file": (io.BytesIO(b"invalid"), "sample.xyz")} + with app.test_request_context( + "/console/api/apps/app-1/audio-to-text", + method="POST", + data=data, + content_type="multipart/form-data", + ): + # Should not raise, AudioService is mocked + response = method(app_model=app_model) + assert response == {"text": "test"} + + +def test_text_to_audio_with_language_param(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = ChatMessageTextApi() + method = _unwrap(api.post) + + monkeypatch.setattr(AudioService, "transcript_tts", lambda **_kwargs: {"audio": "test"}) + + app_model = SimpleNamespace(id="app-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio", + method="POST", + json={"text": "hello", "language": "en-US"}, + ): + response = method(app_model=app_model) + assert response == {"audio": "test"} + + +def test_text_to_audio_voices_with_language_filter(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = TextModesApi() + method = _unwrap(api.get) + + monkeypatch.setattr( + AudioService, + "transcript_tts_voices", + lambda **_kwargs: [{"id": "voice-1", "name": "Voice 1"}], + ) + + app_model = SimpleNamespace(tenant_id="tenant-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio/voices?language=en-US", + method="GET", + ): + response = method(app_model=app_model) + assert isinstance(response, list) diff --git a/api/tests/unit_tests/controllers/console/app/test_audio_api.py b/api/tests/unit_tests/controllers/console/app/test_audio_api.py new file mode 100644 index 0000000000..8b71837c29 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_audio_api.py @@ -0,0 +1,156 @@ +from __future__ import annotations + +import io +from types import SimpleNamespace + +import pytest + +from controllers.console.app import audio as audio_module +from controllers.console.app.error import AudioTooLargeError +from services.errors.audio import AudioTooLargeServiceError + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +def test_audio_to_text_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = audio_module.ChatMessageAudioApi() + method = _unwrap(api.post) + + response_payload = {"text": "hello"} + monkeypatch.setattr(audio_module.AudioService, "transcript_asr", lambda **_kwargs: response_payload) + + app_model = SimpleNamespace(id="app-1") + + data = {"file": (io.BytesIO(b"x"), "sample.wav")} + with app.test_request_context( + "/console/api/apps/app-1/audio-to-text", + method="POST", + data=data, + content_type="multipart/form-data", + ): + response = method(app_model=app_model) + + assert response == response_payload + + +def test_audio_to_text_maps_audio_too_large(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = audio_module.ChatMessageAudioApi() + method = _unwrap(api.post) + + monkeypatch.setattr( + audio_module.AudioService, + "transcript_asr", + lambda **_kwargs: (_ for _ in ()).throw(AudioTooLargeServiceError("too large")), + ) + + app_model = SimpleNamespace(id="app-1") + + data = {"file": (io.BytesIO(b"x"), "sample.wav")} + with app.test_request_context( + "/console/api/apps/app-1/audio-to-text", + method="POST", + data=data, + content_type="multipart/form-data", + ): + with pytest.raises(AudioTooLargeError): + method(app_model=app_model) + + +def test_text_to_audio_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = audio_module.ChatMessageTextApi() + method = _unwrap(api.post) + + monkeypatch.setattr(audio_module.AudioService, "transcript_tts", lambda **_kwargs: {"audio": "ok"}) + + app_model = SimpleNamespace(id="app-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio", + method="POST", + json={"text": "hello"}, + ): + response = method(app_model=app_model) + + assert response == {"audio": "ok"} + + +def test_text_to_audio_voices_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = audio_module.TextModesApi() + method = _unwrap(api.get) + + monkeypatch.setattr(audio_module.AudioService, "transcript_tts_voices", lambda **_kwargs: ["voice-1"]) + + app_model = SimpleNamespace(tenant_id="tenant-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio/voices", + method="GET", + query_string={"language": "en-US"}, + ): + response = method(app_model=app_model) + + assert response == ["voice-1"] + + +def test_audio_to_text_with_invalid_file(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = audio_module.ChatMessageAudioApi() + method = _unwrap(api.post) + + monkeypatch.setattr(audio_module.AudioService, "transcript_asr", lambda **_kwargs: {"text": "test"}) + + app_model = SimpleNamespace(id="app-1") + + data = {"file": (io.BytesIO(b"invalid"), "sample.xyz")} + with app.test_request_context( + "/console/api/apps/app-1/audio-to-text", + method="POST", + data=data, + content_type="multipart/form-data", + ): + # Should not raise, AudioService is mocked + response = method(app_model=app_model) + assert response == {"text": "test"} + + +def test_text_to_audio_with_language_param(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = audio_module.ChatMessageTextApi() + method = _unwrap(api.post) + + monkeypatch.setattr(audio_module.AudioService, "transcript_tts", lambda **_kwargs: {"audio": "test"}) + + app_model = SimpleNamespace(id="app-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio", + method="POST", + json={"text": "hello", "language": "en-US"}, + ): + response = method(app_model=app_model) + assert response == {"audio": "test"} + + +def test_text_to_audio_voices_with_language_filter(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = audio_module.TextModesApi() + method = _unwrap(api.get) + + monkeypatch.setattr( + audio_module.AudioService, + "transcript_tts_voices", + lambda **_kwargs: [{"id": "voice-1", "name": "Voice 1"}], + ) + + app_model = SimpleNamespace(tenant_id="tenant-1") + + with app.test_request_context( + "/console/api/apps/app-1/text-to-audio/voices?language=en-US", + method="GET", + ): + response = method(app_model=app_model) + assert isinstance(response, list) diff --git a/api/tests/unit_tests/controllers/console/app/test_conversation_api.py b/api/tests/unit_tests/controllers/console/app/test_conversation_api.py new file mode 100644 index 0000000000..5db8e5c332 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_conversation_api.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest +from werkzeug.exceptions import BadRequest, NotFound + +from controllers.console.app import conversation as conversation_module +from models.model import AppMode +from services.errors.conversation import ConversationNotExistsError + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +def _make_account(): + return SimpleNamespace(timezone="UTC", id="u1") + + +def test_completion_conversation_list_returns_paginated_result(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = conversation_module.CompletionConversationApi() + method = _unwrap(api.get) + + account = _make_account() + monkeypatch.setattr(conversation_module, "current_account_with_tenant", lambda: (account, "t1")) + monkeypatch.setattr(conversation_module, "parse_time_range", lambda *_args, **_kwargs: (None, None)) + + paginate_result = MagicMock() + monkeypatch.setattr(conversation_module.db, "paginate", lambda *_args, **_kwargs: paginate_result) + + with app.test_request_context("/console/api/apps/app-1/completion-conversations", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + assert response is paginate_result + + +def test_completion_conversation_list_invalid_time_range(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = conversation_module.CompletionConversationApi() + method = _unwrap(api.get) + + account = _make_account() + monkeypatch.setattr(conversation_module, "current_account_with_tenant", lambda: (account, "t1")) + monkeypatch.setattr( + conversation_module, + "parse_time_range", + lambda *_args, **_kwargs: (_ for _ in ()).throw(ValueError("bad range")), + ) + + with app.test_request_context( + "/console/api/apps/app-1/completion-conversations", + method="GET", + query_string={"start": "bad"}, + ): + with pytest.raises(BadRequest): + method(app_model=SimpleNamespace(id="app-1")) + + +def test_chat_conversation_list_advanced_chat_calls_paginate(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = conversation_module.ChatConversationApi() + method = _unwrap(api.get) + + account = _make_account() + monkeypatch.setattr(conversation_module, "current_account_with_tenant", lambda: (account, "t1")) + monkeypatch.setattr(conversation_module, "parse_time_range", lambda *_args, **_kwargs: (None, None)) + + paginate_result = MagicMock() + monkeypatch.setattr(conversation_module.db, "paginate", lambda *_args, **_kwargs: paginate_result) + + with app.test_request_context("/console/api/apps/app-1/chat-conversations", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1", mode=AppMode.ADVANCED_CHAT)) + + assert response is paginate_result + + +def test_get_conversation_updates_read_at(monkeypatch: pytest.MonkeyPatch) -> None: + conversation = SimpleNamespace(id="c1", app_id="app-1") + + query = MagicMock() + query.where.return_value = query + query.first.return_value = conversation + + session = MagicMock() + session.query.return_value = query + + monkeypatch.setattr(conversation_module, "current_account_with_tenant", lambda: (_make_account(), "t1")) + monkeypatch.setattr(conversation_module.db, "session", session) + + result = conversation_module._get_conversation(SimpleNamespace(id="app-1"), "c1") + + assert result is conversation + session.execute.assert_called_once() + session.commit.assert_called_once() + session.refresh.assert_called_once_with(conversation) + + +def test_get_conversation_missing_raises_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + query = MagicMock() + query.where.return_value = query + query.first.return_value = None + + session = MagicMock() + session.query.return_value = query + + monkeypatch.setattr(conversation_module, "current_account_with_tenant", lambda: (_make_account(), "t1")) + monkeypatch.setattr(conversation_module.db, "session", session) + + with pytest.raises(NotFound): + conversation_module._get_conversation(SimpleNamespace(id="app-1"), "missing") + + +def test_completion_conversation_delete_maps_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + api = conversation_module.CompletionConversationDetailApi() + method = _unwrap(api.delete) + + monkeypatch.setattr(conversation_module, "current_account_with_tenant", lambda: (_make_account(), "t1")) + monkeypatch.setattr( + conversation_module.ConversationService, + "delete", + lambda *_args, **_kwargs: (_ for _ in ()).throw(ConversationNotExistsError()), + ) + + with pytest.raises(NotFound): + method(app_model=SimpleNamespace(id="app-1"), conversation_id="c1") diff --git a/api/tests/unit_tests/controllers/console/app/test_generator_api.py b/api/tests/unit_tests/controllers/console/app/test_generator_api.py new file mode 100644 index 0000000000..f83bc18da3 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_generator_api.py @@ -0,0 +1,260 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from controllers.console.app import generator as generator_module +from controllers.console.app.error import ProviderNotInitializeError +from core.errors.error import ProviderTokenNotInitError + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +def _model_config_payload(): + return {"provider": "openai", "name": "gpt-4o", "mode": "chat", "completion_params": {}} + + +def _install_workflow_service(monkeypatch: pytest.MonkeyPatch, workflow): + class _Service: + def get_draft_workflow(self, app_model): + return workflow + + monkeypatch.setattr(generator_module, "WorkflowService", lambda: _Service()) + + +def test_rule_generate_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.RuleGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + monkeypatch.setattr(generator_module.LLMGenerator, "generate_rule_config", lambda **_kwargs: {"rules": []}) + + with app.test_request_context( + "/console/api/rule-generate", + method="POST", + json={"instruction": "do it", "model_config": _model_config_payload()}, + ): + response = method() + + assert response == {"rules": []} + + +def test_rule_code_generate_maps_token_error(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.RuleCodeGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + + def _raise(*_args, **_kwargs): + raise ProviderTokenNotInitError("missing token") + + monkeypatch.setattr(generator_module.LLMGenerator, "generate_code", _raise) + + with app.test_request_context( + "/console/api/rule-code-generate", + method="POST", + json={"instruction": "do it", "model_config": _model_config_payload()}, + ): + with pytest.raises(ProviderNotInitializeError): + method() + + +def test_instruction_generate_app_not_found(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.InstructionGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + + query = SimpleNamespace(where=lambda *_args, **_kwargs: query, first=lambda: None) + monkeypatch.setattr(generator_module.db, "session", SimpleNamespace(query=lambda *_args, **_kwargs: query)) + + with app.test_request_context( + "/console/api/instruction-generate", + method="POST", + json={ + "flow_id": "app-1", + "node_id": "node-1", + "instruction": "do", + "model_config": _model_config_payload(), + }, + ): + response, status = method() + + assert status == 400 + assert response["error"] == "app app-1 not found" + + +def test_instruction_generate_workflow_not_found(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.InstructionGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + + app_model = SimpleNamespace(id="app-1") + query = SimpleNamespace(where=lambda *_args, **_kwargs: query, first=lambda: app_model) + monkeypatch.setattr(generator_module.db, "session", SimpleNamespace(query=lambda *_args, **_kwargs: query)) + _install_workflow_service(monkeypatch, workflow=None) + + with app.test_request_context( + "/console/api/instruction-generate", + method="POST", + json={ + "flow_id": "app-1", + "node_id": "node-1", + "instruction": "do", + "model_config": _model_config_payload(), + }, + ): + response, status = method() + + assert status == 400 + assert response["error"] == "workflow app-1 not found" + + +def test_instruction_generate_node_missing(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.InstructionGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + + app_model = SimpleNamespace(id="app-1") + query = SimpleNamespace(where=lambda *_args, **_kwargs: query, first=lambda: app_model) + monkeypatch.setattr(generator_module.db, "session", SimpleNamespace(query=lambda *_args, **_kwargs: query)) + + workflow = SimpleNamespace(graph_dict={"nodes": []}) + _install_workflow_service(monkeypatch, workflow=workflow) + + with app.test_request_context( + "/console/api/instruction-generate", + method="POST", + json={ + "flow_id": "app-1", + "node_id": "node-1", + "instruction": "do", + "model_config": _model_config_payload(), + }, + ): + response, status = method() + + assert status == 400 + assert response["error"] == "node node-1 not found" + + +def test_instruction_generate_code_node(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.InstructionGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + + app_model = SimpleNamespace(id="app-1") + query = SimpleNamespace(where=lambda *_args, **_kwargs: query, first=lambda: app_model) + monkeypatch.setattr(generator_module.db, "session", SimpleNamespace(query=lambda *_args, **_kwargs: query)) + + workflow = SimpleNamespace( + graph_dict={ + "nodes": [ + {"id": "node-1", "data": {"type": "code"}}, + ] + } + ) + _install_workflow_service(monkeypatch, workflow=workflow) + monkeypatch.setattr(generator_module.LLMGenerator, "generate_code", lambda **_kwargs: {"code": "x"}) + + with app.test_request_context( + "/console/api/instruction-generate", + method="POST", + json={ + "flow_id": "app-1", + "node_id": "node-1", + "instruction": "do", + "model_config": _model_config_payload(), + }, + ): + response = method() + + assert response == {"code": "x"} + + +def test_instruction_generate_legacy_modify(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.InstructionGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + monkeypatch.setattr( + generator_module.LLMGenerator, + "instruction_modify_legacy", + lambda **_kwargs: {"instruction": "ok"}, + ) + + with app.test_request_context( + "/console/api/instruction-generate", + method="POST", + json={ + "flow_id": "app-1", + "node_id": "", + "current": "old", + "instruction": "do", + "model_config": _model_config_payload(), + }, + ): + response = method() + + assert response == {"instruction": "ok"} + + +def test_instruction_generate_incompatible_params(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = generator_module.InstructionGenerateApi() + method = _unwrap(api.post) + + monkeypatch.setattr(generator_module, "current_account_with_tenant", lambda: (None, "t1")) + + with app.test_request_context( + "/console/api/instruction-generate", + method="POST", + json={ + "flow_id": "app-1", + "node_id": "", + "current": "", + "instruction": "do", + "model_config": _model_config_payload(), + }, + ): + response, status = method() + + assert status == 400 + assert response["error"] == "incompatible parameters" + + +def test_instruction_template_prompt(app) -> None: + api = generator_module.InstructionGenerationTemplateApi() + method = _unwrap(api.post) + + with app.test_request_context( + "/console/api/instruction-generate/template", + method="POST", + json={"type": "prompt"}, + ): + response = method() + + assert "data" in response + + +def test_instruction_template_invalid_type(app) -> None: + api = generator_module.InstructionGenerationTemplateApi() + method = _unwrap(api.post) + + with app.test_request_context( + "/console/api/instruction-generate/template", + method="POST", + json={"type": "unknown"}, + ): + with pytest.raises(ValueError): + method() diff --git a/api/tests/unit_tests/controllers/console/app/test_message_api.py b/api/tests/unit_tests/controllers/console/app/test_message_api.py new file mode 100644 index 0000000000..a76e958829 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_message_api.py @@ -0,0 +1,122 @@ +from __future__ import annotations + +import pytest + +from controllers.console.app import message as message_module + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +def test_chat_messages_query_valid(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test valid ChatMessagesQuery with all fields.""" + query = message_module.ChatMessagesQuery( + conversation_id="550e8400-e29b-41d4-a716-446655440000", + first_id="550e8400-e29b-41d4-a716-446655440001", + limit=50, + ) + assert query.limit == 50 + + +def test_chat_messages_query_defaults(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test ChatMessagesQuery with defaults.""" + query = message_module.ChatMessagesQuery(conversation_id="550e8400-e29b-41d4-a716-446655440000") + assert query.first_id is None + assert query.limit == 20 + + +def test_chat_messages_query_empty_first_id(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test ChatMessagesQuery converts empty first_id to None.""" + query = message_module.ChatMessagesQuery( + conversation_id="550e8400-e29b-41d4-a716-446655440000", + first_id="", + ) + assert query.first_id is None + + +def test_message_feedback_payload_valid_like(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test MessageFeedbackPayload with like rating.""" + payload = message_module.MessageFeedbackPayload( + message_id="550e8400-e29b-41d4-a716-446655440000", + rating="like", + content="Good answer", + ) + assert payload.rating == "like" + assert payload.content == "Good answer" + + +def test_message_feedback_payload_valid_dislike(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test MessageFeedbackPayload with dislike rating.""" + payload = message_module.MessageFeedbackPayload( + message_id="550e8400-e29b-41d4-a716-446655440000", + rating="dislike", + ) + assert payload.rating == "dislike" + + +def test_message_feedback_payload_no_rating(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test MessageFeedbackPayload without rating.""" + payload = message_module.MessageFeedbackPayload(message_id="550e8400-e29b-41d4-a716-446655440000") + assert payload.rating is None + + +def test_feedback_export_query_defaults(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test FeedbackExportQuery with default format.""" + query = message_module.FeedbackExportQuery() + assert query.format == "csv" + assert query.from_source is None + + +def test_feedback_export_query_json_format(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test FeedbackExportQuery with JSON format.""" + query = message_module.FeedbackExportQuery(format="json") + assert query.format == "json" + + +def test_feedback_export_query_has_comment_true(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test FeedbackExportQuery with has_comment as true string.""" + query = message_module.FeedbackExportQuery(has_comment="true") + assert query.has_comment is True + + +def test_feedback_export_query_has_comment_false(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test FeedbackExportQuery with has_comment as false string.""" + query = message_module.FeedbackExportQuery(has_comment="false") + assert query.has_comment is False + + +def test_feedback_export_query_has_comment_1(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test FeedbackExportQuery with has_comment as 1.""" + query = message_module.FeedbackExportQuery(has_comment="1") + assert query.has_comment is True + + +def test_feedback_export_query_has_comment_0(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test FeedbackExportQuery with has_comment as 0.""" + query = message_module.FeedbackExportQuery(has_comment="0") + assert query.has_comment is False + + +def test_feedback_export_query_rating_filter(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test FeedbackExportQuery with rating filter.""" + query = message_module.FeedbackExportQuery(rating="like") + assert query.rating == "like" + + +def test_annotation_count_response(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test AnnotationCountResponse creation.""" + response = message_module.AnnotationCountResponse(count=10) + assert response.count == 10 + + +def test_suggested_questions_response(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test SuggestedQuestionsResponse creation.""" + response = message_module.SuggestedQuestionsResponse(data=["What is AI?", "How does ML work?"]) + assert len(response.data) == 2 + assert response.data[0] == "What is AI?" diff --git a/api/tests/unit_tests/controllers/console/app/test_model_config_api.py b/api/tests/unit_tests/controllers/console/app/test_model_config_api.py new file mode 100644 index 0000000000..61d92bb5c7 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_model_config_api.py @@ -0,0 +1,151 @@ +from __future__ import annotations + +import json +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from controllers.console.app import model_config as model_config_module +from models.model import AppMode, AppModelConfig + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +def test_post_updates_app_model_config_for_chat(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = model_config_module.ModelConfigResource() + method = _unwrap(api.post) + + app_model = SimpleNamespace( + id="app-1", + mode=AppMode.CHAT.value, + is_agent=False, + app_model_config_id=None, + updated_by=None, + updated_at=None, + ) + monkeypatch.setattr( + model_config_module.AppModelConfigService, + "validate_configuration", + lambda **_kwargs: {"pre_prompt": "hi"}, + ) + monkeypatch.setattr(model_config_module, "current_account_with_tenant", lambda: (SimpleNamespace(id="u1"), "t1")) + + session = MagicMock() + monkeypatch.setattr(model_config_module.db, "session", session) + + def _from_model_config_dict(self, model_config): + self.pre_prompt = model_config["pre_prompt"] + self.id = "config-1" + return self + + monkeypatch.setattr(AppModelConfig, "from_model_config_dict", _from_model_config_dict) + send_mock = MagicMock() + monkeypatch.setattr(model_config_module.app_model_config_was_updated, "send", send_mock) + + with app.test_request_context("/console/api/apps/app-1/model-config", method="POST", json={"pre_prompt": "hi"}): + response = method(app_model=app_model) + + session.add.assert_called_once() + session.flush.assert_called_once() + session.commit.assert_called_once() + send_mock.assert_called_once() + assert app_model.app_model_config_id == "config-1" + assert response["result"] == "success" + + +def test_post_encrypts_agent_tool_parameters(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = model_config_module.ModelConfigResource() + method = _unwrap(api.post) + + app_model = SimpleNamespace( + id="app-1", + mode=AppMode.AGENT_CHAT.value, + is_agent=True, + app_model_config_id="config-0", + updated_by=None, + updated_at=None, + ) + + original_config = AppModelConfig(app_id="app-1", created_by="u1", updated_by="u1") + original_config.agent_mode = json.dumps( + { + "enabled": True, + "strategy": "function-calling", + "tools": [ + { + "provider_id": "provider", + "provider_type": "builtin", + "tool_name": "tool", + "tool_parameters": {"secret": "masked"}, + } + ], + "prompt": None, + } + ) + + session = MagicMock() + query = MagicMock() + query.where.return_value = query + query.first.return_value = original_config + session.query.return_value = query + monkeypatch.setattr(model_config_module.db, "session", session) + + monkeypatch.setattr( + model_config_module.AppModelConfigService, + "validate_configuration", + lambda **_kwargs: { + "pre_prompt": "hi", + "agent_mode": { + "enabled": True, + "strategy": "function-calling", + "tools": [ + { + "provider_id": "provider", + "provider_type": "builtin", + "tool_name": "tool", + "tool_parameters": {"secret": "masked"}, + } + ], + "prompt": None, + }, + }, + ) + monkeypatch.setattr(model_config_module, "current_account_with_tenant", lambda: (SimpleNamespace(id="u1"), "t1")) + + monkeypatch.setattr(model_config_module.ToolManager, "get_agent_tool_runtime", lambda **_kwargs: object()) + + class _ParamManager: + def __init__(self, **_kwargs): + self.delete_called = False + + def decrypt_tool_parameters(self, _value): + return {"secret": "decrypted"} + + def mask_tool_parameters(self, _value): + return {"secret": "masked"} + + def encrypt_tool_parameters(self, _value): + return {"secret": "encrypted"} + + def delete_tool_parameters_cache(self): + self.delete_called = True + + monkeypatch.setattr(model_config_module, "ToolParameterConfigurationManager", _ParamManager) + send_mock = MagicMock() + monkeypatch.setattr(model_config_module.app_model_config_was_updated, "send", send_mock) + + with app.test_request_context("/console/api/apps/app-1/model-config", method="POST", json={"pre_prompt": "hi"}): + response = method(app_model=app_model) + + stored_config = session.add.call_args[0][0] + stored_agent_mode = json.loads(stored_config.agent_mode) + assert stored_agent_mode["tools"][0]["tool_parameters"]["secret"] == "encrypted" + assert response["result"] == "success" diff --git a/api/tests/unit_tests/controllers/console/app/test_statistic_api.py b/api/tests/unit_tests/controllers/console/app/test_statistic_api.py new file mode 100644 index 0000000000..15459994f9 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_statistic_api.py @@ -0,0 +1,215 @@ +from __future__ import annotations + +from decimal import Decimal +from types import SimpleNamespace + +import pytest +from werkzeug.exceptions import BadRequest + +from controllers.console.app import statistic as statistic_module + + +def _unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +class _ConnContext: + def __init__(self, rows): + self._rows = rows + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def execute(self, _query, _args): + return self._rows + + +def _install_db(monkeypatch: pytest.MonkeyPatch, rows) -> None: + engine = SimpleNamespace(begin=lambda: _ConnContext(rows)) + monkeypatch.setattr(statistic_module, "db", SimpleNamespace(engine=engine)) + + +def _install_common(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + statistic_module, + "current_account_with_tenant", + lambda: (SimpleNamespace(timezone="UTC"), "t1"), + ) + monkeypatch.setattr( + statistic_module, + "parse_time_range", + lambda *_args, **_kwargs: (None, None), + ) + monkeypatch.setattr(statistic_module, "convert_datetime_to_date", lambda field: field) + + +def test_daily_message_statistic_returns_rows(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyMessageStatistic() + method = _unwrap(api.get) + + rows = [SimpleNamespace(date="2024-01-01", message_count=3)] + _install_common(monkeypatch) + _install_db(monkeypatch, rows) + + with app.test_request_context("/console/api/apps/app-1/statistics/daily-messages", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + assert response.get_json() == {"data": [{"date": "2024-01-01", "message_count": 3}]} + + +def test_daily_conversation_statistic_returns_rows(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyConversationStatistic() + method = _unwrap(api.get) + + rows = [SimpleNamespace(date="2024-01-02", conversation_count=5)] + _install_common(monkeypatch) + _install_db(monkeypatch, rows) + + with app.test_request_context("/console/api/apps/app-1/statistics/daily-conversations", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + assert response.get_json() == {"data": [{"date": "2024-01-02", "conversation_count": 5}]} + + +def test_daily_token_cost_statistic_returns_rows(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyTokenCostStatistic() + method = _unwrap(api.get) + + rows = [SimpleNamespace(date="2024-01-03", token_count=10, total_price=0.25, currency="USD")] + _install_common(monkeypatch) + _install_db(monkeypatch, rows) + + with app.test_request_context("/console/api/apps/app-1/statistics/token-costs", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + data = response.get_json() + assert len(data["data"]) == 1 + assert data["data"][0]["date"] == "2024-01-03" + assert data["data"][0]["token_count"] == 10 + assert data["data"][0]["total_price"] == 0.25 + + +def test_daily_terminals_statistic_returns_rows(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyTerminalsStatistic() + method = _unwrap(api.get) + + rows = [SimpleNamespace(date="2024-01-04", terminal_count=7)] + _install_common(monkeypatch) + _install_db(monkeypatch, rows) + + with app.test_request_context("/console/api/apps/app-1/statistics/daily-end-users", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + assert response.get_json() == {"data": [{"date": "2024-01-04", "terminal_count": 7}]} + + +def test_average_session_interaction_statistic_requires_chat_mode(app, monkeypatch: pytest.MonkeyPatch) -> None: + """Test that AverageSessionInteractionStatistic is limited to chat/agent modes.""" + # This just verifies the decorator is applied correctly + # Actual endpoint testing would require complex JOIN mocking + api = statistic_module.AverageSessionInteractionStatistic() + method = _unwrap(api.get) + assert callable(method) + + +def test_daily_message_statistic_with_invalid_time_range(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyMessageStatistic() + method = _unwrap(api.get) + + def mock_parse(*args, **kwargs): + raise ValueError("Invalid time range") + + _install_db(monkeypatch, []) + monkeypatch.setattr( + statistic_module, + "current_account_with_tenant", + lambda: (SimpleNamespace(timezone="UTC"), "t1"), + ) + monkeypatch.setattr(statistic_module, "parse_time_range", mock_parse) + monkeypatch.setattr(statistic_module, "convert_datetime_to_date", lambda field: field) + + with app.test_request_context("/console/api/apps/app-1/statistics/daily-messages", method="GET"): + with pytest.raises(BadRequest): + method(app_model=SimpleNamespace(id="app-1")) + + +def test_daily_message_statistic_multiple_rows(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyMessageStatistic() + method = _unwrap(api.get) + + rows = [ + SimpleNamespace(date="2024-01-01", message_count=10), + SimpleNamespace(date="2024-01-02", message_count=15), + SimpleNamespace(date="2024-01-03", message_count=12), + ] + _install_common(monkeypatch) + _install_db(monkeypatch, rows) + + with app.test_request_context("/console/api/apps/app-1/statistics/daily-messages", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + data = response.get_json() + assert len(data["data"]) == 3 + + +def test_daily_message_statistic_empty_result(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyMessageStatistic() + method = _unwrap(api.get) + + _install_common(monkeypatch) + _install_db(monkeypatch, []) + + with app.test_request_context("/console/api/apps/app-1/statistics/daily-messages", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + assert response.get_json() == {"data": []} + + +def test_daily_conversation_statistic_with_time_range(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyConversationStatistic() + method = _unwrap(api.get) + + rows = [SimpleNamespace(date="2024-01-02", conversation_count=5)] + _install_db(monkeypatch, rows) + monkeypatch.setattr( + statistic_module, + "current_account_with_tenant", + lambda: (SimpleNamespace(timezone="UTC"), "t1"), + ) + monkeypatch.setattr( + statistic_module, + "parse_time_range", + lambda *_args, **_kwargs: ("s", "e"), + ) + monkeypatch.setattr(statistic_module, "convert_datetime_to_date", lambda field: field) + + with app.test_request_context("/console/api/apps/app-1/statistics/daily-conversations", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + assert response.get_json() == {"data": [{"date": "2024-01-02", "conversation_count": 5}]} + + +def test_daily_token_cost_with_multiple_currencies(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = statistic_module.DailyTokenCostStatistic() + method = _unwrap(api.get) + + rows = [ + SimpleNamespace(date="2024-01-01", token_count=100, total_price=Decimal("0.50"), currency="USD"), + SimpleNamespace(date="2024-01-02", token_count=200, total_price=Decimal("1.00"), currency="USD"), + ] + _install_common(monkeypatch) + _install_db(monkeypatch, rows) + + with app.test_request_context("/console/api/apps/app-1/statistics/token-costs", method="GET"): + response = method(app_model=SimpleNamespace(id="app-1")) + + data = response.get_json() + assert len(data["data"]) == 2 diff --git a/api/tests/unit_tests/controllers/console/app/test_workflow.py b/api/tests/unit_tests/controllers/console/app/test_workflow.py new file mode 100644 index 0000000000..f100080eaa --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_workflow.py @@ -0,0 +1,163 @@ +from __future__ import annotations + +from datetime import datetime +from types import SimpleNamespace +from unittest.mock import Mock + +import pytest +from werkzeug.exceptions import HTTPException, NotFound + +from controllers.console.app import workflow as workflow_module +from controllers.console.app.error import DraftWorkflowNotExist, DraftWorkflowNotSync +from dify_graph.file.enums import FileTransferMethod, FileType +from dify_graph.file.models import File + + +def _unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def test_parse_file_no_config(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(workflow_module.FileUploadConfigManager, "convert", lambda *_args, **_kwargs: None) + workflow = SimpleNamespace(features_dict={}, tenant_id="t1") + + assert workflow_module._parse_file(workflow, files=[{"id": "f"}]) == [] + + +def test_parse_file_with_config(monkeypatch: pytest.MonkeyPatch) -> None: + config = object() + file_list = [ + File( + tenant_id="t1", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="http://u", + ) + ] + build_mock = Mock(return_value=file_list) + monkeypatch.setattr(workflow_module.FileUploadConfigManager, "convert", lambda *_args, **_kwargs: config) + monkeypatch.setattr(workflow_module.file_factory, "build_from_mappings", build_mock) + + workflow = SimpleNamespace(features_dict={}, tenant_id="t1") + result = workflow_module._parse_file(workflow, files=[{"id": "f"}]) + + assert result == file_list + build_mock.assert_called_once() + + +def test_sync_draft_workflow_invalid_content_type(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = workflow_module.DraftWorkflowApi() + handler = _unwrap(api.post) + + monkeypatch.setattr(workflow_module, "current_account_with_tenant", lambda: (SimpleNamespace(), "t1")) + + with app.test_request_context("/apps/app/workflows/draft", method="POST", data="x", content_type="text/html"): + with pytest.raises(HTTPException) as exc: + handler(api, app_model=SimpleNamespace(id="app")) + + assert exc.value.code == 415 + + +def test_sync_draft_workflow_invalid_json(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = workflow_module.DraftWorkflowApi() + handler = _unwrap(api.post) + + monkeypatch.setattr(workflow_module, "current_account_with_tenant", lambda: (SimpleNamespace(), "t1")) + + with app.test_request_context( + "/apps/app/workflows/draft", + method="POST", + data="[]", + content_type="application/json", + ): + response, status = handler(api, app_model=SimpleNamespace(id="app")) + + assert status == 400 + assert response["message"] == "Invalid JSON data" + + +def test_sync_draft_workflow_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + workflow = SimpleNamespace( + unique_hash="h", + updated_at=None, + created_at=datetime(2024, 1, 1), + ) + monkeypatch.setattr(workflow_module, "current_account_with_tenant", lambda: (SimpleNamespace(), "t1")) + monkeypatch.setattr( + workflow_module.variable_factory, "build_environment_variable_from_mapping", lambda *_args: "env" + ) + monkeypatch.setattr( + workflow_module.variable_factory, "build_conversation_variable_from_mapping", lambda *_args: "conv" + ) + + service = SimpleNamespace(sync_draft_workflow=lambda **_kwargs: workflow) + monkeypatch.setattr(workflow_module, "WorkflowService", lambda: service) + + api = workflow_module.DraftWorkflowApi() + handler = _unwrap(api.post) + + with app.test_request_context( + "/apps/app/workflows/draft", + method="POST", + json={"graph": {}, "features": {}, "hash": "h"}, + ): + response = handler(api, app_model=SimpleNamespace(id="app")) + + assert response["result"] == "success" + + +def test_sync_draft_workflow_hash_mismatch(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(workflow_module, "current_account_with_tenant", lambda: (SimpleNamespace(), "t1")) + + def _raise(*_args, **_kwargs): + raise workflow_module.WorkflowHashNotEqualError() + + service = SimpleNamespace(sync_draft_workflow=_raise) + monkeypatch.setattr(workflow_module, "WorkflowService", lambda: service) + + api = workflow_module.DraftWorkflowApi() + handler = _unwrap(api.post) + + with app.test_request_context( + "/apps/app/workflows/draft", + method="POST", + json={"graph": {}, "features": {}, "hash": "h"}, + ): + with pytest.raises(DraftWorkflowNotSync): + handler(api, app_model=SimpleNamespace(id="app")) + + +def test_draft_workflow_get_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + workflow_module, "WorkflowService", lambda: SimpleNamespace(get_draft_workflow=lambda **_k: None) + ) + + api = workflow_module.DraftWorkflowApi() + handler = _unwrap(api.get) + + with pytest.raises(DraftWorkflowNotExist): + handler(api, app_model=SimpleNamespace(id="app")) + + +def test_advanced_chat_run_conversation_not_exists(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + workflow_module.AppGenerateService, + "generate", + lambda *_args, **_kwargs: (_ for _ in ()).throw( + workflow_module.services.errors.conversation.ConversationNotExistsError() + ), + ) + monkeypatch.setattr(workflow_module, "current_account_with_tenant", lambda: (SimpleNamespace(), "t1")) + + api = workflow_module.AdvancedChatDraftWorkflowRunApi() + handler = _unwrap(api.post) + + with app.test_request_context( + "/apps/app/advanced-chat/workflows/draft/run", + method="POST", + json={"inputs": {}}, + ): + with pytest.raises(NotFound): + handler(api, app_model=SimpleNamespace(id="app")) diff --git a/api/tests/unit_tests/controllers/console/app/test_wraps.py b/api/tests/unit_tests/controllers/console/app/test_wraps.py new file mode 100644 index 0000000000..7664e492da --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_wraps.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from controllers.console.app import wraps as wraps_module +from controllers.console.app.error import AppNotFoundError +from models.model import AppMode + + +def test_get_app_model_injects_model(monkeypatch: pytest.MonkeyPatch) -> None: + app_model = SimpleNamespace(id="app-1", mode=AppMode.CHAT.value, status="normal", tenant_id="t1") + query = SimpleNamespace(where=lambda *_args, **_kwargs: query, first=lambda: app_model) + + monkeypatch.setattr(wraps_module, "current_account_with_tenant", lambda: (None, "t1")) + monkeypatch.setattr(wraps_module.db, "session", SimpleNamespace(query=lambda *_args, **_kwargs: query)) + + @wraps_module.get_app_model + def handler(app_model): + return app_model.id + + assert handler(app_id="app-1") == "app-1" + + +def test_get_app_model_rejects_wrong_mode(monkeypatch: pytest.MonkeyPatch) -> None: + app_model = SimpleNamespace(id="app-1", mode=AppMode.CHAT.value, status="normal", tenant_id="t1") + query = SimpleNamespace(where=lambda *_args, **_kwargs: query, first=lambda: app_model) + + monkeypatch.setattr(wraps_module, "current_account_with_tenant", lambda: (None, "t1")) + monkeypatch.setattr(wraps_module.db, "session", SimpleNamespace(query=lambda *_args, **_kwargs: query)) + + @wraps_module.get_app_model(mode=[AppMode.COMPLETION]) + def handler(app_model): + return app_model.id + + with pytest.raises(AppNotFoundError): + handler(app_id="app-1") + + +def test_get_app_model_requires_app_id() -> None: + @wraps_module.get_app_model + def handler(app_model): + return app_model.id + + with pytest.raises(ValueError): + handler() diff --git a/api/tests/unit_tests/core/model_runtime/__init__.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/__init__.py similarity index 100% rename from api/tests/unit_tests/core/model_runtime/__init__.py rename to api/tests/unit_tests/controllers/console/datasets/rag_pipeline/__init__.py diff --git a/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_datasource_auth.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_datasource_auth.py new file mode 100644 index 0000000000..9014edc39e --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_datasource_auth.py @@ -0,0 +1,817 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden, NotFound + +from controllers.console import console_ns +from controllers.console.datasets.rag_pipeline.datasource_auth import ( + DatasourceAuth, + DatasourceAuthDefaultApi, + DatasourceAuthDeleteApi, + DatasourceAuthListApi, + DatasourceAuthOauthCustomClient, + DatasourceAuthUpdateApi, + DatasourceHardCodeAuthListApi, + DatasourceOAuthCallback, + DatasourcePluginOAuthAuthorizationUrl, + DatasourceUpdateProviderNameApi, +) +from core.plugin.impl.oauth import OAuthHandler +from dify_graph.model_runtime.errors.validate import CredentialsValidateFailedError +from services.datasource_provider_service import DatasourceProviderService +from services.plugin.oauth_service import OAuthProxyService + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestDatasourcePluginOAuthAuthorizationUrl: + def test_get_success(self, app): + api = DatasourcePluginOAuthAuthorizationUrl() + method = unwrap(api.get) + + user = MagicMock(id="user-1") + + with ( + app.test_request_context("/?credential_id=cred-1"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "get_oauth_client", + return_value={"client_id": "abc"}, + ), + patch.object( + OAuthProxyService, + "create_proxy_context", + return_value="ctx-1", + ), + patch.object( + OAuthHandler, + "get_authorization_url", + return_value={"url": "http://auth"}, + ), + ): + response = method(api, "notion") + + assert response.status_code == 200 + + def test_get_no_oauth_config(self, app): + api = DatasourcePluginOAuthAuthorizationUrl() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "get_oauth_client", + return_value=None, + ), + ): + with pytest.raises(ValueError): + method(api, "notion") + + def test_get_without_credential_id_sets_cookie(self, app): + api = DatasourcePluginOAuthAuthorizationUrl() + method = unwrap(api.get) + + user = MagicMock(id="user-1") + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "get_oauth_client", + return_value={"client_id": "abc"}, + ), + patch.object( + OAuthProxyService, + "create_proxy_context", + return_value="ctx-123", + ), + patch.object( + OAuthHandler, + "get_authorization_url", + return_value={"url": "http://auth"}, + ), + ): + response = method(api, "notion") + + assert response.status_code == 200 + assert "context_id" in response.headers.get("Set-Cookie") + + +class TestDatasourceOAuthCallback: + def test_callback_success_new_credential(self, app): + api = DatasourceOAuthCallback() + method = unwrap(api.get) + + oauth_response = MagicMock() + oauth_response.credentials = {"token": "abc"} + oauth_response.expires_at = None + oauth_response.metadata = {"name": "test"} + + context = { + "user_id": "user-1", + "tenant_id": "tenant-1", + "credential_id": None, + } + + with ( + app.test_request_context("/?context_id=ctx"), + patch.object( + OAuthProxyService, + "use_proxy_context", + return_value=context, + ), + patch.object( + DatasourceProviderService, + "get_oauth_client", + return_value={"client_id": "abc"}, + ), + patch.object( + OAuthHandler, + "get_credentials", + return_value=oauth_response, + ), + patch.object( + DatasourceProviderService, + "add_datasource_oauth_provider", + return_value=None, + ), + ): + response = method(api, "notion") + + assert response.status_code == 302 + + def test_callback_missing_context(self, app): + api = DatasourceOAuthCallback() + method = unwrap(api.get) + + with app.test_request_context("/"): + with pytest.raises(Forbidden): + method(api, "notion") + + def test_callback_invalid_context(self, app): + api = DatasourceOAuthCallback() + method = unwrap(api.get) + + with ( + app.test_request_context("/?context_id=bad"), + patch.object( + OAuthProxyService, + "use_proxy_context", + return_value=None, + ), + ): + with pytest.raises(Forbidden): + method(api, "notion") + + def test_callback_oauth_config_not_found(self, app): + api = DatasourceOAuthCallback() + method = unwrap(api.get) + + context = {"user_id": "u", "tenant_id": "t"} + + with ( + app.test_request_context("/?context_id=ctx"), + patch.object( + OAuthProxyService, + "use_proxy_context", + return_value=context, + ), + patch.object( + DatasourceProviderService, + "get_oauth_client", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "notion") + + def test_callback_reauthorize_existing_credential(self, app): + api = DatasourceOAuthCallback() + method = unwrap(api.get) + + oauth_response = MagicMock() + oauth_response.credentials = {"token": "abc"} + oauth_response.expires_at = None + oauth_response.metadata = {} # avatar + name missing + + context = { + "user_id": "user-1", + "tenant_id": "tenant-1", + "credential_id": "cred-1", + } + + with ( + app.test_request_context("/?context_id=ctx"), + patch.object( + OAuthProxyService, + "use_proxy_context", + return_value=context, + ), + patch.object( + DatasourceProviderService, + "get_oauth_client", + return_value={"client_id": "abc"}, + ), + patch.object( + OAuthHandler, + "get_credentials", + return_value=oauth_response, + ), + patch.object( + DatasourceProviderService, + "reauthorize_datasource_oauth_provider", + return_value=None, + ), + ): + response = method(api, "notion") + + assert response.status_code == 302 + assert "/oauth-callback" in response.location + + def test_callback_context_id_from_cookie(self, app): + api = DatasourceOAuthCallback() + method = unwrap(api.get) + + oauth_response = MagicMock() + oauth_response.credentials = {"token": "abc"} + oauth_response.expires_at = None + oauth_response.metadata = {} + + context = { + "user_id": "user-1", + "tenant_id": "tenant-1", + "credential_id": None, + } + + with ( + app.test_request_context("/", headers={"Cookie": "context_id=ctx"}), + patch.object( + OAuthProxyService, + "use_proxy_context", + return_value=context, + ), + patch.object( + DatasourceProviderService, + "get_oauth_client", + return_value={"client_id": "abc"}, + ), + patch.object( + OAuthHandler, + "get_credentials", + return_value=oauth_response, + ), + patch.object( + DatasourceProviderService, + "add_datasource_oauth_provider", + return_value=None, + ), + ): + response = method(api, "notion") + + assert response.status_code == 302 + + +class TestDatasourceAuth: + def test_post_success(self, app): + api = DatasourceAuth() + method = unwrap(api.post) + + payload = {"credentials": {"key": "val"}} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "add_datasource_api_key_provider", + return_value=None, + ), + ): + response, status = method(api, "notion") + + assert status == 200 + + def test_post_invalid_credentials(self, app): + api = DatasourceAuth() + method = unwrap(api.post) + + payload = {"credentials": {"key": "bad"}} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "add_datasource_api_key_provider", + side_effect=CredentialsValidateFailedError("invalid"), + ), + ): + with pytest.raises(ValueError): + method(api, "notion") + + def test_get_success(self, app): + api = DatasourceAuth() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "list_datasource_credentials", + return_value=[{"id": "1"}], + ), + ): + response, status = method(api, "notion") + + assert status == 200 + assert response["result"] + + def test_post_missing_credentials(self, app): + api = DatasourceAuth() + method = unwrap(api.post) + + payload = {} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + ): + with pytest.raises(ValueError): + method(api, "notion") + + def test_get_empty_list(self, app): + api = DatasourceAuth() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "list_datasource_credentials", + return_value=[], + ), + ): + response, status = method(api, "notion") + + assert status == 200 + assert response["result"] == [] + + +class TestDatasourceAuthDeleteApi: + def test_delete_success(self, app): + api = DatasourceAuthDeleteApi() + method = unwrap(api.post) + + payload = {"credential_id": "cred-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "remove_datasource_credentials", + return_value=None, + ), + ): + response, status = method(api, "notion") + + assert status == 200 + + def test_delete_missing_credential_id(self, app): + api = DatasourceAuthDeleteApi() + method = unwrap(api.post) + + payload = {} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + ): + with pytest.raises(ValueError): + method(api, "notion") + + +class TestDatasourceAuthUpdateApi: + def test_update_success(self, app): + api = DatasourceAuthUpdateApi() + method = unwrap(api.post) + + payload = {"credential_id": "id", "credentials": {"k": "v"}} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "update_datasource_credentials", + return_value=None, + ), + ): + response, status = method(api, "notion") + + assert status == 201 + + def test_update_with_credentials_none(self, app): + api = DatasourceAuthUpdateApi() + method = unwrap(api.post) + + payload = {"credential_id": "id", "credentials": None} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "update_datasource_credentials", + return_value=None, + ) as update_mock, + ): + response, status = method(api, "notion") + + update_mock.assert_called_once() + assert status == 201 + + def test_update_name_only(self, app): + api = DatasourceAuthUpdateApi() + method = unwrap(api.post) + + payload = {"credential_id": "id", "name": "New Name"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "update_datasource_credentials", + return_value=None, + ), + ): + _, status = method(api, "notion") + + assert status == 201 + + def test_update_with_empty_credentials_dict(self, app): + api = DatasourceAuthUpdateApi() + method = unwrap(api.post) + + payload = {"credential_id": "id", "credentials": {}} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "update_datasource_credentials", + return_value=None, + ) as update_mock, + ): + _, status = method(api, "notion") + + update_mock.assert_called_once() + assert status == 201 + + +class TestDatasourceAuthListApi: + def test_list_success(self, app): + api = DatasourceAuthListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "get_all_datasource_credentials", + return_value=[{"id": "1"}], + ), + ): + response, status = method(api) + + assert status == 200 + + def test_auth_list_empty(self, app): + api = DatasourceAuthListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "get_all_datasource_credentials", + return_value=[], + ), + ): + response, status = method(api) + + assert status == 200 + assert response["result"] == [] + + def test_hardcode_list_empty(self, app): + api = DatasourceHardCodeAuthListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "get_hard_code_datasource_credentials", + return_value=[], + ), + ): + response, status = method(api) + + assert status == 200 + assert response["result"] == [] + + +class TestDatasourceHardCodeAuthListApi: + def test_list_success(self, app): + api = DatasourceHardCodeAuthListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "get_hard_code_datasource_credentials", + return_value=[{"id": "1"}], + ), + ): + response, status = method(api) + + assert status == 200 + + +class TestDatasourceAuthOauthCustomClient: + def test_post_success(self, app): + api = DatasourceAuthOauthCustomClient() + method = unwrap(api.post) + + payload = {"client_params": {}, "enable_oauth_custom_client": True} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "setup_oauth_custom_client_params", + return_value=None, + ), + ): + response, status = method(api, "notion") + + assert status == 200 + + def test_delete_success(self, app): + api = DatasourceAuthOauthCustomClient() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "remove_oauth_custom_client_params", + return_value=None, + ), + ): + response, status = method(api, "notion") + + assert status == 200 + + def test_post_empty_payload(self, app): + api = DatasourceAuthOauthCustomClient() + method = unwrap(api.post) + + payload = {} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "setup_oauth_custom_client_params", + return_value=None, + ), + ): + _, status = method(api, "notion") + + assert status == 200 + + def test_post_disabled_flag(self, app): + api = DatasourceAuthOauthCustomClient() + method = unwrap(api.post) + + payload = { + "client_params": {"a": 1}, + "enable_oauth_custom_client": False, + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "setup_oauth_custom_client_params", + return_value=None, + ) as setup_mock, + ): + _, status = method(api, "notion") + + setup_mock.assert_called_once() + assert status == 200 + + +class TestDatasourceAuthDefaultApi: + def test_set_default_success(self, app): + api = DatasourceAuthDefaultApi() + method = unwrap(api.post) + + payload = {"id": "cred-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "set_default_datasource_provider", + return_value=None, + ), + ): + response, status = method(api, "notion") + + assert status == 200 + + def test_default_missing_id(self, app): + api = DatasourceAuthDefaultApi() + method = unwrap(api.post) + + payload = {} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + ): + with pytest.raises(ValueError): + method(api, "notion") + + +class TestDatasourceUpdateProviderNameApi: + def test_update_name_success(self, app): + api = DatasourceUpdateProviderNameApi() + method = unwrap(api.post) + + payload = {"credential_id": "id", "name": "New Name"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + DatasourceProviderService, + "update_datasource_provider_name", + return_value=None, + ), + ): + response, status = method(api, "notion") + + assert status == 200 + + def test_update_name_too_long(self, app): + api = DatasourceUpdateProviderNameApi() + method = unwrap(api.post) + + payload = { + "credential_id": "id", + "name": "x" * 101, + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + ): + with pytest.raises(ValueError): + method(api, "notion") + + def test_update_name_missing_credential_id(self, app): + api = DatasourceUpdateProviderNameApi() + method = unwrap(api.post) + + payload = {"name": "Valid"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_auth.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + ): + with pytest.raises(ValueError): + method(api, "notion") diff --git a/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_datasource_content_preview.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_datasource_content_preview.py new file mode 100644 index 0000000000..7a8ccde55a --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_datasource_content_preview.py @@ -0,0 +1,143 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden + +from controllers.console import console_ns +from controllers.console.datasets.rag_pipeline.datasource_content_preview import ( + DataSourceContentPreviewApi, +) +from models import Account +from models.dataset import Pipeline + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestDataSourceContentPreviewApi: + def _valid_payload(self): + return { + "inputs": {"query": "hello"}, + "datasource_type": "notion", + "credential_id": "cred-1", + } + + def test_post_success(self, app): + api = DataSourceContentPreviewApi() + method = unwrap(api.post) + + payload = self._valid_payload() + + pipeline = MagicMock(spec=Pipeline) + node_id = "node-1" + account = MagicMock(spec=Account) + + preview_result = {"content": "preview data"} + + service_instance = MagicMock() + service_instance.run_datasource_node_preview.return_value = preview_result + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_content_preview.current_user", + account, + ), + patch( + "controllers.console.datasets.rag_pipeline.datasource_content_preview.RagPipelineService", + return_value=service_instance, + ), + ): + response, status = method(api, pipeline, node_id) + + service_instance.run_datasource_node_preview.assert_called_once_with( + pipeline=pipeline, + node_id=node_id, + user_inputs=payload["inputs"], + account=account, + datasource_type=payload["datasource_type"], + is_published=True, + credential_id=payload["credential_id"], + ) + assert status == 200 + assert response == preview_result + + def test_post_forbidden_non_account_user(self, app): + api = DataSourceContentPreviewApi() + method = unwrap(api.post) + + payload = self._valid_payload() + + pipeline = MagicMock(spec=Pipeline) + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_content_preview.current_user", + MagicMock(), # NOT Account + ), + ): + with pytest.raises(Forbidden): + method(api, pipeline, "node-1") + + def test_post_invalid_payload(self, app): + api = DataSourceContentPreviewApi() + method = unwrap(api.post) + + payload = { + "inputs": {"query": "hello"}, + # datasource_type missing + } + + pipeline = MagicMock(spec=Pipeline) + account = MagicMock(spec=Account) + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_content_preview.current_user", + account, + ), + ): + with pytest.raises(ValueError): + method(api, pipeline, "node-1") + + def test_post_without_credential_id(self, app): + api = DataSourceContentPreviewApi() + method = unwrap(api.post) + + payload = { + "inputs": {"query": "hello"}, + "datasource_type": "notion", + "credential_id": None, + } + + pipeline = MagicMock(spec=Pipeline) + account = MagicMock(spec=Account) + + service_instance = MagicMock() + service_instance.run_datasource_node_preview.return_value = {"ok": True} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.datasource_content_preview.current_user", + account, + ), + patch( + "controllers.console.datasets.rag_pipeline.datasource_content_preview.RagPipelineService", + return_value=service_instance, + ), + ): + response, status = method(api, pipeline, "node-1") + + service_instance.run_datasource_node_preview.assert_called_once() + assert status == 200 + assert response == {"ok": True} diff --git a/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline.py new file mode 100644 index 0000000000..3b8679f4ec --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline.py @@ -0,0 +1,187 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from controllers.console import console_ns +from controllers.console.datasets.rag_pipeline.rag_pipeline import ( + CustomizedPipelineTemplateApi, + PipelineTemplateDetailApi, + PipelineTemplateListApi, + PublishCustomizedPipelineTemplateApi, +) + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestPipelineTemplateListApi: + def test_get_success(self, app): + api = PipelineTemplateListApi() + method = unwrap(api.get) + + templates = [{"id": "t1"}] + + with ( + app.test_request_context("/?type=built-in&language=en-US"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.RagPipelineService.get_pipeline_templates", + return_value=templates, + ), + ): + response, status = method(api) + + assert status == 200 + assert response == templates + + +class TestPipelineTemplateDetailApi: + def test_get_success(self, app): + api = PipelineTemplateDetailApi() + method = unwrap(api.get) + + template = {"id": "tpl-1"} + + service = MagicMock() + service.get_pipeline_template_detail.return_value = template + + with ( + app.test_request_context("/?type=built-in"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.RagPipelineService", + return_value=service, + ), + ): + response, status = method(api, "tpl-1") + + assert status == 200 + assert response == template + + +class TestCustomizedPipelineTemplateApi: + def test_patch_success(self, app): + api = CustomizedPipelineTemplateApi() + method = unwrap(api.patch) + + payload = { + "name": "Template", + "description": "Desc", + "icon_info": {"icon": "📘"}, + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.RagPipelineService.update_customized_pipeline_template" + ) as update_mock, + ): + response = method(api, "tpl-1") + + update_mock.assert_called_once() + assert response == 200 + + def test_delete_success(self, app): + api = CustomizedPipelineTemplateApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.RagPipelineService.delete_customized_pipeline_template" + ) as delete_mock, + ): + response = method(api, "tpl-1") + + delete_mock.assert_called_once_with("tpl-1") + assert response == 200 + + def test_post_success(self, app): + api = CustomizedPipelineTemplateApi() + method = unwrap(api.post) + + template = MagicMock() + template.yaml_content = "yaml-data" + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = template + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.Session", + return_value=session_ctx, + ), + ): + response, status = method(api, "tpl-1") + + assert status == 200 + assert response == {"data": "yaml-data"} + + def test_post_template_not_found(self, app): + api = CustomizedPipelineTemplateApi() + method = unwrap(api.post) + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = None + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.Session", + return_value=session_ctx, + ), + ): + with pytest.raises(ValueError): + method(api, "tpl-1") + + +class TestPublishCustomizedPipelineTemplateApi: + def test_post_success(self, app): + api = PublishCustomizedPipelineTemplateApi() + method = unwrap(api.post) + + payload = { + "name": "Template", + "description": "Desc", + "icon_info": {"icon": "📘"}, + } + + service = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline.RagPipelineService", + return_value=service, + ), + ): + response = method(api, "pipeline-1") + + service.publish_customized_pipeline_template.assert_called_once() + assert response == {"result": "success"} diff --git a/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_datasets.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_datasets.py new file mode 100644 index 0000000000..fd38fcbb5e --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_datasets.py @@ -0,0 +1,187 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden + +import services +from controllers.console import console_ns +from controllers.console.datasets.error import DatasetNameDuplicateError +from controllers.console.datasets.rag_pipeline.rag_pipeline_datasets import ( + CreateEmptyRagPipelineDatasetApi, + CreateRagPipelineDatasetApi, +) + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestCreateRagPipelineDatasetApi: + def _valid_payload(self): + return {"yaml_content": "name: test"} + + def test_post_success(self, app): + api = CreateRagPipelineDatasetApi() + method = unwrap(api.post) + + payload = self._valid_payload() + user = MagicMock(is_dataset_editor=True) + import_info = {"dataset_id": "ds-1"} + + mock_service = MagicMock() + mock_service.create_rag_pipeline_dataset.return_value = import_info + + mock_session_ctx = MagicMock() + mock_session_ctx.__enter__.return_value = MagicMock() + mock_session_ctx.__exit__.return_value = None + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.Session", + return_value=mock_session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.RagPipelineDslService", + return_value=mock_service, + ), + ): + response, status = method(api) + + assert status == 201 + assert response == import_info + + def test_post_forbidden_non_editor(self, app): + api = CreateRagPipelineDatasetApi() + method = unwrap(api.post) + + payload = self._valid_payload() + user = MagicMock(is_dataset_editor=False) + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + ): + with pytest.raises(Forbidden): + method(api) + + def test_post_dataset_name_duplicate(self, app): + api = CreateRagPipelineDatasetApi() + method = unwrap(api.post) + + payload = self._valid_payload() + user = MagicMock(is_dataset_editor=True) + + mock_service = MagicMock() + mock_service.create_rag_pipeline_dataset.side_effect = services.errors.dataset.DatasetNameDuplicateError() + + mock_session_ctx = MagicMock() + mock_session_ctx.__enter__.return_value = MagicMock() + mock_session_ctx.__exit__.return_value = None + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.Session", + return_value=mock_session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.RagPipelineDslService", + return_value=mock_service, + ), + ): + with pytest.raises(DatasetNameDuplicateError): + method(api) + + def test_post_invalid_payload(self, app): + api = CreateRagPipelineDatasetApi() + method = unwrap(api.post) + + payload = {} + user = MagicMock(is_dataset_editor=True) + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestCreateEmptyRagPipelineDatasetApi: + def test_post_success(self, app): + api = CreateEmptyRagPipelineDatasetApi() + method = unwrap(api.post) + + user = MagicMock(is_dataset_editor=True) + dataset = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.DatasetService.create_empty_rag_pipeline_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.marshal", + return_value={"id": "ds-1"}, + ), + ): + response, status = method(api) + + assert status == 201 + assert response == {"id": "ds-1"} + + def test_post_forbidden_non_editor(self, app): + api = CreateEmptyRagPipelineDatasetApi() + method = unwrap(api.post) + + user = MagicMock(is_dataset_editor=False) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + ): + with pytest.raises(Forbidden): + method(api) diff --git a/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_draft_variable.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_draft_variable.py new file mode 100644 index 0000000000..b4c0903f63 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_draft_variable.py @@ -0,0 +1,324 @@ +from unittest.mock import MagicMock, patch + +import pytest +from flask import Response + +from controllers.console import console_ns +from controllers.console.app.error import DraftWorkflowNotExist +from controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable import ( + RagPipelineEnvironmentVariableCollectionApi, + RagPipelineNodeVariableCollectionApi, + RagPipelineSystemVariableCollectionApi, + RagPipelineVariableApi, + RagPipelineVariableCollectionApi, + RagPipelineVariableResetApi, +) +from controllers.web.error import InvalidArgumentError, NotFoundError +from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID +from dify_graph.variables.types import SegmentType +from models.account import Account + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def fake_db(): + db = MagicMock() + db.engine = MagicMock() + db.session.return_value = MagicMock() + return db + + +@pytest.fixture +def editor_user(): + user = MagicMock(spec=Account) + user.has_edit_permission = True + return user + + +@pytest.fixture +def restx_config(app): + return patch.dict(app.config, {"RESTX_MASK_HEADER": "X-Fields"}) + + +class TestRagPipelineVariableCollectionApi: + def test_get_variables_success(self, app, fake_db, editor_user, restx_config): + api = RagPipelineVariableCollectionApi() + method = unwrap(api.get) + + pipeline = MagicMock(id="p1") + + rag_srv = MagicMock() + rag_srv.is_workflow_exist.return_value = True + + # IMPORTANT: RESTX expects .variables + var_list = MagicMock() + var_list.variables = [] + + draft_srv = MagicMock() + draft_srv.list_variables_without_values.return_value = var_list + + with ( + app.test_request_context("/?page=1&limit=10"), + restx_config, + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.RagPipelineService", + return_value=rag_srv, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService", + return_value=draft_srv, + ), + ): + result = method(api, pipeline) + + assert result["items"] == [] + + def test_get_variables_workflow_not_exist(self, app, fake_db, editor_user): + api = RagPipelineVariableCollectionApi() + method = unwrap(api.get) + + pipeline = MagicMock() + + rag_srv = MagicMock() + rag_srv.is_workflow_exist.return_value = False + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.RagPipelineService", + return_value=rag_srv, + ), + ): + with pytest.raises(DraftWorkflowNotExist): + method(api, pipeline) + + def test_delete_variables_success(self, app, fake_db, editor_user): + api = RagPipelineVariableCollectionApi() + method = unwrap(api.delete) + + pipeline = MagicMock(id="p1") + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService"), + ): + result = method(api, pipeline) + + assert isinstance(result, Response) + assert result.status_code == 204 + + +class TestRagPipelineNodeVariableCollectionApi: + def test_get_node_variables_success(self, app, fake_db, editor_user, restx_config): + api = RagPipelineNodeVariableCollectionApi() + method = unwrap(api.get) + + pipeline = MagicMock(id="p1") + + var_list = MagicMock() + var_list.variables = [] + + srv = MagicMock() + srv.list_node_variables.return_value = var_list + + with ( + app.test_request_context("/"), + restx_config, + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService", + return_value=srv, + ), + ): + result = method(api, pipeline, "node1") + + assert result["items"] == [] + + def test_get_node_variables_invalid_node(self, app, editor_user): + api = RagPipelineNodeVariableCollectionApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + ): + with pytest.raises(InvalidArgumentError): + method(api, MagicMock(), SYSTEM_VARIABLE_NODE_ID) + + +class TestRagPipelineVariableApi: + def test_get_variable_not_found(self, app, fake_db, editor_user): + api = RagPipelineVariableApi() + method = unwrap(api.get) + + srv = MagicMock() + srv.get_variable.return_value = None + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService", + return_value=srv, + ), + ): + with pytest.raises(NotFoundError): + method(api, MagicMock(), "v1") + + def test_patch_variable_invalid_file_payload(self, app, fake_db, editor_user): + api = RagPipelineVariableApi() + method = unwrap(api.patch) + + pipeline = MagicMock(id="p1", tenant_id="t1") + variable = MagicMock(app_id="p1", value_type=SegmentType.FILE) + + srv = MagicMock() + srv.get_variable.return_value = variable + + payload = {"value": "invalid"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService", + return_value=srv, + ), + ): + with pytest.raises(InvalidArgumentError): + method(api, pipeline, "v1") + + def test_delete_variable_success(self, app, fake_db, editor_user): + api = RagPipelineVariableApi() + method = unwrap(api.delete) + + pipeline = MagicMock(id="p1") + variable = MagicMock(app_id="p1") + + srv = MagicMock() + srv.get_variable.return_value = variable + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService", + return_value=srv, + ), + ): + result = method(api, pipeline, "v1") + + assert result.status_code == 204 + + +class TestRagPipelineVariableResetApi: + def test_reset_variable_success(self, app, fake_db, editor_user): + api = RagPipelineVariableResetApi() + method = unwrap(api.put) + + pipeline = MagicMock(id="p1") + workflow = MagicMock() + variable = MagicMock(app_id="p1") + + srv = MagicMock() + srv.get_variable.return_value = variable + srv.reset_variable.return_value = variable + + rag_srv = MagicMock() + rag_srv.get_draft_workflow.return_value = workflow + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.RagPipelineService", + return_value=rag_srv, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService", + return_value=srv, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.marshal", + return_value={"id": "v1"}, + ), + ): + result = method(api, pipeline, "v1") + + assert result == {"id": "v1"} + + +class TestSystemAndEnvironmentVariablesApi: + def test_system_variables_success(self, app, fake_db, editor_user, restx_config): + api = RagPipelineSystemVariableCollectionApi() + method = unwrap(api.get) + + pipeline = MagicMock(id="p1") + + var_list = MagicMock() + var_list.variables = [] + + srv = MagicMock() + srv.list_system_variables.return_value = var_list + + with ( + app.test_request_context("/"), + restx_config, + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.db", fake_db), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.WorkflowDraftVariableService", + return_value=srv, + ), + ): + result = method(api, pipeline) + + assert result["items"] == [] + + def test_environment_variables_success(self, app, editor_user): + api = RagPipelineEnvironmentVariableCollectionApi() + method = unwrap(api.get) + + env_var = MagicMock( + id="e1", + name="ENV", + description="d", + selector="s", + value_type=MagicMock(value="string"), + value="x", + ) + + workflow = MagicMock(environment_variables=[env_var]) + pipeline = MagicMock(id="p1") + + rag_srv = MagicMock() + rag_srv.get_draft_workflow.return_value = workflow + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.current_user", editor_user), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_draft_variable.RagPipelineService", + return_value=rag_srv, + ), + ): + result = method(api, pipeline) + + assert len(result["items"]) == 1 diff --git a/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_import.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_import.py new file mode 100644 index 0000000000..a72ad45110 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_import.py @@ -0,0 +1,329 @@ +from unittest.mock import MagicMock, patch + +from controllers.console import console_ns +from controllers.console.datasets.rag_pipeline.rag_pipeline_import import ( + RagPipelineExportApi, + RagPipelineImportApi, + RagPipelineImportCheckDependenciesApi, + RagPipelineImportConfirmApi, +) +from models.dataset import Pipeline +from services.app_dsl_service import ImportStatus + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestRagPipelineImportApi: + def _payload(self, mode="create"): + return { + "mode": mode, + "yaml_content": "content", + "name": "Test", + } + + def test_post_success_200(self, app): + api = RagPipelineImportApi() + method = unwrap(api.post) + + payload = self._payload() + + user = MagicMock() + result = MagicMock() + result.status = "completed" + result.model_dump.return_value = {"status": "success"} + + service = MagicMock() + service.import_rag_pipeline.return_value = result + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = MagicMock() + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.RagPipelineDslService", + return_value=service, + ), + ): + response, status = method(api) + + assert status == 200 + assert response == {"status": "success"} + + def test_post_failed_400(self, app): + api = RagPipelineImportApi() + method = unwrap(api.post) + + payload = self._payload() + + user = MagicMock() + result = MagicMock() + result.status = ImportStatus.FAILED + result.model_dump.return_value = {"status": "failed"} + + service = MagicMock() + service.import_rag_pipeline.return_value = result + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = MagicMock() + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.RagPipelineDslService", + return_value=service, + ), + ): + response, status = method(api) + + assert status == 400 + assert response == {"status": "failed"} + + def test_post_pending_202(self, app): + api = RagPipelineImportApi() + method = unwrap(api.post) + + payload = self._payload() + + user = MagicMock() + result = MagicMock() + result.status = ImportStatus.PENDING + result.model_dump.return_value = {"status": "pending"} + + service = MagicMock() + service.import_rag_pipeline.return_value = result + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = MagicMock() + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.RagPipelineDslService", + return_value=service, + ), + ): + response, status = method(api) + + assert status == 202 + assert response == {"status": "pending"} + + +class TestRagPipelineImportConfirmApi: + def test_confirm_success(self, app): + api = RagPipelineImportConfirmApi() + method = unwrap(api.post) + + user = MagicMock() + result = MagicMock() + result.status = "completed" + result.model_dump.return_value = {"ok": True} + + service = MagicMock() + service.confirm_import.return_value = result + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = MagicMock() + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.RagPipelineDslService", + return_value=service, + ), + ): + response, status = method(api, "import-1") + + assert status == 200 + assert response == {"ok": True} + + def test_confirm_failed(self, app): + api = RagPipelineImportConfirmApi() + method = unwrap(api.post) + + user = MagicMock() + result = MagicMock() + result.status = ImportStatus.FAILED + result.model_dump.return_value = {"ok": False} + + service = MagicMock() + service.confirm_import.return_value = result + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = MagicMock() + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.RagPipelineDslService", + return_value=service, + ), + ): + response, status = method(api, "import-1") + + assert status == 400 + assert response == {"ok": False} + + +class TestRagPipelineImportCheckDependenciesApi: + def test_get_success(self, app): + api = RagPipelineImportCheckDependenciesApi() + method = unwrap(api.get) + + pipeline = MagicMock(spec=Pipeline) + result = MagicMock() + result.model_dump.return_value = {"deps": []} + + service = MagicMock() + service.check_dependencies.return_value = result + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = MagicMock() + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.RagPipelineDslService", + return_value=service, + ), + ): + response, status = method(api, pipeline) + + assert status == 200 + assert response == {"deps": []} + + +class TestRagPipelineExportApi: + def test_get_with_include_secret(self, app): + api = RagPipelineExportApi() + method = unwrap(api.get) + + pipeline = MagicMock(spec=Pipeline) + service = MagicMock() + service.export_rag_pipeline_dsl.return_value = {"yaml": "data"} + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = MagicMock() + session_ctx.__exit__.return_value = None + + with ( + app.test_request_context("/?include_secret=true"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_import.RagPipelineDslService", + return_value=service, + ), + ): + response, status = method(api, pipeline) + + assert status == 200 + assert response == {"data": {"yaml": "data"}} diff --git a/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_workflow.py b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_workflow.py new file mode 100644 index 0000000000..7775cbdd81 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/rag_pipeline/test_rag_pipeline_workflow.py @@ -0,0 +1,688 @@ +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden, NotFound + +import services +from controllers.console import console_ns +from controllers.console.app.error import DraftWorkflowNotExist, DraftWorkflowNotSync +from controllers.console.datasets.rag_pipeline.rag_pipeline_workflow import ( + DefaultRagPipelineBlockConfigApi, + DraftRagPipelineApi, + DraftRagPipelineRunApi, + PublishedAllRagPipelineApi, + PublishedRagPipelineApi, + PublishedRagPipelineRunApi, + RagPipelineByIdApi, + RagPipelineDatasourceVariableApi, + RagPipelineDraftNodeRunApi, + RagPipelineDraftRunIterationNodeApi, + RagPipelineDraftRunLoopNodeApi, + RagPipelineRecommendedPluginApi, + RagPipelineTaskStopApi, + RagPipelineTransformApi, + RagPipelineWorkflowLastRunApi, +) +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError +from services.errors.app import WorkflowHashNotEqualError +from services.errors.llm import InvokeRateLimitError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestDraftWorkflowApi: + def test_get_draft_success(self, app): + api = DraftRagPipelineApi() + method = unwrap(api.get) + + pipeline = MagicMock() + workflow = MagicMock() + + service = MagicMock() + service.get_draft_workflow.return_value = workflow + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api, pipeline) + assert result == workflow + + def test_get_draft_not_exist(self, app): + api = DraftRagPipelineApi() + method = unwrap(api.get) + + pipeline = MagicMock() + service = MagicMock() + service.get_draft_workflow.return_value = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + with pytest.raises(DraftWorkflowNotExist): + method(api, pipeline) + + def test_sync_hash_not_match(self, app): + api = DraftRagPipelineApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + service = MagicMock() + service.sync_draft_workflow.side_effect = WorkflowHashNotEqualError() + + with ( + app.test_request_context("/", json={"graph": {}, "features": {}}), + patch.object(type(console_ns), "payload", {"graph": {}, "features": {}}), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + with pytest.raises(DraftWorkflowNotSync): + method(api, pipeline) + + def test_sync_invalid_text_plain(self, app): + api = DraftRagPipelineApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + with ( + app.test_request_context("/", data="bad-json", headers={"Content-Type": "text/plain"}), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + ): + response, status = method(api, pipeline) + assert status == 400 + + +class TestDraftRunNodes: + def test_iteration_node_success(self, app): + api = RagPipelineDraftRunIterationNodeApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(type(console_ns), "payload", {"inputs": {}}), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.PipelineGenerateService.generate_single_iteration", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.helper.compact_generate_response", + return_value={"ok": True}, + ), + ): + result = method(api, pipeline, "node") + assert result == {"ok": True} + + def test_iteration_node_conversation_not_exists(self, app): + api = RagPipelineDraftRunIterationNodeApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(type(console_ns), "payload", {"inputs": {}}), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.PipelineGenerateService.generate_single_iteration", + side_effect=services.errors.conversation.ConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(api, pipeline, "node") + + def test_loop_node_success(self, app): + api = RagPipelineDraftRunLoopNodeApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(type(console_ns), "payload", {"inputs": {}}), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.PipelineGenerateService.generate_single_loop", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.helper.compact_generate_response", + return_value={"ok": True}, + ), + ): + assert method(api, pipeline, "node") == {"ok": True} + + +class TestPipelineRunApis: + def test_draft_run_success(self, app): + api = DraftRagPipelineRunApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + payload = { + "inputs": {}, + "datasource_type": "x", + "datasource_info_list": [], + "start_node_id": "n", + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.PipelineGenerateService.generate", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.helper.compact_generate_response", + return_value={"ok": True}, + ), + ): + assert method(api, pipeline) == {"ok": True} + + def test_draft_run_rate_limit(self, app): + api = DraftRagPipelineRunApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + with ( + app.test_request_context( + "/", json={"inputs": {}, "datasource_type": "x", "datasource_info_list": [], "start_node_id": "n"} + ), + patch.object( + type(console_ns), + "payload", + {"inputs": {}, "datasource_type": "x", "datasource_info_list": [], "start_node_id": "n"}, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.PipelineGenerateService.generate", + side_effect=InvokeRateLimitError("limit"), + ), + ): + with pytest.raises(InvokeRateLimitHttpError): + method(api, pipeline) + + +class TestDraftNodeRun: + def test_execution_not_found(self, app): + api = RagPipelineDraftNodeRunApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + service = MagicMock() + service.run_draft_workflow_node.return_value = None + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(type(console_ns), "payload", {"inputs": {}}), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + with pytest.raises(ValueError): + method(api, pipeline, "node") + + +class TestPublishedPipelineApis: + def test_publish_success(self, app): + api = PublishedRagPipelineApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock(id="u1") + + workflow = MagicMock( + id="w1", + created_at=datetime.utcnow(), + ) + + session = MagicMock() + session.merge.return_value = pipeline + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + + service = MagicMock() + service.publish_workflow.return_value = workflow + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api, pipeline) + + assert result["result"] == "success" + assert "created_at" in result + + +class TestMiscApis: + def test_task_stop(self, app): + api = RagPipelineTaskStopApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock(id="u1") + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.AppQueueManager.set_stop_flag" + ) as stop_mock, + ): + result = method(api, pipeline, "task-1") + stop_mock.assert_called_once() + assert result["result"] == "success" + + def test_transform_forbidden(self, app): + api = RagPipelineTransformApi() + method = unwrap(api.post) + + user = MagicMock(has_edit_permission=False, is_dataset_operator=False) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + ): + with pytest.raises(Forbidden): + method(api, "ds1") + + def test_recommended_plugins(self, app): + api = RagPipelineRecommendedPluginApi() + method = unwrap(api.get) + + service = MagicMock() + service.get_recommended_plugins.return_value = [{"id": "p1"}] + + with ( + app.test_request_context("/?type=all"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api) + assert result == [{"id": "p1"}] + + +class TestPublishedRagPipelineRunApi: + def test_published_run_success(self, app): + api = PublishedRagPipelineRunApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + payload = { + "inputs": {}, + "datasource_type": "x", + "datasource_info_list": [], + "start_node_id": "n", + "response_mode": "blocking", + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.PipelineGenerateService.generate", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.helper.compact_generate_response", + return_value={"ok": True}, + ), + ): + result = method(api, pipeline) + assert result == {"ok": True} + + def test_published_run_rate_limit(self, app): + api = PublishedRagPipelineRunApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + payload = { + "inputs": {}, + "datasource_type": "x", + "datasource_info_list": [], + "start_node_id": "n", + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.PipelineGenerateService.generate", + side_effect=InvokeRateLimitError("limit"), + ), + ): + with pytest.raises(InvokeRateLimitHttpError): + method(api, pipeline) + + +class TestDefaultBlockConfigApi: + def test_get_block_config_success(self, app): + api = DefaultRagPipelineBlockConfigApi() + method = unwrap(api.get) + + pipeline = MagicMock() + + service = MagicMock() + service.get_default_block_config.return_value = {"k": "v"} + + with ( + app.test_request_context("/?q={}"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api, pipeline, "llm") + assert result == {"k": "v"} + + def test_get_block_config_invalid_json(self, app): + api = DefaultRagPipelineBlockConfigApi() + method = unwrap(api.get) + + pipeline = MagicMock() + + with app.test_request_context("/?q=bad-json"): + with pytest.raises(ValueError): + method(api, pipeline, "llm") + + +class TestPublishedAllRagPipelineApi: + def test_get_published_workflows_success(self, app): + api = PublishedAllRagPipelineApi() + method = unwrap(api.get) + + pipeline = MagicMock() + user = MagicMock(id="u1") + + service = MagicMock() + service.get_all_published_workflow.return_value = ([{"id": "w1"}], False) + + session = MagicMock() + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api, pipeline) + + assert result["items"] == [{"id": "w1"}] + assert result["has_more"] is False + + def test_get_published_workflows_forbidden(self, app): + api = PublishedAllRagPipelineApi() + method = unwrap(api.get) + + pipeline = MagicMock() + user = MagicMock(id="u1") + + with ( + app.test_request_context("/?user_id=u2"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + ): + with pytest.raises(Forbidden): + method(api, pipeline) + + +class TestRagPipelineByIdApi: + def test_patch_success(self, app): + api = RagPipelineByIdApi() + method = unwrap(api.patch) + + pipeline = MagicMock(tenant_id="t1") + user = MagicMock(id="u1") + + workflow = MagicMock() + + service = MagicMock() + service.update_workflow.return_value = workflow + + session = MagicMock() + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + + fake_db = MagicMock() + fake_db.engine = MagicMock() + + payload = {"marked_name": "test"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.db", + fake_db, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.Session", + return_value=session_ctx, + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api, pipeline, "w1") + + assert result == workflow + + def test_patch_no_fields(self, app): + api = RagPipelineByIdApi() + method = unwrap(api.patch) + + pipeline = MagicMock() + user = MagicMock() + + with ( + app.test_request_context("/", json={}), + patch.object(type(console_ns), "payload", {}), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + ): + result, status = method(api, pipeline, "w1") + assert status == 400 + + +class TestRagPipelineWorkflowLastRunApi: + def test_last_run_success(self, app): + api = RagPipelineWorkflowLastRunApi() + method = unwrap(api.get) + + pipeline = MagicMock() + workflow = MagicMock() + node_exec = MagicMock() + + service = MagicMock() + service.get_draft_workflow.return_value = workflow + service.get_node_last_run.return_value = node_exec + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api, pipeline, "node1") + assert result == node_exec + + def test_last_run_not_found(self, app): + api = RagPipelineWorkflowLastRunApi() + method = unwrap(api.get) + + pipeline = MagicMock() + + service = MagicMock() + service.get_draft_workflow.return_value = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + with pytest.raises(NotFound): + method(api, pipeline, "node1") + + +class TestRagPipelineDatasourceVariableApi: + def test_set_datasource_variables_success(self, app): + api = RagPipelineDatasourceVariableApi() + method = unwrap(api.post) + + pipeline = MagicMock() + user = MagicMock() + + payload = { + "datasource_type": "db", + "datasource_info": {}, + "start_node_id": "n1", + "start_node_title": "Node", + } + + service = MagicMock() + service.set_datasource_variables.return_value = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.current_account_with_tenant", + return_value=(user, "t"), + ), + patch( + "controllers.console.datasets.rag_pipeline.rag_pipeline_workflow.RagPipelineService", + return_value=service, + ), + ): + result = method(api, pipeline) + assert result is not None diff --git a/api/tests/unit_tests/controllers/console/datasets/test_data_source.py b/api/tests/unit_tests/controllers/console/datasets/test_data_source.py new file mode 100644 index 0000000000..3060062adf --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_data_source.py @@ -0,0 +1,444 @@ +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from werkzeug.exceptions import NotFound + +from controllers.console.datasets import data_source +from controllers.console.datasets.data_source import ( + DataSourceApi, + DataSourceNotionApi, + DataSourceNotionDatasetSyncApi, + DataSourceNotionDocumentSyncApi, + DataSourceNotionListApi, +) + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def tenant_ctx(): + return (MagicMock(id="u1"), "tenant-1") + + +@pytest.fixture +def patch_tenant(tenant_ctx): + with patch( + "controllers.console.datasets.data_source.current_account_with_tenant", + return_value=tenant_ctx, + ): + yield + + +@pytest.fixture +def mock_engine(): + with patch.object( + type(data_source.db), + "engine", + new_callable=PropertyMock, + return_value=MagicMock(), + ): + yield + + +class TestDataSourceApi: + def test_get_success(self, app, patch_tenant): + api = DataSourceApi() + method = unwrap(api.get) + + binding = MagicMock( + id="b1", + provider="notion", + created_at="now", + disabled=False, + source_info={}, + ) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.data_source.db.session.scalars", + return_value=MagicMock(all=lambda: [binding]), + ), + ): + response, status = method(api) + + assert status == 200 + assert response["data"][0]["is_bound"] is True + + def test_get_no_bindings(self, app, patch_tenant): + api = DataSourceApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.data_source.db.session.scalars", + return_value=MagicMock(all=lambda: []), + ), + ): + response, status = method(api) + + assert status == 200 + assert response["data"] == [] + + def test_patch_enable_binding(self, app, patch_tenant, mock_engine): + api = DataSourceApi() + method = unwrap(api.patch) + + binding = MagicMock(id="b1", disabled=True) + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.data_source.Session") as mock_session_class, + patch("controllers.console.datasets.data_source.db.session.add"), + patch("controllers.console.datasets.data_source.db.session.commit"), + ): + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.execute.return_value.scalar_one_or_none.return_value = binding + + response, status = method(api, "b1", "enable") + + assert status == 200 + assert binding.disabled is False + + def test_patch_disable_binding(self, app, patch_tenant, mock_engine): + api = DataSourceApi() + method = unwrap(api.patch) + + binding = MagicMock(id="b1", disabled=False) + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.data_source.Session") as mock_session_class, + patch("controllers.console.datasets.data_source.db.session.add"), + patch("controllers.console.datasets.data_source.db.session.commit"), + ): + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.execute.return_value.scalar_one_or_none.return_value = binding + + response, status = method(api, "b1", "disable") + + assert status == 200 + assert binding.disabled is True + + def test_patch_binding_not_found(self, app, patch_tenant, mock_engine): + api = DataSourceApi() + method = unwrap(api.patch) + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.data_source.Session") as mock_session_class, + ): + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.execute.return_value.scalar_one_or_none.return_value = None + + with pytest.raises(NotFound): + method(api, "b1", "enable") + + def test_patch_enable_already_enabled(self, app, patch_tenant, mock_engine): + api = DataSourceApi() + method = unwrap(api.patch) + + binding = MagicMock(id="b1", disabled=False) + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.data_source.Session") as mock_session_class, + ): + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.execute.return_value.scalar_one_or_none.return_value = binding + + with pytest.raises(ValueError): + method(api, "b1", "enable") + + def test_patch_disable_already_disabled(self, app, patch_tenant, mock_engine): + api = DataSourceApi() + method = unwrap(api.patch) + + binding = MagicMock(id="b1", disabled=True) + + with ( + app.test_request_context("/"), + patch("controllers.console.datasets.data_source.Session") as mock_session_class, + ): + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.execute.return_value.scalar_one_or_none.return_value = binding + + with pytest.raises(ValueError): + method(api, "b1", "disable") + + +class TestDataSourceNotionListApi: + def test_get_credential_not_found(self, app, patch_tenant): + api = DataSourceNotionListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?credential_id=c1"), + patch( + "controllers.console.datasets.data_source.DatasourceProviderService.get_datasource_credentials", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api) + + def test_get_success_no_dataset_id(self, app, patch_tenant, mock_engine): + api = DataSourceNotionListApi() + method = unwrap(api.get) + + page = MagicMock( + page_id="p1", + page_name="Page 1", + type="page", + parent_id="parent", + page_icon=None, + ) + + online_document_message = MagicMock( + result=[ + MagicMock( + workspace_id="w1", + workspace_name="My Workspace", + workspace_icon="icon", + pages=[page], + ) + ] + ) + + with ( + app.test_request_context("/?credential_id=c1"), + patch( + "controllers.console.datasets.data_source.DatasourceProviderService.get_datasource_credentials", + return_value={"token": "t"}, + ), + patch( + "core.datasource.datasource_manager.DatasourceManager.get_datasource_runtime", + return_value=MagicMock( + get_online_document_pages=lambda **kw: iter([online_document_message]), + datasource_provider_type=lambda: None, + ), + ), + ): + response, status = method(api) + + assert status == 200 + + def test_get_success_with_dataset_id(self, app, patch_tenant, mock_engine): + api = DataSourceNotionListApi() + method = unwrap(api.get) + + page = MagicMock( + page_id="p1", + page_name="Page 1", + type="page", + parent_id="parent", + page_icon=None, + ) + + online_document_message = MagicMock( + result=[ + MagicMock( + workspace_id="w1", + workspace_name="My Workspace", + workspace_icon="icon", + pages=[page], + ) + ] + ) + + dataset = MagicMock(data_source_type="notion_import") + document = MagicMock(data_source_info='{"notion_page_id": "p1"}') + + with ( + app.test_request_context("/?credential_id=c1&dataset_id=ds1"), + patch( + "controllers.console.datasets.data_source.DatasourceProviderService.get_datasource_credentials", + return_value={"token": "t"}, + ), + patch( + "controllers.console.datasets.data_source.DatasetService.get_dataset", + return_value=dataset, + ), + patch("controllers.console.datasets.data_source.Session") as mock_session_class, + patch( + "core.datasource.datasource_manager.DatasourceManager.get_datasource_runtime", + return_value=MagicMock( + get_online_document_pages=lambda **kw: iter([online_document_message]), + datasource_provider_type=lambda: None, + ), + ), + ): + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.scalars.return_value.all.return_value = [document] + + response, status = method(api) + + assert status == 200 + + def test_get_invalid_dataset_type(self, app, patch_tenant, mock_engine): + api = DataSourceNotionListApi() + method = unwrap(api.get) + + dataset = MagicMock(data_source_type="other_type") + + with ( + app.test_request_context("/?credential_id=c1&dataset_id=ds1"), + patch( + "controllers.console.datasets.data_source.DatasourceProviderService.get_datasource_credentials", + return_value={"token": "t"}, + ), + patch( + "controllers.console.datasets.data_source.DatasetService.get_dataset", + return_value=dataset, + ), + patch("controllers.console.datasets.data_source.Session"), + ): + with pytest.raises(ValueError): + method(api) + + +class TestDataSourceNotionApi: + def test_get_preview_success(self, app, patch_tenant): + api = DataSourceNotionApi() + method = unwrap(api.get) + + extractor = MagicMock(extract=lambda: [MagicMock(page_content="hello")]) + + with ( + app.test_request_context("/?credential_id=c1"), + patch( + "controllers.console.datasets.data_source.DatasourceProviderService.get_datasource_credentials", + return_value={"integration_secret": "t"}, + ), + patch( + "controllers.console.datasets.data_source.NotionExtractor", + return_value=extractor, + ), + ): + response, status = method(api, "p1", "page") + + assert status == 200 + + def test_post_indexing_estimate_success(self, app, patch_tenant): + api = DataSourceNotionApi() + method = unwrap(api.post) + + payload = { + "notion_info_list": [ + { + "workspace_id": "w1", + "credential_id": "c1", + "pages": [{"page_id": "p1", "type": "page"}], + } + ], + "process_rule": {"rules": {}}, + "doc_form": "text_model", + "doc_language": "English", + } + + with ( + app.test_request_context("/", method="POST", json=payload, headers={"Content-Type": "application/json"}), + patch( + "controllers.console.datasets.data_source.DocumentService.estimate_args_validate", + ), + patch( + "controllers.console.datasets.data_source.IndexingRunner.indexing_estimate", + return_value=MagicMock(model_dump=lambda: {"total_pages": 1}), + ), + ): + response, status = method(api) + + assert status == 200 + + +class TestDataSourceNotionDatasetSyncApi: + def test_get_success(self, app, patch_tenant): + api = DataSourceNotionDatasetSyncApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.data_source.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.data_source.DocumentService.get_document_by_dataset_id", + return_value=[MagicMock(id="d1")], + ), + patch( + "controllers.console.datasets.data_source.document_indexing_sync_task.delay", + return_value=None, + ), + ): + response, status = method(api, "ds-1") + + assert status == 200 + + def test_get_dataset_not_found(self, app, patch_tenant): + api = DataSourceNotionDatasetSyncApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.data_source.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1") + + +class TestDataSourceNotionDocumentSyncApi: + def test_get_success(self, app, patch_tenant): + api = DataSourceNotionDocumentSyncApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.data_source.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.data_source.DocumentService.get_document", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.data_source.document_indexing_sync_task.delay", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + def test_get_document_not_found(self, app, patch_tenant): + api = DataSourceNotionDocumentSyncApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.data_source.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.data_source.DocumentService.get_document", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py new file mode 100644 index 0000000000..f9fc2ac397 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets.py @@ -0,0 +1,1926 @@ +import datetime +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from werkzeug.exceptions import BadRequest, Forbidden, NotFound + +import services +from controllers.console import console_ns +from controllers.console.app.error import ProviderNotInitializeError +from controllers.console.datasets.datasets import ( + DatasetApi, + DatasetApiBaseUrlApi, + DatasetApiDeleteApi, + DatasetApiKeyApi, + DatasetAutoDisableLogApi, + DatasetEnableApiApi, + DatasetErrorDocs, + DatasetIndexingEstimateApi, + DatasetIndexingStatusApi, + DatasetListApi, + DatasetPermissionUserListApi, + DatasetQueryApi, + DatasetRelatedAppListApi, + DatasetRetrievalSettingApi, + DatasetRetrievalSettingMockApi, + DatasetUseCheckApi, +) +from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError +from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError +from core.provider_manager import ProviderManager +from models.enums import CreatorUserRole +from models.model import ApiToken, UploadFile +from services.dataset_service import DatasetPermissionService, DatasetService + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestDatasetList: + def _mock_dataset_dict(self, **overrides): + base = { + "id": "ds-1", + "indexing_technique": "economy", + "embedding_model": None, + "embedding_model_provider": None, + "permission": "only_me", + } + base.update(overrides) + return base + + def _mock_user(self): + user = MagicMock() + user.is_dataset_editor = True + return user + + def test_get_success_basic(self, app): + api = DatasetListApi() + method = unwrap(api.get) + + current_user = self._mock_user() + datasets = [MagicMock()] + marshaled = [self._mock_dataset_dict()] + + with app.test_request_context("/datasets"): + with ( + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_datasets", + return_value=(datasets, 1), + ), + patch( + "controllers.console.datasets.datasets.marshal", + return_value=marshaled, + ), + patch.object( + ProviderManager, + "get_configurations", + return_value=MagicMock(get_models=lambda **_: []), + ), + ): + resp, status = method(api) + + assert status == 200 + assert resp["total"] == 1 + assert resp["data"][0]["embedding_available"] is True + + def test_get_with_ids_filter(self, app): + api = DatasetListApi() + method = unwrap(api.get) + + current_user = self._mock_user() + datasets = [MagicMock()] + marshaled = [self._mock_dataset_dict()] + + with app.test_request_context("/datasets?ids=1&ids=2"): + with ( + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_datasets_by_ids", + return_value=(datasets, 2), + ) as by_ids_mock, + patch( + "controllers.console.datasets.datasets.marshal", + return_value=marshaled, + ), + patch.object( + ProviderManager, + "get_configurations", + return_value=MagicMock(get_models=lambda **_: []), + ), + ): + resp, status = method(api) + + by_ids_mock.assert_called_once() + assert status == 200 + assert resp["total"] == 2 + + def test_get_with_tag_ids(self, app): + api = DatasetListApi() + method = unwrap(api.get) + + current_user = self._mock_user() + datasets = [MagicMock()] + marshaled = [self._mock_dataset_dict()] + + with app.test_request_context("/datasets?tag_ids=tag1"): + with ( + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_datasets", + return_value=(datasets, 1), + ), + patch( + "controllers.console.datasets.datasets.marshal", + return_value=marshaled, + ), + patch.object( + ProviderManager, + "get_configurations", + return_value=MagicMock(get_models=lambda **_: []), + ), + ): + resp, status = method(api) + + assert status == 200 + + def test_embedding_available_false(self, app): + api = DatasetListApi() + method = unwrap(api.get) + + current_user = self._mock_user() + datasets = [MagicMock()] + marshaled = [ + self._mock_dataset_dict( + indexing_technique="high_quality", + embedding_model="text-embed", + embedding_model_provider="openai", + ) + ] + + config = MagicMock() + config.get_models.return_value = [] # model not available + + with app.test_request_context("/datasets"): + with ( + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_datasets", + return_value=(datasets, 1), + ), + patch( + "controllers.console.datasets.datasets.marshal", + return_value=marshaled, + ), + patch.object( + ProviderManager, + "get_configurations", + return_value=config, + ), + ): + resp, status = method(api) + + assert resp["data"][0]["embedding_available"] is False + + def test_partial_members_permission(self, app): + api = DatasetListApi() + method = unwrap(api.get) + + current_user = self._mock_user() + datasets = [MagicMock()] + marshaled = [self._mock_dataset_dict(permission="partial_members")] + + with app.test_request_context("/datasets"): + with ( + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_datasets", + return_value=(datasets, 1), + ), + patch( + "controllers.console.datasets.datasets.db.session.execute", + return_value=MagicMock(all=lambda: [("ds-1", "u1")]), + ), + patch( + "controllers.console.datasets.datasets.marshal", + return_value=marshaled, + ), + patch.object( + ProviderManager, + "get_configurations", + return_value=MagicMock(get_models=lambda **_: []), + ), + ): + resp, status = method(api) + + assert resp["data"][0]["partial_member_list"] == ["u1"] + + +class TestDatasetListApiPost: + def test_post_success(self, app): + api = DatasetListApi() + method = unwrap(api.post) + + payload = { + "name": "My Dataset", + "description": "desc", + "indexing_technique": "economy", + "provider": "vendor", + } + + user = MagicMock() + user.is_dataset_editor = True + + dataset = MagicMock() + # ---- minimal required fields for marshal ---- + dataset.embedding_available = True + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.is_multimodal = False + dataset.documents = [] + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + + with ( + app.test_request_context("/datasets", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch.object( + DatasetService, + "create_empty_dataset", + return_value=dataset, + ), + ): + _, status = method(api) + + assert status == 201 + + def test_post_forbidden(self, app): + api = DatasetListApi() + method = unwrap(api.post) + + payload = {"name": "test"} + + user = MagicMock() + user.is_dataset_editor = False + + with ( + app.test_request_context("/datasets", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + ): + with pytest.raises(Forbidden): + method(api) + + def test_post_duplicate_name(self, app): + api = DatasetListApi() + method = unwrap(api.post) + + payload = {"name": "duplicate"} + + user = MagicMock() + user.is_dataset_editor = True + + with ( + app.test_request_context("/datasets", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch.object( + DatasetService, + "create_empty_dataset", + side_effect=services.errors.dataset.DatasetNameDuplicateError(), + ), + ): + with pytest.raises(DatasetNameDuplicateError): + method(api) + + def test_post_invalid_payload_missing_name(self, app): + api = DatasetListApi() + method = unwrap(api.post) + + with app.test_request_context("/datasets", json={}), patch.object(type(console_ns), "payload", {}): + with pytest.raises(ValueError): + method(api) + + def test_post_invalid_indexing_technique(self, app): + api = DatasetListApi() + method = unwrap(api.post) + + payload = { + "name": "bad", + "indexing_technique": "invalid-tech", + } + + with app.test_request_context("/datasets", json=payload), patch.object(type(console_ns), "payload", payload): + with pytest.raises(ValueError, match="Invalid indexing technique"): + method(api) + + def test_post_invalid_provider(self, app): + api = DatasetListApi() + method = unwrap(api.post) + + payload = { + "name": "bad", + "provider": "unknown", + } + + with app.test_request_context("/datasets", json=payload), patch.object(type(console_ns), "payload", payload): + with pytest.raises(ValueError, match="Invalid provider"): + method(api) + + +class TestDatasetApiGet: + def test_get_success_basic(self, app): + api = DatasetApi() + method = unwrap(api.get) + + dataset_id = "123e4567-e89b-12d3-a456-426614174000" + + user = MagicMock() + tenant_id = "tenant-1" + + dataset = MagicMock() + dataset.id = dataset_id + dataset.indexing_technique = "economy" + dataset.embedding_model_provider = None + + dataset.embedding_available = True + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.is_multimodal = False + dataset.documents = [] + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + dataset.permission = "only_me" + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, tenant_id), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + return_value=None, + ), + patch("controllers.console.datasets.datasets.ProviderManager") as provider_manager_mock, + ): + # embedding models exist → embedding_available stays True + provider_manager_mock.return_value.get_configurations.return_value.get_models.return_value = [] + + data, status = method(api, dataset_id) + + assert status == 200 + assert data["embedding_available"] is True + + def test_get_dataset_not_found(self, app): + api = DatasetApi() + method = unwrap(api.get) + + dataset_id = "missing-id" + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound, match="Dataset not found"): + method(api, dataset_id) + + def test_get_permission_denied(self, app): + api = DatasetApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + dataset = MagicMock() + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("no access"), + ), + ): + with pytest.raises(Forbidden, match="no access"): + method(api, dataset_id) + + def test_get_high_quality_embedding_unavailable(self, app): + api = DatasetApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + user = MagicMock() + tenant_id = "tenant-1" + + dataset = MagicMock() + dataset.id = dataset_id + dataset.indexing_technique = "high_quality" + dataset.embedding_model = "text-embedding" + dataset.embedding_model_provider = "openai" + + dataset.embedding_available = True + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.is_multimodal = False + dataset.documents = [] + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + dataset.permission = "only_me" + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, tenant_id), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + return_value=None, + ), + patch("controllers.console.datasets.datasets.ProviderManager") as provider_manager_mock, + ): + # embedding model NOT configured + provider_manager_mock.return_value.get_configurations.return_value.get_models.return_value = [] + + data, _ = method(api, dataset_id) + + assert data["embedding_available"] is False + + def test_get_partial_members_permission(self, app): + api = DatasetApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + + dataset = MagicMock() + dataset.id = dataset_id + dataset.indexing_technique = "economy" + dataset.embedding_model_provider = None + dataset.permission = "partial_members" + + dataset.embedding_available = True + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.is_multimodal = False + dataset.documents = [] + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + + partial_members = [{"id": "u1"}, {"id": "u2"}] + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + return_value=None, + ), + patch.object( + DatasetPermissionService, + "get_dataset_partial_member_list", + return_value=partial_members, + ), + patch("controllers.console.datasets.datasets.ProviderManager") as provider_manager_mock, + ): + provider_manager_mock.return_value.get_configurations.return_value.get_models.return_value = [] + + data, _ = method(api, dataset_id) + + assert data["partial_member_list"] == partial_members + + +class TestDatasetApiPatch: + def test_patch_success_basic(self, app): + api = DatasetApi() + method = unwrap(api.patch) + + dataset_id = "dataset-id" + + payload = { + "name": "updated-name", + "description": "updated description", + } + + user = MagicMock() + tenant_id = "tenant-1" + + dataset = MagicMock() + dataset.id = dataset_id + dataset.tenant_id = tenant_id + dataset.permission = "only_me" + dataset.indexing_technique = "economy" + dataset.embedding_model_provider = None + + dataset.embedding_available = True + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.is_multimodal = False + dataset.documents = [] + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, tenant_id), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetPermissionService, + "check_permission", + return_value=None, + ), + patch.object( + DatasetService, + "update_dataset", + return_value=dataset, + ), + patch.object( + DatasetPermissionService, + "get_dataset_partial_member_list", + return_value=[], + ), + ): + result, status = method(api, dataset_id) + + assert status == 200 + assert result["partial_member_list"] == [] + + def test_patch_dataset_not_found(self, app): + api = DatasetApi() + method = unwrap(api.patch) + + with ( + app.test_request_context("/datasets/missing"), + patch.object( + DatasetService, + "get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound, match="Dataset not found"): + method(api, "missing") + + def test_patch_permission_denied(self, app): + api = DatasetApi() + method = unwrap(api.patch) + + dataset_id = "dataset-id" + dataset = MagicMock() + + payload = {"name": "x"} + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch.object(type(console_ns), "payload", payload), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant"), + ), + patch.object( + DatasetPermissionService, + "check_permission", + side_effect=Forbidden("no permission"), + ), + ): + with pytest.raises(Forbidden): + method(api, dataset_id) + + def test_patch_partial_members_update(self, app): + api = DatasetApi() + method = unwrap(api.patch) + + dataset_id = "dataset-id" + + payload = { + "permission": "partial_members", + "partial_member_list": [{"id": "u1"}, {"id": "u2"}], + } + + dataset = MagicMock() + dataset.id = dataset_id + dataset.permission = "partial_members" + dataset.indexing_technique = "economy" + dataset.embedding_model_provider = None + + dataset.embedding_available = True + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.is_multimodal = False + dataset.documents = [] + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetPermissionService, + "check_permission", + return_value=None, + ), + patch.object( + DatasetService, + "update_dataset", + return_value=dataset, + ), + patch.object( + DatasetPermissionService, + "update_partial_member_list", + return_value=None, + ), + patch.object( + DatasetPermissionService, + "get_dataset_partial_member_list", + return_value=payload["partial_member_list"], + ), + ): + result, _ = method(api, dataset_id) + + assert result["partial_member_list"] == payload["partial_member_list"] + + def test_patch_clear_partial_members(self, app): + api = DatasetApi() + method = unwrap(api.patch) + + dataset_id = "dataset-id" + + payload = { + "permission": "only_me", + } + + dataset = MagicMock() + dataset.id = dataset_id + dataset.permission = "only_me" + dataset.indexing_technique = "economy" + dataset.embedding_model_provider = None + + dataset.embedding_available = True + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.is_multimodal = False + dataset.documents = [] + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetPermissionService, + "check_permission", + return_value=None, + ), + patch.object( + DatasetService, + "update_dataset", + return_value=dataset, + ), + patch.object( + DatasetPermissionService, + "clear_partial_member_list", + return_value=None, + ), + patch.object( + DatasetPermissionService, + "get_dataset_partial_member_list", + return_value=[], + ), + ): + result, _ = method(api, dataset_id) + + assert result["partial_member_list"] == [] + + +class TestDatasetApiDelete: + def test_delete_success(self, app): + api = DatasetApi() + method = unwrap(api.delete) + + dataset_id = "dataset-id" + user = MagicMock() + user.has_edit_permission = True + user.is_dataset_operator = False + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch.object( + DatasetService, + "delete_dataset", + return_value=True, + ), + patch.object( + DatasetPermissionService, + "clear_partial_member_list", + return_value=None, + ), + ): + result, status = method(api, dataset_id) + + assert status == 204 + assert result == {"result": "success"} + + def test_delete_forbidden_no_permission(self, app): + api = DatasetApi() + method = unwrap(api.delete) + + dataset_id = "dataset-id" + user = MagicMock() + user.has_edit_permission = False + user.is_dataset_operator = False + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, "tenant"), + ), + ): + with pytest.raises(Forbidden): + method(api, dataset_id) + + def test_delete_dataset_not_found(self, app): + api = DatasetApi() + method = unwrap(api.delete) + + dataset_id = "missing-dataset" + user = MagicMock() + user.has_edit_permission = True + user.is_dataset_operator = False + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch.object( + DatasetService, + "delete_dataset", + return_value=False, + ), + ): + with pytest.raises(NotFound, match="Dataset not found"): + method(api, dataset_id) + + def test_delete_dataset_in_use(self, app): + api = DatasetApi() + method = unwrap(api.delete) + + dataset_id = "dataset-id" + user = MagicMock() + user.has_edit_permission = True + user.is_dataset_operator = False + + with ( + app.test_request_context(f"/datasets/{dataset_id}"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch.object( + DatasetService, + "delete_dataset", + side_effect=services.errors.dataset.DatasetInUseError(), + ), + ): + with pytest.raises(DatasetInUseError): + method(api, dataset_id) + + +class TestDatasetUseCheckApi: + def test_get_use_check_true(self, app): + api = DatasetUseCheckApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + + with ( + app.test_request_context(f"/datasets/{dataset_id}/use-check"), + patch.object( + DatasetService, + "dataset_use_check", + return_value=True, + ), + ): + result, status = method(api, dataset_id) + + assert status == 200 + assert result == {"is_using": True} + + def test_get_use_check_false(self, app): + api = DatasetUseCheckApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + + with ( + app.test_request_context(f"/datasets/{dataset_id}/use-check"), + patch.object( + DatasetService, + "dataset_use_check", + return_value=False, + ), + ): + result, status = method(api, dataset_id) + + assert status == 200 + assert result == {"is_using": False} + + +class TestDatasetQueryApi: + def test_get_queries_success(self, app): + api = DatasetQueryApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + + current_user = MagicMock() + + dataset = MagicMock() + dataset.id = dataset_id + + queries = [MagicMock(), MagicMock()] + + with ( + app.test_request_context("/datasets/queries?page=1&limit=20"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + return_value=None, + ), + patch.object( + DatasetService, + "get_dataset_queries", + return_value=(queries, 2), + ), + ): + response, status = method(api, dataset_id) + + assert status == 200 + assert response["total"] == 2 + assert response["page"] == 1 + assert response["limit"] == 20 + assert response["has_more"] is False + assert len(response["data"]) == 2 + + def test_get_queries_dataset_not_found(self, app): + api = DatasetQueryApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + current_user = MagicMock() + + with ( + app.test_request_context("/datasets/queries"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound, match="Dataset not found"): + method(api, dataset_id) + + def test_get_queries_permission_denied(self, app): + api = DatasetQueryApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + current_user = MagicMock() + + dataset = MagicMock() + + with ( + app.test_request_context("/datasets/queries"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("no access"), + ), + ): + with pytest.raises(Forbidden): + method(api, dataset_id) + + def test_get_queries_pagination_has_more(self, app): + api = DatasetQueryApi() + method = unwrap(api.get) + + dataset_id = "dataset-id" + current_user = MagicMock() + + dataset = MagicMock() + dataset.id = dataset_id + + queries = [MagicMock() for _ in range(20)] + + with ( + app.test_request_context("/datasets/queries?page=1&limit=20"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + return_value=None, + ), + patch.object( + DatasetService, + "get_dataset_queries", + return_value=(queries, 40), + ), + ): + response, status = method(api, dataset_id) + + assert status == 200 + assert response["has_more"] is True + assert len(response["data"]) == 20 + + +class TestDatasetIndexingEstimateApi: + def _upload_file(self, *, tenant_id: str = "tenant-1", file_id: str = "file-1") -> UploadFile: + upload_file = UploadFile( + tenant_id=tenant_id, + storage_type="local", + key="key", + name="name.txt", + size=1, + extension="txt", + mime_type="text/plain", + created_by_role=CreatorUserRole.ACCOUNT, + created_by="user-1", + created_at=datetime.datetime.now(tz=datetime.UTC), + used=False, + ) + upload_file.id = file_id + return upload_file + + def _base_payload(self): + return { + "info_list": { + "data_source_type": "upload_file", + "file_info_list": { + "file_ids": ["file-1"], + }, + }, + "process_rule": {"chunk_size": 100}, + "indexing_technique": "high_quality", + "doc_form": "text_model", + "doc_language": "English", + "dataset_id": None, + } + + def test_post_success_upload_file(self, app): + api = DatasetIndexingEstimateApi() + method = unwrap(api.post) + + payload = self._base_payload() + + mock_file = self._upload_file() + + mock_response = MagicMock() + mock_response.model_dump.return_value = {"tokens": 100} + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.datasets.DocumentService.estimate_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: [mock_file]), + ), + patch( + "controllers.console.datasets.datasets.IndexingRunner.indexing_estimate", + return_value=mock_response, + ), + ): + response, status = method(api) + + assert status == 200 + assert response == {"tokens": 100} + + def test_post_file_not_found(self, app): + api = DatasetIndexingEstimateApi() + method = unwrap(api.post) + + payload = self._base_payload() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.datasets.DocumentService.estimate_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: None), + ), + ): + with pytest.raises(NotFound): + method(api) + + def test_post_llm_bad_request_error(self, app): + api = DatasetIndexingEstimateApi() + method = unwrap(api.post) + mock_file = self._upload_file() + + payload = self._base_payload() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.datasets.DocumentService.estimate_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: [mock_file]), + ), + patch( + "controllers.console.datasets.datasets.IndexingRunner.indexing_estimate", + side_effect=LLMBadRequestError(), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api) + + def test_post_provider_token_not_init(self, app): + api = DatasetIndexingEstimateApi() + method = unwrap(api.post) + mock_file = self._upload_file() + + payload = self._base_payload() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.datasets.DocumentService.estimate_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: [mock_file]), + ), + patch( + "controllers.console.datasets.datasets.IndexingRunner.indexing_estimate", + side_effect=ProviderTokenNotInitError("token missing"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api) + + def test_post_generic_exception(self, app): + api = DatasetIndexingEstimateApi() + method = unwrap(api.post) + mock_file = self._upload_file() + + payload = self._base_payload() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.datasets.DocumentService.estimate_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: [mock_file]), + ), + patch( + "controllers.console.datasets.datasets.IndexingRunner.indexing_estimate", + side_effect=Exception("boom"), + ), + ): + with pytest.raises(IndexingEstimateError): + method(api) + + +class TestDatasetRelatedAppListApi: + def test_get_success(self, app): + api = DatasetRelatedAppListApi() + method = unwrap(api.get) + + dataset = MagicMock() + dataset.id = "dataset-1" + + app1 = MagicMock() + app2 = MagicMock() + + join1 = MagicMock(app=app1) + join2 = MagicMock(app=app2) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_related_apps", + return_value=[join1, join2], + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + assert response["total"] == 2 + assert response["data"] == [app1, app2] + + def test_get_dataset_not_found(self, app): + api = DatasetRelatedAppListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "dataset-1") + + def test_get_permission_denied(self, app): + api = DatasetRelatedAppListApi() + method = unwrap(api.get) + + dataset = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("no permission"), + ), + ): + with pytest.raises(Forbidden): + method(api, "dataset-1") + + def test_get_filters_none_apps(self, app): + api = DatasetRelatedAppListApi() + method = unwrap(api.get) + + dataset = MagicMock() + dataset.id = "dataset-1" + + app1 = MagicMock() + + join1 = MagicMock(app=app1) + join2 = MagicMock(app=None) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_related_apps", + return_value=[join1, join2], + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + assert response["total"] == 1 + assert response["data"] == [app1] + + +class TestDatasetIndexingStatusApi: + def test_get_success_with_documents(self, app): + api = DatasetIndexingStatusApi() + method = unwrap(api.get) + + document = MagicMock() + document.id = "doc-1" + document.indexing_status = "completed" + document.processing_started_at = None + document.parsing_completed_at = None + document.cleaning_completed_at = None + document.splitting_completed_at = None + document.completed_at = None + document.paused_at = None + document.error = None + document.stopped_at = None + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: [document]), + ), + patch( + "controllers.console.datasets.datasets.db.session.query", + return_value=MagicMock(where=lambda *args, **kwargs: MagicMock(count=lambda: 3)), + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + assert "data" in response + assert len(response["data"]) == 1 + + item = response["data"][0] + assert item["completed_segments"] == 3 + assert item["total_segments"] == 3 + + def test_get_success_no_documents(self, app): + api = DatasetIndexingStatusApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: []), + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + assert response == {"data": []} + + def test_segment_counts_different_values(self, app): + api = DatasetIndexingStatusApi() + method = unwrap(api.get) + + document = MagicMock() + document.id = "doc-1" + document.indexing_status = "indexing" + document.processing_started_at = None + document.parsing_completed_at = None + document.cleaning_completed_at = None + document.splitting_completed_at = None + document.completed_at = None + document.paused_at = None + document.error = None + document.stopped_at = None + + # First count = completed segments, second = total segments + query_mock = MagicMock() + query_mock.where.side_effect = [ + MagicMock(count=lambda: 2), + MagicMock(count=lambda: 5), + ] + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: [document]), + ), + patch( + "controllers.console.datasets.datasets.db.session.query", + return_value=query_mock, + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + item = response["data"][0] + assert item["completed_segments"] == 2 + assert item["total_segments"] == 5 + + +class TestDatasetApiKeyApi: + def test_get_api_keys_success(self, app): + api = DatasetApiKeyApi() + method = unwrap(api.get) + + mock_key_1 = MagicMock(spec=ApiToken) + mock_key_2 = MagicMock(spec=ApiToken) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.scalars", + return_value=MagicMock(all=lambda: [mock_key_1, mock_key_2]), + ), + ): + response = method(api) + + assert "items" in response + assert response["items"] == [mock_key_1, mock_key_2] + + def test_post_create_api_key_success(self, app): + api = DatasetApiKeyApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.query", + return_value=MagicMock(where=lambda *args, **kwargs: MagicMock(count=lambda: 3)), + ), + patch( + "controllers.console.datasets.datasets.ApiToken.generate_api_key", + return_value="dataset-abc123", + ), + patch( + "controllers.console.datasets.datasets.db.session.add", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.db.session.commit", + return_value=None, + ), + ): + response, status = method(api) + + assert status == 200 + assert isinstance(response, ApiToken) + assert response.token == "dataset-abc123" + assert response.type == "dataset" + + def test_post_exceed_max_keys(self, app): + api = DatasetApiKeyApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.query", + return_value=MagicMock(where=lambda *args, **kwargs: MagicMock(count=lambda: 10)), + ), + ): + with pytest.raises(BadRequest) as exc_info: + method(api) + + assert exc_info.value.code == 400 + assert exc_info.value.data == { + "message": "Cannot create more than 10 API keys for this resource type.", + "custom": "max_keys_exceeded", + } + + +class TestDatasetApiDeleteApi: + def test_delete_success(self, app): + api = DatasetApiDeleteApi() + method = unwrap(api.delete) + + mock_key = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.query", + return_value=MagicMock(where=lambda *args, **kwargs: MagicMock(first=lambda: mock_key)), + ), + patch( + "controllers.console.datasets.datasets.db.session.commit", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.db.session.delete", + return_value=None, + ), + ): + response, status = method(api, "api-key-id") + + assert status == 204 + assert response["result"] == "success" + + def test_delete_key_not_found(self, app): + api = DatasetApiDeleteApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.db.session.query", + return_value=MagicMock(where=lambda *args, **kwargs: MagicMock(first=lambda: None)), + ), + ): + with pytest.raises(NotFound): + method(api, "api-key-id") + + +class TestDatasetEnableApiApi: + def test_enable_api(self, app): + api = DatasetEnableApiApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.DatasetService.update_dataset_api_status", + return_value=None, + ), + ): + response, status = method(api, "dataset-1", "enable") + + assert status == 200 + assert response["result"] == "success" + + def test_disable_api(self, app): + api = DatasetEnableApiApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.DatasetService.update_dataset_api_status", + return_value=None, + ), + ): + response, status = method(api, "dataset-1", "disable") + + assert status == 200 + assert response["result"] == "success" + + +class TestDatasetApiBaseUrlApi: + def test_get_api_base_url_from_config(self, app): + api = DatasetApiBaseUrlApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.dify_config.SERVICE_API_URL", + "https://example.com", + ), + ): + response = method(api) + + assert response["api_base_url"] == "https://example.com/v1" + + def test_get_api_base_url_from_request(self, app): + api = DatasetApiBaseUrlApi() + method = unwrap(api.get) + + with ( + app.test_request_context("http://localhost:5000/"), + patch( + "controllers.console.datasets.datasets.dify_config.SERVICE_API_URL", + None, + ), + ): + response = method(api) + + assert response["api_base_url"] == "http://localhost:5000/v1" + + +class TestDatasetRetrievalSettingApi: + def test_get_success(self, app): + api = DatasetRetrievalSettingApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.dify_config.VECTOR_STORE", + "qdrant", + ), + patch( + "controllers.console.datasets.datasets._get_retrieval_methods_by_vector_type", + return_value={"retrieval_method": ["semantic", "hybrid"]}, + ), + ): + response = method(api) + + assert "retrieval_method" in response + + +class TestDatasetRetrievalSettingMockApi: + def test_get_success(self, app): + api = DatasetRetrievalSettingMockApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets._get_retrieval_methods_by_vector_type", + return_value={"retrieval_method": ["semantic"]}, + ), + ): + response = method(api, "milvus") + + assert response["retrieval_method"] == ["semantic"] + + +class TestDatasetErrorDocs: + def test_get_success(self, app): + api = DatasetErrorDocs() + method = unwrap(api.get) + + dataset = MagicMock() + error_doc = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.DocumentService.get_error_documents_by_dataset_id", + return_value=[error_doc], + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + assert response["total"] == 1 + + def test_get_dataset_not_found(self, app): + api = DatasetErrorDocs() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "dataset-1") + + +class TestDatasetPermissionUserListApi: + def test_get_success(self, app): + api = DatasetPermissionUserListApi() + method = unwrap(api.get) + + dataset = MagicMock() + users = [{"id": "u1"}, {"id": "u2"}] + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets.DatasetPermissionService.get_dataset_partial_member_list", + return_value=users, + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + assert response["data"] == users + + def test_get_permission_denied(self, app): + api = DatasetPermissionUserListApi() + method = unwrap(api.get) + + dataset = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("no permission"), + ), + ): + with pytest.raises(Forbidden): + method(api, "dataset-1") + + +class TestDatasetAutoDisableLogApi: + def test_get_success(self, app): + api = DatasetAutoDisableLogApi() + method = unwrap(api.get) + + dataset = MagicMock() + logs = [{"reason": "quota"}] + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset_auto_disable_logs", + return_value=logs, + ), + ): + response, status = method(api, "dataset-1") + + assert status == 200 + assert response == logs + + def test_get_dataset_not_found(self, app): + api = DatasetAutoDisableLogApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "dataset-1") diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py new file mode 100644 index 0000000000..dbe54ccb99 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets_document.py @@ -0,0 +1,1379 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden, NotFound + +import services +from controllers.console import console_ns +from controllers.console.datasets.datasets_document import ( + DatasetDocumentListApi, + DocumentApi, + DocumentBatchDownloadZipApi, + DocumentBatchIndexingEstimateApi, + DocumentBatchIndexingStatusApi, + DocumentDownloadApi, + DocumentGenerateSummaryApi, + DocumentIndexingEstimateApi, + DocumentIndexingStatusApi, + DocumentMetadataApi, + DocumentPipelineExecutionLogApi, + DocumentProcessingApi, + DocumentRetryApi, + DocumentStatusApi, + DocumentSummaryStatusApi, + GetProcessRuleApi, +) +from controllers.console.datasets.error import ( + DocumentAlreadyFinishedError, + DocumentIndexingError, + IndexingEstimateError, + InvalidActionError, + InvalidMetadataError, +) + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def tenant_ctx(): + return (MagicMock(is_dataset_editor=True, id="u1"), "tenant-1") + + +@pytest.fixture +def patch_tenant(tenant_ctx): + with patch( + "controllers.console.datasets.datasets_document.current_account_with_tenant", + return_value=tenant_ctx, + ): + yield + + +@pytest.fixture +def dataset(): + return MagicMock(id="ds-1", indexing_technique="economy", summary_index_setting={"enable": True}) + + +@pytest.fixture +def document(): + return MagicMock( + id="doc-1", + tenant_id="tenant-1", + indexing_status="indexing", + data_source_type="upload_file", + data_source_info_dict={"upload_file_id": "file-1"}, + doc_form="text", + archived=False, + is_paused=False, + dataset_process_rule=None, + ) + + +@pytest.fixture +def patch_dataset(dataset): + with patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=dataset, + ): + yield + + +@pytest.fixture +def patch_permission(): + with patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + return_value=None, + ): + yield + + +class TestGetProcessRuleApi: + def test_get_default_success(self, app, patch_tenant): + api = GetProcessRuleApi() + method = unwrap(api.get) + + with app.test_request_context("/"): + response = method(api) + + assert "rules" in response + + def test_get_with_document_dataset_not_found(self, app, patch_tenant): + api = GetProcessRuleApi() + method = unwrap(api.get) + + document = MagicMock(dataset_id="ds-1") + + with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.db.get_or_404", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api) + + +class TestDatasetDocumentListApi: + def test_get_with_fetch_true_counts_segments(self, app, patch_tenant, patch_dataset, patch_permission): + api = DatasetDocumentListApi() + method = unwrap(api.get) + + doc = MagicMock(id="doc-1") + pagination = MagicMock(items=[doc], total=1) + + count_mock = MagicMock(return_value=2) + + with ( + app.test_request_context("/?fetch=true"), + patch( + "controllers.console.datasets.datasets_document.db.paginate", + return_value=pagination, + ), + patch( + "controllers.console.datasets.datasets_document.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(count=count_mock)), + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.marshal", + return_value=[{"id": "doc-1"}], + ), + ): + resp = method(api, "ds-1") + + assert resp["data"] + + def test_get_with_search_status_and_created_at_sort(self, app, patch_tenant, patch_dataset, patch_permission): + api = DatasetDocumentListApi() + method = unwrap(api.get) + + pagination = MagicMock(items=[MagicMock()], total=1) + + with ( + app.test_request_context("/?keyword=test&status=enabled&sort=created_at"), + patch( + "controllers.console.datasets.datasets_document.db.paginate", + return_value=pagination, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.apply_display_status_filter", + side_effect=lambda q, s: q, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.marshal", + return_value=[{"id": "doc-1"}], + ), + ): + resp = method(api, "ds-1") + + assert resp["total"] == 1 + + def test_get_success(self, app, patch_tenant, patch_dataset, patch_permission): + api = DatasetDocumentListApi() + method = unwrap(api.get) + + pagination = MagicMock(items=[MagicMock()], total=1) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.db.paginate", + return_value=pagination, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.marshal", + return_value=[{"id": "doc-1"}], + ), + ): + response = method(api, "ds-1") + + assert response["total"] == 1 + + def test_post_success(self, app, patch_tenant, patch_dataset, patch_permission): + api = DatasetDocumentListApi() + method = unwrap(api.post) + + payload = {"indexing_technique": "economy"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DocumentService.document_create_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.save_document_with_dataset_id", + return_value=([MagicMock()], "batch-1"), + ), + ): + response = method(api, "ds-1") + + assert "documents" in response + + def test_post_forbidden(self, app): + api = DatasetDocumentListApi() + method = unwrap(api.post) + + user = MagicMock(is_dataset_editor=False) + + with ( + app.test_request_context("/", json={}), + patch.object(type(console_ns), "payload", {}), + patch( + "controllers.console.datasets.datasets_document.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), + ), + ): + with pytest.raises(Forbidden): + method(api, "ds-1") + + def test_get_with_fetch_true_and_invalid_fetch(self, app, patch_tenant, patch_dataset, patch_permission): + api = DatasetDocumentListApi() + method = unwrap(api.get) + + pagination = MagicMock(items=[MagicMock()], total=1) + + with ( + app.test_request_context("/?fetch=maybe"), + patch( + "controllers.console.datasets.datasets_document.db.paginate", + return_value=pagination, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.marshal", + return_value=[{"id": "doc-1"}], + ), + ): + response = method(api, "ds-1") + + assert response["total"] == 1 + + def test_get_sort_hit_count(self, app, patch_tenant, patch_dataset, patch_permission): + api = DatasetDocumentListApi() + method = unwrap(api.get) + + pagination = MagicMock(items=[], total=0) + + with ( + app.test_request_context("/?sort=hit_count"), + patch( + "controllers.console.datasets.datasets_document.db.paginate", + return_value=pagination, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", + return_value=None, + ), + ): + response = method(api, "ds-1") + + assert response["total"] == 0 + + +class TestDocumentApi: + def test_get_success(self, app, patch_tenant): + api = DocumentApi() + method = unwrap(api.get) + + document = MagicMock(dataset_process_rule=None) + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_process_rules", + return_value={}, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + def test_get_invalid_metadata(self, app, patch_tenant): + api = DocumentApi() + method = unwrap(api.get) + + with app.test_request_context("/?metadata=wrong"), patch.object(api, "get_document", return_value=MagicMock()): + with pytest.raises(InvalidMetadataError): + method(api, "ds-1", "doc-1") + + def test_delete_success(self, app, patch_tenant, patch_dataset): + api = DocumentApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch.object(api, "get_document", return_value=MagicMock()), + patch( + "controllers.console.datasets.datasets_document.DocumentService.delete_document", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 204 + + def test_delete_indexing_error(self, app, patch_tenant, patch_dataset): + api = DocumentApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch.object(api, "get_document", return_value=MagicMock()), + patch( + "controllers.console.datasets.datasets_document.DocumentService.delete_document", + side_effect=services.errors.document.DocumentIndexingError(), + ), + ): + with pytest.raises(DocumentIndexingError): + method(api, "ds-1", "doc-1") + + +class TestDocumentDownloadApi: + def test_download_success(self, app, patch_tenant): + api = DocumentDownloadApi() + method = unwrap(api.get) + + document = MagicMock() + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_document_download_url", + return_value="url", + ), + ): + response = method(api, "ds-1", "doc-1") + + assert response["url"] == "url" + + +class TestDocumentProcessingApi: + def test_processing_forbidden_when_not_editor(self, app): + api = DocumentProcessingApi() + method = unwrap(api.patch) + + user = MagicMock(is_dataset_editor=False) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.current_account_with_tenant", + return_value=(user, "tenant"), + ), + patch.object(api, "get_document", return_value=MagicMock()), + ): + with pytest.raises(Forbidden): + method(api, "ds-1", "doc-1", "pause") + + def test_resume_from_error_state(self, app, patch_tenant): + api = DocumentProcessingApi() + method = unwrap(api.patch) + + doc = MagicMock(indexing_status="error", is_paused=True) + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=doc), + patch( + "controllers.console.datasets.datasets_document.db.session.commit", + return_value=None, + ), + ): + _, status = method(api, "ds-1", "doc-1", "resume") + + assert status == 200 + + def test_resume_success(self, app, patch_tenant): + api = DocumentProcessingApi() + method = unwrap(api.patch) + + document = MagicMock(indexing_status="paused", is_paused=True) + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.db.session.commit", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1", "resume") + + assert status == 200 + + def test_pause_success(self, app, patch_tenant): + api = DocumentProcessingApi() + method = unwrap(api.patch) + + document = MagicMock(indexing_status="indexing") + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.db.session.commit", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1", "pause") + + assert status == 200 + + def test_pause_invalid(self, app, patch_tenant): + api = DocumentProcessingApi() + method = unwrap(api.patch) + + document = MagicMock(indexing_status="completed") + + with app.test_request_context("/"), patch.object(api, "get_document", return_value=document): + with pytest.raises(InvalidActionError): + method(api, "ds-1", "doc-1", "pause") + + +class TestDocumentMetadataApi: + def test_put_metadata_schema_filtering(self, app, patch_tenant): + api = DocumentMetadataApi() + method = unwrap(api.put) + + doc = MagicMock() + + payload = { + "doc_type": "invoice", + "doc_metadata": {"amount": 10, "invalid": "x"}, + } + + schema = {"amount": int} + + with ( + app.test_request_context("/", json=payload), + patch.object(api, "get_document", return_value=doc), + patch( + "controllers.console.datasets.datasets_document.DocumentService.DOCUMENT_METADATA_SCHEMA", + {"invoice": schema}, + ), + patch( + "controllers.console.datasets.datasets_document.db.session.commit", + return_value=None, + ), + ): + method(api, "ds-1", "doc-1") + + assert doc.doc_metadata == {"amount": 10} + + def test_put_success(self, app, patch_tenant): + api = DocumentMetadataApi() + method = unwrap(api.put) + + document = MagicMock() + + payload = {"doc_type": "others", "doc_metadata": {"a": 1}} + + with ( + app.test_request_context("/", json=payload), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.DocumentService.DOCUMENT_METADATA_SCHEMA", + {"others": {}}, + ), + patch( + "controllers.console.datasets.datasets_document.db.session.commit", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + def test_put_invalid_payload(self, app, patch_tenant): + api = DocumentMetadataApi() + method = unwrap(api.put) + + with app.test_request_context("/", json={}), patch.object(api, "get_document", return_value=MagicMock()): + with pytest.raises(ValueError): + method(api, "ds-1", "doc-1") + + def test_put_invalid_doc_type(self, app, patch_tenant): + api = DocumentMetadataApi() + method = unwrap(api.put) + + payload = {"doc_type": "invalid", "doc_metadata": {}} + + with ( + app.test_request_context("/", json=payload), + patch.object(api, "get_document", return_value=MagicMock()), + patch( + "controllers.console.datasets.datasets_document.DocumentService.DOCUMENT_METADATA_SCHEMA", + {"others": {}}, + ), + ): + with pytest.raises(ValueError): + method(api, "ds-1", "doc-1") + + +class TestDocumentStatusApi: + def test_patch_success(self, app, patch_tenant, patch_dataset): + api = DocumentStatusApi() + method = unwrap(api.patch) + + with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.batch_update_document_status", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "enable") + + assert status == 200 + + def test_patch_invalid_action(self, app, patch_tenant, patch_dataset): + api = DocumentStatusApi() + method = unwrap(api.patch) + + with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.batch_update_document_status", + side_effect=ValueError("x"), + ), + ): + with pytest.raises(InvalidActionError): + method(api, "ds-1", "enable") + + +class TestDocumentRetryApi: + def test_retry_archived_document_skipped(self, app, patch_tenant, patch_dataset): + api = DocumentRetryApi() + method = unwrap(api.post) + + payload = {"document_ids": ["doc-1"]} + + doc = MagicMock(indexing_status="indexing") + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_document", + return_value=doc, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.check_archived", + return_value=True, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.retry_document", + ) as retry_mock, + ): + resp, status = method(api, "ds-1") + + assert status == 204 + retry_mock.assert_called_once_with("ds-1", []) + + def test_retry_success(self, app, patch_tenant, patch_dataset): + api = DocumentRetryApi() + method = unwrap(api.post) + + payload = {"document_ids": ["doc-1"]} + + document = MagicMock(indexing_status="indexing", archived=False) + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.check_archived", + return_value=False, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.retry_document", + return_value=None, + ) as retry_mock, + ): + response, status = method(api, "ds-1") + + assert status == 204 + retry_mock.assert_called_once_with("ds-1", [document]) + + def test_retry_skips_completed_document(self, app, patch_tenant, patch_dataset): + api = DocumentRetryApi() + method = unwrap(api.post) + + payload = {"document_ids": ["doc-1"]} + + document = MagicMock(indexing_status="completed", archived=False) + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.retry_document", + return_value=None, + ) as retry_mock, + ): + response, status = method(api, "ds-1") + + assert status == 204 + retry_mock.assert_called_once_with("ds-1", []) + + +class TestDocumentPipelineExecutionLogApi: + def test_get_log_success(self, app, patch_tenant, patch_dataset): + api = DocumentPipelineExecutionLogApi() + method = unwrap(api.get) + + log = MagicMock( + datasource_info="{}", + datasource_type="file", + input_data={}, + datasource_node_id="n1", + ) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_document", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.db.session.query", + return_value=MagicMock( + filter_by=lambda **k: MagicMock(order_by=lambda *a: MagicMock(first=lambda: log)) + ), + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + +class TestDocumentGenerateSummaryApi: + def test_generate_summary_missing_documents(self, app, patch_tenant, patch_permission): + api = DocumentGenerateSummaryApi() + method = unwrap(api.post) + + dataset = MagicMock( + indexing_technique="high_quality", + summary_index_setting={"enable": True}, + ) + + payload = {"document_list": ["doc-1", "doc-2"]} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_documents_by_ids", + return_value=[MagicMock(id="doc-1")], + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1") + + def test_generate_not_enabled(self, app, patch_tenant, patch_permission): + api = DocumentGenerateSummaryApi() + method = unwrap(api.post) + + dataset = MagicMock(indexing_technique="high_quality", summary_index_setting={"enable": False}) + + payload = {"document_list": ["doc-1"]} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=dataset, + ), + ): + with pytest.raises(ValueError): + method(api, "ds-1") + + def test_generate_summary_success_with_qa_skip(self, app, patch_tenant, patch_permission): + api = DocumentGenerateSummaryApi() + method = unwrap(api.post) + + dataset = MagicMock( + indexing_technique="high_quality", + summary_index_setting={"enable": True}, + ) + + doc1 = MagicMock(id="doc-1", doc_form="qa_model") + doc2 = MagicMock(id="doc-2", doc_form="text") + + payload = {"document_list": ["doc-1", "doc-2"]} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_documents_by_ids", + return_value=[doc1, doc2], + ), + patch( + "controllers.console.datasets.datasets_document.generate_summary_index_task.delay", + return_value=None, + ), + ): + response, status = method(api, "ds-1") + + assert status == 200 + + +class TestDocumentSummaryStatusApi: + def test_get_success(self, app, patch_tenant, patch_permission): + api = DocumentSummaryStatusApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "services.summary_index_service.SummaryIndexService.get_document_summary_status_detail", + return_value={"total_segments": 0}, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + +class TestDocumentIndexingEstimateApi: + def test_indexing_estimate_file_not_found(self, app, patch_tenant): + api = DocumentIndexingEstimateApi() + method = unwrap(api.get) + + document = MagicMock( + indexing_status="indexing", + data_source_type="upload_file", + data_source_info_dict={"upload_file_id": "file-1"}, + tenant_id="tenant-1", + doc_form="text", + dataset_process_rule=None, + ) + + query_mock = MagicMock() + query_mock.where.return_value.first.return_value = None + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.db.session.query", + return_value=query_mock, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + def test_indexing_estimate_generic_exception(self, app, patch_tenant): + api = DocumentIndexingEstimateApi() + method = unwrap(api.get) + + document = MagicMock( + indexing_status="indexing", + data_source_type="upload_file", + data_source_info_dict={"upload_file_id": "file-1"}, + tenant_id="tenant-1", + doc_form="text", + dataset_process_rule=None, + ) + + upload_file = MagicMock() + + mock_indexing_runner = MagicMock() + mock_indexing_runner.indexing_estimate.side_effect = RuntimeError("Some indexing error") + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.db.session.query", + return_value=MagicMock( + where=MagicMock(return_value=MagicMock(first=MagicMock(return_value=upload_file))) + ), + ), + patch( + "controllers.console.datasets.datasets_document.ExtractSetting", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.IndexingRunner", + return_value=mock_indexing_runner, + ), + ): + with pytest.raises(IndexingEstimateError): + method(api, "ds-1", "doc-1") + + def test_get_finished(self, app, patch_tenant): + api = DocumentIndexingEstimateApi() + method = unwrap(api.get) + + document = MagicMock(indexing_status="completed") + + with app.test_request_context("/"), patch.object(api, "get_document", return_value=document): + with pytest.raises(DocumentAlreadyFinishedError): + method(api, "ds-1", "doc-1") + + +class TestDocumentBatchDownloadZipApi: + def test_post_no_documents(self, app, patch_tenant): + api = DocumentBatchDownloadZipApi() + method = unwrap(api.post) + + payload = {"document_ids": []} + + with app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", payload): + with pytest.raises(ValueError): + method(api, "ds-1") + + +class TestDatasetDocumentListApiDelete: + def test_delete_success(self, app, patch_tenant, patch_dataset): + """Test successful deletion of documents""" + api = DatasetDocumentListApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/?document_id=doc-1&document_id=doc-2"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.delete_documents", + return_value=None, + ), + ): + response, status = method(api, "ds-1") + + assert status == 204 + + def test_delete_indexing_error(self, app, patch_tenant, patch_dataset): + """Test deletion with indexing error""" + api = DatasetDocumentListApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.delete_documents", + side_effect=services.errors.document.DocumentIndexingError(), + ), + ): + with pytest.raises(DocumentIndexingError): + method(api, "ds-1") + + def test_delete_dataset_not_found(self, app, patch_tenant): + """Test deletion when dataset not found""" + api = DatasetDocumentListApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1") + + +class TestDocumentBatchIndexingEstimateApi: + def test_batch_indexing_estimate_website(self, app, patch_tenant): + api = DocumentBatchIndexingEstimateApi() + method = unwrap(api.get) + + doc = MagicMock( + indexing_status="indexing", + data_source_type="website_crawl", + data_source_info_dict={ + "provider": "firecrawl", + "job_id": "j1", + "url": "https://x.com", + "mode": "single", + "only_main_content": True, + }, + doc_form="text", + ) + + with ( + app.test_request_context("/"), + patch.object(api, "get_batch_documents", return_value=[doc]), + patch( + "controllers.console.datasets.datasets_document.IndexingRunner.indexing_estimate", + return_value=MagicMock(model_dump=lambda: {"tokens": 2}), + ), + ): + resp, status = method(api, "ds-1", "batch-1") + + assert status == 200 + + def test_batch_indexing_estimate_notion(self, app, patch_tenant): + api = DocumentBatchIndexingEstimateApi() + method = unwrap(api.get) + + doc = MagicMock( + indexing_status="indexing", + data_source_type="notion_import", + data_source_info_dict={ + "credential_id": "c1", + "notion_workspace_id": "w1", + "notion_page_id": "p1", + "type": "page", + }, + doc_form="text", + ) + + with ( + app.test_request_context("/"), + patch.object(api, "get_batch_documents", return_value=[doc]), + patch( + "controllers.console.datasets.datasets_document.IndexingRunner.indexing_estimate", + return_value=MagicMock(model_dump=lambda: {"tokens": 1}), + ), + ): + resp, status = method(api, "ds-1", "batch-1") + + assert status == 200 + + def test_batch_estimate_unsupported_datasource(self, app, patch_tenant): + api = DocumentBatchIndexingEstimateApi() + method = unwrap(api.get) + + document = MagicMock( + indexing_status="indexing", + data_source_type="unknown", + data_source_info_dict={}, + doc_form="text", + ) + + with app.test_request_context("/"), patch.object(api, "get_batch_documents", return_value=[document]): + with pytest.raises(ValueError): + method(api, "ds-1", "batch-1") + + def test_get_batch_estimate_invalid_batch(self, app, patch_tenant): + """Test batch estimation with invalid batch""" + api = DocumentBatchIndexingEstimateApi() + method = unwrap(api.get) + + with app.test_request_context("/"), patch.object(api, "get_batch_documents", side_effect=NotFound()): + with pytest.raises(NotFound): + method(api, "ds-1", "invalid-batch") + + +class TestDocumentBatchIndexingStatusApi: + def test_get_batch_status_invalid_batch(self, app, patch_tenant): + """Test batch status with invalid batch""" + api = DocumentBatchIndexingStatusApi() + method = unwrap(api.get) + + with app.test_request_context("/"), patch.object(api, "get_batch_documents", side_effect=NotFound()): + with pytest.raises(NotFound): + method(api, "ds-1", "invalid-batch") + + +class TestDocumentIndexingStatusApi: + def test_get_status_document_not_found(self, app, patch_tenant): + """Test getting status for non-existent document""" + api = DocumentIndexingStatusApi() + method = unwrap(api.get) + + with app.test_request_context("/"), patch.object(api, "get_document", side_effect=NotFound()): + with pytest.raises(NotFound): + method(api, "ds-1", "invalid-doc") + + +class TestDocumentApiMetadata: + def test_get_with_only_option(self, app, patch_tenant): + """Test get with 'only' metadata option""" + api = DocumentApi() + method = unwrap(api.get) + + document = MagicMock(dataset_process_rule=None, doc_metadata_details=[]) + + with ( + app.test_request_context("/?metadata=only"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_process_rules", + return_value={}, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + def test_get_with_without_option(self, app, patch_tenant): + """Test get with 'without' metadata option""" + api = DocumentApi() + method = unwrap(api.get) + + document = MagicMock(dataset_process_rule=None) + + with ( + app.test_request_context("/?metadata=without"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_process_rules", + return_value={}, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + +class TestDocumentGenerateSummaryApiSuccess: + def test_generate_not_enabled_high_quality(self, app, patch_tenant, patch_permission): + """Test summary generation on non-high-quality dataset""" + api = DocumentGenerateSummaryApi() + method = unwrap(api.post) + + dataset = MagicMock(indexing_technique="economy", summary_index_setting={"enable": True}) + + payload = {"document_list": ["doc-1"]} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=dataset, + ), + ): + with pytest.raises(ValueError): + method(api, "ds-1") + + +class TestDocumentProcessingApiResume: + def test_resume_invalid_status(self, app, patch_tenant): + """Test resume on non-paused document""" + api = DocumentProcessingApi() + method = unwrap(api.patch) + + document = MagicMock(indexing_status="completed", is_paused=False) + + with app.test_request_context("/"), patch.object(api, "get_document", return_value=document): + with pytest.raises(InvalidActionError): + method(api, "ds-1", "doc-1", "resume") + + +class TestDocumentPermissionCases: + def test_document_batch_get_permission_denied(self, app, patch_tenant): + api = DocumentBatchIndexingEstimateApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("No permission"), + ), + ): + with pytest.raises(Forbidden): + method(api, "ds-1", "batch-1") + + def test_document_batch_get_documents_not_found(self, app, patch_tenant): + api = DocumentBatchIndexingEstimateApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + return_value=None, + ), + patch.object(api, "get_batch_documents", return_value=None), + ): + response, status = method(api, "ds-1", "batch-1") + + assert status == 200 + assert response == { + "tokens": 0, + "total_price": 0, + "currency": "USD", + "total_segments": 0, + "preview": [], + } + + def test_document_tenant_mismatch(self, app): + api = DocumentApi() + method = unwrap(api.get) + + user = MagicMock(is_dataset_editor=True) + document = MagicMock( + tenant_id="other-tenant", + dataset_process_rule=None, + ) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_document.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), # ✅ prevents real DB call + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_process_rules", + return_value={}, + ), + ): + with pytest.raises(Forbidden): + method(api, "ds-1", "doc-1") + + def test_process_rule_get_by_document_success(self, app, patch_tenant): + api = GetProcessRuleApi() + method = unwrap(api.get) + + document = MagicMock(dataset_id="ds-1") + process_rule = MagicMock(mode="custom", rules_dict={"a": 1}) + + with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.db.get_or_404", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.db.session.query", + return_value=MagicMock( + where=lambda *a: MagicMock( + order_by=lambda *b: MagicMock(limit=lambda n: MagicMock(one_or_none=lambda: process_rule)) + ) + ), + ), + ): + result = method(api) + + if isinstance(result, tuple): + response, status = result + else: + response, status = result, 200 + + assert status == 200 + assert response["mode"] == "custom" + + def test_process_rule_permission_denied(self, app): + api = GetProcessRuleApi() + method = unwrap(api.get) + + document = MagicMock(dataset_id="ds-1") + + with ( + app.test_request_context("/?document_id=doc-1"), + patch( + "controllers.console.datasets.datasets_document.current_account_with_tenant", + return_value=(MagicMock(is_dataset_editor=True), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_document.db.get_or_404", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.DatasetService.check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("No permission"), + ), + ): + with pytest.raises(Forbidden): + method(api) + + +class TestDocumentListAdvancedCases: + def test_document_list_with_multiple_sort_options(self, app, patch_tenant, patch_dataset, patch_permission): + """Test document list with different sort options""" + api = DatasetDocumentListApi() + method = unwrap(api.get) + + pagination = MagicMock(items=[MagicMock()], total=1) + + with ( + app.test_request_context("/?sort=updated_at"), + patch( + "controllers.console.datasets.datasets_document.db.paginate", + return_value=pagination, + ), + patch( + "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_document.marshal", + return_value=[{"id": "doc-1"}], + ), + ): + response = method(api, "ds-1") + + assert response["total"] == 1 + + def test_document_metadata_with_schema_validation(self, app, patch_tenant): + """Test document metadata update with schema validation""" + api = DocumentMetadataApi() + method = unwrap(api.put) + + doc = MagicMock() + payload = { + "doc_type": "contract", + "doc_metadata": {"amount": 5000, "currency": "USD", "invalid_field": "x"}, + } + + schema = {"amount": int, "currency": str} + + with ( + app.test_request_context("/", json=payload), + patch.object(api, "get_document", return_value=doc), + patch( + "controllers.console.datasets.datasets_document.DocumentService.DOCUMENT_METADATA_SCHEMA", + {"contract": schema}, + ), + patch( + "controllers.console.datasets.datasets_document.db.session.commit", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + assert doc.doc_metadata == {"amount": 5000, "currency": "USD"} + + +class TestDocumentIndexingEdgeCases: + def test_document_indexing_with_extraction_setting(self, app, patch_tenant): + api = DocumentIndexingEstimateApi() + method = unwrap(api.get) + + document = MagicMock( + indexing_status="indexing", + data_source_type="upload_file", + data_source_info_dict={"upload_file_id": "file-1"}, + tenant_id="tenant-1", + doc_form="text", + dataset_process_rule=None, + ) + + upload_file = MagicMock() + + with ( + app.test_request_context("/"), + patch.object(api, "get_document", return_value=document), + patch( + "controllers.console.datasets.datasets_document.db.session.query", + return_value=MagicMock(where=lambda *a: MagicMock(first=lambda: upload_file)), + ), + patch( + "controllers.console.datasets.datasets_document.ExtractSetting", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_document.IndexingRunner.indexing_estimate", + return_value=MagicMock(model_dump=lambda: {"tokens": 5}), + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 diff --git a/api/tests/unit_tests/controllers/console/datasets/test_datasets_segments.py b/api/tests/unit_tests/controllers/console/datasets/test_datasets_segments.py new file mode 100644 index 0000000000..e67e4daad9 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_datasets_segments.py @@ -0,0 +1,1252 @@ +from datetime import datetime +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden, NotFound + +import services +from controllers.console import console_ns +from controllers.console.app.error import ProviderNotInitializeError +from controllers.console.datasets.datasets_segments import ( + ChildChunkAddApi, + ChildChunkUpdateApi, + DatasetDocumentSegmentAddApi, + DatasetDocumentSegmentApi, + DatasetDocumentSegmentBatchImportApi, + DatasetDocumentSegmentListApi, + DatasetDocumentSegmentUpdateApi, + _get_segment_with_summary, +) +from controllers.console.datasets.error import ( + ChildChunkDeleteIndexError, + ChildChunkIndexingError, + InvalidActionError, +) +from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError +from models.dataset import ChildChunk, DocumentSegment +from models.model import UploadFile + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def _segment(): + return SimpleNamespace( + id="s1", + position=1, + document_id="d1", + content="c", + sign_content="c", + answer="a", + word_count=1, + tokens=1, + keywords=[], + index_node_id="n1", + index_node_hash="h", + hit_count=0, + enabled=True, + disabled_at=None, + disabled_by=None, + status="normal", + created_by="u1", + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + updated_by="u1", + indexing_at=None, + completed_at=None, + error=None, + stopped_at=None, + child_chunks=[], + attachments=[], + summary=None, + ) + + +def test_get_segment_with_summary(monkeypatch): + segment = _segment() + summary = SimpleNamespace(summary_content="summary") + + monkeypatch.setattr( + "services.summary_index_service.SummaryIndexService.get_segment_summary", + lambda *_args, **_kwargs: summary, + ) + + result = _get_segment_with_summary(segment, dataset_id="d1") + + assert result["summary"] == "summary" + + +class TestDatasetDocumentSegmentListApi: + def test_get_success(self, app): + api = DatasetDocumentSegmentListApi() + method = unwrap(api.get) + + dataset = MagicMock() + document = MagicMock() + + segment = MagicMock(spec=DocumentSegment) + segment.id = "seg-1" + + pagination = MagicMock() + pagination.items = [segment] + pagination.total = 1 + pagination.pages = 1 + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.paginate", + return_value=pagination, + ), + patch( + "services.summary_index_service.SummaryIndexService.get_segments_summaries", + return_value={}, + ), + patch( + "controllers.console.datasets.datasets_segments.marshal", + return_value={"id": "seg-1"}, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + + def test_get_dataset_not_found(self, app): + api = DatasetDocumentSegmentListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + def test_get_permission_denied(self, app): + api = DatasetDocumentSegmentListApi() + method = unwrap(api.get) + + dataset = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("no access"), + ), + ): + with pytest.raises(Forbidden): + method(api, "ds-1", "doc-1") + + +class TestDatasetDocumentSegmentApi: + def test_patch_success(self, app): + api = DatasetDocumentSegmentApi() + method = unwrap(api.patch) + + user = MagicMock() + user.is_dataset_editor = True + + dataset = MagicMock() + dataset.indexing_technique = "economy" + + document = MagicMock() + document.id = "doc-1" + + with ( + app.test_request_context("/?segment_id=s1&segment_id=s2"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.redis_client.get", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.update_segments_status", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1", "enable") + + assert status == 200 + assert response["result"] == "success" + + def test_patch_document_indexing_in_progress(self, app): + api = DatasetDocumentSegmentApi() + method = unwrap(api.patch) + + user = MagicMock() + user.is_dataset_editor = True + + dataset = MagicMock() + dataset.indexing_technique = "economy" + + document = MagicMock() + document.id = "doc-1" + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.redis_client.get", + return_value=b"running", + ), + ): + with pytest.raises(InvalidActionError): + method(api, "ds-1", "doc-1", "disable") + + def test_patch_llm_bad_request(self, app): + api = DatasetDocumentSegmentApi() + method = unwrap(api.patch) + + user = MagicMock(is_dataset_editor=True) + + dataset = MagicMock( + indexing_technique="high_quality", + embedding_model_provider="openai", + embedding_model="text-embed", + ) + + document = MagicMock(id="doc-1") + + with ( + app.test_request_context("/?segment_id=s1"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.ModelManager.get_model_instance", + side_effect=LLMBadRequestError(), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, "ds-1", "doc-1", "enable") + + def test_patch_provider_token_not_init(self, app): + api = DatasetDocumentSegmentApi() + method = unwrap(api.patch) + + user = MagicMock(is_dataset_editor=True) + + dataset = MagicMock( + indexing_technique="high_quality", + embedding_model_provider="openai", + embedding_model="text-embed", + ) + + document = MagicMock(id="doc-1") + + with ( + app.test_request_context("/?segment_id=s1"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.ModelManager.get_model_instance", + side_effect=ProviderTokenNotInitError("token missing"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, "ds-1", "doc-1", "enable") + + +class TestDatasetDocumentSegmentAddApi: + def test_post_success(self, app): + api = DatasetDocumentSegmentAddApi() + method = unwrap(api.post) + + payload = {"content": "hello"} + + user = MagicMock() + user.is_dataset_editor = True + + dataset = MagicMock() + dataset.indexing_technique = "economy" + + document = MagicMock() + document.doc_form = "text" + + segment = MagicMock() + segment.id = "seg-1" + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.segment_create_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.create_segment", + return_value=segment, + ), + patch( + "controllers.console.datasets.datasets_segments.marshal", + return_value={"id": "seg-1"}, + ), + patch( + "controllers.console.datasets.datasets_segments._get_segment_with_summary", + return_value={"id": "seg-1"}, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + assert response["data"]["id"] == "seg-1" + + def test_post_llm_bad_request(self, app): + api = DatasetDocumentSegmentAddApi() + method = unwrap(api.post) + + payload = {"content": "x"} + + user = MagicMock(is_dataset_editor=True) + + dataset = MagicMock( + indexing_technique="high_quality", + embedding_model_provider="openai", + embedding_model="text-embed", + ) + + document = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.ModelManager.get_model_instance", + side_effect=LLMBadRequestError(), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, "ds-1", "doc-1") + + def test_post_provider_token_not_init(self, app): + api = DatasetDocumentSegmentAddApi() + method = unwrap(api.post) + + payload = {"content": "x"} + + user = MagicMock(is_dataset_editor=True) + + dataset = MagicMock( + indexing_technique="high_quality", + embedding_model_provider="openai", + embedding_model="text-embed", + ) + + document = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.ModelManager.get_model_instance", + side_effect=ProviderTokenNotInitError("token missing"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, "ds-1", "doc-1") + + +class TestDatasetDocumentSegmentUpdateApi: + def test_patch_success(self, app): + api = DatasetDocumentSegmentUpdateApi() + method = unwrap(api.patch) + + payload = {"content": "updated"} + + user = MagicMock() + user.is_dataset_editor = True + + dataset = MagicMock() + dataset.indexing_technique = "economy" + + document = MagicMock() + document.doc_form = "text" + + segment = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: segment)), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.segment_create_args_validate", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.update_segment", + return_value=segment, + ), + patch( + "controllers.console.datasets.datasets_segments._get_segment_with_summary", + return_value={"id": "seg-1"}, + ), + ): + response, status = method(api, "ds-1", "doc-1", "seg-1") + + assert status == 200 + assert "data" in response + + def test_patch_llm_bad_request(self, app): + api = DatasetDocumentSegmentUpdateApi() + method = unwrap(api.patch) + + payload = {"content": "x"} + + user = MagicMock(is_dataset_editor=True) + + dataset = MagicMock( + indexing_technique="high_quality", + embedding_model_provider="openai", + embedding_model="text-embed", + ) + + document = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_model_setting", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.ModelManager.get_model_instance", + side_effect=LLMBadRequestError(), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, "ds-1", "doc-1", "seg-1") + + +class TestDatasetDocumentSegmentBatchImportApi: + def test_post_success(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + payload = {"upload_file_id": "file-1"} + + upload_file = MagicMock(spec=UploadFile) + upload_file.name = "test.csv" + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(id="u1"), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: upload_file)), + ), + patch( + "controllers.console.datasets.datasets_segments.redis_client.setnx", + return_value=True, + ), + patch( + "controllers.console.datasets.datasets_segments.batch_create_segment_to_index_task.delay", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 200 + assert response["job_status"] == "waiting" + + def test_post_dataset_not_found(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + payload = {"upload_file_id": "file-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(id="u1"), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + def test_post_document_not_found(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + payload = {"upload_file_id": "file-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(id="u1"), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + def test_post_upload_file_not_found(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + payload = {"upload_file_id": "file-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(id="u1"), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: None)), + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + def test_post_invalid_file_type(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + payload = {"upload_file_id": "file-1"} + + upload_file = MagicMock() + upload_file.name = "test.txt" + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(id="u1"), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: upload_file)), + ), + ): + with pytest.raises(ValueError): + method(api, "ds-1", "doc-1") + + def test_post_async_task_failure(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + payload = {"upload_file_id": "file-1"} + + upload_file = MagicMock() + upload_file.name = "test.csv" + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(id="u1"), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: upload_file)), + ), + patch( + "controllers.console.datasets.datasets_segments.redis_client.setnx", + side_effect=Exception("redis down"), + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 500 + assert "error" in response + + def test_get_job_not_found_in_redis(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.redis_client.get", + return_value=None, + ), + ): + with pytest.raises(ValueError): + method(api, job_id="job-1") + + +class TestChildChunkAddApi: + def test_post_success(self, app): + api = ChildChunkAddApi() + method = unwrap(api.post) + + payload = {"content": "child"} + + user = MagicMock() + user.is_dataset_editor = True + + dataset = MagicMock() + dataset.indexing_technique = "economy" + + document = MagicMock() + segment = MagicMock() + child_chunk = MagicMock(spec=ChildChunk) + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: segment)), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.create_child_chunk", + return_value=child_chunk, + ), + patch( + "controllers.console.datasets.datasets_segments.marshal", + return_value={"id": "cc-1"}, + ), + ): + response, status = method(api, "ds-1", "doc-1", "seg-1") + + assert status == 200 + assert response["data"]["id"] == "cc-1" + + def test_post_child_chunk_indexing_error(self, app): + api = ChildChunkAddApi() + method = unwrap(api.post) + + payload = {"content": "child"} + + user = MagicMock(is_dataset_editor=True) + + dataset = MagicMock(indexing_technique="economy") + document = MagicMock() + segment = MagicMock() + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: segment)), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.create_child_chunk", + side_effect=services.errors.chunk.ChildChunkIndexingError("fail"), + ), + ): + with pytest.raises(ChildChunkIndexingError): + method(api, "ds-1", "doc-1", "seg-1") + + +class TestChildChunkUpdateApi: + def test_delete_success(self, app): + api = ChildChunkUpdateApi() + method = unwrap(api.delete) + + user = MagicMock() + user.is_dataset_editor = True + + dataset = MagicMock() + document = MagicMock() + segment = MagicMock() + child_chunk = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + side_effect=[ + MagicMock(where=lambda *a, **k: MagicMock(first=lambda: segment)), + MagicMock(where=lambda *a, **k: MagicMock(first=lambda: child_chunk)), + ], + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.delete_child_chunk", + return_value=None, + ), + ): + response, status = method(api, "ds-1", "doc-1", "seg-1", "cc-1") + + assert status == 204 + assert response["result"] == "success" + + def test_delete_child_chunk_index_error(self, app): + api = ChildChunkUpdateApi() + method = unwrap(api.delete) + + user = MagicMock(is_dataset_editor=True) + + dataset = MagicMock() + document = MagicMock() + segment = MagicMock() + child_chunk = MagicMock() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + side_effect=[ + MagicMock(where=lambda *a, **k: MagicMock(first=lambda: segment)), + MagicMock(where=lambda *a, **k: MagicMock(first=lambda: child_chunk)), + ], + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.delete_child_chunk", + side_effect=services.errors.chunk.ChildChunkDeleteIndexError("fail"), + ), + ): + with pytest.raises(ChildChunkDeleteIndexError): + method(api, "ds-1", "doc-1", "seg-1", "cc-1") + + +class TestSegmentListAdvancedCases: + def test_segment_list_with_keyword_filter(self, app): + api = DatasetDocumentSegmentListApi() + method = unwrap(api.get) + + dataset = MagicMock() + document = MagicMock() + + segment = MagicMock(spec=DocumentSegment) + segment.id = "seg-1" + segment.keywords = ["test"] + segment.enabled = True + + pagination = MagicMock(items=[segment], total=1, pages=1) + + with ( + app.test_request_context("/?keyword=test"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.paginate", + return_value=pagination, + ), + patch( + "services.summary_index_service.SummaryIndexService.get_segments_summaries", + return_value={}, + ), + ): + result = method(api, "ds-1", "doc-1") + + if isinstance(result, tuple): + response, status = result + else: + response, status = result, 200 + + assert status == 200 + assert response["total"] == 1 + + def test_segment_list_permission_denied(self, app): + """Test segment list with permission denied""" + api = DatasetDocumentSegmentListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=MagicMock(), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("No permission"), + ), + ): + with pytest.raises(Forbidden): + method(api, "ds-1", "doc-1") + + def test_segment_list_dataset_not_found(self, app): + """Test segment list with dataset not found""" + api = DatasetDocumentSegmentListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + +class TestSegmentOperationCases: + def test_segment_add_with_provider_token_error(self, app): + """Test segment add with provider token not initialized""" + api = DatasetDocumentSegmentAddApi() + method = unwrap(api.post) + + user = MagicMock(is_dataset_editor=True) + dataset = MagicMock() + document = MagicMock() + + payload = {"content": "new content", "answer": None} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.SegmentService.create_segment", + side_effect=ProviderTokenNotInitError("Token not init"), + ), + ): + with pytest.raises(ProviderTokenNotInitError): + method(api, "ds-1", "doc-1") + + def test_batch_import_with_document_not_found(self, app): + """Test batch import with document not found""" + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + user = MagicMock(is_dataset_editor=True) + dataset = MagicMock() + + payload = {"upload_file_id": "file-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + def test_batch_import_with_invalid_file(self, app): + """Test batch import with invalid file type""" + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + user = MagicMock(is_dataset_editor=True) + dataset = MagicMock() + document = MagicMock() + upload_file = None # File not found + + payload = {"upload_file_id": "file-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: upload_file)), + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1", "doc-1") + + def test_batch_import_with_async_task_failure(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.post) + + user = MagicMock(is_dataset_editor=True) + dataset = MagicMock() + document = MagicMock() + upload_file = MagicMock(spec=UploadFile, extension="csv", id="file-1") + upload_file.name = "test.csv" + + payload = {"upload_file_id": "file-1"} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.get_dataset", + return_value=dataset, + ), + patch( + "controllers.console.datasets.datasets_segments.DocumentService.get_document", + return_value=document, + ), + patch( + "controllers.console.datasets.datasets_segments.db.session.query", + return_value=MagicMock(where=lambda *a, **k: MagicMock(first=lambda: upload_file)), + ), + patch( + "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission", + return_value=None, + ), + patch( + "controllers.console.datasets.datasets_segments.batch_create_segment_to_index_task.delay", + side_effect=Exception("Task failed"), + ), + ): + response, status = method(api, "ds-1", "doc-1") + + assert status == 500 + assert "error" in response + + def test_batch_import_get_job_not_found(self, app): + api = DatasetDocumentSegmentBatchImportApi() + method = unwrap(api.get) + + user = MagicMock(is_dataset_editor=True) + + with ( + app.test_request_context("/?job_id=invalid-job"), + patch( + "controllers.console.datasets.datasets_segments.current_account_with_tenant", + return_value=(user, "tenant-1"), + ), + patch( + "controllers.console.datasets.datasets_segments.redis_client.get", + return_value=None, + ), + ): + with pytest.raises(ValueError): + method(api, "invalid-job") diff --git a/api/tests/unit_tests/controllers/console/datasets/test_external.py b/api/tests/unit_tests/controllers/console/datasets/test_external.py new file mode 100644 index 0000000000..161d0c41e8 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_external.py @@ -0,0 +1,399 @@ +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import Forbidden, NotFound + +import services +from controllers.console import console_ns +from controllers.console.datasets.error import DatasetNameDuplicateError +from controllers.console.datasets.external import ( + BedrockRetrievalApi, + ExternalApiTemplateApi, + ExternalApiTemplateListApi, + ExternalDatasetCreateApi, + ExternalKnowledgeHitTestingApi, +) +from services.dataset_service import DatasetService +from services.external_knowledge_service import ExternalDatasetService +from services.hit_testing_service import HitTestingService +from services.knowledge_service import ExternalDatasetTestService + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def app(): + app = Flask("test_external_dataset") + app.config["TESTING"] = True + return app + + +@pytest.fixture +def current_user(): + user = MagicMock() + user.id = "user-1" + user.is_dataset_editor = True + user.has_edit_permission = True + user.is_dataset_operator = True + return user + + +@pytest.fixture(autouse=True) +def mock_auth(mocker, current_user): + mocker.patch( + "controllers.console.datasets.external.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ) + + +class TestExternalApiTemplateListApi: + def test_get_success(self, app): + api = ExternalApiTemplateListApi() + method = unwrap(api.get) + + api_item = MagicMock() + api_item.to_dict.return_value = {"id": "1"} + + with ( + app.test_request_context("/?page=1&limit=20"), + patch.object( + ExternalDatasetService, + "get_external_knowledge_apis", + return_value=([api_item], 1), + ), + ): + resp, status = method(api) + + assert status == 200 + assert resp["total"] == 1 + assert resp["data"][0]["id"] == "1" + + def test_post_forbidden(self, app, current_user): + current_user.is_dataset_editor = False + api = ExternalApiTemplateListApi() + method = unwrap(api.post) + + payload = {"name": "x", "settings": {"k": "v"}} + + with ( + app.test_request_context("/"), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + patch.object(ExternalDatasetService, "validate_api_list"), + ): + with pytest.raises(Forbidden): + method(api) + + def test_post_duplicate_name(self, app): + api = ExternalApiTemplateListApi() + method = unwrap(api.post) + + payload = {"name": "x", "settings": {"k": "v"}} + + with ( + app.test_request_context("/"), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + patch.object(ExternalDatasetService, "validate_api_list"), + patch.object( + ExternalDatasetService, + "create_external_knowledge_api", + side_effect=services.errors.dataset.DatasetNameDuplicateError(), + ), + ): + with pytest.raises(DatasetNameDuplicateError): + method(api) + + +class TestExternalApiTemplateApi: + def test_get_not_found(self, app): + api = ExternalApiTemplateApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch.object( + ExternalDatasetService, + "get_external_knowledge_api", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "api-id") + + def test_delete_forbidden(self, app, current_user): + current_user.has_edit_permission = False + current_user.is_dataset_operator = False + + api = ExternalApiTemplateApi() + method = unwrap(api.delete) + + with app.test_request_context("/"): + with pytest.raises(Forbidden): + method(api, "api-id") + + +class TestExternalDatasetCreateApi: + def test_create_success(self, app): + api = ExternalDatasetCreateApi() + method = unwrap(api.post) + + payload = { + "external_knowledge_api_id": "api", + "external_knowledge_id": "kid", + "name": "dataset", + } + + dataset = MagicMock() + + dataset.embedding_available = False + dataset.built_in_field_enabled = False + dataset.is_published = False + dataset.enable_api = False + dataset.enable_qa = False + dataset.enable_vector_store = False + dataset.vector_store_setting = None + dataset.is_multimodal = False + + dataset.retrieval_model_dict = {} + dataset.tags = [] + dataset.external_knowledge_info = None + dataset.external_retrieval_model = None + dataset.doc_metadata = [] + dataset.icon_info = None + + dataset.summary_index_setting = MagicMock() + dataset.summary_index_setting.enable = False + + with ( + app.test_request_context("/"), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + patch.object( + ExternalDatasetService, + "create_external_dataset", + return_value=dataset, + ), + ): + _, status = method(api) + + assert status == 201 + + def test_create_forbidden(self, app, current_user): + current_user.is_dataset_editor = False + api = ExternalDatasetCreateApi() + method = unwrap(api.post) + + payload = { + "external_knowledge_api_id": "api", + "external_knowledge_id": "kid", + "name": "dataset", + } + + with ( + app.test_request_context("/"), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + ): + with pytest.raises(Forbidden): + method(api) + + +class TestExternalKnowledgeHitTestingApi: + def test_hit_testing_dataset_not_found(self, app): + api = ExternalKnowledgeHitTestingApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch.object( + DatasetService, + "get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "dataset-id") + + def test_hit_testing_success(self, app): + api = ExternalKnowledgeHitTestingApi() + method = unwrap(api.post) + + payload = {"query": "hello"} + + dataset = MagicMock() + + with ( + app.test_request_context("/"), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + patch.object(DatasetService, "get_dataset", return_value=dataset), + patch.object(DatasetService, "check_dataset_permission"), + patch.object( + HitTestingService, + "external_retrieve", + return_value={"ok": True}, + ), + ): + resp = method(api, "dataset-id") + + assert resp["ok"] is True + + +class TestBedrockRetrievalApi: + def test_bedrock_retrieval(self, app): + api = BedrockRetrievalApi() + method = unwrap(api.post) + + payload = { + "retrieval_setting": {}, + "query": "hello", + "knowledge_id": "kid", + } + + with ( + app.test_request_context("/"), + patch.object(type(console_ns), "payload", new_callable=PropertyMock, return_value=payload), + patch.object( + ExternalDatasetTestService, + "knowledge_retrieval", + return_value={"ok": True}, + ), + ): + resp, status = method() + + assert status == 200 + assert resp["ok"] is True + + +class TestExternalApiTemplateListApiAdvanced: + def test_post_duplicate_name_error(self, app, mock_auth, current_user): + api = ExternalApiTemplateListApi() + method = unwrap(api.post) + + payload = {"name": "duplicate_api", "settings": {"key": "value"}} + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch("controllers.console.datasets.external.ExternalDatasetService.validate_api_list"), + patch( + "controllers.console.datasets.external.ExternalDatasetService.create_external_knowledge_api", + side_effect=services.errors.dataset.DatasetNameDuplicateError("Duplicate"), + ), + ): + with pytest.raises(DatasetNameDuplicateError): + method(api) + + def test_get_with_pagination(self, app, mock_auth, current_user): + api = ExternalApiTemplateListApi() + method = unwrap(api.get) + + templates = [MagicMock(id=f"api-{i}") for i in range(3)] + + with ( + app.test_request_context("/?page=1&limit=20"), + patch( + "controllers.console.datasets.external.ExternalDatasetService.get_external_knowledge_apis", + return_value=(templates, 25), + ), + ): + resp, status = method(api) + + assert status == 200 + assert resp["total"] == 25 + assert len(resp["data"]) == 3 + + +class TestExternalDatasetCreateApiAdvanced: + def test_create_forbidden(self, app, mock_auth, current_user): + """Test creating external dataset without permission""" + api = ExternalDatasetCreateApi() + method = unwrap(api.post) + + current_user.is_dataset_editor = False + + payload = { + "external_knowledge_api_id": "api-1", + "external_knowledge_id": "ek-1", + "name": "new_dataset", + "description": "A dataset", + } + + with app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", payload): + with pytest.raises(Forbidden): + method(api) + + +class TestExternalKnowledgeHitTestingApiAdvanced: + def test_hit_testing_dataset_not_found(self, app, mock_auth, current_user): + """Test hit testing on non-existent dataset""" + api = ExternalKnowledgeHitTestingApi() + method = unwrap(api.post) + + payload = { + "query": "test query", + "external_retrieval_model": None, + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.external.DatasetService.get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, "ds-1") + + def test_hit_testing_with_custom_retrieval_model(self, app, mock_auth, current_user): + api = ExternalKnowledgeHitTestingApi() + method = unwrap(api.post) + + dataset = MagicMock() + payload = { + "query": "test query", + "external_retrieval_model": {"type": "bm25"}, + "metadata_filtering_conditions": {"status": "active"}, + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.external.DatasetService.get_dataset", + return_value=dataset, + ), + patch("controllers.console.datasets.external.DatasetService.check_dataset_permission"), + patch( + "controllers.console.datasets.external.HitTestingService.external_retrieve", + return_value={"results": []}, + ), + ): + resp = method(api, "ds-1") + + assert resp["results"] == [] + + +class TestBedrockRetrievalApiAdvanced: + def test_bedrock_retrieval_with_invalid_setting(self, app, mock_auth, current_user): + api = BedrockRetrievalApi() + method = unwrap(api.post) + + payload = { + "retrieval_setting": {}, + "query": "test", + "knowledge_id": "k-1", + } + + with ( + app.test_request_context("/", json=payload), + patch.object(type(console_ns), "payload", payload), + patch( + "controllers.console.datasets.external.ExternalDatasetTestService.knowledge_retrieval", + side_effect=ValueError("Invalid settings"), + ), + ): + with pytest.raises(ValueError): + method() diff --git a/api/tests/unit_tests/controllers/console/datasets/test_hit_testing.py b/api/tests/unit_tests/controllers/console/datasets/test_hit_testing.py new file mode 100644 index 0000000000..726c0a5cf3 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_hit_testing.py @@ -0,0 +1,160 @@ +import uuid +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import NotFound + +from controllers.console import console_ns +from controllers.console.datasets.hit_testing import HitTestingApi +from controllers.console.datasets.hit_testing_base import HitTestingPayload + + +def unwrap(func): + """Recursively unwrap decorated functions.""" + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def app(): + app = Flask("test_hit_testing") + app.config["TESTING"] = True + return app + + +@pytest.fixture +def dataset_id(): + return uuid.uuid4() + + +@pytest.fixture +def dataset(): + return MagicMock(id="dataset-1") + + +@pytest.fixture(autouse=True) +def bypass_decorators(mocker): + """Bypass all decorators on the API method.""" + mocker.patch( + "controllers.console.datasets.hit_testing.setup_required", + lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.hit_testing.login_required", + return_value=lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.hit_testing.account_initialization_required", + return_value=lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.hit_testing.cloud_edition_billing_rate_limit_check", + return_value=lambda *_: lambda f: f, + ) + + +class TestHitTestingApi: + def test_hit_testing_success(self, app, dataset, dataset_id): + api = HitTestingApi() + method = unwrap(api.post) + + payload = { + "query": "what is vector search", + "top_k": 3, + } + + with ( + app.test_request_context("/"), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch.object( + HitTestingPayload, + "model_validate", + return_value=MagicMock(model_dump=lambda **_: payload), + ), + patch.object( + HitTestingApi, + "get_and_validate_dataset", + return_value=dataset, + ), + patch.object( + HitTestingApi, + "hit_testing_args_check", + ), + patch.object( + HitTestingApi, + "perform_hit_testing", + return_value={"query": "what is vector search", "records": []}, + ), + ): + result = method(api, dataset_id) + + assert "query" in result + assert "records" in result + assert result["records"] == [] + + def test_hit_testing_dataset_not_found(self, app, dataset_id): + api = HitTestingApi() + method = unwrap(api.post) + + payload = { + "query": "test", + } + + with ( + app.test_request_context("/"), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch.object( + HitTestingApi, + "get_and_validate_dataset", + side_effect=NotFound("Dataset not found"), + ), + ): + with pytest.raises(NotFound, match="Dataset not found"): + method(api, dataset_id) + + def test_hit_testing_invalid_args(self, app, dataset, dataset_id): + api = HitTestingApi() + method = unwrap(api.post) + + payload = { + "query": "", + } + + with ( + app.test_request_context("/"), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch.object( + HitTestingPayload, + "model_validate", + return_value=MagicMock(model_dump=lambda **_: payload), + ), + patch.object( + HitTestingApi, + "get_and_validate_dataset", + return_value=dataset, + ), + patch.object( + HitTestingApi, + "hit_testing_args_check", + side_effect=ValueError("Invalid parameters"), + ), + ): + with pytest.raises(ValueError, match="Invalid parameters"): + method(api, dataset_id) diff --git a/api/tests/unit_tests/controllers/console/datasets/test_hit_testing_base.py b/api/tests/unit_tests/controllers/console/datasets/test_hit_testing_base.py new file mode 100644 index 0000000000..e7ae37ae45 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_hit_testing_base.py @@ -0,0 +1,207 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden, InternalServerError, NotFound + +import services +from controllers.console.app.error import ( + CompletionRequestError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderQuotaExceededError, +) +from controllers.console.datasets.error import DatasetNotInitializedError +from controllers.console.datasets.hit_testing_base import ( + DatasetsHitTestingBase, +) +from core.errors.error import ( + LLMBadRequestError, + ModelCurrentlyNotSupportError, + ProviderTokenNotInitError, + QuotaExceededError, +) +from dify_graph.model_runtime.errors.invoke import InvokeError +from models.account import Account +from services.dataset_service import DatasetService +from services.hit_testing_service import HitTestingService + + +@pytest.fixture +def account(): + acc = MagicMock(spec=Account) + return acc + + +@pytest.fixture(autouse=True) +def patch_current_user(mocker, account): + """Patch current_user to a valid Account.""" + mocker.patch( + "controllers.console.datasets.hit_testing_base.current_user", + account, + ) + + +@pytest.fixture +def dataset(): + return MagicMock(id="dataset-1") + + +class TestGetAndValidateDataset: + def test_success(self, dataset): + with ( + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + ), + ): + result = DatasetsHitTestingBase.get_and_validate_dataset("dataset-1") + + assert result == dataset + + def test_dataset_not_found(self): + with patch.object( + DatasetService, + "get_dataset", + return_value=None, + ): + with pytest.raises(NotFound, match="Dataset not found"): + DatasetsHitTestingBase.get_and_validate_dataset("dataset-1") + + def test_permission_denied(self, dataset): + with ( + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + side_effect=services.errors.account.NoPermissionError("no access"), + ), + ): + with pytest.raises(Forbidden, match="no access"): + DatasetsHitTestingBase.get_and_validate_dataset("dataset-1") + + +class TestHitTestingArgsCheck: + def test_args_check_called(self): + args = {"query": "test"} + + with patch.object( + HitTestingService, + "hit_testing_args_check", + ) as check_mock: + DatasetsHitTestingBase.hit_testing_args_check(args) + + check_mock.assert_called_once_with(args) + + +class TestParseArgs: + def test_parse_args_success(self): + payload = {"query": "hello"} + + result = DatasetsHitTestingBase.parse_args(payload) + + assert result["query"] == "hello" + + def test_parse_args_invalid(self): + payload = {"query": "x" * 300} + + with pytest.raises(ValueError): + DatasetsHitTestingBase.parse_args(payload) + + +class TestPerformHitTesting: + def test_success(self, dataset): + response = { + "query": "hello", + "records": [], + } + + with patch.object( + HitTestingService, + "retrieve", + return_value=response, + ): + result = DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + assert result["query"] == "hello" + assert result["records"] == [] + + def test_index_not_initialized(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=services.errors.index.IndexNotInitializedError(), + ): + with pytest.raises(DatasetNotInitializedError): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + def test_provider_token_not_init(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=ProviderTokenNotInitError("token missing"), + ): + with pytest.raises(ProviderNotInitializeError): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + def test_quota_exceeded(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=QuotaExceededError(), + ): + with pytest.raises(ProviderQuotaExceededError): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + def test_model_not_supported(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=ModelCurrentlyNotSupportError(), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + def test_llm_bad_request(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=LLMBadRequestError("bad request"), + ): + with pytest.raises(ProviderNotInitializeError): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + def test_invoke_error(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=InvokeError("invoke failed"), + ): + with pytest.raises(CompletionRequestError): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + def test_value_error(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=ValueError("bad args"), + ): + with pytest.raises(ValueError, match="bad args"): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) + + def test_unexpected_error(self, dataset): + with patch.object( + HitTestingService, + "retrieve", + side_effect=Exception("boom"), + ): + with pytest.raises(InternalServerError, match="boom"): + DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"}) diff --git a/api/tests/unit_tests/controllers/console/datasets/test_metadata.py b/api/tests/unit_tests/controllers/console/datasets/test_metadata.py new file mode 100644 index 0000000000..de834c2d4d --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_metadata.py @@ -0,0 +1,362 @@ +import uuid +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import NotFound + +from controllers.console import console_ns +from controllers.console.datasets.metadata import ( + DatasetMetadataApi, + DatasetMetadataBuiltInFieldActionApi, + DatasetMetadataBuiltInFieldApi, + DatasetMetadataCreateApi, + DocumentMetadataEditApi, +) +from services.dataset_service import DatasetService +from services.entities.knowledge_entities.knowledge_entities import ( + MetadataArgs, + MetadataOperationData, +) +from services.metadata_service import MetadataService + + +def unwrap(func): + """Recursively unwrap decorated functions.""" + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def app(): + app = Flask("test_dataset_metadata") + app.config["TESTING"] = True + return app + + +@pytest.fixture +def current_user(): + user = MagicMock() + user.id = "user-1" + return user + + +@pytest.fixture +def dataset(): + ds = MagicMock() + ds.id = "dataset-1" + return ds + + +@pytest.fixture +def dataset_id(): + return uuid.uuid4() + + +@pytest.fixture +def metadata_id(): + return uuid.uuid4() + + +@pytest.fixture(autouse=True) +def bypass_decorators(mocker): + """Bypass setup/login/license decorators.""" + mocker.patch( + "controllers.console.datasets.metadata.setup_required", + lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.metadata.login_required", + lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.metadata.account_initialization_required", + lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.metadata.enterprise_license_required", + lambda f: f, + ) + + +class TestDatasetMetadataCreateApi: + def test_create_metadata_success(self, app, current_user, dataset, dataset_id): + api = DatasetMetadataCreateApi() + method = unwrap(api.post) + + payload = {"name": "author"} + + with ( + app.test_request_context("/"), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.metadata.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + MetadataArgs, + "model_validate", + return_value=MagicMock(), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + ), + patch.object( + MetadataService, + "create_metadata", + return_value={"id": "m1", "name": "author"}, + ), + ): + result, status = method(api, dataset_id) + + assert status == 201 + assert result["name"] == "author" + + def test_create_metadata_dataset_not_found(self, app, current_user, dataset_id): + api = DatasetMetadataCreateApi() + method = unwrap(api.post) + + valid_payload = { + "type": "string", + "name": "author", + } + + with ( + app.test_request_context("/"), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=valid_payload, + ), + patch( + "controllers.console.datasets.metadata.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + MetadataArgs, + "model_validate", + return_value=MagicMock(), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound, match="Dataset not found"): + method(api, dataset_id) + + +class TestDatasetMetadataGetApi: + def test_get_metadata_success(self, app, dataset, dataset_id): + api = DatasetMetadataCreateApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + MetadataService, + "get_dataset_metadatas", + return_value=[{"id": "m1"}], + ), + ): + result, status = method(api, dataset_id) + + assert status == 200 + assert isinstance(result, list) + + def test_get_metadata_dataset_not_found(self, app, dataset_id): + api = DatasetMetadataCreateApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch.object( + DatasetService, + "get_dataset", + return_value=None, + ), + ): + with pytest.raises(NotFound): + method(api, dataset_id) + + +class TestDatasetMetadataApi: + def test_update_metadata_success(self, app, current_user, dataset, dataset_id, metadata_id): + api = DatasetMetadataApi() + method = unwrap(api.patch) + + payload = {"name": "updated-name"} + + with ( + app.test_request_context("/"), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.metadata.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + ), + patch.object( + MetadataService, + "update_metadata_name", + return_value={"id": "m1", "name": "updated-name"}, + ), + ): + result, status = method(api, dataset_id, metadata_id) + + assert status == 200 + assert result["name"] == "updated-name" + + def test_delete_metadata_success(self, app, current_user, dataset, dataset_id, metadata_id): + api = DatasetMetadataApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.metadata.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + ), + patch.object( + MetadataService, + "delete_metadata", + ), + ): + result, status = method(api, dataset_id, metadata_id) + + assert status == 204 + assert result["result"] == "success" + + +class TestDatasetMetadataBuiltInFieldApi: + def test_get_built_in_fields(self, app): + api = DatasetMetadataBuiltInFieldApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch.object( + MetadataService, + "get_built_in_fields", + return_value=["title", "source"], + ), + ): + result, status = method(api) + + assert status == 200 + assert result["fields"] == ["title", "source"] + + +class TestDatasetMetadataBuiltInFieldActionApi: + def test_enable_built_in_field(self, app, current_user, dataset, dataset_id): + api = DatasetMetadataBuiltInFieldActionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.datasets.metadata.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + ), + patch.object( + MetadataService, + "enable_built_in_field", + ), + ): + result, status = method(api, dataset_id, "enable") + + assert status == 200 + assert result["result"] == "success" + + +class TestDocumentMetadataEditApi: + def test_update_document_metadata_success(self, app, current_user, dataset, dataset_id): + api = DocumentMetadataEditApi() + method = unwrap(api.post) + + payload = {"operation": "add", "metadata": {}} + + with ( + app.test_request_context("/"), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.datasets.metadata.current_account_with_tenant", + return_value=(current_user, "tenant-1"), + ), + patch.object( + DatasetService, + "get_dataset", + return_value=dataset, + ), + patch.object( + DatasetService, + "check_dataset_permission", + ), + patch.object( + MetadataOperationData, + "model_validate", + return_value=MagicMock(), + ), + patch.object( + MetadataService, + "update_documents_metadata", + ), + ): + result, status = method(api, dataset_id) + + assert status == 200 + assert result["result"] == "success" diff --git a/api/tests/unit_tests/controllers/console/datasets/test_website.py b/api/tests/unit_tests/controllers/console/datasets/test_website.py new file mode 100644 index 0000000000..9f0da6e76f --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_website.py @@ -0,0 +1,233 @@ +from unittest.mock import Mock, PropertyMock, patch + +import pytest +from flask import Flask + +from controllers.console import console_ns +from controllers.console.datasets.error import WebsiteCrawlError +from controllers.console.datasets.website import ( + WebsiteCrawlApi, + WebsiteCrawlStatusApi, +) +from services.website_service import ( + WebsiteCrawlApiRequest, + WebsiteCrawlStatusApiRequest, + WebsiteService, +) + + +def unwrap(func): + """Recursively unwrap decorated functions.""" + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def app(): + app = Flask("test_website_crawl") + app.config["TESTING"] = True + return app + + +@pytest.fixture(autouse=True) +def bypass_auth_and_setup(mocker): + """Bypass setup/login/account decorators.""" + mocker.patch( + "controllers.console.datasets.website.login_required", + lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.website.setup_required", + lambda f: f, + ) + mocker.patch( + "controllers.console.datasets.website.account_initialization_required", + lambda f: f, + ) + + +class TestWebsiteCrawlApi: + def test_crawl_success(self, app, mocker): + api = WebsiteCrawlApi() + method = unwrap(api.post) + + payload = { + "provider": "firecrawl", + "url": "https://example.com", + "options": {"depth": 1}, + } + + with ( + app.test_request_context("/", json=payload), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + ): + mock_request = Mock(spec=WebsiteCrawlApiRequest) + mocker.patch.object( + WebsiteCrawlApiRequest, + "from_args", + return_value=mock_request, + ) + + mocker.patch.object( + WebsiteService, + "crawl_url", + return_value={"job_id": "job-1"}, + ) + + result, status = method(api) + + assert status == 200 + assert result["job_id"] == "job-1" + + def test_crawl_invalid_payload(self, app, mocker): + api = WebsiteCrawlApi() + method = unwrap(api.post) + + payload = { + "provider": "firecrawl", + "url": "bad-url", + "options": {}, + } + + with ( + app.test_request_context("/", json=payload), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + ): + mocker.patch.object( + WebsiteCrawlApiRequest, + "from_args", + side_effect=ValueError("invalid payload"), + ) + + with pytest.raises(WebsiteCrawlError, match="invalid payload"): + method(api) + + def test_crawl_service_error(self, app, mocker): + api = WebsiteCrawlApi() + method = unwrap(api.post) + + payload = { + "provider": "firecrawl", + "url": "https://example.com", + "options": {}, + } + + with ( + app.test_request_context("/", json=payload), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + ): + mock_request = Mock(spec=WebsiteCrawlApiRequest) + mocker.patch.object( + WebsiteCrawlApiRequest, + "from_args", + return_value=mock_request, + ) + + mocker.patch.object( + WebsiteService, + "crawl_url", + side_effect=Exception("crawl failed"), + ) + + with pytest.raises(WebsiteCrawlError, match="crawl failed"): + method(api) + + +class TestWebsiteCrawlStatusApi: + def test_get_status_success(self, app, mocker): + api = WebsiteCrawlStatusApi() + method = unwrap(api.get) + + job_id = "job-123" + args = {"provider": "firecrawl"} + + with app.test_request_context("/?provider=firecrawl"): + mocker.patch( + "controllers.console.datasets.website.request.args.to_dict", + return_value=args, + ) + + mock_request = Mock(spec=WebsiteCrawlStatusApiRequest) + mocker.patch.object( + WebsiteCrawlStatusApiRequest, + "from_args", + return_value=mock_request, + ) + + mocker.patch.object( + WebsiteService, + "get_crawl_status_typed", + return_value={"status": "completed"}, + ) + + result, status = method(api, job_id) + + assert status == 200 + assert result["status"] == "completed" + + def test_get_status_invalid_provider(self, app, mocker): + api = WebsiteCrawlStatusApi() + method = unwrap(api.get) + + job_id = "job-123" + args = {"provider": "firecrawl"} + + with app.test_request_context("/?provider=firecrawl"): + mocker.patch( + "controllers.console.datasets.website.request.args.to_dict", + return_value=args, + ) + + mocker.patch.object( + WebsiteCrawlStatusApiRequest, + "from_args", + side_effect=ValueError("invalid provider"), + ) + + with pytest.raises(WebsiteCrawlError, match="invalid provider"): + method(api, job_id) + + def test_get_status_service_error(self, app, mocker): + api = WebsiteCrawlStatusApi() + method = unwrap(api.get) + + job_id = "job-123" + args = {"provider": "firecrawl"} + + with app.test_request_context("/?provider=firecrawl"): + mocker.patch( + "controllers.console.datasets.website.request.args.to_dict", + return_value=args, + ) + + mock_request = Mock(spec=WebsiteCrawlStatusApiRequest) + mocker.patch.object( + WebsiteCrawlStatusApiRequest, + "from_args", + return_value=mock_request, + ) + + mocker.patch.object( + WebsiteService, + "get_crawl_status_typed", + side_effect=Exception("status lookup failed"), + ) + + with pytest.raises(WebsiteCrawlError, match="status lookup failed"): + method(api, job_id) diff --git a/api/tests/unit_tests/controllers/console/datasets/test_wraps.py b/api/tests/unit_tests/controllers/console/datasets/test_wraps.py new file mode 100644 index 0000000000..90f00711c1 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/datasets/test_wraps.py @@ -0,0 +1,117 @@ +from unittest.mock import Mock + +import pytest + +from controllers.console.datasets.error import PipelineNotFoundError +from controllers.console.datasets.wraps import get_rag_pipeline +from models.dataset import Pipeline + + +class TestGetRagPipeline: + def test_missing_pipeline_id(self): + @get_rag_pipeline + def dummy_view(**kwargs): + return "ok" + + with pytest.raises(ValueError, match="missing pipeline_id"): + dummy_view() + + def test_pipeline_not_found(self, mocker): + @get_rag_pipeline + def dummy_view(**kwargs): + return "ok" + + mocker.patch( + "controllers.console.datasets.wraps.current_account_with_tenant", + return_value=(Mock(), "tenant-1"), + ) + + mock_query = Mock() + mock_query.where.return_value.first.return_value = None + + mocker.patch( + "controllers.console.datasets.wraps.db.session.query", + return_value=mock_query, + ) + + with pytest.raises(PipelineNotFoundError): + dummy_view(pipeline_id="pipeline-1") + + def test_pipeline_found_and_injected(self, mocker): + pipeline = Mock(spec=Pipeline) + pipeline.id = "pipeline-1" + pipeline.tenant_id = "tenant-1" + + @get_rag_pipeline + def dummy_view(**kwargs): + return kwargs["pipeline"] + + mocker.patch( + "controllers.console.datasets.wraps.current_account_with_tenant", + return_value=(Mock(), "tenant-1"), + ) + + mock_query = Mock() + mock_query.where.return_value.first.return_value = pipeline + + mocker.patch( + "controllers.console.datasets.wraps.db.session.query", + return_value=mock_query, + ) + + result = dummy_view(pipeline_id="pipeline-1") + + assert result is pipeline + + def test_pipeline_id_removed_from_kwargs(self, mocker): + pipeline = Mock(spec=Pipeline) + + @get_rag_pipeline + def dummy_view(**kwargs): + assert "pipeline_id" not in kwargs + return "ok" + + mocker.patch( + "controllers.console.datasets.wraps.current_account_with_tenant", + return_value=(Mock(), "tenant-1"), + ) + + mock_query = Mock() + mock_query.where.return_value.first.return_value = pipeline + + mocker.patch( + "controllers.console.datasets.wraps.db.session.query", + return_value=mock_query, + ) + + result = dummy_view(pipeline_id="pipeline-1") + + assert result == "ok" + + def test_pipeline_id_cast_to_string(self, mocker): + pipeline = Mock(spec=Pipeline) + + @get_rag_pipeline + def dummy_view(**kwargs): + return kwargs["pipeline"] + + mocker.patch( + "controllers.console.datasets.wraps.current_account_with_tenant", + return_value=(Mock(), "tenant-1"), + ) + + def where_side_effect(*args, **kwargs): + assert args[0].right.value == "123" + return Mock(first=lambda: pipeline) + + mock_query = Mock() + mock_query.where.side_effect = where_side_effect + + mocker.patch( + "controllers.console.datasets.wraps.db.session.query", + return_value=mock_query, + ) + + result = dummy_view(pipeline_id=123) + + assert result is pipeline diff --git a/api/tests/unit_tests/controllers/console/explore/__init__.py b/api/tests/unit_tests/controllers/console/explore/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/controllers/console/explore/test_audio.py b/api/tests/unit_tests/controllers/console/explore/test_audio.py new file mode 100644 index 0000000000..0afbc5a8f7 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_audio.py @@ -0,0 +1,402 @@ +from io import BytesIO +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import InternalServerError + +import controllers.console.explore.audio as audio_module +from controllers.console.app.error import ( + AppUnavailableError, + AudioTooLargeError, + CompletionRequestError, + NoAudioUploadedError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderQuotaExceededError, +) +from core.errors.error import ( + ModelCurrentlyNotSupportError, + ProviderTokenNotInitError, + QuotaExceededError, +) +from dify_graph.model_runtime.errors.invoke import InvokeError +from services.errors.audio import ( + AudioTooLargeServiceError, + NoAudioUploadedServiceError, +) + + +def unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +@pytest.fixture +def installed_app(): + app = MagicMock() + app.app = MagicMock() + return app + + +@pytest.fixture +def audio_file(): + return (BytesIO(b"audio"), "audio.wav") + + +class TestChatAudioApi: + def setup_method(self): + self.api = audio_module.ChatAudioApi() + self.method = unwrap(self.api.post) + + def test_post_success(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + return_value={"text": "ok"}, + ), + ): + resp = self.method(installed_app) + + assert resp == {"text": "ok"} + + def test_app_unavailable(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=audio_module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(AppUnavailableError): + self.method(installed_app) + + def test_no_audio_uploaded(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=NoAudioUploadedServiceError(), + ), + ): + with pytest.raises(NoAudioUploadedError): + self.method(installed_app) + + def test_audio_too_large(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=AudioTooLargeServiceError("too big"), + ), + ): + with pytest.raises(AudioTooLargeError): + self.method(installed_app) + + def test_provider_quota_exceeded(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + self.method(installed_app) + + def test_unknown_exception(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=Exception("boom"), + ), + ): + with pytest.raises(InternalServerError): + self.method(installed_app) + + def test_unsupported_audio_type(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=audio_module.UnsupportedAudioTypeServiceError(), + ), + ): + with pytest.raises(audio_module.UnsupportedAudioTypeError): + self.method(installed_app) + + def test_provider_not_support_speech_to_text(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=audio_module.ProviderNotSupportSpeechToTextServiceError(), + ), + ): + with pytest.raises(audio_module.ProviderNotSupportSpeechToTextError): + self.method(installed_app) + + def test_provider_not_initialized(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=ProviderTokenNotInitError("not init"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + self.method(installed_app) + + def test_model_currently_not_supported(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + self.method(installed_app) + + def test_invoke_error_asr(self, app, installed_app, audio_file): + with ( + app.test_request_context( + "/", + data={"file": audio_file}, + content_type="multipart/form-data", + ), + patch.object( + audio_module.AudioService, + "transcript_asr", + side_effect=InvokeError("invoke failed"), + ), + ): + with pytest.raises(CompletionRequestError): + self.method(installed_app) + + +class TestChatTextApi: + def setup_method(self): + self.api = audio_module.ChatTextApi() + self.method = unwrap(self.api.post) + + def test_post_success(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"message_id": "m1", "text": "hello", "voice": "v1"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + return_value={"audio": "ok"}, + ), + ): + resp = self.method(installed_app) + + assert resp == {"audio": "ok"} + + def test_provider_not_initialized(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=ProviderTokenNotInitError("not init"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + self.method(installed_app) + + def test_model_not_supported(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + self.method(installed_app) + + def test_invoke_error(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=InvokeError("invoke failed"), + ), + ): + with pytest.raises(CompletionRequestError): + self.method(installed_app) + + def test_unknown_exception(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=Exception("boom"), + ), + ): + with pytest.raises(InternalServerError): + self.method(installed_app) + + def test_app_unavailable_tts(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=audio_module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(AppUnavailableError): + self.method(installed_app) + + def test_no_audio_uploaded_tts(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=NoAudioUploadedServiceError(), + ), + ): + with pytest.raises(NoAudioUploadedError): + self.method(installed_app) + + def test_audio_too_large_tts(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=AudioTooLargeServiceError("too big"), + ), + ): + with pytest.raises(AudioTooLargeError): + self.method(installed_app) + + def test_unsupported_audio_type_tts(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=audio_module.UnsupportedAudioTypeServiceError(), + ), + ): + with pytest.raises(audio_module.UnsupportedAudioTypeError): + self.method(installed_app) + + def test_provider_not_support_speech_to_text_tts(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=audio_module.ProviderNotSupportSpeechToTextServiceError(), + ), + ): + with pytest.raises(audio_module.ProviderNotSupportSpeechToTextError): + self.method(installed_app) + + def test_quota_exceeded_tts(self, app, installed_app): + with ( + app.test_request_context( + "/", + json={"text": "hi"}, + ), + patch.object( + audio_module.AudioService, + "transcript_tts", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + self.method(installed_app) diff --git a/api/tests/unit_tests/controllers/console/explore/test_banner.py b/api/tests/unit_tests/controllers/console/explore/test_banner.py new file mode 100644 index 0000000000..0606219356 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_banner.py @@ -0,0 +1,100 @@ +from datetime import datetime +from unittest.mock import MagicMock, patch + +import controllers.console.explore.banner as banner_module + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestBannerApi: + def test_get_banners_with_requested_language(self, app): + api = banner_module.BannerApi() + method = unwrap(api.get) + + banner = MagicMock() + banner.id = "b1" + banner.content = {"text": "hello"} + banner.link = "https://example.com" + banner.sort = 1 + banner.status = "enabled" + banner.created_at = datetime(2024, 1, 1) + + query = MagicMock() + query.where.return_value = query + query.order_by.return_value = query + query.all.return_value = [banner] + + session = MagicMock() + session.query.return_value = query + + with app.test_request_context("/?language=fr-FR"), patch.object(banner_module.db, "session", session): + result = method(api) + + assert result == [ + { + "id": "b1", + "content": {"text": "hello"}, + "link": "https://example.com", + "sort": 1, + "status": "enabled", + "created_at": "2024-01-01T00:00:00", + } + ] + + def test_get_banners_fallback_to_en_us(self, app): + api = banner_module.BannerApi() + method = unwrap(api.get) + + banner = MagicMock() + banner.id = "b2" + banner.content = {"text": "fallback"} + banner.link = None + banner.sort = 1 + banner.status = "enabled" + banner.created_at = None + + query = MagicMock() + query.where.return_value = query + query.order_by.return_value = query + query.all.side_effect = [ + [], + [banner], + ] + + session = MagicMock() + session.query.return_value = query + + with app.test_request_context("/?language=es-ES"), patch.object(banner_module.db, "session", session): + result = method(api) + + assert result == [ + { + "id": "b2", + "content": {"text": "fallback"}, + "link": None, + "sort": 1, + "status": "enabled", + "created_at": None, + } + ] + + def test_get_banners_default_language_en_us(self, app): + api = banner_module.BannerApi() + method = unwrap(api.get) + + query = MagicMock() + query.where.return_value = query + query.order_by.return_value = query + query.all.return_value = [] + + session = MagicMock() + session.query.return_value = query + + with app.test_request_context("/"), patch.object(banner_module.db, "session", session): + result = method(api) + + assert result == [] diff --git a/api/tests/unit_tests/controllers/console/explore/test_completion.py b/api/tests/unit_tests/controllers/console/explore/test_completion.py new file mode 100644 index 0000000000..1dd16f3c59 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_completion.py @@ -0,0 +1,459 @@ +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from werkzeug.exceptions import InternalServerError + +import controllers.console.explore.completion as completion_module +from controllers.console.app.error import ( + ConversationCompletedError, +) +from controllers.console.explore.error import NotChatAppError, NotCompletionAppError +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError +from models import Account +from models.model import AppMode +from services.errors.llm import InvokeRateLimitError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def user(): + return MagicMock(spec=Account) + + +@pytest.fixture +def completion_app(): + return MagicMock(app=MagicMock(mode=AppMode.COMPLETION)) + + +@pytest.fixture +def chat_app(): + return MagicMock(app=MagicMock(mode=AppMode.CHAT)) + + +@pytest.fixture +def payload_data(): + return {"inputs": {}, "query": "hi"} + + +@pytest.fixture +def payload_patch(payload_data): + return patch.object( + type(completion_module.console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload_data, + ) + + +class TestCompletionApi: + def test_post_success(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + return_value={"ok": True}, + ), + patch.object( + completion_module.helper, + "compact_generate_response", + return_value=("ok", 200), + ), + ): + result = method(completion_app) + + assert result == ("ok", 200) + + def test_post_wrong_app_mode(self): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + installed_app = MagicMock(app=MagicMock(mode=AppMode.CHAT)) + + with pytest.raises(NotCompletionAppError): + method(installed_app) + + def test_conversation_completed(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.services.errors.conversation.ConversationCompletedError(), + ), + ): + with pytest.raises(ConversationCompletedError): + method(completion_app) + + def test_internal_error(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=Exception("boom"), + ), + ): + with pytest.raises(InternalServerError): + method(completion_app) + + def test_conversation_not_exists(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.services.errors.conversation.ConversationNotExistsError(), + ), + ): + with pytest.raises(completion_module.NotFound): + method(completion_app) + + def test_app_unavailable(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(completion_module.AppUnavailableError): + method(completion_app) + + def test_provider_not_initialized(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.ProviderTokenNotInitError("not init"), + ), + ): + with pytest.raises(completion_module.ProviderNotInitializeError): + method(completion_app) + + def test_quota_exceeded(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.QuotaExceededError(), + ), + ): + with pytest.raises(completion_module.ProviderQuotaExceededError): + method(completion_app) + + def test_model_not_supported(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(completion_module.ProviderModelCurrentlyNotSupportError): + method(completion_app) + + def test_invoke_error(self, app, completion_app, user, payload_patch): + api = completion_module.CompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.InvokeError("invoke failed"), + ), + ): + with pytest.raises(completion_module.CompletionRequestError): + method(completion_app) + + +class TestCompletionStopApi: + def test_stop_success(self, completion_app, user): + api = completion_module.CompletionStopApi() + method = unwrap(api.post) + + user.id = "u1" + + with ( + patch.object(completion_module, "current_user", user), + patch.object(completion_module.AppTaskService, "stop_task"), + ): + resp, status = method(completion_app, "task-1") + + assert status == 200 + assert resp == {"result": "success"} + + def test_stop_wrong_app_mode(self): + api = completion_module.CompletionStopApi() + method = unwrap(api.post) + + installed_app = MagicMock(app=MagicMock(mode=AppMode.CHAT)) + + with pytest.raises(NotCompletionAppError): + method(installed_app, "task") + + +class TestChatApi: + def test_post_success(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + return_value={"ok": True}, + ), + patch.object( + completion_module.helper, + "compact_generate_response", + return_value=("ok", 200), + ), + ): + result = method(chat_app) + + assert result == ("ok", 200) + + def test_post_not_chat_app(self): + api = completion_module.ChatApi() + method = unwrap(api.post) + + installed_app = MagicMock(app=MagicMock(mode=AppMode.COMPLETION)) + + with pytest.raises(NotChatAppError): + method(installed_app) + + def test_rate_limit_error(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=InvokeRateLimitError("limit"), + ), + ): + with pytest.raises(InvokeRateLimitHttpError): + method(chat_app) + + def test_conversation_completed_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.services.errors.conversation.ConversationCompletedError(), + ), + ): + with pytest.raises(ConversationCompletedError): + method(chat_app) + + def test_conversation_not_exists_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.services.errors.conversation.ConversationNotExistsError(), + ), + ): + with pytest.raises(completion_module.NotFound): + method(chat_app) + + def test_app_unavailable_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(completion_module.AppUnavailableError): + method(chat_app) + + def test_provider_not_initialized_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.ProviderTokenNotInitError("not init"), + ), + ): + with pytest.raises(completion_module.ProviderNotInitializeError): + method(chat_app) + + def test_quota_exceeded_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.QuotaExceededError(), + ), + ): + with pytest.raises(completion_module.ProviderQuotaExceededError): + method(chat_app) + + def test_model_not_supported_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(completion_module.ProviderModelCurrentlyNotSupportError): + method(chat_app) + + def test_invoke_error_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=completion_module.InvokeError("invoke failed"), + ), + ): + with pytest.raises(completion_module.CompletionRequestError): + method(chat_app) + + def test_internal_error_chat(self, app, chat_app, user, payload_patch): + api = completion_module.ChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + payload_patch, + patch.object(completion_module, "current_user", user), + patch.object( + completion_module.AppGenerateService, + "generate", + side_effect=Exception("boom"), + ), + ): + with pytest.raises(InternalServerError): + method(chat_app) + + +class TestChatStopApi: + def test_stop_success(self, chat_app, user): + api = completion_module.ChatStopApi() + method = unwrap(api.post) + + user.id = "u1" + + with ( + patch.object(completion_module, "current_user", user), + patch.object(completion_module.AppTaskService, "stop_task"), + ): + resp, status = method(chat_app, "task-1") + + assert status == 200 + assert resp == {"result": "success"} + + def test_stop_not_chat_app(self): + api = completion_module.ChatStopApi() + method = unwrap(api.post) + + installed_app = MagicMock(app=MagicMock(mode=AppMode.COMPLETION)) + + with pytest.raises(NotChatAppError): + method(installed_app, "task") diff --git a/api/tests/unit_tests/controllers/console/explore/test_conversation.py b/api/tests/unit_tests/controllers/console/explore/test_conversation.py new file mode 100644 index 0000000000..65cc209725 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_conversation.py @@ -0,0 +1,232 @@ +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import NotFound + +import controllers.console.explore.conversation as conversation_module +from controllers.console.explore.error import NotChatAppError +from models import Account +from models.model import AppMode +from services.errors.conversation import ( + ConversationNotExistsError, + LastConversationNotExistsError, +) + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class FakeConversation: + def __init__(self, cid): + self.id = cid + self.name = "test" + self.inputs = {} + self.status = "normal" + self.introduction = "" + + +@pytest.fixture +def chat_app(): + app_model = MagicMock(mode=AppMode.CHAT, id="app-id") + return MagicMock(app=app_model) + + +@pytest.fixture +def non_chat_app(): + app_model = MagicMock(mode=AppMode.COMPLETION) + return MagicMock(app=app_model) + + +@pytest.fixture +def user(): + user = MagicMock(spec=Account) + user.id = "uid" + return user + + +@pytest.fixture(autouse=True) +def mock_db_and_session(): + with ( + patch.object( + conversation_module, + "db", + MagicMock(session=MagicMock(), engine=MagicMock()), + ), + patch( + "controllers.console.explore.conversation.Session", + MagicMock(), + ), + ): + yield + + +class TestConversationListApi: + def test_get_success(self, app: Flask, chat_app, user): + api = conversation_module.ConversationListApi() + method = unwrap(api.get) + + pagination = MagicMock( + limit=20, + has_more=False, + data=[FakeConversation("c1"), FakeConversation("c2")], + ) + + with ( + app.test_request_context("/?limit=20"), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.WebConversationService, + "pagination_by_last_id", + return_value=pagination, + ), + ): + result = method(chat_app) + + assert result["limit"] == 20 + assert result["has_more"] is False + assert len(result["data"]) == 2 + + def test_last_conversation_not_exists(self, app: Flask, chat_app, user): + api = conversation_module.ConversationListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.WebConversationService, + "pagination_by_last_id", + side_effect=LastConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(chat_app) + + def test_wrong_app_mode(self, app: Flask, non_chat_app): + api = conversation_module.ConversationListApi() + method = unwrap(api.get) + + with app.test_request_context("/"): + with pytest.raises(NotChatAppError): + method(non_chat_app) + + +class TestConversationApi: + def test_delete_success(self, app: Flask, chat_app, user): + api = conversation_module.ConversationApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.ConversationService, + "delete", + ), + ): + result = method(chat_app, "cid") + + body, status = result + assert status == 204 + assert body["result"] == "success" + + def test_delete_not_found(self, app: Flask, chat_app, user): + api = conversation_module.ConversationApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.ConversationService, + "delete", + side_effect=ConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(chat_app, "cid") + + def test_delete_wrong_app_mode(self, app: Flask, non_chat_app): + api = conversation_module.ConversationApi() + method = unwrap(api.delete) + + with app.test_request_context("/"): + with pytest.raises(NotChatAppError): + method(non_chat_app, "cid") + + +class TestConversationRenameApi: + def test_rename_success(self, app: Flask, chat_app, user): + api = conversation_module.ConversationRenameApi() + method = unwrap(api.post) + + conversation = FakeConversation("cid") + + with ( + app.test_request_context("/", json={"name": "new"}), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.ConversationService, + "rename", + return_value=conversation, + ), + ): + result = method(chat_app, "cid") + + assert result["id"] == "cid" + + def test_rename_not_found(self, app: Flask, chat_app, user): + api = conversation_module.ConversationRenameApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"name": "new"}), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.ConversationService, + "rename", + side_effect=ConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(chat_app, "cid") + + +class TestConversationPinApi: + def test_pin_success(self, app: Flask, chat_app, user): + api = conversation_module.ConversationPinApi() + method = unwrap(api.patch) + + with ( + app.test_request_context("/"), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.WebConversationService, + "pin", + ), + ): + result = method(chat_app, "cid") + + assert result == {"result": "success"} + + +class TestConversationUnPinApi: + def test_unpin_success(self, app: Flask, chat_app, user): + api = conversation_module.ConversationUnPinApi() + method = unwrap(api.patch) + + with ( + app.test_request_context("/"), + patch.object(conversation_module, "current_user", user), + patch.object( + conversation_module.WebConversationService, + "unpin", + ), + ): + result = method(chat_app, "cid") + + assert result == {"result": "success"} diff --git a/api/tests/unit_tests/controllers/console/explore/test_installed_app.py b/api/tests/unit_tests/controllers/console/explore/test_installed_app.py new file mode 100644 index 0000000000..3983a6a97e --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_installed_app.py @@ -0,0 +1,363 @@ +from datetime import datetime +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from werkzeug.exceptions import BadRequest, Forbidden, NotFound + +import controllers.console.explore.installed_app as module + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def tenant_id(): + return "t1" + + +@pytest.fixture +def current_user(tenant_id): + user = MagicMock() + user.id = "u1" + user.current_tenant = MagicMock(id=tenant_id) + return user + + +@pytest.fixture +def installed_app(): + app = MagicMock() + app.id = "ia1" + app.app = MagicMock(id="a1") + app.app_owner_tenant_id = "t2" + app.is_pinned = False + app.last_used_at = datetime(2024, 1, 1) + return app + + +@pytest.fixture +def payload_patch(): + def _patch(payload): + return patch.object( + type(module.console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ) + + return _patch + + +class TestInstalledAppsListApi: + def test_get_installed_apps(self, app, current_user, tenant_id, installed_app): + api = module.InstalledAppsListApi() + method = unwrap(api.get) + + session = MagicMock() + session.scalars.return_value.all.return_value = [installed_app] + + with ( + app.test_request_context("/"), + patch.object(module, "current_account_with_tenant", return_value=(current_user, tenant_id)), + patch.object(module.db, "session", session), + patch.object(module.TenantService, "get_user_role", return_value="owner"), + patch.object( + module.FeatureService, + "get_system_features", + return_value=MagicMock(webapp_auth=MagicMock(enabled=False)), + ), + ): + result = method(api) + + assert "installed_apps" in result + assert result["installed_apps"][0]["editable"] is True + assert result["installed_apps"][0]["uninstallable"] is False + + def test_get_installed_apps_with_app_id_filter(self, app, current_user, tenant_id): + api = module.InstalledAppsListApi() + method = unwrap(api.get) + + session = MagicMock() + session.scalars.return_value.all.return_value = [] + + with ( + app.test_request_context("/?app_id=a1"), + patch.object(module, "current_account_with_tenant", return_value=(current_user, tenant_id)), + patch.object(module.db, "session", session), + patch.object(module.TenantService, "get_user_role", return_value="member"), + patch.object( + module.FeatureService, + "get_system_features", + return_value=MagicMock(webapp_auth=MagicMock(enabled=False)), + ), + ): + result = method(api) + + assert result == {"installed_apps": []} + + def test_get_installed_apps_with_webapp_auth_enabled(self, app, current_user, tenant_id, installed_app): + """Test filtering when webapp_auth is enabled.""" + api = module.InstalledAppsListApi() + method = unwrap(api.get) + + session = MagicMock() + session.scalars.return_value.all.return_value = [installed_app] + + mock_webapp_setting = MagicMock() + mock_webapp_setting.access_mode = "restricted" + + with ( + app.test_request_context("/"), + patch.object(module, "current_account_with_tenant", return_value=(current_user, tenant_id)), + patch.object(module.db, "session", session), + patch.object(module.TenantService, "get_user_role", return_value="owner"), + patch.object( + module.FeatureService, + "get_system_features", + return_value=MagicMock(webapp_auth=MagicMock(enabled=True)), + ), + patch.object( + module.EnterpriseService.WebAppAuth, + "batch_get_app_access_mode_by_id", + return_value={"a1": mock_webapp_setting}, + ), + patch.object( + module.EnterpriseService.WebAppAuth, + "batch_is_user_allowed_to_access_webapps", + return_value={"a1": True}, + ), + ): + result = method(api) + + assert len(result["installed_apps"]) == 1 + + def test_get_installed_apps_with_webapp_auth_user_denied(self, app, current_user, tenant_id, installed_app): + """Test filtering when user doesn't have access.""" + api = module.InstalledAppsListApi() + method = unwrap(api.get) + + session = MagicMock() + session.scalars.return_value.all.return_value = [installed_app] + + mock_webapp_setting = MagicMock() + mock_webapp_setting.access_mode = "restricted" + + with ( + app.test_request_context("/"), + patch.object(module, "current_account_with_tenant", return_value=(current_user, tenant_id)), + patch.object(module.db, "session", session), + patch.object(module.TenantService, "get_user_role", return_value="member"), + patch.object( + module.FeatureService, + "get_system_features", + return_value=MagicMock(webapp_auth=MagicMock(enabled=True)), + ), + patch.object( + module.EnterpriseService.WebAppAuth, + "batch_get_app_access_mode_by_id", + return_value={"a1": mock_webapp_setting}, + ), + patch.object( + module.EnterpriseService.WebAppAuth, + "batch_is_user_allowed_to_access_webapps", + return_value={"a1": False}, + ), + ): + result = method(api) + + assert result["installed_apps"] == [] + + def test_get_installed_apps_with_sso_verified_access(self, app, current_user, tenant_id, installed_app): + """Test that sso_verified access mode apps are skipped in filtering.""" + api = module.InstalledAppsListApi() + method = unwrap(api.get) + + session = MagicMock() + session.scalars.return_value.all.return_value = [installed_app] + + mock_webapp_setting = MagicMock() + mock_webapp_setting.access_mode = "sso_verified" + + with ( + app.test_request_context("/"), + patch.object(module, "current_account_with_tenant", return_value=(current_user, tenant_id)), + patch.object(module.db, "session", session), + patch.object(module.TenantService, "get_user_role", return_value="owner"), + patch.object( + module.FeatureService, + "get_system_features", + return_value=MagicMock(webapp_auth=MagicMock(enabled=True)), + ), + patch.object( + module.EnterpriseService.WebAppAuth, + "batch_get_app_access_mode_by_id", + return_value={"a1": mock_webapp_setting}, + ), + ): + result = method(api) + + assert len(result["installed_apps"]) == 0 + + def test_get_installed_apps_filters_null_apps(self, app, current_user, tenant_id): + """Test that installed apps with null app are filtered out.""" + api = module.InstalledAppsListApi() + method = unwrap(api.get) + + installed_app_with_null = MagicMock() + installed_app_with_null.app = None + + session = MagicMock() + session.scalars.return_value.all.return_value = [installed_app_with_null] + + with ( + app.test_request_context("/"), + patch.object(module, "current_account_with_tenant", return_value=(current_user, tenant_id)), + patch.object(module.db, "session", session), + patch.object(module.TenantService, "get_user_role", return_value="owner"), + patch.object( + module.FeatureService, + "get_system_features", + return_value=MagicMock(webapp_auth=MagicMock(enabled=False)), + ), + ): + result = method(api) + + assert result["installed_apps"] == [] + + def test_get_installed_apps_current_tenant_none(self, app, tenant_id, installed_app): + """Test error when current_user.current_tenant is None.""" + api = module.InstalledAppsListApi() + method = unwrap(api.get) + + current_user = MagicMock() + current_user.current_tenant = None + + session = MagicMock() + session.scalars.return_value.all.return_value = [installed_app] + + with ( + app.test_request_context("/"), + patch.object(module, "current_account_with_tenant", return_value=(current_user, tenant_id)), + patch.object(module.db, "session", session), + ): + with pytest.raises(ValueError, match="current_user.current_tenant must not be None"): + method(api) + + +class TestInstalledAppsCreateApi: + def test_post_success(self, app, tenant_id, payload_patch): + api = module.InstalledAppsListApi() + method = unwrap(api.post) + + recommended = MagicMock() + recommended.install_count = 0 + + app_entity = MagicMock() + app_entity.id = "a1" + app_entity.is_public = True + app_entity.tenant_id = "t2" + + session = MagicMock() + session.query.return_value.where.return_value.first.side_effect = [ + recommended, + app_entity, + None, + ] + + with ( + app.test_request_context("/", json={"app_id": "a1"}), + payload_patch({"app_id": "a1"}), + patch.object(module.db, "session", session), + patch.object(module, "current_account_with_tenant", return_value=(None, tenant_id)), + ): + result = method(api) + + assert result == {"message": "App installed successfully"} + assert recommended.install_count == 1 + + def test_post_recommended_not_found(self, app, payload_patch): + api = module.InstalledAppsListApi() + method = unwrap(api.post) + + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = None + + with ( + app.test_request_context("/", json={"app_id": "a1"}), + payload_patch({"app_id": "a1"}), + patch.object(module.db, "session", session), + ): + with pytest.raises(NotFound): + method(api) + + def test_post_app_not_public(self, app, tenant_id, payload_patch): + api = module.InstalledAppsListApi() + method = unwrap(api.post) + + recommended = MagicMock() + app_entity = MagicMock(is_public=False) + + session = MagicMock() + session.query.return_value.where.return_value.first.side_effect = [ + recommended, + app_entity, + ] + + with ( + app.test_request_context("/", json={"app_id": "a1"}), + payload_patch({"app_id": "a1"}), + patch.object(module.db, "session", session), + patch.object(module, "current_account_with_tenant", return_value=(None, tenant_id)), + ): + with pytest.raises(Forbidden): + method(api) + + +class TestInstalledAppApi: + def test_delete_success(self, tenant_id, installed_app): + api = module.InstalledAppApi() + method = unwrap(api.delete) + + with ( + patch.object(module, "current_account_with_tenant", return_value=(None, tenant_id)), + patch.object(module.db, "session"), + ): + resp, status = method(installed_app) + + assert status == 204 + assert resp["result"] == "success" + + def test_delete_owned_by_current_tenant(self, tenant_id): + api = module.InstalledAppApi() + method = unwrap(api.delete) + + installed_app = MagicMock(app_owner_tenant_id=tenant_id) + + with patch.object(module, "current_account_with_tenant", return_value=(None, tenant_id)): + with pytest.raises(BadRequest): + method(installed_app) + + def test_patch_update_pin(self, app, payload_patch, installed_app): + api = module.InstalledAppApi() + method = unwrap(api.patch) + + with ( + app.test_request_context("/", json={"is_pinned": True}), + payload_patch({"is_pinned": True}), + patch.object(module.db, "session"), + ): + result = method(installed_app) + + assert installed_app.is_pinned is True + assert result["result"] == "success" + + def test_patch_no_change(self, app, payload_patch, installed_app): + api = module.InstalledAppApi() + method = unwrap(api.patch) + + with app.test_request_context("/", json={}), payload_patch({}), patch.object(module.db, "session"): + result = method(installed_app) + + assert result["result"] == "success" diff --git a/api/tests/unit_tests/controllers/console/explore/test_message.py b/api/tests/unit_tests/controllers/console/explore/test_message.py new file mode 100644 index 0000000000..6b5c304884 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_message.py @@ -0,0 +1,552 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import InternalServerError, NotFound + +import controllers.console.explore.message as module +from controllers.console.app.error import ( + AppMoreLikeThisDisabledError, + CompletionRequestError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderQuotaExceededError, +) +from controllers.console.explore.error import ( + AppSuggestedQuestionsAfterAnswerDisabledError, + NotChatAppError, + NotCompletionAppError, +) +from core.errors.error import ( + ModelCurrentlyNotSupportError, + ProviderTokenNotInitError, + QuotaExceededError, +) +from dify_graph.model_runtime.errors.invoke import InvokeError +from services.errors.conversation import ConversationNotExistsError +from services.errors.message import ( + FirstMessageNotExistsError, + MessageNotExistsError, + SuggestedQuestionsAfterAnswerDisabledError, +) + + +def unwrap(func): + bound_self = getattr(func, "__self__", None) + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + if bound_self is not None: + return func.__get__(bound_self, bound_self.__class__) + return func + + +def make_message(): + msg = MagicMock() + msg.id = "m1" + msg.conversation_id = "11111111-1111-1111-1111-111111111111" + msg.parent_message_id = None + msg.inputs = {} + msg.query = "hello" + msg.re_sign_file_url_answer = "" + msg.user_feedback = MagicMock(rating=None) + msg.status = "normal" + msg.error = None + return msg + + +class TestMessageListApi: + def test_get_success(self, app): + api = module.MessageListApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + pagination = MagicMock( + limit=20, + has_more=False, + data=[make_message(), make_message()], + ) + + with ( + app.test_request_context( + "/", + query_string={"conversation_id": "11111111-1111-1111-1111-111111111111"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "pagination_by_first_id", + return_value=pagination, + ), + ): + result = method(installed_app) + + assert result["limit"] == 20 + assert result["has_more"] is False + assert len(result["data"]) == 2 + + def test_get_not_chat_app(self): + api = module.MessageListApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)): + with pytest.raises(NotChatAppError): + method(installed_app) + + def test_conversation_not_exists(self, app): + api = module.MessageListApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + app.test_request_context( + "/", + query_string={"conversation_id": "11111111-1111-1111-1111-111111111111"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "pagination_by_first_id", + side_effect=ConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(installed_app) + + def test_first_message_not_exists(self, app): + api = module.MessageListApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + app.test_request_context( + "/", + query_string={"conversation_id": "11111111-1111-1111-1111-111111111111"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "pagination_by_first_id", + side_effect=FirstMessageNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(installed_app) + + +class TestMessageFeedbackApi: + def test_post_success(self, app): + api = module.MessageFeedbackApi() + method = unwrap(api.post) + + installed_app = MagicMock() + installed_app.app = MagicMock() + + with ( + app.test_request_context("/", json={"rating": "like"}), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "create_feedback", + ), + ): + result = method(installed_app, "mid") + + assert result["result"] == "success" + + def test_message_not_exists(self, app): + api = module.MessageFeedbackApi() + method = unwrap(api.post) + + installed_app = MagicMock() + installed_app.app = MagicMock() + + with ( + app.test_request_context("/", json={}), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "create_feedback", + side_effect=MessageNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(installed_app, "mid") + + +class TestMessageMoreLikeThisApi: + def test_get_success(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + return_value={"ok": True}, + ), + patch.object( + module.helper, + "compact_generate_response", + return_value=("ok", 200), + ), + ): + resp = method(installed_app, "mid") + + assert resp == ("ok", 200) + + def test_not_completion_app(self): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)): + with pytest.raises(NotCompletionAppError): + method(installed_app, "mid") + + def test_more_like_this_disabled(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + side_effect=module.MoreLikeThisDisabledError(), + ), + ): + with pytest.raises(AppMoreLikeThisDisabledError): + method(installed_app, "mid") + + def test_message_not_exists_more_like_this(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + side_effect=MessageNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(installed_app, "mid") + + def test_provider_not_init_more_like_this(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + side_effect=ProviderTokenNotInitError("test"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(installed_app, "mid") + + def test_quota_exceeded_more_like_this(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + method(installed_app, "mid") + + def test_model_not_support_more_like_this(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + side_effect=ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + method(installed_app, "mid") + + def test_invoke_error_more_like_this(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + side_effect=InvokeError("test error"), + ), + ): + with pytest.raises(CompletionRequestError): + method(installed_app, "mid") + + def test_unexpected_error_more_like_this(self, app): + api = module.MessageMoreLikeThisApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + app.test_request_context( + "/", + query_string={"response_mode": "blocking"}, + ), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.AppGenerateService, + "generate_more_like_this", + side_effect=Exception("unexpected"), + ), + ): + with pytest.raises(InternalServerError): + method(installed_app, "mid") + + +class TestMessageSuggestedQuestionApi: + def test_get_success(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + return_value=["q1", "q2"], + ), + ): + result = method(installed_app, "mid") + + assert result["data"] == ["q1", "q2"] + + def test_not_chat_app(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)): + with pytest.raises(NotChatAppError): + method(installed_app, "mid") + + def test_disabled(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=SuggestedQuestionsAfterAnswerDisabledError(), + ), + ): + with pytest.raises(AppSuggestedQuestionsAfterAnswerDisabledError): + method(installed_app, "mid") + + def test_message_not_exists_suggested_question(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=MessageNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(installed_app, "mid") + + def test_conversation_not_exists_suggested_question(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=ConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(installed_app, "mid") + + def test_provider_not_init_suggested_question(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=ProviderTokenNotInitError("test"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(installed_app, "mid") + + def test_quota_exceeded_suggested_question(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + method(installed_app, "mid") + + def test_model_not_support_suggested_question(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + method(installed_app, "mid") + + def test_invoke_error_suggested_question(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=InvokeError("test error"), + ), + ): + with pytest.raises(CompletionRequestError): + method(installed_app, "mid") + + def test_unexpected_error_suggested_question(self): + api = module.MessageSuggestedQuestionApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=Exception("unexpected"), + ), + ): + with pytest.raises(InternalServerError): + method(installed_app, "mid") diff --git a/api/tests/unit_tests/controllers/console/explore/test_parameter.py b/api/tests/unit_tests/controllers/console/explore/test_parameter.py new file mode 100644 index 0000000000..7aaecbff14 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_parameter.py @@ -0,0 +1,140 @@ +from unittest.mock import MagicMock, patch + +import pytest + +import controllers.console.explore.parameter as module +from controllers.console.app.error import AppUnavailableError +from models.model import AppMode + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestAppParameterApi: + def test_get_app_none(self): + api = module.AppParameterApi() + method = unwrap(api.get) + + installed_app = MagicMock(app=None) + + with pytest.raises(AppUnavailableError): + method(installed_app) + + def test_get_advanced_chat_workflow(self): + api = module.AppParameterApi() + method = unwrap(api.get) + + workflow = MagicMock() + workflow.features_dict = {"f": "v"} + workflow.user_input_form.return_value = [{"name": "x"}] + + app = MagicMock( + mode=AppMode.ADVANCED_CHAT, + workflow=workflow, + ) + + installed_app = MagicMock(app=app) + + with ( + patch.object( + module, + "get_parameters_from_feature_dict", + return_value={"any": "thing"}, + ), + patch.object( + module.fields.Parameters, + "model_validate", + return_value=MagicMock(model_dump=lambda **_: {"ok": True}), + ), + ): + result = method(installed_app) + + assert result == {"ok": True} + + def test_get_advanced_chat_workflow_missing(self): + api = module.AppParameterApi() + method = unwrap(api.get) + + app = MagicMock( + mode=AppMode.ADVANCED_CHAT, + workflow=None, + ) + + installed_app = MagicMock(app=app) + + with pytest.raises(AppUnavailableError): + method(installed_app) + + def test_get_non_workflow_app(self): + api = module.AppParameterApi() + method = unwrap(api.get) + + app_model_config = MagicMock() + app_model_config.to_dict.return_value = {"user_input_form": [{"name": "y"}]} + + app = MagicMock( + mode=AppMode.CHAT, + app_model_config=app_model_config, + ) + + installed_app = MagicMock(app=app) + + with ( + patch.object( + module, + "get_parameters_from_feature_dict", + return_value={"whatever": 123}, + ), + patch.object( + module.fields.Parameters, + "model_validate", + return_value=MagicMock(model_dump=lambda **_: {"ok": True}), + ), + ): + result = method(installed_app) + + assert result == {"ok": True} + + def test_get_non_workflow_missing_config(self): + api = module.AppParameterApi() + method = unwrap(api.get) + + app = MagicMock( + mode=AppMode.CHAT, + app_model_config=None, + ) + + installed_app = MagicMock(app=app) + + with pytest.raises(AppUnavailableError): + method(installed_app) + + +class TestExploreAppMetaApi: + def test_get_meta_success(self): + api = module.ExploreAppMetaApi() + method = unwrap(api.get) + + app = MagicMock() + installed_app = MagicMock(app=app) + + with patch.object( + module.AppService, + "get_app_meta", + return_value={"meta": "ok"}, + ): + result = method(installed_app) + + assert result == {"meta": "ok"} + + def test_get_meta_app_missing(self): + api = module.ExploreAppMetaApi() + method = unwrap(api.get) + + installed_app = MagicMock(app=None) + + with pytest.raises(ValueError): + method(installed_app) diff --git a/api/tests/unit_tests/controllers/console/explore/test_recommended_app.py b/api/tests/unit_tests/controllers/console/explore/test_recommended_app.py new file mode 100644 index 0000000000..02c7507ea7 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_recommended_app.py @@ -0,0 +1,92 @@ +from unittest.mock import MagicMock, patch + +import controllers.console.explore.recommended_app as module + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestRecommendedAppListApi: + def test_get_with_language_param(self, app): + api = module.RecommendedAppListApi() + method = unwrap(api.get) + + result_data = {"recommended_apps": [], "categories": []} + + with ( + app.test_request_context("/", query_string={"language": "en-US"}), + patch.object(module, "current_user", MagicMock(interface_language="fr-FR")), + patch.object( + module.RecommendedAppService, + "get_recommended_apps_and_categories", + return_value=result_data, + ) as service_mock, + ): + result = method(api) + + service_mock.assert_called_once_with("en-US") + assert result == result_data + + def test_get_fallback_to_user_language(self, app): + api = module.RecommendedAppListApi() + method = unwrap(api.get) + + result_data = {"recommended_apps": [], "categories": []} + + with ( + app.test_request_context("/", query_string={"language": "invalid"}), + patch.object(module, "current_user", MagicMock(interface_language="fr-FR")), + patch.object( + module.RecommendedAppService, + "get_recommended_apps_and_categories", + return_value=result_data, + ) as service_mock, + ): + result = method(api) + + service_mock.assert_called_once_with("fr-FR") + assert result == result_data + + def test_get_fallback_to_default_language(self, app): + api = module.RecommendedAppListApi() + method = unwrap(api.get) + + result_data = {"recommended_apps": [], "categories": []} + + with ( + app.test_request_context("/"), + patch.object(module, "current_user", MagicMock(interface_language=None)), + patch.object( + module.RecommendedAppService, + "get_recommended_apps_and_categories", + return_value=result_data, + ) as service_mock, + ): + result = method(api) + + service_mock.assert_called_once_with(module.languages[0]) + assert result == result_data + + +class TestRecommendedAppApi: + def test_get_success(self, app): + api = module.RecommendedAppApi() + method = unwrap(api.get) + + result_data = {"id": "app1"} + + with ( + app.test_request_context("/"), + patch.object( + module.RecommendedAppService, + "get_recommend_app_detail", + return_value=result_data, + ) as service_mock, + ): + result = method(api, "11111111-1111-1111-1111-111111111111") + + service_mock.assert_called_once_with("11111111-1111-1111-1111-111111111111") + assert result == result_data diff --git a/api/tests/unit_tests/controllers/console/explore/test_saved_message.py b/api/tests/unit_tests/controllers/console/explore/test_saved_message.py new file mode 100644 index 0000000000..bb7cdd55c4 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_saved_message.py @@ -0,0 +1,154 @@ +from unittest.mock import MagicMock, PropertyMock, patch +from uuid import uuid4 + +import pytest +from werkzeug.exceptions import NotFound + +import controllers.console.explore.saved_message as module +from controllers.console.explore.error import NotCompletionAppError +from services.errors.message import MessageNotExistsError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def make_saved_message(): + msg = MagicMock() + msg.id = str(uuid4()) + msg.message_id = str(uuid4()) + msg.app_id = str(uuid4()) + msg.inputs = {} + msg.query = "hello" + msg.answer = "world" + msg.user_feedback = MagicMock(rating="like") + msg.created_at = None + return msg + + +@pytest.fixture +def payload_patch(): + def _patch(payload): + return patch.object( + type(module.console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ) + + return _patch + + +class TestSavedMessageListApi: + def test_get_success(self, app): + api = module.SavedMessageListApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + pagination = MagicMock( + limit=20, + has_more=False, + data=[make_saved_message(), make_saved_message()], + ) + + with ( + app.test_request_context("/", query_string={}), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.SavedMessageService, + "pagination_by_last_id", + return_value=pagination, + ), + ): + result = method(installed_app) + + assert result["limit"] == 20 + assert result["has_more"] is False + assert len(result["data"]) == 2 + + def test_get_not_completion_app(self): + api = module.SavedMessageListApi() + method = unwrap(api.get) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)): + with pytest.raises(NotCompletionAppError): + method(installed_app) + + def test_post_success(self, app, payload_patch): + api = module.SavedMessageListApi() + method = unwrap(api.post) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + payload = {"message_id": str(uuid4())} + + with ( + app.test_request_context("/", json=payload), + payload_patch(payload), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object(module.SavedMessageService, "save") as save_mock, + ): + result = method(installed_app) + + save_mock.assert_called_once() + assert result == {"result": "success"} + + def test_post_message_not_exists(self, app, payload_patch): + api = module.SavedMessageListApi() + method = unwrap(api.post) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + payload = {"message_id": str(uuid4())} + + with ( + app.test_request_context("/", json=payload), + payload_patch(payload), + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object( + module.SavedMessageService, + "save", + side_effect=MessageNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(installed_app) + + +class TestSavedMessageApi: + def test_delete_success(self): + api = module.SavedMessageApi() + method = unwrap(api.delete) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="completion") + + with ( + patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)), + patch.object(module.SavedMessageService, "delete") as delete_mock, + ): + result, status = method(installed_app, str(uuid4())) + + delete_mock.assert_called_once() + assert status == 204 + assert result == {"result": "success"} + + def test_delete_not_completion_app(self): + api = module.SavedMessageApi() + method = unwrap(api.delete) + + installed_app = MagicMock() + installed_app.app = MagicMock(mode="chat") + + with patch.object(module, "current_account_with_tenant", return_value=(MagicMock(), None)): + with pytest.raises(NotCompletionAppError): + method(installed_app, str(uuid4())) diff --git a/api/tests/unit_tests/controllers/console/explore/test_trial.py b/api/tests/unit_tests/controllers/console/explore/test_trial.py new file mode 100644 index 0000000000..d85114c8fb --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_trial.py @@ -0,0 +1,1101 @@ +from io import BytesIO +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest +from werkzeug.exceptions import Forbidden, InternalServerError, NotFound + +import controllers.console.explore.trial as module +from controllers.console.app.error import ( + AppUnavailableError, + CompletionRequestError, + ConversationCompletedError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderQuotaExceededError, +) +from controllers.console.explore.error import ( + NotChatAppError, + NotCompletionAppError, + NotWorkflowAppError, +) +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError +from core.errors.error import ( + ModelCurrentlyNotSupportError, + ProviderTokenNotInitError, + QuotaExceededError, +) +from dify_graph.model_runtime.errors.invoke import InvokeError +from models import Account +from models.account import TenantStatus +from models.model import AppMode +from services.errors.conversation import ConversationNotExistsError +from services.errors.llm import InvokeRateLimitError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def account(): + acc = MagicMock(spec=Account) + acc.id = "u1" + return acc + + +@pytest.fixture +def trial_app_chat(): + app = MagicMock() + app.id = "a-chat" + app.mode = AppMode.CHAT + return app + + +@pytest.fixture +def trial_app_completion(): + app = MagicMock() + app.id = "a-comp" + app.mode = AppMode.COMPLETION + return app + + +@pytest.fixture +def trial_app_workflow(): + app = MagicMock() + app.id = "a-workflow" + app.mode = AppMode.WORKFLOW + return app + + +@pytest.fixture +def valid_parameters(): + return { + "user_input_form": [], + "system_parameters": {}, + "suggested_questions": {}, + "suggested_questions_after_answer": {}, + "speech_to_text": {}, + "text_to_speech": {}, + "retriever_resource": {}, + "annotation_reply": {}, + "more_like_this": {}, + "sensitive_word_avoidance": {}, + "file_upload": {}, + } + + +class TestTrialAppWorkflowRunApi: + def test_not_workflow_app(self, app): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with app.test_request_context("/"): + with pytest.raises(NotWorkflowAppError): + method(MagicMock(mode=AppMode.CHAT)) + + def test_success(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(module, "current_user", account), + patch.object(module.AppGenerateService, "generate", return_value=MagicMock()), + patch.object(module.RecommendedAppService, "add_trial_app_record"), + ): + result = method(trial_app_workflow) + + assert result is not None + + def test_workflow_provider_not_init(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ProviderTokenNotInitError("test"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(trial_app_workflow) + + def test_workflow_quota_exceeded(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + method(trial_app_workflow) + + def test_workflow_model_not_support(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + method(trial_app_workflow) + + def test_workflow_invoke_error(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=InvokeError("test error"), + ), + ): + with pytest.raises(CompletionRequestError): + method(trial_app_workflow) + + def test_workflow_rate_limit_error(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=InvokeRateLimitError("test"), + ), + ): + with pytest.raises(InvokeRateLimitHttpError): + method(trial_app_workflow) + + def test_workflow_value_error(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "files": []}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ValueError("test error"), + ), + ): + with pytest.raises(ValueError): + method(trial_app_workflow) + + def test_workflow_generic_exception(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "files": []}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=RuntimeError("unexpected error"), + ), + ): + with pytest.raises(InternalServerError): + method(trial_app_workflow) + + +class TestTrialChatApi: + def test_not_chat_app(self, app): + api = module.TrialChatApi() + method = unwrap(api.post) + + with app.test_request_context("/", json={"inputs": {}, "query": "hi"}): + with pytest.raises(NotChatAppError): + method(api, MagicMock(mode="completion")) + + def test_success(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object(module.AppGenerateService, "generate", return_value=MagicMock()), + patch.object(module.RecommendedAppService, "add_trial_app_record"), + ): + result = method(api, trial_app_chat) + + assert result is not None + + def test_chat_conversation_not_exists(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=module.services.errors.conversation.ConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(api, trial_app_chat) + + def test_chat_conversation_completed(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=module.services.errors.conversation.ConversationCompletedError(), + ), + ): + with pytest.raises(ConversationCompletedError): + method(api, trial_app_chat) + + def test_chat_app_config_broken(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(AppUnavailableError): + method(api, trial_app_chat) + + def test_chat_provider_not_init(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ProviderTokenNotInitError("test"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, trial_app_chat) + + def test_chat_quota_exceeded(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + method(api, trial_app_chat) + + def test_chat_model_not_support(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + method(api, trial_app_chat) + + def test_chat_invoke_error(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=InvokeError("test error"), + ), + ): + with pytest.raises(CompletionRequestError): + method(api, trial_app_chat) + + def test_chat_rate_limit_error(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=InvokeRateLimitError("test"), + ), + ): + with pytest.raises(InvokeRateLimitHttpError): + method(api, trial_app_chat) + + def test_chat_value_error(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ValueError("test error"), + ), + ): + with pytest.raises(ValueError): + method(api, trial_app_chat) + + def test_chat_generic_exception(self, app, trial_app_chat, account): + api = module.TrialChatApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": "hi"}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=RuntimeError("unexpected error"), + ), + ): + with pytest.raises(InternalServerError): + method(api, trial_app_chat) + + +class TestTrialCompletionApi: + def test_not_completion_app(self, app): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with app.test_request_context("/", json={"inputs": {}, "query": ""}): + with pytest.raises(NotCompletionAppError): + method(api, MagicMock(mode=AppMode.CHAT)) + + def test_success(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object(module.AppGenerateService, "generate", return_value=MagicMock()), + patch.object(module.RecommendedAppService, "add_trial_app_record"), + ): + result = method(api, trial_app_completion) + + assert result is not None + + def test_completion_app_config_broken(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(AppUnavailableError): + method(api, trial_app_completion) + + def test_completion_provider_not_init(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ProviderTokenNotInitError("test"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, trial_app_completion) + + def test_completion_quota_exceeded(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + method(api, trial_app_completion) + + def test_completion_model_not_support(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ModelCurrentlyNotSupportError(), + ), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + method(api, trial_app_completion) + + def test_completion_invoke_error(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=InvokeError("test error"), + ), + ): + with pytest.raises(CompletionRequestError): + method(api, trial_app_completion) + + def test_completion_rate_limit_error(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=InvokeRateLimitError("test"), + ), + ): + with pytest.raises(InternalServerError): + method(api, trial_app_completion) + + def test_completion_value_error(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=ValueError("test error"), + ), + ): + with pytest.raises(ValueError): + method(api, trial_app_completion) + + def test_completion_generic_exception(self, app, trial_app_completion, account): + api = module.TrialCompletionApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"inputs": {}, "query": ""}), + patch.object(module, "current_user", account), + patch.object( + module.AppGenerateService, + "generate", + side_effect=RuntimeError("unexpected error"), + ), + ): + with pytest.raises(InternalServerError): + method(api, trial_app_completion) + + +class TestTrialMessageSuggestedQuestionApi: + def test_not_chat_app(self, app): + api = module.TrialMessageSuggestedQuestionApi() + method = unwrap(api.get) + + with app.test_request_context("/"): + with pytest.raises(NotChatAppError): + method(api, MagicMock(mode="completion"), str(uuid4())) + + def test_success(self, app, trial_app_chat, account): + api = module.TrialMessageSuggestedQuestionApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch.object(module, "current_user", account), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + return_value=["q1", "q2"], + ), + ): + result = method(api, trial_app_chat, str(uuid4())) + + assert result == {"data": ["q1", "q2"]} + + def test_conversation_not_exists(self, app, trial_app_chat, account): + api = module.TrialMessageSuggestedQuestionApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch.object(module, "current_user", account), + patch.object( + module.MessageService, + "get_suggested_questions_after_answer", + side_effect=ConversationNotExistsError(), + ), + ): + with pytest.raises(NotFound): + method(api, trial_app_chat, str(uuid4())) + + +class TestTrialAppParameterApi: + def test_app_unavailable(self): + api = module.TrialAppParameterApi() + method = unwrap(api.get) + + with pytest.raises(AppUnavailableError): + method(api, None) + + def test_success_non_workflow(self, valid_parameters): + api = module.TrialAppParameterApi() + method = unwrap(api.get) + + app_model = MagicMock( + mode=AppMode.CHAT, + app_model_config=MagicMock(to_dict=lambda: {"user_input_form": []}), + ) + + with ( + patch.object( + module, + "get_parameters_from_feature_dict", + return_value=valid_parameters, + ), + patch.object( + module.ParametersResponse, + "model_validate", + return_value=MagicMock(model_dump=lambda mode=None: {"ok": True}), + ), + ): + result = method(api, app_model) + + assert result == {"ok": True} + + +class TestTrialChatAudioApi: + def test_success(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_asr", return_value={"text": "hello"}), + patch.object(module.RecommendedAppService, "add_trial_app_record"), + ): + result = method(api, trial_app_chat) + + assert result == {"text": "hello"} + + def test_app_config_broken(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(module.AppUnavailableError): + method(api, trial_app_chat) + + def test_no_audio_uploaded(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=module.services.errors.audio.NoAudioUploadedServiceError(), + ), + ): + with pytest.raises(module.NoAudioUploadedError): + method(api, trial_app_chat) + + def test_audio_too_large(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=module.services.errors.audio.AudioTooLargeServiceError("Too large"), + ), + ): + with pytest.raises(module.AudioTooLargeError): + method(api, trial_app_chat) + + def test_unsupported_audio_type(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=module.services.errors.audio.UnsupportedAudioTypeServiceError(), + ), + ): + with pytest.raises(module.UnsupportedAudioTypeError): + method(api, trial_app_chat) + + def test_provider_not_support_tts(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=module.services.errors.audio.ProviderNotSupportSpeechToTextServiceError(), + ), + ): + with pytest.raises(module.ProviderNotSupportSpeechToTextError): + method(api, trial_app_chat) + + def test_provider_not_init(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_asr", side_effect=ProviderTokenNotInitError("test")), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, trial_app_chat) + + def test_quota_exceeded(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_asr", side_effect=QuotaExceededError()), + ): + with pytest.raises(ProviderQuotaExceededError): + method(api, trial_app_chat) + + +class TestTrialChatTextApi: + def test_success(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_tts", return_value={"audio": "base64_data"}), + patch.object(module.RecommendedAppService, "add_trial_app_record"), + ): + result = method(api, trial_app_chat) + + assert result == {"audio": "base64_data"} + + def test_app_config_broken(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_tts", + side_effect=module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(module.AppUnavailableError): + method(api, trial_app_chat) + + def test_provider_not_support(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_tts", + side_effect=module.services.errors.audio.ProviderNotSupportSpeechToTextServiceError(), + ), + ): + with pytest.raises(module.ProviderNotSupportSpeechToTextError): + method(api, trial_app_chat) + + def test_audio_too_large(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_tts", + side_effect=module.services.errors.audio.AudioTooLargeServiceError("Too large"), + ), + ): + with pytest.raises(module.AudioTooLargeError): + method(api, trial_app_chat) + + def test_no_audio_uploaded(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_tts", + side_effect=module.services.errors.audio.NoAudioUploadedServiceError(), + ), + ): + with pytest.raises(module.NoAudioUploadedError): + method(api, trial_app_chat) + + def test_provider_not_init(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_tts", side_effect=ProviderTokenNotInitError("test")), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, trial_app_chat) + + def test_quota_exceeded(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_tts", side_effect=QuotaExceededError()), + ): + with pytest.raises(ProviderQuotaExceededError): + method(api, trial_app_chat) + + def test_model_not_support(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_tts", side_effect=ModelCurrentlyNotSupportError()), + ): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + method(api, trial_app_chat) + + def test_invoke_error(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object(module.AudioService, "transcript_tts", side_effect=InvokeError("test error")), + ): + with pytest.raises(CompletionRequestError): + method(api, trial_app_chat) + + +class TestTrialAppWorkflowTaskStopApi: + def test_not_workflow_app(self, app, trial_app_chat): + api = module.TrialAppWorkflowTaskStopApi() + method = unwrap(api.post) + + with app.test_request_context("/"): + with pytest.raises(NotWorkflowAppError): + method(trial_app_chat, str(uuid4())) + + def test_success(self, app, trial_app_workflow, account): + api = module.TrialAppWorkflowTaskStopApi() + method = unwrap(api.post) + + task_id = str(uuid4()) + with ( + app.test_request_context("/"), + patch.object(module, "current_user", account), + patch.object(module.AppQueueManager, "set_stop_flag_no_user_check") as mock_set_flag, + patch.object(module.GraphEngineManager, "send_stop_command") as mock_send_cmd, + ): + result = method(trial_app_workflow, task_id) + + assert result == {"result": "success"} + mock_set_flag.assert_called_once_with(task_id) + mock_send_cmd.assert_called_once_with(task_id) + + +class TestTrialSitApi: + def test_no_site(self, app): + api = module.TrialSitApi() + method = unwrap(api.get) + app_model = MagicMock() + app_model.id = "a1" + + with app.test_request_context("/"), patch.object(module.db.session, "query") as mock_query: + mock_query.return_value.where.return_value.first.return_value = None + with pytest.raises(Forbidden): + method(api, app_model) + + def test_archived_tenant(self, app): + api = module.TrialSitApi() + method = unwrap(api.get) + + site = MagicMock() + app_model = MagicMock() + app_model.id = "a1" + app_model.tenant = MagicMock() + app_model.tenant.status = TenantStatus.ARCHIVE + + with app.test_request_context("/"), patch.object(module.db.session, "query") as mock_query: + mock_query.return_value.where.return_value.first.return_value = site + with pytest.raises(Forbidden): + method(api, app_model) + + def test_success(self, app): + api = module.TrialSitApi() + method = unwrap(api.get) + + site = MagicMock() + app_model = MagicMock() + app_model.id = "a1" + app_model.tenant = MagicMock() + app_model.tenant.status = TenantStatus.NORMAL + + with ( + app.test_request_context("/"), + patch.object(module.db.session, "query") as mock_query, + patch.object(module.SiteResponse, "model_validate") as mock_validate, + ): + mock_query.return_value.where.return_value.first.return_value = site + mock_validate_result = MagicMock() + mock_validate_result.model_dump.return_value = {"name": "test", "icon": "icon"} + mock_validate.return_value = mock_validate_result + result = method(api, app_model) + + assert result == {"name": "test", "icon": "icon"} + + +class TestTrialChatAudioApiExceptionHandlers: + def test_provider_not_init(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=ProviderTokenNotInitError("test"), + ), + ): + with pytest.raises(ProviderNotInitializeError): + method(api, trial_app_chat) + + def test_quota_exceeded(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=QuotaExceededError(), + ), + ): + with pytest.raises(ProviderQuotaExceededError): + method(api, trial_app_chat) + + def test_invoke_error(self, app, trial_app_chat, account): + api = module.TrialChatAudioApi() + method = unwrap(api.post) + + file_data = BytesIO(b"fake audio data") + file_data.filename = "test.wav" + + with ( + app.test_request_context( + "/", method="POST", data={"file": (file_data, "test.wav")}, content_type="multipart/form-data" + ), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_asr", + side_effect=InvokeError("test error"), + ), + ): + with pytest.raises(CompletionRequestError): + method(api, trial_app_chat) + + +class TestTrialChatTextApiExceptionHandlers: + def test_app_config_broken(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_tts", + side_effect=module.services.errors.app_model_config.AppModelConfigBrokenError(), + ), + ): + with pytest.raises(module.AppUnavailableError): + method(api, trial_app_chat) + + def test_unsupported_audio_type(self, app, trial_app_chat, account): + api = module.TrialChatTextApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"text": "hello", "voice": "en-US"}), + patch.object(module, "current_user", account), + patch.object( + module.AudioService, + "transcript_tts", + side_effect=module.services.errors.audio.UnsupportedAudioTypeServiceError("test"), + ), + ): + with pytest.raises(module.UnsupportedAudioTypeError): + method(api, trial_app_chat) diff --git a/api/tests/unit_tests/controllers/console/explore/test_workflow.py b/api/tests/unit_tests/controllers/console/explore/test_workflow.py new file mode 100644 index 0000000000..445f887fd3 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_workflow.py @@ -0,0 +1,151 @@ +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import InternalServerError + +from controllers.console.explore.error import NotWorkflowAppError +from controllers.console.explore.workflow import ( + InstalledAppWorkflowRunApi, + InstalledAppWorkflowTaskStopApi, +) +from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError +from models.model import AppMode +from services.errors.llm import InvokeRateLimitError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def app(): + app = Flask(__name__) + app.config["TESTING"] = True + return app + + +@pytest.fixture +def user(): + return MagicMock() + + +@pytest.fixture +def workflow_app(): + app = MagicMock() + app.mode = AppMode.WORKFLOW + return app + + +@pytest.fixture +def installed_workflow_app(workflow_app): + return MagicMock(app=workflow_app) + + +@pytest.fixture +def non_workflow_installed_app(): + app = MagicMock() + app.mode = AppMode.CHAT + return MagicMock(app=app) + + +@pytest.fixture +def payload(): + return {"inputs": {"a": 1}} + + +class TestInstalledAppWorkflowRunApi: + def test_not_workflow_app(self, app, non_workflow_installed_app): + api = InstalledAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.explore.workflow.current_account_with_tenant", + return_value=(MagicMock(), None), + ), + ): + with pytest.raises(NotWorkflowAppError): + method(non_workflow_installed_app) + + def test_success(self, app, installed_workflow_app, user, payload): + api = InstalledAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.explore.workflow.current_account_with_tenant", + return_value=(user, None), + ), + patch( + "controllers.console.explore.workflow.AppGenerateService.generate", + return_value=MagicMock(), + ) as generate_mock, + ): + result = method(installed_workflow_app) + + generate_mock.assert_called_once() + assert result is not None + + def test_rate_limit_error(self, app, installed_workflow_app, user, payload): + api = InstalledAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.explore.workflow.current_account_with_tenant", + return_value=(user, None), + ), + patch( + "controllers.console.explore.workflow.AppGenerateService.generate", + side_effect=InvokeRateLimitError("rate limit"), + ), + ): + with pytest.raises(InvokeRateLimitHttpError): + method(installed_workflow_app) + + def test_unexpected_exception(self, app, installed_workflow_app, user, payload): + api = InstalledAppWorkflowRunApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.explore.workflow.current_account_with_tenant", + return_value=(user, None), + ), + patch( + "controllers.console.explore.workflow.AppGenerateService.generate", + side_effect=Exception("boom"), + ), + ): + with pytest.raises(InternalServerError): + method(installed_workflow_app) + + +class TestInstalledAppWorkflowTaskStopApi: + def test_not_workflow_app(self, non_workflow_installed_app): + api = InstalledAppWorkflowTaskStopApi() + method = unwrap(api.post) + + with pytest.raises(NotWorkflowAppError): + method(non_workflow_installed_app, "task-1") + + def test_success(self, installed_workflow_app): + api = InstalledAppWorkflowTaskStopApi() + method = unwrap(api.post) + + with ( + patch("controllers.console.explore.workflow.AppQueueManager.set_stop_flag_no_user_check") as stop_flag, + patch("controllers.console.explore.workflow.GraphEngineManager.send_stop_command") as send_stop, + ): + result = method(installed_workflow_app, "task-1") + + stop_flag.assert_called_once_with("task-1") + send_stop.assert_called_once_with("task-1") + assert result == {"result": "success"} diff --git a/api/tests/unit_tests/controllers/console/explore/test_wraps.py b/api/tests/unit_tests/controllers/console/explore/test_wraps.py new file mode 100644 index 0000000000..67e7a32591 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/explore/test_wraps.py @@ -0,0 +1,244 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden, NotFound + +from controllers.console.explore.error import ( + AppAccessDeniedError, + TrialAppLimitExceeded, + TrialAppNotAllowed, +) +from controllers.console.explore.wraps import ( + InstalledAppResource, + TrialAppResource, + installed_app_required, + trial_app_required, + trial_feature_enable, + user_allowed_to_access_app, +) + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def test_installed_app_required_not_found(): + @installed_app_required + def view(installed_app): + return "ok" + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch("controllers.console.explore.wraps.db.session.query") as q, + ): + q.return_value.where.return_value.first.return_value = None + + with pytest.raises(NotFound): + view("app-id") + + +def test_installed_app_required_app_deleted(): + installed_app = MagicMock(app=None) + + @installed_app_required + def view(installed_app): + return "ok" + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch("controllers.console.explore.wraps.db.session.query") as q, + patch("controllers.console.explore.wraps.db.session.delete"), + patch("controllers.console.explore.wraps.db.session.commit"), + ): + q.return_value.where.return_value.first.return_value = installed_app + + with pytest.raises(NotFound): + view("app-id") + + +def test_installed_app_required_success(): + installed_app = MagicMock(app=MagicMock()) + + @installed_app_required + def view(installed_app): + return installed_app + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch("controllers.console.explore.wraps.db.session.query") as q, + ): + q.return_value.where.return_value.first.return_value = installed_app + + result = view("app-id") + assert result == installed_app + + +def test_user_allowed_to_access_app_denied(): + installed_app = MagicMock(app_id="app-1") + + @user_allowed_to_access_app + def view(installed_app): + return "ok" + + feature = MagicMock() + feature.webapp_auth.enabled = True + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(id="user-1"), None), + ), + patch( + "controllers.console.explore.wraps.FeatureService.get_system_features", + return_value=feature, + ), + patch( + "controllers.console.explore.wraps.EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp", + return_value=False, + ), + ): + with pytest.raises(AppAccessDeniedError): + view(installed_app) + + +def test_user_allowed_to_access_app_success(): + installed_app = MagicMock(app_id="app-1") + + @user_allowed_to_access_app + def view(installed_app): + return "ok" + + feature = MagicMock() + feature.webapp_auth.enabled = True + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(id="user-1"), None), + ), + patch( + "controllers.console.explore.wraps.FeatureService.get_system_features", + return_value=feature, + ), + patch( + "controllers.console.explore.wraps.EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp", + return_value=True, + ), + ): + assert view(installed_app) == "ok" + + +def test_trial_app_required_not_allowed(): + @trial_app_required + def view(app): + return "ok" + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(id="user-1"), None), + ), + patch("controllers.console.explore.wraps.db.session.query") as q, + ): + q.return_value.where.return_value.first.return_value = None + + with pytest.raises(TrialAppNotAllowed): + view("app-id") + + +def test_trial_app_required_limit_exceeded(): + trial_app = MagicMock(trial_limit=1, app=MagicMock()) + record = MagicMock(count=1) + + @trial_app_required + def view(app): + return "ok" + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(id="user-1"), None), + ), + patch("controllers.console.explore.wraps.db.session.query") as q, + ): + q.return_value.where.return_value.first.side_effect = [ + trial_app, + record, + ] + + with pytest.raises(TrialAppLimitExceeded): + view("app-id") + + +def test_trial_app_required_success(): + trial_app = MagicMock(trial_limit=2, app=MagicMock()) + record = MagicMock(count=1) + + @trial_app_required + def view(app): + return app + + with ( + patch( + "controllers.console.explore.wraps.current_account_with_tenant", + return_value=(MagicMock(id="user-1"), None), + ), + patch("controllers.console.explore.wraps.db.session.query") as q, + ): + q.return_value.where.return_value.first.side_effect = [ + trial_app, + record, + ] + + result = view("app-id") + assert result == trial_app.app + + +def test_trial_feature_enable_disabled(): + @trial_feature_enable + def view(): + return "ok" + + features = MagicMock(enable_trial_app=False) + + with patch( + "controllers.console.explore.wraps.FeatureService.get_system_features", + return_value=features, + ): + with pytest.raises(Forbidden): + view() + + +def test_trial_feature_enable_enabled(): + @trial_feature_enable + def view(): + return "ok" + + features = MagicMock(enable_trial_app=True) + + with patch( + "controllers.console.explore.wraps.FeatureService.get_system_features", + return_value=features, + ): + assert view() == "ok" + + +def test_installed_app_resource_decorators(): + decorators = InstalledAppResource.method_decorators + assert len(decorators) == 4 + + +def test_trial_app_resource_decorators(): + decorators = TrialAppResource.method_decorators + assert len(decorators) == 3 diff --git a/api/tests/unit_tests/controllers/console/tag/test_tags.py b/api/tests/unit_tests/controllers/console/tag/test_tags.py new file mode 100644 index 0000000000..769edc8d1c --- /dev/null +++ b/api/tests/unit_tests/controllers/console/tag/test_tags.py @@ -0,0 +1,278 @@ +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import Forbidden + +from controllers.console import console_ns +from controllers.console.tag.tags import ( + TagBindingCreateApi, + TagBindingDeleteApi, + TagListApi, + TagUpdateDeleteApi, +) + + +def unwrap(func): + """ + Recursively unwrap decorated functions. + """ + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def app(): + app = Flask("test_tag") + app.config["TESTING"] = True + return app + + +@pytest.fixture +def admin_user(): + return MagicMock( + id="user-1", + has_edit_permission=True, + is_dataset_editor=True, + ) + + +@pytest.fixture +def readonly_user(): + return MagicMock( + id="user-2", + has_edit_permission=False, + is_dataset_editor=False, + ) + + +@pytest.fixture +def tag(): + tag = MagicMock() + tag.id = "tag-1" + tag.name = "test-tag" + tag.type = "knowledge" + return tag + + +@pytest.fixture +def payload_patch(): + def _patch(payload): + return patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ) + + return _patch + + +class TestTagListApi: + def test_get_success(self, app): + api = TagListApi() + method = unwrap(api.get) + + with app.test_request_context("/?type=knowledge"): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(MagicMock(), "tenant-1"), + ), + patch( + "controllers.console.tag.tags.TagService.get_tags", + return_value=[{"id": "1", "name": "tag"}], + ), + ): + result, status = method(api) + + assert status == 200 + assert isinstance(result, list) + + def test_post_success(self, app, admin_user, tag, payload_patch): + api = TagListApi() + method = unwrap(api.post) + + payload = {"name": "test-tag", "type": "knowledge"} + + with app.test_request_context("/", json=payload): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(admin_user, None), + ), + payload_patch(payload), + patch( + "controllers.console.tag.tags.TagService.save_tags", + return_value=tag, + ), + ): + result, status = method(api) + + assert status == 200 + assert result["name"] == "test-tag" + + def test_post_forbidden(self, app, readonly_user, payload_patch): + api = TagListApi() + method = unwrap(api.post) + + payload = {"name": "x"} + + with app.test_request_context("/", json=payload): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(readonly_user, None), + ), + payload_patch(payload), + ): + with pytest.raises(Forbidden): + method(api) + + +class TestTagUpdateDeleteApi: + def test_patch_success(self, app, admin_user, tag, payload_patch): + api = TagUpdateDeleteApi() + method = unwrap(api.patch) + + payload = {"name": "updated", "type": "knowledge"} + + with app.test_request_context("/", json=payload): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(admin_user, None), + ), + payload_patch(payload), + patch( + "controllers.console.tag.tags.TagService.update_tags", + return_value=tag, + ), + patch( + "controllers.console.tag.tags.TagService.get_tag_binding_count", + return_value=3, + ), + ): + result, status = method(api, "tag-1") + + assert status == 200 + assert result["binding_count"] == 3 + + def test_patch_forbidden(self, app, readonly_user, payload_patch): + api = TagUpdateDeleteApi() + method = unwrap(api.patch) + + payload = {"name": "x"} + + with app.test_request_context("/", json=payload): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(readonly_user, None), + ), + payload_patch(payload), + ): + with pytest.raises(Forbidden): + method(api, "tag-1") + + def test_delete_success(self, app, admin_user): + api = TagUpdateDeleteApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(admin_user, "tenant-1"), + ), + patch("controllers.console.tag.tags.TagService.delete_tag") as delete_mock, + ): + result, status = method(api, "tag-1") + + delete_mock.assert_called_once_with("tag-1") + assert status == 204 + + +class TestTagBindingCreateApi: + def test_create_success(self, app, admin_user, payload_patch): + api = TagBindingCreateApi() + method = unwrap(api.post) + + payload = { + "tag_ids": ["tag-1"], + "target_id": "target-1", + "type": "knowledge", + } + + with app.test_request_context("/", json=payload): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(admin_user, None), + ), + payload_patch(payload), + patch("controllers.console.tag.tags.TagService.save_tag_binding") as save_mock, + ): + result, status = method(api) + + save_mock.assert_called_once() + assert status == 200 + assert result["result"] == "success" + + def test_create_forbidden(self, app, readonly_user, payload_patch): + api = TagBindingCreateApi() + method = unwrap(api.post) + + with app.test_request_context("/", json={}): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(readonly_user, None), + ), + payload_patch({}), + ): + with pytest.raises(Forbidden): + method(api) + + +class TestTagBindingDeleteApi: + def test_remove_success(self, app, admin_user, payload_patch): + api = TagBindingDeleteApi() + method = unwrap(api.post) + + payload = { + "tag_id": "tag-1", + "target_id": "target-1", + "type": "knowledge", + } + + with app.test_request_context("/", json=payload): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(admin_user, None), + ), + payload_patch(payload), + patch("controllers.console.tag.tags.TagService.delete_tag_binding") as delete_mock, + ): + result, status = method(api) + + delete_mock.assert_called_once() + assert status == 200 + assert result["result"] == "success" + + def test_remove_forbidden(self, app, readonly_user, payload_patch): + api = TagBindingDeleteApi() + method = unwrap(api.post) + + with app.test_request_context("/", json={}): + with ( + patch( + "controllers.console.tag.tags.current_account_with_tenant", + return_value=(readonly_user, None), + ), + payload_patch({}), + ): + with pytest.raises(Forbidden): + method(api) diff --git a/api/tests/unit_tests/controllers/console/test_admin.py b/api/tests/unit_tests/controllers/console/test_admin.py index e0ddf6542e..16197fcd0c 100644 --- a/api/tests/unit_tests/controllers/console/test_admin.py +++ b/api/tests/unit_tests/controllers/console/test_admin.py @@ -1,13 +1,483 @@ """Final working unit tests for admin endpoints - tests business logic directly.""" import uuid -from unittest.mock import Mock, patch +from unittest.mock import Mock, PropertyMock, patch import pytest from werkzeug.exceptions import NotFound, Unauthorized -from controllers.console.admin import InsertExploreAppPayload -from models.model import App, RecommendedApp +from controllers.console.admin import ( + DeleteExploreBannerApi, + InsertExploreAppApi, + InsertExploreAppListApi, + InsertExploreAppPayload, + InsertExploreBannerApi, + InsertExploreBannerPayload, +) +from models.model import App, InstalledApp, RecommendedApp + + +@pytest.fixture(autouse=True) +def bypass_only_edition_cloud(mocker): + """ + Bypass only_edition_cloud decorator by setting EDITION to "CLOUD". + """ + mocker.patch( + "controllers.console.wraps.dify_config.EDITION", + new="CLOUD", + ) + + +@pytest.fixture +def mock_admin_auth(mocker): + """ + Provide valid admin authentication for controller tests. + """ + mocker.patch( + "controllers.console.admin.dify_config.ADMIN_API_KEY", + "test-admin-key", + ) + mocker.patch( + "controllers.console.admin.extract_access_token", + return_value="test-admin-key", + ) + + +@pytest.fixture +def mock_console_payload(mocker): + payload = { + "app_id": str(uuid.uuid4()), + "language": "en-US", + "category": "Productivity", + "position": 1, + } + + mocker.patch( + "flask_restx.namespace.Namespace.payload", + new_callable=PropertyMock, + return_value=payload, + ) + + return payload + + +@pytest.fixture +def mock_banner_payload(mocker): + mocker.patch( + "flask_restx.namespace.Namespace.payload", + new_callable=PropertyMock, + return_value={ + "title": "Test Banner", + "description": "Banner description", + "img-src": "https://example.com/banner.png", + "link": "https://example.com", + "sort": 1, + "category": "homepage", + }, + ) + + +@pytest.fixture +def mock_session_factory(mocker): + mock_session = Mock() + mock_session.execute = Mock() + mock_session.add = Mock() + mock_session.commit = Mock() + + mocker.patch( + "controllers.console.admin.session_factory.create_session", + return_value=Mock( + __enter__=lambda s: mock_session, + __exit__=Mock(return_value=False), + ), + ) + + +class TestDeleteExploreBannerApi: + def setup_method(self): + self.api = DeleteExploreBannerApi() + + def test_delete_banner_not_found(self, mocker, mock_admin_auth): + mocker.patch( + "controllers.console.admin.db.session.execute", + return_value=Mock(scalar_one_or_none=lambda: None), + ) + + with pytest.raises(NotFound, match="is not found"): + self.api.delete(uuid.uuid4()) + + def test_delete_banner_success(self, mocker, mock_admin_auth): + mock_banner = Mock() + + mocker.patch( + "controllers.console.admin.db.session.execute", + return_value=Mock(scalar_one_or_none=lambda: mock_banner), + ) + mocker.patch("controllers.console.admin.db.session.delete") + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.delete(uuid.uuid4()) + + assert status == 204 + assert response["result"] == "success" + + +class TestInsertExploreBannerApi: + def setup_method(self): + self.api = InsertExploreBannerApi() + + def test_insert_banner_success(self, mocker, mock_admin_auth, mock_banner_payload): + mocker.patch("controllers.console.admin.db.session.add") + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.post() + + assert status == 201 + assert response["result"] == "success" + + def test_banner_payload_valid_language(self): + payload = { + "title": "Test Banner", + "description": "Banner description", + "img-src": "https://example.com/banner.png", + "link": "https://example.com", + "sort": 1, + "category": "homepage", + "language": "en-US", + } + + model = InsertExploreBannerPayload.model_validate(payload) + assert model.language == "en-US" + + def test_banner_payload_invalid_language(self): + payload = { + "title": "Test Banner", + "description": "Banner description", + "img-src": "https://example.com/banner.png", + "link": "https://example.com", + "sort": 1, + "category": "homepage", + "language": "invalid-lang", + } + + with pytest.raises(ValueError, match="invalid-lang is not a valid language"): + InsertExploreBannerPayload.model_validate(payload) + + +class TestInsertExploreAppApiDelete: + def setup_method(self): + self.api = InsertExploreAppApi() + + def test_delete_when_not_in_explore(self, mocker, mock_admin_auth): + mocker.patch( + "controllers.console.admin.session_factory.create_session", + return_value=Mock( + __enter__=lambda s: s, + __exit__=Mock(return_value=False), + execute=lambda *_: Mock(scalar_one_or_none=lambda: None), + ), + ) + + response, status = self.api.delete(uuid.uuid4()) + + assert status == 204 + assert response["result"] == "success" + + def test_delete_when_in_explore_with_trial_app(self, mocker, mock_admin_auth): + """Test deleting an app from explore that has a trial app.""" + app_id = uuid.uuid4() + + mock_recommended = Mock(spec=RecommendedApp) + mock_recommended.app_id = "app-123" + + mock_app = Mock(spec=App) + mock_app.is_public = True + + mock_trial = Mock() + + # Mock session context manager and its execute + mock_session = Mock() + mock_session.execute = Mock() + mock_session.delete = Mock() + + # Set up side effects for execute calls + mock_session.execute.side_effect = [ + Mock(scalar_one_or_none=lambda: mock_recommended), + Mock(scalar_one_or_none=lambda: mock_app), + Mock(scalars=Mock(return_value=Mock(all=lambda: []))), + Mock(scalar_one_or_none=lambda: mock_trial), + ] + + mocker.patch( + "controllers.console.admin.session_factory.create_session", + return_value=Mock( + __enter__=lambda s: mock_session, + __exit__=Mock(return_value=False), + ), + ) + + mocker.patch("controllers.console.admin.db.session.delete") + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.delete(app_id) + + assert status == 204 + assert response["result"] == "success" + assert mock_app.is_public is False + + def test_delete_with_installed_apps(self, mocker, mock_admin_auth): + """Test deleting an app that has installed apps in other tenants.""" + app_id = uuid.uuid4() + + mock_recommended = Mock(spec=RecommendedApp) + mock_recommended.app_id = "app-123" + + mock_app = Mock(spec=App) + mock_app.is_public = True + + mock_installed_app = Mock(spec=InstalledApp) + + # Mock session + mock_session = Mock() + mock_session.execute = Mock() + mock_session.delete = Mock() + + mock_session.execute.side_effect = [ + Mock(scalar_one_or_none=lambda: mock_recommended), + Mock(scalar_one_or_none=lambda: mock_app), + Mock(scalars=Mock(return_value=Mock(all=lambda: [mock_installed_app]))), + Mock(scalar_one_or_none=lambda: None), + ] + + mocker.patch( + "controllers.console.admin.session_factory.create_session", + return_value=Mock( + __enter__=lambda s: mock_session, + __exit__=Mock(return_value=False), + ), + ) + + mocker.patch("controllers.console.admin.db.session.delete") + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.delete(app_id) + + assert status == 204 + assert mock_session.delete.called + + +class TestInsertExploreAppListApi: + def setup_method(self): + self.api = InsertExploreAppListApi() + + def test_app_not_found(self, mocker, mock_admin_auth, mock_console_payload): + mocker.patch( + "controllers.console.admin.db.session.execute", + return_value=Mock(scalar_one_or_none=lambda: None), + ) + + with pytest.raises(NotFound, match="is not found"): + self.api.post() + + def test_create_recommended_app( + self, + mocker, + mock_admin_auth, + mock_console_payload, + ): + mock_app = Mock(spec=App) + mock_app.id = "app-id" + mock_app.site = None + mock_app.tenant_id = "tenant" + mock_app.is_public = False + + # db.session.execute → fetch App + mocker.patch( + "controllers.console.admin.db.session.execute", + return_value=Mock(scalar_one_or_none=lambda: mock_app), + ) + + # session_factory.create_session → recommended_app lookup + mock_session = Mock() + mock_session.execute = Mock(return_value=Mock(scalar_one_or_none=lambda: None)) + + mocker.patch( + "controllers.console.admin.session_factory.create_session", + return_value=Mock( + __enter__=lambda s: mock_session, + __exit__=Mock(return_value=False), + ), + ) + + mocker.patch("controllers.console.admin.db.session.add") + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.post() + + assert status == 201 + assert response["result"] == "success" + assert mock_app.is_public is True + + def test_update_recommended_app(self, mocker, mock_admin_auth, mock_console_payload, mock_session_factory): + mock_app = Mock(spec=App) + mock_app.id = "app-id" + mock_app.site = None + mock_app.is_public = False + + mock_recommended = Mock(spec=RecommendedApp) + + mocker.patch( + "controllers.console.admin.db.session.execute", + side_effect=[ + Mock(scalar_one_or_none=lambda: mock_app), + Mock(scalar_one_or_none=lambda: mock_recommended), + ], + ) + + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.post() + + assert status == 200 + assert response["result"] == "success" + assert mock_app.is_public is True + + def test_site_data_overrides_payload( + self, + mocker, + mock_admin_auth, + mock_console_payload, + mock_session_factory, + ): + site = Mock() + site.description = "Site Desc" + site.copyright = "Site Copyright" + site.privacy_policy = "Site Privacy" + site.custom_disclaimer = "Site Disclaimer" + + mock_app = Mock(spec=App) + mock_app.id = "app-id" + mock_app.site = site + mock_app.tenant_id = "tenant" + mock_app.is_public = False + + mocker.patch( + "controllers.console.admin.db.session.execute", + side_effect=[ + Mock(scalar_one_or_none=lambda: mock_app), + Mock(scalar_one_or_none=lambda: None), + Mock(scalar_one_or_none=lambda: None), + ], + ) + + commit_spy = mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.post() + + assert status == 200 + assert response["result"] == "success" + assert mock_app.is_public is True + commit_spy.assert_called_once() + + def test_create_trial_app_when_can_trial_enabled( + self, + mocker, + mock_admin_auth, + mock_console_payload, + mock_session_factory, + ): + mock_console_payload["can_trial"] = True + mock_console_payload["trial_limit"] = 5 + + mock_app = Mock(spec=App) + mock_app.id = "app-id" + mock_app.site = None + mock_app.tenant_id = "tenant" + mock_app.is_public = False + + mocker.patch( + "controllers.console.admin.db.session.execute", + side_effect=[ + Mock(scalar_one_or_none=lambda: mock_app), + Mock(scalar_one_or_none=lambda: None), + Mock(scalar_one_or_none=lambda: None), + ], + ) + + add_spy = mocker.patch("controllers.console.admin.db.session.add") + mocker.patch("controllers.console.admin.db.session.commit") + + self.api.post() + + assert any(call.args[0].__class__.__name__ == "TrialApp" for call in add_spy.call_args_list) + + def test_update_recommended_app_with_trial( + self, + mocker, + mock_admin_auth, + mock_console_payload, + mock_session_factory, + ): + """Test updating a recommended app when trial is enabled.""" + mock_console_payload["can_trial"] = True + mock_console_payload["trial_limit"] = 10 + + mock_app = Mock(spec=App) + mock_app.id = "app-id" + mock_app.site = None + mock_app.is_public = False + mock_app.tenant_id = "tenant-123" + + mock_recommended = Mock(spec=RecommendedApp) + + mocker.patch( + "controllers.console.admin.db.session.execute", + side_effect=[ + Mock(scalar_one_or_none=lambda: mock_app), + Mock(scalar_one_or_none=lambda: mock_recommended), + Mock(scalar_one_or_none=lambda: None), + ], + ) + + add_spy = mocker.patch("controllers.console.admin.db.session.add") + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.post() + + assert status == 200 + assert response["result"] == "success" + assert mock_app.is_public is True + + def test_update_recommended_app_without_trial( + self, + mocker, + mock_admin_auth, + mock_console_payload, + mock_session_factory, + ): + """Test updating a recommended app without trial enabled.""" + mock_app = Mock(spec=App) + mock_app.id = "app-id" + mock_app.site = None + mock_app.is_public = False + + mock_recommended = Mock(spec=RecommendedApp) + + mocker.patch( + "controllers.console.admin.db.session.execute", + side_effect=[ + Mock(scalar_one_or_none=lambda: mock_app), + Mock(scalar_one_or_none=lambda: mock_recommended), + ], + ) + + mocker.patch("controllers.console.admin.db.session.commit") + + response, status = self.api.post() + + assert status == 200 + assert response["result"] == "success" + assert mock_app.is_public is True class TestInsertExploreAppPayload: diff --git a/api/tests/unit_tests/controllers/console/test_apikey.py b/api/tests/unit_tests/controllers/console/test_apikey.py new file mode 100644 index 0000000000..018257f815 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_apikey.py @@ -0,0 +1,138 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Forbidden + +from controllers.console.apikey import ( + BaseApiKeyListResource, + BaseApiKeyResource, + _get_resource, +) + + +@pytest.fixture +def tenant_context_admin(): + with patch("controllers.console.apikey.current_account_with_tenant") as mock: + user = MagicMock() + user.is_admin_or_owner = True + mock.return_value = (user, "tenant-123") + yield mock + + +@pytest.fixture +def tenant_context_non_admin(): + with patch("controllers.console.apikey.current_account_with_tenant") as mock: + user = MagicMock() + user.is_admin_or_owner = False + mock.return_value = (user, "tenant-123") + yield mock + + +@pytest.fixture +def db_mock(): + with patch("controllers.console.apikey.db") as mock_db: + mock_db.session = MagicMock() + yield mock_db + + +@pytest.fixture(autouse=True) +def bypass_permissions(): + with patch( + "controllers.console.apikey.edit_permission_required", + lambda f: f, + ): + yield + + +class DummyApiKeyListResource(BaseApiKeyListResource): + resource_type = "app" + resource_model = MagicMock() + resource_id_field = "app_id" + token_prefix = "app-" + + +class DummyApiKeyResource(BaseApiKeyResource): + resource_type = "app" + resource_model = MagicMock() + resource_id_field = "app_id" + + +class TestGetResource: + def test_get_resource_success(self): + fake_resource = MagicMock() + + with ( + patch("controllers.console.apikey.select") as mock_select, + patch("controllers.console.apikey.Session") as mock_session, + patch("controllers.console.apikey.db") as mock_db, + ): + mock_db.engine = MagicMock() + mock_select.return_value.filter_by.return_value = MagicMock() + + session = mock_session.return_value.__enter__.return_value + session.execute.return_value.scalar_one_or_none.return_value = fake_resource + + result = _get_resource("rid", "tid", MagicMock) + assert result == fake_resource + + def test_get_resource_not_found(self): + with ( + patch("controllers.console.apikey.select") as mock_select, + patch("controllers.console.apikey.Session") as mock_session, + patch("controllers.console.apikey.db") as mock_db, + patch("controllers.console.apikey.flask_restx.abort") as abort, + ): + mock_db.engine = MagicMock() + mock_select.return_value.filter_by.return_value = MagicMock() + + session = mock_session.return_value.__enter__.return_value + session.execute.return_value.scalar_one_or_none.return_value = None + + _get_resource("rid", "tid", MagicMock) + + abort.assert_called_once() + + +class TestBaseApiKeyListResource: + def test_get_apikeys_success(self, tenant_context_admin, db_mock): + resource = DummyApiKeyListResource() + + with patch("controllers.console.apikey._get_resource"): + db_mock.session.scalars.return_value.all.return_value = [MagicMock(), MagicMock()] + + result = DummyApiKeyListResource.get.__wrapped__(resource, "resource-id") + assert "items" in result + + +class TestBaseApiKeyResource: + def test_delete_forbidden(self, tenant_context_non_admin, db_mock): + resource = DummyApiKeyResource() + + with patch("controllers.console.apikey._get_resource"): + with pytest.raises(Forbidden): + DummyApiKeyResource.delete(resource, "rid", "kid") + + def test_delete_key_not_found(self, tenant_context_admin, db_mock): + resource = DummyApiKeyResource() + db_mock.session.query.return_value.where.return_value.first.return_value = None + + with patch("controllers.console.apikey._get_resource"): + with pytest.raises(Exception) as exc_info: + DummyApiKeyResource.delete(resource, "rid", "kid") + + # flask_restx.abort raises HTTPException with message in data attribute + assert exc_info.value.data["message"] == "API key not found" + + def test_delete_success(self, tenant_context_admin, db_mock): + resource = DummyApiKeyResource() + db_mock.session.query.return_value.where.return_value.first.return_value = MagicMock() + + with ( + patch("controllers.console.apikey._get_resource"), + patch("controllers.console.apikey.ApiTokenCache.delete"), + ): + result, status = DummyApiKeyResource.delete(resource, "rid", "kid") + + assert status == 204 + assert result == {"result": "success"} + db_mock.session.commit.assert_called_once() diff --git a/api/tests/unit_tests/controllers/console/test_fastopenapi_init_validate.py b/api/tests/unit_tests/controllers/console/test_fastopenapi_init_validate.py deleted file mode 100644 index b9bc42fb25..0000000000 --- a/api/tests/unit_tests/controllers/console/test_fastopenapi_init_validate.py +++ /dev/null @@ -1,46 +0,0 @@ -import builtins -from unittest.mock import patch - -import pytest -from flask import Flask -from flask.views import MethodView - -from extensions import ext_fastopenapi - -if not hasattr(builtins, "MethodView"): - builtins.MethodView = MethodView # type: ignore[attr-defined] - - -@pytest.fixture -def app() -> Flask: - app = Flask(__name__) - app.config["TESTING"] = True - app.secret_key = "test-secret-key" - return app - - -def test_console_init_get_returns_finished_when_no_init_password(app: Flask, monkeypatch: pytest.MonkeyPatch): - ext_fastopenapi.init_app(app) - monkeypatch.delenv("INIT_PASSWORD", raising=False) - - with patch("controllers.console.init_validate.dify_config.EDITION", "SELF_HOSTED"): - client = app.test_client() - response = client.get("/console/api/init") - - assert response.status_code == 200 - assert response.get_json() == {"status": "finished"} - - -def test_console_init_post_returns_success(app: Flask, monkeypatch: pytest.MonkeyPatch): - ext_fastopenapi.init_app(app) - monkeypatch.setenv("INIT_PASSWORD", "test-init-password") - - with ( - patch("controllers.console.init_validate.dify_config.EDITION", "SELF_HOSTED"), - patch("controllers.console.init_validate.TenantService.get_tenant_count", return_value=0), - ): - client = app.test_client() - response = client.post("/console/api/init", json={"password": "test-init-password"}) - - assert response.status_code == 201 - assert response.get_json() == {"result": "success"} diff --git a/api/tests/unit_tests/controllers/console/test_fastopenapi_remote_files.py b/api/tests/unit_tests/controllers/console/test_fastopenapi_remote_files.py deleted file mode 100644 index c0a984e216..0000000000 --- a/api/tests/unit_tests/controllers/console/test_fastopenapi_remote_files.py +++ /dev/null @@ -1,286 +0,0 @@ -"""Tests for remote file upload API endpoints using Flask-RESTX.""" - -import contextlib -from datetime import datetime -from types import SimpleNamespace -from unittest.mock import Mock, patch - -import httpx -import pytest -from flask import Flask, g - - -@pytest.fixture -def app() -> Flask: - """Create Flask app for testing.""" - app = Flask(__name__) - app.config["TESTING"] = True - app.config["SECRET_KEY"] = "test-secret-key" - return app - - -@pytest.fixture -def client(app): - """Create test client with console blueprint registered.""" - from controllers.console import bp - - app.register_blueprint(bp) - return app.test_client() - - -@pytest.fixture -def mock_account(): - """Create a mock account for testing.""" - from models import Account - - account = Mock(spec=Account) - account.id = "test-account-id" - account.current_tenant_id = "test-tenant-id" - return account - - -@pytest.fixture -def auth_ctx(app, mock_account): - """Context manager to set auth/tenant context in flask.g for a request.""" - - @contextlib.contextmanager - def _ctx(): - with app.test_request_context(): - g._login_user = mock_account - g._current_tenant = mock_account.current_tenant_id - yield - - return _ctx - - -class TestGetRemoteFileInfo: - """Test GET /console/api/remote-files/ endpoint.""" - - def test_get_remote_file_info_success(self, app, client, mock_account): - """Test successful retrieval of remote file info.""" - response = httpx.Response( - 200, - request=httpx.Request("HEAD", "http://example.com/file.txt"), - headers={"Content-Type": "text/plain", "Content-Length": "1024"}, - ) - - with ( - patch( - "controllers.console.remote_files.current_account_with_tenant", - return_value=(mock_account, "test-tenant-id"), - ), - patch("controllers.console.remote_files.ssrf_proxy.head", return_value=response), - patch("libs.login.check_csrf_token", return_value=None), - ): - with app.test_request_context(): - g._login_user = mock_account - g._current_tenant = mock_account.current_tenant_id - encoded_url = "http%3A%2F%2Fexample.com%2Ffile.txt" - resp = client.get(f"/console/api/remote-files/{encoded_url}") - - assert resp.status_code == 200 - data = resp.get_json() - assert data["file_type"] == "text/plain" - assert data["file_length"] == 1024 - - def test_get_remote_file_info_fallback_to_get_on_head_failure(self, app, client, mock_account): - """Test fallback to GET when HEAD returns non-200 status.""" - head_response = httpx.Response( - 404, - request=httpx.Request("HEAD", "http://example.com/file.pdf"), - ) - get_response = httpx.Response( - 200, - request=httpx.Request("GET", "http://example.com/file.pdf"), - headers={"Content-Type": "application/pdf", "Content-Length": "2048"}, - ) - - with ( - patch( - "controllers.console.remote_files.current_account_with_tenant", - return_value=(mock_account, "test-tenant-id"), - ), - patch("controllers.console.remote_files.ssrf_proxy.head", return_value=head_response), - patch("controllers.console.remote_files.ssrf_proxy.get", return_value=get_response), - patch("libs.login.check_csrf_token", return_value=None), - ): - with app.test_request_context(): - g._login_user = mock_account - g._current_tenant = mock_account.current_tenant_id - encoded_url = "http%3A%2F%2Fexample.com%2Ffile.pdf" - resp = client.get(f"/console/api/remote-files/{encoded_url}") - - assert resp.status_code == 200 - data = resp.get_json() - assert data["file_type"] == "application/pdf" - assert data["file_length"] == 2048 - - -class TestRemoteFileUpload: - """Test POST /console/api/remote-files/upload endpoint.""" - - @pytest.mark.parametrize( - ("head_status", "use_get"), - [ - (200, False), # HEAD succeeds - (405, True), # HEAD fails -> fallback GET - ], - ) - def test_upload_remote_file_success_paths(self, client, mock_account, auth_ctx, head_status, use_get): - url = "http://example.com/file.pdf" - head_resp = httpx.Response( - head_status, - request=httpx.Request("HEAD", url), - headers={"Content-Type": "application/pdf", "Content-Length": "1024"}, - ) - get_resp = httpx.Response( - 200, - request=httpx.Request("GET", url), - headers={"Content-Type": "application/pdf", "Content-Length": "1024"}, - content=b"file content", - ) - - file_info = SimpleNamespace( - extension="pdf", - size=1024, - filename="file.pdf", - mimetype="application/pdf", - ) - uploaded_file = SimpleNamespace( - id="uploaded-file-id", - name="file.pdf", - size=1024, - extension="pdf", - mime_type="application/pdf", - created_by="test-account-id", - created_at=datetime(2024, 1, 1, 12, 0, 0), - ) - - with ( - patch( - "controllers.console.remote_files.current_account_with_tenant", - return_value=(mock_account, "test-tenant-id"), - ), - patch("controllers.console.remote_files.ssrf_proxy.head", return_value=head_resp) as p_head, - patch("controllers.console.remote_files.ssrf_proxy.get", return_value=get_resp) as p_get, - patch( - "controllers.console.remote_files.helpers.guess_file_info_from_response", - return_value=file_info, - ), - patch( - "controllers.console.remote_files.FileService.is_file_size_within_limit", - return_value=True, - ), - patch("controllers.console.remote_files.db", spec=["engine"]), - patch("controllers.console.remote_files.FileService") as mock_file_service, - patch( - "controllers.console.remote_files.file_helpers.get_signed_file_url", - return_value="http://example.com/signed-url", - ), - patch("libs.login.check_csrf_token", return_value=None), - ): - mock_file_service.return_value.upload_file.return_value = uploaded_file - - with auth_ctx(): - resp = client.post( - "/console/api/remote-files/upload", - json={"url": url}, - ) - - assert resp.status_code == 201 - p_head.assert_called_once() - # GET is used either for fallback (HEAD fails) or to fetch content after HEAD succeeds - p_get.assert_called_once() - mock_file_service.return_value.upload_file.assert_called_once() - - data = resp.get_json() - assert data["id"] == "uploaded-file-id" - assert data["name"] == "file.pdf" - assert data["size"] == 1024 - assert data["extension"] == "pdf" - assert data["url"] == "http://example.com/signed-url" - assert data["mime_type"] == "application/pdf" - assert data["created_by"] == "test-account-id" - - @pytest.mark.parametrize( - ("size_ok", "raises", "expected_status", "expected_msg"), - [ - # When size check fails in controller, API returns 413 with message "File size exceeded..." - (False, None, 413, "file size exceeded"), - # When service raises unsupported type, controller maps to 415 with message "File type not allowed." - (True, "unsupported", 415, "file type not allowed"), - ], - ) - def test_upload_remote_file_errors( - self, client, mock_account, auth_ctx, size_ok, raises, expected_status, expected_msg - ): - url = "http://example.com/x.pdf" - head_resp = httpx.Response( - 200, - request=httpx.Request("HEAD", url), - headers={"Content-Type": "application/pdf", "Content-Length": "9"}, - ) - file_info = SimpleNamespace(extension="pdf", size=9, filename="x.pdf", mimetype="application/pdf") - - with ( - patch( - "controllers.console.remote_files.current_account_with_tenant", - return_value=(mock_account, "test-tenant-id"), - ), - patch("controllers.console.remote_files.ssrf_proxy.head", return_value=head_resp), - patch( - "controllers.console.remote_files.helpers.guess_file_info_from_response", - return_value=file_info, - ), - patch( - "controllers.console.remote_files.FileService.is_file_size_within_limit", - return_value=size_ok, - ), - patch("controllers.console.remote_files.db", spec=["engine"]), - patch("libs.login.check_csrf_token", return_value=None), - ): - if raises == "unsupported": - from services.errors.file import UnsupportedFileTypeError - - with patch("controllers.console.remote_files.FileService") as mock_file_service: - mock_file_service.return_value.upload_file.side_effect = UnsupportedFileTypeError("bad") - with auth_ctx(): - resp = client.post( - "/console/api/remote-files/upload", - json={"url": url}, - ) - else: - with auth_ctx(): - resp = client.post( - "/console/api/remote-files/upload", - json={"url": url}, - ) - - assert resp.status_code == expected_status - data = resp.get_json() - msg = (data.get("error") or {}).get("message") or data.get("message", "") - assert expected_msg in msg.lower() - - def test_upload_remote_file_fetch_failure(self, client, mock_account, auth_ctx): - """Test upload when fetching of remote file fails.""" - with ( - patch( - "controllers.console.remote_files.current_account_with_tenant", - return_value=(mock_account, "test-tenant-id"), - ), - patch( - "controllers.console.remote_files.ssrf_proxy.head", - side_effect=httpx.RequestError("Connection failed"), - ), - patch("libs.login.check_csrf_token", return_value=None), - ): - with auth_ctx(): - resp = client.post( - "/console/api/remote-files/upload", - json={"url": "http://unreachable.com/file.pdf"}, - ) - - assert resp.status_code == 400 - data = resp.get_json() - msg = (data.get("error") or {}).get("message") or data.get("message", "") - assert "failed to fetch" in msg.lower() diff --git a/api/tests/unit_tests/controllers/console/test_feature.py b/api/tests/unit_tests/controllers/console/test_feature.py new file mode 100644 index 0000000000..d8debc1f2c --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_feature.py @@ -0,0 +1,81 @@ +from werkzeug.exceptions import Unauthorized + + +def unwrap(func): + """ + Recursively unwrap decorated functions. + """ + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestFeatureApi: + def test_get_tenant_features_success(self, mocker): + from controllers.console.feature import FeatureApi + + mocker.patch( + "controllers.console.feature.current_account_with_tenant", + return_value=("account_id", "tenant_123"), + ) + + mocker.patch("controllers.console.feature.FeatureService.get_features").return_value.model_dump.return_value = { + "features": {"feature_a": True} + } + + api = FeatureApi() + + raw_get = unwrap(FeatureApi.get) + result = raw_get(api) + + assert result == {"features": {"feature_a": True}} + + +class TestSystemFeatureApi: + def test_get_system_features_authenticated(self, mocker): + """ + current_user.is_authenticated == True + """ + + from controllers.console.feature import SystemFeatureApi + + fake_user = mocker.Mock() + fake_user.is_authenticated = True + + mocker.patch( + "controllers.console.feature.current_user", + fake_user, + ) + + mocker.patch( + "controllers.console.feature.FeatureService.get_system_features" + ).return_value.model_dump.return_value = {"features": {"sys_feature": True}} + + api = SystemFeatureApi() + result = api.get() + + assert result == {"features": {"sys_feature": True}} + + def test_get_system_features_unauthenticated(self, mocker): + """ + current_user.is_authenticated raises Unauthorized + """ + + from controllers.console.feature import SystemFeatureApi + + fake_user = mocker.Mock() + type(fake_user).is_authenticated = mocker.PropertyMock(side_effect=Unauthorized()) + + mocker.patch( + "controllers.console.feature.current_user", + fake_user, + ) + + mocker.patch( + "controllers.console.feature.FeatureService.get_system_features" + ).return_value.model_dump.return_value = {"features": {"sys_feature": False}} + + api = SystemFeatureApi() + result = api.get() + + assert result == {"features": {"sys_feature": False}} diff --git a/api/tests/unit_tests/controllers/console/test_files.py b/api/tests/unit_tests/controllers/console/test_files.py new file mode 100644 index 0000000000..5df9daa7f8 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_files.py @@ -0,0 +1,300 @@ +import io +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import Forbidden + +from constants import DOCUMENT_EXTENSIONS +from controllers.common.errors import ( + BlockedFileExtensionError, + FilenameNotExistsError, + FileTooLargeError, + NoFileUploadedError, + TooManyFilesError, + UnsupportedFileTypeError, +) +from controllers.console.files import ( + FileApi, + FilePreviewApi, + FileSupportTypeApi, +) + + +def unwrap(func): + """ + Recursively unwrap decorated functions. + """ + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def app(): + app = Flask(__name__) + app.testing = True + return app + + +@pytest.fixture(autouse=True) +def mock_decorators(): + """ + Make decorators no-ops so logic is directly testable + """ + with ( + patch("controllers.console.files.setup_required", new=lambda f: f), + patch("controllers.console.files.login_required", new=lambda f: f), + patch("controllers.console.files.account_initialization_required", new=lambda f: f), + patch("controllers.console.files.cloud_edition_billing_resource_check", return_value=lambda f: f), + ): + yield + + +@pytest.fixture +def mock_current_user(): + user = MagicMock() + user.is_dataset_editor = True + return user + + +@pytest.fixture +def mock_account_context(mock_current_user): + with patch( + "controllers.console.files.current_account_with_tenant", + return_value=(mock_current_user, None), + ): + yield + + +@pytest.fixture +def mock_db(): + with patch("controllers.console.files.db") as db_mock: + db_mock.engine = MagicMock() + yield db_mock + + +@pytest.fixture +def mock_file_service(mock_db): + with patch("controllers.console.files.FileService") as fs: + instance = fs.return_value + yield instance + + +class TestFileApiGet: + def test_get_upload_config(self, app): + api = FileApi() + get_method = unwrap(api.get) + + with app.test_request_context(): + data, status = get_method(api) + + assert status == 200 + assert "file_size_limit" in data + assert "batch_count_limit" in data + + +class TestFileApiPost: + def test_no_file_uploaded(self, app, mock_account_context): + api = FileApi() + post_method = unwrap(api.post) + + with app.test_request_context(method="POST", data={}): + with pytest.raises(NoFileUploadedError): + post_method(api) + + def test_too_many_files(self, app, mock_account_context): + api = FileApi() + post_method = unwrap(api.post) + + with app.test_request_context(method="POST"): + from unittest.mock import MagicMock, patch + + with patch("controllers.console.files.request") as mock_request: + mock_request.files = MagicMock() + mock_request.files.__len__.return_value = 2 + mock_request.files.__contains__.return_value = True + mock_request.form = MagicMock() + mock_request.form.get.return_value = None + + with pytest.raises(TooManyFilesError): + post_method(api) + + def test_filename_missing(self, app, mock_account_context): + api = FileApi() + post_method = unwrap(api.post) + + data = { + "file": (io.BytesIO(b"abc"), ""), + } + + with app.test_request_context(method="POST", data=data): + with pytest.raises(FilenameNotExistsError): + post_method(api) + + def test_dataset_upload_without_permission(self, app, mock_current_user): + mock_current_user.is_dataset_editor = False + + with patch( + "controllers.console.files.current_account_with_tenant", + return_value=(mock_current_user, None), + ): + api = FileApi() + post_method = unwrap(api.post) + + data = { + "file": (io.BytesIO(b"abc"), "test.txt"), + "source": "datasets", + } + + with app.test_request_context(method="POST", data=data): + with pytest.raises(Forbidden): + post_method(api) + + def test_successful_upload(self, app, mock_account_context, mock_file_service): + api = FileApi() + post_method = unwrap(api.post) + + mock_file = MagicMock() + mock_file.id = "file-id-123" + mock_file.filename = "test.txt" + mock_file.name = "test.txt" + mock_file.size = 1024 + mock_file.extension = "txt" + mock_file.mime_type = "text/plain" + mock_file.created_by = "user-123" + mock_file.created_at = 1234567890 + mock_file.preview_url = "http://example.com/preview/file-id-123" + mock_file.source_url = "http://example.com/source/file-id-123" + mock_file.original_url = None + mock_file.user_id = "user-123" + mock_file.tenant_id = "tenant-123" + mock_file.conversation_id = None + mock_file.file_key = "file-key-123" + + mock_file_service.upload_file.return_value = mock_file + + data = { + "file": (io.BytesIO(b"hello"), "test.txt"), + } + + with app.test_request_context(method="POST", data=data): + response, status = post_method(api) + + assert status == 201 + assert response["id"] == "file-id-123" + assert response["name"] == "test.txt" + + def test_upload_with_invalid_source(self, app, mock_account_context, mock_file_service): + """Test that invalid source parameter gets normalized to None""" + api = FileApi() + post_method = unwrap(api.post) + + # Create a properly structured mock file object + mock_file = MagicMock() + mock_file.id = "file-id-456" + mock_file.filename = "test.txt" + mock_file.name = "test.txt" + mock_file.size = 512 + mock_file.extension = "txt" + mock_file.mime_type = "text/plain" + mock_file.created_by = "user-456" + mock_file.created_at = 1234567890 + mock_file.preview_url = None + mock_file.source_url = None + mock_file.original_url = None + mock_file.user_id = "user-456" + mock_file.tenant_id = "tenant-456" + mock_file.conversation_id = None + mock_file.file_key = "file-key-456" + + mock_file_service.upload_file.return_value = mock_file + + data = { + "file": (io.BytesIO(b"content"), "test.txt"), + "source": "invalid_source", # Should be normalized to None + } + + with app.test_request_context(method="POST", data=data): + response, status = post_method(api) + + assert status == 201 + assert response["id"] == "file-id-456" + # Verify that FileService was called with source=None + mock_file_service.upload_file.assert_called_once() + call_kwargs = mock_file_service.upload_file.call_args[1] + assert call_kwargs["source"] is None + + def test_file_too_large_error(self, app, mock_account_context, mock_file_service): + api = FileApi() + post_method = unwrap(api.post) + + from services.errors.file import FileTooLargeError as ServiceFileTooLargeError + + error = ServiceFileTooLargeError("File is too large") + mock_file_service.upload_file.side_effect = error + + data = { + "file": (io.BytesIO(b"x" * 1000000), "big.txt"), + } + + with app.test_request_context(method="POST", data=data): + with pytest.raises(FileTooLargeError): + post_method(api) + + def test_unsupported_file_type(self, app, mock_account_context, mock_file_service): + api = FileApi() + post_method = unwrap(api.post) + + from services.errors.file import UnsupportedFileTypeError as ServiceUnsupportedFileTypeError + + error = ServiceUnsupportedFileTypeError() + mock_file_service.upload_file.side_effect = error + + data = { + "file": (io.BytesIO(b"x"), "bad.exe"), + } + + with app.test_request_context(method="POST", data=data): + with pytest.raises(UnsupportedFileTypeError): + post_method(api) + + def test_blocked_extension(self, app, mock_account_context, mock_file_service): + api = FileApi() + post_method = unwrap(api.post) + + from services.errors.file import BlockedFileExtensionError as ServiceBlockedFileExtensionError + + error = ServiceBlockedFileExtensionError("File extension is blocked") + mock_file_service.upload_file.side_effect = error + + data = { + "file": (io.BytesIO(b"x"), "blocked.txt"), + } + + with app.test_request_context(method="POST", data=data): + with pytest.raises(BlockedFileExtensionError): + post_method(api) + + +class TestFilePreviewApi: + def test_get_preview(self, app, mock_file_service): + api = FilePreviewApi() + get_method = unwrap(api.get) + mock_file_service.get_file_preview.return_value = "preview text" + + with app.test_request_context(): + result = get_method(api, "1234") + + assert result == {"content": "preview text"} + + +class TestFileSupportTypeApi: + def test_get_supported_types(self, app): + api = FileSupportTypeApi() + get_method = unwrap(api.get) + + with app.test_request_context(): + result = get_method(api) + + assert result == {"allowed_extensions": list(DOCUMENT_EXTENSIONS)} diff --git a/api/tests/unit_tests/controllers/console/test_human_input_form.py b/api/tests/unit_tests/controllers/console/test_human_input_form.py new file mode 100644 index 0000000000..232b6eee79 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_human_input_form.py @@ -0,0 +1,293 @@ +from __future__ import annotations + +import json +from datetime import UTC, datetime +from types import SimpleNamespace +from unittest.mock import Mock + +import pytest +from flask import Response + +from controllers.console.human_input_form import ( + ConsoleHumanInputFormApi, + ConsoleWorkflowEventsApi, + DifyAPIRepositoryFactory, + WorkflowResponseConverter, + _jsonify_form_definition, +) +from controllers.web.error import NotFoundError +from models.enums import CreatorUserRole +from models.human_input import RecipientType +from models.model import AppMode + + +def _unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def test_jsonify_form_definition() -> None: + expiration = datetime(2024, 1, 1, tzinfo=UTC) + definition = SimpleNamespace(model_dump=lambda: {"fields": []}) + form = SimpleNamespace(get_definition=lambda: definition, expiration_time=expiration) + + response = _jsonify_form_definition(form) + + assert isinstance(response, Response) + payload = json.loads(response.get_data(as_text=True)) + assert payload["expiration_time"] == int(expiration.timestamp()) + + +def test_ensure_console_access_rejects(monkeypatch: pytest.MonkeyPatch) -> None: + form = SimpleNamespace(tenant_id="tenant-1") + monkeypatch.setattr("controllers.console.human_input_form.current_account_with_tenant", lambda: (None, "tenant-2")) + + with pytest.raises(NotFoundError): + ConsoleHumanInputFormApi._ensure_console_access(form) + + +def test_get_form_definition_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + expiration = datetime(2024, 1, 1, tzinfo=UTC) + definition = SimpleNamespace(model_dump=lambda: {"fields": ["a"]}) + form = SimpleNamespace(tenant_id="tenant-1", get_definition=lambda: definition, expiration_time=expiration) + + class _ServiceStub: + def __init__(self, *_args, **_kwargs): + pass + + def get_form_definition_by_token_for_console(self, _token): + return form + + monkeypatch.setattr("controllers.console.human_input_form.HumanInputService", _ServiceStub) + monkeypatch.setattr("controllers.console.human_input_form.current_account_with_tenant", lambda: (None, "tenant-1")) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleHumanInputFormApi() + handler = _unwrap(api.get) + + with app.test_request_context("/console/api/form/human_input/token", method="GET"): + response = handler(api, form_token="token") + + payload = json.loads(response.get_data(as_text=True)) + assert payload["fields"] == ["a"] + + +def test_get_form_definition_not_found(app, monkeypatch: pytest.MonkeyPatch) -> None: + class _ServiceStub: + def __init__(self, *_args, **_kwargs): + pass + + def get_form_definition_by_token_for_console(self, _token): + return None + + monkeypatch.setattr("controllers.console.human_input_form.HumanInputService", _ServiceStub) + monkeypatch.setattr("controllers.console.human_input_form.current_account_with_tenant", lambda: (None, "tenant-1")) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleHumanInputFormApi() + handler = _unwrap(api.get) + + with app.test_request_context("/console/api/form/human_input/token", method="GET"): + with pytest.raises(NotFoundError): + handler(api, form_token="token") + + +def test_post_form_invalid_recipient_type(app, monkeypatch: pytest.MonkeyPatch) -> None: + form = SimpleNamespace(tenant_id="tenant-1", recipient_type=RecipientType.EMAIL_MEMBER) + + class _ServiceStub: + def __init__(self, *_args, **_kwargs): + pass + + def get_form_by_token(self, _token): + return form + + monkeypatch.setattr("controllers.console.human_input_form.HumanInputService", _ServiceStub) + monkeypatch.setattr( + "controllers.console.human_input_form.current_account_with_tenant", + lambda: (SimpleNamespace(id="user-1"), "tenant-1"), + ) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleHumanInputFormApi() + handler = _unwrap(api.post) + + with app.test_request_context( + "/console/api/form/human_input/token", + method="POST", + json={"inputs": {"content": "ok"}, "action": "approve"}, + ): + with pytest.raises(NotFoundError): + handler(api, form_token="token") + + +def test_post_form_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + submit_mock = Mock() + form = SimpleNamespace(tenant_id="tenant-1", recipient_type=RecipientType.CONSOLE) + + class _ServiceStub: + def __init__(self, *_args, **_kwargs): + pass + + def get_form_by_token(self, _token): + return form + + def submit_form_by_token(self, **kwargs): + submit_mock(**kwargs) + + monkeypatch.setattr("controllers.console.human_input_form.HumanInputService", _ServiceStub) + monkeypatch.setattr( + "controllers.console.human_input_form.current_account_with_tenant", + lambda: (SimpleNamespace(id="user-1"), "tenant-1"), + ) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleHumanInputFormApi() + handler = _unwrap(api.post) + + with app.test_request_context( + "/console/api/form/human_input/token", + method="POST", + json={"inputs": {"content": "ok"}, "action": "approve"}, + ): + response = handler(api, form_token="token") + + assert response.get_json() == {} + submit_mock.assert_called_once() + + +def test_workflow_events_not_found(app, monkeypatch: pytest.MonkeyPatch) -> None: + class _RepoStub: + def get_workflow_run_by_id_and_tenant_id(self, **_kwargs): + return None + + monkeypatch.setattr( + DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda *_args, **_kwargs: _RepoStub(), + ) + monkeypatch.setattr( + "controllers.console.human_input_form.current_account_with_tenant", + lambda: (SimpleNamespace(id="u1"), "t1"), + ) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleWorkflowEventsApi() + handler = _unwrap(api.get) + + with app.test_request_context("/console/api/workflow/run/events", method="GET"): + with pytest.raises(NotFoundError): + handler(api, workflow_run_id="run-1") + + +def test_workflow_events_requires_account(app, monkeypatch: pytest.MonkeyPatch) -> None: + workflow_run = SimpleNamespace( + id="run-1", + created_by_role=CreatorUserRole.END_USER, + created_by="user-1", + tenant_id="t1", + ) + + class _RepoStub: + def get_workflow_run_by_id_and_tenant_id(self, **_kwargs): + return workflow_run + + monkeypatch.setattr( + DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda *_args, **_kwargs: _RepoStub(), + ) + monkeypatch.setattr( + "controllers.console.human_input_form.current_account_with_tenant", + lambda: (SimpleNamespace(id="u1"), "t1"), + ) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleWorkflowEventsApi() + handler = _unwrap(api.get) + + with app.test_request_context("/console/api/workflow/run/events", method="GET"): + with pytest.raises(NotFoundError): + handler(api, workflow_run_id="run-1") + + +def test_workflow_events_requires_creator(app, monkeypatch: pytest.MonkeyPatch) -> None: + workflow_run = SimpleNamespace( + id="run-1", + created_by_role=CreatorUserRole.ACCOUNT, + created_by="user-2", + tenant_id="t1", + ) + + class _RepoStub: + def get_workflow_run_by_id_and_tenant_id(self, **_kwargs): + return workflow_run + + monkeypatch.setattr( + DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda *_args, **_kwargs: _RepoStub(), + ) + monkeypatch.setattr( + "controllers.console.human_input_form.current_account_with_tenant", + lambda: (SimpleNamespace(id="u1"), "t1"), + ) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleWorkflowEventsApi() + handler = _unwrap(api.get) + + with app.test_request_context("/console/api/workflow/run/events", method="GET"): + with pytest.raises(NotFoundError): + handler(api, workflow_run_id="run-1") + + +def test_workflow_events_finished(app, monkeypatch: pytest.MonkeyPatch) -> None: + workflow_run = SimpleNamespace( + id="run-1", + created_by_role=CreatorUserRole.ACCOUNT, + created_by="user-1", + tenant_id="t1", + app_id="app-1", + finished_at=datetime(2024, 1, 1, tzinfo=UTC), + ) + app_model = SimpleNamespace(mode=AppMode.WORKFLOW) + + class _RepoStub: + def get_workflow_run_by_id_and_tenant_id(self, **_kwargs): + return workflow_run + + response_obj = SimpleNamespace( + event=SimpleNamespace(value="finished"), + model_dump=lambda mode="json": {"status": "done"}, + ) + + monkeypatch.setattr( + DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda *_args, **_kwargs: _RepoStub(), + ) + monkeypatch.setattr( + "controllers.console.human_input_form._retrieve_app_for_workflow_run", + lambda *_args, **_kwargs: app_model, + ) + monkeypatch.setattr( + WorkflowResponseConverter, + "workflow_run_result_to_finish_response", + lambda **_kwargs: response_obj, + ) + monkeypatch.setattr( + "controllers.console.human_input_form.current_account_with_tenant", + lambda: (SimpleNamespace(id="user-1"), "t1"), + ) + monkeypatch.setattr("controllers.console.human_input_form.db", SimpleNamespace(engine=object())) + + api = ConsoleWorkflowEventsApi() + handler = _unwrap(api.get) + + with app.test_request_context("/console/api/workflow/run/events", method="GET"): + response = handler(api, workflow_run_id="run-1") + + assert response.mimetype == "text/event-stream" + assert "data" in response.get_data(as_text=True) diff --git a/api/tests/unit_tests/controllers/console/test_init_validate.py b/api/tests/unit_tests/controllers/console/test_init_validate.py new file mode 100644 index 0000000000..3077304cbe --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_init_validate.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock + +import pytest + +from controllers.console import init_validate +from controllers.console.error import AlreadySetupError, InitValidateFailedError + + +class _SessionStub: + def __init__(self, has_setup: bool): + self._has_setup = has_setup + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def execute(self, *_args, **_kwargs): + return SimpleNamespace(scalar_one_or_none=lambda: Mock() if self._has_setup else None) + + +def test_get_init_status_finished(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate, "get_init_validate_status", lambda: True) + result = init_validate.get_init_status() + assert result.status == "finished" + + +def test_get_init_status_not_started(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate, "get_init_validate_status", lambda: False) + result = init_validate.get_init_status() + assert result.status == "not_started" + + +def test_validate_init_password_already_setup(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate.dify_config, "EDITION", "SELF_HOSTED") + monkeypatch.setattr(init_validate.TenantService, "get_tenant_count", lambda: 1) + app.secret_key = "test-secret" + + with app.test_request_context("/console/api/init", method="POST"): + with pytest.raises(AlreadySetupError): + init_validate.validate_init_password(init_validate.InitValidatePayload(password="pw")) + + +def test_validate_init_password_wrong_password(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate.dify_config, "EDITION", "SELF_HOSTED") + monkeypatch.setattr(init_validate.TenantService, "get_tenant_count", lambda: 0) + monkeypatch.setenv("INIT_PASSWORD", "expected") + app.secret_key = "test-secret" + + with app.test_request_context("/console/api/init", method="POST"): + with pytest.raises(InitValidateFailedError): + init_validate.validate_init_password(init_validate.InitValidatePayload(password="wrong")) + assert init_validate.session.get("is_init_validated") is False + + +def test_validate_init_password_success(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate.dify_config, "EDITION", "SELF_HOSTED") + monkeypatch.setattr(init_validate.TenantService, "get_tenant_count", lambda: 0) + monkeypatch.setenv("INIT_PASSWORD", "expected") + app.secret_key = "test-secret" + + with app.test_request_context("/console/api/init", method="POST"): + result = init_validate.validate_init_password(init_validate.InitValidatePayload(password="expected")) + assert result.result == "success" + assert init_validate.session.get("is_init_validated") is True + + +def test_get_init_validate_status_not_self_hosted(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate.dify_config, "EDITION", "CLOUD") + assert init_validate.get_init_validate_status() is True + + +def test_get_init_validate_status_validated_session(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate.dify_config, "EDITION", "SELF_HOSTED") + monkeypatch.setenv("INIT_PASSWORD", "expected") + app.secret_key = "test-secret" + + with app.test_request_context("/console/api/init", method="GET"): + init_validate.session["is_init_validated"] = True + assert init_validate.get_init_validate_status() is True + + +def test_get_init_validate_status_setup_exists(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate.dify_config, "EDITION", "SELF_HOSTED") + monkeypatch.setenv("INIT_PASSWORD", "expected") + monkeypatch.setattr(init_validate, "Session", lambda *_args, **_kwargs: _SessionStub(True)) + monkeypatch.setattr(init_validate, "db", SimpleNamespace(engine=object())) + app.secret_key = "test-secret" + + with app.test_request_context("/console/api/init", method="GET"): + init_validate.session.pop("is_init_validated", None) + assert init_validate.get_init_validate_status() is True + + +def test_get_init_validate_status_not_validated(app, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(init_validate.dify_config, "EDITION", "SELF_HOSTED") + monkeypatch.setenv("INIT_PASSWORD", "expected") + monkeypatch.setattr(init_validate, "Session", lambda *_args, **_kwargs: _SessionStub(False)) + monkeypatch.setattr(init_validate, "db", SimpleNamespace(engine=object())) + app.secret_key = "test-secret" + + with app.test_request_context("/console/api/init", method="GET"): + init_validate.session.pop("is_init_validated", None) + assert init_validate.get_init_validate_status() is False diff --git a/api/tests/unit_tests/controllers/console/test_remote_files.py b/api/tests/unit_tests/controllers/console/test_remote_files.py new file mode 100644 index 0000000000..1be402c8ab --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_remote_files.py @@ -0,0 +1,281 @@ +from __future__ import annotations + +import urllib.parse +from datetime import UTC, datetime +from types import SimpleNamespace +from unittest.mock import MagicMock + +import httpx +import pytest + +from controllers.common.errors import FileTooLargeError, RemoteFileUploadError, UnsupportedFileTypeError +from controllers.console import remote_files as remote_files_module +from services.errors.file import FileTooLargeError as ServiceFileTooLargeError +from services.errors.file import UnsupportedFileTypeError as ServiceUnsupportedFileTypeError + + +def _unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class _FakeResponse: + def __init__( + self, + *, + status_code: int = 200, + headers: dict[str, str] | None = None, + method: str = "GET", + content: bytes = b"", + text: str = "", + error: Exception | None = None, + ) -> None: + self.status_code = status_code + self.headers = headers or {} + self.request = SimpleNamespace(method=method) + self.content = content + self.text = text + self._error = error + + def raise_for_status(self) -> None: + if self._error: + raise self._error + + +def _mock_upload_dependencies( + monkeypatch: pytest.MonkeyPatch, + *, + file_size_within_limit: bool = True, +): + file_info = SimpleNamespace( + filename="report.txt", + extension=".txt", + mimetype="text/plain", + size=3, + ) + monkeypatch.setattr( + remote_files_module.helpers, + "guess_file_info_from_response", + MagicMock(return_value=file_info), + ) + + file_service_cls = MagicMock() + file_service_cls.is_file_size_within_limit.return_value = file_size_within_limit + monkeypatch.setattr(remote_files_module, "FileService", file_service_cls) + monkeypatch.setattr(remote_files_module, "current_account_with_tenant", lambda: (SimpleNamespace(id="u1"), None)) + monkeypatch.setattr(remote_files_module, "db", SimpleNamespace(engine=object())) + monkeypatch.setattr( + remote_files_module.file_helpers, + "get_signed_file_url", + lambda upload_file_id: f"https://signed.example/{upload_file_id}", + ) + + return file_service_cls + + +def test_get_remote_file_info_uses_head_when_successful(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.GetRemoteFileInfo() + handler = _unwrap(api.get) + decoded_url = "https://example.com/test.txt" + encoded_url = urllib.parse.quote(decoded_url, safe="") + + head_resp = _FakeResponse( + status_code=200, + headers={"Content-Type": "text/plain", "Content-Length": "128"}, + method="HEAD", + ) + head_mock = MagicMock(return_value=head_resp) + get_mock = MagicMock() + monkeypatch.setattr(remote_files_module.ssrf_proxy, "head", head_mock) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", get_mock) + + with app.test_request_context(method="GET"): + payload = handler(api, url=encoded_url) + + assert payload == {"file_type": "text/plain", "file_length": 128} + head_mock.assert_called_once_with(decoded_url) + get_mock.assert_not_called() + + +def test_get_remote_file_info_falls_back_to_get_and_uses_default_headers(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.GetRemoteFileInfo() + handler = _unwrap(api.get) + decoded_url = "https://example.com/test.txt" + encoded_url = urllib.parse.quote(decoded_url, safe="") + + monkeypatch.setattr(remote_files_module.ssrf_proxy, "head", MagicMock(return_value=_FakeResponse(status_code=503))) + get_mock = MagicMock(return_value=_FakeResponse(status_code=200, headers={}, method="GET")) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", get_mock) + + with app.test_request_context(method="GET"): + payload = handler(api, url=encoded_url) + + assert payload == {"file_type": "application/octet-stream", "file_length": 0} + get_mock.assert_called_once_with(decoded_url, timeout=3) + + +def test_remote_file_upload_success_when_fetch_falls_back_to_get(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.RemoteFileUpload() + handler = _unwrap(api.post) + url = "https://example.com/report.txt" + + monkeypatch.setattr(remote_files_module.ssrf_proxy, "head", MagicMock(return_value=_FakeResponse(status_code=404))) + get_resp = _FakeResponse(status_code=200, method="GET", content=b"fallback-content") + get_mock = MagicMock(return_value=get_resp) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", get_mock) + + file_service_cls = _mock_upload_dependencies(monkeypatch) + upload_file = SimpleNamespace( + id="file-1", + name="report.txt", + size=16, + extension=".txt", + mime_type="text/plain", + created_by="u1", + created_at=datetime(2024, 1, 1, tzinfo=UTC), + ) + file_service_cls.return_value.upload_file.return_value = upload_file + + with app.test_request_context(method="POST", json={"url": url}): + payload, status = handler(api) + + assert status == 201 + assert payload["id"] == "file-1" + assert payload["url"] == "https://signed.example/file-1" + get_mock.assert_called_once_with(url=url, timeout=3, follow_redirects=True) + file_service_cls.return_value.upload_file.assert_called_once_with( + filename="report.txt", + content=b"fallback-content", + mimetype="text/plain", + user=SimpleNamespace(id="u1"), + source_url=url, + ) + + +def test_remote_file_upload_fetches_content_with_second_get_when_head_succeeds( + app, monkeypatch: pytest.MonkeyPatch +) -> None: + api = remote_files_module.RemoteFileUpload() + handler = _unwrap(api.post) + url = "https://example.com/photo.jpg" + + monkeypatch.setattr( + remote_files_module.ssrf_proxy, + "head", + MagicMock(return_value=_FakeResponse(status_code=200, method="HEAD", content=b"head-content")), + ) + extra_get_resp = _FakeResponse(status_code=200, method="GET", content=b"downloaded-content") + get_mock = MagicMock(return_value=extra_get_resp) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", get_mock) + + file_service_cls = _mock_upload_dependencies(monkeypatch) + upload_file = SimpleNamespace( + id="file-2", + name="photo.jpg", + size=18, + extension=".jpg", + mime_type="image/jpeg", + created_by="u1", + created_at=datetime(2024, 1, 2, tzinfo=UTC), + ) + file_service_cls.return_value.upload_file.return_value = upload_file + + with app.test_request_context(method="POST", json={"url": url}): + payload, status = handler(api) + + assert status == 201 + assert payload["id"] == "file-2" + get_mock.assert_called_once_with(url) + assert file_service_cls.return_value.upload_file.call_args.kwargs["content"] == b"downloaded-content" + + +def test_remote_file_upload_raises_when_fallback_get_still_not_ok(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.RemoteFileUpload() + handler = _unwrap(api.post) + url = "https://example.com/fail.txt" + + monkeypatch.setattr(remote_files_module.ssrf_proxy, "head", MagicMock(return_value=_FakeResponse(status_code=500))) + monkeypatch.setattr( + remote_files_module.ssrf_proxy, + "get", + MagicMock(return_value=_FakeResponse(status_code=502, text="bad gateway")), + ) + + with app.test_request_context(method="POST", json={"url": url}): + with pytest.raises(RemoteFileUploadError, match=f"Failed to fetch file from {url}: bad gateway"): + handler(api) + + +def test_remote_file_upload_raises_on_httpx_request_error(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.RemoteFileUpload() + handler = _unwrap(api.post) + url = "https://example.com/fail.txt" + + request = httpx.Request("HEAD", url) + monkeypatch.setattr( + remote_files_module.ssrf_proxy, + "head", + MagicMock(side_effect=httpx.RequestError("network down", request=request)), + ) + + with app.test_request_context(method="POST", json={"url": url}): + with pytest.raises(RemoteFileUploadError, match=f"Failed to fetch file from {url}: network down"): + handler(api) + + +def test_remote_file_upload_rejects_oversized_file(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.RemoteFileUpload() + handler = _unwrap(api.post) + url = "https://example.com/large.bin" + + monkeypatch.setattr( + remote_files_module.ssrf_proxy, + "head", + MagicMock(return_value=_FakeResponse(status_code=200, method="GET", content=b"payload")), + ) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", MagicMock()) + + _mock_upload_dependencies(monkeypatch, file_size_within_limit=False) + + with app.test_request_context(method="POST", json={"url": url}): + with pytest.raises(FileTooLargeError): + handler(api) + + +def test_remote_file_upload_translates_service_file_too_large_error(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.RemoteFileUpload() + handler = _unwrap(api.post) + url = "https://example.com/large.bin" + + monkeypatch.setattr( + remote_files_module.ssrf_proxy, + "head", + MagicMock(return_value=_FakeResponse(status_code=200, method="GET", content=b"payload")), + ) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", MagicMock()) + file_service_cls = _mock_upload_dependencies(monkeypatch) + file_service_cls.return_value.upload_file.side_effect = ServiceFileTooLargeError("size exceeded") + + with app.test_request_context(method="POST", json={"url": url}): + with pytest.raises(FileTooLargeError, match="size exceeded"): + handler(api) + + +def test_remote_file_upload_translates_service_unsupported_type_error(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.RemoteFileUpload() + handler = _unwrap(api.post) + url = "https://example.com/file.exe" + + monkeypatch.setattr( + remote_files_module.ssrf_proxy, + "head", + MagicMock(return_value=_FakeResponse(status_code=200, method="GET", content=b"payload")), + ) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", MagicMock()) + file_service_cls = _mock_upload_dependencies(monkeypatch) + file_service_cls.return_value.upload_file.side_effect = ServiceUnsupportedFileTypeError() + + with app.test_request_context(method="POST", json={"url": url}): + with pytest.raises(UnsupportedFileTypeError): + handler(api) diff --git a/api/tests/unit_tests/controllers/console/test_spec.py b/api/tests/unit_tests/controllers/console/test_spec.py new file mode 100644 index 0000000000..05a4befaa8 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_spec.py @@ -0,0 +1,49 @@ +from unittest.mock import patch + +import controllers.console.spec as spec_module + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestSpecSchemaDefinitionsApi: + def test_get_success(self): + api = spec_module.SpecSchemaDefinitionsApi() + method = unwrap(api.get) + + schema_definitions = [{"type": "string"}] + + with patch.object( + spec_module, + "SchemaManager", + ) as schema_manager_cls: + schema_manager_cls.return_value.get_all_schema_definitions.return_value = schema_definitions + + resp, status = method(api) + + assert status == 200 + assert resp == schema_definitions + + def test_get_exception_returns_empty_list(self): + api = spec_module.SpecSchemaDefinitionsApi() + method = unwrap(api.get) + + with ( + patch.object( + spec_module, + "SchemaManager", + side_effect=Exception("boom"), + ), + patch.object( + spec_module.logger, + "exception", + ) as log_exception, + ): + resp, status = method(api) + + assert status == 200 + assert resp == [] + log_exception.assert_called_once() diff --git a/api/tests/unit_tests/controllers/console/test_version.py b/api/tests/unit_tests/controllers/console/test_version.py new file mode 100644 index 0000000000..8d8d324be1 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_version.py @@ -0,0 +1,162 @@ +from unittest.mock import MagicMock, patch + +import controllers.console.version as version_module + + +class TestHasNewVersion: + def test_has_new_version_true(self): + result = version_module._has_new_version( + latest_version="1.2.0", + current_version="1.1.0", + ) + assert result is True + + def test_has_new_version_false(self): + result = version_module._has_new_version( + latest_version="1.0.0", + current_version="1.1.0", + ) + assert result is False + + def test_has_new_version_invalid_version(self): + with patch.object(version_module.logger, "warning") as log_warning: + result = version_module._has_new_version( + latest_version="invalid", + current_version="1.0.0", + ) + + assert result is False + log_warning.assert_called_once() + + +class TestCheckVersionUpdate: + def test_no_check_update_url(self): + query = version_module.VersionQuery(current_version="1.0.0") + + with ( + patch.object( + version_module.dify_config, + "CHECK_UPDATE_URL", + "", + ), + patch.object( + version_module.dify_config.project, + "version", + "1.0.0", + ), + patch.object( + version_module.dify_config, + "CAN_REPLACE_LOGO", + True, + ), + patch.object( + version_module.dify_config, + "MODEL_LB_ENABLED", + False, + ), + ): + result = version_module.check_version_update(query) + + assert result.version == "1.0.0" + assert result.can_auto_update is False + assert result.features.can_replace_logo is True + assert result.features.model_load_balancing_enabled is False + + def test_http_error_fallback(self): + query = version_module.VersionQuery(current_version="1.0.0") + + with ( + patch.object( + version_module.dify_config, + "CHECK_UPDATE_URL", + "http://example.com", + ), + patch.object( + version_module.httpx, + "get", + side_effect=Exception("boom"), + ), + patch.object( + version_module.logger, + "warning", + ) as log_warning, + ): + result = version_module.check_version_update(query) + + assert result.version == "1.0.0" + log_warning.assert_called_once() + + def test_new_version_available(self): + query = version_module.VersionQuery(current_version="1.0.0") + + response = MagicMock() + response.json.return_value = { + "version": "1.2.0", + "releaseDate": "2024-01-01", + "releaseNotes": "New features", + "canAutoUpdate": True, + } + + with ( + patch.object( + version_module.dify_config, + "CHECK_UPDATE_URL", + "http://example.com", + ), + patch.object( + version_module.httpx, + "get", + return_value=response, + ), + patch.object( + version_module.dify_config.project, + "version", + "1.0.0", + ), + patch.object( + version_module.dify_config, + "CAN_REPLACE_LOGO", + False, + ), + patch.object( + version_module.dify_config, + "MODEL_LB_ENABLED", + True, + ), + ): + result = version_module.check_version_update(query) + + assert result.version == "1.2.0" + assert result.release_date == "2024-01-01" + assert result.release_notes == "New features" + assert result.can_auto_update is True + + def test_no_new_version(self): + query = version_module.VersionQuery(current_version="1.2.0") + + response = MagicMock() + response.json.return_value = { + "version": "1.1.0", + } + + with ( + patch.object( + version_module.dify_config, + "CHECK_UPDATE_URL", + "http://example.com", + ), + patch.object( + version_module.httpx, + "get", + return_value=response, + ), + patch.object( + version_module.dify_config.project, + "version", + "1.2.0", + ), + ): + result = version_module.check_version_update(query) + + assert result.version == "1.2.0" + assert result.can_auto_update is False diff --git a/api/tests/unit_tests/controllers/console/workspace/test_accounts.py b/api/tests/unit_tests/controllers/console/workspace/test_accounts.py new file mode 100644 index 0000000000..00d322fdea --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_accounts.py @@ -0,0 +1,341 @@ +from unittest.mock import MagicMock, PropertyMock, patch + +import pytest + +from controllers.console import console_ns +from controllers.console.auth.error import ( + EmailAlreadyInUseError, + EmailCodeError, +) +from controllers.console.error import AccountInFreezeError +from controllers.console.workspace.account import ( + AccountAvatarApi, + AccountDeleteApi, + AccountDeleteVerifyApi, + AccountInitApi, + AccountIntegrateApi, + AccountInterfaceLanguageApi, + AccountInterfaceThemeApi, + AccountNameApi, + AccountPasswordApi, + AccountProfileApi, + AccountTimezoneApi, + ChangeEmailCheckApi, + ChangeEmailResetApi, + CheckEmailUnique, +) +from controllers.console.workspace.error import ( + AccountAlreadyInitedError, + CurrentPasswordIncorrectError, + InvalidAccountDeletionCodeError, +) +from services.errors.account import CurrentPasswordIncorrectError as ServicePwdError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestAccountInitApi: + def test_init_success(self, app): + api = AccountInitApi() + method = unwrap(api.post) + + account = MagicMock(status="inactive") + payload = { + "interface_language": "en-US", + "timezone": "UTC", + "invitation_code": "code123", + } + + with ( + app.test_request_context("/account/init", json=payload), + patch("controllers.console.workspace.account.current_account_with_tenant", return_value=(account, "t1")), + patch("controllers.console.workspace.account.db.session.commit", return_value=None), + patch("controllers.console.workspace.account.dify_config.EDITION", "CLOUD"), + patch("controllers.console.workspace.account.db.session.query") as query_mock, + ): + query_mock.return_value.where.return_value.first.return_value = MagicMock(status="unused") + resp = method(api) + + assert resp["result"] == "success" + + def test_init_already_initialized(self, app): + api = AccountInitApi() + method = unwrap(api.post) + + account = MagicMock(status="active") + + with ( + app.test_request_context("/account/init"), + patch("controllers.console.workspace.account.current_account_with_tenant", return_value=(account, "t1")), + ): + with pytest.raises(AccountAlreadyInitedError): + method(api) + + +class TestAccountProfileApi: + def test_get_profile_success(self, app): + api = AccountProfileApi() + method = unwrap(api.get) + + user = MagicMock() + user.id = "u1" + user.name = "John" + user.email = "john@test.com" + user.avatar = "avatar.png" + user.interface_language = "en-US" + user.interface_theme = "light" + user.timezone = "UTC" + user.last_login_ip = "127.0.0.1" + + with ( + app.test_request_context("/account/profile"), + patch("controllers.console.workspace.account.current_account_with_tenant", return_value=(user, "t1")), + ): + result = method(api) + + assert result["id"] == "u1" + + +class TestAccountUpdateApis: + @pytest.mark.parametrize( + ("api_cls", "payload"), + [ + (AccountNameApi, {"name": "test"}), + (AccountAvatarApi, {"avatar": "img.png"}), + (AccountInterfaceLanguageApi, {"interface_language": "en-US"}), + (AccountInterfaceThemeApi, {"interface_theme": "dark"}), + (AccountTimezoneApi, {"timezone": "UTC"}), + ], + ) + def test_update_success(self, app, api_cls, payload): + api = api_cls() + method = unwrap(api.post) + + user = MagicMock() + user.id = "u1" + user.name = "John" + user.email = "john@test.com" + user.avatar = "avatar.png" + user.interface_language = "en-US" + user.interface_theme = "light" + user.timezone = "UTC" + user.last_login_ip = "127.0.0.1" + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.account.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.account.AccountService.update_account", return_value=user), + ): + result = method(api) + + assert result["id"] == "u1" + + +class TestAccountPasswordApi: + def test_password_success(self, app): + api = AccountPasswordApi() + method = unwrap(api.post) + + payload = { + "password": "old", + "new_password": "new123", + "repeat_new_password": "new123", + } + + user = MagicMock() + user.id = "u1" + user.name = "John" + user.email = "john@test.com" + user.avatar = "avatar.png" + user.interface_language = "en-US" + user.interface_theme = "light" + user.timezone = "UTC" + user.last_login_ip = "127.0.0.1" + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.account.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.account.AccountService.update_account_password", return_value=None), + ): + result = method(api) + + assert result["id"] == "u1" + + def test_password_wrong_current(self, app): + api = AccountPasswordApi() + method = unwrap(api.post) + + payload = { + "password": "bad", + "new_password": "new123", + "repeat_new_password": "new123", + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.account.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch( + "controllers.console.workspace.account.AccountService.update_account_password", + side_effect=ServicePwdError(), + ), + ): + with pytest.raises(CurrentPasswordIncorrectError): + method(api) + + +class TestAccountIntegrateApi: + def test_get_integrates(self, app): + api = AccountIntegrateApi() + method = unwrap(api.get) + + account = MagicMock(id="acc1") + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.account.current_account_with_tenant", return_value=(account, "t1")), + patch("controllers.console.workspace.account.db.session.scalars") as scalars_mock, + ): + scalars_mock.return_value.all.return_value = [] + result = method(api) + + assert "data" in result + assert len(result["data"]) == 2 + + +class TestAccountDeleteApi: + def test_delete_verify_success(self, app): + api = AccountDeleteVerifyApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.account.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch( + "controllers.console.workspace.account.AccountService.generate_account_deletion_verification_code", + return_value=("token", "1234"), + ), + patch( + "controllers.console.workspace.account.AccountService.send_account_deletion_verification_email", + return_value=None, + ), + ): + result = method(api) + + assert result["result"] == "success" + + def test_delete_invalid_code(self, app): + api = AccountDeleteApi() + method = unwrap(api.post) + + payload = {"token": "t", "code": "x"} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.account.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch( + "controllers.console.workspace.account.AccountService.verify_account_deletion_code", + return_value=False, + ), + ): + with pytest.raises(InvalidAccountDeletionCodeError): + method(api) + + +class TestChangeEmailApis: + def test_check_email_code_invalid(self, app): + api = ChangeEmailCheckApi() + method = unwrap(api.post) + + payload = {"email": "a@test.com", "code": "x", "token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch( + "controllers.console.workspace.account.AccountService.is_change_email_error_rate_limit", + return_value=False, + ), + patch( + "controllers.console.workspace.account.AccountService.get_change_email_data", + return_value={"email": "a@test.com", "code": "y"}, + ), + ): + with pytest.raises(EmailCodeError): + method(api) + + def test_reset_email_already_used(self, app): + api = ChangeEmailResetApi() + method = unwrap(api.post) + + payload = {"new_email": "x@test.com", "token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch("controllers.console.workspace.account.AccountService.is_account_in_freeze", return_value=False), + patch("controllers.console.workspace.account.AccountService.check_email_unique", return_value=False), + ): + with pytest.raises(EmailAlreadyInUseError): + method(api) + + +class TestCheckEmailUniqueApi: + def test_email_unique_success(self, app): + api = CheckEmailUnique() + method = unwrap(api.post) + + payload = {"email": "ok@test.com"} + + with ( + app.test_request_context("/", json=payload), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch("controllers.console.workspace.account.AccountService.is_account_in_freeze", return_value=False), + patch("controllers.console.workspace.account.AccountService.check_email_unique", return_value=True), + ): + result = method(api) + + assert result["result"] == "success" + + def test_email_in_freeze(self, app): + api = CheckEmailUnique() + method = unwrap(api.post) + + payload = {"email": "x@test.com"} + + with ( + app.test_request_context("/", json=payload), + patch.object( + type(console_ns), + "payload", + new_callable=PropertyMock, + return_value=payload, + ), + patch("controllers.console.workspace.account.AccountService.is_account_in_freeze", return_value=True), + ): + with pytest.raises(AccountInFreezeError): + method(api) diff --git a/api/tests/unit_tests/controllers/console/workspace/test_agent_providers.py b/api/tests/unit_tests/controllers/console/workspace/test_agent_providers.py new file mode 100644 index 0000000000..b4e03f681d --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_agent_providers.py @@ -0,0 +1,139 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from controllers.console.error import AccountNotFound +from controllers.console.workspace.agent_providers import ( + AgentProviderApi, + AgentProviderListApi, +) + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestAgentProviderListApi: + def test_get_success(self, app): + api = AgentProviderListApi() + method = unwrap(api.get) + + user = MagicMock(id="user1") + tenant_id = "tenant1" + providers = [{"name": "openai"}, {"name": "anthropic"}] + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.agent_providers.current_account_with_tenant", + return_value=(user, tenant_id), + ), + patch( + "controllers.console.workspace.agent_providers.AgentService.list_agent_providers", + return_value=providers, + ), + ): + result = method(api) + + assert result == providers + + def test_get_empty_list(self, app): + api = AgentProviderListApi() + method = unwrap(api.get) + + user = MagicMock(id="user1") + tenant_id = "tenant1" + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.agent_providers.current_account_with_tenant", + return_value=(user, tenant_id), + ), + patch( + "controllers.console.workspace.agent_providers.AgentService.list_agent_providers", + return_value=[], + ), + ): + result = method(api) + + assert result == [] + + def test_get_account_not_found(self, app): + api = AgentProviderListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.agent_providers.current_account_with_tenant", + side_effect=AccountNotFound(), + ), + ): + with pytest.raises(AccountNotFound): + method(api) + + +class TestAgentProviderApi: + def test_get_success(self, app): + api = AgentProviderApi() + method = unwrap(api.get) + + user = MagicMock(id="user1") + tenant_id = "tenant1" + provider_name = "openai" + provider_data = {"name": "openai", "models": ["gpt-4"]} + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.agent_providers.current_account_with_tenant", + return_value=(user, tenant_id), + ), + patch( + "controllers.console.workspace.agent_providers.AgentService.get_agent_provider", + return_value=provider_data, + ), + ): + result = method(api, provider_name) + + assert result == provider_data + + def test_get_provider_not_found(self, app): + api = AgentProviderApi() + method = unwrap(api.get) + + user = MagicMock(id="user1") + tenant_id = "tenant1" + provider_name = "unknown" + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.agent_providers.current_account_with_tenant", + return_value=(user, tenant_id), + ), + patch( + "controllers.console.workspace.agent_providers.AgentService.get_agent_provider", + return_value=None, + ), + ): + result = method(api, provider_name) + + assert result is None + + def test_get_account_not_found(self, app): + api = AgentProviderApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.agent_providers.current_account_with_tenant", + side_effect=AccountNotFound(), + ), + ): + with pytest.raises(AccountNotFound): + method(api, "openai") diff --git a/api/tests/unit_tests/controllers/console/workspace/test_endpoint.py b/api/tests/unit_tests/controllers/console/workspace/test_endpoint.py new file mode 100644 index 0000000000..51f76af172 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_endpoint.py @@ -0,0 +1,305 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from controllers.console.workspace.endpoint import ( + EndpointCreateApi, + EndpointDeleteApi, + EndpointDisableApi, + EndpointEnableApi, + EndpointListApi, + EndpointListForSinglePluginApi, + EndpointUpdateApi, +) +from core.plugin.impl.exc import PluginPermissionDeniedError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def user_and_tenant(): + return MagicMock(id="u1"), "t1" + + +@pytest.fixture +def patch_current_account(user_and_tenant): + with patch( + "controllers.console.workspace.endpoint.current_account_with_tenant", + return_value=user_and_tenant, + ): + yield + + +@pytest.mark.usefixtures("patch_current_account") +class TestEndpointCreateApi: + def test_create_success(self, app): + api = EndpointCreateApi() + method = unwrap(api.post) + + payload = { + "plugin_unique_identifier": "plugin-1", + "name": "endpoint", + "settings": {"a": 1}, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.create_endpoint", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + def test_create_permission_denied(self, app): + api = EndpointCreateApi() + method = unwrap(api.post) + + payload = { + "plugin_unique_identifier": "plugin-1", + "name": "endpoint", + "settings": {}, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.endpoint.EndpointService.create_endpoint", + side_effect=PluginPermissionDeniedError("denied"), + ), + ): + with pytest.raises(ValueError): + method(api) + + def test_create_validation_error(self, app): + api = EndpointCreateApi() + method = unwrap(api.post) + + payload = { + "plugin_unique_identifier": "p1", + "name": "", + "settings": {}, + } + + with ( + app.test_request_context("/", json=payload), + ): + with pytest.raises(ValueError): + method(api) + + +@pytest.mark.usefixtures("patch_current_account") +class TestEndpointListApi: + def test_list_success(self, app): + api = EndpointListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?page=1&page_size=10"), + patch("controllers.console.workspace.endpoint.EndpointService.list_endpoints", return_value=[{"id": "e1"}]), + ): + result = method(api) + + assert "endpoints" in result + assert len(result["endpoints"]) == 1 + + def test_list_invalid_query(self, app): + api = EndpointListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?page=0&page_size=10"), + ): + with pytest.raises(ValueError): + method(api) + + +@pytest.mark.usefixtures("patch_current_account") +class TestEndpointListForSinglePluginApi: + def test_list_for_plugin_success(self, app): + api = EndpointListForSinglePluginApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?page=1&page_size=10&plugin_id=p1"), + patch( + "controllers.console.workspace.endpoint.EndpointService.list_endpoints_for_single_plugin", + return_value=[{"id": "e1"}], + ), + ): + result = method(api) + + assert "endpoints" in result + + def test_list_for_plugin_missing_param(self, app): + api = EndpointListForSinglePluginApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?page=1&page_size=10"), + ): + with pytest.raises(ValueError): + method(api) + + +@pytest.mark.usefixtures("patch_current_account") +class TestEndpointDeleteApi: + def test_delete_success(self, app): + api = EndpointDeleteApi() + method = unwrap(api.post) + + payload = {"endpoint_id": "e1"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.delete_endpoint", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + def test_delete_invalid_payload(self, app): + api = EndpointDeleteApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + ): + with pytest.raises(ValueError): + method(api) + + def test_delete_service_failure(self, app): + api = EndpointDeleteApi() + method = unwrap(api.post) + + payload = {"endpoint_id": "e1"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.delete_endpoint", return_value=False), + ): + result = method(api) + + assert result["success"] is False + + +@pytest.mark.usefixtures("patch_current_account") +class TestEndpointUpdateApi: + def test_update_success(self, app): + api = EndpointUpdateApi() + method = unwrap(api.post) + + payload = { + "endpoint_id": "e1", + "name": "new-name", + "settings": {"x": 1}, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.update_endpoint", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + def test_update_validation_error(self, app): + api = EndpointUpdateApi() + method = unwrap(api.post) + + payload = {"endpoint_id": "e1", "settings": {}} + + with ( + app.test_request_context("/", json=payload), + ): + with pytest.raises(ValueError): + method(api) + + def test_update_service_failure(self, app): + api = EndpointUpdateApi() + method = unwrap(api.post) + + payload = { + "endpoint_id": "e1", + "name": "n", + "settings": {}, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.update_endpoint", return_value=False), + ): + result = method(api) + + assert result["success"] is False + + +@pytest.mark.usefixtures("patch_current_account") +class TestEndpointEnableApi: + def test_enable_success(self, app): + api = EndpointEnableApi() + method = unwrap(api.post) + + payload = {"endpoint_id": "e1"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.enable_endpoint", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + def test_enable_invalid_payload(self, app): + api = EndpointEnableApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + ): + with pytest.raises(ValueError): + method(api) + + def test_enable_service_failure(self, app): + api = EndpointEnableApi() + method = unwrap(api.post) + + payload = {"endpoint_id": "e1"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.enable_endpoint", return_value=False), + ): + result = method(api) + + assert result["success"] is False + + +@pytest.mark.usefixtures("patch_current_account") +class TestEndpointDisableApi: + def test_disable_success(self, app): + api = EndpointDisableApi() + method = unwrap(api.post) + + payload = {"endpoint_id": "e1"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.endpoint.EndpointService.disable_endpoint", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + def test_disable_invalid_payload(self, app): + api = EndpointDisableApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={}), + ): + with pytest.raises(ValueError): + method(api) diff --git a/api/tests/unit_tests/controllers/console/workspace/test_members.py b/api/tests/unit_tests/controllers/console/workspace/test_members.py new file mode 100644 index 0000000000..b6708d1f6f --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_members.py @@ -0,0 +1,607 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import HTTPException + +import services +from controllers.console.auth.error import ( + CannotTransferOwnerToSelfError, + EmailCodeError, + InvalidEmailError, + InvalidTokenError, + MemberNotInTenantError, + NotOwnerError, + OwnerTransferLimitError, +) +from controllers.console.error import EmailSendIpLimitError, WorkspaceMembersLimitExceeded +from controllers.console.workspace.members import ( + DatasetOperatorMemberListApi, + MemberCancelInviteApi, + MemberInviteEmailApi, + MemberListApi, + MemberUpdateRoleApi, + OwnerTransfer, + OwnerTransferCheckApi, + SendOwnerTransferEmailApi, +) +from services.errors.account import AccountAlreadyInTenantError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestMemberListApi: + def test_get_success(self, app): + api = MemberListApi() + method = unwrap(api.get) + + tenant = MagicMock() + user = MagicMock(current_tenant=tenant) + member = MagicMock() + member.id = "m1" + member.name = "Member" + member.email = "member@test.com" + member.avatar = "avatar.png" + member.role = "admin" + member.status = "active" + members = [member] + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.get_tenant_members", return_value=members), + ): + result, status = method(api) + + assert status == 200 + assert len(result["accounts"]) == 1 + + def test_get_no_tenant(self, app): + api = MemberListApi() + method = unwrap(api.get) + + user = MagicMock(current_tenant=None) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + ): + with pytest.raises(ValueError): + method(api) + + +class TestMemberInviteEmailApi: + def test_invite_success(self, app): + api = MemberInviteEmailApi() + method = unwrap(api.post) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + features = MagicMock() + features.workspace_members.is_available.return_value = True + + payload = { + "emails": ["a@test.com"], + "role": "normal", + "language": "en-US", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.FeatureService.get_features", return_value=features), + patch("controllers.console.workspace.members.RegisterService.invite_new_member", return_value="token"), + patch("controllers.console.workspace.members.dify_config.CONSOLE_WEB_URL", "http://x"), + ): + result, status = method(api) + + assert status == 201 + assert result["result"] == "success" + + def test_invite_limit_exceeded(self, app): + api = MemberInviteEmailApi() + method = unwrap(api.post) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + features = MagicMock() + features.workspace_members.is_available.return_value = False + + payload = { + "emails": ["a@test.com"], + "role": "normal", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.FeatureService.get_features", return_value=features), + ): + with pytest.raises(WorkspaceMembersLimitExceeded): + method(api) + + def test_invite_already_member(self, app): + api = MemberInviteEmailApi() + method = unwrap(api.post) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + features = MagicMock() + features.workspace_members.is_available.return_value = True + + payload = { + "emails": ["a@test.com"], + "role": "normal", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.FeatureService.get_features", return_value=features), + patch( + "controllers.console.workspace.members.RegisterService.invite_new_member", + side_effect=AccountAlreadyInTenantError(), + ), + patch("controllers.console.workspace.members.dify_config.CONSOLE_WEB_URL", "http://x"), + ): + result, status = method(api) + + assert result["invitation_results"][0]["status"] == "success" + + def test_invite_invalid_role(self, app): + api = MemberInviteEmailApi() + method = unwrap(api.post) + + payload = { + "emails": ["a@test.com"], + "role": "owner", + } + + with app.test_request_context("/", json=payload): + result, status = method(api) + + assert status == 400 + assert result["code"] == "invalid-role" + + def test_invite_generic_exception(self, app): + api = MemberInviteEmailApi() + method = unwrap(api.post) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + features = MagicMock() + features.workspace_members.is_available.return_value = True + + payload = { + "emails": ["a@test.com"], + "role": "normal", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.FeatureService.get_features", return_value=features), + patch( + "controllers.console.workspace.members.RegisterService.invite_new_member", + side_effect=Exception("boom"), + ), + patch("controllers.console.workspace.members.dify_config.CONSOLE_WEB_URL", "http://x"), + ): + result, _ = method(api) + + assert result["invitation_results"][0]["status"] == "failed" + + +class TestMemberCancelInviteApi: + def test_cancel_success(self, app): + api = MemberCancelInviteApi() + method = unwrap(api.delete) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + member = MagicMock() + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.db.session.query") as q, + patch("controllers.console.workspace.members.TenantService.remove_member_from_tenant"), + ): + q.return_value.where.return_value.first.return_value = member + result, status = method(api, member.id) + + assert status == 200 + assert result["result"] == "success" + + def test_cancel_not_found(self, app): + api = MemberCancelInviteApi() + method = unwrap(api.delete) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.db.session.query") as q, + ): + q.return_value.where.return_value.first.return_value = None + + with pytest.raises(HTTPException): + method(api, "x") + + def test_cancel_cannot_operate_self(self, app): + api = MemberCancelInviteApi() + method = unwrap(api.delete) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + member = MagicMock() + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.db.session.query") as q, + patch( + "controllers.console.workspace.members.TenantService.remove_member_from_tenant", + side_effect=services.errors.account.CannotOperateSelfError("x"), + ), + ): + q.return_value.where.return_value.first.return_value = member + result, status = method(api, member.id) + + assert status == 400 + + def test_cancel_no_permission(self, app): + api = MemberCancelInviteApi() + method = unwrap(api.delete) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + member = MagicMock() + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.db.session.query") as q, + patch( + "controllers.console.workspace.members.TenantService.remove_member_from_tenant", + side_effect=services.errors.account.NoPermissionError("x"), + ), + ): + q.return_value.where.return_value.first.return_value = member + result, status = method(api, member.id) + + assert status == 403 + + def test_cancel_member_not_in_tenant(self, app): + api = MemberCancelInviteApi() + method = unwrap(api.delete) + + tenant = MagicMock(id="t1") + user = MagicMock(current_tenant=tenant) + member = MagicMock() + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.db.session.query") as q, + patch( + "controllers.console.workspace.members.TenantService.remove_member_from_tenant", + side_effect=services.errors.account.MemberNotInTenantError(), + ), + ): + q.return_value.where.return_value.first.return_value = member + result, status = method(api, member.id) + + assert status == 404 + + +class TestMemberUpdateRoleApi: + def test_update_success(self, app): + api = MemberUpdateRoleApi() + method = unwrap(api.put) + + tenant = MagicMock() + user = MagicMock(current_tenant=tenant) + member = MagicMock() + + payload = {"role": "normal"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.db.session.get", return_value=member), + patch("controllers.console.workspace.members.TenantService.update_member_role"), + ): + result = method(api, "id") + + if isinstance(result, tuple): + result = result[0] + + assert result["result"] == "success" + + def test_update_invalid_role(self, app): + api = MemberUpdateRoleApi() + method = unwrap(api.put) + + payload = {"role": "invalid-role"} + + with app.test_request_context("/", json=payload): + result, status = method(api, "id") + + assert status == 400 + + def test_update_member_not_found(self, app): + api = MemberUpdateRoleApi() + method = unwrap(api.put) + + payload = {"role": "normal"} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.members.current_account_with_tenant", + return_value=(MagicMock(current_tenant=MagicMock()), "t1"), + ), + patch("controllers.console.workspace.members.db.session.get", return_value=None), + ): + with pytest.raises(HTTPException): + method(api, "id") + + +class TestDatasetOperatorMemberListApi: + def test_get_success(self, app): + api = DatasetOperatorMemberListApi() + method = unwrap(api.get) + + tenant = MagicMock() + user = MagicMock(current_tenant=tenant) + member = MagicMock() + member.id = "op1" + member.name = "Operator" + member.email = "operator@test.com" + member.avatar = "avatar.png" + member.role = "operator" + member.status = "active" + members = [member] + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch( + "controllers.console.workspace.members.TenantService.get_dataset_operator_members", return_value=members + ), + ): + result, status = method(api) + + assert status == 200 + assert len(result["accounts"]) == 1 + + def test_get_no_tenant(self, app): + api = DatasetOperatorMemberListApi() + method = unwrap(api.get) + + user = MagicMock(current_tenant=None) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + ): + with pytest.raises(ValueError): + method(api) + + +class TestSendOwnerTransferEmailApi: + def test_send_success(self, app): + api = SendOwnerTransferEmailApi() + method = unwrap(api.post) + + tenant = MagicMock(name="ws") + user = MagicMock(email="a@test.com", current_tenant=tenant) + + payload = {} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.extract_remote_ip", return_value="1.1.1.1"), + patch("controllers.console.workspace.members.AccountService.is_email_send_ip_limit", return_value=False), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + patch( + "controllers.console.workspace.members.AccountService.send_owner_transfer_email", return_value="token" + ), + ): + result = method(api) + + assert result["result"] == "success" + + def test_send_ip_limit(self, app): + api = SendOwnerTransferEmailApi() + method = unwrap(api.post) + + payload = {} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.extract_remote_ip", return_value="1.1.1.1"), + patch("controllers.console.workspace.members.AccountService.is_email_send_ip_limit", return_value=True), + ): + with pytest.raises(EmailSendIpLimitError): + method(api) + + def test_send_not_owner(self, app): + api = SendOwnerTransferEmailApi() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(current_tenant=tenant) + + with ( + app.test_request_context("/", json={}), + patch("controllers.console.workspace.members.extract_remote_ip", return_value="1.1.1.1"), + patch("controllers.console.workspace.members.AccountService.is_email_send_ip_limit", return_value=False), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=False), + ): + with pytest.raises(NotOwnerError): + method(api) + + +class TestOwnerTransferCheckApi: + def test_check_invalid_code(self, app): + api = OwnerTransferCheckApi() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(email="a@test.com", current_tenant=tenant) + + payload = {"code": "x", "token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + patch( + "controllers.console.workspace.members.AccountService.is_owner_transfer_error_rate_limit", + return_value=False, + ), + patch( + "controllers.console.workspace.members.AccountService.get_owner_transfer_data", + return_value={"email": "a@test.com", "code": "y"}, + ), + ): + with pytest.raises(EmailCodeError): + method(api) + + def test_rate_limited(self, app): + api = OwnerTransferCheckApi() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(email="a@test.com", current_tenant=tenant) + + payload = {"code": "x", "token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + patch( + "controllers.console.workspace.members.AccountService.is_owner_transfer_error_rate_limit", + return_value=True, + ), + ): + with pytest.raises(OwnerTransferLimitError): + method(api) + + def test_invalid_token(self, app): + api = OwnerTransferCheckApi() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(email="a@test.com", current_tenant=tenant) + + payload = {"code": "x", "token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + patch( + "controllers.console.workspace.members.AccountService.is_owner_transfer_error_rate_limit", + return_value=False, + ), + patch("controllers.console.workspace.members.AccountService.get_owner_transfer_data", return_value=None), + ): + with pytest.raises(InvalidTokenError): + method(api) + + def test_invalid_email(self, app): + api = OwnerTransferCheckApi() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(email="a@test.com", current_tenant=tenant) + + payload = {"code": "x", "token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + patch( + "controllers.console.workspace.members.AccountService.is_owner_transfer_error_rate_limit", + return_value=False, + ), + patch( + "controllers.console.workspace.members.AccountService.get_owner_transfer_data", + return_value={"email": "b@test.com", "code": "x"}, + ), + ): + with pytest.raises(InvalidEmailError): + method(api) + + +class TestOwnerTransferApi: + def test_transfer_self(self, app): + api = OwnerTransfer() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(id="1", email="a@test.com", current_tenant=tenant) + + payload = {"token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + ): + with pytest.raises(CannotTransferOwnerToSelfError): + method(api, "1") + + def test_invalid_token(self, app): + api = OwnerTransfer() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(id="1", email="a@test.com", current_tenant=tenant) + + payload = {"token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + patch("controllers.console.workspace.members.AccountService.get_owner_transfer_data", return_value=None), + ): + with pytest.raises(InvalidTokenError): + method(api, "2") + + def test_member_not_in_tenant(self, app): + api = OwnerTransfer() + method = unwrap(api.post) + + tenant = MagicMock() + user = MagicMock(id="1", email="a@test.com", current_tenant=tenant) + member = MagicMock() + + payload = {"token": "t"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.members.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.members.TenantService.is_owner", return_value=True), + patch( + "controllers.console.workspace.members.AccountService.get_owner_transfer_data", + return_value={"email": "a@test.com"}, + ), + patch("controllers.console.workspace.members.db.session.get", return_value=member), + patch("controllers.console.workspace.members.TenantService.is_member", return_value=False), + ): + with pytest.raises(MemberNotInTenantError): + method(api, "2") diff --git a/api/tests/unit_tests/controllers/console/workspace/test_model_providers.py b/api/tests/unit_tests/controllers/console/workspace/test_model_providers.py new file mode 100644 index 0000000000..af0c2c5594 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_model_providers.py @@ -0,0 +1,388 @@ +from unittest.mock import MagicMock, patch + +import pytest +from pydantic_core import ValidationError +from werkzeug.exceptions import Forbidden + +from controllers.console.workspace.model_providers import ( + ModelProviderCredentialApi, + ModelProviderCredentialSwitchApi, + ModelProviderIconApi, + ModelProviderListApi, + ModelProviderPaymentCheckoutUrlApi, + ModelProviderValidateApi, + PreferredProviderTypeUpdateApi, +) +from dify_graph.model_runtime.errors.validate import CredentialsValidateFailedError + +VALID_UUID = "123e4567-e89b-12d3-a456-426614174000" +INVALID_UUID = "123" + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestModelProviderListApi: + def test_get_success(self, app): + api = ModelProviderListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?model_type=llm"), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.get_provider_list", + return_value=[{"name": "openai"}], + ), + ): + result = method(api) + + assert "data" in result + + +class TestModelProviderCredentialApi: + def test_get_success(self, app): + api = ModelProviderCredentialApi() + method = unwrap(api.get) + + with ( + app.test_request_context(f"/?credential_id={VALID_UUID}"), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.get_provider_credential", + return_value={"key": "value"}, + ), + ): + result = method(api, provider="openai") + + assert "credentials" in result + + def test_get_invalid_uuid(self, app): + api = ModelProviderCredentialApi() + method = unwrap(api.get) + + with ( + app.test_request_context(f"/?credential_id={INVALID_UUID}"), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + ): + with pytest.raises(ValidationError): + method(api, provider="openai") + + def test_post_create_success(self, app): + api = ModelProviderCredentialApi() + method = unwrap(api.post) + + payload = {"credentials": {"a": "b"}, "name": "test"} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.create_provider_credential", + return_value=None, + ), + ): + result, status = method(api, provider="openai") + + assert result["result"] == "success" + assert status == 201 + + def test_post_create_validation_error(self, app): + api = ModelProviderCredentialApi() + method = unwrap(api.post) + + payload = {"credentials": {"a": "b"}} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.create_provider_credential", + side_effect=CredentialsValidateFailedError("bad"), + ), + ): + with pytest.raises(ValueError): + method(api, provider="openai") + + def test_put_update_success(self, app): + api = ModelProviderCredentialApi() + method = unwrap(api.put) + + payload = {"credential_id": VALID_UUID, "credentials": {"a": "b"}} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.update_provider_credential", + return_value=None, + ), + ): + result = method(api, provider="openai") + + assert result["result"] == "success" + + def test_put_invalid_uuid(self, app): + api = ModelProviderCredentialApi() + method = unwrap(api.put) + + payload = {"credential_id": INVALID_UUID, "credentials": {"a": "b"}} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + ): + with pytest.raises(ValidationError): + method(api, provider="openai") + + def test_delete_success(self, app): + api = ModelProviderCredentialApi() + method = unwrap(api.delete) + + payload = {"credential_id": VALID_UUID} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.remove_provider_credential", + return_value=None, + ), + ): + result, status = method(api, provider="openai") + + assert result["result"] == "success" + assert status == 204 + + +class TestModelProviderCredentialSwitchApi: + def test_switch_success(self, app): + api = ModelProviderCredentialSwitchApi() + method = unwrap(api.post) + + payload = {"credential_id": VALID_UUID} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.switch_active_provider_credential", + return_value=None, + ), + ): + result = method(api, provider="openai") + + assert result["result"] == "success" + + def test_switch_invalid_uuid(self, app): + api = ModelProviderCredentialSwitchApi() + method = unwrap(api.post) + + payload = {"credential_id": INVALID_UUID} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + ): + with pytest.raises(ValidationError): + method(api, provider="openai") + + +class TestModelProviderValidateApi: + def test_validate_success(self, app): + api = ModelProviderValidateApi() + method = unwrap(api.post) + + payload = {"credentials": {"a": "b"}} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.validate_provider_credentials", + return_value=None, + ), + ): + result = method(api, provider="openai") + + assert result["result"] == "success" + + def test_validate_failure(self, app): + api = ModelProviderValidateApi() + method = unwrap(api.post) + + payload = {"credentials": {"a": "b"}} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.validate_provider_credentials", + side_effect=CredentialsValidateFailedError("bad"), + ), + ): + result = method(api, provider="openai") + + assert result["result"] == "error" + + +class TestModelProviderIconApi: + def test_icon_success(self, app): + api = ModelProviderIconApi() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.get_model_provider_icon", + return_value=(b"123", "image/png"), + ), + ): + response = api.get("t1", "openai", "logo", "en") + + assert response.mimetype == "image/png" + + def test_icon_not_found(self, app): + api = ModelProviderIconApi() + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.get_model_provider_icon", + return_value=(None, None), + ), + ): + with pytest.raises(ValueError): + api.get("t1", "openai", "logo", "en") + + +class TestPreferredProviderTypeUpdateApi: + def test_update_success(self, app): + api = PreferredProviderTypeUpdateApi() + method = unwrap(api.post) + + payload = {"preferred_provider_type": "custom"} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.ModelProviderService.switch_preferred_provider", + return_value=None, + ), + ): + result = method(api, provider="openai") + + assert result["result"] == "success" + + def test_invalid_enum(self, app): + api = PreferredProviderTypeUpdateApi() + method = unwrap(api.post) + + payload = {"preferred_provider_type": "invalid"} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + ): + with pytest.raises(ValidationError): + method(api, provider="openai") + + +class TestModelProviderPaymentCheckoutUrlApi: + def test_checkout_success(self, app): + api = ModelProviderPaymentCheckoutUrlApi() + method = unwrap(api.get) + + user = MagicMock(id="u1", email="x@test.com") + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(user, "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.BillingService.is_tenant_owner_or_admin", + return_value=None, + ), + patch( + "controllers.console.workspace.model_providers.BillingService.get_model_provider_payment_link", + return_value={"url": "x"}, + ), + ): + result = method(api, provider="anthropic") + + assert "url" in result + + def test_invalid_provider(self, app): + api = ModelProviderPaymentCheckoutUrlApi() + method = unwrap(api.get) + + with app.test_request_context("/"): + with pytest.raises(ValueError): + method(api, provider="openai") + + def test_permission_denied(self, app): + api = ModelProviderPaymentCheckoutUrlApi() + method = unwrap(api.get) + + user = MagicMock(id="u1", email="x@test.com") + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.model_providers.current_account_with_tenant", + return_value=(user, "tenant1"), + ), + patch( + "controllers.console.workspace.model_providers.BillingService.is_tenant_owner_or_admin", + side_effect=Forbidden(), + ), + ): + with pytest.raises(Forbidden): + method(api, provider="anthropic") diff --git a/api/tests/unit_tests/controllers/console/workspace/test_models.py b/api/tests/unit_tests/controllers/console/workspace/test_models.py new file mode 100644 index 0000000000..43b8e1ac2e --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_models.py @@ -0,0 +1,447 @@ +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.console.workspace.models import ( + DefaultModelApi, + ModelProviderAvailableModelApi, + ModelProviderModelApi, + ModelProviderModelCredentialApi, + ModelProviderModelCredentialSwitchApi, + ModelProviderModelDisableApi, + ModelProviderModelEnableApi, + ModelProviderModelParameterRuleApi, + ModelProviderModelValidateApi, +) +from dify_graph.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.errors.validate import CredentialsValidateFailedError + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestDefaultModelApi: + def test_get_success(self, app: Flask): + api = DefaultModelApi() + method = unwrap(api.get) + + with ( + app.test_request_context( + "/", + query_string={"model_type": ModelType.LLM.value}, + ), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService") as service_mock, + ): + service_mock.return_value.get_default_model_of_model_type.return_value = {"model": "gpt-4"} + + result = method(api) + + assert "data" in result + + def test_post_success(self, app: Flask): + api = DefaultModelApi() + method = unwrap(api.post) + + payload = { + "model_settings": [ + { + "model_type": ModelType.LLM.value, + "provider": "openai", + "model": "gpt-4", + } + ] + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result = method(api) + + assert result["result"] == "success" + + def test_get_returns_empty_when_no_default(self, app): + api = DefaultModelApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/", query_string={"model_type": ModelType.LLM.value}), + patch("controllers.console.workspace.models.current_account_with_tenant", return_value=(MagicMock(), "t1")), + patch("controllers.console.workspace.models.ModelProviderService") as service, + ): + service.return_value.get_default_model_of_model_type.return_value = None + + result = method(api) + + assert "data" in result + + +class TestModelProviderModelApi: + def test_get_models_success(self, app: Flask): + api = ModelProviderModelApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService") as service_mock, + ): + service_mock.return_value.get_models_by_provider.return_value = [] + + result = method(api, "openai") + + assert "data" in result + + def test_post_models_success(self, app: Flask): + api = ModelProviderModelApi() + method = unwrap(api.post) + + payload = { + "model": "gpt-4", + "model_type": ModelType.LLM.value, + "load_balancing": { + "configs": [{"weight": 1}], + "enabled": True, + }, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + patch("controllers.console.workspace.models.ModelLoadBalancingService"), + ): + result, status = method(api, "openai") + + assert status == 200 + + def test_delete_model_success(self, app: Flask): + api = ModelProviderModelApi() + method = unwrap(api.delete) + + payload = { + "model": "gpt-4", + "model_type": ModelType.LLM.value, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result, status = method(api, "openai") + + assert status == 204 + + def test_get_models_returns_empty(self, app): + api = ModelProviderModelApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.models.current_account_with_tenant", return_value=(MagicMock(), "t1")), + patch("controllers.console.workspace.models.ModelProviderService") as service, + ): + service.return_value.get_models_by_provider.return_value = [] + + result = method(api, "openai") + + assert "data" in result + + +class TestModelProviderModelCredentialApi: + def test_get_credentials_success(self, app: Flask): + api = ModelProviderModelCredentialApi() + method = unwrap(api.get) + + with ( + app.test_request_context( + "/", + query_string={ + "model": "gpt-4", + "model_type": ModelType.LLM.value, + }, + ), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService") as provider_service, + patch("controllers.console.workspace.models.ModelLoadBalancingService") as lb_service, + ): + provider_service.return_value.get_model_credential.return_value = { + "credentials": {}, + "current_credential_id": None, + "current_credential_name": None, + } + provider_service.return_value.provider_manager.get_provider_model_available_credentials.return_value = [] + lb_service.return_value.get_load_balancing_configs.return_value = (False, []) + + result = method(api, "openai") + + assert "credentials" in result + + def test_create_credential_success(self, app: Flask): + api = ModelProviderModelCredentialApi() + method = unwrap(api.post) + + payload = { + "model": "gpt-4", + "model_type": ModelType.LLM.value, + "credentials": {"key": "val"}, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result, status = method(api, "openai") + + assert status == 201 + + def test_get_empty_credentials(self, app): + api = ModelProviderModelCredentialApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/", query_string={"model": "gpt", "model_type": ModelType.LLM.value}), + patch("controllers.console.workspace.models.current_account_with_tenant", return_value=(MagicMock(), "t1")), + patch("controllers.console.workspace.models.ModelProviderService") as service, + patch("controllers.console.workspace.models.ModelLoadBalancingService") as lb, + ): + service.return_value.get_model_credential.return_value = None + service.return_value.provider_manager.get_provider_model_available_credentials.return_value = [] + lb.return_value.get_load_balancing_configs.return_value = (False, []) + + result = method(api, "openai") + + assert result["credentials"] == {} + + def test_delete_success(self, app): + api = ModelProviderModelCredentialApi() + method = unwrap(api.delete) + + payload = { + "model": "gpt", + "model_type": ModelType.LLM.value, + "credential_id": "123e4567-e89b-12d3-a456-426614174000", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.models.current_account_with_tenant", return_value=(MagicMock(), "t1")), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result, status = method(api, "openai") + + assert status == 204 + + +class TestModelProviderModelCredentialSwitchApi: + def test_switch_success(self, app: Flask): + api = ModelProviderModelCredentialSwitchApi() + method = unwrap(api.post) + + payload = { + "model": "gpt-4", + "model_type": ModelType.LLM.value, + "credential_id": "abc", + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result = method(api, "openai") + + assert result["result"] == "success" + + +class TestModelEnableDisableApis: + def test_enable_model(self, app: Flask): + api = ModelProviderModelEnableApi() + method = unwrap(api.patch) + + payload = { + "model": "gpt-4", + "model_type": ModelType.LLM.value, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result = method(api, "openai") + + assert result["result"] == "success" + + def test_disable_model(self, app: Flask): + api = ModelProviderModelDisableApi() + method = unwrap(api.patch) + + payload = { + "model": "gpt-4", + "model_type": ModelType.LLM.value, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result = method(api, "openai") + + assert result["result"] == "success" + + +class TestModelProviderModelValidateApi: + def test_validate_success(self, app: Flask): + api = ModelProviderModelValidateApi() + method = unwrap(api.post) + + payload = { + "model": "gpt-4", + "model_type": ModelType.LLM.value, + "credentials": {"key": "val"}, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService"), + ): + result = method(api, "openai") + + assert result["result"] == "success" + + @pytest.mark.parametrize("model_name", ["gpt-4", "gpt"]) + def test_validate_failure(self, app: Flask, model_name: str): + api = ModelProviderModelValidateApi() + method = unwrap(api.post) + + payload = { + "model": model_name, + "model_type": ModelType.LLM.value, + "credentials": {}, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService") as service_mock, + ): + service_mock.return_value.validate_model_credentials.side_effect = CredentialsValidateFailedError("invalid") + + result = method(api, "openai") + + assert result["result"] == "error" + + +class TestParameterAndAvailableModels: + def test_parameter_rules(self, app: Flask): + api = ModelProviderModelParameterRuleApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/", query_string={"model": "gpt-4"}), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService") as service_mock, + ): + service_mock.return_value.get_model_parameter_rules.return_value = [] + + result = method(api, "openai") + + assert "data" in result + + def test_available_models(self, app: Flask): + api = ModelProviderAvailableModelApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.models.current_account_with_tenant", + return_value=(MagicMock(), "tenant1"), + ), + patch("controllers.console.workspace.models.ModelProviderService") as service_mock, + ): + service_mock.return_value.get_models_by_model_type.return_value = [] + + result = method(api, ModelType.LLM.value) + + assert "data" in result + + def test_empty_rules(self, app): + api = ModelProviderModelParameterRuleApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/", query_string={"model": "gpt"}), + patch("controllers.console.workspace.models.current_account_with_tenant", return_value=(MagicMock(), "t1")), + patch("controllers.console.workspace.models.ModelProviderService") as service, + ): + service.return_value.get_model_parameter_rules.return_value = [] + + result = method(api, "openai") + + assert result["data"] == [] + + def test_no_models(self, app): + api = ModelProviderAvailableModelApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.models.current_account_with_tenant", return_value=(MagicMock(), "t1")), + patch("controllers.console.workspace.models.ModelProviderService") as service, + ): + service.return_value.get_models_by_model_type.return_value = [] + + result = method(api, ModelType.LLM.value) + + assert result["data"] == [] diff --git a/api/tests/unit_tests/controllers/console/workspace/test_plugin.py b/api/tests/unit_tests/controllers/console/workspace/test_plugin.py new file mode 100644 index 0000000000..eb19243225 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_plugin.py @@ -0,0 +1,1025 @@ +import io +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.datastructures import FileStorage +from werkzeug.exceptions import Forbidden + +from controllers.console.workspace.plugin import ( + PluginAssetApi, + PluginAutoUpgradeExcludePluginApi, + PluginChangePermissionApi, + PluginChangePreferencesApi, + PluginDebuggingKeyApi, + PluginDeleteAllInstallTaskItemsApi, + PluginDeleteInstallTaskApi, + PluginDeleteInstallTaskItemApi, + PluginFetchDynamicSelectOptionsApi, + PluginFetchDynamicSelectOptionsWithCredentialsApi, + PluginFetchInstallTaskApi, + PluginFetchInstallTasksApi, + PluginFetchManifestApi, + PluginFetchMarketplacePkgApi, + PluginFetchPermissionApi, + PluginFetchPreferencesApi, + PluginIconApi, + PluginInstallFromGithubApi, + PluginInstallFromMarketplaceApi, + PluginInstallFromPkgApi, + PluginListApi, + PluginListInstallationsFromIdsApi, + PluginListLatestVersionsApi, + PluginReadmeApi, + PluginUninstallApi, + PluginUpgradeFromGithubApi, + PluginUpgradeFromMarketplaceApi, + PluginUploadFromBundleApi, + PluginUploadFromGithubApi, + PluginUploadFromPkgApi, +) +from core.plugin.impl.exc import PluginDaemonClientSideError +from models.account import TenantPluginAutoUpgradeStrategy, TenantPluginPermission + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +@pytest.fixture +def user(): + u = MagicMock() + u.id = "u1" + u.is_admin_or_owner = True + return u + + +@pytest.fixture +def tenant(): + return "t1" + + +class TestPluginListLatestVersionsApi: + def test_success(self, app): + api = PluginListLatestVersionsApi() + method = unwrap(api.post) + + payload = {"plugin_ids": ["p1"]} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.plugin.PluginService.list_latest_versions", return_value={"p1": "1.0"} + ), + ): + result = method(api) + + assert "versions" in result + + def test_daemon_error(self, app): + api = PluginListLatestVersionsApi() + method = unwrap(api.post) + + payload = {"plugin_ids": ["p1"]} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.plugin.PluginService.list_latest_versions", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginDebuggingKeyApi: + def test_debugging_key_success(self, app): + api = PluginDebuggingKeyApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.get_debugging_key", return_value="k"), + ): + result = method(api) + + assert result["key"] == "k" + + def test_debugging_key_error(self, app): + api = PluginDebuggingKeyApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.get_debugging_key", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginListApi: + def test_plugin_list(self, app): + api = PluginListApi() + method = unwrap(api.get) + + mock_list = MagicMock(list=[{"id": 1}], total=1) + + with ( + app.test_request_context("/?page=1&page_size=10"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.list_with_total", return_value=mock_list), + ): + result = method(api) + + assert result["total"] == 1 + + +class TestPluginIconApi: + def test_plugin_icon(self, app): + api = PluginIconApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?tenant_id=t1&filename=a.png"), + patch("controllers.console.workspace.plugin.PluginService.get_asset", return_value=(b"x", "image/png")), + ): + response = method(api) + + assert response.mimetype == "image/png" + + +class TestPluginAssetApi: + def test_plugin_asset(self, app): + api = PluginAssetApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?plugin_unique_identifier=p&file_name=a.bin"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.extract_asset", return_value=b"x"), + ): + response = method(api) + + assert response.mimetype == "application/octet-stream" + + +class TestPluginUploadFromPkgApi: + def test_upload_pkg_success(self, app): + api = PluginUploadFromPkgApi() + method = unwrap(api.post) + + data = { + "pkg": (io.BytesIO(b"x"), "test.pkg"), + } + + with ( + app.test_request_context("/", data=data, content_type="multipart/form-data"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.upload_pkg", return_value={"ok": True}), + ): + result = method(api) + + assert result["ok"] is True + + def test_upload_pkg_too_large(self, app): + api = PluginUploadFromPkgApi() + method = unwrap(api.post) + + data = { + "pkg": (io.BytesIO(b"x"), "test.pkg"), + } + + with ( + app.test_request_context("/", data=data, content_type="multipart/form-data"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.dify_config.PLUGIN_MAX_PACKAGE_SIZE", 0), + patch("controllers.console.workspace.plugin.PluginService.upload_pkg") as upload_pkg_mock, + ): + with pytest.raises(ValueError): + method(api) + + upload_pkg_mock.assert_not_called() + + +class TestPluginInstallFromPkgApi: + def test_install_from_pkg(self, app): + api = PluginInstallFromPkgApi() + method = unwrap(api.post) + + payload = {"plugin_unique_identifiers": ["p1"]} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.install_from_local_pkg", return_value={"ok": True} + ), + ): + result = method(api) + + assert result["ok"] is True + + +class TestPluginUninstallApi: + def test_uninstall(self, app): + api = PluginUninstallApi() + method = unwrap(api.post) + + payload = {"plugin_installation_id": "x"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.uninstall", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + +class TestPluginChangePermissionApi: + def test_change_permission_forbidden(self, app): + api = PluginChangePermissionApi() + method = unwrap(api.post) + + user = MagicMock(is_admin_or_owner=False) + + payload = { + "install_permission": TenantPluginPermission.InstallPermission.EVERYONE, + "debug_permission": TenantPluginPermission.DebugPermission.EVERYONE, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(user, "t1")), + ): + with pytest.raises(Forbidden): + method(api) + + def test_change_permission_success(self, app): + api = PluginChangePermissionApi() + method = unwrap(api.post) + + user = MagicMock(is_admin_or_owner=True) + + payload = { + "install_permission": TenantPluginPermission.InstallPermission.EVERYONE, + "debug_permission": TenantPluginPermission.DebugPermission.EVERYONE, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.plugin.PluginPermissionService.change_permission", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + +class TestPluginFetchPermissionApi: + def test_fetch_permission_default(self, app): + api = PluginFetchPermissionApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginPermissionService.get_permission", return_value=None), + ): + result = method(api) + + assert result["install_permission"] is not None + + +class TestPluginFetchDynamicSelectOptionsApi: + def test_fetch_dynamic_options(self, app, user): + api = PluginFetchDynamicSelectOptionsApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?plugin_id=p&provider=x&action=y¶meter=z&provider_type=tool"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(user, "t1")), + patch( + "controllers.console.workspace.plugin.PluginParameterService.get_dynamic_select_options", + return_value=[1, 2], + ), + ): + result = method(api) + + assert result["options"] == [1, 2] + + +class TestPluginReadmeApi: + def test_fetch_readme(self, app): + api = PluginReadmeApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?plugin_unique_identifier=p"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.fetch_plugin_readme", return_value="readme"), + ): + result = method(api) + + assert result["readme"] == "readme" + + +class TestPluginListInstallationsFromIdsApi: + def test_success(self, app): + api = PluginListInstallationsFromIdsApi() + method = unwrap(api.post) + + payload = {"plugin_ids": ["p1", "p2"]} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.list_installations_from_ids", + return_value=[{"id": "p1"}], + ), + ): + result = method(api) + + assert "plugins" in result + + def test_daemon_error(self, app): + api = PluginListInstallationsFromIdsApi() + method = unwrap(api.post) + + payload = {"plugin_ids": ["p1"]} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.list_installations_from_ids", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginUploadFromGithubApi: + def test_success(self, app): + api = PluginUploadFromGithubApi() + method = unwrap(api.post) + + payload = {"repo": "r", "version": "v", "package": "p"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.upload_pkg_from_github", return_value={"ok": True} + ), + ): + result = method(api) + + assert result["ok"] is True + + def test_daemon_error(self, app): + api = PluginUploadFromGithubApi() + method = unwrap(api.post) + + payload = {"repo": "r", "version": "v", "package": "p"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.upload_pkg_from_github", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginUploadFromBundleApi: + def test_success(self, app): + api = PluginUploadFromBundleApi() + method = unwrap(api.post) + + file = FileStorage( + stream=io.BytesIO(b"x"), + filename="test.bundle", + content_type="application/octet-stream", + ) + + with ( + app.test_request_context( + "/", + data={"bundle": file}, + content_type="multipart/form-data", + ), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.upload_bundle", return_value={"ok": True}), + ): + result = method(api) + + assert result["ok"] is True + + def test_too_large(self, app): + api = PluginUploadFromBundleApi() + method = unwrap(api.post) + + file = FileStorage( + stream=io.BytesIO(b"x"), + filename="test.bundle", + content_type="application/octet-stream", + ) + + with ( + app.test_request_context( + "/", + data={"bundle": file}, + content_type="multipart/form-data", + ), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.dify_config.PLUGIN_MAX_BUNDLE_SIZE", 0), + patch("controllers.console.workspace.plugin.PluginService.upload_bundle") as upload_bundle_mock, + ): + with pytest.raises(ValueError): + method(api) + + upload_bundle_mock.assert_not_called() + + +class TestPluginInstallFromGithubApi: + def test_success(self, app): + api = PluginInstallFromGithubApi() + method = unwrap(api.post) + + payload = { + "plugin_unique_identifier": "p", + "repo": "r", + "version": "v", + "package": "pkg", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.install_from_github", return_value={"ok": True}), + ): + result = method(api) + + assert result["ok"] is True + + def test_daemon_error(self, app): + api = PluginInstallFromGithubApi() + method = unwrap(api.post) + + payload = { + "plugin_unique_identifier": "p", + "repo": "r", + "version": "v", + "package": "pkg", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.install_from_github", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginInstallFromMarketplaceApi: + def test_success(self, app): + api = PluginInstallFromMarketplaceApi() + method = unwrap(api.post) + + payload = {"plugin_unique_identifiers": ["p1"]} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.install_from_marketplace_pkg", + return_value={"ok": True}, + ), + ): + result = method(api) + + assert result["ok"] is True + + def test_daemon_error(self, app): + api = PluginInstallFromMarketplaceApi() + method = unwrap(api.post) + + payload = {"plugin_unique_identifiers": ["p1"]} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.install_from_marketplace_pkg", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginFetchMarketplacePkgApi: + def test_success(self, app): + api = PluginFetchMarketplacePkgApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?plugin_unique_identifier=p"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.fetch_marketplace_pkg", return_value={"m": 1}), + ): + result = method(api) + + assert "manifest" in result + + def test_daemon_error(self, app): + api = PluginFetchMarketplacePkgApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?plugin_unique_identifier=p"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.fetch_marketplace_pkg", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginFetchManifestApi: + def test_success(self, app): + api = PluginFetchManifestApi() + method = unwrap(api.get) + + manifest = MagicMock() + manifest.model_dump.return_value = {"x": 1} + + with ( + app.test_request_context("/?plugin_unique_identifier=p"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.fetch_plugin_manifest", return_value=manifest), + ): + result = method(api) + + assert "manifest" in result + + def test_daemon_error(self, app): + api = PluginFetchManifestApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?plugin_unique_identifier=p"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.fetch_plugin_manifest", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginFetchInstallTasksApi: + def test_success(self, app): + api = PluginFetchInstallTasksApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?page=1&page_size=10"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.fetch_install_tasks", return_value=[{"id": 1}]), + ): + result = method(api) + + assert "tasks" in result + + def test_daemon_error(self, app): + api = PluginFetchInstallTasksApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?page=1&page_size=10"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.fetch_install_tasks", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginFetchInstallTaskApi: + def test_success(self, app): + api = PluginFetchInstallTaskApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.fetch_install_task", return_value={"id": "x"}), + ): + result = method(api, "x") + + assert "task" in result + + def test_daemon_error(self, app): + api = PluginFetchInstallTaskApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.fetch_install_task", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api, "t") + + +class TestPluginDeleteInstallTaskApi: + def test_success(self, app): + api = PluginDeleteInstallTaskApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.delete_install_task", return_value=True), + ): + result = method(api, "x") + + assert result["success"] is True + + def test_daemon_error(self, app): + api = PluginDeleteInstallTaskApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.delete_install_task", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api, "t") + + +class TestPluginDeleteAllInstallTaskItemsApi: + def test_success(self, app): + api = PluginDeleteAllInstallTaskItemsApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.delete_all_install_task_items", return_value=True + ), + ): + result = method(api) + + assert result["success"] is True + + def test_daemon_error(self, app): + api = PluginDeleteAllInstallTaskItemsApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.delete_all_install_task_items", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginDeleteInstallTaskItemApi: + def test_success(self, app): + api = PluginDeleteInstallTaskItemApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginService.delete_install_task_item", return_value=True), + ): + result = method(api, "task1", "item1") + + assert result["success"] is True + + def test_daemon_error(self, app): + api = PluginDeleteInstallTaskItemApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.delete_install_task_item", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api, "task1", "item1") + + +class TestPluginUpgradeFromMarketplaceApi: + def test_success(self, app): + api = PluginUpgradeFromMarketplaceApi() + method = unwrap(api.post) + + payload = { + "original_plugin_unique_identifier": "p1", + "new_plugin_unique_identifier": "p2", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.upgrade_plugin_with_marketplace", + return_value={"ok": True}, + ), + ): + result = method(api) + + assert result["ok"] is True + + def test_daemon_error(self, app): + api = PluginUpgradeFromMarketplaceApi() + method = unwrap(api.post) + + payload = { + "original_plugin_unique_identifier": "p1", + "new_plugin_unique_identifier": "p2", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.upgrade_plugin_with_marketplace", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginUpgradeFromGithubApi: + def test_success(self, app): + api = PluginUpgradeFromGithubApi() + method = unwrap(api.post) + + payload = { + "original_plugin_unique_identifier": "p1", + "new_plugin_unique_identifier": "p2", + "repo": "r", + "version": "v", + "package": "pkg", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.upgrade_plugin_with_github", + return_value={"ok": True}, + ), + ): + result = method(api) + + assert result["ok"] is True + + def test_daemon_error(self, app): + api = PluginUpgradeFromGithubApi() + method = unwrap(api.post) + + payload = { + "original_plugin_unique_identifier": "p1", + "new_plugin_unique_identifier": "p2", + "repo": "r", + "version": "v", + "package": "pkg", + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginService.upgrade_plugin_with_github", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginFetchDynamicSelectOptionsWithCredentialsApi: + def test_success(self, app): + api = PluginFetchDynamicSelectOptionsWithCredentialsApi() + method = unwrap(api.post) + + user = MagicMock(id="u1", is_admin_or_owner=True) + + payload = { + "plugin_id": "p", + "provider": "x", + "action": "y", + "parameter": "z", + "credential_id": "c", + "credentials": {"k": "v"}, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(user, "t1")), + patch( + "controllers.console.workspace.plugin.PluginParameterService.get_dynamic_select_options_with_credentials", + return_value=[1], + ), + ): + result = method(api) + + assert result["options"] == [1] + + def test_daemon_error(self, app): + api = PluginFetchDynamicSelectOptionsWithCredentialsApi() + method = unwrap(api.post) + + user = MagicMock(id="u1", is_admin_or_owner=True) + + payload = { + "plugin_id": "p", + "provider": "x", + "action": "y", + "parameter": "z", + "credential_id": "c", + "credentials": {"k": "v"}, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(user, "t1")), + patch( + "controllers.console.workspace.plugin.PluginParameterService.get_dynamic_select_options_with_credentials", + side_effect=PluginDaemonClientSideError("error"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestPluginChangePreferencesApi: + def test_success(self, app): + api = PluginChangePreferencesApi() + method = unwrap(api.post) + + user = MagicMock(is_admin_or_owner=True) + + payload = { + "permission": { + "install_permission": TenantPluginPermission.InstallPermission.EVERYONE, + "debug_permission": TenantPluginPermission.DebugPermission.EVERYONE, + }, + "auto_upgrade": { + "strategy_setting": TenantPluginAutoUpgradeStrategy.StrategySetting.FIX_ONLY, + "upgrade_time_of_day": 0, + "upgrade_mode": TenantPluginAutoUpgradeStrategy.UpgradeMode.EXCLUDE, + "exclude_plugins": [], + "include_plugins": [], + }, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.plugin.PluginPermissionService.change_permission", return_value=True), + patch("controllers.console.workspace.plugin.PluginAutoUpgradeService.change_strategy", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + def test_permission_fail(self, app): + api = PluginChangePreferencesApi() + method = unwrap(api.post) + + user = MagicMock(is_admin_or_owner=True) + + payload = { + "permission": { + "install_permission": TenantPluginPermission.InstallPermission.EVERYONE, + "debug_permission": TenantPluginPermission.DebugPermission.EVERYONE, + }, + "auto_upgrade": { + "strategy_setting": TenantPluginAutoUpgradeStrategy.StrategySetting.FIX_ONLY, + "upgrade_time_of_day": 0, + "upgrade_mode": TenantPluginAutoUpgradeStrategy.UpgradeMode.EXCLUDE, + "exclude_plugins": [], + "include_plugins": [], + }, + } + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.plugin.PluginPermissionService.change_permission", return_value=False), + ): + result = method(api) + + assert result["success"] is False + + +class TestPluginFetchPreferencesApi: + def test_success(self, app): + api = PluginFetchPreferencesApi() + method = unwrap(api.get) + + permission = MagicMock( + install_permission=TenantPluginPermission.InstallPermission.EVERYONE, + debug_permission=TenantPluginPermission.DebugPermission.EVERYONE, + ) + + auto_upgrade = MagicMock( + strategy_setting=TenantPluginAutoUpgradeStrategy.StrategySetting.FIX_ONLY, + upgrade_time_of_day=1, + upgrade_mode=TenantPluginAutoUpgradeStrategy.UpgradeMode.EXCLUDE, + exclude_plugins=[], + include_plugins=[], + ) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch( + "controllers.console.workspace.plugin.PluginPermissionService.get_permission", return_value=permission + ), + patch( + "controllers.console.workspace.plugin.PluginAutoUpgradeService.get_strategy", return_value=auto_upgrade + ), + ): + result = method(api) + + assert "permission" in result + assert "auto_upgrade" in result + + +class TestPluginAutoUpgradeExcludePluginApi: + def test_success(self, app): + api = PluginAutoUpgradeExcludePluginApi() + method = unwrap(api.post) + + payload = {"plugin_id": "p"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginAutoUpgradeService.exclude_plugin", return_value=True), + ): + result = method(api) + + assert result["success"] is True + + def test_fail(self, app): + api = PluginAutoUpgradeExcludePluginApi() + method = unwrap(api.post) + + payload = {"plugin_id": "p"} + + with ( + app.test_request_context("/", json=payload), + patch("controllers.console.workspace.plugin.current_account_with_tenant", return_value=(None, "t1")), + patch("controllers.console.workspace.plugin.PluginAutoUpgradeService.exclude_plugin", return_value=False), + ): + result = method(api) + + assert result["success"] is False diff --git a/api/tests/unit_tests/controllers/console/workspace/test_tool_provider.py b/api/tests/unit_tests/controllers/console/workspace/test_tool_provider.py index b15676d9b7..16ea1bf509 100644 --- a/api/tests/unit_tests/controllers/console/workspace/test_tool_provider.py +++ b/api/tests/unit_tests/controllers/console/workspace/test_tool_provider.py @@ -4,16 +4,52 @@ from unittest.mock import MagicMock, patch import pytest from flask import Flask from flask_restx import Api +from werkzeug.exceptions import Forbidden -from controllers.console.workspace.tool_providers import ToolProviderMCPApi +from controllers.console.workspace.tool_providers import ( + ToolApiListApi, + ToolApiProviderAddApi, + ToolApiProviderDeleteApi, + ToolApiProviderGetApi, + ToolApiProviderGetRemoteSchemaApi, + ToolApiProviderListToolsApi, + ToolApiProviderUpdateApi, + ToolBuiltinListApi, + ToolBuiltinProviderAddApi, + ToolBuiltinProviderCredentialsSchemaApi, + ToolBuiltinProviderDeleteApi, + ToolBuiltinProviderGetCredentialInfoApi, + ToolBuiltinProviderGetCredentialsApi, + ToolBuiltinProviderGetOauthClientSchemaApi, + ToolBuiltinProviderIconApi, + ToolBuiltinProviderInfoApi, + ToolBuiltinProviderListToolsApi, + ToolBuiltinProviderSetDefaultApi, + ToolBuiltinProviderUpdateApi, + ToolLabelsApi, + ToolOAuthCallback, + ToolOAuthCustomClient, + ToolPluginOAuthApi, + ToolProviderListApi, + ToolProviderMCPApi, + ToolWorkflowListApi, + ToolWorkflowProviderCreateApi, + ToolWorkflowProviderDeleteApi, + ToolWorkflowProviderGetApi, + ToolWorkflowProviderUpdateApi, + is_valid_url, +) from core.db.session_factory import configure_session_factory from extensions.ext_database import db from services.tools.mcp_tools_manage_service import ReconnectResult -# Backward-compat fixtures referenced by @pytest.mark.usefixtures in this file. -# They are intentionally no-ops because the test already patches the required -# behaviors explicitly via @patch and context managers below. +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + @pytest.fixture def _mock_cache(): return @@ -107,3 +143,602 @@ def test_create_mcp_provider_populates_tools(mock_reconnect, mock_session, mock_ # 若 transform 后包含 tools 字段,确保非空 assert isinstance(body.get("tools"), list) assert body["tools"] + + +class TestUtils: + def test_is_valid_url(self): + assert is_valid_url("https://example.com") + assert is_valid_url("http://example.com") + assert not is_valid_url("") + assert not is_valid_url("ftp://example.com") + assert not is_valid_url("not-a-url") + assert not is_valid_url(None) + + +class TestToolProviderListApi: + def test_get_success(self, app): + api = ToolProviderListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u1"), "t1"), + ), + patch( + "controllers.console.workspace.tool_providers.ToolCommonService.list_tool_providers", + return_value=["p1"], + ), + ): + assert method(api) == ["p1"] + + +class TestBuiltinProviderApis: + def test_list_tools(self, app): + api = ToolBuiltinProviderListToolsApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(None, "t1"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.list_builtin_tool_provider_tools", + return_value=[{"a": 1}], + ), + ): + assert method(api, "provider") == [{"a": 1}] + + def test_info(self, app): + api = ToolBuiltinProviderInfoApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(None, "t1"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.get_builtin_tool_provider_info", + return_value={"x": 1}, + ), + ): + assert method(api, "provider") == {"x": 1} + + def test_delete(self, app): + api = ToolBuiltinProviderDeleteApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"credential_id": "cid"}), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(None, "t1"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.delete_builtin_tool_provider", + return_value={"result": "success"}, + ), + ): + assert method(api, "provider")["result"] == "success" + + def test_add_invalid_type(self, app): + api = ToolBuiltinProviderAddApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"credentials": {}, "type": "invalid"}), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + ): + with pytest.raises(ValueError): + method(api, "provider") + + def test_add_success(self, app): + api = ToolBuiltinProviderAddApi() + method = unwrap(api.post) + + payload = {"credentials": {}, "type": "oauth2", "name": "n"} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.add_builtin_tool_provider", + return_value={"id": 1}, + ), + ): + assert method(api, "provider")["id"] == 1 + + def test_update(self, app): + api = ToolBuiltinProviderUpdateApi() + method = unwrap(api.post) + + payload = {"credential_id": "c1", "credentials": {}, "name": "n"} + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.update_builtin_tool_provider", + return_value={"ok": True}, + ), + ): + assert method(api, "provider")["ok"] + + def test_get_credentials(self, app): + api = ToolBuiltinProviderGetCredentialsApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(None, "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.get_builtin_tool_provider_credentials", + return_value={"k": "v"}, + ), + ): + assert method(api, "provider") == {"k": "v"} + + def test_icon(self, app): + api = ToolBuiltinProviderIconApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.get_builtin_tool_provider_icon", + return_value=(b"x", "image/png"), + ), + ): + response = method(api, "provider") + assert response.mimetype == "image/png" + + def test_credentials_schema(self, app): + api = ToolBuiltinProviderCredentialsSchemaApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.list_builtin_provider_credentials_schema", + return_value={"schema": {}}, + ), + ): + assert method(api, "provider", "oauth2") == {"schema": {}} + + def test_set_default_credential(self, app): + api = ToolBuiltinProviderSetDefaultApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"id": "c1"}), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.set_default_provider", + return_value={"ok": True}, + ), + ): + assert method(api, "provider")["ok"] + + def test_get_credential_info(self, app): + api = ToolBuiltinProviderGetCredentialInfoApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.get_builtin_tool_provider_credential_info", + return_value={"info": "x"}, + ), + ): + assert method(api, "provider") == {"info": "x"} + + def test_get_oauth_client_schema(self, app): + api = ToolBuiltinProviderGetOauthClientSchemaApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.get_builtin_tool_provider_oauth_client_schema", + return_value={"schema": {}}, + ), + ): + assert method(api, "provider") == {"schema": {}} + + +class TestApiProviderApis: + def test_add(self, app): + api = ToolApiProviderAddApi() + method = unwrap(api.post) + + payload = { + "credentials": {}, + "schema_type": "openapi", + "schema": "{}", + "provider": "p", + "icon": {}, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.ApiToolManageService.create_api_tool_provider", + return_value={"id": 1}, + ), + ): + assert method(api)["id"] == 1 + + def test_remote_schema(self, app): + api = ToolApiProviderGetRemoteSchemaApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?url=http://x.com"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.ApiToolManageService.get_api_tool_provider_remote_schema", + return_value={"schema": "x"}, + ), + ): + assert method(api)["schema"] == "x" + + def test_list_tools(self, app): + api = ToolApiProviderListToolsApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?provider=p"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.ApiToolManageService.list_api_tool_provider_tools", + return_value=[{"tool": 1}], + ), + ): + assert method(api) == [{"tool": 1}] + + def test_update(self, app): + api = ToolApiProviderUpdateApi() + method = unwrap(api.post) + + payload = { + "credentials": {}, + "schema_type": "openapi", + "schema": "{}", + "provider": "p", + "original_provider": "o", + "icon": {}, + "privacy_policy": "", + "custom_disclaimer": "", + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.ApiToolManageService.update_api_tool_provider", + return_value={"ok": True}, + ), + ): + assert method(api)["ok"] + + def test_delete(self, app): + api = ToolApiProviderDeleteApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"provider": "p"}), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.ApiToolManageService.delete_api_tool_provider", + return_value={"result": "success"}, + ), + ): + assert method(api)["result"] == "success" + + def test_get(self, app): + api = ToolApiProviderGetApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/?provider=p"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.ApiToolManageService.get_api_tool_provider", + return_value={"x": 1}, + ), + ): + assert method(api) == {"x": 1} + + +class TestWorkflowApis: + def test_create(self, app): + api = ToolWorkflowProviderCreateApi() + method = unwrap(api.post) + + payload = { + "workflow_app_id": "123e4567-e89b-12d3-a456-426614174000", + "name": "n", + "label": "l", + "description": "d", + "icon": {}, + "parameters": [], + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.WorkflowToolManageService.create_workflow_tool", + return_value={"id": 1}, + ), + ): + assert method(api)["id"] == 1 + + def test_update_invalid(self, app): + api = ToolWorkflowProviderUpdateApi() + method = unwrap(api.post) + + payload = { + "workflow_tool_id": "123e4567-e89b-12d3-a456-426614174000", + "name": "Tool", + "label": "Tool Label", + "description": "A tool", + "icon": {}, + } + + with ( + app.test_request_context("/", json=payload), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.WorkflowToolManageService.update_workflow_tool", + return_value={"ok": True}, + ), + ): + result = method(api) + assert result["ok"] + + def test_delete(self, app): + api = ToolWorkflowProviderDeleteApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"workflow_tool_id": "123e4567-e89b-12d3-a456-426614174000"}), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.WorkflowToolManageService.delete_workflow_tool", + return_value={"ok": True}, + ), + ): + assert method(api)["ok"] + + def test_get_error(self, app): + api = ToolWorkflowProviderGetApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestLists: + def test_builtin_list(self, app): + api = ToolBuiltinListApi() + method = unwrap(api.get) + + m = MagicMock() + m.to_dict.return_value = {"x": 1} + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.list_builtin_tools", + return_value=[m], + ), + ): + assert method(api) == [{"x": 1}] + + def test_api_list(self, app): + api = ToolApiListApi() + method = unwrap(api.get) + + m = MagicMock() + m.to_dict.return_value = {"x": 1} + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(None, "t"), + ), + patch( + "controllers.console.workspace.tool_providers.ApiToolManageService.list_api_tools", + return_value=[m], + ), + ): + assert method(api) == [{"x": 1}] + + def test_workflow_list(self, app): + api = ToolWorkflowListApi() + method = unwrap(api.get) + + m = MagicMock() + m.to_dict.return_value = {"x": 1} + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.WorkflowToolManageService.list_tenant_workflow_tools", + return_value=[m], + ), + ): + assert method(api) == [{"x": 1}] + + +class TestLabels: + def test_labels(self, app): + api = ToolLabelsApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.ToolLabelsService.list_tool_labels", + return_value=["l1"], + ), + ): + assert method(api) == ["l1"] + + +class TestOAuth: + def test_oauth_no_client(self, app): + api = ToolPluginOAuthApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(id="u"), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.get_oauth_client", + return_value=None, + ), + ): + with pytest.raises(Forbidden): + method(api, "provider") + + def test_oauth_callback_no_cookie(self, app): + api = ToolOAuthCallback() + method = unwrap(api.get) + + with app.test_request_context("/"): + with pytest.raises(Forbidden): + method(api, "provider") + + +class TestOAuthCustomClient: + def test_save_custom_client(self, app): + api = ToolOAuthCustomClient() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"client_params": {"a": 1}}), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.save_custom_oauth_client_params", + return_value={"ok": True}, + ), + ): + assert method(api, "provider")["ok"] + + def test_get_custom_client(self, app): + api = ToolOAuthCustomClient() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.get_custom_oauth_client_params", + return_value={"client_id": "x"}, + ), + ): + assert method(api, "provider") == {"client_id": "x"} + + def test_delete_custom_client(self, app): + api = ToolOAuthCustomClient() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.tool_providers.current_account_with_tenant", + return_value=(MagicMock(), "t"), + ), + patch( + "controllers.console.workspace.tool_providers.BuiltinToolManageService.delete_custom_oauth_client_params", + return_value={"ok": True}, + ), + ): + assert method(api, "provider")["ok"] diff --git a/api/tests/unit_tests/controllers/console/workspace/test_trigger_providers.py b/api/tests/unit_tests/controllers/console/workspace/test_trigger_providers.py new file mode 100644 index 0000000000..4776bc7af0 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_trigger_providers.py @@ -0,0 +1,558 @@ +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import BadRequest, Forbidden + +from controllers.console.workspace.trigger_providers import ( + TriggerOAuthAuthorizeApi, + TriggerOAuthCallbackApi, + TriggerOAuthClientManageApi, + TriggerProviderIconApi, + TriggerProviderInfoApi, + TriggerProviderListApi, + TriggerSubscriptionBuilderBuildApi, + TriggerSubscriptionBuilderCreateApi, + TriggerSubscriptionBuilderGetApi, + TriggerSubscriptionBuilderLogsApi, + TriggerSubscriptionBuilderUpdateApi, + TriggerSubscriptionBuilderVerifyApi, + TriggerSubscriptionDeleteApi, + TriggerSubscriptionListApi, + TriggerSubscriptionUpdateApi, + TriggerSubscriptionVerifyApi, +) +from controllers.web.error import NotFoundError +from core.plugin.entities.plugin_daemon import CredentialType +from models.account import Account + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def mock_user(): + user = MagicMock(spec=Account) + user.id = "u1" + user.current_tenant_id = "t1" + return user + + +class TestTriggerProviderApis: + def test_icon_success(self, app): + api = TriggerProviderIconApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerManager.get_trigger_plugin_icon", + return_value="icon", + ), + ): + assert method(api, "github") == "icon" + + def test_list_providers(self, app): + api = TriggerProviderListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.list_trigger_providers", + return_value=[], + ), + ): + assert method(api) == [] + + def test_provider_info(self, app): + api = TriggerProviderInfoApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_trigger_provider", + return_value={"id": "p1"}, + ), + ): + assert method(api, "github") == {"id": "p1"} + + +class TestTriggerSubscriptionListApi: + def test_list_success(self, app): + api = TriggerSubscriptionListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.list_trigger_provider_subscriptions", + return_value=[], + ), + ): + assert method(api, "github") == [] + + def test_list_invalid_provider(self, app): + api = TriggerSubscriptionListApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.list_trigger_provider_subscriptions", + side_effect=ValueError("bad"), + ), + ): + result, status = method(api, "bad") + assert status == 404 + + +class TestTriggerSubscriptionBuilderApis: + def test_create_builder(self, app): + api = TriggerSubscriptionBuilderCreateApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"credential_type": "UNAUTHORIZED"}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.create_trigger_subscription_builder", + return_value={"id": "b1"}, + ), + ): + result = method(api, "github") + assert "subscription_builder" in result + + def test_get_builder(self, app): + api = TriggerSubscriptionBuilderGetApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.get_subscription_builder_by_id", + return_value={"id": "b1"}, + ), + ): + assert method(api, "github", "b1") == {"id": "b1"} + + def test_verify_builder(self, app): + api = TriggerSubscriptionBuilderVerifyApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"credentials": {"a": 1}}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.update_and_verify_builder", + return_value={"ok": True}, + ), + ): + assert method(api, "github", "b1") == {"ok": True} + + def test_verify_builder_error(self, app): + api = TriggerSubscriptionBuilderVerifyApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"credentials": {}}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.update_and_verify_builder", + side_effect=Exception("err"), + ), + ): + with pytest.raises(ValueError): + method(api, "github", "b1") + + def test_update_builder(self, app): + api = TriggerSubscriptionBuilderUpdateApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"name": "n"}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.update_trigger_subscription_builder", + return_value={"id": "b1"}, + ), + ): + assert method(api, "github", "b1") == {"id": "b1"} + + def test_logs(self, app): + api = TriggerSubscriptionBuilderLogsApi() + method = unwrap(api.get) + + log = MagicMock() + log.model_dump.return_value = {"a": 1} + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.list_logs", + return_value=[log], + ), + ): + assert "logs" in method(api, "github", "b1") + + def test_build(self, app): + api = TriggerSubscriptionBuilderBuildApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"name": "x"}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.update_and_build_builder", + return_value=None, + ), + ): + assert method(api, "github", "b1") == 200 + + +class TestTriggerSubscriptionCrud: + def test_update_rename_only(self, app): + api = TriggerSubscriptionUpdateApi() + method = unwrap(api.post) + + sub = MagicMock() + sub.provider_id = "github" + sub.credential_type = CredentialType.UNAUTHORIZED + + with ( + app.test_request_context("/", json={"name": "x"}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_subscription_by_id", + return_value=sub, + ), + patch("controllers.console.workspace.trigger_providers.TriggerProviderService.update_trigger_subscription"), + ): + assert method(api, "s1") == 200 + + def test_update_not_found(self, app): + api = TriggerSubscriptionUpdateApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"name": "x"}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_subscription_by_id", + return_value=None, + ), + ): + with pytest.raises(NotFoundError): + method(api, "x") + + def test_update_rebuild(self, app): + api = TriggerSubscriptionUpdateApi() + method = unwrap(api.post) + + sub = MagicMock() + sub.provider_id = "github" + sub.credential_type = CredentialType.OAUTH2 + sub.credentials = {} + sub.parameters = {} + + with ( + app.test_request_context("/", json={"credentials": {}}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_subscription_by_id", + return_value=sub, + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.rebuild_trigger_subscription" + ), + ): + assert method(api, "s1") == 200 + + def test_delete_subscription(self, app): + api = TriggerSubscriptionDeleteApi() + method = unwrap(api.post) + + mock_session = MagicMock() + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch("controllers.console.workspace.trigger_providers.db") as mock_db, + patch("controllers.console.workspace.trigger_providers.Session") as mock_session_cls, + patch("controllers.console.workspace.trigger_providers.TriggerProviderService.delete_trigger_provider"), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionOperatorService.delete_plugin_trigger_by_subscription" + ), + ): + mock_db.engine = MagicMock() + mock_session_cls.return_value.__enter__.return_value = mock_session + + result = method(api, "sub1") + + assert result["result"] == "success" + + def test_delete_subscription_value_error(self, app): + api = TriggerSubscriptionDeleteApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch("controllers.console.workspace.trigger_providers.db") as mock_db, + patch("controllers.console.workspace.trigger_providers.Session") as session_cls, + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.delete_trigger_provider", + side_effect=ValueError("bad"), + ), + ): + mock_db.engine = MagicMock() + session_cls.return_value.__enter__.return_value = MagicMock() + + with pytest.raises(BadRequest): + method(api, "sub1") + + +class TestTriggerOAuthApis: + def test_oauth_authorize_success(self, app): + api = TriggerOAuthAuthorizeApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_oauth_client", + return_value={"a": 1}, + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.create_trigger_subscription_builder", + return_value=MagicMock(id="b1"), + ), + patch( + "controllers.console.workspace.trigger_providers.OAuthProxyService.create_proxy_context", + return_value="ctx", + ), + patch( + "controllers.console.workspace.trigger_providers.OAuthHandler.get_authorization_url", + return_value=MagicMock(authorization_url="url"), + ), + ): + resp = method(api, "github") + assert resp.status_code == 200 + + def test_oauth_authorize_no_client(self, app): + api = TriggerOAuthAuthorizeApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_oauth_client", + return_value=None, + ), + ): + with pytest.raises(NotFoundError): + method(api, "github") + + def test_oauth_callback_forbidden(self, app): + api = TriggerOAuthCallbackApi() + method = unwrap(api.get) + + with app.test_request_context("/"): + with pytest.raises(Forbidden): + method(api, "github") + + def test_oauth_callback_success(self, app): + api = TriggerOAuthCallbackApi() + method = unwrap(api.get) + + ctx = { + "user_id": "u1", + "tenant_id": "t1", + "subscription_builder_id": "b1", + } + + with ( + app.test_request_context("/", headers={"Cookie": "context_id=ctx"}), + patch( + "controllers.console.workspace.trigger_providers.OAuthProxyService.use_proxy_context", return_value=ctx + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_oauth_client", + return_value={"a": 1}, + ), + patch( + "controllers.console.workspace.trigger_providers.OAuthHandler.get_credentials", + return_value=MagicMock(credentials={"a": 1}, expires_at=1), + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerSubscriptionBuilderService.update_trigger_subscription_builder" + ), + ): + resp = method(api, "github") + assert resp.status_code == 302 + + def test_oauth_callback_no_oauth_client(self, app): + api = TriggerOAuthCallbackApi() + method = unwrap(api.get) + + ctx = { + "user_id": "u1", + "tenant_id": "t1", + "subscription_builder_id": "b1", + } + + with ( + app.test_request_context("/", headers={"Cookie": "context_id=ctx"}), + patch( + "controllers.console.workspace.trigger_providers.OAuthProxyService.use_proxy_context", + return_value=ctx, + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_oauth_client", + return_value=None, + ), + ): + with pytest.raises(Forbidden): + method(api, "github") + + def test_oauth_callback_empty_credentials(self, app): + api = TriggerOAuthCallbackApi() + method = unwrap(api.get) + + ctx = { + "user_id": "u1", + "tenant_id": "t1", + "subscription_builder_id": "b1", + } + + with ( + app.test_request_context("/", headers={"Cookie": "context_id=ctx"}), + patch( + "controllers.console.workspace.trigger_providers.OAuthProxyService.use_proxy_context", + return_value=ctx, + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_oauth_client", + return_value={"a": 1}, + ), + patch( + "controllers.console.workspace.trigger_providers.OAuthHandler.get_credentials", + return_value=MagicMock(credentials=None, expires_at=None), + ), + ): + with pytest.raises(ValueError): + method(api, "github") + + +class TestTriggerOAuthClientManageApi: + def test_get_client(self, app): + api = TriggerOAuthClientManageApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.get_custom_oauth_client_params", + return_value={}, + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.is_oauth_custom_client_enabled", + return_value=False, + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.is_oauth_system_client_exists", + return_value=True, + ), + patch( + "controllers.console.workspace.trigger_providers.TriggerManager.get_trigger_provider", + return_value=MagicMock(get_oauth_client_schema=lambda: {}), + ), + ): + result = method(api, "github") + assert "configured" in result + + def test_post_client(self, app): + api = TriggerOAuthClientManageApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"enabled": True}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.save_custom_oauth_client_params", + return_value={"ok": True}, + ), + ): + assert method(api, "github") == {"ok": True} + + def test_delete_client(self, app): + api = TriggerOAuthClientManageApi() + method = unwrap(api.delete) + + with ( + app.test_request_context("/"), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.delete_custom_oauth_client_params", + return_value={"ok": True}, + ), + ): + assert method(api, "github") == {"ok": True} + + def test_oauth_client_post_value_error(self, app): + api = TriggerOAuthClientManageApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"enabled": True}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.save_custom_oauth_client_params", + side_effect=ValueError("bad"), + ), + ): + with pytest.raises(BadRequest): + method(api, "github") + + +class TestTriggerSubscriptionVerifyApi: + def test_verify_success(self, app): + api = TriggerSubscriptionVerifyApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"credentials": {}}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.verify_subscription_credentials", + return_value={"ok": True}, + ), + ): + assert method(api, "github", "s1") == {"ok": True} + + @pytest.mark.parametrize("raised_exception", [ValueError("bad"), Exception("boom")]) + def test_verify_errors(self, app, raised_exception): + api = TriggerSubscriptionVerifyApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/", json={"credentials": {}}), + patch("controllers.console.workspace.trigger_providers.current_user", mock_user()), + patch( + "controllers.console.workspace.trigger_providers.TriggerProviderService.verify_subscription_credentials", + side_effect=raised_exception, + ), + ): + with pytest.raises(BadRequest): + method(api, "github", "s1") diff --git a/api/tests/unit_tests/controllers/console/workspace/test_workspace.py b/api/tests/unit_tests/controllers/console/workspace/test_workspace.py new file mode 100644 index 0000000000..06f666fa60 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_workspace.py @@ -0,0 +1,605 @@ +from datetime import datetime +from io import BytesIO +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.datastructures import FileStorage +from werkzeug.exceptions import Unauthorized + +import services +from controllers.common.errors import ( + FilenameNotExistsError, + FileTooLargeError, + NoFileUploadedError, + TooManyFilesError, + UnsupportedFileTypeError, +) +from controllers.console.error import AccountNotLinkTenantError +from controllers.console.workspace.workspace import ( + CustomConfigWorkspaceApi, + SwitchWorkspaceApi, + TenantApi, + TenantListApi, + WebappLogoWorkspaceApi, + WorkspaceInfoApi, + WorkspaceListApi, + WorkspacePermissionApi, +) +from enums.cloud_plan import CloudPlan +from models.account import TenantStatus + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +class TestTenantListApi: + def test_get_success(self, app): + api = TenantListApi() + method = unwrap(api.get) + + tenant1 = MagicMock( + id="t1", + name="Tenant 1", + status="active", + created_at=datetime.utcnow(), + ) + tenant2 = MagicMock( + id="t2", + name="Tenant 2", + status="active", + created_at=datetime.utcnow(), + ) + + features = MagicMock() + features.billing.enabled = True + features.billing.subscription.plan = CloudPlan.SANDBOX + + with ( + app.test_request_context("/workspaces"), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch( + "controllers.console.workspace.workspace.TenantService.get_join_tenants", + return_value=[tenant1, tenant2], + ), + patch("controllers.console.workspace.workspace.FeatureService.get_features", return_value=features), + ): + result, status = method(api) + + assert status == 200 + assert len(result["workspaces"]) == 2 + assert result["workspaces"][0]["current"] is True + + def test_get_billing_disabled(self, app): + api = TenantListApi() + method = unwrap(api.get) + + tenant = MagicMock( + id="t1", + name="Tenant", + status="active", + created_at=datetime.utcnow(), + ) + + features = MagicMock() + features.billing.enabled = False + + with ( + app.test_request_context("/workspaces"), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), "t1"), + ), + patch( + "controllers.console.workspace.workspace.TenantService.get_join_tenants", + return_value=[tenant], + ), + patch( + "controllers.console.workspace.workspace.FeatureService.get_features", + return_value=features, + ), + ): + result, status = method(api) + + assert status == 200 + assert result["workspaces"][0]["plan"] == CloudPlan.SANDBOX + + +class TestWorkspaceListApi: + def test_get_success(self, app): + api = WorkspaceListApi() + method = unwrap(api.get) + + tenant = MagicMock(id="t1", name="T", status="active", created_at=datetime.utcnow()) + + paginate_result = MagicMock( + items=[tenant], + has_next=False, + total=1, + ) + + with ( + app.test_request_context("/all-workspaces", query_string={"page": 1, "limit": 20}), + patch("controllers.console.workspace.workspace.db.paginate", return_value=paginate_result), + ): + result, status = method(api) + + assert status == 200 + assert result["total"] == 1 + assert result["has_more"] is False + + def test_get_has_next_true(self, app): + api = WorkspaceListApi() + method = unwrap(api.get) + + tenant = MagicMock( + id="t1", + name="T", + status="active", + created_at=datetime.utcnow(), + ) + + paginate_result = MagicMock( + items=[tenant], + has_next=True, + total=10, + ) + + with ( + app.test_request_context("/all-workspaces", query_string={"page": 1, "limit": 1}), + patch( + "controllers.console.workspace.workspace.db.paginate", + return_value=paginate_result, + ), + ): + result, status = method(api) + + assert status == 200 + assert result["has_more"] is True + + +class TestTenantApi: + def test_post_active_tenant(self, app): + api = TenantApi() + method = unwrap(api.post) + + tenant = MagicMock(status="active") + + user = MagicMock(current_tenant=tenant) + + with ( + app.test_request_context("/workspaces/current"), + patch("controllers.console.workspace.workspace.current_account_with_tenant", return_value=(user, "t1")), + patch( + "controllers.console.workspace.workspace.WorkspaceService.get_tenant_info", return_value={"id": "t1"} + ), + ): + result, status = method(api) + + assert status == 200 + assert result["id"] == "t1" + + def test_post_archived_with_switch(self, app): + api = TenantApi() + method = unwrap(api.post) + + archived = MagicMock(status=TenantStatus.ARCHIVE) + new_tenant = MagicMock(status="active") + + user = MagicMock(current_tenant=archived) + + with ( + app.test_request_context("/workspaces/current"), + patch("controllers.console.workspace.workspace.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.workspace.TenantService.get_join_tenants", return_value=[new_tenant]), + patch("controllers.console.workspace.workspace.TenantService.switch_tenant"), + patch( + "controllers.console.workspace.workspace.WorkspaceService.get_tenant_info", return_value={"id": "new"} + ), + ): + result, status = method(api) + + assert result["id"] == "new" + + def test_post_archived_no_tenant(self, app): + api = TenantApi() + method = unwrap(api.post) + + user = MagicMock(current_tenant=MagicMock(status=TenantStatus.ARCHIVE)) + + with ( + app.test_request_context("/workspaces/current"), + patch("controllers.console.workspace.workspace.current_account_with_tenant", return_value=(user, "t1")), + patch("controllers.console.workspace.workspace.TenantService.get_join_tenants", return_value=[]), + ): + with pytest.raises(Unauthorized): + method(api) + + def test_post_info_path(self, app): + api = TenantApi() + method = unwrap(api.post) + + tenant = MagicMock(status="active") + user = MagicMock(current_tenant=tenant) + + with ( + app.test_request_context("/info"), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(user, "t1"), + ), + patch( + "controllers.console.workspace.workspace.WorkspaceService.get_tenant_info", + return_value={"id": "t1"}, + ), + patch("controllers.console.workspace.workspace.logger.warning") as warn_mock, + ): + result, status = method(api) + + warn_mock.assert_called_once() + assert status == 200 + + +class TestSwitchWorkspaceApi: + def test_switch_success(self, app): + api = SwitchWorkspaceApi() + method = unwrap(api.post) + + payload = {"tenant_id": "t2"} + tenant = MagicMock(id="t2") + + with ( + app.test_request_context("/workspaces/switch", json=payload), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch("controllers.console.workspace.workspace.TenantService.switch_tenant"), + patch("controllers.console.workspace.workspace.db.session.query") as query_mock, + patch( + "controllers.console.workspace.workspace.WorkspaceService.get_tenant_info", return_value={"id": "t2"} + ), + ): + query_mock.return_value.get.return_value = tenant + result = method(api) + + assert result["result"] == "success" + + def test_switch_not_linked(self, app): + api = SwitchWorkspaceApi() + method = unwrap(api.post) + + payload = {"tenant_id": "bad"} + + with ( + app.test_request_context("/workspaces/switch", json=payload), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch("controllers.console.workspace.workspace.TenantService.switch_tenant", side_effect=Exception), + ): + with pytest.raises(AccountNotLinkTenantError): + method(api) + + def test_switch_tenant_not_found(self, app): + api = SwitchWorkspaceApi() + method = unwrap(api.post) + + payload = {"tenant_id": "missing"} + + with ( + app.test_request_context("/workspaces/switch", json=payload), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), "t1"), + ), + patch("controllers.console.workspace.workspace.TenantService.switch_tenant"), + patch("controllers.console.workspace.workspace.db.session.query") as query_mock, + ): + query_mock.return_value.get.return_value = None + + with pytest.raises(ValueError): + method(api) + + +class TestCustomConfigWorkspaceApi: + def test_post_success(self, app): + api = CustomConfigWorkspaceApi() + method = unwrap(api.post) + + tenant = MagicMock(custom_config_dict={}) + + payload = {"remove_webapp_brand": True} + + with ( + app.test_request_context("/workspaces/custom-config", json=payload), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch("controllers.console.workspace.workspace.db.get_or_404", return_value=tenant), + patch("controllers.console.workspace.workspace.db.session.commit"), + patch( + "controllers.console.workspace.workspace.WorkspaceService.get_tenant_info", return_value={"id": "t1"} + ), + ): + result = method(api) + + assert result["result"] == "success" + + def test_logo_fallback(self, app): + api = CustomConfigWorkspaceApi() + method = unwrap(api.post) + + tenant = MagicMock(custom_config_dict={"replace_webapp_logo": "old-logo"}) + + payload = {"remove_webapp_brand": False} + + with ( + app.test_request_context("/workspaces/custom-config", json=payload), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), "t1"), + ), + patch( + "controllers.console.workspace.workspace.db.get_or_404", + return_value=tenant, + ), + patch("controllers.console.workspace.workspace.db.session.commit"), + patch( + "controllers.console.workspace.workspace.WorkspaceService.get_tenant_info", + return_value={"id": "t1"}, + ), + ): + result = method(api) + + assert tenant.custom_config_dict["replace_webapp_logo"] == "old-logo" + assert result["result"] == "success" + + +class TestWebappLogoWorkspaceApi: + def test_no_file(self, app): + api = WebappLogoWorkspaceApi() + method = unwrap(api.post) + + with ( + app.test_request_context("/upload", data={}), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + ): + with pytest.raises(NoFileUploadedError): + method(api) + + def test_too_many_files(self, app): + api = WebappLogoWorkspaceApi() + method = unwrap(api.post) + + data = { + "file": MagicMock(), + "extra": MagicMock(), + } + + with ( + app.test_request_context("/upload", data=data), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), "t1"), + ), + ): + with pytest.raises(TooManyFilesError): + method(api) + + def test_invalid_extension(self, app): + api = WebappLogoWorkspaceApi() + method = unwrap(api.post) + + file = MagicMock(filename="test.txt") + + with ( + app.test_request_context("/upload", data={"file": file}), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + ): + with pytest.raises(UnsupportedFileTypeError): + method(api) + + def test_upload_success(self, app): + api = WebappLogoWorkspaceApi() + method = unwrap(api.post) + + file = FileStorage( + stream=BytesIO(b"data"), + filename="logo.png", + content_type="image/png", + ) + + upload = MagicMock(id="file1") + + with ( + app.test_request_context( + "/upload", + data={"file": file}, + content_type="multipart/form-data", + ), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch("controllers.console.workspace.workspace.FileService") as fs, + patch("controllers.console.workspace.workspace.db") as mock_db, + ): + mock_db.engine = MagicMock() + fs.return_value.upload_file.return_value = upload + + result, status = method(api) + + assert status == 201 + assert result["id"] == "file1" + + def test_filename_missing(self, app): + api = WebappLogoWorkspaceApi() + method = unwrap(api.post) + + file = FileStorage( + stream=BytesIO(b"data"), + filename="", + content_type="image/png", + ) + + with ( + app.test_request_context( + "/upload", + data={"file": file}, + content_type="multipart/form-data", + ), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), "t1"), + ), + ): + with pytest.raises(FilenameNotExistsError): + method(api) + + def test_file_too_large(self, app): + api = WebappLogoWorkspaceApi() + method = unwrap(api.post) + + file = FileStorage( + stream=BytesIO(b"x"), + filename="logo.png", + content_type="image/png", + ) + + with ( + app.test_request_context( + "/upload", + data={"file": file}, + content_type="multipart/form-data", + ), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), "t1"), + ), + patch("controllers.console.workspace.workspace.FileService") as fs, + patch("controllers.console.workspace.workspace.db") as mock_db, + ): + mock_db.engine = MagicMock() + fs.return_value.upload_file.side_effect = services.errors.file.FileTooLargeError("too big") + + with pytest.raises(FileTooLargeError): + method(api) + + def test_service_unsupported_file(self, app): + api = WebappLogoWorkspaceApi() + method = unwrap(api.post) + + file = FileStorage( + stream=BytesIO(b"x"), + filename="logo.png", + content_type="image/png", + ) + + with ( + app.test_request_context( + "/upload", + data={"file": file}, + content_type="multipart/form-data", + ), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), "t1"), + ), + patch("controllers.console.workspace.workspace.FileService") as fs, + patch("controllers.console.workspace.workspace.db") as mock_db, + ): + mock_db.engine = MagicMock() + fs.return_value.upload_file.side_effect = services.errors.file.UnsupportedFileTypeError() + + with pytest.raises(UnsupportedFileTypeError): + method(api) + + +class TestWorkspaceInfoApi: + def test_post_success(self, app): + api = WorkspaceInfoApi() + method = unwrap(api.post) + + tenant = MagicMock() + + payload = {"name": "New Name"} + + with ( + app.test_request_context("/workspaces/info", json=payload), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch("controllers.console.workspace.workspace.db.get_or_404", return_value=tenant), + patch("controllers.console.workspace.workspace.db.session.commit"), + patch( + "controllers.console.workspace.workspace.WorkspaceService.get_tenant_info", + return_value={"name": "New Name"}, + ), + ): + result = method(api) + + assert result["result"] == "success" + + def test_no_current_tenant(self, app): + api = WorkspaceInfoApi() + method = unwrap(api.post) + + payload = {"name": "X"} + + with ( + app.test_request_context("/workspaces/info", json=payload), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), None), + ), + ): + with pytest.raises(ValueError): + method(api) + + +class TestWorkspacePermissionApi: + def test_get_success(self, app): + api = WorkspacePermissionApi() + method = unwrap(api.get) + + permission = MagicMock( + workspace_id="t1", + allow_member_invite=True, + allow_owner_transfer=False, + ) + + with ( + app.test_request_context("/permission"), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", return_value=(MagicMock(), "t1") + ), + patch( + "controllers.console.workspace.workspace.EnterpriseService.WorkspacePermissionService.get_permission", + return_value=permission, + ), + ): + result, status = method(api) + + assert status == 200 + assert result["workspace_id"] == "t1" + + def test_no_current_tenant(self, app): + api = WorkspacePermissionApi() + method = unwrap(api.get) + + with ( + app.test_request_context("/permission"), + patch( + "controllers.console.workspace.workspace.current_account_with_tenant", + return_value=(MagicMock(), None), + ), + ): + with pytest.raises(ValueError): + method(api) diff --git a/api/tests/unit_tests/controllers/console/workspace/test_workspace_wraps.py b/api/tests/unit_tests/controllers/console/workspace/test_workspace_wraps.py new file mode 100644 index 0000000000..b290748155 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/workspace/test_workspace_wraps.py @@ -0,0 +1,142 @@ +from __future__ import annotations + +import importlib +from types import SimpleNamespace + +import pytest +from werkzeug.exceptions import Forbidden + +from controllers.console.workspace import plugin_permission_required +from models.account import TenantPluginPermission + + +class _SessionStub: + def __init__(self, permission): + self._permission = permission + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def query(self, *_args, **_kwargs): + return self + + def where(self, *_args, **_kwargs): + return self + + def first(self): + return self._permission + + +def _workspace_module(): + return importlib.import_module(plugin_permission_required.__module__) + + +def _patch_session(monkeypatch: pytest.MonkeyPatch, permission): + module = _workspace_module() + monkeypatch.setattr(module, "Session", lambda *_args, **_kwargs: _SessionStub(permission)) + monkeypatch.setattr(module, "db", SimpleNamespace(engine=object())) + + +def test_plugin_permission_allows_without_permission(monkeypatch: pytest.MonkeyPatch) -> None: + user = SimpleNamespace(is_admin_or_owner=False) + module = _workspace_module() + monkeypatch.setattr(module, "current_account_with_tenant", lambda: (user, "t1")) + _patch_session(monkeypatch, None) + + @plugin_permission_required() + def handler(): + return "ok" + + assert handler() == "ok" + + +def test_plugin_permission_install_nobody_forbidden(monkeypatch: pytest.MonkeyPatch) -> None: + user = SimpleNamespace(is_admin_or_owner=True) + permission = SimpleNamespace( + install_permission=TenantPluginPermission.InstallPermission.NOBODY, + debug_permission=TenantPluginPermission.DebugPermission.EVERYONE, + ) + module = _workspace_module() + monkeypatch.setattr(module, "current_account_with_tenant", lambda: (user, "t1")) + _patch_session(monkeypatch, permission) + + @plugin_permission_required(install_required=True) + def handler(): + return "ok" + + with pytest.raises(Forbidden): + handler() + + +def test_plugin_permission_install_admin_requires_admin(monkeypatch: pytest.MonkeyPatch) -> None: + user = SimpleNamespace(is_admin_or_owner=False) + permission = SimpleNamespace( + install_permission=TenantPluginPermission.InstallPermission.ADMINS, + debug_permission=TenantPluginPermission.DebugPermission.EVERYONE, + ) + module = _workspace_module() + monkeypatch.setattr(module, "current_account_with_tenant", lambda: (user, "t1")) + _patch_session(monkeypatch, permission) + + @plugin_permission_required(install_required=True) + def handler(): + return "ok" + + with pytest.raises(Forbidden): + handler() + + +def test_plugin_permission_install_admin_allows_admin(monkeypatch: pytest.MonkeyPatch) -> None: + user = SimpleNamespace(is_admin_or_owner=True) + permission = SimpleNamespace( + install_permission=TenantPluginPermission.InstallPermission.ADMINS, + debug_permission=TenantPluginPermission.DebugPermission.EVERYONE, + ) + module = _workspace_module() + monkeypatch.setattr(module, "current_account_with_tenant", lambda: (user, "t1")) + _patch_session(monkeypatch, permission) + + @plugin_permission_required(install_required=True) + def handler(): + return "ok" + + assert handler() == "ok" + + +def test_plugin_permission_debug_nobody_forbidden(monkeypatch: pytest.MonkeyPatch) -> None: + user = SimpleNamespace(is_admin_or_owner=True) + permission = SimpleNamespace( + install_permission=TenantPluginPermission.InstallPermission.EVERYONE, + debug_permission=TenantPluginPermission.DebugPermission.NOBODY, + ) + module = _workspace_module() + monkeypatch.setattr(module, "current_account_with_tenant", lambda: (user, "t1")) + _patch_session(monkeypatch, permission) + + @plugin_permission_required(debug_required=True) + def handler(): + return "ok" + + with pytest.raises(Forbidden): + handler() + + +def test_plugin_permission_debug_admin_requires_admin(monkeypatch: pytest.MonkeyPatch) -> None: + user = SimpleNamespace(is_admin_or_owner=False) + permission = SimpleNamespace( + install_permission=TenantPluginPermission.InstallPermission.EVERYONE, + debug_permission=TenantPluginPermission.DebugPermission.ADMINS, + ) + module = _workspace_module() + monkeypatch.setattr(module, "current_account_with_tenant", lambda: (user, "t1")) + _patch_session(monkeypatch, permission) + + @plugin_permission_required(debug_required=True) + def handler(): + return "ok" + + with pytest.raises(Forbidden): + handler() diff --git a/api/tests/unit_tests/controllers/inner_api/__init__.py b/api/tests/unit_tests/controllers/inner_api/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/controllers/inner_api/plugin/__init__.py b/api/tests/unit_tests/controllers/inner_api/plugin/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/controllers/inner_api/plugin/test_plugin.py b/api/tests/unit_tests/controllers/inner_api/plugin/test_plugin.py new file mode 100644 index 0000000000..844f04fe72 --- /dev/null +++ b/api/tests/unit_tests/controllers/inner_api/plugin/test_plugin.py @@ -0,0 +1,313 @@ +""" +Unit tests for inner_api plugin endpoints + +Tests endpoint structure (method existence) for all plugin APIs, plus +handler-level logic tests for representative non-streaming endpoints. +Auth/setup decorators are tested separately in test_auth_wraps.py; +handler tests use inspect.unwrap() to bypass them. +""" + +import inspect +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.inner_api.plugin.plugin import ( + PluginFetchAppInfoApi, + PluginInvokeAppApi, + PluginInvokeEncryptApi, + PluginInvokeLLMApi, + PluginInvokeLLMWithStructuredOutputApi, + PluginInvokeModerationApi, + PluginInvokeParameterExtractorNodeApi, + PluginInvokeQuestionClassifierNodeApi, + PluginInvokeRerankApi, + PluginInvokeSpeech2TextApi, + PluginInvokeSummaryApi, + PluginInvokeTextEmbeddingApi, + PluginInvokeToolApi, + PluginInvokeTTSApi, + PluginUploadFileRequestApi, +) + + +def _extract_raw_post(cls): + """Extract the raw post() method from a plugin endpoint class. + + Plugin endpoint methods are wrapped by several decorators (get_user_tenant, + setup_required, plugin_inner_api_only, plugin_data). These decorators + use @wraps where possible. This helper ensures we retrieve the original + post(self, user_model, tenant_model, payload) function by unwrapping + and, if necessary, walking the closure of the innermost wrapper. + """ + bottom = inspect.unwrap(cls.post) + + # If unwrap() didn't get us to the raw function (e.g. if a decorator + # missed @wraps), try to extract it from the closure if it looks like + # a plugin_data or similar wrapper that closes over 'view_func'. + if hasattr(bottom, "__code__") and "view_func" in bottom.__code__.co_freevars: + try: + idx = bottom.__code__.co_freevars.index("view_func") + return bottom.__closure__[idx].cell_contents + except (AttributeError, TypeError, IndexError): + pass + + return bottom + + +class TestPluginInvokeLLMApi: + """Test PluginInvokeLLMApi endpoint structure""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeLLMApi() + + def test_has_post_method(self, api_instance): + """Test that endpoint has post method""" + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeLLMWithStructuredOutputApi: + """Test PluginInvokeLLMWithStructuredOutputApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeLLMWithStructuredOutputApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeTextEmbeddingApi: + """Test PluginInvokeTextEmbeddingApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeTextEmbeddingApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeRerankApi: + """Test PluginInvokeRerankApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeRerankApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeTTSApi: + """Test PluginInvokeTTSApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeTTSApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeSpeech2TextApi: + """Test PluginInvokeSpeech2TextApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeSpeech2TextApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeModerationApi: + """Test PluginInvokeModerationApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeModerationApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeToolApi: + """Test PluginInvokeToolApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeToolApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeParameterExtractorNodeApi: + """Test PluginInvokeParameterExtractorNodeApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeParameterExtractorNodeApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeQuestionClassifierNodeApi: + """Test PluginInvokeQuestionClassifierNodeApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeQuestionClassifierNodeApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeAppApi: + """Test PluginInvokeAppApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeAppApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginInvokeEncryptApi: + """Test PluginInvokeEncryptApi endpoint structure and handler logic""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeEncryptApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + @patch("controllers.inner_api.plugin.plugin.PluginEncrypter") + def test_post_returns_encrypted_data(self, mock_encrypter, api_instance, app: Flask): + """Test that post() delegates to PluginEncrypter and returns model_dump output""" + # Arrange + mock_encrypter.invoke_encrypt.return_value = {"encrypted": "data"} + mock_tenant = MagicMock() + mock_user = MagicMock() + mock_payload = MagicMock() + + # Act — extract raw post() bypassing all decorators including plugin_data + raw_post = _extract_raw_post(PluginInvokeEncryptApi) + result = raw_post(api_instance, user_model=mock_user, tenant_model=mock_tenant, payload=mock_payload) + + # Assert + mock_encrypter.invoke_encrypt.assert_called_once_with(mock_tenant, mock_payload) + assert result["data"] == {"encrypted": "data"} + assert result.get("error") == "" + + @patch("controllers.inner_api.plugin.plugin.PluginEncrypter") + def test_post_returns_error_on_exception(self, mock_encrypter, api_instance, app: Flask): + """Test that post() catches exceptions and returns error response""" + # Arrange + mock_encrypter.invoke_encrypt.side_effect = RuntimeError("encrypt failed") + mock_tenant = MagicMock() + mock_user = MagicMock() + mock_payload = MagicMock() + + # Act + raw_post = _extract_raw_post(PluginInvokeEncryptApi) + result = raw_post(api_instance, user_model=mock_user, tenant_model=mock_tenant, payload=mock_payload) + + # Assert + assert "encrypt failed" in result["error"] + + +class TestPluginInvokeSummaryApi: + """Test PluginInvokeSummaryApi endpoint""" + + @pytest.fixture + def api_instance(self): + return PluginInvokeSummaryApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + +class TestPluginUploadFileRequestApi: + """Test PluginUploadFileRequestApi endpoint structure and handler logic""" + + @pytest.fixture + def api_instance(self): + return PluginUploadFileRequestApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + @patch("controllers.inner_api.plugin.plugin.get_signed_file_url_for_plugin") + def test_post_returns_signed_url(self, mock_get_url, api_instance, app: Flask): + """Test that post() generates a signed URL and returns it""" + # Arrange + mock_get_url.return_value = "https://storage.example.com/signed-upload-url" + mock_tenant = MagicMock() + mock_tenant.id = "tenant-id" + mock_user = MagicMock() + mock_user.id = "user-id" + mock_payload = MagicMock() + mock_payload.filename = "test.pdf" + mock_payload.mimetype = "application/pdf" + + # Act + raw_post = _extract_raw_post(PluginUploadFileRequestApi) + result = raw_post(api_instance, user_model=mock_user, tenant_model=mock_tenant, payload=mock_payload) + + # Assert + mock_get_url.assert_called_once_with( + filename="test.pdf", mimetype="application/pdf", tenant_id="tenant-id", user_id="user-id" + ) + assert result["data"]["url"] == "https://storage.example.com/signed-upload-url" + + +class TestPluginFetchAppInfoApi: + """Test PluginFetchAppInfoApi endpoint structure and handler logic""" + + @pytest.fixture + def api_instance(self): + return PluginFetchAppInfoApi() + + def test_has_post_method(self, api_instance): + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + @patch("controllers.inner_api.plugin.plugin.PluginAppBackwardsInvocation") + def test_post_returns_app_info(self, mock_invocation, api_instance, app: Flask): + """Test that post() fetches app info and returns it""" + # Arrange + mock_invocation.fetch_app_info.return_value = {"app_name": "My App", "mode": "chat"} + mock_tenant = MagicMock() + mock_tenant.id = "tenant-id" + mock_user = MagicMock() + mock_payload = MagicMock() + mock_payload.app_id = "app-123" + + # Act + raw_post = _extract_raw_post(PluginFetchAppInfoApi) + result = raw_post(api_instance, user_model=mock_user, tenant_model=mock_tenant, payload=mock_payload) + + # Assert + mock_invocation.fetch_app_info.assert_called_once_with("app-123", "tenant-id") + assert result["data"] == {"app_name": "My App", "mode": "chat"} diff --git a/api/tests/unit_tests/controllers/inner_api/plugin/test_plugin_wraps.py b/api/tests/unit_tests/controllers/inner_api/plugin/test_plugin_wraps.py new file mode 100644 index 0000000000..6de07a23e5 --- /dev/null +++ b/api/tests/unit_tests/controllers/inner_api/plugin/test_plugin_wraps.py @@ -0,0 +1,305 @@ +""" +Unit tests for inner_api plugin decorators +""" + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from pydantic import ValidationError + +from controllers.inner_api.plugin.wraps import ( + TenantUserPayload, + get_user, + get_user_tenant, + plugin_data, +) + + +class TestTenantUserPayload: + """Test TenantUserPayload Pydantic model""" + + def test_valid_payload(self): + """Test valid payload passes validation""" + data = {"tenant_id": "tenant123", "user_id": "user456"} + payload = TenantUserPayload.model_validate(data) + assert payload.tenant_id == "tenant123" + assert payload.user_id == "user456" + + def test_missing_tenant_id(self): + """Test missing tenant_id raises ValidationError""" + with pytest.raises(ValidationError): + TenantUserPayload.model_validate({"user_id": "user456"}) + + def test_missing_user_id(self): + """Test missing user_id raises ValidationError""" + with pytest.raises(ValidationError): + TenantUserPayload.model_validate({"tenant_id": "tenant123"}) + + +class TestGetUser: + """Test get_user function""" + + @patch("controllers.inner_api.plugin.wraps.EndUser") + @patch("controllers.inner_api.plugin.wraps.Session") + @patch("controllers.inner_api.plugin.wraps.db") + def test_should_return_existing_user_by_id(self, mock_db, mock_session_class, mock_enduser_class, app: Flask): + """Test returning existing user when found by ID""" + # Arrange + mock_user = MagicMock() + mock_user.id = "user123" + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.query.return_value.where.return_value.first.return_value = mock_user + + # Act + with app.app_context(): + result = get_user("tenant123", "user123") + + # Assert + assert result == mock_user + mock_session.query.assert_called_once() + + @patch("controllers.inner_api.plugin.wraps.EndUser") + @patch("controllers.inner_api.plugin.wraps.Session") + @patch("controllers.inner_api.plugin.wraps.db") + def test_should_return_existing_anonymous_user_by_session_id( + self, mock_db, mock_session_class, mock_enduser_class, app: Flask + ): + """Test returning existing anonymous user by session_id""" + # Arrange + mock_user = MagicMock() + mock_user.session_id = "anonymous_session" + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.query.return_value.where.return_value.first.return_value = mock_user + + # Act + with app.app_context(): + result = get_user("tenant123", "anonymous_session") + + # Assert + assert result == mock_user + + @patch("controllers.inner_api.plugin.wraps.EndUser") + @patch("controllers.inner_api.plugin.wraps.Session") + @patch("controllers.inner_api.plugin.wraps.db") + def test_should_create_new_user_when_not_found(self, mock_db, mock_session_class, mock_enduser_class, app: Flask): + """Test creating new user when not found in database""" + # Arrange + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.query.return_value.where.return_value.first.return_value = None + mock_new_user = MagicMock() + mock_enduser_class.return_value = mock_new_user + + # Act + with app.app_context(): + result = get_user("tenant123", "user123") + + # Assert + assert result == mock_new_user + mock_session.add.assert_called_once() + mock_session.commit.assert_called_once() + mock_session.refresh.assert_called_once() + + @patch("controllers.inner_api.plugin.wraps.EndUser") + @patch("controllers.inner_api.plugin.wraps.Session") + @patch("controllers.inner_api.plugin.wraps.db") + def test_should_use_default_session_id_when_user_id_none( + self, mock_db, mock_session_class, mock_enduser_class, app: Flask + ): + """Test using default session ID when user_id is None""" + # Arrange + mock_user = MagicMock() + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.query.return_value.where.return_value.first.return_value = mock_user + + # Act + with app.app_context(): + result = get_user("tenant123", None) + + # Assert + assert result == mock_user + + @patch("controllers.inner_api.plugin.wraps.EndUser") + @patch("controllers.inner_api.plugin.wraps.Session") + @patch("controllers.inner_api.plugin.wraps.db") + def test_should_raise_error_on_database_exception( + self, mock_db, mock_session_class, mock_enduser_class, app: Flask + ): + """Test raising ValueError when database operation fails""" + # Arrange + mock_session = MagicMock() + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.query.side_effect = Exception("Database error") + + # Act & Assert + with app.app_context(): + with pytest.raises(ValueError, match="user not found"): + get_user("tenant123", "user123") + + +class TestGetUserTenant: + """Test get_user_tenant decorator""" + + @patch("controllers.inner_api.plugin.wraps.Tenant") + def test_should_inject_tenant_and_user_models(self, mock_tenant_class, app: Flask, monkeypatch): + """Test that decorator injects tenant_model and user_model into kwargs""" + + # Arrange + @get_user_tenant + def protected_view(tenant_model, user_model, **kwargs): + return {"tenant": tenant_model, "user": user_model} + + mock_tenant = MagicMock() + mock_tenant.id = "tenant123" + mock_user = MagicMock() + mock_user.id = "user456" + + # Act + with app.test_request_context(json={"tenant_id": "tenant123", "user_id": "user456"}): + monkeypatch.setattr(app, "login_manager", MagicMock(), raising=False) + with patch("controllers.inner_api.plugin.wraps.db.session.query") as mock_query: + with patch("controllers.inner_api.plugin.wraps.get_user") as mock_get_user: + mock_query.return_value.where.return_value.first.return_value = mock_tenant + mock_get_user.return_value = mock_user + result = protected_view() + + # Assert + assert result["tenant"] == mock_tenant + assert result["user"] == mock_user + + def test_should_raise_error_when_tenant_id_missing(self, app: Flask): + """Test that Pydantic ValidationError is raised when tenant_id is missing from payload""" + + # Arrange + @get_user_tenant + def protected_view(tenant_model, user_model, **kwargs): + return "success" + + # Act & Assert - Pydantic validates payload before manual check + with app.test_request_context(json={"user_id": "user456"}): + with pytest.raises(ValidationError): + protected_view() + + def test_should_raise_error_when_tenant_not_found(self, app: Flask): + """Test that ValueError is raised when tenant is not found""" + + # Arrange + @get_user_tenant + def protected_view(tenant_model, user_model, **kwargs): + return "success" + + # Act & Assert + with app.test_request_context(json={"tenant_id": "nonexistent", "user_id": "user456"}): + with patch("controllers.inner_api.plugin.wraps.db.session.query") as mock_query: + mock_query.return_value.where.return_value.first.return_value = None + with pytest.raises(ValueError, match="tenant not found"): + protected_view() + + @patch("controllers.inner_api.plugin.wraps.Tenant") + def test_should_use_default_session_id_when_user_id_empty(self, mock_tenant_class, app: Flask, monkeypatch): + """Test that default session ID is used when user_id is empty string""" + + # Arrange + @get_user_tenant + def protected_view(tenant_model, user_model, **kwargs): + return {"tenant": tenant_model, "user": user_model} + + mock_tenant = MagicMock() + mock_tenant.id = "tenant123" + mock_user = MagicMock() + + # Act - use empty string for user_id to trigger default logic + with app.test_request_context(json={"tenant_id": "tenant123", "user_id": ""}): + monkeypatch.setattr(app, "login_manager", MagicMock(), raising=False) + with patch("controllers.inner_api.plugin.wraps.db.session.query") as mock_query: + with patch("controllers.inner_api.plugin.wraps.get_user") as mock_get_user: + mock_query.return_value.where.return_value.first.return_value = mock_tenant + mock_get_user.return_value = mock_user + result = protected_view() + + # Assert + assert result["tenant"] == mock_tenant + assert result["user"] == mock_user + from models.model import DefaultEndUserSessionID + + mock_get_user.assert_called_once_with("tenant123", DefaultEndUserSessionID.DEFAULT_SESSION_ID) + + +class PluginTestPayload: + """Simple test payload class""" + + def __init__(self, data: dict): + self.value = data.get("value") + + @classmethod + def model_validate(cls, data: dict): + return cls(data) + + +class TestPluginData: + """Test plugin_data decorator""" + + def test_should_inject_valid_payload(self, app: Flask): + """Test that valid payload is injected into kwargs""" + + # Arrange + @plugin_data(payload_type=PluginTestPayload) + def protected_view(payload, **kwargs): + return payload + + # Act + with app.test_request_context(json={"value": "test_data"}): + result = protected_view() + + # Assert + assert result.value == "test_data" + + def test_should_raise_error_on_invalid_json(self, app: Flask): + """Test that ValueError is raised when JSON parsing fails""" + + # Arrange + @plugin_data(payload_type=PluginTestPayload) + def protected_view(payload, **kwargs): + return payload + + # Act & Assert - Malformed JSON triggers ValueError + with app.test_request_context(data="not valid json", content_type="application/json"): + with pytest.raises(ValueError): + protected_view() + + def test_should_raise_error_on_invalid_payload(self, app: Flask): + """Test that ValueError is raised when payload validation fails""" + + # Arrange + class InvalidPayload: + @classmethod + def model_validate(cls, data: dict): + raise Exception("Validation failed") + + @plugin_data(payload_type=InvalidPayload) + def protected_view(payload, **kwargs): + return payload + + # Act & Assert + with app.test_request_context(json={"data": "test"}): + with pytest.raises(ValueError, match="invalid payload"): + protected_view() + + def test_should_work_as_parameterized_decorator(self, app: Flask): + """Test that decorator works when used with parentheses""" + + # Arrange + @plugin_data(payload_type=PluginTestPayload) + def protected_view(payload, **kwargs): + return payload + + # Act + with app.test_request_context(json={"value": "parameterized"}): + result = protected_view() + + # Assert + assert result.value == "parameterized" diff --git a/api/tests/unit_tests/controllers/inner_api/test_auth_wraps.py b/api/tests/unit_tests/controllers/inner_api/test_auth_wraps.py new file mode 100644 index 0000000000..883ccdea2c --- /dev/null +++ b/api/tests/unit_tests/controllers/inner_api/test_auth_wraps.py @@ -0,0 +1,309 @@ +""" +Unit tests for inner_api auth decorators +""" + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import HTTPException + +from configs import dify_config +from controllers.inner_api.wraps import ( + billing_inner_api_only, + enterprise_inner_api_only, + enterprise_inner_api_user_auth, + plugin_inner_api_only, +) + + +class TestBillingInnerApiOnly: + """Test billing_inner_api_only decorator""" + + def test_should_allow_when_inner_api_enabled_and_valid_key(self, app: Flask): + """Test that valid API key allows access when INNER_API is enabled""" + + # Arrange + @billing_inner_api_only + def protected_view(): + return "success" + + # Act + with app.test_request_context(headers={"X-Inner-Api-Key": "valid_key"}): + with patch.object(dify_config, "INNER_API", True): + with patch.object(dify_config, "INNER_API_KEY", "valid_key"): + result = protected_view() + + # Assert + assert result == "success" + + def test_should_return_404_when_inner_api_disabled(self, app: Flask): + """Test that 404 is returned when INNER_API is disabled""" + + # Arrange + @billing_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(): + with patch.object(dify_config, "INNER_API", False): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 404 + + def test_should_return_401_when_api_key_missing(self, app: Flask): + """Test that 401 is returned when X-Inner-Api-Key header is missing""" + + # Arrange + @billing_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(headers={}): + with patch.object(dify_config, "INNER_API", True): + with patch.object(dify_config, "INNER_API_KEY", "valid_key"): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 401 + + def test_should_return_401_when_api_key_invalid(self, app: Flask): + """Test that 401 is returned when X-Inner-Api-Key header is invalid""" + + # Arrange + @billing_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(headers={"X-Inner-Api-Key": "invalid_key"}): + with patch.object(dify_config, "INNER_API", True): + with patch.object(dify_config, "INNER_API_KEY", "valid_key"): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 401 + + +class TestEnterpriseInnerApiOnly: + """Test enterprise_inner_api_only decorator""" + + def test_should_allow_when_inner_api_enabled_and_valid_key(self, app: Flask): + """Test that valid API key allows access when INNER_API is enabled""" + + # Arrange + @enterprise_inner_api_only + def protected_view(): + return "success" + + # Act + with app.test_request_context(headers={"X-Inner-Api-Key": "valid_key"}): + with patch.object(dify_config, "INNER_API", True): + with patch.object(dify_config, "INNER_API_KEY", "valid_key"): + result = protected_view() + + # Assert + assert result == "success" + + def test_should_return_404_when_inner_api_disabled(self, app: Flask): + """Test that 404 is returned when INNER_API is disabled""" + + # Arrange + @enterprise_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(): + with patch.object(dify_config, "INNER_API", False): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 404 + + def test_should_return_401_when_api_key_missing(self, app: Flask): + """Test that 401 is returned when X-Inner-Api-Key header is missing""" + + # Arrange + @enterprise_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(headers={}): + with patch.object(dify_config, "INNER_API", True): + with patch.object(dify_config, "INNER_API_KEY", "valid_key"): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 401 + + def test_should_return_401_when_api_key_invalid(self, app: Flask): + """Test that 401 is returned when X-Inner-Api-Key header is invalid""" + + # Arrange + @enterprise_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(headers={"X-Inner-Api-Key": "invalid_key"}): + with patch.object(dify_config, "INNER_API", True): + with patch.object(dify_config, "INNER_API_KEY", "valid_key"): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 401 + + +class TestEnterpriseInnerApiUserAuth: + """Test enterprise_inner_api_user_auth decorator for HMAC-based user authentication""" + + def test_should_pass_through_when_inner_api_disabled(self, app: Flask): + """Test that request passes through when INNER_API is disabled""" + + # Arrange + @enterprise_inner_api_user_auth + def protected_view(**kwargs): + return kwargs.get("user", "no_user") + + # Act + with app.test_request_context(): + with patch.object(dify_config, "INNER_API", False): + result = protected_view() + + # Assert + assert result == "no_user" + + def test_should_pass_through_when_authorization_header_missing(self, app: Flask): + """Test that request passes through when Authorization header is missing""" + + # Arrange + @enterprise_inner_api_user_auth + def protected_view(**kwargs): + return kwargs.get("user", "no_user") + + # Act + with app.test_request_context(headers={}): + with patch.object(dify_config, "INNER_API", True): + result = protected_view() + + # Assert + assert result == "no_user" + + def test_should_pass_through_when_authorization_format_invalid(self, app: Flask): + """Test that request passes through when Authorization format is invalid (no colon)""" + + # Arrange + @enterprise_inner_api_user_auth + def protected_view(**kwargs): + return kwargs.get("user", "no_user") + + # Act + with app.test_request_context(headers={"Authorization": "invalid_format"}): + with patch.object(dify_config, "INNER_API", True): + result = protected_view() + + # Assert + assert result == "no_user" + + def test_should_pass_through_when_hmac_signature_invalid(self, app: Flask): + """Test that request passes through when HMAC signature is invalid""" + + # Arrange + @enterprise_inner_api_user_auth + def protected_view(**kwargs): + return kwargs.get("user", "no_user") + + # Act - use wrong signature + with app.test_request_context( + headers={"Authorization": "Bearer user123:wrong_signature", "X-Inner-Api-Key": "valid_key"} + ): + with patch.object(dify_config, "INNER_API", True): + result = protected_view() + + # Assert + assert result == "no_user" + + def test_should_inject_user_when_hmac_signature_valid(self, app: Flask): + """Test that user is injected when HMAC signature is valid""" + # Arrange + from base64 import b64encode + from hashlib import sha1 + from hmac import new as hmac_new + + @enterprise_inner_api_user_auth + def protected_view(**kwargs): + return kwargs.get("user") + + # Calculate valid HMAC signature + user_id = "user123" + inner_api_key = "valid_key" + data_to_sign = f"DIFY {user_id}" + signature = hmac_new(inner_api_key.encode("utf-8"), data_to_sign.encode("utf-8"), sha1) + valid_signature = b64encode(signature.digest()).decode("utf-8") + + # Create mock user + mock_user = MagicMock() + mock_user.id = user_id + + # Act + with app.test_request_context( + headers={"Authorization": f"Bearer {user_id}:{valid_signature}", "X-Inner-Api-Key": inner_api_key} + ): + with patch.object(dify_config, "INNER_API", True): + with patch("controllers.inner_api.wraps.db.session.query") as mock_query: + mock_query.return_value.where.return_value.first.return_value = mock_user + result = protected_view() + + # Assert + assert result == mock_user + + +class TestPluginInnerApiOnly: + """Test plugin_inner_api_only decorator""" + + def test_should_allow_when_plugin_daemon_key_set_and_valid_key(self, app: Flask): + """Test that valid API key allows access when PLUGIN_DAEMON_KEY is set""" + + # Arrange + @plugin_inner_api_only + def protected_view(): + return "success" + + # Act + with app.test_request_context(headers={"X-Inner-Api-Key": "valid_plugin_key"}): + with patch.object(dify_config, "PLUGIN_DAEMON_KEY", "plugin_key"): + with patch.object(dify_config, "INNER_API_KEY_FOR_PLUGIN", "valid_plugin_key"): + result = protected_view() + + # Assert + assert result == "success" + + def test_should_return_404_when_plugin_daemon_key_not_set(self, app: Flask): + """Test that 404 is returned when PLUGIN_DAEMON_KEY is not set""" + + # Arrange + @plugin_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(): + with patch.object(dify_config, "PLUGIN_DAEMON_KEY", ""): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 404 + + def test_should_return_404_when_api_key_invalid(self, app: Flask): + """Test that 404 is returned when X-Inner-Api-Key header is invalid (note: returns 404, not 401)""" + + # Arrange + @plugin_inner_api_only + def protected_view(): + return "success" + + # Act & Assert + with app.test_request_context(headers={"X-Inner-Api-Key": "invalid_key"}): + with patch.object(dify_config, "PLUGIN_DAEMON_KEY", "plugin_key"): + with patch.object(dify_config, "INNER_API_KEY_FOR_PLUGIN", "valid_plugin_key"): + with pytest.raises(HTTPException) as exc_info: + protected_view() + assert exc_info.value.code == 404 diff --git a/api/tests/unit_tests/controllers/inner_api/test_mail.py b/api/tests/unit_tests/controllers/inner_api/test_mail.py new file mode 100644 index 0000000000..c2ca35693e --- /dev/null +++ b/api/tests/unit_tests/controllers/inner_api/test_mail.py @@ -0,0 +1,206 @@ +""" +Unit tests for inner_api mail module +""" + +from unittest.mock import patch + +import pytest +from flask import Flask +from pydantic import ValidationError + +from controllers.inner_api.mail import ( + BaseMail, + BillingMail, + EnterpriseMail, + InnerMailPayload, +) + + +class TestInnerMailPayload: + """Test InnerMailPayload Pydantic model""" + + def test_valid_payload_with_all_fields(self): + """Test valid payload with all fields passes validation""" + data = { + "to": ["test@example.com"], + "subject": "Test Subject", + "body": "Test Body", + "substitutions": {"key": "value"}, + } + payload = InnerMailPayload.model_validate(data) + assert payload.to == ["test@example.com"] + assert payload.subject == "Test Subject" + assert payload.body == "Test Body" + assert payload.substitutions == {"key": "value"} + + def test_valid_payload_without_substitutions(self): + """Test valid payload without optional substitutions""" + data = { + "to": ["test@example.com"], + "subject": "Test Subject", + "body": "Test Body", + } + payload = InnerMailPayload.model_validate(data) + assert payload.to == ["test@example.com"] + assert payload.subject == "Test Subject" + assert payload.body == "Test Body" + assert payload.substitutions is None + + def test_empty_to_list_fails_validation(self): + """Test that empty 'to' list fails validation due to min_length=1""" + data = { + "to": [], + "subject": "Test Subject", + "body": "Test Body", + } + with pytest.raises(ValidationError): + InnerMailPayload.model_validate(data) + + def test_multiple_recipients_allowed(self): + """Test that multiple recipients are allowed""" + data = { + "to": ["user1@example.com", "user2@example.com"], + "subject": "Test Subject", + "body": "Test Body", + } + payload = InnerMailPayload.model_validate(data) + assert len(payload.to) == 2 + assert "user1@example.com" in payload.to + assert "user2@example.com" in payload.to + + def test_missing_to_field_fails_validation(self): + """Test that missing 'to' field fails validation""" + data = { + "subject": "Test Subject", + "body": "Test Body", + } + with pytest.raises(ValidationError): + InnerMailPayload.model_validate(data) + + def test_missing_subject_fails_validation(self): + """Test that missing 'subject' field fails validation""" + data = { + "to": ["test@example.com"], + "body": "Test Body", + } + with pytest.raises(ValidationError): + InnerMailPayload.model_validate(data) + + def test_missing_body_fails_validation(self): + """Test that missing 'body' field fails validation""" + data = { + "to": ["test@example.com"], + "subject": "Test Subject", + } + with pytest.raises(ValidationError): + InnerMailPayload.model_validate(data) + + +class TestBaseMail: + """Test BaseMail API endpoint""" + + @pytest.fixture + def api_instance(self): + """Create BaseMail API instance""" + return BaseMail() + + @patch("controllers.inner_api.mail.send_inner_email_task") + def test_post_sends_email_task(self, mock_task, api_instance, app: Flask): + """Test that POST sends inner email task""" + # Arrange + mock_task.delay.return_value = None + + # Act + with app.test_request_context( + json={ + "to": ["test@example.com"], + "subject": "Test Subject", + "body": "Test Body", + } + ): + with patch("controllers.inner_api.mail.inner_api_ns") as mock_ns: + mock_ns.payload = { + "to": ["test@example.com"], + "subject": "Test Subject", + "body": "Test Body", + } + result = api_instance.post() + + # Assert + assert result == ({"message": "success"}, 200) + mock_task.delay.assert_called_once_with( + to=["test@example.com"], + subject="Test Subject", + body="Test Body", + substitutions=None, + ) + + @patch("controllers.inner_api.mail.send_inner_email_task") + def test_post_with_substitutions(self, mock_task, api_instance, app: Flask): + """Test that POST sends email with substitutions""" + # Arrange + mock_task.delay.return_value = None + + # Act + with app.test_request_context(): + with patch("controllers.inner_api.mail.inner_api_ns") as mock_ns: + mock_ns.payload = { + "to": ["test@example.com"], + "subject": "Hello {{name}}", + "body": "Welcome {{name}}!", + "substitutions": {"name": "John"}, + } + result = api_instance.post() + + # Assert + assert result == ({"message": "success"}, 200) + mock_task.delay.assert_called_once_with( + to=["test@example.com"], + subject="Hello {{name}}", + body="Welcome {{name}}!", + substitutions={"name": "John"}, + ) + + +class TestEnterpriseMail: + """Test EnterpriseMail API endpoint""" + + @pytest.fixture + def api_instance(self): + """Create EnterpriseMail API instance""" + return EnterpriseMail() + + def test_has_enterprise_inner_api_only_decorator(self, api_instance): + """Test that EnterpriseMail has enterprise_inner_api_only decorator""" + # Check method_decorators + from controllers.inner_api.wraps import enterprise_inner_api_only + + assert enterprise_inner_api_only in api_instance.method_decorators + + def test_has_setup_required_decorator(self, api_instance): + """Test that EnterpriseMail has setup_required decorator""" + # Check by decorator name instead of object reference + decorator_names = [d.__name__ for d in api_instance.method_decorators] + assert "setup_required" in decorator_names + + +class TestBillingMail: + """Test BillingMail API endpoint""" + + @pytest.fixture + def api_instance(self): + """Create BillingMail API instance""" + return BillingMail() + + def test_has_billing_inner_api_only_decorator(self, api_instance): + """Test that BillingMail has billing_inner_api_only decorator""" + # Check method_decorators + from controllers.inner_api.wraps import billing_inner_api_only + + assert billing_inner_api_only in api_instance.method_decorators + + def test_has_setup_required_decorator(self, api_instance): + """Test that BillingMail has setup_required decorator""" + # Check by decorator name instead of object reference + decorator_names = [d.__name__ for d in api_instance.method_decorators] + assert "setup_required" in decorator_names diff --git a/api/tests/unit_tests/controllers/inner_api/workspace/__init__.py b/api/tests/unit_tests/controllers/inner_api/workspace/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/controllers/inner_api/workspace/test_workspace.py b/api/tests/unit_tests/controllers/inner_api/workspace/test_workspace.py new file mode 100644 index 0000000000..4fbf0f7125 --- /dev/null +++ b/api/tests/unit_tests/controllers/inner_api/workspace/test_workspace.py @@ -0,0 +1,184 @@ +""" +Unit tests for inner_api workspace module + +Tests Pydantic model validation and endpoint handler logic. +Auth/setup decorators are tested separately in test_auth_wraps.py; +handler tests use inspect.unwrap() to bypass them and focus on business logic. +""" + +import inspect +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from pydantic import ValidationError + +from controllers.inner_api.workspace.workspace import ( + EnterpriseWorkspace, + EnterpriseWorkspaceNoOwnerEmail, + WorkspaceCreatePayload, + WorkspaceOwnerlessPayload, +) + + +class TestWorkspaceCreatePayload: + """Test WorkspaceCreatePayload Pydantic model validation""" + + def test_valid_payload(self): + """Test valid payload with all fields passes validation""" + data = { + "name": "My Workspace", + "owner_email": "owner@example.com", + } + payload = WorkspaceCreatePayload.model_validate(data) + assert payload.name == "My Workspace" + assert payload.owner_email == "owner@example.com" + + def test_missing_name_fails_validation(self): + """Test that missing name fails validation""" + data = {"owner_email": "owner@example.com"} + with pytest.raises(ValidationError) as exc_info: + WorkspaceCreatePayload.model_validate(data) + assert "name" in str(exc_info.value) + + def test_missing_owner_email_fails_validation(self): + """Test that missing owner_email fails validation""" + data = {"name": "My Workspace"} + with pytest.raises(ValidationError) as exc_info: + WorkspaceCreatePayload.model_validate(data) + assert "owner_email" in str(exc_info.value) + + +class TestWorkspaceOwnerlessPayload: + """Test WorkspaceOwnerlessPayload Pydantic model validation""" + + def test_valid_payload(self): + """Test valid payload with name passes validation""" + data = {"name": "My Workspace"} + payload = WorkspaceOwnerlessPayload.model_validate(data) + assert payload.name == "My Workspace" + + def test_missing_name_fails_validation(self): + """Test that missing name fails validation""" + data = {} + with pytest.raises(ValidationError) as exc_info: + WorkspaceOwnerlessPayload.model_validate(data) + assert "name" in str(exc_info.value) + + +class TestEnterpriseWorkspace: + """Test EnterpriseWorkspace API endpoint handler logic. + + Uses inspect.unwrap() to bypass auth/setup decorators (tested in test_auth_wraps.py) + and exercise the core business logic directly. + """ + + @pytest.fixture + def api_instance(self): + return EnterpriseWorkspace() + + def test_has_post_method(self, api_instance): + """Test that EnterpriseWorkspace has post method""" + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + @patch("controllers.inner_api.workspace.workspace.tenant_was_created") + @patch("controllers.inner_api.workspace.workspace.TenantService") + @patch("controllers.inner_api.workspace.workspace.db") + def test_post_creates_workspace_with_owner(self, mock_db, mock_tenant_svc, mock_event, api_instance, app: Flask): + """Test that post() creates a workspace and assigns the owner account""" + # Arrange + mock_account = MagicMock() + mock_account.email = "owner@example.com" + mock_db.session.query.return_value.filter_by.return_value.first.return_value = mock_account + + now = datetime(2025, 1, 1, 12, 0, 0) + mock_tenant = MagicMock() + mock_tenant.id = "tenant-id" + mock_tenant.name = "My Workspace" + mock_tenant.plan = "sandbox" + mock_tenant.status = "normal" + mock_tenant.created_at = now + mock_tenant.updated_at = now + mock_tenant_svc.create_tenant.return_value = mock_tenant + + # Act — unwrap to bypass auth/setup decorators (tested in test_auth_wraps.py) + unwrapped_post = inspect.unwrap(api_instance.post) + with app.test_request_context(): + with patch("controllers.inner_api.workspace.workspace.inner_api_ns") as mock_ns: + mock_ns.payload = {"name": "My Workspace", "owner_email": "owner@example.com"} + result = unwrapped_post(api_instance) + + # Assert + assert result["message"] == "enterprise workspace created." + assert result["tenant"]["id"] == "tenant-id" + assert result["tenant"]["name"] == "My Workspace" + mock_tenant_svc.create_tenant.assert_called_once_with("My Workspace", is_from_dashboard=True) + mock_tenant_svc.create_tenant_member.assert_called_once_with(mock_tenant, mock_account, role="owner") + mock_event.send.assert_called_once_with(mock_tenant) + + @patch("controllers.inner_api.workspace.workspace.db") + def test_post_returns_404_when_owner_not_found(self, mock_db, api_instance, app: Flask): + """Test that post() returns 404 when the owner account does not exist""" + # Arrange + mock_db.session.query.return_value.filter_by.return_value.first.return_value = None + + # Act + unwrapped_post = inspect.unwrap(api_instance.post) + with app.test_request_context(): + with patch("controllers.inner_api.workspace.workspace.inner_api_ns") as mock_ns: + mock_ns.payload = {"name": "My Workspace", "owner_email": "missing@example.com"} + result = unwrapped_post(api_instance) + + # Assert + assert result == ({"message": "owner account not found."}, 404) + + +class TestEnterpriseWorkspaceNoOwnerEmail: + """Test EnterpriseWorkspaceNoOwnerEmail API endpoint handler logic. + + Uses inspect.unwrap() to bypass auth/setup decorators (tested in test_auth_wraps.py) + and exercise the core business logic directly. + """ + + @pytest.fixture + def api_instance(self): + return EnterpriseWorkspaceNoOwnerEmail() + + def test_has_post_method(self, api_instance): + """Test that endpoint has post method""" + assert hasattr(api_instance, "post") + assert callable(api_instance.post) + + @patch("controllers.inner_api.workspace.workspace.tenant_was_created") + @patch("controllers.inner_api.workspace.workspace.TenantService") + def test_post_creates_ownerless_workspace(self, mock_tenant_svc, mock_event, api_instance, app: Flask): + """Test that post() creates a workspace without an owner and returns expected fields""" + # Arrange + now = datetime(2025, 1, 1, 12, 0, 0) + mock_tenant = MagicMock() + mock_tenant.id = "tenant-id" + mock_tenant.name = "My Workspace" + mock_tenant.encrypt_public_key = "pub-key" + mock_tenant.plan = "sandbox" + mock_tenant.status = "normal" + mock_tenant.custom_config = None + mock_tenant.created_at = now + mock_tenant.updated_at = now + mock_tenant_svc.create_tenant.return_value = mock_tenant + + # Act — unwrap to bypass auth/setup decorators (tested in test_auth_wraps.py) + unwrapped_post = inspect.unwrap(api_instance.post) + with app.test_request_context(): + with patch("controllers.inner_api.workspace.workspace.inner_api_ns") as mock_ns: + mock_ns.payload = {"name": "My Workspace"} + result = unwrapped_post(api_instance) + + # Assert + assert result["message"] == "enterprise workspace created." + assert result["tenant"]["id"] == "tenant-id" + assert result["tenant"]["encrypt_public_key"] == "pub-key" + assert result["tenant"]["custom_config"] == {} + mock_tenant_svc.create_tenant.assert_called_once_with("My Workspace", is_from_dashboard=True) + mock_event.send.assert_called_once_with(mock_tenant) diff --git a/api/tests/unit_tests/controllers/web/__init__.py b/api/tests/unit_tests/controllers/web/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/controllers/web/conftest.py b/api/tests/unit_tests/controllers/web/conftest.py new file mode 100644 index 0000000000..274d78c9cf --- /dev/null +++ b/api/tests/unit_tests/controllers/web/conftest.py @@ -0,0 +1,85 @@ +"""Shared fixtures for controllers.web unit tests.""" + +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any + +import pytest +from flask import Flask + + +@pytest.fixture +def app() -> Flask: + """Minimal Flask app for request contexts.""" + flask_app = Flask(__name__) + flask_app.config["TESTING"] = True + return flask_app + + +class FakeSession: + """Stand-in for db.session that returns pre-seeded objects by model class name.""" + + def __init__(self, mapping: dict[str, Any] | None = None): + self._mapping: dict[str, Any] = mapping or {} + self._model_name: str | None = None + + def query(self, model: type) -> FakeSession: + self._model_name = model.__name__ + return self + + def where(self, *_args: object, **_kwargs: object) -> FakeSession: + return self + + def first(self) -> Any: + assert self._model_name is not None + return self._mapping.get(self._model_name) + + +class FakeDB: + """Minimal db stub exposing engine and session.""" + + def __init__(self, session: FakeSession | None = None): + self.session = session or FakeSession() + self.engine = object() + + +def make_app_model( + *, + app_id: str = "app-1", + tenant_id: str = "tenant-1", + mode: str = "chat", + enable_site: bool = True, + status: str = "normal", +) -> SimpleNamespace: + """Build a fake App model with common defaults.""" + tenant = SimpleNamespace( + id=tenant_id, + status="normal", + plan="basic", + custom_config_dict={}, + ) + return SimpleNamespace( + id=app_id, + tenant_id=tenant_id, + tenant=tenant, + mode=mode, + enable_site=enable_site, + status=status, + workflow=None, + app_model_config=None, + ) + + +def make_end_user( + *, + user_id: str = "end-user-1", + session_id: str = "session-1", + external_user_id: str = "ext-user-1", +) -> SimpleNamespace: + """Build a fake EndUser model with common defaults.""" + return SimpleNamespace( + id=user_id, + session_id=session_id, + external_user_id=external_user_id, + ) diff --git a/api/tests/unit_tests/controllers/web/test_app.py b/api/tests/unit_tests/controllers/web/test_app.py new file mode 100644 index 0000000000..ce7ae27188 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_app.py @@ -0,0 +1,165 @@ +"""Unit tests for controllers.web.app endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.web.app import AppAccessMode, AppMeta, AppParameterApi, AppWebAuthPermission +from controllers.web.error import AppUnavailableError + + +# --------------------------------------------------------------------------- +# AppParameterApi +# --------------------------------------------------------------------------- +class TestAppParameterApi: + def test_advanced_chat_mode_uses_workflow(self, app: Flask) -> None: + features_dict = {"opening_statement": "Hello"} + workflow = SimpleNamespace( + features_dict=features_dict, + user_input_form=lambda to_old_structure=False: [], + ) + app_model = SimpleNamespace(mode="advanced-chat", workflow=workflow) + + with ( + app.test_request_context("/parameters"), + patch("controllers.web.app.get_parameters_from_feature_dict", return_value={}) as mock_params, + patch("controllers.web.app.fields.Parameters") as mock_fields, + ): + mock_fields.model_validate.return_value.model_dump.return_value = {"result": "ok"} + result = AppParameterApi().get(app_model, SimpleNamespace()) + + mock_params.assert_called_once_with(features_dict=features_dict, user_input_form=[]) + assert result == {"result": "ok"} + + def test_workflow_mode_uses_workflow(self, app: Flask) -> None: + features_dict = {} + workflow = SimpleNamespace( + features_dict=features_dict, + user_input_form=lambda to_old_structure=False: [{"var": "x"}], + ) + app_model = SimpleNamespace(mode="workflow", workflow=workflow) + + with ( + app.test_request_context("/parameters"), + patch("controllers.web.app.get_parameters_from_feature_dict", return_value={}) as mock_params, + patch("controllers.web.app.fields.Parameters") as mock_fields, + ): + mock_fields.model_validate.return_value.model_dump.return_value = {} + AppParameterApi().get(app_model, SimpleNamespace()) + + mock_params.assert_called_once_with(features_dict=features_dict, user_input_form=[{"var": "x"}]) + + def test_advanced_chat_mode_no_workflow_raises(self, app: Flask) -> None: + app_model = SimpleNamespace(mode="advanced-chat", workflow=None) + with app.test_request_context("/parameters"): + with pytest.raises(AppUnavailableError): + AppParameterApi().get(app_model, SimpleNamespace()) + + def test_standard_mode_uses_app_model_config(self, app: Flask) -> None: + config = SimpleNamespace(to_dict=lambda: {"user_input_form": [{"var": "y"}], "key": "val"}) + app_model = SimpleNamespace(mode="chat", app_model_config=config) + + with ( + app.test_request_context("/parameters"), + patch("controllers.web.app.get_parameters_from_feature_dict", return_value={}) as mock_params, + patch("controllers.web.app.fields.Parameters") as mock_fields, + ): + mock_fields.model_validate.return_value.model_dump.return_value = {} + AppParameterApi().get(app_model, SimpleNamespace()) + + call_kwargs = mock_params.call_args + assert call_kwargs.kwargs["user_input_form"] == [{"var": "y"}] + + def test_standard_mode_no_config_raises(self, app: Flask) -> None: + app_model = SimpleNamespace(mode="chat", app_model_config=None) + with app.test_request_context("/parameters"): + with pytest.raises(AppUnavailableError): + AppParameterApi().get(app_model, SimpleNamespace()) + + +# --------------------------------------------------------------------------- +# AppMeta +# --------------------------------------------------------------------------- +class TestAppMeta: + @patch("controllers.web.app.AppService") + def test_get_returns_meta(self, mock_service_cls: MagicMock, app: Flask) -> None: + mock_service_cls.return_value.get_app_meta.return_value = {"tool_icons": {}} + app_model = SimpleNamespace(id="app-1") + + with app.test_request_context("/meta"): + result = AppMeta().get(app_model, SimpleNamespace()) + + assert result == {"tool_icons": {}} + + +# --------------------------------------------------------------------------- +# AppAccessMode +# --------------------------------------------------------------------------- +class TestAppAccessMode: + @patch("controllers.web.app.FeatureService.get_system_features") + def test_returns_public_when_webapp_auth_disabled(self, mock_features: MagicMock, app: Flask) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + + with app.test_request_context("/webapp/access-mode?appId=app-1"): + result = AppAccessMode().get() + + assert result == {"accessMode": "public"} + + @patch("controllers.web.app.EnterpriseService.WebAppAuth.get_app_access_mode_by_id") + @patch("controllers.web.app.FeatureService.get_system_features") + def test_returns_access_mode_with_app_id( + self, mock_features: MagicMock, mock_access: MagicMock, app: Flask + ) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=True)) + mock_access.return_value = SimpleNamespace(access_mode="internal") + + with app.test_request_context("/webapp/access-mode?appId=app-1"): + result = AppAccessMode().get() + + assert result == {"accessMode": "internal"} + mock_access.assert_called_once_with("app-1") + + @patch("controllers.web.app.AppService.get_app_id_by_code", return_value="resolved-id") + @patch("controllers.web.app.EnterpriseService.WebAppAuth.get_app_access_mode_by_id") + @patch("controllers.web.app.FeatureService.get_system_features") + def test_resolves_app_code_to_id( + self, mock_features: MagicMock, mock_access: MagicMock, mock_resolve: MagicMock, app: Flask + ) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=True)) + mock_access.return_value = SimpleNamespace(access_mode="external") + + with app.test_request_context("/webapp/access-mode?appCode=code1"): + result = AppAccessMode().get() + + mock_resolve.assert_called_once_with("code1") + mock_access.assert_called_once_with("resolved-id") + assert result == {"accessMode": "external"} + + @patch("controllers.web.app.FeatureService.get_system_features") + def test_raises_when_no_app_id_or_code(self, mock_features: MagicMock, app: Flask) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=True)) + + with app.test_request_context("/webapp/access-mode"): + with pytest.raises(ValueError, match="appId or appCode"): + AppAccessMode().get() + + +# --------------------------------------------------------------------------- +# AppWebAuthPermission +# --------------------------------------------------------------------------- +class TestAppWebAuthPermission: + @patch("controllers.web.app.WebAppAuthService.is_app_require_permission_check", return_value=False) + def test_returns_true_when_no_permission_check_required(self, mock_check: MagicMock, app: Flask) -> None: + with app.test_request_context("/webapp/permission?appId=app-1", headers={"X-App-Code": "code1"}): + result = AppWebAuthPermission().get() + + assert result == {"result": True} + + def test_raises_when_missing_app_id(self, app: Flask) -> None: + with app.test_request_context("/webapp/permission", headers={"X-App-Code": "code1"}): + with pytest.raises(ValueError, match="appId"): + AppWebAuthPermission().get() diff --git a/api/tests/unit_tests/controllers/web/test_audio.py b/api/tests/unit_tests/controllers/web/test_audio.py new file mode 100644 index 0000000000..01f34345aa --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_audio.py @@ -0,0 +1,135 @@ +"""Unit tests for controllers.web.audio endpoints.""" + +from __future__ import annotations + +from io import BytesIO +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.web.audio import AudioApi, TextApi +from controllers.web.error import ( + AudioTooLargeError, + CompletionRequestError, + NoAudioUploadedError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderNotSupportSpeechToTextError, + ProviderQuotaExceededError, + UnsupportedAudioTypeError, +) +from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError +from dify_graph.model_runtime.errors.invoke import InvokeError +from services.errors.audio import ( + AudioTooLargeServiceError, + NoAudioUploadedServiceError, + ProviderNotSupportSpeechToTextServiceError, + UnsupportedAudioTypeServiceError, +) + + +def _app_model() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="chat") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1", external_user_id="ext-1") + + +# --------------------------------------------------------------------------- +# AudioApi (audio-to-text) +# --------------------------------------------------------------------------- +class TestAudioApi: + @patch("controllers.web.audio.AudioService.transcript_asr", return_value={"text": "hello"}) + def test_happy_path(self, mock_asr: MagicMock, app: Flask) -> None: + app.config["RESTX_MASK_HEADER"] = "X-Fields" + data = {"file": (BytesIO(b"fake-audio"), "test.mp3")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + result = AudioApi().post(_app_model(), _end_user()) + + assert result == {"text": "hello"} + + @patch("controllers.web.audio.AudioService.transcript_asr", side_effect=NoAudioUploadedServiceError()) + def test_no_audio_uploaded(self, mock_asr: MagicMock, app: Flask) -> None: + data = {"file": (BytesIO(b""), "empty.mp3")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(NoAudioUploadedError): + AudioApi().post(_app_model(), _end_user()) + + @patch("controllers.web.audio.AudioService.transcript_asr", side_effect=AudioTooLargeServiceError("too big")) + def test_audio_too_large(self, mock_asr: MagicMock, app: Flask) -> None: + data = {"file": (BytesIO(b"big"), "big.mp3")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(AudioTooLargeError): + AudioApi().post(_app_model(), _end_user()) + + @patch("controllers.web.audio.AudioService.transcript_asr", side_effect=UnsupportedAudioTypeServiceError()) + def test_unsupported_type(self, mock_asr: MagicMock, app: Flask) -> None: + data = {"file": (BytesIO(b"bad"), "bad.xyz")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(UnsupportedAudioTypeError): + AudioApi().post(_app_model(), _end_user()) + + @patch( + "controllers.web.audio.AudioService.transcript_asr", + side_effect=ProviderNotSupportSpeechToTextServiceError(), + ) + def test_provider_not_support(self, mock_asr: MagicMock, app: Flask) -> None: + data = {"file": (BytesIO(b"x"), "x.mp3")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(ProviderNotSupportSpeechToTextError): + AudioApi().post(_app_model(), _end_user()) + + @patch( + "controllers.web.audio.AudioService.transcript_asr", + side_effect=ProviderTokenNotInitError(description="no token"), + ) + def test_provider_not_init(self, mock_asr: MagicMock, app: Flask) -> None: + data = {"file": (BytesIO(b"x"), "x.mp3")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(ProviderNotInitializeError): + AudioApi().post(_app_model(), _end_user()) + + @patch("controllers.web.audio.AudioService.transcript_asr", side_effect=QuotaExceededError()) + def test_quota_exceeded(self, mock_asr: MagicMock, app: Flask) -> None: + data = {"file": (BytesIO(b"x"), "x.mp3")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(ProviderQuotaExceededError): + AudioApi().post(_app_model(), _end_user()) + + @patch("controllers.web.audio.AudioService.transcript_asr", side_effect=ModelCurrentlyNotSupportError()) + def test_model_not_support(self, mock_asr: MagicMock, app: Flask) -> None: + data = {"file": (BytesIO(b"x"), "x.mp3")} + with app.test_request_context("/audio-to-text", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + AudioApi().post(_app_model(), _end_user()) + + +# --------------------------------------------------------------------------- +# TextApi (text-to-audio) +# --------------------------------------------------------------------------- +class TestTextApi: + @patch("controllers.web.audio.AudioService.transcript_tts", return_value="audio-bytes") + @patch("controllers.web.audio.web_ns") + def test_happy_path(self, mock_ns: MagicMock, mock_tts: MagicMock, app: Flask) -> None: + mock_ns.payload = {"text": "hello", "voice": "alloy"} + + with app.test_request_context("/text-to-audio", method="POST"): + result = TextApi().post(_app_model(), _end_user()) + + assert result == "audio-bytes" + mock_tts.assert_called_once() + + @patch( + "controllers.web.audio.AudioService.transcript_tts", + side_effect=InvokeError(description="invoke failed"), + ) + @patch("controllers.web.audio.web_ns") + def test_invoke_error_mapped(self, mock_ns: MagicMock, mock_tts: MagicMock, app: Flask) -> None: + mock_ns.payload = {"text": "hello"} + + with app.test_request_context("/text-to-audio", method="POST"): + with pytest.raises(CompletionRequestError): + TextApi().post(_app_model(), _end_user()) diff --git a/api/tests/unit_tests/controllers/web/test_completion.py b/api/tests/unit_tests/controllers/web/test_completion.py new file mode 100644 index 0000000000..e88bcf2ae6 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_completion.py @@ -0,0 +1,161 @@ +"""Unit tests for controllers.web.completion endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.web.completion import ChatApi, ChatStopApi, CompletionApi, CompletionStopApi +from controllers.web.error import ( + CompletionRequestError, + NotChatAppError, + NotCompletionAppError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderQuotaExceededError, +) +from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError +from dify_graph.model_runtime.errors.invoke import InvokeError + + +def _completion_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="completion") + + +def _chat_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="chat") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +# --------------------------------------------------------------------------- +# CompletionApi +# --------------------------------------------------------------------------- +class TestCompletionApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/completion-messages", method="POST"): + with pytest.raises(NotCompletionAppError): + CompletionApi().post(_chat_app(), _end_user()) + + @patch("controllers.web.completion.helper.compact_generate_response", return_value={"answer": "hi"}) + @patch("controllers.web.completion.AppGenerateService.generate") + @patch("controllers.web.completion.web_ns") + def test_happy_path(self, mock_ns: MagicMock, mock_gen: MagicMock, mock_compact: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}, "query": "test"} + mock_gen.return_value = "response-obj" + + with app.test_request_context("/completion-messages", method="POST"): + result = CompletionApi().post(_completion_app(), _end_user()) + + assert result == {"answer": "hi"} + + @patch( + "controllers.web.completion.AppGenerateService.generate", + side_effect=ProviderTokenNotInitError(description="not init"), + ) + @patch("controllers.web.completion.web_ns") + def test_provider_not_init_error(self, mock_ns: MagicMock, mock_gen: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}} + + with app.test_request_context("/completion-messages", method="POST"): + with pytest.raises(ProviderNotInitializeError): + CompletionApi().post(_completion_app(), _end_user()) + + @patch( + "controllers.web.completion.AppGenerateService.generate", + side_effect=QuotaExceededError(), + ) + @patch("controllers.web.completion.web_ns") + def test_quota_exceeded_error(self, mock_ns: MagicMock, mock_gen: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}} + + with app.test_request_context("/completion-messages", method="POST"): + with pytest.raises(ProviderQuotaExceededError): + CompletionApi().post(_completion_app(), _end_user()) + + @patch( + "controllers.web.completion.AppGenerateService.generate", + side_effect=ModelCurrentlyNotSupportError(), + ) + @patch("controllers.web.completion.web_ns") + def test_model_not_support_error(self, mock_ns: MagicMock, mock_gen: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}} + + with app.test_request_context("/completion-messages", method="POST"): + with pytest.raises(ProviderModelCurrentlyNotSupportError): + CompletionApi().post(_completion_app(), _end_user()) + + +# --------------------------------------------------------------------------- +# CompletionStopApi +# --------------------------------------------------------------------------- +class TestCompletionStopApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/completion-messages/task-1/stop", method="POST"): + with pytest.raises(NotCompletionAppError): + CompletionStopApi().post(_chat_app(), _end_user(), "task-1") + + @patch("controllers.web.completion.AppTaskService.stop_task") + def test_stop_success(self, mock_stop: MagicMock, app: Flask) -> None: + with app.test_request_context("/completion-messages/task-1/stop", method="POST"): + result, status = CompletionStopApi().post(_completion_app(), _end_user(), "task-1") + + assert status == 200 + assert result == {"result": "success"} + + +# --------------------------------------------------------------------------- +# ChatApi +# --------------------------------------------------------------------------- +class TestChatApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/chat-messages", method="POST"): + with pytest.raises(NotChatAppError): + ChatApi().post(_completion_app(), _end_user()) + + @patch("controllers.web.completion.helper.compact_generate_response", return_value={"answer": "reply"}) + @patch("controllers.web.completion.AppGenerateService.generate") + @patch("controllers.web.completion.web_ns") + def test_happy_path(self, mock_ns: MagicMock, mock_gen: MagicMock, mock_compact: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}, "query": "hi"} + mock_gen.return_value = "response" + + with app.test_request_context("/chat-messages", method="POST"): + result = ChatApi().post(_chat_app(), _end_user()) + + assert result == {"answer": "reply"} + + @patch( + "controllers.web.completion.AppGenerateService.generate", + side_effect=InvokeError(description="rate limit"), + ) + @patch("controllers.web.completion.web_ns") + def test_invoke_error_mapped(self, mock_ns: MagicMock, mock_gen: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}, "query": "x"} + + with app.test_request_context("/chat-messages", method="POST"): + with pytest.raises(CompletionRequestError): + ChatApi().post(_chat_app(), _end_user()) + + +# --------------------------------------------------------------------------- +# ChatStopApi +# --------------------------------------------------------------------------- +class TestChatStopApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/chat-messages/task-1/stop", method="POST"): + with pytest.raises(NotChatAppError): + ChatStopApi().post(_completion_app(), _end_user(), "task-1") + + @patch("controllers.web.completion.AppTaskService.stop_task") + def test_stop_success(self, mock_stop: MagicMock, app: Flask) -> None: + with app.test_request_context("/chat-messages/task-1/stop", method="POST"): + result, status = ChatStopApi().post(_chat_app(), _end_user(), "task-1") + + assert status == 200 + assert result == {"result": "success"} diff --git a/api/tests/unit_tests/controllers/web/test_conversation.py b/api/tests/unit_tests/controllers/web/test_conversation.py new file mode 100644 index 0000000000..e5adbbbf66 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_conversation.py @@ -0,0 +1,183 @@ +"""Unit tests for controllers.web.conversation endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest +from flask import Flask +from werkzeug.exceptions import NotFound + +from controllers.web.conversation import ( + ConversationApi, + ConversationListApi, + ConversationPinApi, + ConversationRenameApi, + ConversationUnPinApi, +) +from controllers.web.error import NotChatAppError +from services.errors.conversation import ConversationNotExistsError + + +def _chat_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="chat") + + +def _completion_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="completion") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +# --------------------------------------------------------------------------- +# ConversationListApi +# --------------------------------------------------------------------------- +class TestConversationListApi: + def test_non_chat_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/conversations"): + with pytest.raises(NotChatAppError): + ConversationListApi().get(_completion_app(), _end_user()) + + @patch("controllers.web.conversation.WebConversationService.pagination_by_last_id") + @patch("controllers.web.conversation.db") + def test_happy_path(self, mock_db: MagicMock, mock_paginate: MagicMock, app: Flask) -> None: + conv_id = str(uuid4()) + conv = SimpleNamespace( + id=conv_id, + name="Test", + inputs={}, + status="normal", + introduction="", + created_at=1700000000, + updated_at=1700000000, + ) + mock_paginate.return_value = SimpleNamespace(limit=20, has_more=False, data=[conv]) + mock_db.engine = "engine" + + session_mock = MagicMock() + session_ctx = MagicMock() + session_ctx.__enter__ = MagicMock(return_value=session_mock) + session_ctx.__exit__ = MagicMock(return_value=False) + + with ( + app.test_request_context("/conversations?limit=20"), + patch("controllers.web.conversation.Session", return_value=session_ctx), + ): + result = ConversationListApi().get(_chat_app(), _end_user()) + + assert result["limit"] == 20 + assert result["has_more"] is False + + +# --------------------------------------------------------------------------- +# ConversationApi (delete) +# --------------------------------------------------------------------------- +class TestConversationApi: + def test_non_chat_mode_raises(self, app: Flask) -> None: + with app.test_request_context(f"/conversations/{uuid4()}"): + with pytest.raises(NotChatAppError): + ConversationApi().delete(_completion_app(), _end_user(), uuid4()) + + @patch("controllers.web.conversation.ConversationService.delete") + def test_delete_success(self, mock_delete: MagicMock, app: Flask) -> None: + c_id = uuid4() + with app.test_request_context(f"/conversations/{c_id}"): + result, status = ConversationApi().delete(_chat_app(), _end_user(), c_id) + + assert status == 204 + assert result["result"] == "success" + + @patch("controllers.web.conversation.ConversationService.delete", side_effect=ConversationNotExistsError()) + def test_delete_not_found(self, mock_delete: MagicMock, app: Flask) -> None: + c_id = uuid4() + with app.test_request_context(f"/conversations/{c_id}"): + with pytest.raises(NotFound, match="Conversation Not Exists"): + ConversationApi().delete(_chat_app(), _end_user(), c_id) + + +# --------------------------------------------------------------------------- +# ConversationRenameApi +# --------------------------------------------------------------------------- +class TestConversationRenameApi: + def test_non_chat_mode_raises(self, app: Flask) -> None: + with app.test_request_context(f"/conversations/{uuid4()}/name", method="POST", json={"name": "x"}): + with pytest.raises(NotChatAppError): + ConversationRenameApi().post(_completion_app(), _end_user(), uuid4()) + + @patch("controllers.web.conversation.ConversationService.rename") + @patch("controllers.web.conversation.web_ns") + def test_rename_success(self, mock_ns: MagicMock, mock_rename: MagicMock, app: Flask) -> None: + c_id = uuid4() + mock_ns.payload = {"name": "New Name", "auto_generate": False} + conv = SimpleNamespace( + id=str(c_id), + name="New Name", + inputs={}, + status="normal", + introduction="", + created_at=1700000000, + updated_at=1700000000, + ) + mock_rename.return_value = conv + + with app.test_request_context(f"/conversations/{c_id}/name", method="POST", json={"name": "New Name"}): + result = ConversationRenameApi().post(_chat_app(), _end_user(), c_id) + + assert result["name"] == "New Name" + + @patch( + "controllers.web.conversation.ConversationService.rename", + side_effect=ConversationNotExistsError(), + ) + @patch("controllers.web.conversation.web_ns") + def test_rename_not_found(self, mock_ns: MagicMock, mock_rename: MagicMock, app: Flask) -> None: + c_id = uuid4() + mock_ns.payload = {"name": "X", "auto_generate": False} + + with app.test_request_context(f"/conversations/{c_id}/name", method="POST", json={"name": "X"}): + with pytest.raises(NotFound, match="Conversation Not Exists"): + ConversationRenameApi().post(_chat_app(), _end_user(), c_id) + + +# --------------------------------------------------------------------------- +# ConversationPinApi / ConversationUnPinApi +# --------------------------------------------------------------------------- +class TestConversationPinApi: + def test_non_chat_mode_raises(self, app: Flask) -> None: + with app.test_request_context(f"/conversations/{uuid4()}/pin", method="PATCH"): + with pytest.raises(NotChatAppError): + ConversationPinApi().patch(_completion_app(), _end_user(), uuid4()) + + @patch("controllers.web.conversation.WebConversationService.pin") + def test_pin_success(self, mock_pin: MagicMock, app: Flask) -> None: + c_id = uuid4() + with app.test_request_context(f"/conversations/{c_id}/pin", method="PATCH"): + result = ConversationPinApi().patch(_chat_app(), _end_user(), c_id) + + assert result["result"] == "success" + + @patch("controllers.web.conversation.WebConversationService.pin", side_effect=ConversationNotExistsError()) + def test_pin_not_found(self, mock_pin: MagicMock, app: Flask) -> None: + c_id = uuid4() + with app.test_request_context(f"/conversations/{c_id}/pin", method="PATCH"): + with pytest.raises(NotFound): + ConversationPinApi().patch(_chat_app(), _end_user(), c_id) + + +class TestConversationUnPinApi: + def test_non_chat_mode_raises(self, app: Flask) -> None: + with app.test_request_context(f"/conversations/{uuid4()}/unpin", method="PATCH"): + with pytest.raises(NotChatAppError): + ConversationUnPinApi().patch(_completion_app(), _end_user(), uuid4()) + + @patch("controllers.web.conversation.WebConversationService.unpin") + def test_unpin_success(self, mock_unpin: MagicMock, app: Flask) -> None: + c_id = uuid4() + with app.test_request_context(f"/conversations/{c_id}/unpin", method="PATCH"): + result = ConversationUnPinApi().patch(_chat_app(), _end_user(), c_id) + + assert result["result"] == "success" diff --git a/api/tests/unit_tests/controllers/web/test_error.py b/api/tests/unit_tests/controllers/web/test_error.py new file mode 100644 index 0000000000..0387d002ba --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_error.py @@ -0,0 +1,75 @@ +"""Unit tests for controllers.web.error HTTP exception classes.""" + +from __future__ import annotations + +import pytest + +from controllers.web.error import ( + AppMoreLikeThisDisabledError, + AppSuggestedQuestionsAfterAnswerDisabledError, + AppUnavailableError, + AudioTooLargeError, + CompletionRequestError, + ConversationCompletedError, + InvalidArgumentError, + InvokeRateLimitError, + NoAudioUploadedError, + NotChatAppError, + NotCompletionAppError, + NotFoundError, + NotWorkflowAppError, + ProviderModelCurrentlyNotSupportError, + ProviderNotInitializeError, + ProviderNotSupportSpeechToTextError, + ProviderQuotaExceededError, + UnsupportedAudioTypeError, + WebAppAuthAccessDeniedError, + WebAppAuthRequiredError, + WebFormRateLimitExceededError, +) + +_ERROR_SPECS: list[tuple[type, str, int]] = [ + (AppUnavailableError, "app_unavailable", 400), + (NotCompletionAppError, "not_completion_app", 400), + (NotChatAppError, "not_chat_app", 400), + (NotWorkflowAppError, "not_workflow_app", 400), + (ConversationCompletedError, "conversation_completed", 400), + (ProviderNotInitializeError, "provider_not_initialize", 400), + (ProviderQuotaExceededError, "provider_quota_exceeded", 400), + (ProviderModelCurrentlyNotSupportError, "model_currently_not_support", 400), + (CompletionRequestError, "completion_request_error", 400), + (AppMoreLikeThisDisabledError, "app_more_like_this_disabled", 403), + (AppSuggestedQuestionsAfterAnswerDisabledError, "app_suggested_questions_after_answer_disabled", 403), + (NoAudioUploadedError, "no_audio_uploaded", 400), + (AudioTooLargeError, "audio_too_large", 413), + (UnsupportedAudioTypeError, "unsupported_audio_type", 415), + (ProviderNotSupportSpeechToTextError, "provider_not_support_speech_to_text", 400), + (WebAppAuthRequiredError, "web_sso_auth_required", 401), + (WebAppAuthAccessDeniedError, "web_app_access_denied", 401), + (InvokeRateLimitError, "rate_limit_error", 429), + (WebFormRateLimitExceededError, "web_form_rate_limit_exceeded", 429), + (NotFoundError, "not_found", 404), + (InvalidArgumentError, "invalid_param", 400), +] + + +@pytest.mark.parametrize( + ("cls", "expected_code", "expected_status"), + _ERROR_SPECS, + ids=[cls.__name__ for cls, _, _ in _ERROR_SPECS], +) +def test_error_class_attributes(cls: type, expected_code: str, expected_status: int) -> None: + """Each error class exposes the correct error_code and HTTP status code.""" + assert cls.error_code == expected_code + assert cls.code == expected_status + + +def test_error_classes_have_description() -> None: + """Every error class has a description (string or None for generic errors).""" + # NotFoundError and InvalidArgumentError use None description by design + _NO_DESCRIPTION = {NotFoundError, InvalidArgumentError} + for cls, _, _ in _ERROR_SPECS: + if cls in _NO_DESCRIPTION: + continue + assert isinstance(cls.description, str), f"{cls.__name__} missing description" + assert len(cls.description) > 0, f"{cls.__name__} has empty description" diff --git a/api/tests/unit_tests/controllers/web/test_feature.py b/api/tests/unit_tests/controllers/web/test_feature.py new file mode 100644 index 0000000000..fe45d5f059 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_feature.py @@ -0,0 +1,38 @@ +"""Unit tests for controllers.web.feature endpoints.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from flask import Flask + +from controllers.web.feature import SystemFeatureApi + + +class TestSystemFeatureApi: + @patch("controllers.web.feature.FeatureService.get_system_features") + def test_returns_system_features(self, mock_features: MagicMock, app: Flask) -> None: + mock_model = MagicMock() + mock_model.model_dump.return_value = {"sso_enforced_for_signin": False, "webapp_auth": {"enabled": False}} + mock_features.return_value = mock_model + + with app.test_request_context("/system-features"): + result = SystemFeatureApi().get() + + assert result == {"sso_enforced_for_signin": False, "webapp_auth": {"enabled": False}} + mock_features.assert_called_once() + + @patch("controllers.web.feature.FeatureService.get_system_features") + def test_unauthenticated_access(self, mock_features: MagicMock, app: Flask) -> None: + """SystemFeatureApi is unauthenticated by design — no WebApiResource decorator.""" + mock_model = MagicMock() + mock_model.model_dump.return_value = {} + mock_features.return_value = mock_model + + # Verify it's a bare Resource, not WebApiResource + from flask_restx import Resource + + from controllers.web.wraps import WebApiResource + + assert issubclass(SystemFeatureApi, Resource) + assert not issubclass(SystemFeatureApi, WebApiResource) diff --git a/api/tests/unit_tests/controllers/web/test_files.py b/api/tests/unit_tests/controllers/web/test_files.py new file mode 100644 index 0000000000..a3921b0373 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_files.py @@ -0,0 +1,89 @@ +"""Unit tests for controllers.web.files endpoints.""" + +from __future__ import annotations + +from io import BytesIO +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.common.errors import ( + FilenameNotExistsError, + FileTooLargeError, + NoFileUploadedError, + TooManyFilesError, +) +from controllers.web.files import FileApi + + +def _app_model() -> SimpleNamespace: + return SimpleNamespace(id="app-1") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +class TestFileApi: + def test_no_file_uploaded(self, app: Flask) -> None: + with app.test_request_context("/files/upload", method="POST", content_type="multipart/form-data"): + with pytest.raises(NoFileUploadedError): + FileApi().post(_app_model(), _end_user()) + + def test_too_many_files(self, app: Flask) -> None: + data = { + "file": (BytesIO(b"a"), "a.txt"), + "file2": (BytesIO(b"b"), "b.txt"), + } + with app.test_request_context("/files/upload", method="POST", data=data, content_type="multipart/form-data"): + # Now has "file" key but len(request.files) > 1 + with pytest.raises(TooManyFilesError): + FileApi().post(_app_model(), _end_user()) + + def test_filename_missing(self, app: Flask) -> None: + data = {"file": (BytesIO(b"content"), "")} + with app.test_request_context("/files/upload", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(FilenameNotExistsError): + FileApi().post(_app_model(), _end_user()) + + @patch("controllers.web.files.FileService") + @patch("controllers.web.files.db") + def test_upload_success(self, mock_db: MagicMock, mock_file_svc_cls: MagicMock, app: Flask) -> None: + mock_db.engine = "engine" + from datetime import datetime + + upload_file = SimpleNamespace( + id="file-1", + name="test.txt", + size=100, + extension="txt", + mime_type="text/plain", + created_by="eu-1", + created_at=datetime(2024, 1, 1), + ) + mock_file_svc_cls.return_value.upload_file.return_value = upload_file + + data = {"file": (BytesIO(b"content"), "test.txt")} + with app.test_request_context("/files/upload", method="POST", data=data, content_type="multipart/form-data"): + result, status = FileApi().post(_app_model(), _end_user()) + + assert status == 201 + assert result["id"] == "file-1" + assert result["name"] == "test.txt" + + @patch("controllers.web.files.FileService") + @patch("controllers.web.files.db") + def test_file_too_large_from_service(self, mock_db: MagicMock, mock_file_svc_cls: MagicMock, app: Flask) -> None: + import services.errors.file + + mock_db.engine = "engine" + mock_file_svc_cls.return_value.upload_file.side_effect = services.errors.file.FileTooLargeError( + description="max 10MB" + ) + + data = {"file": (BytesIO(b"big"), "big.txt")} + with app.test_request_context("/files/upload", method="POST", data=data, content_type="multipart/form-data"): + with pytest.raises(FileTooLargeError): + FileApi().post(_app_model(), _end_user()) diff --git a/api/tests/unit_tests/controllers/web/test_message_endpoints.py b/api/tests/unit_tests/controllers/web/test_message_endpoints.py new file mode 100644 index 0000000000..89ab93d8d4 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_message_endpoints.py @@ -0,0 +1,156 @@ +"""Unit tests for controllers.web.message — feedback, more-like-this, suggested questions.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest +from flask import Flask +from werkzeug.exceptions import NotFound + +from controllers.web.error import ( + AppMoreLikeThisDisabledError, + NotChatAppError, + NotCompletionAppError, +) +from controllers.web.message import ( + MessageFeedbackApi, + MessageMoreLikeThisApi, + MessageSuggestedQuestionApi, +) +from services.errors.app import MoreLikeThisDisabledError +from services.errors.message import MessageNotExistsError + + +def _chat_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="chat") + + +def _completion_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="completion") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +# --------------------------------------------------------------------------- +# MessageFeedbackApi +# --------------------------------------------------------------------------- +class TestMessageFeedbackApi: + @patch("controllers.web.message.MessageService.create_feedback") + @patch("controllers.web.message.web_ns") + def test_feedback_success(self, mock_ns: MagicMock, mock_create: MagicMock, app: Flask) -> None: + mock_ns.payload = {"rating": "like", "content": "great"} + msg_id = uuid4() + + with app.test_request_context(f"/messages/{msg_id}/feedbacks", method="POST"): + result = MessageFeedbackApi().post(_chat_app(), _end_user(), msg_id) + + assert result == {"result": "success"} + mock_create.assert_called_once() + + @patch("controllers.web.message.MessageService.create_feedback") + @patch("controllers.web.message.web_ns") + def test_feedback_null_rating(self, mock_ns: MagicMock, mock_create: MagicMock, app: Flask) -> None: + mock_ns.payload = {"rating": None} + msg_id = uuid4() + + with app.test_request_context(f"/messages/{msg_id}/feedbacks", method="POST"): + result = MessageFeedbackApi().post(_chat_app(), _end_user(), msg_id) + + assert result == {"result": "success"} + + @patch( + "controllers.web.message.MessageService.create_feedback", + side_effect=MessageNotExistsError(), + ) + @patch("controllers.web.message.web_ns") + def test_feedback_message_not_found(self, mock_ns: MagicMock, mock_create: MagicMock, app: Flask) -> None: + mock_ns.payload = {"rating": "dislike"} + msg_id = uuid4() + + with app.test_request_context(f"/messages/{msg_id}/feedbacks", method="POST"): + with pytest.raises(NotFound, match="Message Not Exists"): + MessageFeedbackApi().post(_chat_app(), _end_user(), msg_id) + + +# --------------------------------------------------------------------------- +# MessageMoreLikeThisApi +# --------------------------------------------------------------------------- +class TestMessageMoreLikeThisApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/messages/{msg_id}/more-like-this?response_mode=blocking"): + with pytest.raises(NotCompletionAppError): + MessageMoreLikeThisApi().get(_chat_app(), _end_user(), msg_id) + + @patch("controllers.web.message.helper.compact_generate_response", return_value={"answer": "similar"}) + @patch("controllers.web.message.AppGenerateService.generate_more_like_this") + def test_happy_path(self, mock_gen: MagicMock, mock_compact: MagicMock, app: Flask) -> None: + msg_id = uuid4() + mock_gen.return_value = "response" + + with app.test_request_context(f"/messages/{msg_id}/more-like-this?response_mode=blocking"): + result = MessageMoreLikeThisApi().get(_completion_app(), _end_user(), msg_id) + + assert result == {"answer": "similar"} + + @patch( + "controllers.web.message.AppGenerateService.generate_more_like_this", + side_effect=MessageNotExistsError(), + ) + def test_message_not_found(self, mock_gen: MagicMock, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/messages/{msg_id}/more-like-this?response_mode=blocking"): + with pytest.raises(NotFound, match="Message Not Exists"): + MessageMoreLikeThisApi().get(_completion_app(), _end_user(), msg_id) + + @patch( + "controllers.web.message.AppGenerateService.generate_more_like_this", + side_effect=MoreLikeThisDisabledError(), + ) + def test_feature_disabled(self, mock_gen: MagicMock, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/messages/{msg_id}/more-like-this?response_mode=blocking"): + with pytest.raises(AppMoreLikeThisDisabledError): + MessageMoreLikeThisApi().get(_completion_app(), _end_user(), msg_id) + + +# --------------------------------------------------------------------------- +# MessageSuggestedQuestionApi +# --------------------------------------------------------------------------- +class TestMessageSuggestedQuestionApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/messages/{msg_id}/suggested-questions"): + with pytest.raises(NotChatAppError): + MessageSuggestedQuestionApi().get(_completion_app(), _end_user(), msg_id) + + def test_wrong_mode_raises(self, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/messages/{msg_id}/suggested-questions"): + with pytest.raises(NotChatAppError): + MessageSuggestedQuestionApi().get(_completion_app(), _end_user(), msg_id) + + @patch("controllers.web.message.MessageService.get_suggested_questions_after_answer") + def test_happy_path(self, mock_suggest: MagicMock, app: Flask) -> None: + msg_id = uuid4() + mock_suggest.return_value = ["What about X?", "Tell me more about Y."] + + with app.test_request_context(f"/messages/{msg_id}/suggested-questions"): + result = MessageSuggestedQuestionApi().get(_chat_app(), _end_user(), msg_id) + + assert result["data"] == ["What about X?", "Tell me more about Y."] + + @patch( + "controllers.web.message.MessageService.get_suggested_questions_after_answer", + side_effect=MessageNotExistsError(), + ) + def test_message_not_found(self, mock_suggest: MagicMock, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/messages/{msg_id}/suggested-questions"): + with pytest.raises(NotFound, match="Message not found"): + MessageSuggestedQuestionApi().get(_chat_app(), _end_user(), msg_id) diff --git a/api/tests/unit_tests/controllers/web/test_message_list.py b/api/tests/unit_tests/controllers/web/test_message_list.py index 1c096bfbcf..2bb425cdba 100644 --- a/api/tests/unit_tests/controllers/web/test_message_list.py +++ b/api/tests/unit_tests/controllers/web/test_message_list.py @@ -137,7 +137,7 @@ def test_message_list_mapping(app: Flask) -> None: {"id": "file-dict", "filename": "a.txt", "type": "file", "transfer_method": "local"}, message_file_obj, ], - status="success", + status="normal", error=None, message_metadata_dict={"meta": "value"}, extra_contents=[ diff --git a/api/tests/unit_tests/controllers/web/test_passport.py b/api/tests/unit_tests/controllers/web/test_passport.py new file mode 100644 index 0000000000..58d58626b2 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_passport.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import pytest +from werkzeug.exceptions import NotFound, Unauthorized + +from controllers.web.error import WebAppAuthRequiredError +from controllers.web.passport import ( + PassportService, + decode_enterprise_webapp_user_id, + exchange_token_for_existing_web_user, + generate_session_id, +) +from services.webapp_auth_service import WebAppAuthType + + +def test_decode_enterprise_webapp_user_id_none() -> None: + assert decode_enterprise_webapp_user_id(None) is None + + +def test_decode_enterprise_webapp_user_id_invalid_source(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(PassportService, "verify", lambda *_args, **_kwargs: {"token_source": "bad"}) + with pytest.raises(Unauthorized): + decode_enterprise_webapp_user_id("token") + + +def test_decode_enterprise_webapp_user_id_valid(monkeypatch: pytest.MonkeyPatch) -> None: + decoded = {"token_source": "webapp_login_token", "user_id": "u1"} + monkeypatch.setattr(PassportService, "verify", lambda *_args, **_kwargs: decoded) + assert decode_enterprise_webapp_user_id("token") == decoded + + +def test_exchange_token_public_flow(monkeypatch: pytest.MonkeyPatch) -> None: + site = SimpleNamespace(id="s1", app_id="a1", code="code", status="normal") + app_model = SimpleNamespace(id="a1", status="normal", enable_site=True) + + def _scalar_side_effect(*_args, **_kwargs): + if not hasattr(_scalar_side_effect, "calls"): + _scalar_side_effect.calls = 0 + _scalar_side_effect.calls += 1 + return site if _scalar_side_effect.calls == 1 else app_model + + db_session = SimpleNamespace(scalar=_scalar_side_effect) + monkeypatch.setattr("controllers.web.passport.db", SimpleNamespace(session=db_session)) + monkeypatch.setattr("controllers.web.passport._exchange_for_public_app_token", lambda *_args, **_kwargs: "resp") + + decoded = {"auth_type": "public"} + result = exchange_token_for_existing_web_user("code", decoded, WebAppAuthType.PUBLIC) + assert result == "resp" + + +def test_exchange_token_requires_external(monkeypatch: pytest.MonkeyPatch) -> None: + site = SimpleNamespace(id="s1", app_id="a1", code="code", status="normal") + app_model = SimpleNamespace(id="a1", status="normal", enable_site=True) + + def _scalar_side_effect(*_args, **_kwargs): + if not hasattr(_scalar_side_effect, "calls"): + _scalar_side_effect.calls = 0 + _scalar_side_effect.calls += 1 + return site if _scalar_side_effect.calls == 1 else app_model + + db_session = SimpleNamespace(scalar=_scalar_side_effect) + monkeypatch.setattr("controllers.web.passport.db", SimpleNamespace(session=db_session)) + + decoded = {"auth_type": "internal"} + with pytest.raises(WebAppAuthRequiredError): + exchange_token_for_existing_web_user("code", decoded, WebAppAuthType.EXTERNAL) + + +def test_exchange_token_missing_session_id(monkeypatch: pytest.MonkeyPatch) -> None: + site = SimpleNamespace(id="s1", app_id="a1", code="code", status="normal") + app_model = SimpleNamespace(id="a1", status="normal", enable_site=True, tenant_id="t1") + + def _scalar_side_effect(*_args, **_kwargs): + if not hasattr(_scalar_side_effect, "calls"): + _scalar_side_effect.calls = 0 + _scalar_side_effect.calls += 1 + if _scalar_side_effect.calls == 1: + return site + if _scalar_side_effect.calls == 2: + return app_model + return None + + db_session = SimpleNamespace(scalar=_scalar_side_effect, add=lambda *_a, **_k: None, commit=lambda: None) + monkeypatch.setattr("controllers.web.passport.db", SimpleNamespace(session=db_session)) + + decoded = {"auth_type": "internal"} + with pytest.raises(NotFound): + exchange_token_for_existing_web_user("code", decoded, WebAppAuthType.INTERNAL) + + +def test_generate_session_id(monkeypatch: pytest.MonkeyPatch) -> None: + counts = [1, 0] + + def _scalar(*_args, **_kwargs): + return counts.pop(0) + + db_session = SimpleNamespace(scalar=_scalar) + monkeypatch.setattr("controllers.web.passport.db", SimpleNamespace(session=db_session)) + + session_id = generate_session_id() + assert session_id diff --git a/api/tests/unit_tests/controllers/web/test_pydantic_models.py b/api/tests/unit_tests/controllers/web/test_pydantic_models.py new file mode 100644 index 0000000000..dcf8133712 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_pydantic_models.py @@ -0,0 +1,423 @@ +"""Unit tests for Pydantic models defined in controllers.web modules. + +Covers validation logic, field defaults, constraints, and custom validators +for all ~15 Pydantic models across the web controller layer. +""" + +from __future__ import annotations + +from uuid import uuid4 + +import pytest +from pydantic import ValidationError + +# --------------------------------------------------------------------------- +# app.py models +# --------------------------------------------------------------------------- +from controllers.web.app import AppAccessModeQuery + + +class TestAppAccessModeQuery: + def test_alias_resolution(self) -> None: + q = AppAccessModeQuery.model_validate({"appId": "abc", "appCode": "xyz"}) + assert q.app_id == "abc" + assert q.app_code == "xyz" + + def test_defaults_to_none(self) -> None: + q = AppAccessModeQuery.model_validate({}) + assert q.app_id is None + assert q.app_code is None + + def test_accepts_snake_case(self) -> None: + q = AppAccessModeQuery(app_id="id1", app_code="code1") + assert q.app_id == "id1" + assert q.app_code == "code1" + + +# --------------------------------------------------------------------------- +# audio.py models +# --------------------------------------------------------------------------- +from controllers.web.audio import TextToAudioPayload + + +class TestTextToAudioPayload: + def test_defaults(self) -> None: + p = TextToAudioPayload.model_validate({}) + assert p.message_id is None + assert p.voice is None + assert p.text is None + assert p.streaming is None + + def test_valid_uuid_message_id(self) -> None: + uid = str(uuid4()) + p = TextToAudioPayload(message_id=uid) + assert p.message_id == uid + + def test_none_message_id_passthrough(self) -> None: + p = TextToAudioPayload(message_id=None) + assert p.message_id is None + + def test_invalid_uuid_message_id(self) -> None: + with pytest.raises(ValidationError, match="not a valid uuid"): + TextToAudioPayload(message_id="not-a-uuid") + + +# --------------------------------------------------------------------------- +# completion.py models +# --------------------------------------------------------------------------- +from controllers.web.completion import ChatMessagePayload, CompletionMessagePayload + + +class TestCompletionMessagePayload: + def test_defaults(self) -> None: + p = CompletionMessagePayload(inputs={}) + assert p.query == "" + assert p.files is None + assert p.response_mode is None + assert p.retriever_from == "web_app" + + def test_accepts_full_payload(self) -> None: + p = CompletionMessagePayload( + inputs={"key": "val"}, + query="test", + files=[{"id": "f1"}], + response_mode="streaming", + ) + assert p.response_mode == "streaming" + assert p.files == [{"id": "f1"}] + + def test_invalid_response_mode(self) -> None: + with pytest.raises(ValidationError): + CompletionMessagePayload(inputs={}, response_mode="invalid") + + +class TestChatMessagePayload: + def test_valid_uuid_fields(self) -> None: + cid = str(uuid4()) + pid = str(uuid4()) + p = ChatMessagePayload(inputs={}, query="hi", conversation_id=cid, parent_message_id=pid) + assert p.conversation_id == cid + assert p.parent_message_id == pid + + def test_none_uuid_fields(self) -> None: + p = ChatMessagePayload(inputs={}, query="hi") + assert p.conversation_id is None + assert p.parent_message_id is None + + def test_invalid_conversation_id(self) -> None: + with pytest.raises(ValidationError, match="not a valid uuid"): + ChatMessagePayload(inputs={}, query="hi", conversation_id="bad") + + def test_invalid_parent_message_id(self) -> None: + with pytest.raises(ValidationError, match="not a valid uuid"): + ChatMessagePayload(inputs={}, query="hi", parent_message_id="bad") + + def test_query_required(self) -> None: + with pytest.raises(ValidationError): + ChatMessagePayload(inputs={}) + + +# --------------------------------------------------------------------------- +# conversation.py models +# --------------------------------------------------------------------------- +from controllers.web.conversation import ConversationListQuery, ConversationRenamePayload + + +class TestConversationListQuery: + def test_defaults(self) -> None: + q = ConversationListQuery() + assert q.last_id is None + assert q.limit == 20 + assert q.pinned is None + assert q.sort_by == "-updated_at" + + def test_limit_lower_bound(self) -> None: + with pytest.raises(ValidationError): + ConversationListQuery(limit=0) + + def test_limit_upper_bound(self) -> None: + with pytest.raises(ValidationError): + ConversationListQuery(limit=101) + + def test_limit_boundaries_valid(self) -> None: + assert ConversationListQuery(limit=1).limit == 1 + assert ConversationListQuery(limit=100).limit == 100 + + def test_valid_sort_by_options(self) -> None: + for opt in ("created_at", "-created_at", "updated_at", "-updated_at"): + assert ConversationListQuery(sort_by=opt).sort_by == opt + + def test_invalid_sort_by(self) -> None: + with pytest.raises(ValidationError): + ConversationListQuery(sort_by="invalid") + + def test_valid_last_id(self) -> None: + uid = str(uuid4()) + assert ConversationListQuery(last_id=uid).last_id == uid + + def test_invalid_last_id(self) -> None: + with pytest.raises(ValidationError, match="not a valid uuid"): + ConversationListQuery(last_id="not-uuid") + + +class TestConversationRenamePayload: + def test_auto_generate_true_no_name_required(self) -> None: + p = ConversationRenamePayload(auto_generate=True) + assert p.name is None + + def test_auto_generate_false_requires_name(self) -> None: + with pytest.raises(ValidationError, match="name is required"): + ConversationRenamePayload(auto_generate=False) + + def test_auto_generate_false_blank_name_rejected(self) -> None: + with pytest.raises(ValidationError, match="name is required"): + ConversationRenamePayload(auto_generate=False, name=" ") + + def test_auto_generate_false_with_valid_name(self) -> None: + p = ConversationRenamePayload(auto_generate=False, name="My Chat") + assert p.name == "My Chat" + + def test_defaults(self) -> None: + p = ConversationRenamePayload(name="test") + assert p.auto_generate is False + assert p.name == "test" + + +# --------------------------------------------------------------------------- +# message.py models +# --------------------------------------------------------------------------- +from controllers.web.message import MessageFeedbackPayload, MessageListQuery, MessageMoreLikeThisQuery + + +class TestMessageListQuery: + def test_valid_query(self) -> None: + cid = str(uuid4()) + q = MessageListQuery(conversation_id=cid) + assert q.conversation_id == cid + assert q.first_id is None + assert q.limit == 20 + + def test_invalid_conversation_id(self) -> None: + with pytest.raises(ValidationError, match="not a valid uuid"): + MessageListQuery(conversation_id="bad") + + def test_limit_bounds(self) -> None: + cid = str(uuid4()) + with pytest.raises(ValidationError): + MessageListQuery(conversation_id=cid, limit=0) + with pytest.raises(ValidationError): + MessageListQuery(conversation_id=cid, limit=101) + + def test_valid_first_id(self) -> None: + cid = str(uuid4()) + fid = str(uuid4()) + q = MessageListQuery(conversation_id=cid, first_id=fid) + assert q.first_id == fid + + def test_invalid_first_id(self) -> None: + cid = str(uuid4()) + with pytest.raises(ValidationError, match="not a valid uuid"): + MessageListQuery(conversation_id=cid, first_id="invalid") + + +class TestMessageFeedbackPayload: + def test_defaults(self) -> None: + p = MessageFeedbackPayload() + assert p.rating is None + assert p.content is None + + def test_valid_ratings(self) -> None: + assert MessageFeedbackPayload(rating="like").rating == "like" + assert MessageFeedbackPayload(rating="dislike").rating == "dislike" + + def test_invalid_rating(self) -> None: + with pytest.raises(ValidationError): + MessageFeedbackPayload(rating="neutral") + + +class TestMessageMoreLikeThisQuery: + def test_valid_modes(self) -> None: + assert MessageMoreLikeThisQuery(response_mode="blocking").response_mode == "blocking" + assert MessageMoreLikeThisQuery(response_mode="streaming").response_mode == "streaming" + + def test_invalid_mode(self) -> None: + with pytest.raises(ValidationError): + MessageMoreLikeThisQuery(response_mode="invalid") + + def test_required(self) -> None: + with pytest.raises(ValidationError): + MessageMoreLikeThisQuery() + + +# --------------------------------------------------------------------------- +# remote_files.py models +# --------------------------------------------------------------------------- +from controllers.web.remote_files import RemoteFileUploadPayload + + +class TestRemoteFileUploadPayload: + def test_valid_url(self) -> None: + p = RemoteFileUploadPayload(url="https://example.com/file.pdf") + assert str(p.url) == "https://example.com/file.pdf" + + def test_invalid_url(self) -> None: + with pytest.raises(ValidationError): + RemoteFileUploadPayload(url="not-a-url") + + def test_url_required(self) -> None: + with pytest.raises(ValidationError): + RemoteFileUploadPayload() + + +# --------------------------------------------------------------------------- +# saved_message.py models +# --------------------------------------------------------------------------- +from controllers.web.saved_message import SavedMessageCreatePayload, SavedMessageListQuery + + +class TestSavedMessageListQuery: + def test_defaults(self) -> None: + q = SavedMessageListQuery() + assert q.last_id is None + assert q.limit == 20 + + def test_limit_bounds(self) -> None: + with pytest.raises(ValidationError): + SavedMessageListQuery(limit=0) + with pytest.raises(ValidationError): + SavedMessageListQuery(limit=101) + + def test_valid_last_id(self) -> None: + uid = str(uuid4()) + q = SavedMessageListQuery(last_id=uid) + assert q.last_id == uid + + def test_empty_last_id(self) -> None: + q = SavedMessageListQuery(last_id="") + assert q.last_id == "" + + +class TestSavedMessageCreatePayload: + def test_valid_message_id(self) -> None: + uid = str(uuid4()) + p = SavedMessageCreatePayload(message_id=uid) + assert p.message_id == uid + + def test_required(self) -> None: + with pytest.raises(ValidationError): + SavedMessageCreatePayload() + + +# --------------------------------------------------------------------------- +# workflow.py models +# --------------------------------------------------------------------------- +from controllers.web.workflow import WorkflowRunPayload + + +class TestWorkflowRunPayload: + def test_defaults(self) -> None: + p = WorkflowRunPayload(inputs={}) + assert p.inputs == {} + assert p.files is None + + def test_with_files(self) -> None: + p = WorkflowRunPayload(inputs={"k": "v"}, files=[{"id": "f1"}]) + assert p.files == [{"id": "f1"}] + + def test_inputs_required(self) -> None: + with pytest.raises(ValidationError): + WorkflowRunPayload() + + +# --------------------------------------------------------------------------- +# forgot_password.py models +# --------------------------------------------------------------------------- +from controllers.web.forgot_password import ( + ForgotPasswordCheckPayload, + ForgotPasswordResetPayload, + ForgotPasswordSendPayload, +) + + +class TestForgotPasswordSendPayload: + def test_valid_email(self) -> None: + p = ForgotPasswordSendPayload(email="user@example.com") + assert p.email == "user@example.com" + + def test_invalid_email(self) -> None: + with pytest.raises(ValidationError, match="not a valid email"): + ForgotPasswordSendPayload(email="not-an-email") + + def test_language_optional(self) -> None: + p = ForgotPasswordSendPayload(email="a@b.com") + assert p.language is None + + +class TestForgotPasswordCheckPayload: + def test_valid(self) -> None: + p = ForgotPasswordCheckPayload(email="a@b.com", code="1234", token="tok") + assert p.email == "a@b.com" + assert p.code == "1234" + assert p.token == "tok" + + def test_empty_token_rejected(self) -> None: + with pytest.raises(ValidationError): + ForgotPasswordCheckPayload(email="a@b.com", code="1234", token="") + + +class TestForgotPasswordResetPayload: + def test_valid_passwords(self) -> None: + p = ForgotPasswordResetPayload(token="tok", new_password="Valid1234", password_confirm="Valid1234") + assert p.new_password == "Valid1234" + + def test_weak_password_rejected(self) -> None: + with pytest.raises(ValidationError, match="Password must contain"): + ForgotPasswordResetPayload(token="tok", new_password="short", password_confirm="short") + + def test_letters_only_password_rejected(self) -> None: + with pytest.raises(ValidationError, match="Password must contain"): + ForgotPasswordResetPayload(token="tok", new_password="abcdefghi", password_confirm="abcdefghi") + + def test_digits_only_password_rejected(self) -> None: + with pytest.raises(ValidationError, match="Password must contain"): + ForgotPasswordResetPayload(token="tok", new_password="123456789", password_confirm="123456789") + + +# --------------------------------------------------------------------------- +# login.py models +# --------------------------------------------------------------------------- +from controllers.web.login import EmailCodeLoginSendPayload, EmailCodeLoginVerifyPayload, LoginPayload + + +class TestLoginPayload: + def test_valid(self) -> None: + p = LoginPayload(email="a@b.com", password="Valid1234") + assert p.email == "a@b.com" + + def test_invalid_email(self) -> None: + with pytest.raises(ValidationError, match="not a valid email"): + LoginPayload(email="bad", password="Valid1234") + + def test_weak_password(self) -> None: + with pytest.raises(ValidationError, match="Password must contain"): + LoginPayload(email="a@b.com", password="weak") + + +class TestEmailCodeLoginSendPayload: + def test_valid(self) -> None: + p = EmailCodeLoginSendPayload(email="a@b.com") + assert p.language is None + + def test_with_language(self) -> None: + p = EmailCodeLoginSendPayload(email="a@b.com", language="zh-Hans") + assert p.language == "zh-Hans" + + +class TestEmailCodeLoginVerifyPayload: + def test_valid(self) -> None: + p = EmailCodeLoginVerifyPayload(email="a@b.com", code="1234", token="tok") + assert p.code == "1234" + + def test_empty_token_rejected(self) -> None: + with pytest.raises(ValidationError): + EmailCodeLoginVerifyPayload(email="a@b.com", code="1234", token="") diff --git a/api/tests/unit_tests/controllers/web/test_remote_files.py b/api/tests/unit_tests/controllers/web/test_remote_files.py new file mode 100644 index 0000000000..8554f440b7 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_remote_files.py @@ -0,0 +1,147 @@ +"""Unit tests for controllers.web.remote_files endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.common.errors import FileTooLargeError, RemoteFileUploadError +from controllers.web.remote_files import RemoteFileInfoApi, RemoteFileUploadApi + + +def _app_model() -> SimpleNamespace: + return SimpleNamespace(id="app-1") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +# --------------------------------------------------------------------------- +# RemoteFileInfoApi +# --------------------------------------------------------------------------- +class TestRemoteFileInfoApi: + @patch("controllers.web.remote_files.ssrf_proxy") + def test_head_success(self, mock_proxy: MagicMock, app: Flask) -> None: + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"Content-Type": "application/pdf", "Content-Length": "1024"} + mock_proxy.head.return_value = mock_resp + + with app.test_request_context("/remote-files/https%3A%2F%2Fexample.com%2Ffile.pdf"): + result = RemoteFileInfoApi().get(_app_model(), _end_user(), "https%3A%2F%2Fexample.com%2Ffile.pdf") + + assert result["file_type"] == "application/pdf" + assert result["file_length"] == 1024 + + @patch("controllers.web.remote_files.ssrf_proxy") + def test_fallback_to_get(self, mock_proxy: MagicMock, app: Flask) -> None: + head_resp = MagicMock() + head_resp.status_code = 405 # Method not allowed + get_resp = MagicMock() + get_resp.status_code = 200 + get_resp.headers = {"Content-Type": "text/plain", "Content-Length": "42"} + get_resp.raise_for_status = MagicMock() + mock_proxy.head.return_value = head_resp + mock_proxy.get.return_value = get_resp + + with app.test_request_context("/remote-files/https%3A%2F%2Fexample.com%2Ffile.txt"): + result = RemoteFileInfoApi().get(_app_model(), _end_user(), "https%3A%2F%2Fexample.com%2Ffile.txt") + + assert result["file_type"] == "text/plain" + mock_proxy.get.assert_called_once() + + +# --------------------------------------------------------------------------- +# RemoteFileUploadApi +# --------------------------------------------------------------------------- +class TestRemoteFileUploadApi: + @patch("controllers.web.remote_files.file_helpers.get_signed_file_url", return_value="https://signed-url") + @patch("controllers.web.remote_files.FileService") + @patch("controllers.web.remote_files.helpers.guess_file_info_from_response") + @patch("controllers.web.remote_files.ssrf_proxy") + @patch("controllers.web.remote_files.web_ns") + @patch("controllers.web.remote_files.db") + def test_upload_success( + self, + mock_db: MagicMock, + mock_ns: MagicMock, + mock_proxy: MagicMock, + mock_guess: MagicMock, + mock_file_svc_cls: MagicMock, + mock_signed: MagicMock, + app: Flask, + ) -> None: + mock_db.engine = "engine" + mock_ns.payload = {"url": "https://example.com/file.pdf"} + head_resp = MagicMock() + head_resp.status_code = 200 + head_resp.content = b"pdf-content" + head_resp.request.method = "HEAD" + mock_proxy.head.return_value = head_resp + get_resp = MagicMock() + get_resp.content = b"pdf-content" + mock_proxy.get.return_value = get_resp + + mock_guess.return_value = SimpleNamespace( + filename="file.pdf", extension="pdf", mimetype="application/pdf", size=100 + ) + mock_file_svc_cls.is_file_size_within_limit.return_value = True + + from datetime import datetime + + upload_file = SimpleNamespace( + id="f-1", + name="file.pdf", + size=100, + extension="pdf", + mime_type="application/pdf", + created_by="eu-1", + created_at=datetime(2024, 1, 1), + ) + mock_file_svc_cls.return_value.upload_file.return_value = upload_file + + with app.test_request_context("/remote-files/upload", method="POST"): + result, status = RemoteFileUploadApi().post(_app_model(), _end_user()) + + assert status == 201 + assert result["id"] == "f-1" + + @patch("controllers.web.remote_files.FileService.is_file_size_within_limit", return_value=False) + @patch("controllers.web.remote_files.helpers.guess_file_info_from_response") + @patch("controllers.web.remote_files.ssrf_proxy") + @patch("controllers.web.remote_files.web_ns") + def test_file_too_large( + self, + mock_ns: MagicMock, + mock_proxy: MagicMock, + mock_guess: MagicMock, + mock_size_check: MagicMock, + app: Flask, + ) -> None: + mock_ns.payload = {"url": "https://example.com/big.zip"} + head_resp = MagicMock() + head_resp.status_code = 200 + mock_proxy.head.return_value = head_resp + mock_guess.return_value = SimpleNamespace( + filename="big.zip", extension="zip", mimetype="application/zip", size=999999999 + ) + + with app.test_request_context("/remote-files/upload", method="POST"): + with pytest.raises(FileTooLargeError): + RemoteFileUploadApi().post(_app_model(), _end_user()) + + @patch("controllers.web.remote_files.ssrf_proxy") + @patch("controllers.web.remote_files.web_ns") + def test_fetch_failure_raises(self, mock_ns: MagicMock, mock_proxy: MagicMock, app: Flask) -> None: + import httpx + + mock_ns.payload = {"url": "https://example.com/bad"} + mock_proxy.head.side_effect = httpx.RequestError("connection failed") + + with app.test_request_context("/remote-files/upload", method="POST"): + with pytest.raises(RemoteFileUploadError): + RemoteFileUploadApi().post(_app_model(), _end_user()) diff --git a/api/tests/unit_tests/controllers/web/test_saved_message.py b/api/tests/unit_tests/controllers/web/test_saved_message.py new file mode 100644 index 0000000000..3d55804912 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_saved_message.py @@ -0,0 +1,97 @@ +"""Unit tests for controllers.web.saved_message endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest +from flask import Flask +from werkzeug.exceptions import NotFound + +from controllers.web.error import NotCompletionAppError +from controllers.web.saved_message import SavedMessageApi, SavedMessageListApi +from services.errors.message import MessageNotExistsError + + +def _completion_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="completion") + + +def _chat_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="chat") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +# --------------------------------------------------------------------------- +# SavedMessageListApi (GET) +# --------------------------------------------------------------------------- +class TestSavedMessageListApiGet: + def test_non_completion_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/saved-messages"): + with pytest.raises(NotCompletionAppError): + SavedMessageListApi().get(_chat_app(), _end_user()) + + @patch("controllers.web.saved_message.SavedMessageService.pagination_by_last_id") + def test_happy_path(self, mock_paginate: MagicMock, app: Flask) -> None: + mock_paginate.return_value = SimpleNamespace(limit=20, has_more=False, data=[]) + + with app.test_request_context("/saved-messages?limit=20"): + result = SavedMessageListApi().get(_completion_app(), _end_user()) + + assert result["limit"] == 20 + assert result["has_more"] is False + + +# --------------------------------------------------------------------------- +# SavedMessageListApi (POST) +# --------------------------------------------------------------------------- +class TestSavedMessageListApiPost: + def test_non_completion_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/saved-messages", method="POST"): + with pytest.raises(NotCompletionAppError): + SavedMessageListApi().post(_chat_app(), _end_user()) + + @patch("controllers.web.saved_message.SavedMessageService.save") + @patch("controllers.web.saved_message.web_ns") + def test_save_success(self, mock_ns: MagicMock, mock_save: MagicMock, app: Flask) -> None: + msg_id = str(uuid4()) + mock_ns.payload = {"message_id": msg_id} + + with app.test_request_context("/saved-messages", method="POST"): + result = SavedMessageListApi().post(_completion_app(), _end_user()) + + assert result["result"] == "success" + + @patch("controllers.web.saved_message.SavedMessageService.save", side_effect=MessageNotExistsError()) + @patch("controllers.web.saved_message.web_ns") + def test_save_not_found(self, mock_ns: MagicMock, mock_save: MagicMock, app: Flask) -> None: + mock_ns.payload = {"message_id": str(uuid4())} + + with app.test_request_context("/saved-messages", method="POST"): + with pytest.raises(NotFound, match="Message Not Exists"): + SavedMessageListApi().post(_completion_app(), _end_user()) + + +# --------------------------------------------------------------------------- +# SavedMessageApi (DELETE) +# --------------------------------------------------------------------------- +class TestSavedMessageApi: + def test_non_completion_mode_raises(self, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/saved-messages/{msg_id}", method="DELETE"): + with pytest.raises(NotCompletionAppError): + SavedMessageApi().delete(_chat_app(), _end_user(), msg_id) + + @patch("controllers.web.saved_message.SavedMessageService.delete") + def test_delete_success(self, mock_delete: MagicMock, app: Flask) -> None: + msg_id = uuid4() + with app.test_request_context(f"/saved-messages/{msg_id}", method="DELETE"): + result, status = SavedMessageApi().delete(_completion_app(), _end_user(), msg_id) + + assert status == 204 + assert result["result"] == "success" diff --git a/api/tests/unit_tests/controllers/web/test_site.py b/api/tests/unit_tests/controllers/web/test_site.py new file mode 100644 index 0000000000..557bf93e9e --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_site.py @@ -0,0 +1,126 @@ +"""Unit tests for controllers.web.site endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import Forbidden + +from controllers.web.site import AppSiteApi, AppSiteInfo + + +def _tenant(*, status: str = "normal") -> SimpleNamespace: + return SimpleNamespace( + id="tenant-1", + status=status, + plan="basic", + custom_config_dict={"remove_webapp_brand": False, "replace_webapp_logo": False}, + ) + + +def _site() -> SimpleNamespace: + return SimpleNamespace( + title="Site", + icon_type="emoji", + icon="robot", + icon_background="#fff", + description="desc", + default_language="en", + chat_color_theme="light", + chat_color_theme_inverted=False, + copyright=None, + privacy_policy=None, + custom_disclaimer=None, + prompt_public=False, + show_workflow_steps=True, + use_icon_as_answer_icon=False, + ) + + +# --------------------------------------------------------------------------- +# AppSiteApi +# --------------------------------------------------------------------------- +class TestAppSiteApi: + @patch("controllers.web.site.FeatureService.get_features") + @patch("controllers.web.site.db") + def test_happy_path(self, mock_db: MagicMock, mock_features: MagicMock, app: Flask) -> None: + app.config["RESTX_MASK_HEADER"] = "X-Fields" + mock_features.return_value = SimpleNamespace(can_replace_logo=False) + site_obj = _site() + mock_db.session.query.return_value.where.return_value.first.return_value = site_obj + tenant = _tenant() + app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1", tenant=tenant, enable_site=True) + end_user = SimpleNamespace(id="eu-1") + + with app.test_request_context("/site"): + result = AppSiteApi().get(app_model, end_user) + + # marshal_with serializes AppSiteInfo to a dict + assert result["app_id"] == "app-1" + assert result["plan"] == "basic" + assert result["enable_site"] is True + + @patch("controllers.web.site.db") + def test_missing_site_raises_forbidden(self, mock_db: MagicMock, app: Flask) -> None: + app.config["RESTX_MASK_HEADER"] = "X-Fields" + mock_db.session.query.return_value.where.return_value.first.return_value = None + tenant = _tenant() + app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1", tenant=tenant) + end_user = SimpleNamespace(id="eu-1") + + with app.test_request_context("/site"): + with pytest.raises(Forbidden): + AppSiteApi().get(app_model, end_user) + + @patch("controllers.web.site.db") + def test_archived_tenant_raises_forbidden(self, mock_db: MagicMock, app: Flask) -> None: + app.config["RESTX_MASK_HEADER"] = "X-Fields" + from models.account import TenantStatus + + mock_db.session.query.return_value.where.return_value.first.return_value = _site() + tenant = SimpleNamespace( + id="tenant-1", + status=TenantStatus.ARCHIVE, + plan="basic", + custom_config_dict={}, + ) + app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1", tenant=tenant) + end_user = SimpleNamespace(id="eu-1") + + with app.test_request_context("/site"): + with pytest.raises(Forbidden): + AppSiteApi().get(app_model, end_user) + + +# --------------------------------------------------------------------------- +# AppSiteInfo +# --------------------------------------------------------------------------- +class TestAppSiteInfo: + def test_basic_fields(self) -> None: + tenant = _tenant() + site_obj = _site() + info = AppSiteInfo(tenant, SimpleNamespace(id="app-1", enable_site=True), site_obj, "eu-1", False) + + assert info.app_id == "app-1" + assert info.end_user_id == "eu-1" + assert info.enable_site is True + assert info.plan == "basic" + assert info.can_replace_logo is False + assert info.model_config is None + + @patch("controllers.web.site.dify_config", SimpleNamespace(FILES_URL="https://files.example.com")) + def test_can_replace_logo_sets_custom_config(self) -> None: + tenant = SimpleNamespace( + id="tenant-1", + plan="pro", + custom_config_dict={"remove_webapp_brand": True, "replace_webapp_logo": True}, + ) + site_obj = _site() + info = AppSiteInfo(tenant, SimpleNamespace(id="app-1", enable_site=True), site_obj, "eu-1", True) + + assert info.can_replace_logo is True + assert info.custom_config["remove_webapp_brand"] is True + assert "webapp-logo" in info.custom_config["replace_webapp_logo"] diff --git a/api/tests/unit_tests/controllers/web/test_web_login.py b/api/tests/unit_tests/controllers/web/test_web_login.py index e62993e8d5..0661c02578 100644 --- a/api/tests/unit_tests/controllers/web/test_web_login.py +++ b/api/tests/unit_tests/controllers/web/test_web_login.py @@ -5,7 +5,8 @@ from unittest.mock import MagicMock, patch import pytest from flask import Flask -from controllers.web.login import EmailCodeLoginApi, EmailCodeLoginSendEmailApi +import services.errors.account +from controllers.web.login import EmailCodeLoginApi, EmailCodeLoginSendEmailApi, LoginApi, LoginStatusApi, LogoutApi def encode_code(code: str) -> str: @@ -89,3 +90,114 @@ class TestEmailCodeLoginApi: mock_revoke_token.assert_called_once_with("token-123") mock_login.assert_called_once() mock_reset_login_rate.assert_called_once_with("user@example.com") + + +class TestLoginApi: + @patch("controllers.web.login.WebAppAuthService.login", return_value="access-tok") + @patch("controllers.web.login.WebAppAuthService.authenticate") + def test_login_success(self, mock_auth: MagicMock, mock_login: MagicMock, app: Flask) -> None: + mock_auth.return_value = MagicMock() + + with app.test_request_context( + "/web/login", + method="POST", + json={"email": "user@example.com", "password": base64.b64encode(b"Valid1234").decode()}, + ): + response = LoginApi().post() + + assert response.get_json()["data"]["access_token"] == "access-tok" + mock_auth.assert_called_once() + + @patch( + "controllers.web.login.WebAppAuthService.authenticate", + side_effect=services.errors.account.AccountLoginError(), + ) + def test_login_banned_account(self, mock_auth: MagicMock, app: Flask) -> None: + from controllers.console.error import AccountBannedError + + with app.test_request_context( + "/web/login", + method="POST", + json={"email": "user@example.com", "password": base64.b64encode(b"Valid1234").decode()}, + ): + with pytest.raises(AccountBannedError): + LoginApi().post() + + @patch( + "controllers.web.login.WebAppAuthService.authenticate", + side_effect=services.errors.account.AccountPasswordError(), + ) + def test_login_wrong_password(self, mock_auth: MagicMock, app: Flask) -> None: + from controllers.console.auth.error import AuthenticationFailedError + + with app.test_request_context( + "/web/login", + method="POST", + json={"email": "user@example.com", "password": base64.b64encode(b"Valid1234").decode()}, + ): + with pytest.raises(AuthenticationFailedError): + LoginApi().post() + + +class TestLoginStatusApi: + @patch("controllers.web.login.extract_webapp_access_token", return_value=None) + def test_no_app_code_returns_logged_in_false(self, mock_extract: MagicMock, app: Flask) -> None: + with app.test_request_context("/web/login/status"): + result = LoginStatusApi().get() + + assert result["logged_in"] is False + assert result["app_logged_in"] is False + + @patch("controllers.web.login.decode_jwt_token") + @patch("controllers.web.login.PassportService") + @patch("controllers.web.login.WebAppAuthService.is_app_require_permission_check", return_value=False) + @patch("controllers.web.login.AppService.get_app_id_by_code", return_value="app-1") + @patch("controllers.web.login.extract_webapp_access_token", return_value="tok") + def test_public_app_user_logged_in( + self, + mock_extract: MagicMock, + mock_app_id: MagicMock, + mock_perm: MagicMock, + mock_passport: MagicMock, + mock_decode: MagicMock, + app: Flask, + ) -> None: + mock_decode.return_value = (MagicMock(), MagicMock()) + + with app.test_request_context("/web/login/status?app_code=code1"): + result = LoginStatusApi().get() + + assert result["logged_in"] is True + assert result["app_logged_in"] is True + + @patch("controllers.web.login.decode_jwt_token", side_effect=Exception("bad")) + @patch("controllers.web.login.PassportService") + @patch("controllers.web.login.WebAppAuthService.is_app_require_permission_check", return_value=True) + @patch("controllers.web.login.AppService.get_app_id_by_code", return_value="app-1") + @patch("controllers.web.login.extract_webapp_access_token", return_value="tok") + def test_private_app_passport_fails( + self, + mock_extract: MagicMock, + mock_app_id: MagicMock, + mock_perm: MagicMock, + mock_passport_cls: MagicMock, + mock_decode: MagicMock, + app: Flask, + ) -> None: + mock_passport_cls.return_value.verify.side_effect = Exception("bad") + + with app.test_request_context("/web/login/status?app_code=code1"): + result = LoginStatusApi().get() + + assert result["logged_in"] is False + assert result["app_logged_in"] is False + + +class TestLogoutApi: + @patch("controllers.web.login.clear_webapp_access_token_from_cookie") + def test_logout_success(self, mock_clear: MagicMock, app: Flask) -> None: + with app.test_request_context("/web/logout", method="POST"): + response = LogoutApi().post() + + assert response.get_json() == {"result": "success"} + mock_clear.assert_called_once() diff --git a/api/tests/unit_tests/controllers/web/test_web_passport.py b/api/tests/unit_tests/controllers/web/test_web_passport.py new file mode 100644 index 0000000000..19b1d8504a --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_web_passport.py @@ -0,0 +1,192 @@ +"""Unit tests for controllers.web.passport — token issuance and enterprise auth exchange.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import NotFound, Unauthorized + +from controllers.web.error import WebAppAuthRequiredError +from controllers.web.passport import ( + PassportResource, + decode_enterprise_webapp_user_id, + exchange_token_for_existing_web_user, + generate_session_id, +) +from services.webapp_auth_service import WebAppAuthType + + +# --------------------------------------------------------------------------- +# decode_enterprise_webapp_user_id +# --------------------------------------------------------------------------- +class TestDecodeEnterpriseWebappUserId: + def test_none_token_returns_none(self) -> None: + assert decode_enterprise_webapp_user_id(None) is None + + @patch("controllers.web.passport.PassportService") + def test_valid_token_returns_decoded(self, mock_passport_cls: MagicMock) -> None: + mock_passport_cls.return_value.verify.return_value = { + "token_source": "webapp_login_token", + "user_id": "u1", + } + result = decode_enterprise_webapp_user_id("valid-jwt") + assert result["user_id"] == "u1" + + @patch("controllers.web.passport.PassportService") + def test_wrong_source_raises_unauthorized(self, mock_passport_cls: MagicMock) -> None: + mock_passport_cls.return_value.verify.return_value = { + "token_source": "other_source", + } + with pytest.raises(Unauthorized, match="Expected 'webapp_login_token'"): + decode_enterprise_webapp_user_id("bad-jwt") + + @patch("controllers.web.passport.PassportService") + def test_missing_source_raises_unauthorized(self, mock_passport_cls: MagicMock) -> None: + mock_passport_cls.return_value.verify.return_value = {} + with pytest.raises(Unauthorized, match="Expected 'webapp_login_token'"): + decode_enterprise_webapp_user_id("no-source-jwt") + + +# --------------------------------------------------------------------------- +# generate_session_id +# --------------------------------------------------------------------------- +class TestGenerateSessionId: + @patch("controllers.web.passport.db") + def test_returns_unique_session_id(self, mock_db: MagicMock) -> None: + mock_db.session.scalar.return_value = 0 + sid = generate_session_id() + assert isinstance(sid, str) + assert len(sid) == 36 # UUID format + + @patch("controllers.web.passport.db") + def test_retries_on_collision(self, mock_db: MagicMock) -> None: + # First call returns count=1 (collision), second returns 0 + mock_db.session.scalar.side_effect = [1, 0] + sid = generate_session_id() + assert isinstance(sid, str) + assert mock_db.session.scalar.call_count == 2 + + +# --------------------------------------------------------------------------- +# exchange_token_for_existing_web_user +# --------------------------------------------------------------------------- +class TestExchangeTokenForExistingWebUser: + @patch("controllers.web.passport.PassportService") + @patch("controllers.web.passport.db") + def test_external_auth_type_mismatch_raises(self, mock_db: MagicMock, mock_passport_cls: MagicMock) -> None: + site = SimpleNamespace(code="code1", app_id="app-1") + app_model = SimpleNamespace(id="app-1", status="normal", enable_site=True, tenant_id="t1") + mock_db.session.scalar.side_effect = [site, app_model] + + decoded = {"user_id": "u1", "auth_type": "internal"} # mismatch: expected "external" + with pytest.raises(WebAppAuthRequiredError, match="external"): + exchange_token_for_existing_web_user( + app_code="code1", enterprise_user_decoded=decoded, auth_type=WebAppAuthType.EXTERNAL + ) + + @patch("controllers.web.passport.PassportService") + @patch("controllers.web.passport.db") + def test_internal_auth_type_mismatch_raises(self, mock_db: MagicMock, mock_passport_cls: MagicMock) -> None: + site = SimpleNamespace(code="code1", app_id="app-1") + app_model = SimpleNamespace(id="app-1", status="normal", enable_site=True, tenant_id="t1") + mock_db.session.scalar.side_effect = [site, app_model] + + decoded = {"user_id": "u1", "auth_type": "external"} # mismatch: expected "internal" + with pytest.raises(WebAppAuthRequiredError, match="internal"): + exchange_token_for_existing_web_user( + app_code="code1", enterprise_user_decoded=decoded, auth_type=WebAppAuthType.INTERNAL + ) + + @patch("controllers.web.passport.PassportService") + @patch("controllers.web.passport.db") + def test_site_not_found_raises(self, mock_db: MagicMock, mock_passport_cls: MagicMock) -> None: + mock_db.session.scalar.return_value = None + decoded = {"user_id": "u1", "auth_type": "external"} + with pytest.raises(NotFound): + exchange_token_for_existing_web_user( + app_code="code1", enterprise_user_decoded=decoded, auth_type=WebAppAuthType.EXTERNAL + ) + + +# --------------------------------------------------------------------------- +# PassportResource.get +# --------------------------------------------------------------------------- +class TestPassportResource: + @patch("controllers.web.passport.FeatureService.get_system_features") + def test_missing_app_code_raises_unauthorized(self, mock_features: MagicMock, app: Flask) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + with app.test_request_context("/passport"): + with pytest.raises(Unauthorized, match="X-App-Code"): + PassportResource().get() + + @patch("controllers.web.passport.PassportService") + @patch("controllers.web.passport.generate_session_id", return_value="new-sess-id") + @patch("controllers.web.passport.db") + @patch("controllers.web.passport.FeatureService.get_system_features") + def test_creates_new_end_user_when_no_user_id( + self, + mock_features: MagicMock, + mock_db: MagicMock, + mock_gen_session: MagicMock, + mock_passport_cls: MagicMock, + app: Flask, + ) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + site = SimpleNamespace(app_id="app-1", code="code1") + app_model = SimpleNamespace(id="app-1", status="normal", enable_site=True, tenant_id="t1") + mock_db.session.scalar.side_effect = [site, app_model] + mock_passport_cls.return_value.issue.return_value = "issued-token" + + with app.test_request_context("/passport", headers={"X-App-Code": "code1"}): + response = PassportResource().get() + + assert response.get_json()["access_token"] == "issued-token" + mock_db.session.add.assert_called_once() + mock_db.session.commit.assert_called_once() + + @patch("controllers.web.passport.PassportService") + @patch("controllers.web.passport.db") + @patch("controllers.web.passport.FeatureService.get_system_features") + def test_reuses_existing_end_user_when_user_id_provided( + self, + mock_features: MagicMock, + mock_db: MagicMock, + mock_passport_cls: MagicMock, + app: Flask, + ) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + site = SimpleNamespace(app_id="app-1", code="code1") + app_model = SimpleNamespace(id="app-1", status="normal", enable_site=True, tenant_id="t1") + existing_user = SimpleNamespace(id="eu-1", session_id="sess-existing") + mock_db.session.scalar.side_effect = [site, app_model, existing_user] + mock_passport_cls.return_value.issue.return_value = "reused-token" + + with app.test_request_context("/passport?user_id=sess-existing", headers={"X-App-Code": "code1"}): + response = PassportResource().get() + + assert response.get_json()["access_token"] == "reused-token" + # Should not create a new end user + mock_db.session.add.assert_not_called() + + @patch("controllers.web.passport.db") + @patch("controllers.web.passport.FeatureService.get_system_features") + def test_site_not_found_raises(self, mock_features: MagicMock, mock_db: MagicMock, app: Flask) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + mock_db.session.scalar.return_value = None + with app.test_request_context("/passport", headers={"X-App-Code": "code1"}): + with pytest.raises(NotFound): + PassportResource().get() + + @patch("controllers.web.passport.db") + @patch("controllers.web.passport.FeatureService.get_system_features") + def test_disabled_app_raises_not_found(self, mock_features: MagicMock, mock_db: MagicMock, app: Flask) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + site = SimpleNamespace(app_id="app-1", code="code1") + disabled_app = SimpleNamespace(id="app-1", status="normal", enable_site=False) + mock_db.session.scalar.side_effect = [site, disabled_app] + with app.test_request_context("/passport", headers={"X-App-Code": "code1"}): + with pytest.raises(NotFound): + PassportResource().get() diff --git a/api/tests/unit_tests/controllers/web/test_workflow.py b/api/tests/unit_tests/controllers/web/test_workflow.py new file mode 100644 index 0000000000..0973340527 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_workflow.py @@ -0,0 +1,95 @@ +"""Unit tests for controllers.web.workflow endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.web.error import ( + NotWorkflowAppError, + ProviderNotInitializeError, + ProviderQuotaExceededError, +) +from controllers.web.workflow import WorkflowRunApi, WorkflowTaskStopApi +from core.errors.error import ProviderTokenNotInitError, QuotaExceededError + + +def _workflow_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="workflow") + + +def _chat_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", mode="chat") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +# --------------------------------------------------------------------------- +# WorkflowRunApi +# --------------------------------------------------------------------------- +class TestWorkflowRunApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/workflows/run", method="POST"): + with pytest.raises(NotWorkflowAppError): + WorkflowRunApi().post(_chat_app(), _end_user()) + + @patch("controllers.web.workflow.helper.compact_generate_response", return_value={"result": "ok"}) + @patch("controllers.web.workflow.AppGenerateService.generate") + @patch("controllers.web.workflow.web_ns") + def test_happy_path(self, mock_ns: MagicMock, mock_gen: MagicMock, mock_compact: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {"key": "val"}} + mock_gen.return_value = "response" + + with app.test_request_context("/workflows/run", method="POST"): + result = WorkflowRunApi().post(_workflow_app(), _end_user()) + + assert result == {"result": "ok"} + + @patch( + "controllers.web.workflow.AppGenerateService.generate", + side_effect=ProviderTokenNotInitError(description="not init"), + ) + @patch("controllers.web.workflow.web_ns") + def test_provider_not_init(self, mock_ns: MagicMock, mock_gen: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}} + + with app.test_request_context("/workflows/run", method="POST"): + with pytest.raises(ProviderNotInitializeError): + WorkflowRunApi().post(_workflow_app(), _end_user()) + + @patch( + "controllers.web.workflow.AppGenerateService.generate", + side_effect=QuotaExceededError(), + ) + @patch("controllers.web.workflow.web_ns") + def test_quota_exceeded(self, mock_ns: MagicMock, mock_gen: MagicMock, app: Flask) -> None: + mock_ns.payload = {"inputs": {}} + + with app.test_request_context("/workflows/run", method="POST"): + with pytest.raises(ProviderQuotaExceededError): + WorkflowRunApi().post(_workflow_app(), _end_user()) + + +# --------------------------------------------------------------------------- +# WorkflowTaskStopApi +# --------------------------------------------------------------------------- +class TestWorkflowTaskStopApi: + def test_wrong_mode_raises(self, app: Flask) -> None: + with app.test_request_context("/workflows/tasks/task-1/stop", method="POST"): + with pytest.raises(NotWorkflowAppError): + WorkflowTaskStopApi().post(_chat_app(), _end_user(), "task-1") + + @patch("controllers.web.workflow.GraphEngineManager.send_stop_command") + @patch("controllers.web.workflow.AppQueueManager.set_stop_flag_no_user_check") + def test_stop_calls_both_mechanisms(self, mock_legacy: MagicMock, mock_graph: MagicMock, app: Flask) -> None: + with app.test_request_context("/workflows/tasks/task-1/stop", method="POST"): + result = WorkflowTaskStopApi().post(_workflow_app(), _end_user(), "task-1") + + assert result == {"result": "success"} + mock_legacy.assert_called_once_with("task-1") + mock_graph.assert_called_once_with("task-1") diff --git a/api/tests/unit_tests/controllers/web/test_workflow_events.py b/api/tests/unit_tests/controllers/web/test_workflow_events.py new file mode 100644 index 0000000000..64c09b5e22 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_workflow_events.py @@ -0,0 +1,127 @@ +"""Unit tests for controllers.web.workflow_events endpoints.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask + +from controllers.web.error import NotFoundError +from controllers.web.workflow_events import WorkflowEventsApi +from models.enums import CreatorUserRole + + +def _workflow_app() -> SimpleNamespace: + return SimpleNamespace(id="app-1", tenant_id="tenant-1", mode="workflow") + + +def _end_user() -> SimpleNamespace: + return SimpleNamespace(id="eu-1") + + +# --------------------------------------------------------------------------- +# WorkflowEventsApi +# --------------------------------------------------------------------------- +class TestWorkflowEventsApi: + @patch("controllers.web.workflow_events.DifyAPIRepositoryFactory") + @patch("controllers.web.workflow_events.db") + def test_workflow_run_not_found(self, mock_db: MagicMock, mock_factory: MagicMock, app: Flask) -> None: + mock_db.engine = "engine" + mock_repo = MagicMock() + mock_repo.get_workflow_run_by_id_and_tenant_id.return_value = None + mock_factory.create_api_workflow_run_repository.return_value = mock_repo + + with app.test_request_context("/workflow/run-1/events"): + with pytest.raises(NotFoundError): + WorkflowEventsApi().get(_workflow_app(), _end_user(), "run-1") + + @patch("controllers.web.workflow_events.DifyAPIRepositoryFactory") + @patch("controllers.web.workflow_events.db") + def test_workflow_run_wrong_app(self, mock_db: MagicMock, mock_factory: MagicMock, app: Flask) -> None: + mock_db.engine = "engine" + run = SimpleNamespace( + id="run-1", + app_id="other-app", + created_by_role=CreatorUserRole.END_USER, + created_by="eu-1", + finished_at=None, + ) + mock_repo = MagicMock() + mock_repo.get_workflow_run_by_id_and_tenant_id.return_value = run + mock_factory.create_api_workflow_run_repository.return_value = mock_repo + + with app.test_request_context("/workflow/run-1/events"): + with pytest.raises(NotFoundError): + WorkflowEventsApi().get(_workflow_app(), _end_user(), "run-1") + + @patch("controllers.web.workflow_events.DifyAPIRepositoryFactory") + @patch("controllers.web.workflow_events.db") + def test_workflow_run_not_created_by_end_user( + self, mock_db: MagicMock, mock_factory: MagicMock, app: Flask + ) -> None: + mock_db.engine = "engine" + run = SimpleNamespace( + id="run-1", + app_id="app-1", + created_by_role=CreatorUserRole.ACCOUNT, + created_by="eu-1", + finished_at=None, + ) + mock_repo = MagicMock() + mock_repo.get_workflow_run_by_id_and_tenant_id.return_value = run + mock_factory.create_api_workflow_run_repository.return_value = mock_repo + + with app.test_request_context("/workflow/run-1/events"): + with pytest.raises(NotFoundError): + WorkflowEventsApi().get(_workflow_app(), _end_user(), "run-1") + + @patch("controllers.web.workflow_events.DifyAPIRepositoryFactory") + @patch("controllers.web.workflow_events.db") + def test_workflow_run_wrong_end_user(self, mock_db: MagicMock, mock_factory: MagicMock, app: Flask) -> None: + mock_db.engine = "engine" + run = SimpleNamespace( + id="run-1", + app_id="app-1", + created_by_role=CreatorUserRole.END_USER, + created_by="other-user", + finished_at=None, + ) + mock_repo = MagicMock() + mock_repo.get_workflow_run_by_id_and_tenant_id.return_value = run + mock_factory.create_api_workflow_run_repository.return_value = mock_repo + + with app.test_request_context("/workflow/run-1/events"): + with pytest.raises(NotFoundError): + WorkflowEventsApi().get(_workflow_app(), _end_user(), "run-1") + + @patch("controllers.web.workflow_events.WorkflowResponseConverter") + @patch("controllers.web.workflow_events.DifyAPIRepositoryFactory") + @patch("controllers.web.workflow_events.db") + def test_finished_run_returns_sse_response( + self, mock_db: MagicMock, mock_factory: MagicMock, mock_converter: MagicMock, app: Flask + ) -> None: + from datetime import datetime + + mock_db.engine = "engine" + run = SimpleNamespace( + id="run-1", + app_id="app-1", + created_by_role=CreatorUserRole.END_USER, + created_by="eu-1", + finished_at=datetime(2024, 1, 1), + ) + mock_repo = MagicMock() + mock_repo.get_workflow_run_by_id_and_tenant_id.return_value = run + mock_factory.create_api_workflow_run_repository.return_value = mock_repo + + finish_response = MagicMock() + finish_response.model_dump.return_value = {"task_id": "run-1"} + finish_response.event.value = "workflow_finished" + mock_converter.workflow_run_result_to_finish_response.return_value = finish_response + + with app.test_request_context("/workflow/run-1/events"): + response = WorkflowEventsApi().get(_workflow_app(), _end_user(), "run-1") + + assert response.mimetype == "text/event-stream" diff --git a/api/tests/unit_tests/controllers/web/test_wraps.py b/api/tests/unit_tests/controllers/web/test_wraps.py new file mode 100644 index 0000000000..85049ae975 --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_wraps.py @@ -0,0 +1,393 @@ +"""Unit tests for controllers.web.wraps — JWT auth decorator and validation helpers.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from werkzeug.exceptions import BadRequest, NotFound, Unauthorized + +from controllers.web.error import WebAppAuthAccessDeniedError, WebAppAuthRequiredError +from controllers.web.wraps import ( + _validate_user_accessibility, + _validate_webapp_token, + decode_jwt_token, +) + + +# --------------------------------------------------------------------------- +# _validate_webapp_token +# --------------------------------------------------------------------------- +class TestValidateWebappToken: + def test_enterprise_enabled_and_app_auth_requires_webapp_source(self) -> None: + """When both flags are true, a non-webapp source must raise.""" + decoded = {"token_source": "other"} + with pytest.raises(WebAppAuthRequiredError): + _validate_webapp_token(decoded, app_web_auth_enabled=True, system_webapp_auth_enabled=True) + + def test_enterprise_enabled_and_app_auth_accepts_webapp_source(self) -> None: + decoded = {"token_source": "webapp"} + _validate_webapp_token(decoded, app_web_auth_enabled=True, system_webapp_auth_enabled=True) + + def test_enterprise_enabled_and_app_auth_missing_source_raises(self) -> None: + decoded = {} + with pytest.raises(WebAppAuthRequiredError): + _validate_webapp_token(decoded, app_web_auth_enabled=True, system_webapp_auth_enabled=True) + + def test_public_app_rejects_webapp_source(self) -> None: + """When auth is not required, a webapp-sourced token must be rejected.""" + decoded = {"token_source": "webapp"} + with pytest.raises(Unauthorized): + _validate_webapp_token(decoded, app_web_auth_enabled=False, system_webapp_auth_enabled=False) + + def test_public_app_accepts_non_webapp_source(self) -> None: + decoded = {"token_source": "other"} + _validate_webapp_token(decoded, app_web_auth_enabled=False, system_webapp_auth_enabled=False) + + def test_public_app_accepts_no_source(self) -> None: + decoded = {} + _validate_webapp_token(decoded, app_web_auth_enabled=False, system_webapp_auth_enabled=False) + + def test_system_enabled_but_app_public(self) -> None: + """system_webapp_auth_enabled=True but app is public — webapp source rejected.""" + decoded = {"token_source": "webapp"} + with pytest.raises(Unauthorized): + _validate_webapp_token(decoded, app_web_auth_enabled=False, system_webapp_auth_enabled=True) + + +# --------------------------------------------------------------------------- +# _validate_user_accessibility +# --------------------------------------------------------------------------- +class TestValidateUserAccessibility: + def test_skips_when_auth_disabled(self) -> None: + """No checks when system or app auth is disabled.""" + _validate_user_accessibility( + decoded={}, + app_code="code", + app_web_auth_enabled=False, + system_webapp_auth_enabled=False, + webapp_settings=None, + ) + + def test_missing_user_id_raises(self) -> None: + decoded = {} + with pytest.raises(WebAppAuthRequiredError): + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=SimpleNamespace(access_mode="internal"), + ) + + def test_missing_webapp_settings_raises(self) -> None: + decoded = {"user_id": "u1"} + with pytest.raises(WebAppAuthRequiredError, match="settings not found"): + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=None, + ) + + def test_missing_auth_type_raises(self) -> None: + decoded = {"user_id": "u1", "granted_at": 1} + settings = SimpleNamespace(access_mode="public") + with pytest.raises(WebAppAuthAccessDeniedError, match="auth_type"): + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=settings, + ) + + def test_missing_granted_at_raises(self) -> None: + decoded = {"user_id": "u1", "auth_type": "external"} + settings = SimpleNamespace(access_mode="public") + with pytest.raises(WebAppAuthAccessDeniedError, match="granted_at"): + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=settings, + ) + + @patch("controllers.web.wraps.EnterpriseService.get_app_sso_settings_last_update_time") + @patch("controllers.web.wraps.WebAppAuthService.is_app_require_permission_check", return_value=False) + def test_external_auth_type_checks_sso_update_time( + self, mock_perm_check: MagicMock, mock_sso_time: MagicMock + ) -> None: + # granted_at is before SSO update time → denied + mock_sso_time.return_value = datetime.now(UTC) + old_granted = int((datetime.now(UTC) - timedelta(hours=1)).timestamp()) + decoded = {"user_id": "u1", "auth_type": "external", "granted_at": old_granted} + settings = SimpleNamespace(access_mode="public") + with pytest.raises(WebAppAuthAccessDeniedError, match="SSO settings"): + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=settings, + ) + + @patch("controllers.web.wraps.EnterpriseService.get_workspace_sso_settings_last_update_time") + @patch("controllers.web.wraps.WebAppAuthService.is_app_require_permission_check", return_value=False) + def test_internal_auth_type_checks_workspace_sso_update_time( + self, mock_perm_check: MagicMock, mock_workspace_sso: MagicMock + ) -> None: + mock_workspace_sso.return_value = datetime.now(UTC) + old_granted = int((datetime.now(UTC) - timedelta(hours=1)).timestamp()) + decoded = {"user_id": "u1", "auth_type": "internal", "granted_at": old_granted} + settings = SimpleNamespace(access_mode="public") + with pytest.raises(WebAppAuthAccessDeniedError, match="SSO settings"): + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=settings, + ) + + @patch("controllers.web.wraps.EnterpriseService.get_app_sso_settings_last_update_time") + @patch("controllers.web.wraps.WebAppAuthService.is_app_require_permission_check", return_value=False) + def test_external_auth_passes_when_granted_after_sso_update( + self, mock_perm_check: MagicMock, mock_sso_time: MagicMock + ) -> None: + mock_sso_time.return_value = datetime.now(UTC) - timedelta(hours=2) + recent_granted = int(datetime.now(UTC).timestamp()) + decoded = {"user_id": "u1", "auth_type": "external", "granted_at": recent_granted} + settings = SimpleNamespace(access_mode="public") + # Should not raise + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=settings, + ) + + @patch("controllers.web.wraps.EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp", return_value=False) + @patch("controllers.web.wraps.AppService.get_app_id_by_code", return_value="app-id-1") + @patch("controllers.web.wraps.WebAppAuthService.is_app_require_permission_check", return_value=True) + def test_permission_check_denies_unauthorized_user( + self, mock_perm: MagicMock, mock_app_id: MagicMock, mock_allowed: MagicMock + ) -> None: + decoded = {"user_id": "u1", "auth_type": "external", "granted_at": int(datetime.now(UTC).timestamp())} + settings = SimpleNamespace(access_mode="internal") + with pytest.raises(WebAppAuthAccessDeniedError): + _validate_user_accessibility( + decoded=decoded, + app_code="code", + app_web_auth_enabled=True, + system_webapp_auth_enabled=True, + webapp_settings=settings, + ) + + +# --------------------------------------------------------------------------- +# decode_jwt_token +# --------------------------------------------------------------------------- +class TestDecodeJwtToken: + @patch("controllers.web.wraps._validate_user_accessibility") + @patch("controllers.web.wraps._validate_webapp_token") + @patch("controllers.web.wraps.EnterpriseService.WebAppAuth.get_app_access_mode_by_id") + @patch("controllers.web.wraps.AppService.get_app_id_by_code") + @patch("controllers.web.wraps.FeatureService.get_system_features") + @patch("controllers.web.wraps.PassportService") + @patch("controllers.web.wraps.extract_webapp_passport") + @patch("controllers.web.wraps.db") + def test_happy_path( + self, + mock_db: MagicMock, + mock_extract: MagicMock, + mock_passport_cls: MagicMock, + mock_features: MagicMock, + mock_app_id: MagicMock, + mock_access_mode: MagicMock, + mock_validate_token: MagicMock, + mock_validate_user: MagicMock, + app: Flask, + ) -> None: + mock_extract.return_value = "jwt-token" + mock_passport_cls.return_value.verify.return_value = { + "app_code": "code1", + "app_id": "app-1", + "end_user_id": "eu-1", + } + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + + app_model = SimpleNamespace(id="app-1", enable_site=True) + site = SimpleNamespace(code="code1") + end_user = SimpleNamespace(id="eu-1", session_id="sess-1") + + # Configure session mock to return correct objects via scalar() + session_mock = MagicMock() + session_mock.scalar.side_effect = [app_model, site, end_user] + session_ctx = MagicMock() + session_ctx.__enter__ = MagicMock(return_value=session_mock) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_db.engine = "engine" + + with patch("controllers.web.wraps.Session", return_value=session_ctx): + with app.test_request_context("/", headers={"X-App-Code": "code1"}): + result_app, result_user = decode_jwt_token() + + assert result_app.id == "app-1" + assert result_user.id == "eu-1" + + @patch("controllers.web.wraps.FeatureService.get_system_features") + @patch("controllers.web.wraps.extract_webapp_passport") + def test_missing_token_raises_unauthorized( + self, mock_extract: MagicMock, mock_features: MagicMock, app: Flask + ) -> None: + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + mock_extract.return_value = None + + with app.test_request_context("/", headers={"X-App-Code": "code1"}): + with pytest.raises(Unauthorized): + decode_jwt_token() + + @patch("controllers.web.wraps.FeatureService.get_system_features") + @patch("controllers.web.wraps.PassportService") + @patch("controllers.web.wraps.extract_webapp_passport") + @patch("controllers.web.wraps.db") + def test_missing_app_raises_not_found( + self, + mock_db: MagicMock, + mock_extract: MagicMock, + mock_passport_cls: MagicMock, + mock_features: MagicMock, + app: Flask, + ) -> None: + mock_extract.return_value = "jwt-token" + mock_passport_cls.return_value.verify.return_value = { + "app_code": "code1", + "app_id": "app-1", + "end_user_id": "eu-1", + } + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + + session_mock = MagicMock() + session_mock.scalar.return_value = None # No app found + session_ctx = MagicMock() + session_ctx.__enter__ = MagicMock(return_value=session_mock) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_db.engine = "engine" + + with patch("controllers.web.wraps.Session", return_value=session_ctx): + with app.test_request_context("/", headers={"X-App-Code": "code1"}): + with pytest.raises(NotFound): + decode_jwt_token() + + @patch("controllers.web.wraps.FeatureService.get_system_features") + @patch("controllers.web.wraps.PassportService") + @patch("controllers.web.wraps.extract_webapp_passport") + @patch("controllers.web.wraps.db") + def test_disabled_site_raises_bad_request( + self, + mock_db: MagicMock, + mock_extract: MagicMock, + mock_passport_cls: MagicMock, + mock_features: MagicMock, + app: Flask, + ) -> None: + mock_extract.return_value = "jwt-token" + mock_passport_cls.return_value.verify.return_value = { + "app_code": "code1", + "app_id": "app-1", + "end_user_id": "eu-1", + } + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + + app_model = SimpleNamespace(id="app-1", enable_site=False) + + session_mock = MagicMock() + # scalar calls: app_model, site (code found), then end_user + session_mock.scalar.side_effect = [app_model, SimpleNamespace(code="code1"), None] + session_ctx = MagicMock() + session_ctx.__enter__ = MagicMock(return_value=session_mock) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_db.engine = "engine" + + with patch("controllers.web.wraps.Session", return_value=session_ctx): + with app.test_request_context("/", headers={"X-App-Code": "code1"}): + with pytest.raises(BadRequest, match="Site is disabled"): + decode_jwt_token() + + @patch("controllers.web.wraps.FeatureService.get_system_features") + @patch("controllers.web.wraps.PassportService") + @patch("controllers.web.wraps.extract_webapp_passport") + @patch("controllers.web.wraps.db") + def test_missing_end_user_raises_not_found( + self, + mock_db: MagicMock, + mock_extract: MagicMock, + mock_passport_cls: MagicMock, + mock_features: MagicMock, + app: Flask, + ) -> None: + mock_extract.return_value = "jwt-token" + mock_passport_cls.return_value.verify.return_value = { + "app_code": "code1", + "app_id": "app-1", + "end_user_id": "eu-1", + } + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + + app_model = SimpleNamespace(id="app-1", enable_site=True) + site = SimpleNamespace(code="code1") + + session_mock = MagicMock() + session_mock.scalar.side_effect = [app_model, site, None] # end_user is None + session_ctx = MagicMock() + session_ctx.__enter__ = MagicMock(return_value=session_mock) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_db.engine = "engine" + + with patch("controllers.web.wraps.Session", return_value=session_ctx): + with app.test_request_context("/", headers={"X-App-Code": "code1"}): + with pytest.raises(NotFound): + decode_jwt_token() + + @patch("controllers.web.wraps.FeatureService.get_system_features") + @patch("controllers.web.wraps.PassportService") + @patch("controllers.web.wraps.extract_webapp_passport") + @patch("controllers.web.wraps.db") + def test_user_id_mismatch_raises_unauthorized( + self, + mock_db: MagicMock, + mock_extract: MagicMock, + mock_passport_cls: MagicMock, + mock_features: MagicMock, + app: Flask, + ) -> None: + mock_extract.return_value = "jwt-token" + mock_passport_cls.return_value.verify.return_value = { + "app_code": "code1", + "app_id": "app-1", + "end_user_id": "eu-1", + } + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + + app_model = SimpleNamespace(id="app-1", enable_site=True) + site = SimpleNamespace(code="code1") + end_user = SimpleNamespace(id="eu-1", session_id="sess-1") + + session_mock = MagicMock() + session_mock.scalar.side_effect = [app_model, site, end_user] + session_ctx = MagicMock() + session_ctx.__enter__ = MagicMock(return_value=session_mock) + session_ctx.__exit__ = MagicMock(return_value=False) + mock_db.engine = "engine" + + with patch("controllers.web.wraps.Session", return_value=session_ctx): + with app.test_request_context("/", headers={"X-App-Code": "code1"}): + with pytest.raises(Unauthorized, match="expired"): + decode_jwt_token(user_id="different-user") diff --git a/api/tests/unit_tests/core/agent/conftest.py b/api/tests/unit_tests/core/agent/conftest.py new file mode 100644 index 0000000000..a2aa501720 --- /dev/null +++ b/api/tests/unit_tests/core/agent/conftest.py @@ -0,0 +1,80 @@ +import pytest + + +class DummyTool: + def __init__(self, name): + self.name = name + + +class DummyPromptEntity: + def __init__(self, first_prompt): + self.first_prompt = first_prompt + + +class DummyAgentConfig: + def __init__(self, prompt_entity=None): + self.prompt = prompt_entity + + +class DummyAppConfig: + def __init__(self, agent=None): + self.agent = agent + + +class DummyScratchpadUnit: + def __init__( + self, + final=False, + thought=None, + action_str=None, + observation=None, + agent_response=None, + ): + self._final = final + self.thought = thought + self.action_str = action_str + self.observation = observation + self.agent_response = agent_response + + def is_final(self): + return self._final + + +@pytest.fixture +def dummy_tool_factory(): + def _factory(name): + return DummyTool(name) + + return _factory + + +@pytest.fixture +def dummy_prompt_entity_factory(): + def _factory(first_prompt): + return DummyPromptEntity(first_prompt) + + return _factory + + +@pytest.fixture +def dummy_agent_config_factory(): + def _factory(prompt_entity=None): + return DummyAgentConfig(prompt_entity) + + return _factory + + +@pytest.fixture +def dummy_app_config_factory(): + def _factory(agent=None): + return DummyAppConfig(agent) + + return _factory + + +@pytest.fixture +def dummy_scratchpad_unit_factory(): + def _factory(**kwargs): + return DummyScratchpadUnit(**kwargs) + + return _factory diff --git a/api/tests/unit_tests/core/agent/output_parser/test_cot_output_parser.py b/api/tests/unit_tests/core/agent/output_parser/test_cot_output_parser.py index ba8c903f65..9073ae1044 100644 --- a/api/tests/unit_tests/core/agent/output_parser/test_cot_output_parser.py +++ b/api/tests/unit_tests/core/agent/output_parser/test_cot_output_parser.py @@ -1,70 +1,255 @@ +"""Unit tests for CotAgentOutputParser. + +Verifies expected parsing behavior for streaming content and JSON payloads, +including edge cases such as empty/non-string content and malformed JSON. +Assumes lightweight fixtures (SimpleNamespace/MagicMock) stand in for real +model output structures. Implementation under test: +core.agent.output_parser.cot_output_parser.CotAgentOutputParser. +""" + import json -from collections.abc import Generator +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest -from core.agent.entities import AgentScratchpadUnit from core.agent.output_parser.cot_output_parser import CotAgentOutputParser -from dify_graph.model_runtime.entities.llm_entities import AssistantPromptMessage, LLMResultChunk, LLMResultChunkDelta -def mock_llm_response(text) -> Generator[LLMResultChunk, None, None]: - for i in range(len(text)): - yield LLMResultChunk( - model="model", - prompt_messages=[], - delta=LLMResultChunkDelta(index=0, message=AssistantPromptMessage(content=text[i], tool_calls=[])), +@pytest.fixture +def mock_action_class(mocker): + mock_action = MagicMock() + mocker.patch( + "core.agent.output_parser.cot_output_parser.AgentScratchpadUnit.Action", + mock_action, + ) + return mock_action + + +@pytest.fixture +def usage_dict(): + return {} + + +@pytest.fixture +def make_chunk(): + def _make_chunk(content=None, usage=None): + delta = SimpleNamespace( + message=SimpleNamespace(content=content), + usage=usage, ) + return SimpleNamespace(delta=delta) + + return _make_chunk -def test_cot_output_parser(): - test_cases = [ - { - "input": 'Through: abc\nAction: ```{"action": "Final Answer", "action_input": "```echarts\n {}\n```"}```', - "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, - "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', - }, - # code block with json - { - "input": 'Through: abc\nAction: ```json\n{"action": "Final Answer", "action_input": "```echarts\n {' - '}\n```"}```', - "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, - "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', - }, - # code block with JSON - { - "input": 'Through: abc\nAction: ```JSON\n{"action": "Final Answer", "action_input": "```echarts\n {' - '}\n```"}```', - "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, - "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', - }, - # list - { - "input": 'Through: abc\nAction: ```[{"action": "Final Answer", "action_input": "```echarts\n {}\n```"}]```', - "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, - "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', - }, - # no code block - { - "input": 'Through: abc\nAction: {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}', - "action": {"action": "Final Answer", "action_input": "```echarts\n {}\n```"}, - "output": 'Through: abc\n {"action": "Final Answer", "action_input": "```echarts\\n {}\\n```"}', - }, - # no code block and json - {"input": "Through: abc\nAction: efg", "action": {}, "output": "Through: abc\n efg"}, - ] +# ============================================================ +# Test Suite +# ============================================================ - parser = CotAgentOutputParser() - usage_dict = {} - for test_case in test_cases: - # mock llm_response as a generator by text - llm_response: Generator[LLMResultChunk, None, None] = mock_llm_response(test_case["input"]) - results = parser.handle_react_stream_output(llm_response, usage_dict) - output = "" - for result in results: - if isinstance(result, str): - output += result - elif isinstance(result, AgentScratchpadUnit.Action): - if test_case["action"]: - assert result.to_dict() == test_case["action"] - output += json.dumps(result.to_dict()) - if test_case["output"]: - assert output == test_case["output"] + +class TestCotAgentOutputParser: + """Validate CotAgentOutputParser streaming + JSON parsing behavior. + + Lifecycle: no explicit setup/teardown; relies on pytest fixtures for + lightweight chunk/action doubles. Invariants: non-string/empty content + yields no output, usage gets recorded when provided, and valid action JSON + results in Action instantiation. Usage: invoke via pytest (e.g., + `pytest -k TestCotAgentOutputParser`). + """ + + # -------------------------------------------------------- + # Basic streaming & usage + # -------------------------------------------------------- + + def test_stream_plain_text(self, make_chunk, usage_dict) -> None: + chunks = [make_chunk("hello world")] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert "".join(result) == "hello world" + + def test_stream_empty_string(self, make_chunk, usage_dict) -> None: + chunks = [make_chunk("")] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert result == [] + + def test_stream_none_content(self, make_chunk, usage_dict) -> None: + chunks = [make_chunk(None)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert result == [] + + @pytest.mark.parametrize("content", [123, 12.5, [], {}, object()]) + def test_non_string_content(self, make_chunk, usage_dict, content) -> None: + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert result == [] + + def test_usage_update(self, make_chunk, usage_dict) -> None: + usage_data = {"tokens": 99} + chunks = [make_chunk("abc", usage=usage_data)] + list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert usage_dict["usage"] == usage_data + + # -------------------------------------------------------- + # JSON parsing (direct + streaming) + # -------------------------------------------------------- + + def test_single_json_action_valid(self, make_chunk, usage_dict, mock_action_class) -> None: + content = '{"action": "search", "input": "query"}' + chunks = [make_chunk(content)] + list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + mock_action_class.assert_called_once_with(action_name="search", action_input="query") + + def test_json_list_unwrap(self, make_chunk, usage_dict, mock_action_class) -> None: + content = '[{"action": "lookup", "input": "abc"}]' + chunks = [make_chunk(content)] + list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + mock_action_class.assert_called_once_with(action_name="lookup", action_input="abc") + + def test_json_missing_fields_returns_string(self, make_chunk, usage_dict) -> None: + content = '{"foo": "bar"}' + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + # Expect the serialized JSON to be yielded as a single element. + assert result == [json.dumps({"foo": "bar"})] + + def test_invalid_json_string_input(self, make_chunk, usage_dict) -> None: + content = "{invalid json}" + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert any("invalid json" in str(r) for r in result) + + def test_json_split_across_chunks(self, make_chunk, usage_dict, mock_action_class) -> None: + chunks = [ + make_chunk('{"action": '), + make_chunk('"multi", '), + make_chunk('"input": "step"}'), + ] + list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + mock_action_class.assert_called_once_with(action_name="multi", action_input="step") + + def test_unclosed_json_at_end(self, make_chunk, usage_dict) -> None: + chunks = [make_chunk('{"foo": "bar"')] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert all(isinstance(item, str) for item in result) + assert any('{"foo": "bar"' in item for item in result) + + # -------------------------------------------------------- + # Code block JSON extraction + # -------------------------------------------------------- + + def test_code_block_json_valid(self, make_chunk, usage_dict, mock_action_class) -> None: + content = """```json +{"action": "lookup", "input": "abc"} +```""" + chunks = [make_chunk(content)] + list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + mock_action_class.assert_called_once_with(action_name="lookup", action_input="abc") + + def test_code_block_multiple_json(self, make_chunk, usage_dict, mock_action_class) -> None: + # Multiple JSON objects inside single code fence (invalid combined JSON) + # Parser should safely ignore invalid combined block + content = """```json +{"action": "a1", "input": "x"} +{"action": "a2", "input": "y"} +```""" + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + # No valid parsed action expected due to invalid combined JSON + assert mock_action_class.call_count == 0 + assert isinstance(result, list) + + def test_code_block_invalid_json(self, make_chunk, usage_dict) -> None: + content = """```json +{invalid} +```""" + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert result + + def test_unclosed_code_block(self, make_chunk, usage_dict) -> None: + chunks = [make_chunk('```json {"a":1}')] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert all(isinstance(item, str) for item in result) + assert any('```json {"a":1}' in item for item in result) + + # -------------------------------------------------------- + # Action / Thought prefix handling + # -------------------------------------------------------- + + @pytest.mark.parametrize( + "content", + [ + " action: something", + " ACTION: something", + " thought: reasoning", + " THOUGHT: reasoning", + ], + ) + def test_prefix_handling(self, make_chunk, usage_dict, content) -> None: + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + joined = "".join(str(item) for item in result) + expected_word = "something" if "action:" in content.lower() else "reasoning" + assert expected_word in joined + assert "action:" not in joined.lower() + assert "thought:" not in joined.lower() + + def test_prefix_mid_word_yield_delta_branch(self, make_chunk, usage_dict) -> None: + chunks = [make_chunk("xaction: test")] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert "x" in "".join(map(str, result)) + + # -------------------------------------------------------- + # Mixed streaming scenarios + # -------------------------------------------------------- + + def test_text_json_text_mix(self, make_chunk, usage_dict, mock_action_class) -> None: + content = 'start {"action": "mix", "input": "1"} end' + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + # JSON action should be parsed + mock_action_class.assert_called_once() + # Ensure surrounding text is streamed (character-level) + joined = "".join(str(r) for r in result if not isinstance(r, MagicMock)) + assert "start" in joined + assert "end" in joined + + def test_multiple_code_blocks_in_stream(self, make_chunk, usage_dict, mock_action_class) -> None: + content = '```json\n{"action":"a1","input":"x"}\n```middle```json\n{"action":"a2","input":"y"}\n```' + chunks = [make_chunk(content)] + list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert mock_action_class.call_count == 2 + + def test_backtick_noise(self, make_chunk, usage_dict) -> None: + chunks = [make_chunk("text with ` random ` backticks")] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert "text with" in "".join(result) + + # -------------------------------------------------------- + # Boundary & edge inputs + # -------------------------------------------------------- + + @pytest.mark.parametrize( + "content", + [ + "```", + "{", + "}", + "```json", + "action:", + "thought:", + " ", + ], + ) + def test_edge_inputs(self, make_chunk, usage_dict, content) -> None: + chunks = [make_chunk(content)] + result = list(CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict)) + assert all(isinstance(item, str) for item in result) + joined = "".join(result) + if content == " ": + assert result == [] or joined == content + if content in {"```", "{", "}", "```json"}: + assert content in joined + if content.lower() in {"action:", "thought:"}: + assert "action:" not in joined.lower() + assert "thought:" not in joined.lower() diff --git a/api/tests/unit_tests/core/agent/strategy/test_base.py b/api/tests/unit_tests/core/agent/strategy/test_base.py new file mode 100644 index 0000000000..83ff79e8a1 --- /dev/null +++ b/api/tests/unit_tests/core/agent/strategy/test_base.py @@ -0,0 +1,174 @@ +from collections.abc import Generator +from unittest.mock import MagicMock + +import pytest + +from core.agent.strategy.base import BaseAgentStrategy + + +class DummyStrategy(BaseAgentStrategy): + """ + Concrete implementation for testing BaseAgentStrategy + """ + + def __init__(self, return_values=None, raise_exception=None): + self.return_values = return_values or [] + self.raise_exception = raise_exception + self.received_args = None + + def _invoke( + self, + params, + user_id, + conversation_id=None, + app_id=None, + message_id=None, + credentials=None, + ) -> Generator: + self.received_args = ( + params, + user_id, + conversation_id, + app_id, + message_id, + credentials, + ) + + if self.raise_exception: + raise self.raise_exception + + yield from self.return_values + + +class TestBaseAgentStrategyInstantiation: + def test_cannot_instantiate_abstract_class(self) -> None: + with pytest.raises(TypeError): + BaseAgentStrategy() + + +class TestBaseAgentStrategyInvoke: + @pytest.fixture + def mock_message(self): + return MagicMock(name="AgentInvokeMessage") + + @pytest.fixture + def mock_credentials(self): + return MagicMock(name="InvokeCredentials") + + @pytest.mark.parametrize( + ("params", "user_id", "conversation_id", "app_id", "message_id"), + [ + ({"key": "value"}, "user1", "conv1", "app1", "msg1"), + ({}, "user2", None, None, None), + ({"a": 1}, "", "", "", ""), + ({"nested": {"x": 1}}, "user3", None, "app3", None), + ], + ) + def test_invoke_success( + self, + mock_message, + mock_credentials, + params, + user_id, + conversation_id, + app_id, + message_id, + ) -> None: + # Arrange + strategy = DummyStrategy(return_values=[mock_message]) + + # Act + result = list( + strategy.invoke( + params=params, + user_id=user_id, + conversation_id=conversation_id, + app_id=app_id, + message_id=message_id, + credentials=mock_credentials, + ) + ) + + # Assert + assert result == [mock_message] + assert strategy.received_args == ( + params, + user_id, + conversation_id, + app_id, + message_id, + mock_credentials, + ) + + def test_invoke_multiple_yields(self, mock_message) -> None: + # Arrange + messages = [mock_message, MagicMock(), MagicMock()] + strategy = DummyStrategy(return_values=messages) + + # Act + result = list(strategy.invoke(params={}, user_id="user")) + + # Assert + assert result == messages + + def test_invoke_empty_generator(self) -> None: + # Arrange + strategy = DummyStrategy(return_values=[]) + + # Act + result = list(strategy.invoke(params={}, user_id="user")) + + # Assert + assert result == [] + + def test_invoke_propagates_exception(self) -> None: + # Arrange + strategy = DummyStrategy(raise_exception=ValueError("failure")) + + # Act & Assert + with pytest.raises(ValueError, match="failure"): + list(strategy.invoke(params={}, user_id="user")) + + @pytest.mark.parametrize( + "invalid_params", + [ + None, + "", + 123, + [], + ], + ) + def test_invoke_invalid_params_type_pass_through(self, invalid_params) -> None: + """ + Base class does not validate types — ensure pass-through behavior + """ + strategy = DummyStrategy(return_values=[]) + + result = list(strategy.invoke(params=invalid_params, user_id="user")) + + assert result == [] + + def test_invoke_none_user_id(self) -> None: + strategy = DummyStrategy(return_values=[]) + + result = list(strategy.invoke(params={}, user_id=None)) + + assert result == [] + + +class TestBaseAgentStrategyGetParameters: + def test_get_parameters_default_empty_list(self) -> None: + strategy = DummyStrategy() + result = strategy.get_parameters() + + assert isinstance(result, list) + assert result == [] + + def test_get_parameters_returns_new_list_each_time(self) -> None: + strategy = DummyStrategy() + + first = strategy.get_parameters() + second = strategy.get_parameters() + + assert first == second == [] + assert first is not second diff --git a/api/tests/unit_tests/core/agent/strategy/test_plugin.py b/api/tests/unit_tests/core/agent/strategy/test_plugin.py new file mode 100644 index 0000000000..e0894f1e90 --- /dev/null +++ b/api/tests/unit_tests/core/agent/strategy/test_plugin.py @@ -0,0 +1,272 @@ +# File: tests/unit_tests/core/agent/strategy/test_plugin.py + +from unittest.mock import MagicMock + +import pytest + +from core.agent.strategy.plugin import PluginAgentStrategy + +# ============================================================ +# Fixtures +# ============================================================ + + +@pytest.fixture +def mock_parameter(): + def _factory(name="param", return_value="initialized"): + param = MagicMock() + param.name = name + param.init_frontend_parameter = MagicMock(return_value=return_value) + return param + + return _factory + + +@pytest.fixture +def mock_declaration(mock_parameter): + param1 = mock_parameter("param1", "init1") + param2 = mock_parameter("param2", "init2") + + identity = MagicMock() + identity.provider = "provider_x" + identity.name = "strategy_x" + + declaration = MagicMock() + declaration.parameters = [param1, param2] + declaration.identity = identity + + return declaration + + +@pytest.fixture +def strategy(mock_declaration): + return PluginAgentStrategy( + tenant_id="tenant_123", + declaration=mock_declaration, + meta_version="v1", + ) + + +# ============================================================ +# Initialization Tests +# ============================================================ + + +class TestPluginAgentStrategyInitialization: + def test_init_sets_attributes(self, mock_declaration) -> None: + strategy = PluginAgentStrategy( + tenant_id="tenant_test", + declaration=mock_declaration, + meta_version="meta_v", + ) + + assert strategy.tenant_id == "tenant_test" + assert strategy.declaration == mock_declaration + assert strategy.meta_version == "meta_v" + + def test_init_meta_version_none(self, mock_declaration) -> None: + strategy = PluginAgentStrategy( + tenant_id="tenant_test", + declaration=mock_declaration, + meta_version=None, + ) + + assert strategy.meta_version is None + + +# ============================================================ +# get_parameters Tests +# ============================================================ + + +class TestGetParameters: + def test_get_parameters_returns_parameters(self, strategy, mock_declaration) -> None: + result = strategy.get_parameters() + assert result == mock_declaration.parameters + + +# ============================================================ +# initialize_parameters Tests +# ============================================================ + + +class TestInitializeParameters: + def test_initialize_parameters_success(self, strategy, mock_declaration) -> None: + params = {"param1": "value1"} + + result = strategy.initialize_parameters(params.copy()) + + assert result["param1"] == "init1" + assert result["param2"] == "init2" + + mock_declaration.parameters[0].init_frontend_parameter.assert_called_once_with("value1") + mock_declaration.parameters[1].init_frontend_parameter.assert_called_once_with(None) + + @pytest.mark.parametrize( + "input_params", + [ + {}, + {"param1": None}, + {"param1": ""}, + {"param1": 0}, + {"param1": []}, + {"param1": {}, "param2": "value"}, + ], + ) + def test_initialize_parameters_edge_cases(self, strategy, input_params) -> None: + result = strategy.initialize_parameters(input_params.copy()) + + for param in strategy.declaration.parameters: + assert param.name in result + + def test_initialize_parameters_invalid_input_type(self, strategy) -> None: + with pytest.raises(AttributeError): + strategy.initialize_parameters(None) + + +# ============================================================ +# _invoke Tests +# ============================================================ + + +class TestInvoke: + def test_invoke_success_all_arguments(self, strategy, mocker) -> None: + mock_manager = MagicMock() + mock_manager.invoke = MagicMock(return_value=iter(["msg1", "msg2"])) + + mocker.patch( + "core.agent.strategy.plugin.PluginAgentClient", + return_value=mock_manager, + ) + + mock_convert = mocker.patch( + "core.agent.strategy.plugin.convert_parameters_to_plugin_format", + return_value={"converted": True}, + ) + + result = list( + strategy._invoke( + params={"param1": "value"}, + user_id="user_1", + conversation_id="conv_1", + app_id="app_1", + message_id="msg_1", + credentials=None, + ) + ) + + assert result == ["msg1", "msg2"] + mock_convert.assert_called_once() + mock_manager.invoke.assert_called_once() + + call_kwargs = mock_manager.invoke.call_args.kwargs + assert call_kwargs["tenant_id"] == "tenant_123" + assert call_kwargs["user_id"] == "user_1" + assert call_kwargs["agent_provider"] == "provider_x" + assert call_kwargs["agent_strategy"] == "strategy_x" + assert call_kwargs["agent_params"] == {"converted": True} + assert call_kwargs["conversation_id"] == "conv_1" + assert call_kwargs["app_id"] == "app_1" + assert call_kwargs["message_id"] == "msg_1" + assert call_kwargs["context"] is not None + + def test_invoke_with_credentials(self, strategy, mocker) -> None: + mock_manager = MagicMock() + mock_manager.invoke = MagicMock(return_value=iter([])) + + mocker.patch( + "core.agent.strategy.plugin.PluginAgentClient", + return_value=mock_manager, + ) + + mocker.patch( + "core.agent.strategy.plugin.convert_parameters_to_plugin_format", + return_value={}, + ) + + # Patch PluginInvokeContext to bypass pydantic validation + mock_context = MagicMock() + mocker.patch( + "core.agent.strategy.plugin.PluginInvokeContext", + return_value=mock_context, + ) + + credentials = MagicMock() + + result = list( + strategy._invoke( + params={}, + user_id="user_1", + credentials=credentials, + ) + ) + + assert result == [] + mock_manager.invoke.assert_called_once() + + @pytest.mark.parametrize( + ("conversation_id", "app_id", "message_id"), + [ + (None, None, None), + ("conv", None, None), + (None, "app", None), + (None, None, "msg"), + ], + ) + def test_invoke_optional_arguments(self, strategy, mocker, conversation_id, app_id, message_id) -> None: + mock_manager = MagicMock() + mock_manager.invoke = MagicMock(return_value=iter([])) + + mocker.patch( + "core.agent.strategy.plugin.PluginAgentClient", + return_value=mock_manager, + ) + + mocker.patch( + "core.agent.strategy.plugin.convert_parameters_to_plugin_format", + return_value={}, + ) + + result = list( + strategy._invoke( + params={}, + user_id="user_1", + conversation_id=conversation_id, + app_id=app_id, + message_id=message_id, + ) + ) + + assert result == [] + mock_manager.invoke.assert_called_once() + + def test_invoke_convert_raises_exception(self, strategy, mocker) -> None: + mocker.patch( + "core.agent.strategy.plugin.PluginAgentClient", + return_value=MagicMock(), + ) + + mocker.patch( + "core.agent.strategy.plugin.convert_parameters_to_plugin_format", + side_effect=ValueError("conversion failed"), + ) + + with pytest.raises(ValueError): + list(strategy._invoke(params={}, user_id="user_1")) + + def test_invoke_manager_raises_exception(self, strategy, mocker) -> None: + mock_manager = MagicMock() + mock_manager.invoke.side_effect = RuntimeError("invoke failed") + + mocker.patch( + "core.agent.strategy.plugin.PluginAgentClient", + return_value=mock_manager, + ) + + mocker.patch( + "core.agent.strategy.plugin.convert_parameters_to_plugin_format", + return_value={}, + ) + + with pytest.raises(RuntimeError): + list(strategy._invoke(params={}, user_id="user_1")) diff --git a/api/tests/unit_tests/core/agent/test_base_agent_runner.py b/api/tests/unit_tests/core/agent/test_base_agent_runner.py new file mode 100644 index 0000000000..683cc0e36f --- /dev/null +++ b/api/tests/unit_tests/core/agent/test_base_agent_runner.py @@ -0,0 +1,802 @@ +import json +from decimal import Decimal +from unittest.mock import MagicMock + +import pytest + +import core.agent.base_agent_runner as module +from core.agent.base_agent_runner import BaseAgentRunner + +# ========================================================== +# Fixtures +# ========================================================== + + +@pytest.fixture +def mock_db_session(mocker): + session = mocker.MagicMock() + mocker.patch.object(module.db, "session", session) + return session + + +@pytest.fixture +def runner(mocker, mock_db_session): + r = BaseAgentRunner.__new__(BaseAgentRunner) + r.tenant_id = "tenant" + r.user_id = "user" + r.agent_thought_count = 0 + r.message = mocker.MagicMock(id="msg_current", conversation_id="conv1") + r.app_config = mocker.MagicMock() + r.app_config.app_id = "app1" + r.app_config.agent = None + r.dataset_tools = [] + r.application_generate_entity = mocker.MagicMock(invoke_from="test") + r._current_thoughts = [] + return r + + +# ========================================================== +# _repack_app_generate_entity +# ========================================================== + + +class TestRepack: + def test_sets_empty_if_none(self, runner, mocker): + entity = mocker.MagicMock() + entity.app_config.prompt_template.simple_prompt_template = None + result = runner._repack_app_generate_entity(entity) + assert result.app_config.prompt_template.simple_prompt_template == "" + + def test_keeps_existing(self, runner, mocker): + entity = mocker.MagicMock() + entity.app_config.prompt_template.simple_prompt_template = "abc" + result = runner._repack_app_generate_entity(entity) + assert result.app_config.prompt_template.simple_prompt_template == "abc" + + +# ========================================================== +# update_prompt_message_tool +# ========================================================== + + +class TestUpdatePromptTool: + def build_param(self, mocker, **kwargs): + p = mocker.MagicMock() + p.form = kwargs.get("form") + + mock_type = mocker.MagicMock() + mock_type.as_normal_type.return_value = "string" + p.type = mock_type + + p.name = kwargs.get("name", "p1") + p.llm_description = "desc" + p.input_schema = kwargs.get("input_schema") + p.options = kwargs.get("options") + p.required = kwargs.get("required", False) + return p + + def test_skip_non_llm(self, runner, mocker): + tool = mocker.MagicMock() + param = self.build_param(mocker, form="NOT_LLM") + tool.get_runtime_parameters.return_value = [param] + + prompt_tool = mocker.MagicMock() + prompt_tool.parameters = {"properties": {}, "required": []} + + result = runner.update_prompt_message_tool(tool, prompt_tool) + assert result.parameters["properties"] == {} + + def test_enum_and_required(self, runner, mocker): + option = mocker.MagicMock(value="opt1") + param = self.build_param( + mocker, + form=module.ToolParameter.ToolParameterForm.LLM, + options=[option], + required=True, + ) + + tool = mocker.MagicMock() + tool.get_runtime_parameters.return_value = [param] + + prompt_tool = mocker.MagicMock() + prompt_tool.parameters = {"properties": {}, "required": []} + + result = runner.update_prompt_message_tool(tool, prompt_tool) + assert "p1" in result.parameters["required"] + + def test_skip_file_type_param(self, runner, mocker): + tool = mocker.MagicMock() + param = self.build_param(mocker, form=module.ToolParameter.ToolParameterForm.LLM) + param.type = module.ToolParameter.ToolParameterType.FILE + tool.get_runtime_parameters.return_value = [param] + + prompt_tool = mocker.MagicMock() + prompt_tool.parameters = {"properties": {}, "required": []} + + result = runner.update_prompt_message_tool(tool, prompt_tool) + assert result.parameters["properties"] == {} + + def test_duplicate_required_not_duplicated(self, runner, mocker): + tool = mocker.MagicMock() + + param = self.build_param( + mocker, + form=module.ToolParameter.ToolParameterForm.LLM, + required=True, + ) + + tool.get_runtime_parameters.return_value = [param] + + prompt_tool = mocker.MagicMock() + prompt_tool.parameters = {"properties": {}, "required": ["p1"]} + + result = runner.update_prompt_message_tool(tool, prompt_tool) + + assert result.parameters["required"].count("p1") == 1 + + +# ========================================================== +# create_agent_thought +# ========================================================== + + +class TestCreateAgentThought: + def test_with_files(self, runner, mock_db_session, mocker): + mock_thought = mocker.MagicMock(id=10) + mocker.patch.object(module, "MessageAgentThought", return_value=mock_thought) + + result = runner.create_agent_thought("m", "msg", "tool", "input", ["f1"]) + assert result == "10" + assert runner.agent_thought_count == 1 + + def test_without_files(self, runner, mock_db_session, mocker): + mock_thought = mocker.MagicMock(id=11) + mocker.patch.object(module, "MessageAgentThought", return_value=mock_thought) + + result = runner.create_agent_thought("m", "msg", "tool", "input", []) + assert result == "11" + + +# ========================================================== +# save_agent_thought +# ========================================================== + + +class TestSaveAgentThought: + def setup_agent(self, mocker): + agent = mocker.MagicMock() + agent.tool = "tool1;tool2" + agent.tool_labels = {} + agent.thought = "" + return agent + + def test_not_found(self, runner, mock_db_session): + mock_db_session.scalar.return_value = None + with pytest.raises(ValueError): + runner.save_agent_thought("id", None, None, None, None, None, None, [], None) + + def test_full_update(self, runner, mock_db_session, mocker): + agent = self.setup_agent(mocker) + mock_db_session.scalar.return_value = agent + + mock_label = mocker.MagicMock() + mock_label.to_dict.return_value = {"en_US": "label"} + mocker.patch.object(module.ToolManager, "get_tool_label", return_value=mock_label) + + usage = mocker.MagicMock( + prompt_tokens=1, + prompt_price_unit=Decimal("0.1"), + prompt_unit_price=Decimal("0.1"), + completion_tokens=2, + completion_price_unit=Decimal("0.2"), + completion_unit_price=Decimal("0.2"), + total_tokens=3, + total_price=Decimal("0.3"), + ) + + runner.save_agent_thought( + "id", + "tool1;tool2", + {"a": 1}, + "thought", + {"b": 2}, + {"meta": 1}, + "answer", + ["f1"], + usage, + ) + + assert agent.answer == "answer" + assert agent.tokens == 3 + assert "tool1" in json.loads(agent.tool_labels_str) + + def test_label_fallback_when_none(self, runner, mock_db_session, mocker): + agent = self.setup_agent(mocker) + agent.tool = "unknown_tool" + mock_db_session.scalar.return_value = agent + mocker.patch.object(module.ToolManager, "get_tool_label", return_value=None) + + runner.save_agent_thought("id", None, None, None, None, None, None, [], None) + labels = json.loads(agent.tool_labels_str) + assert "unknown_tool" in labels + + def test_json_failure_paths(self, runner, mock_db_session, mocker): + agent = self.setup_agent(mocker) + mock_db_session.scalar.return_value = agent + + bad_obj = MagicMock() + bad_obj.__str__.return_value = "bad" + + runner.save_agent_thought( + "id", + None, + bad_obj, + None, + bad_obj, + bad_obj, + None, + [], + None, + ) + + assert mock_db_session.commit.called + + def test_messages_ids_none(self, runner, mock_db_session, mocker): + agent = self.setup_agent(mocker) + mock_db_session.scalar.return_value = agent + runner.save_agent_thought("id", None, None, None, None, None, None, None, None) + assert mock_db_session.commit.called + + def test_success_dict_serialization(self, runner, mock_db_session, mocker): + agent = self.setup_agent(mocker) + mock_db_session.scalar.return_value = agent + + runner.save_agent_thought( + "id", + None, + {"a": 1}, + None, + {"b": 2}, + None, + None, + [], + None, + ) + + assert isinstance(agent.tool_input, str) + assert isinstance(agent.observation, str) + + +# ========================================================== +# organize_agent_user_prompt +# ========================================================== + + +class TestOrganizeUserPrompt: + def test_no_files(self, runner, mock_db_session, mocker): + mock_db_session.scalars.return_value.all.return_value = [] + msg = mocker.MagicMock(id="1", query="hello", app_model_config=None) + result = runner.organize_agent_user_prompt(msg) + assert result.content == "hello" + + def test_with_files_no_config(self, runner, mock_db_session, mocker): + mock_db_session.scalars.return_value.all.return_value = [mocker.MagicMock()] + msg = mocker.MagicMock(id="1", query="hello", app_model_config=None) + result = runner.organize_agent_user_prompt(msg) + assert result.content == "hello" + + def test_image_detail_low_fallback(self, runner, mock_db_session, mocker): + mock_db_session.scalars.return_value.all.return_value = [mocker.MagicMock()] + file_config = mocker.MagicMock() + file_config.image_config = mocker.MagicMock(detail=None) + mocker.patch.object(module.FileUploadConfigManager, "convert", return_value=file_config) + mocker.patch.object(module.file_factory, "build_from_message_files", return_value=[]) + + msg = mocker.MagicMock(id="1", query="hello") + msg.app_model_config.to_dict.return_value = {} + + result = runner.organize_agent_user_prompt(msg) + assert result.content == "hello" + + +# ========================================================== +# organize_agent_history +# ========================================================== + + +class TestOrganizeHistory: + def test_empty(self, runner, mock_db_session, mocker): + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [] + mocker.patch.object(module, "extract_thread_messages", return_value=[]) + result = runner.organize_agent_history([]) + assert result == [] + + def test_with_answer_only(self, runner, mock_db_session, mocker): + msg = mocker.MagicMock(id="m1", answer="ans", agent_thoughts=[], app_model_config=None) + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + result = runner.organize_agent_history([]) + assert any(isinstance(x, module.AssistantPromptMessage) for x in result) + + def test_skip_current_message(self, runner, mock_db_session, mocker): + msg = mocker.MagicMock(id="msg_current", agent_thoughts=[], answer="ans", app_model_config=None) + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + result = runner.organize_agent_history([]) + assert result == [] + + def test_with_tool_calls_invalid_json(self, runner, mock_db_session, mocker): + thought = mocker.MagicMock( + tool="tool1", + tool_input="invalid", + observation="invalid", + thought="thinking", + ) + msg = mocker.MagicMock(id="m2", agent_thoughts=[thought], answer=None, app_model_config=None) + + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + mocker.patch("uuid.uuid4", return_value="uuid") + + result = runner.organize_agent_history([]) + assert isinstance(result, list) + + def test_empty_tool_name_split(self, runner, mock_db_session, mocker): + thought = mocker.MagicMock(tool=";", thought="thinking") + msg = mocker.MagicMock(id="m5", agent_thoughts=[thought], answer=None, app_model_config=None) + + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + result = runner.organize_agent_history([]) + assert isinstance(result, list) + + def test_valid_json_tool_flow(self, runner, mock_db_session, mocker): + thought = mocker.MagicMock( + tool="tool1", + tool_input=json.dumps({"tool1": {"x": 1}}), + observation=json.dumps({"tool1": "obs"}), + thought="thinking", + ) + + msg = mocker.MagicMock( + id="m100", + agent_thoughts=[thought], + answer=None, + app_model_config=None, + ) + + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + mocker.patch("uuid.uuid4", return_value="uuid") + + result = runner.organize_agent_history([]) + assert isinstance(result, list) + + +# ========================================================== +# _convert_tool_to_prompt_message_tool (new coverage) +# ========================================================== + + +class TestConvertToolToPromptMessageTool: + def test_basic_conversion(self, runner, mocker): + tool = mocker.MagicMock(tool_name="tool1") + + runtime_param = mocker.MagicMock() + runtime_param.form = module.ToolParameter.ToolParameterForm.LLM + runtime_param.name = "param1" + runtime_param.llm_description = "desc" + runtime_param.required = True + runtime_param.input_schema = None + runtime_param.options = None + + mock_type = mocker.MagicMock() + mock_type.as_normal_type.return_value = "string" + runtime_param.type = mock_type + + tool_entity = mocker.MagicMock() + tool_entity.entity.description.llm = "desc" + tool_entity.get_merged_runtime_parameters.return_value = [runtime_param] + + mocker.patch.object(module.ToolManager, "get_agent_tool_runtime", return_value=tool_entity) + mocker.patch.object(module, "PromptMessageTool", side_effect=lambda **kw: MagicMock(**kw)) + + prompt_tool, entity = runner._convert_tool_to_prompt_message_tool(tool) + assert entity == tool_entity + + def test_full_conversion_multiple_params(self, runner, mocker): + tool = mocker.MagicMock(tool_name="tool1") + + # LLM param with input_schema override + param1 = mocker.MagicMock() + param1.form = module.ToolParameter.ToolParameterForm.LLM + param1.name = "p1" + param1.llm_description = "desc" + param1.required = True + param1.input_schema = {"type": "integer"} + param1.options = None + param1.type = mocker.MagicMock() + + # SYSTEM_FILES param should be skipped + param2 = mocker.MagicMock() + param2.form = module.ToolParameter.ToolParameterForm.LLM + param2.name = "file_param" + param2.type = module.ToolParameter.ToolParameterType.SYSTEM_FILES + + tool_entity = mocker.MagicMock() + tool_entity.entity.description.llm = "desc" + tool_entity.get_merged_runtime_parameters.return_value = [param1, param2] + + mocker.patch.object(module.ToolManager, "get_agent_tool_runtime", return_value=tool_entity) + mocker.patch.object(module, "PromptMessageTool", side_effect=lambda **kw: MagicMock(**kw)) + + prompt_tool, entity = runner._convert_tool_to_prompt_message_tool(tool) + + assert entity == tool_entity + + +# ========================================================== +# _init_prompt_tools additional branches +# ========================================================== + + +class TestInitPromptToolsExtended: + def test_agent_tool_branch(self, runner, mocker): + agent_tool = mocker.MagicMock(tool_name="agent_tool") + runner.app_config.agent = mocker.MagicMock(tools=[agent_tool]) + mocker.patch.object(runner, "_convert_tool_to_prompt_message_tool", return_value=(MagicMock(), "entity")) + + tools, prompts = runner._init_prompt_tools() + assert "agent_tool" in tools + + def test_exception_in_conversion(self, runner, mocker): + agent_tool = mocker.MagicMock(tool_name="bad_tool") + runner.app_config.agent = mocker.MagicMock(tools=[agent_tool]) + mocker.patch.object(runner, "_convert_tool_to_prompt_message_tool", side_effect=Exception) + + tools, prompts = runner._init_prompt_tools() + assert tools == {} + + +# ========================================================== +# Additional Coverage Tests (DO NOT MODIFY EXISTING TESTS) +# ========================================================== + + +class TestAdditionalCoverage: + def test_update_prompt_with_input_schema(self, runner, mocker): + tool = mocker.MagicMock() + + param = mocker.MagicMock() + param.form = module.ToolParameter.ToolParameterForm.LLM + param.name = "p1" + param.required = False + param.llm_description = "desc" + param.options = None + param.input_schema = {"type": "number"} + + mock_type = mocker.MagicMock() + mock_type.as_normal_type.return_value = "string" + param.type = mock_type + + tool.get_runtime_parameters.return_value = [param] + + prompt_tool = mocker.MagicMock() + prompt_tool.parameters = {"properties": {}, "required": []} + + result = runner.update_prompt_message_tool(tool, prompt_tool) + assert result.parameters["properties"]["p1"]["type"] == "number" + + def test_save_agent_thought_existing_labels(self, runner, mock_db_session, mocker): + agent = mocker.MagicMock() + agent.tool = "tool1" + agent.tool_labels = {"tool1": {"en_US": "existing"}} + agent.thought = "" + mock_db_session.scalar.return_value = agent + + runner.save_agent_thought("id", None, None, None, None, None, None, [], None) + labels = json.loads(agent.tool_labels_str) + assert labels["tool1"]["en_US"] == "existing" + + def test_save_agent_thought_tool_meta_string(self, runner, mock_db_session, mocker): + agent = mocker.MagicMock() + agent.tool = "tool1" + agent.tool_labels = {} + agent.thought = "" + mock_db_session.scalar.return_value = agent + + runner.save_agent_thought("id", None, None, None, None, "meta_string", None, [], None) + assert agent.tool_meta_str == "meta_string" + + def test_convert_dataset_retriever_tool(self, runner, mocker): + ds_tool = mocker.MagicMock() + ds_tool.entity.identity.name = "ds" + ds_tool.entity.description.llm = "desc" + + param = mocker.MagicMock() + param.name = "query" + param.llm_description = "desc" + param.required = True + + ds_tool.get_runtime_parameters.return_value = [param] + + mocker.patch.object(module, "PromptMessageTool", side_effect=lambda **kw: MagicMock(**kw)) + + prompt = runner._convert_dataset_retriever_tool_to_prompt_message_tool(ds_tool) + assert prompt is not None + + def test_organize_user_prompt_with_file_objects(self, runner, mock_db_session, mocker): + mock_db_session.scalars.return_value.all.return_value = [mocker.MagicMock()] + + file_config = mocker.MagicMock() + file_config.image_config = mocker.MagicMock(detail=None) + + mocker.patch.object(module.FileUploadConfigManager, "convert", return_value=file_config) + mocker.patch.object(module.file_factory, "build_from_message_files", return_value=["file1"]) + mocker.patch.object(module.file_manager, "to_prompt_message_content", return_value=mocker.MagicMock()) + + mocker.patch.object(module, "UserPromptMessage", side_effect=lambda **kw: MagicMock(**kw)) + mocker.patch.object(module, "TextPromptMessageContent", side_effect=lambda **kw: MagicMock(**kw)) + + msg = mocker.MagicMock(id="1", query="hello") + msg.app_model_config.to_dict.return_value = {} + + result = runner.organize_agent_user_prompt(msg) + assert result is not None + + def test_organize_history_without_tool_names(self, runner, mock_db_session, mocker): + thought = mocker.MagicMock(tool=None, thought="thinking") + msg = mocker.MagicMock(id="m3", agent_thoughts=[thought], answer=None, app_model_config=None) + + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + + result = runner.organize_agent_history([]) + assert isinstance(result, list) + + def test_organize_history_multiple_tools_split(self, runner, mock_db_session, mocker): + thought = mocker.MagicMock( + tool="tool1;tool2", + tool_input=json.dumps({"tool1": {}, "tool2": {}}), + observation=json.dumps({"tool1": "o1", "tool2": "o2"}), + thought="thinking", + ) + msg = mocker.MagicMock(id="m4", agent_thoughts=[thought], answer=None, app_model_config=None) + + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + mocker.patch("uuid.uuid4", return_value="uuid") + + result = runner.organize_agent_history([]) + assert isinstance(result, list) + + # ================= Additional Surgical Coverage ================= + + def test_convert_tool_select_enum_branch(self, runner, mocker): + tool = mocker.MagicMock(tool_name="tool1") + + param = mocker.MagicMock() + param.form = module.ToolParameter.ToolParameterForm.LLM + param.name = "select_param" + param.required = True + param.llm_description = "desc" + param.input_schema = None + + option1 = mocker.MagicMock(value="A") + option2 = mocker.MagicMock(value="B") + param.options = [option1, option2] + param.type = module.ToolParameter.ToolParameterType.SELECT + + tool_entity = mocker.MagicMock() + tool_entity.entity.description.llm = "desc" + tool_entity.get_merged_runtime_parameters.return_value = [param] + + mocker.patch.object(module.ToolManager, "get_agent_tool_runtime", return_value=tool_entity) + mocker.patch.object(module, "PromptMessageTool", side_effect=lambda **kw: MagicMock(**kw)) + + prompt_tool, _ = runner._convert_tool_to_prompt_message_tool(tool) + assert prompt_tool is not None + + +class TestConvertDatasetRetrieverTool: + def test_required_param_added(self, runner, mocker): + ds_tool = mocker.MagicMock() + ds_tool.entity.identity.name = "ds" + ds_tool.entity.description.llm = "desc" + + param = mocker.MagicMock() + param.name = "query" + param.llm_description = "desc" + param.required = True + + ds_tool.get_runtime_parameters.return_value = [param] + + mocker.patch.object(module, "PromptMessageTool", side_effect=lambda **kw: MagicMock(**kw)) + + prompt = runner._convert_dataset_retriever_tool_to_prompt_message_tool(ds_tool) + + assert prompt is not None + + +class TestBaseAgentRunnerInit: + def test_init_sets_stream_tool_call_and_files(self, mocker): + session = mocker.MagicMock() + session.query.return_value.where.return_value.count.return_value = 2 + mocker.patch.object(module.db, "session", session) + + mocker.patch.object(BaseAgentRunner, "organize_agent_history", return_value=[]) + mocker.patch.object(module.DatasetRetrieverTool, "get_dataset_tools", return_value=["ds_tool"]) + + llm = mocker.MagicMock() + llm.get_model_schema.return_value = mocker.MagicMock( + features=[module.ModelFeature.STREAM_TOOL_CALL, module.ModelFeature.VISION] + ) + model_instance = mocker.MagicMock(model_type_instance=llm, model="m", credentials="c") + + app_config = mocker.MagicMock() + app_config.app_id = "app1" + app_config.agent = None + app_config.dataset = mocker.MagicMock(dataset_ids=["d1"], retrieve_config={"k": "v"}) + app_config.additional_features = mocker.MagicMock(show_retrieve_source=True) + + app_generate = mocker.MagicMock(invoke_from="test", inputs={}, files=["file1"]) + message = mocker.MagicMock(id="msg1", conversation_id="conv1") + + runner = BaseAgentRunner( + tenant_id="tenant", + application_generate_entity=app_generate, + conversation=mocker.MagicMock(), + app_config=app_config, + model_config=mocker.MagicMock(), + config=mocker.MagicMock(), + queue_manager=mocker.MagicMock(), + message=message, + user_id="user", + model_instance=model_instance, + ) + + assert runner.stream_tool_call is True + assert runner.files == ["file1"] + assert runner.dataset_tools == ["ds_tool"] + assert runner.agent_thought_count == 2 + + +class TestBaseAgentRunnerCoverage: + def test_convert_tool_skips_non_llm_param(self, runner, mocker): + tool = mocker.MagicMock(tool_name="tool1") + + param = mocker.MagicMock() + param.form = "NOT_LLM" + param.type = mocker.MagicMock() + + tool_entity = mocker.MagicMock() + tool_entity.entity.description.llm = "desc" + tool_entity.get_merged_runtime_parameters.return_value = [param] + + mocker.patch.object(module.ToolManager, "get_agent_tool_runtime", return_value=tool_entity) + mocker.patch.object(module, "PromptMessageTool", side_effect=lambda **kw: MagicMock(**kw)) + + prompt_tool, _ = runner._convert_tool_to_prompt_message_tool(tool) + + assert prompt_tool.parameters["properties"] == {} + + def test_init_prompt_tools_adds_dataset_tools(self, runner, mocker): + dataset_tool = mocker.MagicMock() + dataset_tool.entity.identity.name = "ds" + runner.dataset_tools = [dataset_tool] + + mocker.patch.object(runner, "_convert_dataset_retriever_tool_to_prompt_message_tool", return_value=MagicMock()) + + tools, prompt_tools = runner._init_prompt_tools() + + assert tools["ds"] == dataset_tool + assert len(prompt_tools) == 1 + + def test_update_prompt_message_tool_select_enum(self, runner, mocker): + tool = mocker.MagicMock() + + option1 = mocker.MagicMock(value="A") + option2 = mocker.MagicMock(value="B") + + param = mocker.MagicMock() + param.form = module.ToolParameter.ToolParameterForm.LLM + param.name = "select_param" + param.required = False + param.llm_description = "desc" + param.input_schema = None + param.options = [option1, option2] + param.type = module.ToolParameter.ToolParameterType.SELECT + + tool.get_runtime_parameters.return_value = [param] + + prompt_tool = mocker.MagicMock() + prompt_tool.parameters = {"properties": {}, "required": []} + + result = runner.update_prompt_message_tool(tool, prompt_tool) + + assert result.parameters["properties"]["select_param"]["enum"] == ["A", "B"] + + def test_save_agent_thought_json_dumps_fallbacks(self, runner, mock_db_session, mocker): + agent = mocker.MagicMock() + agent.tool = "tool1" + agent.tool_labels = {} + agent.thought = "" + mock_db_session.scalar.return_value = agent + + mocker.patch.object(module.ToolManager, "get_tool_label", return_value=None) + + tool_input = {"a": 1} + observation = {"b": 2} + tool_meta = {"c": 3} + + real_dumps = json.dumps + + def dumps_side_effect(value, *args, **kwargs): + if value in (tool_input, observation, tool_meta) and kwargs.get("ensure_ascii") is False: + raise TypeError("fail") + return real_dumps(value, *args, **kwargs) + + mocker.patch.object(module.json, "dumps", side_effect=dumps_side_effect) + + runner.save_agent_thought( + "id", + "tool1", + tool_input, + None, + observation, + tool_meta, + None, + [], + None, + ) + + assert isinstance(agent.tool_input, str) + assert isinstance(agent.observation, str) + assert isinstance(agent.tool_meta_str, str) + + def test_save_agent_thought_skips_empty_tool_name(self, runner, mock_db_session, mocker): + agent = mocker.MagicMock() + agent.tool = "tool1;;" + agent.tool_labels = {} + agent.thought = "" + mock_db_session.scalar.return_value = agent + + mocker.patch.object(module.ToolManager, "get_tool_label", return_value=None) + + runner.save_agent_thought("id", None, None, None, None, None, None, [], None) + + labels = json.loads(agent.tool_labels_str) + assert "" not in labels + + def test_organize_history_includes_system_prompt(self, runner, mock_db_session, mocker): + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [] + mocker.patch.object(module, "extract_thread_messages", return_value=[]) + + system_message = module.SystemPromptMessage(content="sys") + + result = runner.organize_agent_history([system_message]) + + assert system_message in result + + def test_organize_history_tool_inputs_and_observation_none(self, runner, mock_db_session, mocker): + thought = mocker.MagicMock( + tool="tool1", + tool_input=None, + observation=None, + thought="thinking", + ) + msg = mocker.MagicMock(id="m6", agent_thoughts=[thought], answer=None, app_model_config=None) + + mock_db_session.execute.return_value.scalars.return_value.all.return_value = [msg] + mocker.patch.object(module, "extract_thread_messages", return_value=[msg]) + mocker.patch("uuid.uuid4", return_value="uuid") + + mocker.patch.object( + runner, + "organize_agent_user_prompt", + return_value=module.UserPromptMessage(content="user"), + ) + + result = runner.organize_agent_history([]) + + assert any(isinstance(item, module.ToolPromptMessage) for item in result) diff --git a/api/tests/unit_tests/core/agent/test_cot_agent_runner.py b/api/tests/unit_tests/core/agent/test_cot_agent_runner.py new file mode 100644 index 0000000000..f6d1edbaf0 --- /dev/null +++ b/api/tests/unit_tests/core/agent/test_cot_agent_runner.py @@ -0,0 +1,551 @@ +import json +from unittest.mock import MagicMock + +import pytest + +from core.agent.cot_agent_runner import CotAgentRunner +from core.agent.entities import AgentScratchpadUnit +from core.agent.errors import AgentMaxIterationError +from dify_graph.model_runtime.entities.llm_entities import LLMUsage + + +class DummyRunner(CotAgentRunner): + """Concrete implementation for testing abstract methods.""" + + def __init__(self, **kwargs): + # Completely bypass BaseAgentRunner __init__ to avoid DB/session usage + for k, v in kwargs.items(): + setattr(self, k, v) + # Minimal required defaults + self.history_prompt_messages = [] + self.memory = None + + def _organize_prompt_messages(self): + return [] + + +@pytest.fixture +def runner(mocker): + # Prevent BaseAgentRunner __init__ from hitting database + mocker.patch( + "core.agent.base_agent_runner.BaseAgentRunner.organize_agent_history", + return_value=[], + ) + # Prepare required constructor dependencies for BaseAgentRunner + application_generate_entity = MagicMock() + application_generate_entity.model_conf = MagicMock() + application_generate_entity.model_conf.stop = [] + application_generate_entity.model_conf.provider = "openai" + application_generate_entity.model_conf.parameters = {} + application_generate_entity.trace_manager = None + application_generate_entity.invoke_from = "test" + + app_config = MagicMock() + app_config.agent = MagicMock() + app_config.agent.max_iteration = 1 + app_config.prompt_template.simple_prompt_template = "Hello {{name}}" + + model_instance = MagicMock() + model_instance.model = "test-model" + model_instance.model_name = "test-model" + model_instance.invoke_llm.return_value = [] + + model_config = MagicMock() + model_config.model = "test-model" + + queue_manager = MagicMock() + message = MagicMock() + + runner = DummyRunner( + tenant_id="tenant", + application_generate_entity=application_generate_entity, + conversation=MagicMock(), + app_config=app_config, + model_config=model_config, + config=MagicMock(), + queue_manager=queue_manager, + message=message, + user_id="user", + model_instance=model_instance, + ) + + # Patch internal methods to isolate behavior + runner._repack_app_generate_entity = MagicMock() + runner._init_prompt_tools = MagicMock(return_value=({}, [])) + runner.recalc_llm_max_tokens = MagicMock() + runner.create_agent_thought = MagicMock(return_value="thought-id") + runner.save_agent_thought = MagicMock() + runner.update_prompt_message_tool = MagicMock() + runner.agent_callback = None + runner.memory = None + runner.history_prompt_messages = [] + + return runner + + +class TestFillInputs: + @pytest.mark.parametrize( + ("instruction", "inputs", "expected"), + [ + ("Hello {{name}}", {"name": "John"}, "Hello John"), + ("No placeholders", {"name": "John"}, "No placeholders"), + ("{{a}}{{b}}", {"a": 1, "b": 2}, "12"), + ("{{x}}", {"x": None}, "None"), + ("", {"x": "y"}, ""), + ], + ) + def test_fill_in_inputs(self, runner, instruction, inputs, expected): + result = runner._fill_in_inputs_from_external_data_tools(instruction, inputs) + assert result == expected + + +class TestConvertDictToAction: + def test_convert_valid_dict(self, runner): + action_dict = {"action": "test", "action_input": {"a": 1}} + action = runner._convert_dict_to_action(action_dict) + assert action.action_name == "test" + assert action.action_input == {"a": 1} + + def test_convert_missing_keys(self, runner): + with pytest.raises(KeyError): + runner._convert_dict_to_action({"invalid": 1}) + + +class TestFormatAssistantMessage: + def test_format_assistant_message_multiple_scratchpads(self, runner): + sp1 = AgentScratchpadUnit( + agent_response="resp1", + thought="thought1", + action_str="action1", + action=AgentScratchpadUnit.Action(action_name="tool", action_input={}), + observation="obs1", + ) + sp2 = AgentScratchpadUnit( + agent_response="final", + thought="", + action_str="", + action=AgentScratchpadUnit.Action(action_name="Final Answer", action_input="done"), + observation=None, + ) + result = runner._format_assistant_message([sp1, sp2]) + assert "Final Answer:" in result + + def test_format_with_final(self, runner): + scratchpad = AgentScratchpadUnit( + agent_response="Done", + thought="", + action_str="", + action=None, + observation=None, + ) + # Simulate final state via action name + scratchpad.action = AgentScratchpadUnit.Action(action_name="Final Answer", action_input="Done") + result = runner._format_assistant_message([scratchpad]) + assert "Final Answer" in result + + def test_format_with_action_and_observation(self, runner): + scratchpad = AgentScratchpadUnit( + agent_response="resp", + thought="thinking", + action_str="action", + action=None, + observation="obs", + ) + # Non-final state: provide a non-final action + scratchpad.action = AgentScratchpadUnit.Action(action_name="tool", action_input={}) + result = runner._format_assistant_message([scratchpad]) + assert "Thought:" in result + assert "Action:" in result + assert "Observation:" in result + + +class TestHandleInvokeAction: + def test_handle_invoke_action_tool_not_present(self, runner): + action = AgentScratchpadUnit.Action(action_name="missing", action_input={}) + response, meta = runner._handle_invoke_action(action, {}, []) + assert "there is not a tool named" in response + + def test_tool_with_json_string_args(self, runner, mocker): + action = AgentScratchpadUnit.Action(action_name="tool", action_input=json.dumps({"a": 1})) + tool_instance = MagicMock() + tool_instances = {"tool": tool_instance} + + mocker.patch( + "core.agent.cot_agent_runner.ToolEngine.agent_invoke", + return_value=("result", [], MagicMock(to_dict=lambda: {})), + ) + + response, meta = runner._handle_invoke_action(action, tool_instances, []) + assert response == "result" + + +class TestOrganizeHistoricPromptMessages: + def test_empty_history(self, runner, mocker): + mocker.patch( + "core.agent.cot_agent_runner.AgentHistoryPromptTransform.get_prompt", + return_value=[], + ) + result = runner._organize_historic_prompt_messages([]) + assert result == [] + + +class TestRun: + def test_run_handles_empty_parser_output(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[], + ) + + results = list(runner.run(message, "query", {})) + assert isinstance(results, list) + + def test_run_with_action_and_tool_invocation(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + action = AgentScratchpadUnit.Action(action_name="tool", action_input={}) + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[action], + ) + + mocker.patch( + "core.agent.cot_agent_runner.ToolEngine.agent_invoke", + return_value=("ok", [], MagicMock(to_dict=lambda: {})), + ) + + runner.agent_callback = None + + with pytest.raises(AgentMaxIterationError): + list(runner.run(message, "query", {"tool": MagicMock()})) + + def test_run_respects_max_iteration_boundary(self, runner, mocker): + runner.app_config.agent.max_iteration = 1 + message = MagicMock() + message.id = "msg-id" + + action = AgentScratchpadUnit.Action(action_name="tool", action_input={}) + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[action], + ) + + mocker.patch( + "core.agent.cot_agent_runner.ToolEngine.agent_invoke", + return_value=("ok", [], MagicMock(to_dict=lambda: {})), + ) + + runner.agent_callback = None + + with pytest.raises(AgentMaxIterationError): + list(runner.run(message, "query", {"tool": MagicMock()})) + + def test_run_basic_flow(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[], + ) + + results = list(runner.run(message, "query", {"name": "John"})) + assert results + + def test_run_max_iteration_error(self, runner, mocker): + runner.app_config.agent.max_iteration = 0 + message = MagicMock() + message.id = "msg-id" + + action = AgentScratchpadUnit.Action(action_name="tool", action_input={}) + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[action], + ) + + with pytest.raises(AgentMaxIterationError): + list(runner.run(message, "query", {})) + + def test_run_increase_usage_aggregation(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + runner.app_config.agent.max_iteration = 2 + + usage_1 = LLMUsage.empty_usage() + usage_1.prompt_tokens = 1 + usage_1.completion_tokens = 1 + usage_1.total_tokens = 2 + usage_1.prompt_price = 1 + usage_1.completion_price = 1 + usage_1.total_price = 2 + + usage_2 = LLMUsage.empty_usage() + usage_2.prompt_tokens = 1 + usage_2.completion_tokens = 1 + usage_2.total_tokens = 2 + usage_2.prompt_price = 1 + usage_2.completion_price = 1 + usage_2.total_price = 2 + + action = AgentScratchpadUnit.Action(action_name="tool", action_input={}) + + handle_output = mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + side_effect=[ + [action], + [], + ], + ) + + def _handle_side_effect(chunks, usage_dict): + call_index = handle_output.call_count + usage_dict["usage"] = usage_1 if call_index == 1 else usage_2 + return [action] if call_index == 1 else [] + + handle_output.side_effect = _handle_side_effect + runner.model_instance.invoke_llm = MagicMock(return_value=[]) + mocker.patch( + "core.agent.cot_agent_runner.ToolEngine.agent_invoke", + return_value=("ok", [], MagicMock(to_dict=lambda: {})), + ) + + fake_prompt_tool = MagicMock() + fake_prompt_tool.name = "tool" + runner._init_prompt_tools = MagicMock(return_value=({"tool": MagicMock()}, [fake_prompt_tool])) + + results = list(runner.run(message, "query", {})) + final_usage = results[-1].delta.usage + assert final_usage is not None + assert final_usage.prompt_tokens == 2 + assert final_usage.completion_tokens == 2 + assert final_usage.total_tokens == 4 + assert final_usage.prompt_price == 2 + assert final_usage.completion_price == 2 + assert final_usage.total_price == 4 + + def test_run_when_no_action_branch(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[], + ) + + results = list(runner.run(message, "query", {})) + assert results[-1].delta.message.content == "" + + def test_run_usage_missing_key_branch(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[], + ) + + runner.model_instance.invoke_llm = MagicMock(return_value=[]) + + list(runner.run(message, "query", {})) + + def test_run_prompt_tool_update_branch(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + action = AgentScratchpadUnit.Action(action_name="tool", action_input={}) + + # First iteration → action + # Second iteration → no action (empty list) + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + side_effect=[[action], []], + ) + + mocker.patch( + "core.agent.cot_agent_runner.ToolEngine.agent_invoke", + return_value=("ok", [], MagicMock(to_dict=lambda: {})), + ) + + runner.app_config.agent.max_iteration = 5 + + fake_prompt_tool = MagicMock() + fake_prompt_tool.name = "tool" + + runner._init_prompt_tools = MagicMock(return_value=({"tool": MagicMock()}, [fake_prompt_tool])) + + runner.update_prompt_message_tool = MagicMock() + runner.agent_callback = None + + list(runner.run(message, "query", {})) + + runner.update_prompt_message_tool.assert_called_once() + + def test_historic_with_assistant_and_tool_calls(self, runner): + from dify_graph.model_runtime.entities.message_entities import AssistantPromptMessage, ToolPromptMessage + + assistant = AssistantPromptMessage(content="thinking") + assistant.tool_calls = [MagicMock(function=MagicMock(name="tool", arguments='{"a":1}'))] + + tool_msg = ToolPromptMessage(content="obs", tool_call_id="1") + + runner.history_prompt_messages = [assistant, tool_msg] + + result = runner._organize_historic_prompt_messages([]) + assert isinstance(result, list) + + def test_historic_final_flush_branch(self, runner): + from dify_graph.model_runtime.entities.message_entities import AssistantPromptMessage + + assistant = AssistantPromptMessage(content="final") + runner.history_prompt_messages = [assistant] + + result = runner._organize_historic_prompt_messages([]) + assert isinstance(result, list) + + +class TestInitReactState: + def test_init_react_state_resets_state(self, runner, mocker): + mocker.patch.object(runner, "_organize_historic_prompt_messages", return_value=["historic"]) + runner._agent_scratchpad = ["old"] + runner._query = "old" + + runner._init_react_state("new-query") + + assert runner._query == "new-query" + assert runner._agent_scratchpad == [] + assert runner._historic_prompt_messages == ["historic"] + + +class TestHandleInvokeActionExtended: + def test_tool_with_invalid_json_string_args(self, runner, mocker): + action = AgentScratchpadUnit.Action(action_name="tool", action_input="not-json") + tool_instance = MagicMock() + tool_instances = {"tool": tool_instance} + + mocker.patch( + "core.agent.cot_agent_runner.ToolEngine.agent_invoke", + return_value=("ok", ["file1"], MagicMock(to_dict=lambda: {"k": "v"})), + ) + + message_file_ids = [] + response, meta = runner._handle_invoke_action(action, tool_instances, message_file_ids) + + assert response == "ok" + assert message_file_ids == ["file1"] + runner.queue_manager.publish.assert_called() + + +class TestFillInputsEdgeCases: + def test_fill_inputs_with_empty_inputs(self, runner): + result = runner._fill_in_inputs_from_external_data_tools("Hello {{x}}", {}) + assert result == "Hello {{x}}" + + def test_fill_inputs_with_exception_in_replace(self, runner): + class BadValue: + def __str__(self): + raise Exception("fail") + + # Should silently continue on exception + result = runner._fill_in_inputs_from_external_data_tools("Hello {{x}}", {"x": BadValue()}) + assert result == "Hello {{x}}" + + +class TestOrganizeHistoricPromptMessagesExtended: + def test_user_message_flushes_scratchpad(self, runner, mocker): + from dify_graph.model_runtime.entities.message_entities import UserPromptMessage + + user_message = UserPromptMessage(content="Hi") + + runner.history_prompt_messages = [user_message] + + mock_transform = mocker.patch( + "core.agent.cot_agent_runner.AgentHistoryPromptTransform", + ) + mock_transform.return_value.get_prompt.return_value = ["final"] + + result = runner._organize_historic_prompt_messages([]) + assert result == ["final"] + + def test_tool_message_without_scratchpad_raises(self, runner): + from dify_graph.model_runtime.entities.message_entities import ToolPromptMessage + + runner.history_prompt_messages = [ToolPromptMessage(content="obs", tool_call_id="1")] + + with pytest.raises(NotImplementedError): + runner._organize_historic_prompt_messages([]) + + def test_agent_history_transform_invocation(self, runner, mocker): + mock_transform = MagicMock() + mock_transform.get_prompt.return_value = [] + + mocker.patch( + "core.agent.cot_agent_runner.AgentHistoryPromptTransform", + return_value=mock_transform, + ) + + runner.history_prompt_messages = [] + result = runner._organize_historic_prompt_messages([]) + assert result == [] + + +class TestRunAdditionalBranches: + def test_run_with_no_action_final_answer_empty(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=["thinking"], + ) + + results = list(runner.run(message, "query", {})) + assert any(hasattr(r, "delta") for r in results) + + def test_run_with_final_answer_action_string(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + action = AgentScratchpadUnit.Action(action_name="Final Answer", action_input="done") + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[action], + ) + + results = list(runner.run(message, "query", {})) + assert results[-1].delta.message.content == "done" + + def test_run_with_final_answer_action_dict(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + action = AgentScratchpadUnit.Action(action_name="Final Answer", action_input={"a": 1}) + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[action], + ) + + results = list(runner.run(message, "query", {})) + assert json.loads(results[-1].delta.message.content) == {"a": 1} + + def test_run_with_string_final_answer(self, runner, mocker): + message = MagicMock() + message.id = "msg-id" + + # Remove invalid branch: Pydantic enforces str|dict for action_input + action = AgentScratchpadUnit.Action(action_name="Final Answer", action_input="12345") + + mocker.patch( + "core.agent.cot_agent_runner.CotAgentOutputParser.handle_react_stream_output", + return_value=[action], + ) + + results = list(runner.run(message, "query", {})) + assert results[-1].delta.message.content == "12345" diff --git a/api/tests/unit_tests/core/agent/test_cot_chat_agent_runner.py b/api/tests/unit_tests/core/agent/test_cot_chat_agent_runner.py new file mode 100644 index 0000000000..f9d69d1196 --- /dev/null +++ b/api/tests/unit_tests/core/agent/test_cot_chat_agent_runner.py @@ -0,0 +1,215 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.agent.cot_chat_agent_runner import CotChatAgentRunner +from dify_graph.model_runtime.entities.message_entities import TextPromptMessageContent +from tests.unit_tests.core.agent.conftest import ( + DummyAgentConfig, + DummyAppConfig, + DummyTool, +) +from tests.unit_tests.core.agent.conftest import ( + DummyPromptEntity as DummyPrompt, +) + + +class DummyFileUploadConfig: + def __init__(self, image_config=None): + self.image_config = image_config + + +class DummyImageConfig: + def __init__(self, detail=None): + self.detail = detail + + +class DummyGenerateEntity: + def __init__(self, file_upload_config=None): + self.file_upload_config = file_upload_config + + +class DummyUnit: + def __init__(self, final=False, thought=None, action_str=None, observation=None, agent_response=None): + self._final = final + self.thought = thought + self.action_str = action_str + self.observation = observation + self.agent_response = agent_response + + def is_final(self): + return self._final + + +@pytest.fixture +def runner(): + runner = CotChatAgentRunner.__new__(CotChatAgentRunner) + runner._instruction = "test_instruction" + runner._prompt_messages_tools = [DummyTool("tool1"), DummyTool("tool2")] + runner._query = "user query" + runner._agent_scratchpad = [] + runner.files = [] + runner.application_generate_entity = DummyGenerateEntity() + runner._organize_historic_prompt_messages = MagicMock(return_value=["historic"]) + return runner + + +class TestOrganizeSystemPrompt: + def test_organize_system_prompt_success(self, runner, mocker): + first_prompt = "Instruction: {{instruction}}, Tools: {{tools}}, Names: {{tool_names}}" + runner.app_config = DummyAppConfig(DummyAgentConfig(DummyPrompt(first_prompt))) + + mocker.patch( + "core.agent.cot_chat_agent_runner.jsonable_encoder", + return_value=[{"name": "tool1"}, {"name": "tool2"}], + ) + + result = runner._organize_system_prompt() + + assert "test_instruction" in result.content + assert "tool1" in result.content + assert "tool2" in result.content + assert "tool1, tool2" in result.content + + def test_organize_system_prompt_missing_agent(self, runner): + runner.app_config = DummyAppConfig(agent=None) + with pytest.raises(AssertionError): + runner._organize_system_prompt() + + def test_organize_system_prompt_missing_prompt(self, runner): + runner.app_config = DummyAppConfig(DummyAgentConfig(prompt_entity=None)) + with pytest.raises(AssertionError): + runner._organize_system_prompt() + + +class TestOrganizeUserQuery: + @pytest.mark.parametrize("files", [None, pytest.param([], id="empty_list")]) + def test_organize_user_query_no_files(self, runner, files): + runner.files = files + result = runner._organize_user_query("query", []) + assert len(result) == 1 + assert result[0].content == "query" + + @patch("core.agent.cot_chat_agent_runner.UserPromptMessage") + @patch("core.agent.cot_chat_agent_runner.file_manager.to_prompt_message_content") + def test_organize_user_query_with_image_file_default_config(self, mock_to_prompt, mock_user_prompt, runner): + from dify_graph.model_runtime.entities.message_entities import ImagePromptMessageContent + + mock_content = ImagePromptMessageContent( + url="http://test", + format="png", + mime_type="image/png", + ) + mock_to_prompt.return_value = mock_content + mock_user_prompt.side_effect = lambda content: MagicMock(content=content) + + runner.files = ["file1"] + runner.application_generate_entity = DummyGenerateEntity(None) + + result = runner._organize_user_query("query", []) + assert len(result) == 1 + assert isinstance(result[0].content, list) + assert mock_content in result[0].content + mock_to_prompt.assert_called_once_with( + "file1", + image_detail_config=ImagePromptMessageContent.DETAIL.LOW, + ) + + @patch("core.agent.cot_chat_agent_runner.UserPromptMessage") + @patch("core.agent.cot_chat_agent_runner.file_manager.to_prompt_message_content") + def test_organize_user_query_with_image_file_high_detail(self, mock_to_prompt, mock_user_prompt, runner): + from dify_graph.model_runtime.entities.message_entities import ImagePromptMessageContent + + mock_content = ImagePromptMessageContent( + url="http://test", + format="png", + mime_type="image/png", + ) + mock_to_prompt.return_value = mock_content + mock_user_prompt.side_effect = lambda content: MagicMock(content=content) + + runner.files = ["file1"] + + image_config = DummyImageConfig(detail="high") + runner.application_generate_entity = DummyGenerateEntity(DummyFileUploadConfig(image_config)) + + result = runner._organize_user_query("query", []) + assert len(result) == 1 + assert isinstance(result[0].content, list) + assert mock_content in result[0].content + mock_to_prompt.assert_called_once_with( + "file1", + image_detail_config=ImagePromptMessageContent.DETAIL.HIGH, + ) + + @patch("core.agent.cot_chat_agent_runner.file_manager.to_prompt_message_content") + def test_organize_user_query_with_text_file_no_config(self, mock_to_prompt, runner): + mock_to_prompt.return_value = TextPromptMessageContent(data="file_content") + runner.files = ["file1"] + runner.application_generate_entity = DummyGenerateEntity(None) + + result = runner._organize_user_query("query", []) + assert len(result) == 1 + assert isinstance(result[0].content, list) + + +class TestOrganizePromptMessages: + def test_no_scratchpad(self, runner, mocker): + runner.app_config = DummyAppConfig(DummyAgentConfig(DummyPrompt("{{instruction}}"))) + runner._organize_system_prompt = MagicMock(return_value="system") + runner._organize_user_query = MagicMock(return_value=["query"]) + + result = runner._organize_prompt_messages() + assert "system" in result + assert "query" in result + runner._organize_historic_prompt_messages.assert_called_once() + + def test_with_final_scratchpad(self, runner, mocker): + runner.app_config = DummyAppConfig(DummyAgentConfig(DummyPrompt("{{instruction}}"))) + runner._organize_system_prompt = MagicMock(return_value="system") + runner._organize_user_query = MagicMock(return_value=["query"]) + + unit = DummyUnit(final=True, agent_response="done") + runner._agent_scratchpad = [unit] + + result = runner._organize_prompt_messages() + assistant_msgs = [m for m in result if hasattr(m, "content")] + combined = "".join([m.content for m in assistant_msgs if isinstance(m.content, str)]) + assert "Final Answer: done" in combined + + def test_with_thought_action_observation(self, runner, mocker): + runner.app_config = DummyAppConfig(DummyAgentConfig(DummyPrompt("{{instruction}}"))) + runner._organize_system_prompt = MagicMock(return_value="system") + runner._organize_user_query = MagicMock(return_value=["query"]) + + unit = DummyUnit( + final=False, + thought="thinking", + action_str="action", + observation="observe", + ) + runner._agent_scratchpad = [unit] + + result = runner._organize_prompt_messages() + assistant_msgs = [m for m in result if hasattr(m, "content")] + combined = "".join([m.content for m in assistant_msgs if isinstance(m.content, str)]) + assert "Thought: thinking" in combined + assert "Action: action" in combined + assert "Observation: observe" in combined + + def test_multiple_units_mixed(self, runner, mocker): + runner.app_config = DummyAppConfig(DummyAgentConfig(DummyPrompt("{{instruction}}"))) + runner._organize_system_prompt = MagicMock(return_value="system") + runner._organize_user_query = MagicMock(return_value=["query"]) + + units = [ + DummyUnit(final=False, thought="t1"), + DummyUnit(final=True, agent_response="done"), + ] + runner._agent_scratchpad = units + + result = runner._organize_prompt_messages() + assistant_msgs = [m for m in result if hasattr(m, "content")] + combined = "".join([m.content for m in assistant_msgs if isinstance(m.content, str)]) + assert "Thought: t1" in combined + assert "Final Answer: done" in combined diff --git a/api/tests/unit_tests/core/agent/test_cot_completion_agent_runner.py b/api/tests/unit_tests/core/agent/test_cot_completion_agent_runner.py new file mode 100644 index 0000000000..ab822bb57d --- /dev/null +++ b/api/tests/unit_tests/core/agent/test_cot_completion_agent_runner.py @@ -0,0 +1,234 @@ +import json + +import pytest + +from core.agent.cot_completion_agent_runner import CotCompletionAgentRunner +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + ImagePromptMessageContent, + TextPromptMessageContent, + UserPromptMessage, +) + +# ----------------------------- +# Fixtures +# ----------------------------- + + +@pytest.fixture +def runner(mocker, dummy_tool_factory): + runner = CotCompletionAgentRunner.__new__(CotCompletionAgentRunner) + + runner._instruction = "Test instruction" + runner._prompt_messages_tools = [dummy_tool_factory("toolA"), dummy_tool_factory("toolB")] + runner._query = "What is Python?" + runner._agent_scratchpad = [] + + mocker.patch( + "core.agent.cot_completion_agent_runner.jsonable_encoder", + side_effect=lambda tools: [{"name": t.name} for t in tools], + ) + + return runner + + +# ====================================================== +# _organize_instruction_prompt Tests +# ====================================================== + + +class TestOrganizeInstructionPrompt: + def test_success_all_placeholders( + self, runner, dummy_app_config_factory, dummy_agent_config_factory, dummy_prompt_entity_factory + ): + template = ( + "{{instruction}} | {{tools}} | {{tool_names}} | {{historic_messages}} | {{agent_scratchpad}} | {{query}}" + ) + + runner.app_config = dummy_app_config_factory( + agent=dummy_agent_config_factory(prompt_entity=dummy_prompt_entity_factory(template)) + ) + + result = runner._organize_instruction_prompt() + + assert "Test instruction" in result + assert "toolA" in result + assert "toolB" in result + tools_payload = json.loads(result.split(" | ")[1]) + assert {item["name"] for item in tools_payload} == {"toolA", "toolB"} + + def test_agent_none_raises(self, runner, dummy_app_config_factory): + runner.app_config = dummy_app_config_factory(agent=None) + with pytest.raises(ValueError, match="Agent configuration is not set"): + runner._organize_instruction_prompt() + + def test_prompt_entity_none_raises(self, runner, dummy_app_config_factory, dummy_agent_config_factory): + runner.app_config = dummy_app_config_factory(agent=dummy_agent_config_factory(prompt_entity=None)) + with pytest.raises(ValueError, match="prompt entity is not set"): + runner._organize_instruction_prompt() + + +# ====================================================== +# _organize_historic_prompt Tests +# ====================================================== + + +class TestOrganizeHistoricPrompt: + def test_with_user_and_assistant_string(self, runner, mocker): + user_msg = UserPromptMessage(content="Hello") + assistant_msg = AssistantPromptMessage(content="Hi there") + + mocker.patch.object( + runner, + "_organize_historic_prompt_messages", + return_value=[user_msg, assistant_msg], + ) + + result = runner._organize_historic_prompt() + + assert "Question: Hello" in result + assert "Hi there" in result + + def test_assistant_list_with_text_content(self, runner, mocker): + text_content = TextPromptMessageContent(data="Partial answer") + assistant_msg = AssistantPromptMessage(content=[text_content]) + + mocker.patch.object( + runner, + "_organize_historic_prompt_messages", + return_value=[assistant_msg], + ) + + result = runner._organize_historic_prompt() + + assert "Partial answer" in result + + def test_assistant_list_with_non_text_content_ignored(self, runner, mocker): + non_text_content = ImagePromptMessageContent(format="url", mime_type="image/png") + assistant_msg = AssistantPromptMessage(content=[non_text_content]) + + mocker.patch.object( + runner, + "_organize_historic_prompt_messages", + return_value=[assistant_msg], + ) + + result = runner._organize_historic_prompt() + assert result == "" + + def test_empty_history(self, runner, mocker): + mocker.patch.object( + runner, + "_organize_historic_prompt_messages", + return_value=[], + ) + + result = runner._organize_historic_prompt() + assert result == "" + + +# ====================================================== +# _organize_prompt_messages Tests +# ====================================================== + + +class TestOrganizePromptMessages: + def test_full_flow_with_scratchpad( + self, + runner, + mocker, + dummy_app_config_factory, + dummy_agent_config_factory, + dummy_prompt_entity_factory, + dummy_scratchpad_unit_factory, + ): + template = "SYS {{historic_messages}} {{agent_scratchpad}} {{query}}" + + runner.app_config = dummy_app_config_factory( + agent=dummy_agent_config_factory(prompt_entity=dummy_prompt_entity_factory(template)) + ) + + mocker.patch.object(runner, "_organize_historic_prompt", return_value="History\n") + + runner._agent_scratchpad = [ + dummy_scratchpad_unit_factory(final=False, thought="Thinking", action_str="Act", observation="Obs"), + dummy_scratchpad_unit_factory(final=True, agent_response="Done"), + ] + + result = runner._organize_prompt_messages() + + assert isinstance(result, list) + assert len(result) == 1 + assert isinstance(result[0], UserPromptMessage) + + content = result[0].content + + assert "History" in content + assert "Thought: Thinking" in content + assert "Action: Act" in content + assert "Observation: Obs" in content + assert "Final Answer: Done" in content + assert "Question: What is Python?" in content + + def test_no_scratchpad( + self, runner, mocker, dummy_app_config_factory, dummy_agent_config_factory, dummy_prompt_entity_factory + ): + template = "SYS {{historic_messages}} {{agent_scratchpad}} {{query}}" + + runner.app_config = dummy_app_config_factory( + agent=dummy_agent_config_factory(prompt_entity=dummy_prompt_entity_factory(template)) + ) + + mocker.patch.object(runner, "_organize_historic_prompt", return_value="") + + runner._agent_scratchpad = None + + result = runner._organize_prompt_messages() + + assert "Question: What is Python?" in result[0].content + + @pytest.mark.parametrize( + ("thought", "action", "observation"), + [ + ("T", None, None), + ("T", "A", None), + ("T", None, "O"), + ], + ) + def test_partial_scratchpad_units( + self, + runner, + mocker, + thought, + action, + observation, + dummy_app_config_factory, + dummy_agent_config_factory, + dummy_prompt_entity_factory, + dummy_scratchpad_unit_factory, + ): + template = "SYS {{historic_messages}} {{agent_scratchpad}} {{query}}" + + runner.app_config = dummy_app_config_factory( + agent=dummy_agent_config_factory(prompt_entity=dummy_prompt_entity_factory(template)) + ) + + mocker.patch.object(runner, "_organize_historic_prompt", return_value="") + + runner._agent_scratchpad = [ + dummy_scratchpad_unit_factory( + final=False, + thought=thought, + action_str=action, + observation=observation, + ) + ] + + result = runner._organize_prompt_messages() + content = result[0].content + + assert "Thought:" in content + if action: + assert "Action:" in content + if observation: + assert "Observation:" in content diff --git a/api/tests/unit_tests/core/agent/test_fc_agent_runner.py b/api/tests/unit_tests/core/agent/test_fc_agent_runner.py new file mode 100644 index 0000000000..299c9b31d2 --- /dev/null +++ b/api/tests/unit_tests/core/agent/test_fc_agent_runner.py @@ -0,0 +1,452 @@ +import json +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from core.agent.errors import AgentMaxIterationError +from core.agent.fc_agent_runner import FunctionCallAgentRunner +from core.app.apps.base_app_queue_manager import PublishFrom +from core.app.entities.queue_entities import QueueMessageFileEvent +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.model_runtime.entities.message_entities import ( + DocumentPromptMessageContent, + ImagePromptMessageContent, + TextPromptMessageContent, + UserPromptMessage, +) + +# ============================== +# Dummy Helper Classes +# ============================== + + +def build_usage(pt=1, ct=1, tt=2) -> LLMUsage: + usage = LLMUsage.empty_usage() + usage.prompt_tokens = pt + usage.completion_tokens = ct + usage.total_tokens = tt + usage.prompt_price = 0 + usage.completion_price = 0 + usage.total_price = 0 + return usage + + +class DummyMessage: + def __init__(self, content: str | None = None, tool_calls: list[Any] | None = None): + self.content: str | None = content + self.tool_calls: list[Any] = tool_calls or [] + + +class DummyDelta: + def __init__(self, message: DummyMessage | None = None, usage: LLMUsage | None = None): + self.message: DummyMessage | None = message + self.usage: LLMUsage | None = usage + + +class DummyChunk: + def __init__(self, message: DummyMessage | None = None, usage: LLMUsage | None = None): + self.delta: DummyDelta = DummyDelta(message=message, usage=usage) + + +class DummyResult: + def __init__( + self, + message: DummyMessage | None = None, + usage: LLMUsage | None = None, + prompt_messages: list[DummyMessage] | None = None, + ): + self.message: DummyMessage | None = message + self.usage: LLMUsage | None = usage + self.prompt_messages: list[DummyMessage] = prompt_messages or [] + self.system_fingerprint: str = "" + + +# ============================== +# Fixtures +# ============================== + + +@pytest.fixture +def runner(mocker): + # Completely bypass BaseAgentRunner __init__ to avoid DB / Flask context + mocker.patch( + "core.agent.base_agent_runner.BaseAgentRunner.__init__", + return_value=None, + ) + + # Patch streaming chunk models to avoid validation on dummy message objects + mocker.patch("core.agent.fc_agent_runner.LLMResultChunk", MagicMock) + mocker.patch("core.agent.fc_agent_runner.LLMResultChunkDelta", MagicMock) + + app_config = MagicMock() + app_config.agent = MagicMock(max_iteration=2) + app_config.prompt_template = MagicMock(simple_prompt_template="system") + + application_generate_entity = MagicMock() + application_generate_entity.model_conf = MagicMock(parameters={}, stop=None) + application_generate_entity.trace_manager = MagicMock() + application_generate_entity.invoke_from = "test" + application_generate_entity.app_config = MagicMock(app_id="app") + application_generate_entity.file_upload_config = None + + queue_manager = MagicMock() + model_instance = MagicMock() + model_instance.model = "test-model" + model_instance.model_name = "test-model" + + message = MagicMock(id="msg1") + conversation = MagicMock(id="conv1") + + runner = FunctionCallAgentRunner( + tenant_id="tenant", + application_generate_entity=application_generate_entity, + conversation=conversation, + app_config=app_config, + model_config=MagicMock(), + config=MagicMock(), + queue_manager=queue_manager, + message=message, + user_id="user", + model_instance=model_instance, + ) + + # Manually inject required attributes normally set by BaseAgentRunner + runner.tenant_id = "tenant" + runner.application_generate_entity = application_generate_entity + runner.conversation = conversation + runner.app_config = app_config + runner.model_config = MagicMock() + runner.config = MagicMock() + runner.queue_manager = queue_manager + runner.message = message + runner.user_id = "user" + runner.model_instance = model_instance + + runner.stream_tool_call = False + runner.memory = None + runner.history_prompt_messages = [] + runner._current_thoughts = [] + runner.files = [] + runner.agent_callback = MagicMock() + + runner._init_prompt_tools = MagicMock(return_value=({}, [])) + runner.create_agent_thought = MagicMock(return_value="thought1") + runner.save_agent_thought = MagicMock() + runner.recalc_llm_max_tokens = MagicMock() + runner.update_prompt_message_tool = MagicMock() + + return runner + + +# ============================== +# Tool Call Checks +# ============================== + + +class TestToolCallChecks: + @pytest.mark.parametrize(("tool_calls", "expected"), [([], False), ([MagicMock()], True)]) + def test_check_tool_calls(self, runner, tool_calls, expected): + chunk = DummyChunk(message=DummyMessage(tool_calls=tool_calls)) + assert runner.check_tool_calls(chunk) is expected + + @pytest.mark.parametrize(("tool_calls", "expected"), [([], False), ([MagicMock()], True)]) + def test_check_blocking_tool_calls(self, runner, tool_calls, expected): + result = DummyResult(message=DummyMessage(tool_calls=tool_calls)) + assert runner.check_blocking_tool_calls(result) is expected + + +# ============================== +# Extract Tool Calls +# ============================== + + +class TestExtractToolCalls: + def test_extract_tool_calls_with_valid_json(self, runner): + tool_call = MagicMock() + tool_call.id = "1" + tool_call.function.name = "tool" + tool_call.function.arguments = json.dumps({"a": 1}) + + chunk = DummyChunk(message=DummyMessage(tool_calls=[tool_call])) + calls = runner.extract_tool_calls(chunk) + + assert calls == [("1", "tool", {"a": 1})] + + def test_extract_tool_calls_empty_arguments(self, runner): + tool_call = MagicMock() + tool_call.id = "1" + tool_call.function.name = "tool" + tool_call.function.arguments = "" + + chunk = DummyChunk(message=DummyMessage(tool_calls=[tool_call])) + calls = runner.extract_tool_calls(chunk) + + assert calls == [("1", "tool", {})] + + def test_extract_blocking_tool_calls(self, runner): + tool_call = MagicMock() + tool_call.id = "2" + tool_call.function.name = "block" + tool_call.function.arguments = json.dumps({"x": 2}) + + result = DummyResult(message=DummyMessage(tool_calls=[tool_call])) + calls = runner.extract_blocking_tool_calls(result) + + assert calls == [("2", "block", {"x": 2})] + + +# ============================== +# System Message Initialization +# ============================== + + +class TestInitSystemMessage: + def test_init_system_message_empty_prompt_messages(self, runner): + result = runner._init_system_message("system", []) + assert len(result) == 1 + + def test_init_system_message_insert_at_start(self, runner): + msgs = [MagicMock()] + result = runner._init_system_message("system", msgs) + assert result[0].content == "system" + + def test_init_system_message_no_template(self, runner): + result = runner._init_system_message("", []) + assert result == [] + + +# ============================== +# Organize User Query +# ============================== + + +class TestOrganizeUserQuery: + def test_without_files(self, runner): + result = runner._organize_user_query("query", []) + assert len(result) == 1 + + def test_with_none_query(self, runner): + result = runner._organize_user_query(None, []) + assert len(result) == 1 + + def test_with_files_uses_image_detail_config(self, runner, mocker): + file_content = TextPromptMessageContent(data="file-content") + mock_to_prompt = mocker.patch( + "core.agent.fc_agent_runner.file_manager.to_prompt_message_content", + return_value=file_content, + ) + + image_config = MagicMock(detail=ImagePromptMessageContent.DETAIL.HIGH) + runner.application_generate_entity.file_upload_config = MagicMock(image_config=image_config) + runner.files = ["file1"] + + result = runner._organize_user_query("query", []) + + assert len(result) == 1 + assert isinstance(result[0].content, list) + mock_to_prompt.assert_called_once_with("file1", image_detail_config=ImagePromptMessageContent.DETAIL.HIGH) + + +# ============================== +# Clear User Prompt Images +# ============================== + + +class TestClearUserPromptImageMessages: + def test_clear_text_and_image_content(self, runner): + text = MagicMock() + text.type = "text" + text.data = "hello" + + image = MagicMock() + image.type = "image" + image.data = "img" + + user_msg = MagicMock() + user_msg.__class__.__name__ = "UserPromptMessage" + user_msg.content = [text, image] + + result = runner._clear_user_prompt_image_messages([user_msg]) + assert isinstance(result, list) + + def test_clear_includes_file_placeholder(self, runner): + text = TextPromptMessageContent(data="hello") + image = ImagePromptMessageContent(format="url", mime_type="image/png") + document = DocumentPromptMessageContent(format="url", mime_type="application/pdf") + + user_msg = UserPromptMessage(content=[text, image, document]) + + result = runner._clear_user_prompt_image_messages([user_msg]) + + assert result[0].content == "hello\n[image]\n[file]" + + +# ============================== +# Run Method Tests +# ============================== + + +class TestRunMethod: + def test_run_non_streaming_no_tool_calls(self, runner): + message = MagicMock(id="m1") + dummy_message = DummyMessage(content="hello") + result = DummyResult(message=dummy_message, usage=build_usage()) + + runner.model_instance.invoke_llm.return_value = result + + outputs = list(runner.run(message, "query")) + assert len(outputs) == 1 + runner.queue_manager.publish.assert_called() + + queue_calls = runner.queue_manager.publish.call_args_list + assert any(call.args and call.args[0].__class__.__name__ == "QueueMessageEndEvent" for call in queue_calls) + + def test_run_streaming_branch(self, runner): + message = MagicMock(id="m1") + runner.stream_tool_call = True + + content = [TextPromptMessageContent(data="hi")] + chunk = DummyChunk(message=DummyMessage(content=content), usage=build_usage()) + + def generator(): + yield chunk + + runner.model_instance.invoke_llm.return_value = generator() + + outputs = list(runner.run(message, "query")) + assert len(outputs) == 1 + + def test_run_streaming_tool_calls_list_content(self, runner): + message = MagicMock(id="m1") + runner.stream_tool_call = True + + tool_call = MagicMock() + tool_call.id = "1" + tool_call.function.name = "tool" + tool_call.function.arguments = json.dumps({"a": 1}) + + content = [TextPromptMessageContent(data="hi")] + chunk = DummyChunk(message=DummyMessage(content=content, tool_calls=[tool_call]), usage=build_usage()) + + def generator(): + yield chunk + + final_message = DummyMessage(content="done", tool_calls=[]) + final_result = DummyResult(message=final_message, usage=build_usage()) + + runner.model_instance.invoke_llm.side_effect = [generator(), final_result] + + outputs = list(runner.run(message, "query")) + assert len(outputs) >= 1 + + def test_run_non_streaming_list_content(self, runner): + message = MagicMock(id="m1") + content = [TextPromptMessageContent(data="hi")] + dummy_message = DummyMessage(content=content) + result = DummyResult(message=dummy_message, usage=build_usage()) + + runner.model_instance.invoke_llm.return_value = result + + outputs = list(runner.run(message, "query")) + assert len(outputs) == 1 + assert runner.save_agent_thought.call_args.kwargs["thought"] == "hi" + + def test_run_streaming_tool_call_inputs_type_error(self, runner, mocker): + message = MagicMock(id="m1") + runner.stream_tool_call = True + + tool_call = MagicMock() + tool_call.id = "1" + tool_call.function.name = "tool" + tool_call.function.arguments = json.dumps({"a": 1}) + + chunk = DummyChunk(message=DummyMessage(content="hi", tool_calls=[tool_call]), usage=build_usage()) + + def generator(): + yield chunk + + runner.model_instance.invoke_llm.return_value = generator() + + real_dumps = json.dumps + + def flaky_dumps(obj, *args, **kwargs): + if kwargs.get("ensure_ascii") is False: + return real_dumps(obj, *args, **kwargs) + raise TypeError("boom") + + mocker.patch("core.agent.fc_agent_runner.json.dumps", side_effect=flaky_dumps) + + outputs = list(runner.run(message, "query")) + assert len(outputs) == 1 + + def test_run_with_missing_tool_instance(self, runner): + message = MagicMock(id="m1") + + tool_call = MagicMock() + tool_call.id = "1" + tool_call.function.name = "missing" + tool_call.function.arguments = json.dumps({}) + + dummy_message = DummyMessage(content="", tool_calls=[tool_call]) + result = DummyResult(message=dummy_message, usage=build_usage()) + final_message = DummyMessage(content="done", tool_calls=[]) + final_result = DummyResult(message=final_message, usage=build_usage()) + + runner.model_instance.invoke_llm.side_effect = [result, final_result] + + outputs = list(runner.run(message, "query")) + assert len(outputs) >= 1 + + def test_run_with_tool_instance_and_files(self, runner, mocker): + message = MagicMock(id="m1") + + tool_call = MagicMock() + tool_call.id = "1" + tool_call.function.name = "tool" + tool_call.function.arguments = json.dumps({"a": 1}) + + dummy_message = DummyMessage(content="", tool_calls=[tool_call]) + result = DummyResult(message=dummy_message, usage=build_usage()) + final_result = DummyResult(message=DummyMessage(content="done", tool_calls=[]), usage=build_usage()) + + runner.model_instance.invoke_llm.side_effect = [result, final_result] + + tool_instance = MagicMock() + prompt_tool = MagicMock() + prompt_tool.name = "tool" + runner._init_prompt_tools.return_value = ({"tool": tool_instance}, [prompt_tool]) + + tool_invoke_meta = MagicMock() + tool_invoke_meta.to_dict.return_value = {"ok": True} + mocker.patch( + "core.agent.fc_agent_runner.ToolEngine.agent_invoke", + return_value=("ok", ["file1"], tool_invoke_meta), + ) + + outputs = list(runner.run(message, "query")) + assert len(outputs) >= 1 + assert any( + isinstance(call.args[0], QueueMessageFileEvent) + and call.args[0].message_file_id == "file1" + and call.args[1] == PublishFrom.APPLICATION_MANAGER + for call in runner.queue_manager.publish.call_args_list + ) + + def test_run_max_iteration_error(self, runner): + runner.app_config.agent.max_iteration = 0 + + message = MagicMock(id="m1") + + tool_call = MagicMock() + tool_call.id = "1" + tool_call.function.name = "tool" + tool_call.function.arguments = "{}" + + dummy_message = DummyMessage(content="", tool_calls=[tool_call]) + result = DummyResult(message=dummy_message, usage=build_usage()) + + runner.model_instance.invoke_llm.return_value = result + + with pytest.raises(AgentMaxIterationError): + list(runner.run(message, "query")) diff --git a/api/tests/unit_tests/core/agent/test_plugin_entities.py b/api/tests/unit_tests/core/agent/test_plugin_entities.py new file mode 100644 index 0000000000..9955190aca --- /dev/null +++ b/api/tests/unit_tests/core/agent/test_plugin_entities.py @@ -0,0 +1,324 @@ +"""Unit tests for core.agent.plugin_entities. + +Covers entities such as AgentFeature, AgentProviderEntityWithPlugin, +AgentStrategyEntity, AgentStrategyIdentity, AgentStrategyParameter, +AgentStrategyProviderEntity, and AgentStrategyProviderIdentity. Tests rely on +Pydantic ValidationError behavior and pytest fixtures for validation and +mocking; ensure entity invariants and validation rules remain stable. +""" + +import pytest +from pydantic import ValidationError + +from core.agent.plugin_entities import ( + AgentFeature, + AgentProviderEntityWithPlugin, + AgentStrategyEntity, + AgentStrategyIdentity, + AgentStrategyParameter, + AgentStrategyProviderEntity, + AgentStrategyProviderIdentity, +) +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolIdentity, ToolProviderIdentity + +# ========================================================= +# Fixtures +# ========================================================= + + +@pytest.fixture +def mock_identity(mocker): + return mocker.MagicMock(spec=AgentStrategyIdentity) + + +@pytest.fixture +def mock_provider_identity(mocker): + return mocker.MagicMock(spec=AgentStrategyProviderIdentity) + + +# ========================================================= +# AgentStrategyParameterType Tests +# ========================================================= + + +class TestAgentStrategyParameterType: + @pytest.mark.parametrize( + "enum_member", + list(AgentStrategyParameter.AgentStrategyParameterType), + ) + def test_as_normal_type_calls_external_function(self, mocker, enum_member) -> None: + mock_func = mocker.patch( + "core.agent.plugin_entities.as_normal_type", + return_value="normalized", + ) + + result = enum_member.as_normal_type() + + mock_func.assert_called_once_with(enum_member) + assert result == "normalized" + + def test_as_normal_type_propagates_exception(self, mocker) -> None: + enum_member = AgentStrategyParameter.AgentStrategyParameterType.STRING + mocker.patch( + "core.agent.plugin_entities.as_normal_type", + side_effect=RuntimeError("boom"), + ) + + with pytest.raises(RuntimeError): + enum_member.as_normal_type() + + @pytest.mark.parametrize( + ("enum_member", "value"), + [ + (AgentStrategyParameter.AgentStrategyParameterType.STRING, "abc"), + (AgentStrategyParameter.AgentStrategyParameterType.NUMBER, 10), + (AgentStrategyParameter.AgentStrategyParameterType.BOOLEAN, True), + (AgentStrategyParameter.AgentStrategyParameterType.ANY, {"a": 1}), + (AgentStrategyParameter.AgentStrategyParameterType.STRING, None), + (AgentStrategyParameter.AgentStrategyParameterType.FILES, []), + ], + ) + def test_cast_value_calls_external_function(self, mocker, enum_member, value) -> None: + mock_func = mocker.patch( + "core.agent.plugin_entities.cast_parameter_value", + return_value="casted", + ) + + result = enum_member.cast_value(value) + + mock_func.assert_called_once_with(enum_member, value) + assert result == "casted" + + def test_cast_value_propagates_exception(self, mocker) -> None: + enum_member = AgentStrategyParameter.AgentStrategyParameterType.STRING + mocker.patch( + "core.agent.plugin_entities.cast_parameter_value", + side_effect=ValueError("invalid"), + ) + + with pytest.raises(ValueError): + enum_member.cast_value("bad") + + +# ========================================================= +# AgentStrategyParameter Tests +# ========================================================= + + +class TestAgentStrategyParameter: + def test_valid_creation_minimal(self) -> None: + # bypass base PluginParameter required fields using model_construct + param = AgentStrategyParameter.model_construct( + type=AgentStrategyParameter.AgentStrategyParameterType.STRING, + name="test", + label="label", + help=None, + ) + assert param.type == AgentStrategyParameter.AgentStrategyParameterType.STRING + assert param.help is None + + def test_valid_creation_with_help(self) -> None: + help_obj = I18nObject(en_US="test") + + param = AgentStrategyParameter.model_construct( + type=AgentStrategyParameter.AgentStrategyParameterType.STRING, + name="test", + label="label", + help=help_obj, + ) + assert param.help == help_obj + + @pytest.mark.parametrize("invalid_type", [None, "invalid_type", 999, [], {}, ["bad"], {"bad": 1}]) + def test_invalid_type_raises_validation_error(self, invalid_type) -> None: + with pytest.raises(ValidationError) as exc_info: + AgentStrategyParameter(type=invalid_type, name="x", label=I18nObject(en_US="y", zh_Hans="y")) + + assert any(error["loc"] == ("type",) for error in exc_info.value.errors()) + + def test_init_frontend_parameter_calls_external(self, mocker) -> None: + mock_func = mocker.patch( + "core.agent.plugin_entities.init_frontend_parameter", + return_value="frontend", + ) + + param = AgentStrategyParameter.model_construct( + type=AgentStrategyParameter.AgentStrategyParameterType.STRING, + name="test", + label="label", + ) + + result = param.init_frontend_parameter("value") + + mock_func.assert_called_once_with(param, param.type, "value") + assert result == "frontend" + + def test_init_frontend_parameter_propagates_exception(self, mocker) -> None: + mocker.patch( + "core.agent.plugin_entities.init_frontend_parameter", + side_effect=RuntimeError("error"), + ) + + param = AgentStrategyParameter.model_construct( + type=AgentStrategyParameter.AgentStrategyParameterType.STRING, + name="test", + label="label", + ) + + with pytest.raises(RuntimeError): + param.init_frontend_parameter("value") + + +# ========================================================= +# AgentStrategyProviderEntity Tests +# ========================================================= + + +class TestAgentStrategyProviderEntity: + def test_creation_with_plugin_id(self, mock_provider_identity) -> None: + entity = AgentStrategyProviderEntity( + identity=mock_provider_identity, + plugin_id="plugin-123", + ) + assert entity.plugin_id == "plugin-123" + + def test_creation_with_empty_plugin_id(self, mock_provider_identity) -> None: + entity = AgentStrategyProviderEntity( + identity=mock_provider_identity, + plugin_id="", + ) + assert entity.plugin_id == "" + + def test_creation_without_plugin_id(self, mock_provider_identity) -> None: + entity = AgentStrategyProviderEntity(identity=mock_provider_identity) + assert entity.plugin_id is None + + def test_invalid_identity_raises(self) -> None: + with pytest.raises(ValidationError): + AgentStrategyProviderEntity(identity="invalid") + + +# ========================================================= +# AgentStrategyEntity Tests +# ========================================================= + + +class TestAgentStrategyEntity: + def test_parameters_default_empty(self, mock_identity) -> None: + entity = AgentStrategyEntity( + identity=mock_identity, + description=I18nObject(en_US="test"), + ) + assert entity.parameters == [] + + def test_parameters_none_converted_to_empty(self, mock_identity) -> None: + entity = AgentStrategyEntity( + identity=mock_identity, + description=I18nObject(en_US="test"), + parameters=None, + ) + assert entity.parameters == [] + + def test_parameters_preserved(self, mock_identity) -> None: + param = AgentStrategyParameter.model_construct( + type=AgentStrategyParameter.AgentStrategyParameterType.STRING, + name="test", + label="label", + ) + + entity = AgentStrategyEntity( + identity=mock_identity, + description=I18nObject(en_US="test"), + parameters=[param], + ) + assert entity.parameters == [param] + + def test_invalid_parameters_type_raises(self, mock_identity) -> None: + with pytest.raises(ValidationError): + AgentStrategyEntity( + identity=mock_identity, + description=I18nObject(en_US="test"), + parameters="invalid", + ) + + @pytest.mark.parametrize( + "features", + [ + None, + [], + [AgentFeature.HISTORY_MESSAGES], + ], + ) + def test_features_valid(self, mock_identity, features) -> None: + entity = AgentStrategyEntity( + identity=mock_identity, + description=I18nObject(en_US="test"), + features=features, + ) + assert entity.features == features + + def test_invalid_features_type_raises(self, mock_identity) -> None: + with pytest.raises(ValidationError): + AgentStrategyEntity( + identity=mock_identity, + description=I18nObject(en_US="test"), + features="invalid", + ) + + def test_output_schema_and_meta_version(self, mock_identity) -> None: + entity = AgentStrategyEntity( + identity=mock_identity, + description=I18nObject(en_US="test"), + output_schema={"type": "object"}, + meta_version="v1", + ) + assert entity.output_schema == {"type": "object"} + assert entity.meta_version == "v1" + + def test_missing_required_fields_raise(self, mock_identity) -> None: + with pytest.raises(ValidationError): + AgentStrategyEntity(identity=mock_identity) + + +# ========================================================= +# AgentProviderEntityWithPlugin Tests +# ========================================================= + + +class TestAgentProviderEntityWithPlugin: + def test_default_strategies_empty(self, mock_provider_identity) -> None: + entity = AgentProviderEntityWithPlugin(identity=mock_provider_identity) + assert entity.strategies == [] + + def test_strategies_assignment(self, mock_provider_identity, mock_identity) -> None: + strategy = AgentStrategyEntity.model_construct( + identity=mock_identity, + description=I18nObject(en_US="test"), + parameters=[], + ) + + entity = AgentProviderEntityWithPlugin( + identity=mock_provider_identity, + strategies=[strategy], + ) + assert entity.strategies == [strategy] + + def test_invalid_strategies_type_raises(self, mock_provider_identity) -> None: + with pytest.raises(ValidationError): + AgentProviderEntityWithPlugin( + identity=mock_provider_identity, + strategies="invalid", + ) + + +# ========================================================= +# Inheritance Smoke Tests +# ========================================================= + + +class TestInheritanceBehavior: + def test_agent_strategy_identity_inherits(self) -> None: + assert issubclass(AgentStrategyIdentity, ToolIdentity) + + def test_agent_strategy_provider_identity_inherits(self) -> None: + assert issubclass(AgentStrategyProviderIdentity, ToolProviderIdentity) diff --git a/api/tests/unit_tests/core/app/apps/__init__.py b/api/tests/unit_tests/core/app/apps/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/__init__.py b/api/tests/unit_tests/core/app/apps/advanced_chat/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_config_manager.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_config_manager.py new file mode 100644 index 0000000000..6ca4f60459 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_config_manager.py @@ -0,0 +1,75 @@ +from types import SimpleNamespace +from unittest.mock import patch + +from core.app.apps.advanced_chat.app_config_manager import AdvancedChatAppConfigManager +from models.model import AppMode + + +class TestAdvancedChatAppConfigManager: + def test_get_app_config(self): + app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1", mode=AppMode.ADVANCED_CHAT.value) + workflow = SimpleNamespace(id="wf-1", features_dict={}) + + with ( + patch( + "core.app.apps.advanced_chat.app_config_manager.SensitiveWordAvoidanceConfigManager.convert", + return_value=None, + ), + patch( + "core.app.apps.advanced_chat.app_config_manager.WorkflowVariablesConfigManager.convert", + return_value=[], + ), + ): + app_config = AdvancedChatAppConfigManager.get_app_config(app_model, workflow) + + assert app_config.workflow_id == "wf-1" + assert app_config.app_mode == AppMode.ADVANCED_CHAT + + def test_config_validate_filters_keys(self): + def _add_key(key, value): + def _inner(*args, **kwargs): + config = kwargs.get("config") if kwargs else args[-1] + config = {**config, key: value} + return config, [key] + + return _inner + + with ( + patch( + "core.app.apps.advanced_chat.app_config_manager.FileUploadConfigManager.validate_and_set_defaults", + side_effect=_add_key("file_upload", 1), + ), + patch( + "core.app.apps.advanced_chat.app_config_manager.OpeningStatementConfigManager.validate_and_set_defaults", + side_effect=_add_key("opening_statement", 2), + ), + patch( + "core.app.apps.advanced_chat.app_config_manager.SuggestedQuestionsAfterAnswerConfigManager.validate_and_set_defaults", + side_effect=_add_key("suggested_questions_after_answer", 3), + ), + patch( + "core.app.apps.advanced_chat.app_config_manager.SpeechToTextConfigManager.validate_and_set_defaults", + side_effect=_add_key("speech_to_text", 4), + ), + patch( + "core.app.apps.advanced_chat.app_config_manager.TextToSpeechConfigManager.validate_and_set_defaults", + side_effect=_add_key("text_to_speech", 5), + ), + patch( + "core.app.apps.advanced_chat.app_config_manager.RetrievalResourceConfigManager.validate_and_set_defaults", + side_effect=_add_key("retriever_resource", 6), + ), + patch( + "core.app.apps.advanced_chat.app_config_manager.SensitiveWordAvoidanceConfigManager.validate_and_set_defaults", + side_effect=_add_key("sensitive_word_avoidance", 7), + ), + ): + filtered = AdvancedChatAppConfigManager.config_validate(tenant_id="t1", config={}) + + assert filtered["file_upload"] == 1 + assert filtered["opening_statement"] == 2 + assert filtered["suggested_questions_after_answer"] == 3 + assert filtered["speech_to_text"] == 4 + assert filtered["text_to_speech"] == 5 + assert filtered["retriever_resource"] == 6 + assert filtered["sensitive_word_avoidance"] == 7 diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_generator.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_generator.py new file mode 100644 index 0000000000..e2618d960c --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_generator.py @@ -0,0 +1,1258 @@ +from __future__ import annotations + +from contextlib import contextmanager +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest +from pydantic import BaseModel, ValidationError + +from constants import UUID_NIL +from core.app.app_config.entities import AppAdditionalFeatures, WorkflowUIBasedAppConfig +from core.app.apps.advanced_chat.app_generator import AdvancedChatAppGenerator, _refresh_model +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom +from core.ops.ops_trace_manager import TraceQueueManager +from models.model import AppMode + + +class TestAdvancedChatAppGeneratorValidation: + def test_generate_requires_query(self): + generator = AdvancedChatAppGenerator() + + with pytest.raises(ValueError, match="query is required"): + generator.generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + user=SimpleNamespace(), + args={"inputs": {}}, + invoke_from=InvokeFrom.WEB_APP, + workflow_run_id="run-id", + streaming=False, + ) + + def test_generate_requires_string_query(self): + generator = AdvancedChatAppGenerator() + + with pytest.raises(ValueError, match="query must be a string"): + generator.generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + user=SimpleNamespace(), + args={"inputs": {}, "query": 123}, + invoke_from=InvokeFrom.WEB_APP, + workflow_run_id="run-id", + streaming=False, + ) + + def test_single_iteration_generate_validates_args(self): + generator = AdvancedChatAppGenerator() + + with pytest.raises(ValueError, match="node_id is required"): + generator.single_iteration_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="", + user=SimpleNamespace(), + args={"inputs": {}}, + streaming=False, + ) + + with pytest.raises(ValueError, match="inputs is required"): + generator.single_iteration_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="node", + user=SimpleNamespace(), + args={}, + streaming=False, + ) + + def test_single_loop_generate_validates_args(self): + generator = AdvancedChatAppGenerator() + + with pytest.raises(ValueError, match="node_id is required"): + generator.single_loop_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="", + user=SimpleNamespace(), + args=SimpleNamespace(inputs={}), + streaming=False, + ) + + with pytest.raises(ValueError, match="inputs is required"): + generator.single_loop_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="node", + user=SimpleNamespace(), + args=SimpleNamespace(inputs=None), + streaming=False, + ) + + +class TestAdvancedChatAppGeneratorInternals: + @staticmethod + def _build_app_config() -> WorkflowUIBasedAppConfig: + return WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.ADVANCED_CHAT, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + + def test_generate_loads_conversation_and_files(self, monkeypatch): + generator = AdvancedChatAppGenerator() + app_config = self._build_app_config() + + conversation = SimpleNamespace(id="conversation-id") + built_files: list[object] = [] + build_files_called = {"called": False} + captured: dict[str, object] = {} + + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.ConversationService.get_conversation", + lambda **kwargs: conversation, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.FileUploadConfigManager.convert", + lambda *args, **kwargs: {"enabled": True}, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.file_factory.build_from_mappings", + lambda **kwargs: build_files_called.update({"called": True}) or built_files, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppConfigManager.get_app_config", + lambda **kwargs: app_config, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.sessionmaker", lambda **kwargs: SimpleNamespace() + ) + monkeypatch.setattr(generator, "_prepare_user_inputs", lambda **kwargs: kwargs["user_inputs"]) + + DummyTraceQueueManager = type( + "_DummyTraceQueueManager", + (TraceQueueManager,), + { + "__init__": lambda self, app_id=None, user_id=None: ( + setattr(self, "app_id", app_id) or setattr(self, "user_id", user_id) + ) + }, + ) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.TraceQueueManager", DummyTraceQueueManager) + + def _fake_generate(**kwargs): + captured.update(kwargs) + return {"ok": True} + + monkeypatch.setattr(generator, "_generate", _fake_generate) + + from models import Account + + user = Account(name="Tester", email="tester@example.com") + user.id = "user-id" + + result = generator.generate( + app_model=SimpleNamespace(id="app", tenant_id="tenant"), + workflow=SimpleNamespace(features_dict={}), + user=user, + args={ + "query": "hello", + "inputs": {"k": "v"}, + "conversation_id": "conversation-id", + "files": [{"id": "f"}], + }, + invoke_from=InvokeFrom.WEB_APP, + workflow_run_id="run-id", + streaming=False, + ) + + assert result == {"ok": True} + assert captured["conversation"] is conversation + assert captured["application_generate_entity"].files == built_files + assert build_files_called["called"] is True + + def test_resume_delegates_to_generate(self, monkeypatch): + generator = AdvancedChatAppGenerator() + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=self._build_app_config(), + inputs={}, + query="hello", + files=[], + user_id="user", + stream=True, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + captured: dict[str, object] = {} + + def _fake_generate(**kwargs): + captured.update(kwargs) + return {"resumed": True} + + monkeypatch.setattr(generator, "_generate", _fake_generate) + + result = generator.resume( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + user=SimpleNamespace(), + conversation=SimpleNamespace(id="conversation-id"), + message=SimpleNamespace(id="message-id"), + application_generate_entity=application_generate_entity, + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + graph_runtime_state=SimpleNamespace(), + pause_state_config=None, + ) + + assert result == {"resumed": True} + assert captured["graph_runtime_state"] is not None + + def test_single_iteration_generate_builds_debug_task(self, monkeypatch): + generator = AdvancedChatAppGenerator() + app_config = self._build_app_config() + captured: dict[str, object] = {} + prefill_calls: list[object] = [] + var_loader = SimpleNamespace(loader="draft") + + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppConfigManager.get_app_config", + lambda **kwargs: app_config, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_execution_repository", + lambda **kwargs: SimpleNamespace(repo="execution"), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + lambda **kwargs: SimpleNamespace(repo="node"), + ) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.DraftVarLoader", lambda **kwargs: var_loader) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.sessionmaker", lambda **kwargs: SimpleNamespace() + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=lambda: SimpleNamespace()), + ) + + class _DraftVarService: + def __init__(self, session): + _ = session + + def prefill_conversation_variable_default_values(self, workflow): + prefill_calls.append(workflow) + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.WorkflowDraftVariableService", _DraftVarService) + + def _fake_generate(**kwargs): + captured.update(kwargs) + return {"ok": True} + + monkeypatch.setattr(generator, "_generate", _fake_generate) + + result = generator.single_iteration_generate( + app_model=SimpleNamespace(id="app", tenant_id="tenant"), + workflow=SimpleNamespace(id="workflow-id"), + node_id="node-1", + user=SimpleNamespace(id="user-id"), + args={"inputs": {"foo": "bar"}}, + streaming=False, + ) + + assert result == {"ok": True} + assert prefill_calls + assert captured["variable_loader"] is var_loader + assert captured["application_generate_entity"].single_iteration_run.node_id == "node-1" + + def test_single_loop_generate_builds_debug_task(self, monkeypatch): + generator = AdvancedChatAppGenerator() + app_config = self._build_app_config() + captured: dict[str, object] = {} + prefill_calls: list[object] = [] + var_loader = SimpleNamespace(loader="draft") + + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppConfigManager.get_app_config", + lambda **kwargs: app_config, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_execution_repository", + lambda **kwargs: SimpleNamespace(repo="execution"), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + lambda **kwargs: SimpleNamespace(repo="node"), + ) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.DraftVarLoader", lambda **kwargs: var_loader) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.sessionmaker", lambda **kwargs: SimpleNamespace() + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=lambda: SimpleNamespace()), + ) + + class _DraftVarService: + def __init__(self, session): + _ = session + + def prefill_conversation_variable_default_values(self, workflow): + prefill_calls.append(workflow) + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.WorkflowDraftVariableService", _DraftVarService) + + def _fake_generate(**kwargs): + captured.update(kwargs) + return {"ok": True} + + monkeypatch.setattr(generator, "_generate", _fake_generate) + + result = generator.single_loop_generate( + app_model=SimpleNamespace(id="app", tenant_id="tenant"), + workflow=SimpleNamespace(id="workflow-id"), + node_id="node-2", + user=SimpleNamespace(id="user-id"), + args=SimpleNamespace(inputs={"foo": "bar"}), + streaming=False, + ) + + assert result == {"ok": True} + assert prefill_calls + assert captured["variable_loader"] is var_loader + assert captured["application_generate_entity"].single_loop_run.node_id == "node-2" + + def test_generate_internal_flow_initial_conversation_with_pause_layer(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 0 + app_config = self._build_app_config() + + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + conversation = SimpleNamespace(id="conv-1", mode=AppMode.ADVANCED_CHAT, override_model_configs=None) + message = SimpleNamespace(id="msg-1") + db_session = SimpleNamespace(commit=MagicMock(), refresh=MagicMock(), close=MagicMock()) + captured: dict[str, object] = {} + thread_data: dict[str, object] = {} + + monkeypatch.setattr(generator, "_init_generate_records", lambda *args: (conversation, message)) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.get_thread_messages_length", lambda _: 2) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.MessageBasedAppQueueManager", + lambda **kwargs: SimpleNamespace(**kwargs), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.PauseStatePersistenceLayer", + lambda **kwargs: "pause-layer", + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.current_app", + SimpleNamespace(_get_current_object=lambda: SimpleNamespace(name="flask")), + ) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.contextvars.copy_context", lambda: "ctx") + + class _Thread: + def __init__(self, *, target, kwargs): + thread_data["target"] = target + thread_data["kwargs"] = kwargs + + def start(self): + thread_data["started"] = True + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.threading.Thread", _Thread) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator._refresh_model", lambda session, model: model) + + class _Session: + def __init__(self, *args, **kwargs): + _ = args, kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", SimpleNamespace(engine=object(), session=db_session) + ) + monkeypatch.setattr(generator, "_get_draft_var_saver_factory", lambda *args, **kwargs: "draft-factory") + monkeypatch.setattr( + generator, + "_handle_advanced_chat_response", + lambda **kwargs: captured.update(kwargs) or {"raw": True}, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppGenerateResponseConverter.convert", + lambda response, invoke_from: {"response": response, "invoke_from": invoke_from}, + ) + + pause_state_config = SimpleNamespace(session_factory="session-factory", state_owner_user_id="owner") + + response = generator._generate( + workflow=SimpleNamespace(features={"feature": True}), + user=SimpleNamespace(id="user"), + invoke_from=InvokeFrom.WEB_APP, + application_generate_entity=application_generate_entity, + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + conversation=None, + message=None, + stream=False, + pause_state_config=pause_state_config, + ) + + assert response["response"] == {"raw": True} + assert thread_data["started"] is True + assert "pause-layer" in thread_data["kwargs"]["graph_engine_layers"] + assert generator._dialogue_count == 3 + db_session.commit.assert_called_once() + db_session.refresh.assert_called_once_with(conversation) + db_session.close.assert_called_once() + assert captured["draft_var_saver_factory"] == "draft-factory" + + def test_generate_internal_flow_with_existing_records_skips_init(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 0 + app_config = self._build_app_config() + + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + conversation = SimpleNamespace(id="conv-2", mode=AppMode.ADVANCED_CHAT, override_model_configs=None) + message = SimpleNamespace(id="msg-2") + db_session = SimpleNamespace(close=MagicMock(), commit=MagicMock(), refresh=MagicMock()) + init_records = MagicMock() + thread_data: dict[str, object] = {} + + monkeypatch.setattr(generator, "_init_generate_records", init_records) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.get_thread_messages_length", lambda _: 0) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.MessageBasedAppQueueManager", + lambda **kwargs: SimpleNamespace(**kwargs), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.current_app", + SimpleNamespace(_get_current_object=lambda: SimpleNamespace(name="flask")), + ) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.contextvars.copy_context", lambda: "ctx") + + class _Thread: + def __init__(self, *, target, kwargs): + thread_data["target"] = target + thread_data["kwargs"] = kwargs + + def start(self): + thread_data["started"] = True + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.threading.Thread", _Thread) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator._refresh_model", lambda session, model: model) + + class _Session: + def __init__(self, *args, **kwargs): + _ = args, kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", SimpleNamespace(engine=object(), session=db_session) + ) + monkeypatch.setattr(generator, "_get_draft_var_saver_factory", lambda *args, **kwargs: "draft-factory") + monkeypatch.setattr( + generator, + "_handle_advanced_chat_response", + lambda **kwargs: {"raw": True}, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppGenerateResponseConverter.convert", + lambda response, invoke_from: response, + ) + + response = generator._generate( + workflow=SimpleNamespace(features={}), + user=SimpleNamespace(id="user"), + invoke_from=InvokeFrom.WEB_APP, + application_generate_entity=application_generate_entity, + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + conversation=conversation, + message=message, + stream=False, + ) + + assert response == {"raw": True} + init_records.assert_not_called() + assert thread_data["started"] is True + db_session.commit.assert_not_called() + db_session.refresh.assert_not_called() + db_session.close.assert_called_once() + + def test_generate_worker_raises_when_workflow_not_found(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + app_config = self._build_app_config() + + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + generator._get_conversation = MagicMock(return_value=SimpleNamespace(id="conv")) + generator._get_message = MagicMock(return_value=SimpleNamespace(id="msg")) + + @contextmanager + def _fake_context(*args, **kwargs): + yield + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.preserve_flask_contexts", _fake_context) + + class _Session: + def __init__(self, *args, **kwargs): + self.scalar = MagicMock(return_value=None) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + + with pytest.raises(ValueError, match="Workflow not found"): + generator._generate_worker( + flask_app=SimpleNamespace(), + application_generate_entity=application_generate_entity, + queue_manager=MagicMock(), + conversation_id="conv", + message_id="msg", + context=SimpleNamespace(), + variable_loader=SimpleNamespace(), + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + graph_engine_layers=(), + graph_runtime_state=None, + ) + + def test_generate_worker_raises_when_app_not_found_for_internal_call(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + app_config = self._build_app_config() + + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="internal-user", + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + generator._get_conversation = MagicMock(return_value=SimpleNamespace(id="conv")) + generator._get_message = MagicMock(return_value=SimpleNamespace(id="msg")) + + @contextmanager + def _fake_context(*args, **kwargs): + yield + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.preserve_flask_contexts", _fake_context) + + class _Session: + def __init__(self, *args, **kwargs): + self.scalar = MagicMock( + side_effect=[ + SimpleNamespace(id="workflow-id", tenant_id="tenant", app_id="app"), + None, + ] + ) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + + with pytest.raises(ValueError, match="App not found"): + generator._generate_worker( + flask_app=SimpleNamespace(), + application_generate_entity=application_generate_entity, + queue_manager=MagicMock(), + conversation_id="conv", + message_id="msg", + context=SimpleNamespace(), + variable_loader=SimpleNamespace(), + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + graph_engine_layers=(), + graph_runtime_state=None, + ) + + def test_generate_worker_handles_stopped_error(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + app_config = self._build_app_config() + + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="internal-user", + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + queue_manager = MagicMock() + generator._get_conversation = MagicMock(return_value=SimpleNamespace(id="conv")) + generator._get_message = MagicMock(return_value=SimpleNamespace(id="msg")) + + @contextmanager + def _fake_context(*args, **kwargs): + yield + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.preserve_flask_contexts", _fake_context) + + class _Session: + def __init__(self, *args, **kwargs): + self.scalar = MagicMock( + side_effect=[ + SimpleNamespace(id="workflow-id", tenant_id="tenant", app_id="app"), + SimpleNamespace(id="app"), + ] + ) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + class _Runner: + def __init__(self, **kwargs): + _ = kwargs + + def run(self): + raise GenerateTaskStoppedError() + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.AdvancedChatAppRunner", _Runner) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + + generator._generate_worker( + flask_app=SimpleNamespace(), + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + conversation_id="conv", + message_id="msg", + context=SimpleNamespace(), + variable_loader=SimpleNamespace(), + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + graph_engine_layers=(), + graph_runtime_state=None, + ) + + queue_manager.publish_error.assert_not_called() + + def test_generate_worker_handles_validation_error(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + app_config = self._build_app_config() + + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="internal-user", + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + class _ValidationModel(BaseModel): + value: int + + try: + _ValidationModel(value="invalid") + except ValidationError as error: + validation_error = error + else: + raise AssertionError("validation error should be created") + + queue_manager = MagicMock() + generator._get_conversation = MagicMock(return_value=SimpleNamespace(id="conv")) + generator._get_message = MagicMock(return_value=SimpleNamespace(id="msg")) + + @contextmanager + def _fake_context(*args, **kwargs): + yield + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.preserve_flask_contexts", _fake_context) + + class _Session: + def __init__(self, *args, **kwargs): + self.scalar = MagicMock( + side_effect=[ + SimpleNamespace(id="workflow-id", tenant_id="tenant", app_id="app"), + SimpleNamespace(id="app"), + ] + ) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + class _Runner: + def __init__(self, **kwargs): + _ = kwargs + + def run(self): + raise validation_error + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.AdvancedChatAppRunner", _Runner) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + + generator._generate_worker( + flask_app=SimpleNamespace(), + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + conversation_id="conv", + message_id="msg", + context=SimpleNamespace(), + variable_loader=SimpleNamespace(), + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + graph_engine_layers=(), + graph_runtime_state=None, + ) + + queue_manager.publish_error.assert_called_once() + + def test_generate_worker_handles_value_and_unknown_errors(self, monkeypatch): + app_config = self._build_app_config() + + @contextmanager + def _fake_context(*args, **kwargs): + yield + + def _make_runner(error: Exception): + class _Runner: + def __init__(self, **kwargs): + _ = kwargs + + def run(self): + raise error + + return _Runner + + for raised_error in [ValueError("bad input"), RuntimeError("unexpected")]: + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="internal-user", + stream=False, + invoke_from=InvokeFrom.DEBUGGER, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + queue_manager = MagicMock() + generator._get_conversation = MagicMock(return_value=SimpleNamespace(id="conv")) + generator._get_message = MagicMock(return_value=SimpleNamespace(id="msg")) + + class _Session: + def __init__(self, *args, **kwargs): + self.scalar = MagicMock( + side_effect=[ + SimpleNamespace(id="workflow-id", tenant_id="tenant", app_id="app"), + SimpleNamespace(id="app"), + ] + ) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.preserve_flask_contexts", _fake_context) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppRunner", + _make_runner(raised_error), + ) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.dify_config", SimpleNamespace(DEBUG=True)) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + + generator._generate_worker( + flask_app=SimpleNamespace(), + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + conversation_id="conv", + message_id="msg", + context=SimpleNamespace(), + variable_loader=SimpleNamespace(), + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + graph_engine_layers=(), + graph_runtime_state=None, + ) + + queue_manager.publish_error.assert_called_once() + + def test_handle_response_closed_file_raises_stopped(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.ADVANCED_CHAT, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + class _Pipeline: + def __init__(self, **kwargs) -> None: + _ = kwargs + + def process(self): + raise ValueError("I/O operation on closed file.") + + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppGenerateTaskPipeline", + _Pipeline, + ) + + with pytest.raises(GenerateTaskStoppedError): + generator._handle_advanced_chat_response( + application_generate_entity=application_generate_entity, + workflow=SimpleNamespace(), + queue_manager=SimpleNamespace(), + conversation=SimpleNamespace(id="conv", mode=AppMode.ADVANCED_CHAT), + message=SimpleNamespace(id="msg"), + user=SimpleNamespace(), + draft_var_saver_factory=lambda **kwargs: None, + stream=False, + ) + + def test_handle_response_re_raises_value_error(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + app_config = self._build_app_config() + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + class _Pipeline: + def __init__(self, **kwargs): + _ = kwargs + + def process(self): + raise ValueError("other error") + + logger_exception = MagicMock() + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.logger.exception", logger_exception) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.AdvancedChatAppGenerateTaskPipeline", _Pipeline) + + with pytest.raises(ValueError, match="other error"): + generator._handle_advanced_chat_response( + application_generate_entity=application_generate_entity, + workflow=SimpleNamespace(), + queue_manager=SimpleNamespace(), + conversation=SimpleNamespace(id="conv", mode=AppMode.ADVANCED_CHAT), + message=SimpleNamespace(id="msg"), + user=SimpleNamespace(), + draft_var_saver_factory=lambda **kwargs: None, + stream=False, + ) + + logger_exception.assert_called_once() + + def test_refresh_model_returns_detached_model(self, monkeypatch): + source_model = SimpleNamespace(id="source-id") + detached_model = SimpleNamespace(id="source-id", detached=True) + + class _Session: + def __init__(self, *args, **kwargs): + _ = args, kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def get(self, model_type, model_id): + _ = model_type + return detached_model if model_id == "source-id" else None + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.db", SimpleNamespace(engine=object())) + + refreshed = _refresh_model(session=SimpleNamespace(), model=source_model) + + assert refreshed is detached_model + + def test_generate_worker_handles_invoke_auth_error(self, monkeypatch): + generator = AdvancedChatAppGenerator() + generator._dialogue_count = 1 + + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.ADVANCED_CHAT, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="end-user-id", + stream=False, + invoke_from=InvokeFrom.SERVICE_API, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + queue_manager = MagicMock() + + generator._get_conversation = MagicMock(return_value=SimpleNamespace(id="conv", mode=AppMode.ADVANCED_CHAT)) + generator._get_message = MagicMock(return_value=SimpleNamespace(id="msg")) + + class _Runner: + def __init__(self, **kwargs) -> None: + _ = kwargs + + def run(self): + from dify_graph.model_runtime.errors.invoke import InvokeAuthorizationError + + raise InvokeAuthorizationError("bad key") + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.AdvancedChatAppRunner", _Runner) + + @contextmanager + def _fake_context(*args, **kwargs): + yield + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.preserve_flask_contexts", _fake_context) + + class _Session: + def __init__(self, *args, **kwargs): + self.scalar = MagicMock( + side_effect=[ + SimpleNamespace(id="workflow-id", tenant_id="tenant", app_id="app"), + SimpleNamespace(id="end-user-id", session_id="session-id"), + SimpleNamespace(id="app"), + ] + ) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + monkeypatch.setattr("core.app.apps.advanced_chat.app_generator.Session", _Session) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + + generator._generate_worker( + flask_app=SimpleNamespace(), + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + conversation_id="conv", + message_id="msg", + context=SimpleNamespace(), + variable_loader=SimpleNamespace(), + workflow_execution_repository=SimpleNamespace(), + workflow_node_execution_repository=SimpleNamespace(), + graph_engine_layers=(), + graph_runtime_state=None, + ) + + assert queue_manager.publish_error.called + + def test_generate_debugger_enables_retrieve_source(self, monkeypatch): + generator = AdvancedChatAppGenerator() + + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.ADVANCED_CHAT, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppConfigManager.get_app_config", + lambda app_model, workflow: app_config, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.FileUploadConfigManager.convert", + lambda features_dict, is_vision=False: None, + ) + DummyTraceQueueManager = type( + "_DummyTraceQueueManager", + (TraceQueueManager,), + { + "__init__": lambda self, app_id=None, user_id=None: ( + setattr(self, "app_id", app_id) or setattr(self, "user_id", user_id) + ) + }, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.TraceQueueManager", + DummyTraceQueueManager, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.sessionmaker", + lambda **kwargs: SimpleNamespace(), + ) + + captured = {} + + def _fake_generate(**kwargs): + captured.update(kwargs) + return {"ok": True} + + monkeypatch.setattr(generator, "_generate", _fake_generate) + + app_model = SimpleNamespace(id="app", tenant_id="tenant") + workflow = SimpleNamespace(features_dict={}) + from models import Account + + user = Account(name="Tester", email="tester@example.com") + user.id = "user" + + result = generator.generate( + app_model=app_model, + workflow=workflow, + user=user, + args={"query": "hello\x00", "inputs": {}}, + invoke_from=InvokeFrom.DEBUGGER, + workflow_run_id="run-id", + streaming=False, + ) + + assert result == {"ok": True} + assert app_config.additional_features.show_retrieve_source is True + assert captured["application_generate_entity"].query == "hello" + + def test_generate_service_api_sets_parent_message_id(self, monkeypatch): + generator = AdvancedChatAppGenerator() + + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.ADVANCED_CHAT, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.AdvancedChatAppConfigManager.get_app_config", + lambda app_model, workflow: app_config, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.FileUploadConfigManager.convert", + lambda features_dict, is_vision=False: None, + ) + DummyTraceQueueManager = type( + "_DummyTraceQueueManager", + (TraceQueueManager,), + { + "__init__": lambda self, app_id=None, user_id=None: ( + setattr(self, "app_id", app_id) or setattr(self, "user_id", user_id) + ) + }, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.TraceQueueManager", + DummyTraceQueueManager, + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + monkeypatch.setattr( + "core.app.apps.advanced_chat.app_generator.sessionmaker", + lambda **kwargs: SimpleNamespace(), + ) + + captured = {} + + def _fake_generate(**kwargs): + captured.update(kwargs) + return {"ok": True} + + monkeypatch.setattr(generator, "_generate", _fake_generate) + + app_model = SimpleNamespace(id="app", tenant_id="tenant") + workflow = SimpleNamespace(features_dict={}) + from models.model import EndUser + + user = EndUser(tenant_id="tenant", type="session", name="tester", session_id="session") + user.id = "end-user" + + generator.generate( + app_model=app_model, + workflow=workflow, + user=user, + args={"query": "hello", "inputs": {}, "parent_message_id": "p1"}, + invoke_from=InvokeFrom.SERVICE_API, + workflow_run_id="run-id", + streaming=False, + ) + + assert captured["application_generate_entity"].parent_message_id == UUID_NIL diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_conversation_variables.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_conversation_variables.py index 12ab587564..15aceef2c7 100644 --- a/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_conversation_variables.py +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_conversation_variables.py @@ -125,7 +125,11 @@ class TestAdvancedChatAppRunnerConversationVariables: patch("core.app.apps.advanced_chat.app_runner.select") as mock_select, patch("core.app.apps.advanced_chat.app_runner.db") as mock_db, patch.object(runner, "_init_graph") as mock_init_graph, - patch.object(runner, "handle_input_moderation", return_value=False), + patch.object( + runner, + "handle_input_moderation", + return_value=(False, mock_app_generate_entity.inputs, mock_app_generate_entity.query), + ), patch.object(runner, "handle_annotation_reply", return_value=False), patch("core.app.apps.advanced_chat.app_runner.WorkflowEntry") as mock_workflow_entry_class, patch("core.app.apps.advanced_chat.app_runner.GraphRuntimeState") as mock_graph_runtime_state_class, @@ -265,7 +269,11 @@ class TestAdvancedChatAppRunnerConversationVariables: patch("core.app.apps.advanced_chat.app_runner.select") as mock_select, patch("core.app.apps.advanced_chat.app_runner.db") as mock_db, patch.object(runner, "_init_graph") as mock_init_graph, - patch.object(runner, "handle_input_moderation", return_value=False), + patch.object( + runner, + "handle_input_moderation", + return_value=(False, mock_app_generate_entity.inputs, mock_app_generate_entity.query), + ), patch.object(runner, "handle_annotation_reply", return_value=False), patch("core.app.apps.advanced_chat.app_runner.WorkflowEntry") as mock_workflow_entry_class, patch("core.app.apps.advanced_chat.app_runner.GraphRuntimeState") as mock_graph_runtime_state_class, @@ -412,7 +420,11 @@ class TestAdvancedChatAppRunnerConversationVariables: patch("core.app.apps.advanced_chat.app_runner.select") as mock_select, patch("core.app.apps.advanced_chat.app_runner.db") as mock_db, patch.object(runner, "_init_graph") as mock_init_graph, - patch.object(runner, "handle_input_moderation", return_value=False), + patch.object( + runner, + "handle_input_moderation", + return_value=(False, mock_app_generate_entity.inputs, mock_app_generate_entity.query), + ), patch.object(runner, "handle_annotation_reply", return_value=False), patch("core.app.apps.advanced_chat.app_runner.WorkflowEntry") as mock_workflow_entry_class, patch("core.app.apps.advanced_chat.app_runner.GraphRuntimeState") as mock_graph_runtime_state_class, diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_input_moderation.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_input_moderation.py new file mode 100644 index 0000000000..5792a2f1e2 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_app_runner_input_moderation.py @@ -0,0 +1,170 @@ +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest + +from core.app.apps.advanced_chat.app_runner import AdvancedChatAppRunner +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom +from core.app.entities.queue_entities import QueueStopEvent +from core.moderation.base import ModerationError + + +@pytest.fixture +def build_runner(): + """Construct a minimal AdvancedChatAppRunner with heavy dependencies mocked.""" + app_id = str(uuid4()) + workflow_id = str(uuid4()) + + # Mocks for constructor args + mock_queue_manager = MagicMock() + + mock_conversation = MagicMock() + mock_conversation.id = str(uuid4()) + mock_conversation.app_id = app_id + + mock_message = MagicMock() + mock_message.id = str(uuid4()) + + mock_workflow = MagicMock() + mock_workflow.id = workflow_id + mock_workflow.tenant_id = str(uuid4()) + mock_workflow.app_id = app_id + mock_workflow.type = "chat" + mock_workflow.graph_dict = {} + mock_workflow.environment_variables = [] + + mock_app_config = MagicMock() + mock_app_config.app_id = app_id + mock_app_config.workflow_id = workflow_id + mock_app_config.tenant_id = str(uuid4()) + + gen = MagicMock(spec=AdvancedChatAppGenerateEntity) + gen.app_config = mock_app_config + gen.inputs = {"q": "raw"} + gen.query = "raw-query" + gen.files = [] + gen.user_id = str(uuid4()) + gen.invoke_from = InvokeFrom.SERVICE_API + gen.workflow_run_id = str(uuid4()) + gen.task_id = str(uuid4()) + gen.call_depth = 0 + gen.single_iteration_run = None + gen.single_loop_run = None + gen.trace_manager = None + + runner = AdvancedChatAppRunner( + application_generate_entity=gen, + queue_manager=mock_queue_manager, + conversation=mock_conversation, + message=mock_message, + dialogue_count=1, + variable_loader=MagicMock(), + workflow=mock_workflow, + system_user_id=str(uuid4()), + app=MagicMock(), + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + ) + + return runner + + +def _patch_common_run_deps(runner: AdvancedChatAppRunner): + """Context manager that patches common heavy deps used by run().""" + return patch.multiple( + "core.app.apps.advanced_chat.app_runner", + Session=MagicMock( + return_value=MagicMock( + __enter__=lambda s: s, + __exit__=lambda *a, **k: False, + scalar=lambda *a, **k: MagicMock(), + ), + ), + select=MagicMock(), + db=MagicMock(engine=MagicMock()), + RedisChannel=MagicMock(), + redis_client=MagicMock(), + WorkflowEntry=MagicMock(**{"return_value.run.return_value": iter([])}), + GraphRuntimeState=MagicMock(), + ) + + +def test_handle_input_moderation_stops_on_moderation_error(build_runner): + runner = build_runner + + # moderation_for_inputs raises ModerationError -> should stop and emit stop event + with ( + patch.object(runner, "moderation_for_inputs", side_effect=ModerationError("blocked")), + patch.object(runner, "_complete_with_stream_output") as mock_complete, + ): + stop, new_inputs, new_query = runner.handle_input_moderation( + app_record=MagicMock(), + app_generate_entity=runner.application_generate_entity, + inputs={"k": "v"}, + query="hello", + message_id="mid", + ) + + assert stop is True + # inputs/query should be unchanged on error path + assert new_inputs == {"k": "v"} + assert new_query == "hello" + # ensure stopped_by reason is INPUT_MODERATION + assert mock_complete.called + args, kwargs = mock_complete.call_args + assert kwargs.get("stopped_by") == QueueStopEvent.StopBy.INPUT_MODERATION + + +def test_run_applies_overridden_inputs_and_query_from_moderation(build_runner): + runner = build_runner + + overridden_inputs = {"q": "sanitized"} + overridden_query = "sanitized-query" + + with ( + _patch_common_run_deps(runner), + patch.object( + runner, + "moderation_for_inputs", + return_value=(True, overridden_inputs, overridden_query), + ) as mock_moderate, + patch.object(runner, "handle_annotation_reply", return_value=False) as mock_anno, + patch.object(runner, "_init_graph", return_value=MagicMock()) as mock_init_graph, + ): + runner.run() + + # moderation called with original values + mock_moderate.assert_called_once() + + # application_generate_entity should be updated to overridden values + assert runner.application_generate_entity.inputs == overridden_inputs + assert runner.application_generate_entity.query == overridden_query + + # annotation reply should use the new query + mock_anno.assert_called() + assert mock_anno.call_args.kwargs.get("query") == overridden_query + + # since not stopped, graph initialization should proceed + assert mock_init_graph.called + + +def test_run_returns_early_when_direct_output_via_handle_input_moderation(build_runner): + runner = build_runner + + with ( + _patch_common_run_deps(runner), + # Simulate handle_input_moderation signalling to stop + patch.object( + runner, + "handle_input_moderation", + return_value=(True, runner.application_generate_entity.inputs, runner.application_generate_entity.query), + ) as mock_handle, + patch.object(runner, "_init_graph") as mock_init_graph, + patch.object(runner, "handle_annotation_reply") as mock_anno, + ): + runner.run() + + mock_handle.assert_called_once() + # Ensure no further steps executed + mock_anno.assert_not_called() + mock_init_graph.assert_not_called() diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_response_converter.py new file mode 100644 index 0000000000..5b199e0c52 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_response_converter.py @@ -0,0 +1,96 @@ +from collections.abc import Generator + +from core.app.apps.advanced_chat.generate_response_converter import AdvancedChatAppGenerateResponseConverter +from core.app.entities.task_entities import ( + ChatbotAppBlockingResponse, + ChatbotAppStreamResponse, + ErrorStreamResponse, + MessageEndStreamResponse, + NodeFinishStreamResponse, + NodeStartStreamResponse, + PingStreamResponse, +) +from dify_graph.enums import WorkflowNodeExecutionStatus + + +class TestAdvancedChatGenerateResponseConverter: + def test_blocking_simple_response_metadata(self): + data = ChatbotAppBlockingResponse.Data( + id="msg-1", + mode="chat", + conversation_id="c1", + message_id="m1", + answer="hi", + metadata={"usage": {"total_tokens": 1}}, + created_at=1, + ) + blocking = ChatbotAppBlockingResponse(task_id="t1", data=data) + response = AdvancedChatAppGenerateResponseConverter.convert_blocking_simple_response(blocking) + assert "usage" not in response["metadata"] + + def test_stream_simple_response_includes_node_events(self): + node_start = NodeStartStreamResponse( + task_id="t1", + workflow_run_id="r1", + data=NodeStartStreamResponse.Data( + id="e1", + node_id="n1", + node_type="answer", + title="Answer", + index=1, + created_at=1, + ), + ) + node_finish = NodeFinishStreamResponse( + task_id="t1", + workflow_run_id="r1", + data=NodeFinishStreamResponse.Data( + id="e1", + node_id="n1", + node_type="answer", + title="Answer", + index=1, + status=WorkflowNodeExecutionStatus.SUCCEEDED, + elapsed_time=0.1, + created_at=1, + finished_at=2, + ), + ) + + def stream() -> Generator[ChatbotAppStreamResponse, None, None]: + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=PingStreamResponse(task_id="t1"), + ) + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=node_start, + ) + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=node_finish, + ) + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=ErrorStreamResponse(task_id="t1", err=ValueError("boom")), + ) + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=MessageEndStreamResponse(task_id="t1", id="m1"), + ) + + converted = list(AdvancedChatAppGenerateResponseConverter.convert_stream_simple_response(stream())) + assert converted[0] == "ping" + assert converted[1]["event"] == "node_started" + assert converted[2]["event"] == "node_finished" + assert converted[3]["event"] == "error" diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_extra_contents.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline.py similarity index 56% rename from api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_extra_contents.py rename to api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline.py index be773557f6..83a6e0f231 100644 --- a/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_extra_contents.py +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline.py @@ -9,8 +9,16 @@ import pytest from core.app.apps.advanced_chat import generate_task_pipeline as pipeline_module from core.app.entities.app_invoke_entities import InvokeFrom -from core.app.entities.queue_entities import QueueTextChunkEvent, QueueWorkflowPausedEvent +from core.app.entities.queue_entities import ( + QueuePingEvent, + QueueTextChunkEvent, + QueueWorkflowPartialSuccessEvent, + QueueWorkflowPausedEvent, + QueueWorkflowSucceededEvent, +) +from core.app.entities.task_entities import StreamEvent from dify_graph.entities.pause_reason import HumanInputRequired +from dify_graph.enums import WorkflowExecutionStatus from models.enums import MessageStatus from models.execution_extra_content import HumanInputContent from models.model import EndUser @@ -185,3 +193,97 @@ def test_resume_appends_chunks_to_paused_answer() -> None: assert message.answer == "beforeafter" assert message.status == MessageStatus.NORMAL + + +def test_workflow_succeeded_emits_message_end_before_workflow_finished() -> None: + pipeline = _build_pipeline() + pipeline._application_generate_entity = SimpleNamespace(task_id="task-1") + pipeline._workflow_id = "workflow-1" + pipeline._ensure_workflow_initialized = mock.Mock() + runtime_state = SimpleNamespace() + pipeline._ensure_graph_runtime_initialized = mock.Mock(return_value=runtime_state) + pipeline._handle_advanced_chat_message_end_event = mock.Mock( + return_value=iter([SimpleNamespace(event=StreamEvent.MESSAGE_END)]) + ) + pipeline._workflow_response_converter = mock.Mock() + pipeline._workflow_response_converter.workflow_finish_to_stream_response.return_value = SimpleNamespace( + event=StreamEvent.WORKFLOW_FINISHED, + data=SimpleNamespace(status=WorkflowExecutionStatus.SUCCEEDED), + ) + + event = QueueWorkflowSucceededEvent(outputs={}) + responses = list(pipeline._handle_workflow_succeeded_event(event)) + + assert [resp.event for resp in responses] == [StreamEvent.MESSAGE_END, StreamEvent.WORKFLOW_FINISHED] + + +def test_workflow_partial_success_emits_message_end_before_workflow_finished() -> None: + pipeline = _build_pipeline() + pipeline._application_generate_entity = SimpleNamespace(task_id="task-1") + pipeline._workflow_id = "workflow-1" + pipeline._ensure_workflow_initialized = mock.Mock() + runtime_state = SimpleNamespace() + pipeline._ensure_graph_runtime_initialized = mock.Mock(return_value=runtime_state) + pipeline._handle_advanced_chat_message_end_event = mock.Mock( + return_value=iter([SimpleNamespace(event=StreamEvent.MESSAGE_END)]) + ) + pipeline._workflow_response_converter = mock.Mock() + pipeline._workflow_response_converter.workflow_finish_to_stream_response.return_value = SimpleNamespace( + event=StreamEvent.WORKFLOW_FINISHED, + data=SimpleNamespace(status=WorkflowExecutionStatus.PARTIAL_SUCCEEDED), + ) + + event = QueueWorkflowPartialSuccessEvent(exceptions_count=1, outputs={}) + responses = list(pipeline._handle_workflow_partial_success_event(event)) + + assert [resp.event for resp in responses] == [StreamEvent.MESSAGE_END, StreamEvent.WORKFLOW_FINISHED] + + +def test_process_stream_response_breaks_after_workflow_succeeded() -> None: + pipeline = _build_pipeline() + succeeded_event = QueueWorkflowSucceededEvent(outputs={}) + ping_event = QueuePingEvent() + queue_messages = [ + SimpleNamespace(event=succeeded_event), + SimpleNamespace(event=ping_event), + ] + + pipeline._conversation_name_generate_thread = None + pipeline._base_task_pipeline = mock.Mock() + pipeline._base_task_pipeline.queue_manager = mock.Mock() + pipeline._base_task_pipeline.queue_manager.listen.return_value = iter(queue_messages) + pipeline._base_task_pipeline.ping_stream_response = mock.Mock(return_value=SimpleNamespace(event=StreamEvent.PING)) + pipeline._handle_workflow_succeeded_event = mock.Mock( + return_value=iter([SimpleNamespace(event=StreamEvent.WORKFLOW_FINISHED)]) + ) + + responses = list(pipeline._process_stream_response()) + + assert [resp.event for resp in responses] == [StreamEvent.WORKFLOW_FINISHED] + pipeline._handle_workflow_succeeded_event.assert_called_once_with(succeeded_event, trace_manager=None) + pipeline._base_task_pipeline.ping_stream_response.assert_not_called() + + +def test_process_stream_response_breaks_after_workflow_partial_success() -> None: + pipeline = _build_pipeline() + partial_event = QueueWorkflowPartialSuccessEvent(exceptions_count=1, outputs={}) + ping_event = QueuePingEvent() + queue_messages = [ + SimpleNamespace(event=partial_event), + SimpleNamespace(event=ping_event), + ] + + pipeline._conversation_name_generate_thread = None + pipeline._base_task_pipeline = mock.Mock() + pipeline._base_task_pipeline.queue_manager = mock.Mock() + pipeline._base_task_pipeline.queue_manager.listen.return_value = iter(queue_messages) + pipeline._base_task_pipeline.ping_stream_response = mock.Mock(return_value=SimpleNamespace(event=StreamEvent.PING)) + pipeline._handle_workflow_partial_success_event = mock.Mock( + return_value=iter([SimpleNamespace(event=StreamEvent.WORKFLOW_FINISHED)]) + ) + + responses = list(pipeline._process_stream_response()) + + assert [resp.event for resp in responses] == [StreamEvent.WORKFLOW_FINISHED] + pipeline._handle_workflow_partial_success_event.assert_called_once_with(partial_event, trace_manager=None) + pipeline._base_task_pipeline.ping_stream_response.assert_not_called() diff --git a/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_core.py b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_core.py new file mode 100644 index 0000000000..0a244b3fea --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/advanced_chat/test_generate_task_pipeline_core.py @@ -0,0 +1,600 @@ +from __future__ import annotations + +from contextlib import contextmanager +from datetime import datetime +from types import SimpleNamespace + +import pytest + +from core.app.app_config.entities import AppAdditionalFeatures, WorkflowUIBasedAppConfig +from core.app.apps.advanced_chat.generate_task_pipeline import AdvancedChatAppGenerateTaskPipeline +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom +from core.app.entities.queue_entities import ( + QueueAdvancedChatMessageEndEvent, + QueueAnnotationReplyEvent, + QueueErrorEvent, + QueueHumanInputFormFilledEvent, + QueueHumanInputFormTimeoutEvent, + QueueIterationCompletedEvent, + QueueIterationNextEvent, + QueueIterationStartEvent, + QueueLoopCompletedEvent, + QueueLoopNextEvent, + QueueLoopStartEvent, + QueueMessageReplaceEvent, + QueueNodeExceptionEvent, + QueueNodeFailedEvent, + QueuePingEvent, + QueueRetrieverResourcesEvent, + QueueStopEvent, + QueueTextChunkEvent, + QueueWorkflowFailedEvent, + QueueWorkflowPartialSuccessEvent, + QueueWorkflowPausedEvent, + QueueWorkflowStartedEvent, + QueueWorkflowSucceededEvent, +) +from core.app.entities.task_entities import ( + AnnotationReply, + AnnotationReplyAccount, + MessageAudioStreamResponse, + MessageEndStreamResponse, + PingStreamResponse, +) +from core.base.tts.app_generator_tts_publisher import AudioTrunk +from dify_graph.enums import BuiltinNodeTypes +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from models.enums import MessageStatus +from models.model import AppMode, EndUser + + +def _make_pipeline(): + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.ADVANCED_CHAT, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + application_generate_entity = AdvancedChatAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + query="hello", + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_run_id="run-id", + ) + + message = SimpleNamespace( + id="message-id", + query="hello", + created_at=datetime.utcnow(), + status=MessageStatus.NORMAL, + answer="", + ) + conversation = SimpleNamespace(id="conv-id", mode=AppMode.ADVANCED_CHAT) + workflow = SimpleNamespace(id="workflow-id", tenant_id="tenant", features_dict={}) + user = EndUser(tenant_id="tenant", type="session", name="tester", session_id="session") + + pipeline = AdvancedChatAppGenerateTaskPipeline( + application_generate_entity=application_generate_entity, + workflow=workflow, + queue_manager=SimpleNamespace(invoke_from=InvokeFrom.WEB_APP, graph_runtime_state=None), + conversation=conversation, + message=message, + user=user, + stream=False, + dialogue_count=1, + draft_var_saver_factory=lambda **kwargs: None, + ) + + return pipeline + + +class TestAdvancedChatGenerateTaskPipeline: + def test_ensure_workflow_initialized_raises(self): + pipeline = _make_pipeline() + + with pytest.raises(ValueError, match="workflow run not initialized"): + pipeline._ensure_workflow_initialized() + + def test_to_blocking_response_returns_message_end(self): + pipeline = _make_pipeline() + pipeline._task_state.answer = "done" + + def _gen(): + yield MessageEndStreamResponse(task_id="task", id="message-id", metadata={"k": "v"}) + + response = pipeline._to_blocking_response(_gen()) + + assert response.data.answer == "done" + assert response.data.metadata == {"k": "v"} + + def test_handle_text_chunk_event_updates_state(self): + pipeline = _make_pipeline() + pipeline._message_cycle_manager = SimpleNamespace( + message_to_stream_response=lambda **kwargs: MessageEndStreamResponse( + task_id="task", id="message-id", metadata={} + ) + ) + + event = SimpleNamespace(text="hi", from_variable_selector=None) + + responses = list(pipeline._handle_text_chunk_event(event)) + + assert pipeline._task_state.answer == "hi" + assert responses + + def test_listen_audio_msg_returns_audio_stream(self): + pipeline = _make_pipeline() + publisher = SimpleNamespace(check_and_get_audio=lambda: AudioTrunk(status="stream", audio="data")) + + response = pipeline._listen_audio_msg(publisher=publisher, task_id="task") + + assert isinstance(response, MessageAudioStreamResponse) + + def test_handle_ping_event(self): + pipeline = _make_pipeline() + pipeline._base_task_pipeline.ping_stream_response = lambda: PingStreamResponse(task_id="task") + + responses = list(pipeline._handle_ping_event(QueuePingEvent())) + + assert isinstance(responses[0], PingStreamResponse) + + def test_handle_error_event(self): + pipeline = _make_pipeline() + pipeline._base_task_pipeline.handle_error = lambda **kwargs: ValueError("boom") + pipeline._base_task_pipeline.error_to_stream_response = lambda err: err + + @contextmanager + def _fake_session(): + yield SimpleNamespace() + + pipeline._database_session = _fake_session + + responses = list(pipeline._handle_error_event(QueueErrorEvent(error=ValueError("boom")))) + + assert isinstance(responses[0], ValueError) + + def test_handle_workflow_started_event_sets_run_id(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._workflow_response_converter.workflow_start_to_stream_response = lambda **kwargs: "started" + + @contextmanager + def _fake_session(): + yield SimpleNamespace() + + monkeypatch.setattr(pipeline, "_database_session", _fake_session) + monkeypatch.setattr(pipeline, "_get_message", lambda **kwargs: SimpleNamespace()) + + responses = list(pipeline._handle_workflow_started_event(QueueWorkflowStartedEvent())) + + assert pipeline._workflow_run_id == "run-id" + assert responses == ["started"] + + def test_message_end_to_stream_response_strips_annotation_reply(self): + pipeline = _make_pipeline() + pipeline._task_state.metadata.annotation_reply = AnnotationReply( + id="ann", + account=AnnotationReplyAccount(id="acc", name="acc"), + ) + + response = pipeline._message_end_to_stream_response() + + assert "annotation_reply" not in response.metadata + + def test_handle_output_moderation_chunk_publishes_stop(self): + pipeline = _make_pipeline() + events: list[object] = [] + + class _Moderation: + def should_direct_output(self): + return True + + def get_final_output(self): + return "final" + + pipeline._base_task_pipeline.output_moderation_handler = _Moderation() + pipeline._base_task_pipeline.queue_manager = SimpleNamespace( + publish=lambda event, pub_from: events.append(event) + ) + + result = pipeline._handle_output_moderation_chunk("ignored") + + assert result is True + assert pipeline._task_state.answer == "final" + assert any(isinstance(event, QueueTextChunkEvent) for event in events) + assert any(isinstance(event, QueueStopEvent) for event in events) + + def test_handle_node_succeeded_event_records_files(self): + pipeline = _make_pipeline() + pipeline._workflow_response_converter.fetch_files_from_node_outputs = lambda outputs: [ + {"type": "file", "transfer_method": "local"} + ] + pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = lambda **kwargs: "done" + pipeline._save_output_for_event = lambda event, node_execution_id: None + + event = SimpleNamespace( + node_type=BuiltinNodeTypes.ANSWER, + outputs={"k": "v"}, + node_execution_id="exec", + node_id="node", + ) + + responses = list(pipeline._handle_node_succeeded_event(event)) + + assert responses == ["done"] + assert pipeline._recorded_files + + def test_iteration_and_loop_handlers(self): + pipeline = _make_pipeline() + pipeline._workflow_run_id = "run-id" + pipeline._workflow_response_converter.workflow_iteration_start_to_stream_response = lambda **kwargs: ( + "iter_start" + ) + pipeline._workflow_response_converter.workflow_iteration_next_to_stream_response = lambda **kwargs: "iter_next" + pipeline._workflow_response_converter.workflow_iteration_completed_to_stream_response = lambda **kwargs: ( + "iter_done" + ) + pipeline._workflow_response_converter.workflow_loop_start_to_stream_response = lambda **kwargs: "loop_start" + pipeline._workflow_response_converter.workflow_loop_next_to_stream_response = lambda **kwargs: "loop_next" + pipeline._workflow_response_converter.workflow_loop_completed_to_stream_response = lambda **kwargs: "loop_done" + + iter_start = QueueIterationStartEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + iter_next = QueueIterationNextEvent( + index=1, + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + node_run_index=1, + ) + iter_done = QueueIterationCompletedEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + loop_start = QueueLoopStartEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + loop_next = QueueLoopNextEvent( + index=1, + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + node_run_index=1, + ) + loop_done = QueueLoopCompletedEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + + assert list(pipeline._handle_iteration_start_event(iter_start)) == ["iter_start"] + assert list(pipeline._handle_iteration_next_event(iter_next)) == ["iter_next"] + assert list(pipeline._handle_iteration_completed_event(iter_done)) == ["iter_done"] + assert list(pipeline._handle_loop_start_event(loop_start)) == ["loop_start"] + assert list(pipeline._handle_loop_next_event(loop_next)) == ["loop_next"] + assert list(pipeline._handle_loop_completed_event(loop_done)) == ["loop_done"] + + def test_workflow_finish_handlers(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._workflow_run_id = "run-id" + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._workflow_response_converter.workflow_finish_to_stream_response = lambda **kwargs: "finish" + pipeline._workflow_response_converter.workflow_pause_to_stream_response = lambda **kwargs: ["pause"] + pipeline._persist_human_input_extra_content = lambda **kwargs: None + pipeline._save_message = lambda **kwargs: None + pipeline._base_task_pipeline.queue_manager.publish = lambda *args, **kwargs: None + pipeline._base_task_pipeline.handle_error = lambda **kwargs: ValueError("boom") + pipeline._base_task_pipeline.error_to_stream_response = lambda err: err + pipeline._get_message = lambda **kwargs: SimpleNamespace(id="message-id") + + @contextmanager + def _fake_session(): + yield SimpleNamespace(scalar=lambda *args, **kwargs: None) + + monkeypatch.setattr(pipeline, "_database_session", _fake_session) + + succeeded_responses = list(pipeline._handle_workflow_succeeded_event(QueueWorkflowSucceededEvent(outputs={}))) + assert len(succeeded_responses) == 2 + assert isinstance(succeeded_responses[0], MessageEndStreamResponse) + assert succeeded_responses[1] == "finish" + + partial_success_responses = list( + pipeline._handle_workflow_partial_success_event( + QueueWorkflowPartialSuccessEvent(exceptions_count=1, outputs={}) + ) + ) + assert len(partial_success_responses) == 2 + assert isinstance(partial_success_responses[0], MessageEndStreamResponse) + assert partial_success_responses[1] == "finish" + assert ( + list(pipeline._handle_workflow_failed_event(QueueWorkflowFailedEvent(error="err", exceptions_count=1)))[0] + == "finish" + ) + assert list(pipeline._handle_workflow_paused_event(QueueWorkflowPausedEvent(reasons=[], outputs={}))) == [ + "pause" + ] + + def test_node_failure_handlers(self): + pipeline = _make_pipeline() + pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = lambda **kwargs: "node_finish" + pipeline._save_output_for_event = lambda event, node_execution_id: None + + failed_event = QueueNodeFailedEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + start_at=datetime.utcnow(), + inputs={}, + outputs={}, + process_data={}, + error="err", + ) + exc_event = QueueNodeExceptionEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + start_at=datetime.utcnow(), + inputs={}, + outputs={}, + process_data={}, + error="err", + ) + + assert list(pipeline._handle_node_failed_events(failed_event)) == ["node_finish"] + assert list(pipeline._handle_node_failed_events(exc_event)) == ["node_finish"] + + def test_handle_text_chunk_event_tracks_streaming_metrics(self): + pipeline = _make_pipeline() + published: list[object] = [] + + class _Publisher: + def publish(self, message): + published.append(message) + + pipeline._message_cycle_manager = SimpleNamespace(message_to_stream_response=lambda **kwargs: "chunk") + + event = SimpleNamespace(text="hi", from_variable_selector=["a"]) + queue_message = SimpleNamespace(event=event) + + responses = list( + pipeline._handle_text_chunk_event(event, tts_publisher=_Publisher(), queue_message=queue_message) + ) + + assert responses == ["chunk"] + assert pipeline._task_state.is_streaming_response is True + assert pipeline._task_state.first_token_time is not None + assert pipeline._task_state.last_token_time is not None + assert pipeline._task_state.answer == "hi" + assert published == [queue_message] + + def test_handle_output_moderation_chunk_appends_token(self): + pipeline = _make_pipeline() + seen: list[str] = [] + + class _Moderation: + def should_direct_output(self): + return False + + def append_new_token(self, text): + seen.append(text) + + pipeline._base_task_pipeline.output_moderation_handler = _Moderation() + + result = pipeline._handle_output_moderation_chunk("token") + + assert result is False + assert seen == ["token"] + + def test_handle_retriever_and_annotation_events(self): + pipeline = _make_pipeline() + calls = {"retriever": 0, "annotation": 0} + + def _hit_retriever(event): + calls["retriever"] += 1 + + def _hit_annotation(event): + calls["annotation"] += 1 + + pipeline._message_cycle_manager.handle_retriever_resources = _hit_retriever + pipeline._message_cycle_manager.handle_annotation_reply = _hit_annotation + + retriever_event = QueueRetrieverResourcesEvent(retriever_resources=[]) + annotation_event = QueueAnnotationReplyEvent(message_annotation_id="ann") + + assert list(pipeline._handle_retriever_resources_event(retriever_event)) == [] + assert list(pipeline._handle_annotation_reply_event(annotation_event)) == [] + assert calls == {"retriever": 1, "annotation": 1} + + def test_handle_message_replace_event(self): + pipeline = _make_pipeline() + pipeline._message_cycle_manager.message_replace_to_stream_response = lambda **kwargs: "replace" + + event = QueueMessageReplaceEvent( + text="new", + reason=QueueMessageReplaceEvent.MessageReplaceReason.OUTPUT_MODERATION, + ) + + assert list(pipeline._handle_message_replace_event(event)) == ["replace"] + + def test_handle_human_input_events(self): + pipeline = _make_pipeline() + persisted: list[str] = [] + pipeline._persist_human_input_extra_content = lambda **kwargs: persisted.append("saved") + pipeline._workflow_response_converter.human_input_form_filled_to_stream_response = lambda **kwargs: "filled" + pipeline._workflow_response_converter.human_input_form_timeout_to_stream_response = lambda **kwargs: "timeout" + + filled_event = QueueHumanInputFormFilledEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="title", + rendered_content="content", + action_id="action", + action_text="action", + ) + timeout_event = QueueHumanInputFormTimeoutEvent( + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="title", + expiration_time=datetime.utcnow(), + ) + + assert list(pipeline._handle_human_input_form_filled_event(filled_event)) == ["filled"] + assert list(pipeline._handle_human_input_form_timeout_event(timeout_event)) == ["timeout"] + assert persisted == ["saved"] + + def test_save_message_strips_markdown_and_sets_usage(self): + pipeline = _make_pipeline() + pipeline._recorded_files = [ + { + "type": "image", + "transfer_method": "remote", + "remote_url": "http://example.com/file.png", + "related_id": "file-id", + } + ] + pipeline._task_state.answer = "![img](url) hello" + pipeline._task_state.is_streaming_response = True + pipeline._task_state.first_token_time = pipeline._base_task_pipeline.start_at + 0.1 + pipeline._task_state.last_token_time = pipeline._base_task_pipeline.start_at + 0.2 + + message = SimpleNamespace( + id="message-id", + status=MessageStatus.PAUSED, + answer="", + updated_at=None, + provider_response_latency=None, + message_tokens=None, + message_unit_price=None, + message_price_unit=None, + answer_tokens=None, + answer_unit_price=None, + answer_price_unit=None, + total_price=None, + currency=None, + message_metadata=None, + invoke_from=InvokeFrom.WEB_APP, + from_account_id=None, + from_end_user_id="end-user", + ) + + class _Session: + def scalar(self, *args, **kwargs): + return message + + def add_all(self, items): + self.items = items + + graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + + pipeline._save_message(session=_Session(), graph_runtime_state=graph_runtime_state) + + assert message.status == MessageStatus.NORMAL + assert message.answer == "hello" + assert message.message_metadata + + def test_handle_stop_event_saves_message_for_moderation(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._message_end_to_stream_response = lambda: "end" + saved: list[str] = [] + + def _save_message(**kwargs): + saved.append("saved") + + pipeline._save_message = _save_message + + @contextmanager + def _fake_session(): + yield SimpleNamespace() + + monkeypatch.setattr(pipeline, "_database_session", _fake_session) + + responses = list(pipeline._handle_stop_event(QueueStopEvent(stopped_by=QueueStopEvent.StopBy.INPUT_MODERATION))) + + assert responses == ["end"] + assert saved == ["saved"] + + def test_handle_message_end_event_applies_output_moderation(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._base_task_pipeline.handle_output_moderation_when_task_finished = lambda answer: "safe" + pipeline._message_cycle_manager.message_replace_to_stream_response = lambda **kwargs: "replace" + pipeline._message_end_to_stream_response = lambda: "end" + + saved: list[str] = [] + + def _save_message(**kwargs): + saved.append("saved") + + pipeline._save_message = _save_message + + @contextmanager + def _fake_session(): + yield SimpleNamespace() + + monkeypatch.setattr(pipeline, "_database_session", _fake_session) + + responses = list(pipeline._handle_advanced_chat_message_end_event(QueueAdvancedChatMessageEndEvent())) + + assert responses == ["replace", "end"] + assert saved == ["saved"] + + def test_dispatch_event_handles_node_exception(self): + pipeline = _make_pipeline() + pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = lambda **kwargs: "failed" + pipeline._save_output_for_event = lambda *args, **kwargs: None + + event = QueueNodeExceptionEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + start_at=datetime.utcnow(), + inputs={}, + outputs={}, + process_data={}, + error="err", + ) + + assert list(pipeline._dispatch_event(event)) == ["failed"] diff --git a/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_config_manager.py b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_config_manager.py new file mode 100644 index 0000000000..a871e8d93b --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_config_manager.py @@ -0,0 +1,302 @@ +import uuid +from types import SimpleNamespace + +import pytest + +from core.app.app_config.entities import EasyUIBasedAppModelConfigFrom +from core.app.apps.agent_chat.app_config_manager import ( + AgentChatAppConfigManager, +) +from core.entities.agent_entities import PlanningStrategy + + +class TestAgentChatAppConfigManagerGetAppConfig: + def test_get_app_config_override_config(self, mocker): + app_model = mocker.MagicMock(id="app1", tenant_id="tenant", mode="agent-chat") + app_model_config = mocker.MagicMock(id="cfg1") + app_model_config.to_dict.return_value = {"ignored": True} + + override_config = {"model": {"provider": "p"}} + + mocker.patch("core.app.apps.agent_chat.app_config_manager.ModelConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.PromptTemplateConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.SensitiveWordAvoidanceConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.DatasetConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.AgentConfigManager.convert") + mocker.patch.object(AgentChatAppConfigManager, "convert_features") + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.BasicVariablesConfigManager.convert", + return_value=("variables", "external"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.AgentChatAppConfig", + side_effect=lambda **kwargs: SimpleNamespace(**kwargs), + ) + + result = AgentChatAppConfigManager.get_app_config( + app_model=app_model, + app_model_config=app_model_config, + conversation=None, + override_config_dict=override_config, + ) + + assert result.app_model_config_dict == override_config + assert result.app_model_config_from == EasyUIBasedAppModelConfigFrom.ARGS + assert result.variables == "variables" + assert result.external_data_variables == "external" + + def test_get_app_config_conversation_specific(self, mocker): + app_model = mocker.MagicMock(id="app1", tenant_id="tenant", mode="agent-chat") + app_model_config = mocker.MagicMock(id="cfg1") + app_model_config.to_dict.return_value = {"model": {"provider": "p"}} + conversation = mocker.MagicMock() + + mocker.patch("core.app.apps.agent_chat.app_config_manager.ModelConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.PromptTemplateConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.SensitiveWordAvoidanceConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.DatasetConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.AgentConfigManager.convert") + mocker.patch.object(AgentChatAppConfigManager, "convert_features") + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.BasicVariablesConfigManager.convert", + return_value=("variables", "external"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.AgentChatAppConfig", + side_effect=lambda **kwargs: SimpleNamespace(**kwargs), + ) + + result = AgentChatAppConfigManager.get_app_config( + app_model=app_model, + app_model_config=app_model_config, + conversation=conversation, + override_config_dict=None, + ) + + assert result.app_model_config_dict == app_model_config.to_dict.return_value + assert result.app_model_config_from.value == "conversation-specific-config" + + def test_get_app_config_latest_config(self, mocker): + app_model = mocker.MagicMock(id="app1", tenant_id="tenant", mode="agent-chat") + app_model_config = mocker.MagicMock(id="cfg1") + app_model_config.to_dict.return_value = {"model": {"provider": "p"}} + + mocker.patch("core.app.apps.agent_chat.app_config_manager.ModelConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.PromptTemplateConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.SensitiveWordAvoidanceConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.DatasetConfigManager.convert") + mocker.patch("core.app.apps.agent_chat.app_config_manager.AgentConfigManager.convert") + mocker.patch.object(AgentChatAppConfigManager, "convert_features") + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.BasicVariablesConfigManager.convert", + return_value=("variables", "external"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.AgentChatAppConfig", + side_effect=lambda **kwargs: SimpleNamespace(**kwargs), + ) + + result = AgentChatAppConfigManager.get_app_config( + app_model=app_model, + app_model_config=app_model_config, + conversation=None, + override_config_dict=None, + ) + + assert result.app_model_config_from.value == "app-latest-config" + + +class TestAgentChatAppConfigManagerConfigValidate: + def test_config_validate_filters_related_keys(self, mocker): + config = { + "model": {}, + "user_input_form": {}, + "file_upload": {}, + "prompt_template": {}, + "agent_mode": {}, + "opening_statement": {}, + "suggested_questions_after_answer": {}, + "speech_to_text": {}, + "text_to_speech": {}, + "retriever_resource": {}, + "dataset": {}, + "moderation": {}, + "extra": "value", + } + + def return_with_key(key): + return config, [key] + + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.ModelConfigManager.validate_and_set_defaults", + side_effect=lambda tenant_id, cfg: return_with_key("model"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.BasicVariablesConfigManager.validate_and_set_defaults", + side_effect=lambda tenant_id, cfg: return_with_key("user_input_form"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.FileUploadConfigManager.validate_and_set_defaults", + side_effect=lambda cfg: return_with_key("file_upload"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.PromptTemplateConfigManager.validate_and_set_defaults", + side_effect=lambda app_mode, cfg: return_with_key("prompt_template"), + ) + mocker.patch.object( + AgentChatAppConfigManager, + "validate_agent_mode_and_set_defaults", + side_effect=lambda tenant_id, cfg: return_with_key("agent_mode"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.OpeningStatementConfigManager.validate_and_set_defaults", + side_effect=lambda cfg: return_with_key("opening_statement"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.SuggestedQuestionsAfterAnswerConfigManager.validate_and_set_defaults", + side_effect=lambda cfg: return_with_key("suggested_questions_after_answer"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.SpeechToTextConfigManager.validate_and_set_defaults", + side_effect=lambda cfg: return_with_key("speech_to_text"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.TextToSpeechConfigManager.validate_and_set_defaults", + side_effect=lambda cfg: return_with_key("text_to_speech"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.RetrievalResourceConfigManager.validate_and_set_defaults", + side_effect=lambda cfg: return_with_key("retriever_resource"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.DatasetConfigManager.validate_and_set_defaults", + side_effect=lambda tenant_id, app_mode, cfg: return_with_key("dataset"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.SensitiveWordAvoidanceConfigManager.validate_and_set_defaults", + side_effect=lambda tenant_id, cfg: return_with_key("moderation"), + ) + + filtered = AgentChatAppConfigManager.config_validate("tenant", config) + assert set(filtered.keys()) == { + "model", + "user_input_form", + "file_upload", + "prompt_template", + "agent_mode", + "opening_statement", + "suggested_questions_after_answer", + "speech_to_text", + "text_to_speech", + "retriever_resource", + "dataset", + "moderation", + } + assert "extra" not in filtered + + +class TestValidateAgentModeAndSetDefaults: + def test_defaults_when_missing(self): + config = {} + updated, keys = AgentChatAppConfigManager.validate_agent_mode_and_set_defaults("tenant", config) + assert "agent_mode" in updated + assert updated["agent_mode"]["enabled"] is False + assert updated["agent_mode"]["tools"] == [] + assert keys == ["agent_mode"] + + @pytest.mark.parametrize( + "agent_mode", + ["invalid", 123], + ) + def test_agent_mode_type_validation(self, agent_mode): + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults("tenant", {"agent_mode": agent_mode}) + + def test_agent_mode_empty_list_defaults(self): + config = {"agent_mode": []} + updated, _ = AgentChatAppConfigManager.validate_agent_mode_and_set_defaults("tenant", config) + assert updated["agent_mode"]["enabled"] is False + assert updated["agent_mode"]["tools"] == [] + + def test_enabled_must_be_bool(self): + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults("tenant", {"agent_mode": {"enabled": "yes"}}) + + def test_strategy_must_be_valid(self): + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults( + "tenant", {"agent_mode": {"enabled": True, "strategy": "invalid"}} + ) + + def test_tools_must_be_list(self): + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults( + "tenant", {"agent_mode": {"enabled": True, "tools": "not-list"}} + ) + + def test_old_tool_dataset_requires_id(self): + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults( + "tenant", {"agent_mode": {"enabled": True, "tools": [{"dataset": {"enabled": True}}]}} + ) + + def test_old_tool_dataset_id_must_be_uuid(self): + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults( + "tenant", + {"agent_mode": {"enabled": True, "tools": [{"dataset": {"enabled": True, "id": "bad"}}]}}, + ) + + def test_old_tool_dataset_id_not_exists(self, mocker): + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.DatasetConfigManager.is_dataset_exists", + return_value=False, + ) + dataset_id = str(uuid.uuid4()) + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults( + "tenant", + {"agent_mode": {"enabled": True, "tools": [{"dataset": {"enabled": True, "id": dataset_id}}]}}, + ) + + def test_old_tool_enabled_must_be_bool(self): + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults( + "tenant", + {"agent_mode": {"enabled": True, "tools": [{"dataset": {"enabled": "yes", "id": str(uuid.uuid4())}}]}}, + ) + + @pytest.mark.parametrize("missing_key", ["provider_type", "provider_id", "tool_name", "tool_parameters"]) + def test_new_style_tool_requires_fields(self, missing_key): + tool = {"enabled": True, "provider_type": "type", "provider_id": "id", "tool_name": "tool"} + tool.pop(missing_key, None) + with pytest.raises(ValueError): + AgentChatAppConfigManager.validate_agent_mode_and_set_defaults( + "tenant", {"agent_mode": {"enabled": True, "tools": [tool]}} + ) + + def test_valid_old_and_new_style_tools(self, mocker): + mocker.patch( + "core.app.apps.agent_chat.app_config_manager.DatasetConfigManager.is_dataset_exists", + return_value=True, + ) + dataset_id = str(uuid.uuid4()) + config = { + "agent_mode": { + "enabled": True, + "strategy": PlanningStrategy.ROUTER.value, + "tools": [ + {"dataset": {"id": dataset_id}}, + { + "provider_type": "builtin", + "provider_id": "p1", + "tool_name": "tool", + "tool_parameters": {}, + }, + ], + } + } + + updated, _ = AgentChatAppConfigManager.validate_agent_mode_and_set_defaults("tenant", config) + assert updated["agent_mode"]["tools"][0]["dataset"]["enabled"] is False + assert updated["agent_mode"]["tools"][1]["enabled"] is False diff --git a/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_generator.py b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_generator.py new file mode 100644 index 0000000000..53f26d1592 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_generator.py @@ -0,0 +1,296 @@ +import contextlib + +import pytest +from pydantic import ValidationError + +from core.app.apps.agent_chat.app_generator import AgentChatAppGenerator +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.entities.app_invoke_entities import InvokeFrom +from dify_graph.model_runtime.errors.invoke import InvokeAuthorizationError + + +class DummyAccount: + def __init__(self, user_id): + self.id = user_id + self.session_id = f"session-{user_id}" + + +@pytest.fixture +def generator(mocker): + gen = AgentChatAppGenerator() + mocker.patch( + "core.app.apps.agent_chat.app_generator.current_app", + new=mocker.MagicMock(_get_current_object=mocker.MagicMock()), + ) + mocker.patch("core.app.apps.agent_chat.app_generator.contextvars.copy_context", return_value="ctx") + return gen + + +class TestAgentChatAppGeneratorGenerate: + def test_generate_rejects_blocking_mode(self, generator, mocker): + app_model = mocker.MagicMock() + user = DummyAccount("user") + with pytest.raises(ValueError): + generator.generate(app_model=app_model, user=user, args={}, invoke_from=mocker.MagicMock(), streaming=False) + + def test_generate_requires_query(self, generator, mocker): + app_model = mocker.MagicMock() + user = DummyAccount("user") + with pytest.raises(ValueError): + generator.generate(app_model=app_model, user=user, args={"inputs": {}}, invoke_from=mocker.MagicMock()) + + def test_generate_rejects_non_string_query(self, generator, mocker): + app_model = mocker.MagicMock() + user = DummyAccount("user") + with pytest.raises(ValueError): + generator.generate( + app_model=app_model, + user=user, + args={"query": 123, "inputs": {}}, + invoke_from=mocker.MagicMock(), + ) + + def test_generate_override_requires_debugger(self, generator, mocker): + app_model = mocker.MagicMock() + user = DummyAccount("user") + + with pytest.raises(ValueError): + generator.generate( + app_model=app_model, + user=user, + args={"query": "hi", "inputs": {}, "model_config": {"model": {"provider": "p"}}}, + invoke_from=InvokeFrom.WEB_APP, + ) + + def test_generate_success_with_debugger_override(self, generator, mocker): + app_model = mocker.MagicMock(id="app1", tenant_id="tenant", mode="agent-chat") + app_model_config = mocker.MagicMock(id="cfg1") + app_model_config.to_dict.return_value = {"model": {"provider": "p"}} + + user = DummyAccount("user") + invoke_from = InvokeFrom.DEBUGGER + + generator._get_app_model_config = mocker.MagicMock(return_value=app_model_config) + generator._prepare_user_inputs = mocker.MagicMock(return_value={"x": 1}) + generator._init_generate_records = mocker.MagicMock( + return_value=(mocker.MagicMock(id="conv", mode="agent-chat"), mocker.MagicMock(id="msg")) + ) + generator._handle_response = mocker.MagicMock(return_value="response") + + mocker.patch( + "core.app.apps.agent_chat.app_generator.AgentChatAppConfigManager.config_validate", + return_value={"validated": True}, + ) + app_config = mocker.MagicMock(variables={}, prompt_template=mocker.MagicMock(), external_data_variables=[]) + mocker.patch( + "core.app.apps.agent_chat.app_generator.AgentChatAppConfigManager.get_app_config", + return_value=app_config, + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.ModelConfigConverter.convert", + return_value=mocker.MagicMock(), + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.FileUploadConfigManager.convert", + return_value=mocker.MagicMock(), + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.file_factory.build_from_mappings", + return_value=["file-obj"], + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.ConversationService.get_conversation", + return_value=mocker.MagicMock(id="conv"), + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.TraceQueueManager", + return_value=mocker.MagicMock(), + ) + + queue_manager = mocker.MagicMock() + mocker.patch( + "core.app.apps.agent_chat.app_generator.MessageBasedAppQueueManager", + return_value=queue_manager, + ) + + thread_obj = mocker.MagicMock() + mocker.patch( + "core.app.apps.agent_chat.app_generator.threading.Thread", + return_value=thread_obj, + ) + + mocker.patch( + "core.app.apps.agent_chat.app_generator.AgentChatAppGenerateResponseConverter.convert", + return_value={"result": "ok"}, + ) + app_entity = mocker.MagicMock(task_id="task", user_id="user", invoke_from=invoke_from) + mocker.patch( + "core.app.apps.agent_chat.app_generator.AgentChatAppGenerateEntity", + return_value=app_entity, + ) + + args = { + "query": "hello", + "inputs": {"name": "world"}, + "conversation_id": "conv", + "model_config": {"model": {"provider": "p"}}, + "files": [{"id": "f1"}], + } + + result = generator.generate(app_model=app_model, user=user, args=args, invoke_from=invoke_from, streaming=True) + + assert result == {"result": "ok"} + thread_obj.start.assert_called_once() + + def test_generate_without_file_config(self, generator, mocker): + app_model = mocker.MagicMock(id="app1", tenant_id="tenant", mode="agent-chat") + app_model_config = mocker.MagicMock(id="cfg1") + app_model_config.to_dict.return_value = {"model": {"provider": "p"}} + + user = DummyAccount("user") + + generator._get_app_model_config = mocker.MagicMock(return_value=app_model_config) + generator._prepare_user_inputs = mocker.MagicMock(return_value={"x": 1}) + generator._init_generate_records = mocker.MagicMock( + return_value=(mocker.MagicMock(id="conv", mode="agent-chat"), mocker.MagicMock(id="msg")) + ) + generator._handle_response = mocker.MagicMock(return_value="response") + + mocker.patch( + "core.app.apps.agent_chat.app_generator.AgentChatAppConfigManager.get_app_config", + return_value=mocker.MagicMock(variables={}, prompt_template=mocker.MagicMock(), external_data_variables=[]), + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.ModelConfigConverter.convert", + return_value=mocker.MagicMock(), + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.FileUploadConfigManager.convert", + return_value=None, + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.file_factory.build_from_mappings", + return_value=["file-obj"], + ) + mocker.patch( + "core.app.apps.agent_chat.app_generator.TraceQueueManager", + return_value=mocker.MagicMock(), + ) + + mocker.patch( + "core.app.apps.agent_chat.app_generator.MessageBasedAppQueueManager", + return_value=mocker.MagicMock(), + ) + + thread_obj = mocker.MagicMock() + mocker.patch( + "core.app.apps.agent_chat.app_generator.threading.Thread", + return_value=thread_obj, + ) + + mocker.patch( + "core.app.apps.agent_chat.app_generator.AgentChatAppGenerateResponseConverter.convert", + return_value={"result": "ok"}, + ) + app_entity = mocker.MagicMock(task_id="task", user_id="user", invoke_from=InvokeFrom.WEB_APP) + mocker.patch( + "core.app.apps.agent_chat.app_generator.AgentChatAppGenerateEntity", + return_value=app_entity, + ) + + args = {"query": "hello", "inputs": {"name": "world"}} + + result = generator.generate( + app_model=app_model, + user=user, + args=args, + invoke_from=InvokeFrom.WEB_APP, + streaming=True, + ) + + assert result == {"result": "ok"} + + +class TestAgentChatAppGeneratorWorker: + @pytest.fixture(autouse=True) + def patch_context(self, mocker): + @contextlib.contextmanager + def ctx_manager(*args, **kwargs): + yield + + mocker.patch("core.app.apps.agent_chat.app_generator.preserve_flask_contexts", ctx_manager) + + def test_generate_worker_handles_generate_task_stopped(self, generator, mocker): + queue_manager = mocker.MagicMock() + generator._get_conversation = mocker.MagicMock(return_value=mocker.MagicMock()) + generator._get_message = mocker.MagicMock(return_value=mocker.MagicMock()) + + runner = mocker.MagicMock() + runner.run.side_effect = GenerateTaskStoppedError() + mocker.patch("core.app.apps.agent_chat.app_generator.AgentChatAppRunner", return_value=runner) + mocker.patch("core.app.apps.agent_chat.app_generator.db.session.close") + + generator._generate_worker( + flask_app=mocker.MagicMock(), + context=mocker.MagicMock(), + application_generate_entity=mocker.MagicMock(), + queue_manager=queue_manager, + conversation_id="conv", + message_id="msg", + ) + + queue_manager.publish_error.assert_not_called() + + @pytest.mark.parametrize( + "error", + [ + InvokeAuthorizationError("bad"), + ValidationError.from_exception_data("TestModel", []), + ValueError("bad"), + Exception("bad"), + ], + ) + def test_generate_worker_publishes_errors(self, generator, mocker, error): + queue_manager = mocker.MagicMock() + generator._get_conversation = mocker.MagicMock(return_value=mocker.MagicMock()) + generator._get_message = mocker.MagicMock(return_value=mocker.MagicMock()) + + runner = mocker.MagicMock() + runner.run.side_effect = error + mocker.patch("core.app.apps.agent_chat.app_generator.AgentChatAppRunner", return_value=runner) + mocker.patch("core.app.apps.agent_chat.app_generator.db.session.close") + + generator._generate_worker( + flask_app=mocker.MagicMock(), + context=mocker.MagicMock(), + application_generate_entity=mocker.MagicMock(), + queue_manager=queue_manager, + conversation_id="conv", + message_id="msg", + ) + + assert queue_manager.publish_error.called + + def test_generate_worker_logs_value_error_when_debug(self, generator, mocker): + queue_manager = mocker.MagicMock() + generator._get_conversation = mocker.MagicMock(return_value=mocker.MagicMock()) + generator._get_message = mocker.MagicMock(return_value=mocker.MagicMock()) + + runner = mocker.MagicMock() + runner.run.side_effect = ValueError("bad") + mocker.patch("core.app.apps.agent_chat.app_generator.AgentChatAppRunner", return_value=runner) + mocker.patch("core.app.apps.agent_chat.app_generator.db.session.close") + + mocker.patch("core.app.apps.agent_chat.app_generator.dify_config", new=mocker.MagicMock(DEBUG=True)) + logger = mocker.patch("core.app.apps.agent_chat.app_generator.logger") + + generator._generate_worker( + flask_app=mocker.MagicMock(), + context=mocker.MagicMock(), + application_generate_entity=mocker.MagicMock(), + queue_manager=queue_manager, + conversation_id="conv", + message_id="msg", + ) + + logger.exception.assert_called_once() diff --git a/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_runner.py b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_runner.py new file mode 100644 index 0000000000..5603115b30 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_app_runner.py @@ -0,0 +1,413 @@ +import pytest + +from core.agent.entities import AgentEntity +from core.app.apps.agent_chat.app_runner import AgentChatAppRunner +from core.moderation.base import ModerationError +from dify_graph.model_runtime.entities.llm_entities import LLMMode +from dify_graph.model_runtime.entities.model_entities import ModelFeature, ModelPropertyKey + + +@pytest.fixture +def runner(): + return AgentChatAppRunner() + + +class TestAgentChatAppRunnerRun: + def test_run_app_not_found(self, runner, mocker): + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", agent=mocker.MagicMock()) + generate_entity = mocker.MagicMock(app_config=app_config, inputs={}, query="q", files=[], stream=True) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=None) + + with pytest.raises(ValueError): + runner.run(generate_entity, mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) + + def test_run_moderation_error_direct_output(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = mocker.MagicMock() + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock(), + conversation_id=None, + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", side_effect=ModerationError("bad")) + mocker.patch.object(runner, "direct_output") + + runner.run(generate_entity, mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) + + runner.direct_output.assert_called_once() + + def test_run_annotation_reply_short_circuits(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = mocker.MagicMock() + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock(), + conversation_id=None, + user_id="user", + invoke_from=mocker.MagicMock(), + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + annotation = mocker.MagicMock(id="anno", content="answer") + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=annotation) + mocker.patch.object(runner, "direct_output") + + queue_manager = mocker.MagicMock() + runner.run(generate_entity, queue_manager, mocker.MagicMock(), mocker.MagicMock()) + + queue_manager.publish.assert_called_once() + runner.direct_output.assert_called_once() + + def test_run_hosting_moderation_short_circuits(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = mocker.MagicMock() + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock(), + conversation_id=None, + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=True) + + runner.run(generate_entity, mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) + + def test_run_model_schema_missing(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = AgentEntity(provider="p", model="m", strategy=AgentEntity.Strategy.CHAIN_OF_THOUGHT) + + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock( + provider_model_bundle=mocker.MagicMock(), + model="m", + provider="p", + credentials={"k": "v"}, + ), + conversation_id="conv", + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=False) + + llm_instance = mocker.MagicMock() + llm_instance.model_type_instance.get_model_schema.return_value = None + mocker.patch("core.app.apps.agent_chat.app_runner.ModelInstance", return_value=llm_instance) + + with pytest.raises(ValueError): + runner.run(generate_entity, mocker.MagicMock(), mocker.MagicMock(), mocker.MagicMock()) + + @pytest.mark.parametrize( + ("mode", "expected_runner"), + [ + (LLMMode.CHAT, "CotChatAgentRunner"), + (LLMMode.COMPLETION, "CotCompletionAgentRunner"), + ], + ) + def test_run_chain_of_thought_modes(self, runner, mocker, mode, expected_runner): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = AgentEntity(provider="p", model="m", strategy=AgentEntity.Strategy.CHAIN_OF_THOUGHT) + + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock( + provider_model_bundle=mocker.MagicMock(), + model="m", + provider="p", + credentials={"k": "v"}, + ), + conversation_id="conv", + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=False) + + model_schema = mocker.MagicMock() + model_schema.features = [] + model_schema.model_properties = {ModelPropertyKey.MODE: mode} + + llm_instance = mocker.MagicMock() + llm_instance.model_type_instance.get_model_schema.return_value = model_schema + mocker.patch("core.app.apps.agent_chat.app_runner.ModelInstance", return_value=llm_instance) + + conversation = mocker.MagicMock(id="conv") + message = mocker.MagicMock(id="msg") + mocker.patch( + "core.app.apps.agent_chat.app_runner.db.session.scalar", + side_effect=[app_record, conversation, message], + ) + + runner_cls = mocker.MagicMock() + mocker.patch(f"core.app.apps.agent_chat.app_runner.{expected_runner}", runner_cls) + + runner_instance = mocker.MagicMock() + runner_cls.return_value = runner_instance + runner_instance.run.return_value = [] + mocker.patch.object(runner, "_handle_invoke_result") + + runner.run(generate_entity, mocker.MagicMock(), conversation, message) + + runner_instance.run.assert_called_once() + runner._handle_invoke_result.assert_called_once() + + def test_run_invalid_llm_mode_raises(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = AgentEntity(provider="p", model="m", strategy=AgentEntity.Strategy.CHAIN_OF_THOUGHT) + + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock( + provider_model_bundle=mocker.MagicMock(), + model="m", + provider="p", + credentials={"k": "v"}, + ), + conversation_id="conv", + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=False) + + model_schema = mocker.MagicMock() + model_schema.features = [] + model_schema.model_properties = {ModelPropertyKey.MODE: "invalid"} + + llm_instance = mocker.MagicMock() + llm_instance.model_type_instance.get_model_schema.return_value = model_schema + mocker.patch("core.app.apps.agent_chat.app_runner.ModelInstance", return_value=llm_instance) + + conversation = mocker.MagicMock(id="conv") + message = mocker.MagicMock(id="msg") + mocker.patch( + "core.app.apps.agent_chat.app_runner.db.session.scalar", + side_effect=[app_record, conversation, message], + ) + + with pytest.raises(ValueError): + runner.run(generate_entity, mocker.MagicMock(), conversation, message) + + def test_run_function_calling_strategy_selected_by_features(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = AgentEntity(provider="p", model="m", strategy=AgentEntity.Strategy.CHAIN_OF_THOUGHT) + + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock( + provider_model_bundle=mocker.MagicMock(), + model="m", + provider="p", + credentials={"k": "v"}, + ), + conversation_id="conv", + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=False) + + model_schema = mocker.MagicMock() + model_schema.features = [ModelFeature.TOOL_CALL] + model_schema.model_properties = {ModelPropertyKey.MODE: LLMMode.CHAT} + + llm_instance = mocker.MagicMock() + llm_instance.model_type_instance.get_model_schema.return_value = model_schema + mocker.patch("core.app.apps.agent_chat.app_runner.ModelInstance", return_value=llm_instance) + + conversation = mocker.MagicMock(id="conv") + message = mocker.MagicMock(id="msg") + mocker.patch( + "core.app.apps.agent_chat.app_runner.db.session.scalar", + side_effect=[app_record, conversation, message], + ) + + runner_cls = mocker.MagicMock() + mocker.patch("core.app.apps.agent_chat.app_runner.FunctionCallAgentRunner", runner_cls) + + runner_instance = mocker.MagicMock() + runner_cls.return_value = runner_instance + runner_instance.run.return_value = [] + mocker.patch.object(runner, "_handle_invoke_result") + + runner.run(generate_entity, mocker.MagicMock(), conversation, message) + + assert app_config.agent.strategy == AgentEntity.Strategy.FUNCTION_CALLING + runner_instance.run.assert_called_once() + + def test_run_conversation_not_found(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = AgentEntity(provider="p", model="m", strategy=AgentEntity.Strategy.FUNCTION_CALLING) + + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock( + provider_model_bundle=mocker.MagicMock(), + model="m", + provider="p", + credentials={"k": "v"}, + ), + conversation_id="conv", + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch( + "core.app.apps.agent_chat.app_runner.db.session.scalar", + side_effect=[app_record, None], + ) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=False) + + with pytest.raises(ValueError): + runner.run(generate_entity, mocker.MagicMock(), mocker.MagicMock(id="conv"), mocker.MagicMock(id="msg")) + + def test_run_message_not_found(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = AgentEntity(provider="p", model="m", strategy=AgentEntity.Strategy.FUNCTION_CALLING) + + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock( + provider_model_bundle=mocker.MagicMock(), + model="m", + provider="p", + credentials={"k": "v"}, + ), + conversation_id="conv", + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch( + "core.app.apps.agent_chat.app_runner.db.session.scalar", + side_effect=[app_record, mocker.MagicMock(id="conv"), None], + ) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=False) + + with pytest.raises(ValueError): + runner.run(generate_entity, mocker.MagicMock(), mocker.MagicMock(id="conv"), mocker.MagicMock(id="msg")) + + def test_run_invalid_agent_strategy_raises(self, runner, mocker): + app_record = mocker.MagicMock(id="app1", tenant_id="tenant") + app_config = mocker.MagicMock(app_id="app1", tenant_id="tenant", prompt_template=mocker.MagicMock()) + app_config.agent = mocker.MagicMock(strategy="invalid", provider="p", model="m") + + generate_entity = mocker.MagicMock( + app_config=app_config, + inputs={}, + query="q", + files=[], + stream=True, + model_conf=mocker.MagicMock( + provider_model_bundle=mocker.MagicMock(), + model="m", + provider="p", + credentials={"k": "v"}, + ), + conversation_id="conv", + invoke_from=mocker.MagicMock(), + user_id="user", + ) + + mocker.patch("core.app.apps.agent_chat.app_runner.db.session.scalar", return_value=app_record) + mocker.patch.object(runner, "organize_prompt_messages", return_value=([], None)) + mocker.patch.object(runner, "moderation_for_inputs", return_value=(None, {}, "q")) + mocker.patch.object(runner, "query_app_annotations_to_reply", return_value=None) + mocker.patch.object(runner, "check_hosting_moderation", return_value=False) + + model_schema = mocker.MagicMock() + model_schema.features = [] + model_schema.model_properties = {ModelPropertyKey.MODE: LLMMode.CHAT} + + llm_instance = mocker.MagicMock() + llm_instance.model_type_instance.get_model_schema.return_value = model_schema + mocker.patch("core.app.apps.agent_chat.app_runner.ModelInstance", return_value=llm_instance) + + conversation = mocker.MagicMock(id="conv") + message = mocker.MagicMock(id="msg") + mocker.patch( + "core.app.apps.agent_chat.app_runner.db.session.scalar", + side_effect=[app_record, conversation, message], + ) + + with pytest.raises(ValueError): + runner.run(generate_entity, mocker.MagicMock(), conversation, message) diff --git a/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_generate_response_converter.py new file mode 100644 index 0000000000..02a1e04c98 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/agent_chat/test_agent_chat_generate_response_converter.py @@ -0,0 +1,162 @@ +from collections.abc import Generator + +from core.app.apps.agent_chat.generate_response_converter import AgentChatAppGenerateResponseConverter +from core.app.entities.task_entities import ( + ChatbotAppBlockingResponse, + ChatbotAppStreamResponse, + ErrorStreamResponse, + MessageEndStreamResponse, + MessageStreamResponse, + PingStreamResponse, +) + + +class TestAgentChatAppGenerateResponseConverterBlocking: + def test_convert_blocking_full_response(self): + blocking = ChatbotAppBlockingResponse( + task_id="task", + data=ChatbotAppBlockingResponse.Data( + id="id", + mode="agent-chat", + conversation_id="conv", + message_id="msg", + answer="answer", + metadata={"a": 1}, + created_at=123, + ), + ) + + result = AgentChatAppGenerateResponseConverter.convert_blocking_full_response(blocking) + + assert result["event"] == "message" + assert result["answer"] == "answer" + assert result["metadata"] == {"a": 1} + + def test_convert_blocking_simple_response_with_dict_metadata(self): + blocking = ChatbotAppBlockingResponse( + task_id="task", + data=ChatbotAppBlockingResponse.Data( + id="id", + mode="agent-chat", + conversation_id="conv", + message_id="msg", + answer="answer", + metadata={ + "retriever_resources": [ + { + "segment_id": "s1", + "position": 1, + "document_name": "doc", + "score": 0.9, + "content": "content", + } + ], + "annotation_reply": {"id": "a"}, + "usage": {"prompt_tokens": 1}, + }, + created_at=123, + ), + ) + + result = AgentChatAppGenerateResponseConverter.convert_blocking_simple_response(blocking) + + assert "annotation_reply" not in result["metadata"] + assert "usage" not in result["metadata"] + + def test_convert_blocking_simple_response_with_non_dict_metadata(self): + blocking = ChatbotAppBlockingResponse.model_construct( + task_id="task", + data=ChatbotAppBlockingResponse.Data.model_construct( + id="id", + mode="agent-chat", + conversation_id="conv", + message_id="msg", + answer="answer", + metadata="bad", + created_at=123, + ), + ) + + result = AgentChatAppGenerateResponseConverter.convert_blocking_simple_response(blocking) + + assert result["metadata"] == {} + + +class TestAgentChatAppGenerateResponseConverterStream: + def build_stream(self) -> Generator[ChatbotAppStreamResponse, None, None]: + def _gen(): + yield ChatbotAppStreamResponse( + conversation_id="conv", + message_id="msg", + created_at=1, + stream_response=PingStreamResponse(task_id="t"), + ) + yield ChatbotAppStreamResponse( + conversation_id="conv", + message_id="msg", + created_at=2, + stream_response=MessageStreamResponse(task_id="t", id="m1", answer="hi"), + ) + yield ChatbotAppStreamResponse( + conversation_id="conv", + message_id="msg", + created_at=3, + stream_response=MessageEndStreamResponse( + task_id="t", + id="m1", + metadata={ + "retriever_resources": [ + { + "segment_id": "s1", + "position": 1, + "document_name": "doc", + "score": 0.9, + "content": "content", + "summary": "summary", + "extra": "ignored", + } + ], + "annotation_reply": {"id": "a"}, + "usage": {"prompt_tokens": 1}, + }, + ), + ) + yield ChatbotAppStreamResponse( + conversation_id="conv", + message_id="msg", + created_at=4, + stream_response=ErrorStreamResponse(task_id="t", err=RuntimeError("bad")), + ) + + return _gen() + + def test_convert_stream_full_response(self): + items = list(AgentChatAppGenerateResponseConverter.convert_stream_full_response(self.build_stream())) + assert items[0] == "ping" + assert items[1]["event"] == "message" + assert "answer" in items[1] + assert items[2]["event"] == "message_end" + assert items[3]["event"] == "error" + + def test_convert_stream_simple_response(self): + items = list(AgentChatAppGenerateResponseConverter.convert_stream_simple_response(self.build_stream())) + assert items[0] == "ping" + # Assert the message event structure and content at items[1] + assert items[1]["event"] == "message" + assert items[1]["answer"] == "hi" or "hi" in items[1]["answer"] + assert items[2]["event"] == "message_end" + assert "metadata" in items[2] + metadata = items[2]["metadata"] + assert "annotation_reply" not in metadata + assert "usage" not in metadata + assert metadata["retriever_resources"] == [ + { + "segment_id": "s1", + "position": 1, + "document_name": "doc", + "score": 0.9, + "content": "content", + "summary": "summary", + } + ] + assert items[3]["event"] == "error" diff --git a/api/tests/unit_tests/core/app/apps/chat/__init__.py b/api/tests/unit_tests/core/app/apps/chat/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/app/apps/chat/test_app_config_manager.py b/api/tests/unit_tests/core/app/apps/chat/test_app_config_manager.py new file mode 100644 index 0000000000..271d007be6 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/chat/test_app_config_manager.py @@ -0,0 +1,113 @@ +from types import SimpleNamespace +from unittest.mock import patch + +from core.app.app_config.entities import EasyUIBasedAppModelConfigFrom, ModelConfigEntity, PromptTemplateEntity +from core.app.apps.chat.app_config_manager import ChatAppConfigManager +from models.model import AppMode + + +class TestChatAppConfigManager: + def test_get_app_config_uses_override_dict(self): + app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1", mode=AppMode.CHAT.value) + app_model_config = SimpleNamespace(id="config-1", to_dict=lambda: {"model": "m"}) + override = {"model": "override"} + + model_entity = ModelConfigEntity(provider="p", model="m") + prompt_entity = PromptTemplateEntity( + prompt_type=PromptTemplateEntity.PromptType.SIMPLE, + simple_prompt_template="hi", + ) + + with ( + patch("core.app.apps.chat.app_config_manager.ModelConfigManager.convert", return_value=model_entity), + patch( + "core.app.apps.chat.app_config_manager.PromptTemplateConfigManager.convert", return_value=prompt_entity + ), + patch( + "core.app.apps.chat.app_config_manager.SensitiveWordAvoidanceConfigManager.convert", + return_value=None, + ), + patch("core.app.apps.chat.app_config_manager.DatasetConfigManager.convert", return_value=None), + patch("core.app.apps.chat.app_config_manager.BasicVariablesConfigManager.convert", return_value=([], [])), + ): + app_config = ChatAppConfigManager.get_app_config( + app_model=app_model, + app_model_config=app_model_config, + conversation=None, + override_config_dict=override, + ) + + assert app_config.app_model_config_from == EasyUIBasedAppModelConfigFrom.ARGS + assert app_config.app_model_config_dict == override + assert app_config.app_mode == AppMode.CHAT + + def test_config_validate_filters_related_keys(self): + config = {"extra": 1} + + def _add_key(key, value): + def _inner(*args, **kwargs): + config = args[-1] + config = {**config, key: value} + return config, [key] + + return _inner + + with ( + patch( + "core.app.apps.chat.app_config_manager.ModelConfigManager.validate_and_set_defaults", + side_effect=_add_key("model", 1), + ), + patch( + "core.app.apps.chat.app_config_manager.BasicVariablesConfigManager.validate_and_set_defaults", + side_effect=_add_key("inputs", 2), + ), + patch( + "core.app.apps.chat.app_config_manager.FileUploadConfigManager.validate_and_set_defaults", + side_effect=_add_key("file_upload", 3), + ), + patch( + "core.app.apps.chat.app_config_manager.PromptTemplateConfigManager.validate_and_set_defaults", + side_effect=_add_key("prompt", 4), + ), + patch( + "core.app.apps.chat.app_config_manager.DatasetConfigManager.validate_and_set_defaults", + side_effect=_add_key("dataset", 5), + ), + patch( + "core.app.apps.chat.app_config_manager.OpeningStatementConfigManager.validate_and_set_defaults", + side_effect=_add_key("opening_statement", 6), + ), + patch( + "core.app.apps.chat.app_config_manager.SuggestedQuestionsAfterAnswerConfigManager.validate_and_set_defaults", + side_effect=_add_key("suggested_questions_after_answer", 7), + ), + patch( + "core.app.apps.chat.app_config_manager.SpeechToTextConfigManager.validate_and_set_defaults", + side_effect=_add_key("speech_to_text", 8), + ), + patch( + "core.app.apps.chat.app_config_manager.TextToSpeechConfigManager.validate_and_set_defaults", + side_effect=_add_key("text_to_speech", 9), + ), + patch( + "core.app.apps.chat.app_config_manager.RetrievalResourceConfigManager.validate_and_set_defaults", + side_effect=_add_key("retriever_resource", 10), + ), + patch( + "core.app.apps.chat.app_config_manager.SensitiveWordAvoidanceConfigManager.validate_and_set_defaults", + side_effect=_add_key("sensitive_word_avoidance", 11), + ), + ): + filtered = ChatAppConfigManager.config_validate(tenant_id="t1", config=config) + + assert filtered["model"] == 1 + assert filtered["inputs"] == 2 + assert filtered["file_upload"] == 3 + assert filtered["prompt"] == 4 + assert filtered["dataset"] == 5 + assert filtered["opening_statement"] == 6 + assert filtered["suggested_questions_after_answer"] == 7 + assert filtered["speech_to_text"] == 8 + assert filtered["text_to_speech"] == 9 + assert filtered["retriever_resource"] == 10 + assert filtered["sensitive_word_avoidance"] == 11 diff --git a/api/tests/unit_tests/core/app/apps/chat/test_app_generator_and_runner.py b/api/tests/unit_tests/core/app/apps/chat/test_app_generator_and_runner.py new file mode 100644 index 0000000000..3cdffbb4cd --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/chat/test_app_generator_and_runner.py @@ -0,0 +1,280 @@ +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.app.apps.chat.app_generator import ChatAppGenerator +from core.app.apps.chat.app_runner import ChatAppRunner +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.entities.queue_entities import QueueAnnotationReplyEvent +from core.moderation.base import ModerationError +from dify_graph.model_runtime.errors.invoke import InvokeAuthorizationError +from models.model import AppMode + + +class DummyGenerateEntity: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + +class DummyQueueManager: + def __init__(self, *args, **kwargs): + self.published = [] + + def publish_error(self, error, pub_from): + self.published.append((error, pub_from)) + + def publish(self, event, pub_from): + self.published.append((event, pub_from)) + + +class TestChatAppGenerator: + def test_generate_requires_query(self): + generator = ChatAppGenerator() + with pytest.raises(ValueError): + generator.generate( + app_model=SimpleNamespace(), + user=SimpleNamespace(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + + def test_generate_rejects_non_string_query(self): + generator = ChatAppGenerator() + with pytest.raises(ValueError): + generator.generate( + app_model=SimpleNamespace(), + user=SimpleNamespace(), + args={"query": 1, "inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + + def test_generate_debugger_overrides_model_config(self): + generator = ChatAppGenerator() + app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1") + user = SimpleNamespace(id="user-1", session_id="session-1") + args = {"query": "hi", "inputs": {}, "model_config": {"foo": "bar"}} + + with ( + patch("core.app.apps.chat.app_generator.ConversationService.get_conversation", return_value=None), + patch("core.app.apps.chat.app_generator.ChatAppConfigManager.config_validate", return_value={"x": 1}), + patch( + "core.app.apps.chat.app_generator.ChatAppConfigManager.get_app_config", + return_value=SimpleNamespace( + variables=[], external_data_variables=[], app_model_config_dict={}, app_mode=AppMode.CHAT + ), + ), + patch("core.app.apps.chat.app_generator.ModelConfigConverter.convert", return_value=SimpleNamespace()), + patch("core.app.apps.chat.app_generator.FileUploadConfigManager.convert", return_value=None), + patch("core.app.apps.chat.app_generator.file_factory.build_from_mappings", return_value=[]), + patch("core.app.apps.chat.app_generator.ChatAppGenerateEntity", DummyGenerateEntity), + patch("core.app.apps.chat.app_generator.TraceQueueManager", return_value=SimpleNamespace()), + patch("core.app.apps.chat.app_generator.MessageBasedAppQueueManager", DummyQueueManager), + patch( + "core.app.apps.chat.app_generator.ChatAppGenerateResponseConverter.convert", return_value={"ok": True} + ), + patch.object(ChatAppGenerator, "_get_app_model_config", return_value=SimpleNamespace(to_dict=lambda: {})), + patch.object(ChatAppGenerator, "_prepare_user_inputs", return_value={}), + patch.object( + ChatAppGenerator, + "_init_generate_records", + return_value=(SimpleNamespace(id="c1", mode="chat"), SimpleNamespace(id="m1")), + ), + patch.object(ChatAppGenerator, "_handle_response", return_value={"response": True}), + patch("core.app.apps.chat.app_generator.copy_current_request_context", side_effect=lambda f: f), + patch("core.app.apps.chat.app_generator.threading.Thread") as mock_thread, + ): + mock_thread.return_value.start.return_value = None + result = generator.generate(app_model, user, args, InvokeFrom.DEBUGGER, streaming=False) + + assert result == {"ok": True} + + def test_generate_rejects_model_config_override_for_non_debugger(self): + generator = ChatAppGenerator() + with pytest.raises(ValueError): + with ( + patch.object( + ChatAppGenerator, "_get_app_model_config", return_value=SimpleNamespace(to_dict=lambda: {}) + ), + ): + generator.generate( + app_model=SimpleNamespace(tenant_id="t1", id="a1", mode=AppMode.CHAT.value), + user=SimpleNamespace(id="u1", session_id="s1"), + args={"query": "hi", "inputs": {}, "model_config": {"foo": "bar"}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + + def test_generate_worker_handles_exceptions(self): + generator = ChatAppGenerator() + queue_manager = DummyQueueManager() + entity = DummyGenerateEntity(task_id="t1", user_id="u1") + + with ( + patch.object(ChatAppGenerator, "_get_conversation", return_value=SimpleNamespace()), + patch.object(ChatAppGenerator, "_get_message", return_value=SimpleNamespace()), + patch("core.app.apps.chat.app_generator.ChatAppRunner.run", side_effect=InvokeAuthorizationError()), + patch("core.app.apps.chat.app_generator.db.session.close"), + ): + generator._generate_worker( + flask_app=Mock(app_context=Mock(return_value=Mock(__enter__=Mock(), __exit__=Mock()))), + application_generate_entity=entity, + queue_manager=queue_manager, + conversation_id="c1", + message_id="m1", + ) + + assert queue_manager.published + + with ( + patch.object(ChatAppGenerator, "_get_conversation", return_value=SimpleNamespace()), + patch.object(ChatAppGenerator, "_get_message", return_value=SimpleNamespace()), + patch("core.app.apps.chat.app_generator.ChatAppRunner.run", side_effect=GenerateTaskStoppedError()), + patch("core.app.apps.chat.app_generator.db.session.close"), + ): + generator._generate_worker( + flask_app=Mock(app_context=Mock(return_value=Mock(__enter__=Mock(), __exit__=Mock()))), + application_generate_entity=entity, + queue_manager=queue_manager, + conversation_id="c1", + message_id="m1", + ) + + +class TestChatAppRunner: + def test_run_raises_when_app_missing(self): + runner = ChatAppRunner() + app_config = SimpleNamespace( + app_id="app-1", tenant_id="tenant-1", prompt_template=None, external_data_variables=[] + ) + app_generate_entity = DummyGenerateEntity( + app_config=app_config, + model_conf=SimpleNamespace(provider_model_bundle=None, model=None, parameters={}, app_model_config_dict={}), + inputs={}, + query="hi", + files=[], + file_upload_config=None, + conversation_id=None, + stream=False, + user_id="user-1", + invoke_from=InvokeFrom.SERVICE_API, + ) + + with patch("core.app.apps.chat.app_runner.db.session.scalar", return_value=None): + with pytest.raises(ValueError): + runner.run(app_generate_entity, DummyQueueManager(), SimpleNamespace(), SimpleNamespace(id="m1")) + + def test_run_moderation_error_direct_output(self): + runner = ChatAppRunner() + app_config = SimpleNamespace( + app_id="app-1", + tenant_id="tenant-1", + prompt_template=None, + external_data_variables=[], + dataset=None, + additional_features=None, + ) + app_generate_entity = DummyGenerateEntity( + app_config=app_config, + model_conf=SimpleNamespace(provider_model_bundle=None, model=None, parameters={}, app_model_config_dict={}), + inputs={}, + query="hi", + files=[], + file_upload_config=None, + conversation_id=None, + stream=False, + user_id="user-1", + invoke_from=InvokeFrom.SERVICE_API, + ) + + with ( + patch( + "core.app.apps.chat.app_runner.db.session.scalar", + return_value=SimpleNamespace(id="app-1", tenant_id="tenant-1"), + ), + patch.object(ChatAppRunner, "organize_prompt_messages", return_value=([], [])), + patch.object(ChatAppRunner, "moderation_for_inputs", side_effect=ModerationError("blocked")), + patch.object(ChatAppRunner, "direct_output") as mock_direct, + ): + runner.run(app_generate_entity, DummyQueueManager(), SimpleNamespace(), SimpleNamespace(id="m1")) + + mock_direct.assert_called_once() + + def test_run_annotation_reply_short_circuits(self): + runner = ChatAppRunner() + app_config = SimpleNamespace( + app_id="app-1", + tenant_id="tenant-1", + prompt_template=None, + external_data_variables=[], + dataset=None, + additional_features=None, + ) + app_generate_entity = DummyGenerateEntity( + app_config=app_config, + model_conf=SimpleNamespace(provider_model_bundle=None, model=None, parameters={}, app_model_config_dict={}), + inputs={}, + query="hi", + files=[], + file_upload_config=None, + conversation_id=None, + stream=False, + user_id="user-1", + invoke_from=InvokeFrom.SERVICE_API, + ) + + annotation = SimpleNamespace(id="ann-1", content="answer") + + with ( + patch( + "core.app.apps.chat.app_runner.db.session.scalar", + return_value=SimpleNamespace(id="app-1", tenant_id="tenant-1"), + ), + patch.object(ChatAppRunner, "organize_prompt_messages", return_value=([], [])), + patch.object(ChatAppRunner, "moderation_for_inputs", return_value=(None, {}, "hi")), + patch.object(ChatAppRunner, "query_app_annotations_to_reply", return_value=annotation), + patch.object(ChatAppRunner, "direct_output") as mock_direct, + ): + queue_manager = DummyQueueManager() + runner.run(app_generate_entity, queue_manager, SimpleNamespace(), SimpleNamespace(id="m1")) + + assert any(isinstance(item[0], QueueAnnotationReplyEvent) for item in queue_manager.published) + mock_direct.assert_called_once() + + def test_run_returns_when_hosting_moderation_blocks(self): + runner = ChatAppRunner() + app_config = SimpleNamespace( + app_id="app-1", + tenant_id="tenant-1", + prompt_template=None, + external_data_variables=[], + dataset=None, + additional_features=None, + ) + app_generate_entity = DummyGenerateEntity( + app_config=app_config, + model_conf=SimpleNamespace(provider_model_bundle=None, model=None, parameters={}, app_model_config_dict={}), + inputs={}, + query="hi", + files=[], + file_upload_config=None, + conversation_id=None, + stream=False, + user_id="user-1", + invoke_from=InvokeFrom.SERVICE_API, + ) + + with ( + patch( + "core.app.apps.chat.app_runner.db.session.scalar", + return_value=SimpleNamespace(id="app-1", tenant_id="tenant-1"), + ), + patch.object(ChatAppRunner, "organize_prompt_messages", return_value=([], [])), + patch.object(ChatAppRunner, "moderation_for_inputs", return_value=(None, {}, "hi")), + patch.object(ChatAppRunner, "query_app_annotations_to_reply", return_value=None), + patch.object(ChatAppRunner, "check_hosting_moderation", return_value=True), + ): + runner.run(app_generate_entity, DummyQueueManager(), SimpleNamespace(), SimpleNamespace(id="m1")) diff --git a/api/tests/unit_tests/core/app/apps/chat/test_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/chat/test_generate_response_converter.py new file mode 100644 index 0000000000..01272ba052 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/chat/test_generate_response_converter.py @@ -0,0 +1,65 @@ +from collections.abc import Generator + +from core.app.apps.chat.generate_response_converter import ChatAppGenerateResponseConverter +from core.app.entities.task_entities import ( + ChatbotAppBlockingResponse, + ChatbotAppStreamResponse, + ErrorStreamResponse, + MessageEndStreamResponse, + MessageStreamResponse, + PingStreamResponse, +) + + +class TestChatAppGenerateResponseConverter: + def test_convert_blocking_simple_response_metadata(self): + data = ChatbotAppBlockingResponse.Data( + id="msg-1", + mode="chat", + conversation_id="c1", + message_id="m1", + answer="hi", + metadata={"usage": {"total_tokens": 1}}, + created_at=1, + ) + blocking = ChatbotAppBlockingResponse(task_id="t1", data=data) + + response = ChatAppGenerateResponseConverter.convert_blocking_simple_response(blocking) + + assert "usage" not in response["metadata"] + + def test_convert_stream_responses(self): + def stream() -> Generator[ChatbotAppStreamResponse, None, None]: + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=PingStreamResponse(task_id="t1"), + ) + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=MessageStreamResponse(task_id="t1", id="m1", answer="hi"), + ) + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=ErrorStreamResponse(task_id="t1", err=ValueError("boom")), + ) + yield ChatbotAppStreamResponse( + conversation_id="c1", + message_id="m1", + created_at=1, + stream_response=MessageEndStreamResponse(task_id="t1", id="m1"), + ) + + full = list(ChatAppGenerateResponseConverter.convert_stream_full_response(stream())) + assert full[0] == "ping" + assert full[1]["event"] == "message" + assert full[2]["event"] == "error" + + simple = list(ChatAppGenerateResponseConverter.convert_stream_simple_response(stream())) + assert simple[0] == "ping" + assert simple[-1]["event"] == "message_end" diff --git a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py index 69d476bd13..aba7dfff8c 100644 --- a/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py +++ b/api/tests/unit_tests/core/app/apps/common/test_workflow_response_converter_truncation.py @@ -24,7 +24,7 @@ from core.app.entities.queue_entities import ( QueueNodeSucceededEvent, ) from dify_graph.entities.workflow_start_reason import WorkflowStartReason -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now from models import Account @@ -66,7 +66,7 @@ class TestWorkflowResponseConverter: node_execution_id=node_execution_id or str(uuid.uuid4()), node_id="test-node-id", node_title="Test Node", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, start_at=naive_utc_now(), in_iteration_id=None, in_loop_id=None, @@ -83,7 +83,7 @@ class TestWorkflowResponseConverter: """Create a QueueNodeSucceededEvent for testing.""" return QueueNodeSucceededEvent( node_id="test-node-id", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_execution_id=node_execution_id, start_at=naive_utc_now(), in_iteration_id=None, @@ -108,7 +108,7 @@ class TestWorkflowResponseConverter: error="oops", retry_index=1, node_id="test-node-id", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="test code", provider_type="built-in", provider_id="code", @@ -319,7 +319,7 @@ class TestWorkflowResponseConverter: iteration_event = QueueNodeSucceededEvent( node_id="iteration-node", - node_type=NodeType.ITERATION, + node_type=BuiltinNodeTypes.ITERATION, node_execution_id=str(uuid.uuid4()), start_at=naive_utc_now(), in_iteration_id=None, @@ -336,7 +336,7 @@ class TestWorkflowResponseConverter: ) assert response is None - loop_event = iteration_event.model_copy(update={"node_type": NodeType.LOOP}) + loop_event = iteration_event.model_copy(update={"node_type": BuiltinNodeTypes.LOOP}) response = converter.workflow_node_finish_to_stream_response( event=loop_event, task_id="test-task-id", @@ -478,7 +478,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_value, process_data=large_value, @@ -523,7 +523,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_value, process_data=large_value, @@ -562,7 +562,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_value, process_data=large_value, @@ -600,7 +600,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: return QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=inputs, process_data=process_data, @@ -614,7 +614,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: return QueueNodeFailedEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=inputs, process_data=process_data, @@ -628,7 +628,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: return QueueNodeExceptionEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=inputs, process_data=process_data, @@ -690,7 +690,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: start_event = QueueNodeStartedEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="Test Node", node_run_index=1, start_at=naive_utc_now(), @@ -706,7 +706,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeRetryEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_title="Test Node", node_run_index=1, start_at=naive_utc_now(), @@ -748,7 +748,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: start_event = QueueIterationStartEvent( node_execution_id="test_iter_exec_id", node_id="test_iteration", - node_type=NodeType.ITERATION, + node_type=BuiltinNodeTypes.ITERATION, node_title="Test Iteration", node_run_index=0, start_at=naive_utc_now(), @@ -776,7 +776,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: start_event = QueueLoopStartEvent( node_execution_id="test_loop_exec_id", node_id="test_loop", - node_type=NodeType.LOOP, + node_type=BuiltinNodeTypes.LOOP, node_title="Test Loop", start_at=naive_utc_now(), inputs=large_inputs, @@ -806,7 +806,7 @@ class TestWorkflowResponseConverterServiceApiTruncation: event = QueueNodeSucceededEvent( node_execution_id="test_node_exec_id", node_id="test_node", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=naive_utc_now(), inputs=large_inputs, process_data=large_process_data, diff --git a/api/tests/unit_tests/core/app/apps/completion/test_app_runner.py b/api/tests/unit_tests/core/app/apps/completion/test_app_runner.py new file mode 100644 index 0000000000..51f33bac35 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/completion/test_app_runner.py @@ -0,0 +1,162 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +import core.app.apps.completion.app_runner as module +from core.app.apps.completion.app_runner import CompletionAppRunner +from core.moderation.base import ModerationError +from dify_graph.model_runtime.entities.message_entities import ImagePromptMessageContent + + +@pytest.fixture +def runner(): + return CompletionAppRunner() + + +def _build_app_config(dataset=None, external_tools=None, additional_features=None): + app_config = MagicMock() + app_config.app_id = "app1" + app_config.tenant_id = "tenant" + app_config.prompt_template = MagicMock() + app_config.dataset = dataset + app_config.external_data_variables = external_tools or [] + app_config.additional_features = additional_features + app_config.app_model_config_dict = {"file_upload": {"enabled": True}} + return app_config + + +def _build_generate_entity(app_config, file_upload_config=None): + model_conf = MagicMock( + provider_model_bundle="bundle", + model="model", + parameters={"max_tokens": 10}, + stop=["stop"], + ) + return SimpleNamespace( + app_config=app_config, + model_conf=model_conf, + inputs={"qvar": "query_from_input"}, + query="original_query", + files=[], + file_upload_config=file_upload_config, + stream=True, + user_id="user", + invoke_from=MagicMock(), + ) + + +class TestCompletionAppRunner: + def test_run_app_not_found(self, runner, mocker): + session = mocker.MagicMock() + session.scalar.return_value = None + mocker.patch.object(module.db, "session", session) + + app_config = _build_app_config() + app_generate_entity = _build_generate_entity(app_config) + + with pytest.raises(ValueError): + runner.run(app_generate_entity, MagicMock(), MagicMock()) + + def test_run_moderation_error_outputs_direct(self, runner, mocker): + app_record = MagicMock(id="app1", tenant_id="tenant") + + session = mocker.MagicMock() + session.scalar.return_value = app_record + mocker.patch.object(module.db, "session", session) + + app_config = _build_app_config() + app_generate_entity = _build_generate_entity(app_config) + + runner.organize_prompt_messages = MagicMock(return_value=([], None)) + runner.moderation_for_inputs = MagicMock(side_effect=ModerationError("blocked")) + runner.direct_output = MagicMock() + runner._handle_invoke_result = MagicMock() + + runner.run(app_generate_entity, MagicMock(), MagicMock(id="msg")) + + runner.direct_output.assert_called_once() + runner._handle_invoke_result.assert_not_called() + + def test_run_hosting_moderation_stops(self, runner, mocker): + app_record = MagicMock(id="app1", tenant_id="tenant") + + session = mocker.MagicMock() + session.scalar.return_value = app_record + mocker.patch.object(module.db, "session", session) + + app_config = _build_app_config() + app_generate_entity = _build_generate_entity(app_config) + + runner.organize_prompt_messages = MagicMock(return_value=([], None)) + runner.moderation_for_inputs = MagicMock(return_value=(None, app_generate_entity.inputs, "query")) + runner.check_hosting_moderation = MagicMock(return_value=True) + runner._handle_invoke_result = MagicMock() + + runner.run(app_generate_entity, MagicMock(), MagicMock(id="msg")) + + runner._handle_invoke_result.assert_not_called() + + def test_run_dataset_and_external_tools_flow(self, runner, mocker): + app_record = MagicMock(id="app1", tenant_id="tenant") + + session = mocker.MagicMock() + session.scalar.return_value = app_record + session.close = MagicMock() + mocker.patch.object(module.db, "session", session) + + retrieve_config = MagicMock(query_variable="qvar") + dataset_config = MagicMock(dataset_ids=["ds"], retrieve_config=retrieve_config) + additional_features = MagicMock(show_retrieve_source=True) + app_config = _build_app_config( + dataset=dataset_config, + external_tools=["tool"], + additional_features=additional_features, + ) + + file_upload_config = MagicMock() + file_upload_config.image_config.detail = ImagePromptMessageContent.DETAIL.HIGH + + app_generate_entity = _build_generate_entity(app_config, file_upload_config=file_upload_config) + + runner.organize_prompt_messages = MagicMock(side_effect=[(["pm1"], ["stop"]), (["pm2"], ["stop"])]) + runner.moderation_for_inputs = MagicMock(return_value=(None, app_generate_entity.inputs, "query")) + runner.fill_in_inputs_from_external_data_tools = MagicMock(return_value=app_generate_entity.inputs) + runner.check_hosting_moderation = MagicMock(return_value=False) + runner.recalc_llm_max_tokens = MagicMock() + runner._handle_invoke_result = MagicMock() + + dataset_retrieval = MagicMock() + dataset_retrieval.retrieve.return_value = ("ctx", ["file1"]) + mocker.patch.object(module, "DatasetRetrieval", return_value=dataset_retrieval) + + model_instance = MagicMock() + model_instance.invoke_llm.return_value = "invoke_result" + mocker.patch.object(module, "ModelInstance", return_value=model_instance) + + runner.run(app_generate_entity, MagicMock(), MagicMock(id="msg", tenant_id="tenant")) + + dataset_retrieval.retrieve.assert_called_once() + assert dataset_retrieval.retrieve.call_args.kwargs["query"] == "query_from_input" + runner._handle_invoke_result.assert_called_once() + + def test_run_uses_low_image_detail_default(self, runner, mocker): + app_record = MagicMock(id="app1", tenant_id="tenant") + + session = mocker.MagicMock() + session.scalar.return_value = app_record + mocker.patch.object(module.db, "session", session) + + app_config = _build_app_config() + app_generate_entity = _build_generate_entity(app_config, file_upload_config=None) + + runner.organize_prompt_messages = MagicMock(return_value=([], None)) + runner.moderation_for_inputs = MagicMock(return_value=(None, app_generate_entity.inputs, "query")) + runner.check_hosting_moderation = MagicMock(return_value=True) + + runner.run(app_generate_entity, MagicMock(), MagicMock(id="msg")) + + assert ( + runner.organize_prompt_messages.call_args.kwargs["image_detail_config"] + == ImagePromptMessageContent.DETAIL.LOW + ) diff --git a/api/tests/unit_tests/core/app/apps/completion/test_completion_app_config_manager.py b/api/tests/unit_tests/core/app/apps/completion/test_completion_app_config_manager.py new file mode 100644 index 0000000000..024bd8f302 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/completion/test_completion_app_config_manager.py @@ -0,0 +1,122 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock + +import core.app.apps.completion.app_config_manager as module +from core.app.app_config.entities import EasyUIBasedAppModelConfigFrom +from core.app.apps.completion.app_config_manager import CompletionAppConfigManager +from models.model import AppMode + + +class TestCompletionAppConfigManager: + def test_get_app_config_with_override(self, mocker): + app_model = MagicMock(tenant_id="tenant", id="app1", mode=AppMode.COMPLETION.value) + app_model_config = MagicMock(id="cfg1") + app_model_config.to_dict.return_value = {"model": {"provider": "x"}} + + override_config = {"model": {"provider": "override"}} + + mocker.patch.object(module.ModelConfigManager, "convert", return_value="model") + mocker.patch.object(module.PromptTemplateConfigManager, "convert", return_value="prompt") + mocker.patch.object(module.SensitiveWordAvoidanceConfigManager, "convert", return_value="moderation") + mocker.patch.object(module.DatasetConfigManager, "convert", return_value="dataset") + mocker.patch.object(CompletionAppConfigManager, "convert_features", return_value="features") + mocker.patch.object(module.BasicVariablesConfigManager, "convert", return_value=(["v1"], ["ext1"])) + mocker.patch.object(module, "CompletionAppConfig", side_effect=lambda **kwargs: SimpleNamespace(**kwargs)) + + result = CompletionAppConfigManager.get_app_config( + app_model=app_model, + app_model_config=app_model_config, + override_config_dict=override_config, + ) + + assert result.app_model_config_from == EasyUIBasedAppModelConfigFrom.ARGS + assert result.app_model_config_dict == override_config + assert result.variables == ["v1"] + assert result.external_data_variables == ["ext1"] + assert result.app_mode == AppMode.COMPLETION + + def test_get_app_config_without_override_uses_model_config(self, mocker): + app_model = MagicMock(tenant_id="tenant", id="app1", mode=AppMode.COMPLETION.value) + app_model_config = MagicMock(id="cfg1") + app_model_config.to_dict.return_value = {"model": {"provider": "x"}} + + mocker.patch.object(module.ModelConfigManager, "convert", return_value="model") + mocker.patch.object(module.PromptTemplateConfigManager, "convert", return_value="prompt") + mocker.patch.object(module.SensitiveWordAvoidanceConfigManager, "convert", return_value="moderation") + mocker.patch.object(module.DatasetConfigManager, "convert", return_value="dataset") + mocker.patch.object(CompletionAppConfigManager, "convert_features", return_value="features") + mocker.patch.object(module.BasicVariablesConfigManager, "convert", return_value=([], [])) + mocker.patch.object(module, "CompletionAppConfig", side_effect=lambda **kwargs: SimpleNamespace(**kwargs)) + + result = CompletionAppConfigManager.get_app_config(app_model=app_model, app_model_config=app_model_config) + + assert result.app_model_config_from == EasyUIBasedAppModelConfigFrom.APP_LATEST_CONFIG + assert result.app_model_config_dict == {"model": {"provider": "x"}} + + def test_config_validate_filters_related_keys(self, mocker): + config = { + "model": {"provider": "x"}, + "variables": ["v"], + "file_upload": {"enabled": True}, + "prompt": {"template": "t"}, + "dataset": {"enabled": True}, + "tts": {"enabled": True}, + "more_like_this": {"enabled": True}, + "moderation": {"enabled": True}, + "extra": "drop", + } + + mocker.patch.object( + module.ModelConfigManager, + "validate_and_set_defaults", + return_value=(config, ["model"]), + ) + mocker.patch.object( + module.BasicVariablesConfigManager, + "validate_and_set_defaults", + return_value=(config, ["variables"]), + ) + mocker.patch.object( + module.FileUploadConfigManager, + "validate_and_set_defaults", + return_value=(config, ["file_upload"]), + ) + mocker.patch.object( + module.PromptTemplateConfigManager, + "validate_and_set_defaults", + return_value=(config, ["prompt"]), + ) + mocker.patch.object( + module.DatasetConfigManager, + "validate_and_set_defaults", + return_value=(config, ["dataset"]), + ) + mocker.patch.object( + module.TextToSpeechConfigManager, + "validate_and_set_defaults", + return_value=(config, ["tts"]), + ) + mocker.patch.object( + module.MoreLikeThisConfigManager, + "validate_and_set_defaults", + return_value=(config, ["more_like_this"]), + ) + mocker.patch.object( + module.SensitiveWordAvoidanceConfigManager, + "validate_and_set_defaults", + return_value=(config, ["moderation"]), + ) + + filtered = CompletionAppConfigManager.config_validate("tenant", config) + + assert "extra" not in filtered + assert set(filtered.keys()) == { + "model", + "variables", + "file_upload", + "prompt", + "dataset", + "tts", + "more_like_this", + "moderation", + } diff --git a/api/tests/unit_tests/core/app/apps/completion/test_completion_completion_app_generator.py b/api/tests/unit_tests/core/app/apps/completion/test_completion_completion_app_generator.py new file mode 100644 index 0000000000..2714757353 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/completion/test_completion_completion_app_generator.py @@ -0,0 +1,321 @@ +import contextlib +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest +from pydantic import ValidationError + +import core.app.apps.completion.app_generator as module +from core.app.apps.completion.app_generator import CompletionAppGenerator +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.entities.app_invoke_entities import InvokeFrom +from dify_graph.model_runtime.errors.invoke import InvokeAuthorizationError +from services.errors.app import MoreLikeThisDisabledError +from services.errors.message import MessageNotExistsError + + +@pytest.fixture +def generator(mocker): + gen = CompletionAppGenerator() + + mocker.patch.object(module, "copy_current_request_context", side_effect=lambda fn: fn) + + flask_app = MagicMock() + flask_app.app_context.return_value = contextlib.nullcontext() + mocker.patch.object(module, "current_app", MagicMock(_get_current_object=MagicMock(return_value=flask_app))) + + thread = MagicMock() + mocker.patch.object(module.threading, "Thread", return_value=thread) + + mocker.patch.object(module, "MessageBasedAppQueueManager", return_value=MagicMock()) + mocker.patch.object(module, "TraceQueueManager", return_value=MagicMock()) + mocker.patch.object(module, "CompletionAppGenerateEntity", side_effect=lambda **kwargs: SimpleNamespace(**kwargs)) + + return gen + + +def _build_app_model(): + return MagicMock(tenant_id="tenant", id="app1", mode="completion") + + +def _build_user(): + return MagicMock(id="user", session_id="session") + + +def _build_app_model_config(): + config = MagicMock(id="cfg") + config.to_dict.return_value = {"model": {"provider": "x"}} + return config + + +class TestCompletionAppGenerator: + def test_generate_invalid_query_type(self, generator): + with pytest.raises(ValueError): + generator.generate( + app_model=_build_app_model(), + user=_build_user(), + args={"query": 123, "inputs": {}, "files": []}, + invoke_from=InvokeFrom.WEB_APP, + streaming=True, + ) + + def test_generate_override_not_debugger(self, generator): + with pytest.raises(ValueError): + generator.generate( + app_model=_build_app_model(), + user=_build_user(), + args={"query": "q", "inputs": {}, "files": [], "model_config": {}}, + invoke_from=InvokeFrom.WEB_APP, + streaming=False, + ) + + def test_generate_success_no_file_config(self, generator, mocker): + app_model_config = _build_app_model_config() + mocker.patch.object(generator, "_get_app_model_config", return_value=app_model_config) + mocker.patch.object(module.FileUploadConfigManager, "convert", return_value=None) + mocker.patch.object(module.file_factory, "build_from_mappings") + + app_config = MagicMock(variables=["v"], to_dict=MagicMock(return_value={})) + mocker.patch.object(module.CompletionAppConfigManager, "get_app_config", return_value=app_config) + mocker.patch.object(module.ModelConfigConverter, "convert", return_value=MagicMock()) + + mocker.patch.object(generator, "_prepare_user_inputs", return_value={"k": "v"}) + + conversation = MagicMock(id="conv", mode="completion") + message = MagicMock(id="msg") + mocker.patch.object(generator, "_init_generate_records", return_value=(conversation, message)) + + mocker.patch.object(generator, "_handle_response", return_value="response") + mocker.patch.object(module.CompletionAppGenerateResponseConverter, "convert", return_value="converted") + + result = generator.generate( + app_model=_build_app_model(), + user=_build_user(), + args={"query": "q", "inputs": {"a": 1}, "files": []}, + invoke_from=InvokeFrom.WEB_APP, + streaming=True, + ) + + assert result == "converted" + module.file_factory.build_from_mappings.assert_not_called() + + def test_generate_success_with_files(self, generator, mocker): + app_model_config = _build_app_model_config() + mocker.patch.object(generator, "_get_app_model_config", return_value=app_model_config) + + file_extra_config = MagicMock() + mocker.patch.object(module.FileUploadConfigManager, "convert", return_value=file_extra_config) + mocker.patch.object(module.file_factory, "build_from_mappings", return_value=["file1"]) + + app_config = MagicMock(variables=["v"], to_dict=MagicMock(return_value={})) + mocker.patch.object(module.CompletionAppConfigManager, "get_app_config", return_value=app_config) + mocker.patch.object(module.ModelConfigConverter, "convert", return_value=MagicMock()) + + mocker.patch.object(generator, "_prepare_user_inputs", return_value={"k": "v"}) + + conversation = MagicMock(id="conv", mode="completion") + message = MagicMock(id="msg") + mocker.patch.object(generator, "_init_generate_records", return_value=(conversation, message)) + + mocker.patch.object(generator, "_handle_response", return_value="response") + mocker.patch.object(module.CompletionAppGenerateResponseConverter, "convert", return_value="converted") + + result = generator.generate( + app_model=_build_app_model(), + user=_build_user(), + args={"query": "q", "inputs": {"a": 1}, "files": [{"id": "f"}]}, + invoke_from=InvokeFrom.WEB_APP, + streaming=False, + ) + + assert result == "converted" + module.file_factory.build_from_mappings.assert_called_once() + + def test_generate_override_model_config_debugger(self, generator, mocker): + app_model_config = _build_app_model_config() + mocker.patch.object(generator, "_get_app_model_config", return_value=app_model_config) + + override_config = {"model": {"provider": "override"}} + mocker.patch.object(module.CompletionAppConfigManager, "config_validate", return_value=override_config) + + app_config = MagicMock(variables=["v"], to_dict=MagicMock(return_value={})) + get_app_config = mocker.patch.object( + module.CompletionAppConfigManager, + "get_app_config", + return_value=app_config, + ) + mocker.patch.object(module.FileUploadConfigManager, "convert", return_value=None) + mocker.patch.object(module.ModelConfigConverter, "convert", return_value=MagicMock()) + mocker.patch.object(generator, "_prepare_user_inputs", return_value={"k": "v"}) + mocker.patch.object( + generator, + "_init_generate_records", + return_value=(MagicMock(id="conv", mode="completion"), MagicMock(id="msg")), + ) + mocker.patch.object(generator, "_handle_response", return_value="response") + mocker.patch.object(module.CompletionAppGenerateResponseConverter, "convert", return_value="converted") + + generator.generate( + app_model=_build_app_model(), + user=_build_user(), + args={"query": "q", "inputs": {}, "files": [], "model_config": override_config}, + invoke_from=InvokeFrom.DEBUGGER, + streaming=True, + ) + + assert get_app_config.call_args.kwargs["override_config_dict"] == override_config + + def test_generate_more_like_this_message_not_found(self, generator, mocker): + session = mocker.MagicMock() + session.scalar.return_value = None + mocker.patch.object(module.db, "session", session) + + with pytest.raises(MessageNotExistsError): + generator.generate_more_like_this( + app_model=_build_app_model(), + message_id="msg", + user=_build_user(), + invoke_from=InvokeFrom.WEB_APP, + ) + + def test_generate_more_like_this_disabled(self, generator, mocker): + app_model = _build_app_model() + app_model.app_model_config = MagicMock(more_like_this=False, more_like_this_dict={"enabled": False}) + + message = MagicMock() + session = mocker.MagicMock() + session.scalar.return_value = message + mocker.patch.object(module.db, "session", session) + + with pytest.raises(MoreLikeThisDisabledError): + generator.generate_more_like_this( + app_model=app_model, + message_id="msg", + user=_build_user(), + invoke_from=InvokeFrom.WEB_APP, + ) + + def test_generate_more_like_this_app_model_config_missing(self, generator, mocker): + app_model = _build_app_model() + app_model.app_model_config = None + + message = MagicMock() + session = mocker.MagicMock() + session.scalar.return_value = message + mocker.patch.object(module.db, "session", session) + + with pytest.raises(MoreLikeThisDisabledError): + generator.generate_more_like_this( + app_model=app_model, + message_id="msg", + user=_build_user(), + invoke_from=InvokeFrom.WEB_APP, + ) + + def test_generate_more_like_this_message_config_none(self, generator, mocker): + app_model = _build_app_model() + app_model.app_model_config = MagicMock(more_like_this=True, more_like_this_dict={"enabled": True}) + + message = MagicMock(app_model_config=None) + session = mocker.MagicMock() + session.scalar.return_value = message + mocker.patch.object(module.db, "session", session) + + with pytest.raises(ValueError): + generator.generate_more_like_this( + app_model=app_model, + message_id="msg", + user=_build_user(), + invoke_from=InvokeFrom.WEB_APP, + ) + + def test_generate_more_like_this_success(self, generator, mocker): + app_model = _build_app_model() + app_model.app_model_config = MagicMock(more_like_this=True, more_like_this_dict={"enabled": True}) + + message = MagicMock() + message.message_files = [{"id": "f"}] + message.inputs = {"a": 1} + message.query = "q" + + app_model_config = MagicMock() + app_model_config.to_dict.return_value = { + "model": {"completion_params": {"temperature": 0.1}}, + "file_upload": {"enabled": True}, + } + message.app_model_config = app_model_config + + session = mocker.MagicMock() + session.scalar.return_value = message + mocker.patch.object(module.db, "session", session) + + file_extra_config = MagicMock() + mocker.patch.object(module.FileUploadConfigManager, "convert", return_value=file_extra_config) + mocker.patch.object(module.file_factory, "build_from_mappings", return_value=["file1"]) + + app_config = MagicMock(variables=["v"], to_dict=MagicMock(return_value={})) + get_app_config = mocker.patch.object( + module.CompletionAppConfigManager, + "get_app_config", + return_value=app_config, + ) + mocker.patch.object(module.ModelConfigConverter, "convert", return_value=MagicMock()) + + mocker.patch.object( + generator, + "_init_generate_records", + return_value=(MagicMock(id="conv", mode="completion"), MagicMock(id="msg")), + ) + mocker.patch.object(generator, "_handle_response", return_value="response") + mocker.patch.object(module.CompletionAppGenerateResponseConverter, "convert", return_value="converted") + + result = generator.generate_more_like_this( + app_model=app_model, + message_id="msg", + user=_build_user(), + invoke_from=InvokeFrom.WEB_APP, + stream=True, + ) + + assert result == "converted" + override_dict = get_app_config.call_args.kwargs["override_config_dict"] + assert override_dict["model"]["completion_params"]["temperature"] == 0.9 + + @pytest.mark.parametrize( + ("error", "should_publish"), + [ + (GenerateTaskStoppedError(), False), + (InvokeAuthorizationError("bad"), True), + ( + ValidationError.from_exception_data( + "Model", + [{"type": "missing", "loc": ("x",), "msg": "Field required", "input": {}}], + ), + True, + ), + (ValueError("bad"), True), + (RuntimeError("boom"), True), + ], + ) + def test_generate_worker_error_handling(self, generator, mocker, error, should_publish): + flask_app = MagicMock() + flask_app.app_context.return_value = contextlib.nullcontext() + + session = mocker.MagicMock() + mocker.patch.object(module.db, "session", session) + + mocker.patch.object(generator, "_get_message", return_value=MagicMock()) + + runner_instance = MagicMock() + runner_instance.run.side_effect = error + mocker.patch.object(module, "CompletionAppRunner", return_value=runner_instance) + + queue_manager = MagicMock() + generator._generate_worker( + flask_app=flask_app, + application_generate_entity=MagicMock(), + queue_manager=queue_manager, + message_id="msg", + ) + + assert queue_manager.publish_error.called is should_publish diff --git a/api/tests/unit_tests/core/app/apps/completion/test_completion_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/completion/test_completion_generate_response_converter.py new file mode 100644 index 0000000000..cf473dfbeb --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/completion/test_completion_generate_response_converter.py @@ -0,0 +1,153 @@ +from collections.abc import Generator + +from core.app.apps.completion.generate_response_converter import CompletionAppGenerateResponseConverter +from core.app.entities.task_entities import ( + AppStreamResponse, + CompletionAppBlockingResponse, + CompletionAppStreamResponse, + ErrorStreamResponse, + MessageEndStreamResponse, + MessageStreamResponse, + PingStreamResponse, +) + + +class TestCompletionAppGenerateResponseConverter: + def test_convert_blocking_full_response(self): + blocking = CompletionAppBlockingResponse( + task_id="task", + data=CompletionAppBlockingResponse.Data( + id="id", + mode="completion", + message_id="msg", + answer="answer", + metadata={"k": "v"}, + created_at=123, + ), + ) + + result = CompletionAppGenerateResponseConverter.convert_blocking_full_response(blocking) + + assert result["event"] == "message" + assert result["task_id"] == "task" + assert result["message_id"] == "msg" + assert result["answer"] == "answer" + assert result["metadata"] == {"k": "v"} + + def test_convert_blocking_simple_response_metadata_simplified(self): + metadata = { + "retriever_resources": [ + { + "segment_id": "s", + "position": 1, + "document_name": "doc", + "score": 0.9, + "content": "c", + "summary": "sum", + "extra": "x", + } + ], + "annotation_reply": {"a": 1}, + "usage": {"t": 2}, + } + blocking = CompletionAppBlockingResponse( + task_id="task", + data=CompletionAppBlockingResponse.Data( + id="id", + mode="completion", + message_id="msg", + answer="answer", + metadata=metadata, + created_at=123, + ), + ) + + result = CompletionAppGenerateResponseConverter.convert_blocking_simple_response(blocking) + + assert "annotation_reply" not in result["metadata"] + assert "usage" not in result["metadata"] + assert result["metadata"]["retriever_resources"][0]["segment_id"] == "s" + assert "extra" not in result["metadata"]["retriever_resources"][0] + + def test_convert_blocking_simple_response_metadata_not_dict(self): + data = CompletionAppBlockingResponse.Data.model_construct( + id="id", + mode="completion", + message_id="msg", + answer="answer", + metadata="bad", + created_at=123, + ) + blocking = CompletionAppBlockingResponse.model_construct(task_id="task", data=data) + + result = CompletionAppGenerateResponseConverter.convert_blocking_simple_response(blocking) + + assert result["metadata"] == {} + + def test_convert_stream_full_response(self): + def stream() -> Generator[AppStreamResponse, None, None]: + yield CompletionAppStreamResponse( + stream_response=PingStreamResponse(task_id="t"), + message_id="m", + created_at=1, + ) + yield CompletionAppStreamResponse( + stream_response=ErrorStreamResponse(task_id="t", err=ValueError("bad")), + message_id="m", + created_at=2, + ) + yield CompletionAppStreamResponse( + stream_response=MessageStreamResponse(task_id="t", id="1", answer="ok"), + message_id="m", + created_at=3, + ) + + result = list(CompletionAppGenerateResponseConverter.convert_stream_full_response(stream())) + + assert result[0] == "ping" + assert result[1]["event"] == "error" + assert result[1]["code"] == "invalid_param" + assert result[2]["event"] == "message" + + def test_convert_stream_simple_response(self): + def stream() -> Generator[AppStreamResponse, None, None]: + yield CompletionAppStreamResponse( + stream_response=PingStreamResponse(task_id="t"), + message_id="m", + created_at=1, + ) + yield CompletionAppStreamResponse( + stream_response=MessageEndStreamResponse( + task_id="t", + id="end", + metadata={ + "retriever_resources": [ + { + "segment_id": "s", + "position": 1, + "document_name": "doc", + "score": 0.9, + "content": "c", + "summary": "sum", + } + ], + "annotation_reply": {"a": 1}, + "usage": {"t": 2}, + }, + ), + message_id="m", + created_at=2, + ) + yield CompletionAppStreamResponse( + stream_response=ErrorStreamResponse(task_id="t", err=ValueError("bad")), + message_id="m", + created_at=3, + ) + + result = list(CompletionAppGenerateResponseConverter.convert_stream_simple_response(stream())) + + assert result[0] == "ping" + assert result[1]["event"] == "message_end" + assert "annotation_reply" not in result[1]["metadata"] + assert "usage" not in result[1]["metadata"] + assert result[2]["event"] == "error" diff --git a/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_config_manager.py b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_config_manager.py new file mode 100644 index 0000000000..5d4c9bcde0 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_config_manager.py @@ -0,0 +1,55 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock + +import core.app.apps.pipeline.pipeline_config_manager as module +from core.app.apps.pipeline.pipeline_config_manager import PipelineConfigManager +from models.model import AppMode + + +def test_get_pipeline_config(mocker): + pipeline = MagicMock(tenant_id="tenant", id="pipe1") + workflow = MagicMock(id="wf1") + + mocker.patch.object( + module.WorkflowVariablesConfigManager, + "convert_rag_pipeline_variable", + return_value=["var1"], + ) + mocker.patch.object(module, "PipelineConfig", side_effect=lambda **kwargs: SimpleNamespace(**kwargs)) + + result = PipelineConfigManager.get_pipeline_config(pipeline=pipeline, workflow=workflow, start_node_id="start") + + assert result.tenant_id == "tenant" + assert result.app_id == "pipe1" + assert result.workflow_id == "wf1" + assert result.app_mode == AppMode.RAG_PIPELINE + assert result.rag_pipeline_variables == ["var1"] + + +def test_config_validate_filters_related_keys(mocker): + config = { + "file_upload": {"enabled": True}, + "tts": {"enabled": True}, + "moderation": {"enabled": True}, + "extra": "drop", + } + + mocker.patch.object( + module.FileUploadConfigManager, + "validate_and_set_defaults", + return_value=(config, ["file_upload"]), + ) + mocker.patch.object( + module.TextToSpeechConfigManager, + "validate_and_set_defaults", + return_value=(config, ["tts"]), + ) + mocker.patch.object( + module.SensitiveWordAvoidanceConfigManager, + "validate_and_set_defaults", + return_value=(config, ["moderation"]), + ) + + filtered = PipelineConfigManager.config_validate("tenant", config) + + assert set(filtered.keys()) == {"file_upload", "tts", "moderation"} diff --git a/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_generate_response_converter.py new file mode 100644 index 0000000000..94ed8166b9 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_generate_response_converter.py @@ -0,0 +1,111 @@ +from collections.abc import Generator + +from core.app.apps.pipeline.generate_response_converter import WorkflowAppGenerateResponseConverter +from core.app.entities.task_entities import ( + AppStreamResponse, + ErrorStreamResponse, + NodeFinishStreamResponse, + NodeStartStreamResponse, + PingStreamResponse, + WorkflowAppBlockingResponse, + WorkflowAppStreamResponse, +) +from dify_graph.enums import WorkflowExecutionStatus, WorkflowNodeExecutionStatus + + +def test_convert_blocking_full_and_simple_response(): + blocking = WorkflowAppBlockingResponse( + task_id="task", + workflow_run_id="run", + data=WorkflowAppBlockingResponse.Data( + id="id", + workflow_id="wf", + status=WorkflowExecutionStatus.SUCCEEDED, + outputs={"k": "v"}, + error=None, + elapsed_time=0.1, + total_tokens=10, + total_steps=1, + created_at=1, + finished_at=2, + ), + ) + + full = WorkflowAppGenerateResponseConverter.convert_blocking_full_response(blocking) + simple = WorkflowAppGenerateResponseConverter.convert_blocking_simple_response(blocking) + + assert full == simple + assert full["workflow_run_id"] == "run" + assert full["data"]["status"] == WorkflowExecutionStatus.SUCCEEDED + + +def test_convert_stream_full_response(): + def stream() -> Generator[AppStreamResponse, None, None]: + yield WorkflowAppStreamResponse( + stream_response=PingStreamResponse(task_id="t"), + workflow_run_id="run", + ) + yield WorkflowAppStreamResponse( + stream_response=ErrorStreamResponse(task_id="t", err=ValueError("bad")), + workflow_run_id="run", + ) + + result = list(WorkflowAppGenerateResponseConverter.convert_stream_full_response(stream())) + + assert result[0] == "ping" + assert result[1]["event"] == "error" + assert result[1]["code"] == "invalid_param" + + +def test_convert_stream_simple_response_node_ignore_details(): + node_start = NodeStartStreamResponse( + task_id="t", + workflow_run_id="run", + data=NodeStartStreamResponse.Data( + id="nid", + node_id="node", + node_type="type", + title="Title", + index=1, + predecessor_node_id=None, + inputs={"a": 1}, + inputs_truncated=False, + created_at=1, + ), + ) + node_finish = NodeFinishStreamResponse( + task_id="t", + workflow_run_id="run", + data=NodeFinishStreamResponse.Data( + id="nid", + node_id="node", + node_type="type", + title="Title", + index=1, + predecessor_node_id=None, + inputs={"a": 1}, + inputs_truncated=False, + process_data=None, + process_data_truncated=False, + outputs={"b": 2}, + outputs_truncated=False, + status=WorkflowNodeExecutionStatus.SUCCEEDED, + error=None, + elapsed_time=0.1, + execution_metadata=None, + created_at=1, + finished_at=2, + files=[], + ), + ) + + def stream() -> Generator[AppStreamResponse, None, None]: + yield WorkflowAppStreamResponse(stream_response=node_start, workflow_run_id="run") + yield WorkflowAppStreamResponse(stream_response=node_finish, workflow_run_id="run") + + result = list(WorkflowAppGenerateResponseConverter.convert_stream_simple_response(stream())) + + assert result[0]["event"] == "node_started" + assert result[0]["data"]["inputs"] is None + assert result[1]["event"] == "node_finished" + assert result[1]["data"]["inputs"] is None diff --git a/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_generator.py b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_generator.py new file mode 100644 index 0000000000..06face41fe --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_generator.py @@ -0,0 +1,699 @@ +import contextlib +from types import SimpleNamespace +from unittest.mock import MagicMock, PropertyMock + +import pytest + +import core.app.apps.pipeline.pipeline_generator as module +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.entities.app_invoke_entities import InvokeFrom +from core.datasource.entities.datasource_entities import DatasourceProviderType + + +class FakeRagPipelineGenerateEntity(SimpleNamespace): + class SingleIterationRunEntity(SimpleNamespace): + pass + + class SingleLoopRunEntity(SimpleNamespace): + pass + + def model_dump(self): + return dict(self.__dict__) + + +@pytest.fixture +def generator(mocker): + gen = module.PipelineGenerator() + + mocker.patch.object(module, "RagPipelineGenerateEntity", FakeRagPipelineGenerateEntity) + mocker.patch.object(module, "RagPipelineInvokeEntity", side_effect=lambda **kwargs: kwargs) + mocker.patch.object(module.contexts, "plugin_tool_providers", SimpleNamespace(set=MagicMock())) + mocker.patch.object(module.contexts, "plugin_tool_providers_lock", SimpleNamespace(set=MagicMock())) + + return gen + + +def _build_pipeline_dataset(): + return SimpleNamespace( + id="ds", + name="dataset", + description="desc", + chunk_structure="chunk", + built_in_field_enabled=True, + tenant_id="tenant", + ) + + +def _build_pipeline(): + pipeline = MagicMock(tenant_id="tenant", id="pipe") + pipeline.retrieve_dataset.return_value = _build_pipeline_dataset() + return pipeline + + +def _build_workflow(): + return MagicMock(id="wf", graph_dict={"nodes": [], "edges": []}, tenant_id="tenant") + + +def _build_user(): + return MagicMock(id="user", name="User", session_id="session") + + +def _build_args(): + return { + "inputs": {"k": "v"}, + "start_node_id": "start", + "datasource_type": DatasourceProviderType.LOCAL_FILE.value, + "datasource_info_list": [{"name": "file"}], + } + + +def _patch_session(mocker, session): + mocker.patch.object(module, "Session", return_value=session) + mocker.patch.object(type(module.db), "engine", new_callable=PropertyMock, return_value=MagicMock()) + + +def _dummy_preserve(*args, **kwargs): + return contextlib.nullcontext() + + +class DummySession: + def __init__(self): + self.scalar = MagicMock() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + +def test_generate_dataset_missing(generator, mocker): + pipeline = _build_pipeline() + pipeline.retrieve_dataset.return_value = None + + session = DummySession() + _patch_session(mocker, session) + + with pytest.raises(ValueError): + generator.generate( + pipeline=pipeline, + workflow=_build_workflow(), + user=_build_user(), + args=_build_args(), + invoke_from=InvokeFrom.WEB_APP, + streaming=False, + ) + + +def test_generate_debugger_calls_generate(generator, mocker): + pipeline = _build_pipeline() + workflow = _build_workflow() + + session = DummySession() + _patch_session(mocker, session) + + mocker.patch.object( + generator, + "_format_datasource_info_list", + return_value=[{"name": "file"}], + ) + mocker.patch.object( + module.PipelineConfigManager, + "get_pipeline_config", + return_value=SimpleNamespace(app_id="pipe", rag_pipeline_variables=[]), + ) + mocker.patch.object(generator, "_prepare_user_inputs", return_value={"k": "v"}) + + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_execution_repository", + return_value=MagicMock(), + ) + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_node_execution_repository", + return_value=MagicMock(), + ) + + mocker.patch.object(generator, "_generate", return_value={"result": "ok"}) + + result = generator.generate( + pipeline=pipeline, + workflow=workflow, + user=_build_user(), + args=_build_args(), + invoke_from=InvokeFrom.DEBUGGER, + streaming=True, + ) + + assert result == {"result": "ok"} + + +def test_generate_published_pipeline_creates_documents_and_delay(generator, mocker): + pipeline = _build_pipeline() + workflow = _build_workflow() + + session = DummySession() + _patch_session(mocker, session) + + datasource_info_list = [{"name": "file1"}, {"name": "file2"}] + + mocker.patch.object( + generator, + "_format_datasource_info_list", + return_value=datasource_info_list, + ) + mocker.patch.object( + module.PipelineConfigManager, + "get_pipeline_config", + return_value=SimpleNamespace(app_id="pipe", rag_pipeline_variables=[]), + ) + mocker.patch.object(generator, "_prepare_user_inputs", return_value={"k": "v"}) + + mocker.patch("services.dataset_service.DocumentService.get_documents_position", return_value=1) + + document1 = SimpleNamespace( + id="doc1", + position=1, + data_source_type=DatasourceProviderType.LOCAL_FILE, + data_source_info="{}", + name="file1", + indexing_status="", + error=None, + enabled=True, + ) + document2 = SimpleNamespace( + id="doc2", + position=2, + data_source_type=DatasourceProviderType.LOCAL_FILE, + data_source_info="{}", + name="file2", + indexing_status="", + error=None, + enabled=True, + ) + mocker.patch.object(generator, "_build_document", side_effect=[document1, document2]) + + mocker.patch.object(module, "DocumentPipelineExecutionLog", return_value=MagicMock()) + + db_session = MagicMock() + mocker.patch.object(module.db, "session", db_session) + + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_execution_repository", + return_value=MagicMock(), + ) + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_node_execution_repository", + return_value=MagicMock(), + ) + + task_proxy = MagicMock() + mocker.patch.object(module, "RagPipelineTaskProxy", return_value=task_proxy) + + result = generator.generate( + pipeline=pipeline, + workflow=workflow, + user=_build_user(), + args=_build_args(), + invoke_from=InvokeFrom.PUBLISHED_PIPELINE, + streaming=False, + ) + + assert result["batch"] + assert len(result["documents"]) == 2 + task_proxy.delay.assert_called_once() + + +def test_generate_is_retry_calls_generate(generator, mocker): + pipeline = _build_pipeline() + workflow = _build_workflow() + + session = DummySession() + _patch_session(mocker, session) + + mocker.patch.object( + generator, + "_format_datasource_info_list", + return_value=[{"name": "file"}], + ) + mocker.patch.object( + module.PipelineConfigManager, + "get_pipeline_config", + return_value=SimpleNamespace(app_id="pipe", rag_pipeline_variables=[]), + ) + mocker.patch.object(generator, "_prepare_user_inputs", return_value={"k": "v"}) + + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_execution_repository", + return_value=MagicMock(), + ) + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_node_execution_repository", + return_value=MagicMock(), + ) + + mocker.patch.object(generator, "_generate", return_value={"result": "ok"}) + + result = generator.generate( + pipeline=pipeline, + workflow=workflow, + user=_build_user(), + args=_build_args(), + invoke_from=InvokeFrom.PUBLISHED_PIPELINE, + streaming=True, + is_retry=True, + ) + + assert result == {"result": "ok"} + + +def test_generate_worker_handles_errors(generator, mocker): + flask_app = MagicMock() + flask_app.app_context.return_value = contextlib.nullcontext() + mocker.patch.object(module, "preserve_flask_contexts", _dummy_preserve) + mocker.patch.object(module.db, "session", MagicMock(close=MagicMock())) + mocker.patch.object(type(module.db), "engine", new_callable=PropertyMock, return_value=MagicMock()) + + application_generate_entity = FakeRagPipelineGenerateEntity( + app_config=SimpleNamespace(tenant_id="tenant", app_id="pipe", workflow_id="wf"), + invoke_from=InvokeFrom.WEB_APP, + user_id="user", + ) + + session = DummySession() + session.scalar.side_effect = [MagicMock(), MagicMock(session_id="session")] + _patch_session(mocker, session) + + runner_instance = MagicMock() + runner_instance.run.side_effect = ValueError("bad") + mocker.patch.object(module, "PipelineRunner", return_value=runner_instance) + + queue_manager = MagicMock() + generator._generate_worker( + flask_app=flask_app, + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + context=contextlib.nullcontext(), + variable_loader=MagicMock(), + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + ) + + queue_manager.publish_error.assert_called_once() + + +def test_generate_worker_sets_system_user_id_for_external_call(generator, mocker): + flask_app = MagicMock() + flask_app.app_context.return_value = contextlib.nullcontext() + mocker.patch.object(module, "preserve_flask_contexts", _dummy_preserve) + mocker.patch.object(module.db, "session", MagicMock(close=MagicMock())) + mocker.patch.object(type(module.db), "engine", new_callable=PropertyMock, return_value=MagicMock()) + + application_generate_entity = FakeRagPipelineGenerateEntity( + app_config=SimpleNamespace(tenant_id="tenant", app_id="pipe", workflow_id="wf"), + invoke_from=InvokeFrom.WEB_APP, + user_id="user", + ) + + session = DummySession() + session.scalar.side_effect = [MagicMock(), MagicMock(session_id="session")] + _patch_session(mocker, session) + + runner_instance = MagicMock() + mocker.patch.object(module, "PipelineRunner", return_value=runner_instance) + + generator._generate_worker( + flask_app=flask_app, + application_generate_entity=application_generate_entity, + queue_manager=MagicMock(), + context=contextlib.nullcontext(), + variable_loader=MagicMock(), + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + ) + + assert module.PipelineRunner.call_args.kwargs["system_user_id"] == "session" + + +def test_generate_raises_when_workflow_not_found(generator, mocker): + flask_app = MagicMock() + mocker.patch.object(module, "preserve_flask_contexts", _dummy_preserve) + + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = None + mocker.patch.object(module.db, "session", session) + + with pytest.raises(ValueError): + generator._generate( + flask_app=flask_app, + context=contextlib.nullcontext(), + pipeline=_build_pipeline(), + workflow_id="wf", + user=_build_user(), + application_generate_entity=FakeRagPipelineGenerateEntity( + task_id="t", + app_config=SimpleNamespace(app_id="pipe"), + user_id="user", + invoke_from=InvokeFrom.DEBUGGER, + ), + invoke_from=InvokeFrom.DEBUGGER, + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + streaming=True, + ) + + +def test_generate_success_returns_converted(generator, mocker): + flask_app = MagicMock() + mocker.patch.object(module, "preserve_flask_contexts", _dummy_preserve) + + workflow = MagicMock(id="wf", tenant_id="tenant", app_id="pipe", graph_dict={}) + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = workflow + mocker.patch.object(module.db, "session", session) + + queue_manager = MagicMock() + mocker.patch.object(module, "PipelineQueueManager", return_value=queue_manager) + + worker_thread = MagicMock() + mocker.patch.object(module.threading, "Thread", return_value=worker_thread) + + mocker.patch.object(generator, "_get_draft_var_saver_factory", return_value=MagicMock()) + mocker.patch.object(generator, "_handle_response", return_value="response") + mocker.patch.object(module.WorkflowAppGenerateResponseConverter, "convert", return_value="converted") + + result = generator._generate( + flask_app=flask_app, + context=contextlib.nullcontext(), + pipeline=_build_pipeline(), + workflow_id="wf", + user=_build_user(), + application_generate_entity=FakeRagPipelineGenerateEntity( + task_id="t", + app_config=SimpleNamespace(app_id="pipe"), + user_id="user", + invoke_from=InvokeFrom.DEBUGGER, + ), + invoke_from=InvokeFrom.DEBUGGER, + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + streaming=True, + ) + + assert result == "converted" + + +def test_single_iteration_generate_validates_inputs(generator, mocker): + with pytest.raises(ValueError): + generator.single_iteration_generate(_build_pipeline(), _build_workflow(), "", _build_user(), {}) + + with pytest.raises(ValueError): + generator.single_iteration_generate( + _build_pipeline(), _build_workflow(), "node", _build_user(), {"inputs": None} + ) + + +def test_single_iteration_generate_dataset_required(generator, mocker): + pipeline = _build_pipeline() + pipeline.retrieve_dataset.return_value = None + + session = DummySession() + _patch_session(mocker, session) + + with pytest.raises(ValueError): + generator.single_iteration_generate( + pipeline, + _build_workflow(), + "node", + _build_user(), + {"inputs": {"a": 1}}, + ) + + +def test_single_iteration_generate_success(generator, mocker): + pipeline = _build_pipeline() + + session = DummySession() + _patch_session(mocker, session) + + mocker.patch.object( + module.PipelineConfigManager, + "get_pipeline_config", + return_value=SimpleNamespace(app_id="pipe", tenant_id="tenant"), + ) + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_execution_repository", + return_value=MagicMock(), + ) + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_node_execution_repository", + return_value=MagicMock(), + ) + mocker.patch.object(module.db, "session", MagicMock(return_value=MagicMock())) + + mocker.patch.object(module, "WorkflowDraftVariableService", return_value=MagicMock()) + mocker.patch.object(module, "DraftVarLoader", return_value=MagicMock()) + + mocker.patch.object(generator, "_generate", return_value={"ok": True}) + + result = generator.single_iteration_generate( + pipeline, + _build_workflow(), + "node", + _build_user(), + {"inputs": {"a": 1}}, + streaming=False, + ) + + assert result == {"ok": True} + + +def test_single_loop_generate_success(generator, mocker): + pipeline = _build_pipeline() + + session = DummySession() + _patch_session(mocker, session) + + mocker.patch.object( + module.PipelineConfigManager, + "get_pipeline_config", + return_value=SimpleNamespace(app_id="pipe", tenant_id="tenant"), + ) + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_execution_repository", + return_value=MagicMock(), + ) + mocker.patch.object( + module.DifyCoreRepositoryFactory, + "create_workflow_node_execution_repository", + return_value=MagicMock(), + ) + mocker.patch.object(module.db, "session", MagicMock(return_value=MagicMock())) + + mocker.patch.object(module, "WorkflowDraftVariableService", return_value=MagicMock()) + mocker.patch.object(module, "DraftVarLoader", return_value=MagicMock()) + + mocker.patch.object(generator, "_generate", return_value={"ok": True}) + + result = generator.single_loop_generate( + pipeline, + _build_workflow(), + "node", + _build_user(), + {"inputs": {"a": 1}}, + streaming=False, + ) + + assert result == {"ok": True} + + +def test_handle_response_value_error_triggers_generate_task_stopped(generator, mocker): + pipeline = _build_pipeline() + workflow = _build_workflow() + app_entity = FakeRagPipelineGenerateEntity(task_id="t") + + task_pipeline = MagicMock() + task_pipeline.process.side_effect = ValueError("I/O operation on closed file.") + mocker.patch.object(module, "WorkflowAppGenerateTaskPipeline", return_value=task_pipeline) + + with pytest.raises(GenerateTaskStoppedError): + generator._handle_response( + application_generate_entity=app_entity, + workflow=workflow, + queue_manager=MagicMock(), + user=_build_user(), + draft_var_saver_factory=MagicMock(), + stream=False, + ) + + +def test_build_document_sets_metadata_for_builtin_fields(generator, mocker): + class DummyDocument(SimpleNamespace): + pass + + mocker.patch.object(module, "Document", side_effect=lambda **kwargs: DummyDocument(**kwargs)) + + document = generator._build_document( + tenant_id="tenant", + dataset_id="ds", + built_in_field_enabled=True, + datasource_type=DatasourceProviderType.LOCAL_FILE, + datasource_info={"name": "file"}, + created_from="rag-pipeline", + position=1, + account=_build_user(), + batch="batch", + document_form="text", + ) + + assert document.name == "file" + assert document.doc_metadata + + +def test_build_document_invalid_datasource_type(generator): + with pytest.raises(ValueError): + generator._build_document( + tenant_id="tenant", + dataset_id="ds", + built_in_field_enabled=False, + datasource_type="invalid", + datasource_info={}, + created_from="rag-pipeline", + position=1, + account=_build_user(), + batch="batch", + document_form="text", + ) + + +def test_format_datasource_info_list_non_online_drive(generator): + result = generator._format_datasource_info_list( + DatasourceProviderType.LOCAL_FILE, + [{"name": "file"}], + _build_pipeline(), + _build_workflow(), + "start", + _build_user(), + ) + + assert result == [{"name": "file"}] + + +def test_format_datasource_info_list_missing_node_data(generator): + workflow = MagicMock(graph_dict={"nodes": []}) + + with pytest.raises(ValueError): + generator._format_datasource_info_list( + DatasourceProviderType.ONLINE_DRIVE, + [], + _build_pipeline(), + workflow, + "start", + _build_user(), + ) + + +def test_format_datasource_info_list_online_drive_folder(generator, mocker): + workflow = MagicMock( + graph_dict={ + "nodes": [ + { + "id": "start", + "data": { + "plugin_id": "p", + "provider_name": "provider", + "datasource_name": "drive", + "credential_id": "cred", + }, + } + ] + } + ) + + runtime = MagicMock() + runtime.runtime = SimpleNamespace(credentials=None) + runtime.datasource_provider_type.return_value = DatasourceProviderType.ONLINE_DRIVE + + mocker.patch( + "core.datasource.datasource_manager.DatasourceManager.get_datasource_runtime", + return_value=runtime, + ) + mocker.patch.object(module.DatasourceProviderService, "get_datasource_credentials", return_value={"k": "v"}) + + mocker.patch.object( + generator, + "_get_files_in_folder", + side_effect=lambda *args, **kwargs: args[4].append({"id": "f"}), + ) + + result = generator._format_datasource_info_list( + DatasourceProviderType.ONLINE_DRIVE, + [{"id": "folder", "type": "folder", "name": "Folder", "bucket": "b"}], + _build_pipeline(), + workflow, + "start", + _build_user(), + ) + + assert result == [{"id": "f"}] + + +def test_get_files_in_folder_recurses_and_collects(generator): + class File: + def __init__(self, id, name, type): + self.id = id + self.name = name + self.type = type + + class FilesPage: + def __init__(self, files, is_truncated=False, next_page_parameters=None): + self.files = files + self.is_truncated = is_truncated + self.next_page_parameters = next_page_parameters + + class Result: + def __init__(self, result): + self.result = result + + class Runtime: + def __init__(self): + self.calls = [] + + def datasource_provider_type(self): + return DatasourceProviderType.ONLINE_DRIVE + + def online_drive_browse_files(self, user_id, request, provider_type): + self.calls.append(request.next_page_parameters) + if request.prefix == "fd": + return iter([Result([FilesPage([File("f2", "file2", "file")], False, None)])]) + if request.next_page_parameters is None: + return iter( + [ + Result( + [FilesPage([File("f1", "file", "file"), File("fd", "folder", "folder")], True, {"page": 2})] + ) + ] + ) + return iter([Result([FilesPage([File("f2", "file2", "file")], False, None)])]) + + runtime = Runtime() + all_files = [] + + generator._get_files_in_folder( + datasource_runtime=runtime, + prefix="root", + bucket="b", + user_id="user", + all_files=all_files, + datasource_info={}, + ) + + assert {f["id"] for f in all_files} == {"f1", "f2"} diff --git a/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_queue_manager.py b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_queue_manager.py new file mode 100644 index 0000000000..72f7552bd1 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_queue_manager.py @@ -0,0 +1,57 @@ +import pytest + +import core.app.apps.pipeline.pipeline_queue_manager as module +from core.app.apps.base_app_queue_manager import PublishFrom +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.apps.pipeline.pipeline_queue_manager import PipelineQueueManager +from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.entities.queue_entities import ( + QueueErrorEvent, + QueueMessageEndEvent, + QueueStopEvent, + QueueWorkflowFailedEvent, + QueueWorkflowPartialSuccessEvent, + QueueWorkflowSucceededEvent, +) +from dify_graph.model_runtime.entities.llm_entities import LLMResult + + +def test_publish_sets_stop_listen_and_raises_on_stopped(mocker): + manager = PipelineQueueManager(task_id="t", user_id="u", invoke_from=InvokeFrom.WEB_APP, app_mode="rag") + manager._q = mocker.MagicMock() + manager.stop_listen = mocker.MagicMock() + manager._is_stopped = mocker.MagicMock(return_value=True) + + with pytest.raises(GenerateTaskStoppedError): + manager._publish(QueueStopEvent(stopped_by=QueueStopEvent.StopBy.USER_MANUAL), PublishFrom.APPLICATION_MANAGER) + + manager.stop_listen.assert_called_once() + + +def test_publish_stop_events_trigger_stop_listen(mocker): + manager = PipelineQueueManager(task_id="t", user_id="u", invoke_from=InvokeFrom.WEB_APP, app_mode="rag") + manager._q = mocker.MagicMock() + manager.stop_listen = mocker.MagicMock() + manager._is_stopped = mocker.MagicMock(return_value=False) + + for event in [ + QueueErrorEvent(error=ValueError("bad")), + QueueMessageEndEvent(llm_result=LLMResult.model_construct()), + QueueWorkflowSucceededEvent(), + QueueWorkflowFailedEvent(error="failed", exceptions_count=1), + QueueWorkflowPartialSuccessEvent(exceptions_count=1), + ]: + manager.stop_listen.reset_mock() + manager._publish(event, PublishFrom.TASK_PIPELINE) + manager.stop_listen.assert_called_once() + + +def test_publish_non_stop_event_no_stop_listen(mocker): + manager = PipelineQueueManager(task_id="t", user_id="u", invoke_from=InvokeFrom.WEB_APP, app_mode="rag") + manager._q = mocker.MagicMock() + manager.stop_listen = mocker.MagicMock() + manager._is_stopped = mocker.MagicMock(return_value=False) + + non_stop_event = mocker.MagicMock(spec=module.AppQueueEvent) + manager._publish(non_stop_event, PublishFrom.TASK_PIPELINE) + manager.stop_listen.assert_not_called() diff --git a/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_runner.py b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_runner.py new file mode 100644 index 0000000000..eec95b7f39 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/pipeline/test_pipeline_runner.py @@ -0,0 +1,297 @@ +""" +Unit tests for PipelineRunner behavior. +Asserts correct event handling, error propagation, and user invocation logic. +Primary collaborators: PipelineRunner, InvokeFrom, GraphRunFailedEvent, UserFrom, and mocked dependencies. +Cross-references: core.app.apps.pipeline.pipeline_runner, core.app.entities.app_invoke_entities. +""" + +"""Unit tests for PipelineRunner behavior. + +This module validates core control-flow outcomes for +``core.app.apps.pipeline.pipeline_runner``: app/workflow lookup, graph +initialization guards, invoke-source to user-source resolution, and failed-run +event handling. Invariants asserted here include strict graph-config +validation, correct ``InvokeFrom`` to ``UserFrom`` mapping, and publishing +error paths driven by ``GraphRunFailedEvent`` through mocked collaborators. +Primary collaborators include ``PipelineRunner``, +``core.app.entities.app_invoke_entities.InvokeFrom``, ``GraphRunFailedEvent``, +``UserFrom``, and patched DB/runtime dependencies used by the runner. +""" + +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +import core.app.apps.pipeline.pipeline_runner as module +from core.app.apps.pipeline.pipeline_runner import PipelineRunner +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom +from dify_graph.graph_events import GraphRunFailedEvent + + +def _build_app_generate_entity() -> SimpleNamespace: + app_config = SimpleNamespace(app_id="pipe", workflow_id="wf", tenant_id="tenant") + return SimpleNamespace( + app_config=app_config, + invoke_from=InvokeFrom.WEB_APP, + user_id="user", + trace_manager=MagicMock(), + inputs={"input1": "v1"}, + files=[], + workflow_execution_id="run", + document_id="doc", + original_document_id=None, + batch="batch", + dataset_id="ds", + datasource_type="local_file", + datasource_info={"name": "file"}, + start_node_id="start", + call_depth=0, + single_iteration_run=None, + single_loop_run=None, + ) + + +@pytest.fixture +def runner(): + app_generate_entity = _build_app_generate_entity() + queue_manager = MagicMock() + variable_loader = MagicMock() + workflow = MagicMock() + workflow_execution_repository = MagicMock() + workflow_node_execution_repository = MagicMock() + + return PipelineRunner( + application_generate_entity=app_generate_entity, + queue_manager=queue_manager, + variable_loader=variable_loader, + workflow=workflow, + system_user_id="sys", + workflow_execution_repository=workflow_execution_repository, + workflow_node_execution_repository=workflow_node_execution_repository, + ) + + +def test_get_app_id(runner): + assert runner._get_app_id() == "pipe" + + +def test_get_workflow_returns_workflow(mocker, runner): + pipeline = MagicMock(tenant_id="tenant", id="pipe") + workflow = MagicMock(id="wf") + + query = MagicMock() + query.where.return_value.first.return_value = workflow + mocker.patch.object(module.db, "session", MagicMock(query=MagicMock(return_value=query))) + + result = runner.get_workflow(pipeline=pipeline, workflow_id="wf") + + assert result == workflow + + +def test_init_rag_pipeline_graph_invalid_config(mocker, runner): + workflow = MagicMock(id="wf", tenant_id="tenant", graph_dict={}) + + with pytest.raises(ValueError): + runner._init_rag_pipeline_graph(workflow=workflow, graph_runtime_state=MagicMock()) + + workflow.graph_dict = {"nodes": "bad", "edges": []} + with pytest.raises(ValueError): + runner._init_rag_pipeline_graph(workflow=workflow, graph_runtime_state=MagicMock()) + + workflow.graph_dict = {"nodes": [], "edges": "bad"} + with pytest.raises(ValueError): + runner._init_rag_pipeline_graph(workflow=workflow, graph_runtime_state=MagicMock()) + + +def test_init_rag_pipeline_graph_not_found(mocker, runner): + workflow = MagicMock(id="wf", tenant_id="tenant", graph_dict={"nodes": [], "edges": []}) + mocker.patch.object(module.Graph, "init", return_value=None) + + with pytest.raises(ValueError): + runner._init_rag_pipeline_graph(workflow=workflow, graph_runtime_state=MagicMock()) + + +def test_update_document_status_on_failure(mocker, runner): + document = MagicMock() + + query = MagicMock() + query.where.return_value.first.return_value = document + + session = MagicMock() + session.query.return_value = query + mocker.patch.object(module.db, "session", session) + + event = GraphRunFailedEvent(error="boom") + + runner._update_document_status(event, document_id="doc", dataset_id="ds") + + assert document.indexing_status == "error" + assert document.error == "boom" + session.commit.assert_called_once() + + +def test_run_pipeline_not_found(mocker): + app_generate_entity = _build_app_generate_entity() + app_generate_entity.invoke_from = InvokeFrom.WEB_APP + app_generate_entity.single_iteration_run = None + app_generate_entity.single_loop_run = None + + query = MagicMock() + query.where.return_value.first.return_value = None + + session = MagicMock() + session.query.return_value = query + mocker.patch.object(module.db, "session", session) + + runner = PipelineRunner( + application_generate_entity=app_generate_entity, + queue_manager=MagicMock(), + variable_loader=MagicMock(), + workflow=MagicMock(), + system_user_id="sys", + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + ) + + with pytest.raises(ValueError): + runner.run() + + +def test_run_workflow_not_initialized(mocker): + app_generate_entity = _build_app_generate_entity() + + pipeline = MagicMock(id="pipe") + query_pipeline = MagicMock() + query_pipeline.where.return_value.first.return_value = pipeline + + session = MagicMock() + session.query.return_value = query_pipeline + mocker.patch.object(module.db, "session", session) + + runner = PipelineRunner( + application_generate_entity=app_generate_entity, + queue_manager=MagicMock(), + variable_loader=MagicMock(), + workflow=MagicMock(), + system_user_id="sys", + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + ) + runner.get_workflow = MagicMock(return_value=None) + + with pytest.raises(ValueError): + runner.run() + + +def test_run_single_iteration_path(mocker): + app_generate_entity = _build_app_generate_entity() + app_generate_entity.single_iteration_run = MagicMock() + + pipeline = MagicMock(id="pipe") + query_pipeline = MagicMock() + query_pipeline.where.return_value.first.return_value = pipeline + + query_end_user = MagicMock() + query_end_user.where.return_value.first.return_value = MagicMock(session_id="sess") + + session = MagicMock() + session.query.side_effect = [query_end_user, query_pipeline] + mocker.patch.object(module.db, "session", session) + + runner = PipelineRunner( + application_generate_entity=app_generate_entity, + queue_manager=MagicMock(), + variable_loader=MagicMock(), + workflow=MagicMock(), + system_user_id="sys", + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + ) + + runner._resolve_user_from = MagicMock(return_value=UserFrom.ACCOUNT) + runner.get_workflow = MagicMock( + return_value=MagicMock( + id="wf", + tenant_id="tenant", + app_id="pipe", + graph_dict={}, + type="rag-pipeline", + version="v1", + ) + ) + runner._prepare_single_node_execution = MagicMock(return_value=("graph", "pool", "state")) + runner._update_document_status = MagicMock() + runner._handle_event = MagicMock() + + workflow_entry = MagicMock() + workflow_entry.graph_engine = MagicMock() + workflow_entry.run.return_value = [MagicMock()] + mocker.patch.object(module, "WorkflowEntry", return_value=workflow_entry) + + mocker.patch.object(module, "WorkflowPersistenceLayer", return_value=MagicMock()) + + runner.run() + + runner._prepare_single_node_execution.assert_called_once() + runner._handle_event.assert_called() + + +def test_run_normal_path_builds_graph(mocker): + app_generate_entity = _build_app_generate_entity() + + pipeline = MagicMock(id="pipe") + query_pipeline = MagicMock() + query_pipeline.where.return_value.first.return_value = pipeline + + query_end_user = MagicMock() + query_end_user.where.return_value.first.return_value = MagicMock(session_id="sess") + + session = MagicMock() + session.query.side_effect = [query_end_user, query_pipeline] + mocker.patch.object(module.db, "session", session) + + workflow = MagicMock( + id="wf", + tenant_id="tenant", + app_id="pipe", + graph_dict={"nodes": [], "edges": []}, + environment_variables=[], + rag_pipeline_variables=[{"variable": "input1", "belong_to_node_id": "start"}], + type="rag-pipeline", + version="v1", + ) + + runner = PipelineRunner( + application_generate_entity=app_generate_entity, + queue_manager=MagicMock(), + variable_loader=MagicMock(), + workflow=workflow, + system_user_id="sys", + workflow_execution_repository=MagicMock(), + workflow_node_execution_repository=MagicMock(), + ) + + runner._resolve_user_from = MagicMock(return_value=UserFrom.ACCOUNT) + runner.get_workflow = MagicMock(return_value=workflow) + runner._init_rag_pipeline_graph = MagicMock(return_value="graph") + runner._update_document_status = MagicMock() + runner._handle_event = MagicMock() + + mocker.patch.object( + module.RAGPipelineVariable, + "model_validate", + return_value=SimpleNamespace(belong_to_node_id="start", variable="input1"), + ) + mocker.patch.object(module, "RAGPipelineVariableInput", side_effect=lambda **kwargs: SimpleNamespace(**kwargs)) + mocker.patch.object(module, "VariablePool", side_effect=lambda **kwargs: SimpleNamespace(**kwargs)) + + workflow_entry = MagicMock() + workflow_entry.graph_engine = MagicMock() + workflow_entry.run.return_value = [] + mocker.patch.object(module, "WorkflowEntry", return_value=workflow_entry) + mocker.patch.object(module, "WorkflowPersistenceLayer", return_value=MagicMock()) + + runner.run() + + runner._init_rag_pipeline_graph.assert_called_once() diff --git a/api/tests/unit_tests/core/app/apps/test_advanced_chat_app_generator.py b/api/tests/unit_tests/core/app/apps/test_advanced_chat_app_generator.py index f0d9afc0db..a25e3ec3f5 100644 --- a/api/tests/unit_tests/core/app/apps/test_advanced_chat_app_generator.py +++ b/api/tests/unit_tests/core/app/apps/test_advanced_chat_app_generator.py @@ -124,12 +124,12 @@ def test_message_cycle_manager_uses_new_conversation_flag(monkeypatch): def start(self): self.started = True - def fake_thread(**kwargs): + def fake_thread(*args, **kwargs): thread = DummyThread(**kwargs) captured["thread"] = thread return thread - monkeypatch.setattr(message_cycle_manager, "Thread", fake_thread) + monkeypatch.setattr(message_cycle_manager, "Timer", fake_thread) manager = MessageCycleManager(application_generate_entity=entity, task_state=MagicMock()) thread = manager.generate_conversation_name(conversation_id="existing-conversation-id", query="hello") diff --git a/api/tests/unit_tests/core/app/apps/test_base_app_generator.py b/api/tests/unit_tests/core/app/apps/test_base_app_generator.py index 43a97ae098..a3ced02394 100644 --- a/api/tests/unit_tests/core/app/apps/test_base_app_generator.py +++ b/api/tests/unit_tests/core/app/apps/test_base_app_generator.py @@ -1,3 +1,5 @@ +from unittest.mock import MagicMock + import pytest from core.app.apps.base_app_generator import BaseAppGenerator @@ -366,3 +368,132 @@ def test_validate_inputs_optional_file_with_empty_string_ignores_default(): ) assert result is None + + +class TestBaseAppGeneratorExtras: + def test_prepare_user_inputs_converts_files_and_lists(self, monkeypatch): + base_app_generator = BaseAppGenerator() + + variables = [ + VariableEntity( + variable="file", + label="file", + type=VariableEntityType.FILE, + required=False, + allowed_file_types=[], + allowed_file_extensions=[], + allowed_file_upload_methods=[], + ), + VariableEntity( + variable="file_list", + label="file_list", + type=VariableEntityType.FILE_LIST, + required=False, + allowed_file_types=[], + allowed_file_extensions=[], + allowed_file_upload_methods=[], + ), + VariableEntity( + variable="json", + label="json", + type=VariableEntityType.JSON_OBJECT, + required=False, + ), + ] + + monkeypatch.setattr( + "core.app.apps.base_app_generator.file_factory.build_from_mapping", + lambda mapping, tenant_id, config, strict_type_validation=False: "file-object", + ) + monkeypatch.setattr( + "core.app.apps.base_app_generator.file_factory.build_from_mappings", + lambda mappings, tenant_id, config: ["file-1", "file-2"], + ) + + user_inputs = { + "file": {"id": "file-id"}, + "file_list": [{"id": "file-1"}, {"id": "file-2"}], + "json": {"key": "value"}, + } + + prepared = base_app_generator._prepare_user_inputs( + user_inputs=user_inputs, + variables=variables, + tenant_id="tenant-id", + ) + + assert prepared["file"] == "file-object" + assert prepared["file_list"] == ["file-1", "file-2"] + assert prepared["json"] == {"key": "value"} + + def test_prepare_user_inputs_rejects_invalid_dict_inputs(self): + base_app_generator = BaseAppGenerator() + variables = [ + VariableEntity( + variable="text", + label="text", + type=VariableEntityType.TEXT_INPUT, + required=False, + ) + ] + + with pytest.raises(ValueError, match="must be a string"): + base_app_generator._prepare_user_inputs( + user_inputs={"text": {"unexpected": "dict"}}, + variables=variables, + tenant_id="tenant-id", + ) + + def test_prepare_user_inputs_rejects_invalid_list_inputs(self): + base_app_generator = BaseAppGenerator() + variables = [ + VariableEntity( + variable="text", + label="text", + type=VariableEntityType.TEXT_INPUT, + required=False, + ) + ] + + with pytest.raises(ValueError, match="must be a string"): + base_app_generator._prepare_user_inputs( + user_inputs={"text": [{"unexpected": "dict"}]}, + variables=variables, + tenant_id="tenant-id", + ) + + def test_convert_to_event_stream(self): + base_app_generator = BaseAppGenerator() + + assert base_app_generator.convert_to_event_stream({"ok": True}) == {"ok": True} + + def _gen(): + yield {"delta": "hi"} + yield "ping" + + converted = list(base_app_generator.convert_to_event_stream(_gen())) + + assert converted[0].startswith("data: ") + assert "\n\n" in converted[0] + assert converted[1] == "event: ping\n\n" + + def test_get_draft_var_saver_factory_debugger(self): + from core.app.entities.app_invoke_entities import InvokeFrom + from dify_graph.enums import BuiltinNodeTypes + from models import Account + + base_app_generator = BaseAppGenerator() + account = Account(name="Tester", email="tester@example.com") + account.id = "account-id" + account.tenant_id = "tenant-id" + + factory = base_app_generator._get_draft_var_saver_factory(InvokeFrom.DEBUGGER, account) + saver = factory( + session=MagicMock(), + app_id="app-id", + node_id="node-id", + node_type=BuiltinNodeTypes.START, + node_execution_id="node-exec-id", + ) + + assert saver is not None diff --git a/api/tests/unit_tests/core/app/apps/test_base_app_queue_manager.py b/api/tests/unit_tests/core/app/apps/test_base_app_queue_manager.py new file mode 100644 index 0000000000..c6dc20ffc6 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/test_base_app_queue_manager.py @@ -0,0 +1,61 @@ +from types import SimpleNamespace +from unittest.mock import patch + +import pytest + +from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom +from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.entities.queue_entities import QueueErrorEvent + + +class DummyQueueManager(AppQueueManager): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.published = [] + + def _publish(self, event, pub_from): + self.published.append((event, pub_from)) + + +class TestBaseAppQueueManager: + def test_init_requires_user_id(self): + with pytest.raises(ValueError): + DummyQueueManager(task_id="t1", user_id="", invoke_from=InvokeFrom.SERVICE_API) + + def test_publish_error_records_event(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + mock_redis.setex.return_value = True + manager = DummyQueueManager(task_id="t1", user_id="u1", invoke_from=InvokeFrom.SERVICE_API) + manager.publish_error(ValueError("boom"), PublishFrom.TASK_PIPELINE) + + assert isinstance(manager.published[0][0], QueueErrorEvent) + + def test_set_stop_flag_checks_user(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + mock_redis.get.return_value = b"end-user-u1" + AppQueueManager.set_stop_flag(task_id="t1", invoke_from=InvokeFrom.SERVICE_API, user_id="u1") + + mock_redis.setex.assert_called_once() + + def test_set_stop_flag_no_user_check(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + AppQueueManager.set_stop_flag_no_user_check(task_id="t1") + + mock_redis.setex.assert_called_once() + + def test_is_stopped_reads_cache(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + mock_redis.setex.return_value = True + mock_redis.get.return_value = b"1" + manager = DummyQueueManager(task_id="t1", user_id="u1", invoke_from=InvokeFrom.SERVICE_API) + + assert manager._is_stopped() is True + + def test_check_for_sqlalchemy_models_raises(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + mock_redis.setex.return_value = True + manager = DummyQueueManager(task_id="t1", user_id="u1", invoke_from=InvokeFrom.SERVICE_API) + + bad = SimpleNamespace(_sa_instance_state=True) + with pytest.raises(TypeError): + manager._check_for_sqlalchemy_models(bad) diff --git a/api/tests/unit_tests/core/app/apps/test_base_app_runner.py b/api/tests/unit_tests/core/app/apps/test_base_app_runner.py new file mode 100644 index 0000000000..aabeb54553 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/test_base_app_runner.py @@ -0,0 +1,442 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from core.app.app_config.entities import ( + AdvancedChatMessageEntity, + AdvancedChatPromptTemplateEntity, + AdvancedCompletionPromptTemplateEntity, + PromptTemplateEntity, +) +from core.app.apps.base_app_runner import AppRunner +from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueMessageEndEvent +from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + ImagePromptMessageContent, + PromptMessageRole, + TextPromptMessageContent, +) +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey +from dify_graph.model_runtime.errors.invoke import InvokeBadRequestError +from models.model import AppMode + + +class _DummyParameterRule: + def __init__(self, name: str, use_template: str | None = None) -> None: + self.name = name + self.use_template = use_template + + +class _QueueRecorder: + def __init__(self) -> None: + self.events: list[object] = [] + + def publish(self, event, pub_from): + _ = pub_from + self.events.append(event) + + +class TestAppRunner: + def test_recalc_llm_max_tokens_updates_parameters(self, monkeypatch): + runner = AppRunner() + + model_schema = SimpleNamespace( + model_properties={ModelPropertyKey.CONTEXT_SIZE: 100}, + parameter_rules=[_DummyParameterRule("max_tokens")], + ) + model_config = SimpleNamespace( + provider_model_bundle=object(), + model="mock", + model_schema=model_schema, + parameters={"max_tokens": 30}, + ) + + monkeypatch.setattr( + "core.app.apps.base_app_runner.ModelInstance", + lambda provider_model_bundle, model: SimpleNamespace(get_llm_num_tokens=lambda messages: 80), + ) + + runner.recalc_llm_max_tokens(model_config, prompt_messages=[AssistantPromptMessage(content="hi")]) + + assert model_config.parameters["max_tokens"] == 20 + + def test_recalc_llm_max_tokens_returns_minus_one_when_no_context(self, monkeypatch): + runner = AppRunner() + + model_schema = SimpleNamespace( + model_properties={}, + parameter_rules=[_DummyParameterRule("max_tokens")], + ) + model_config = SimpleNamespace( + provider_model_bundle=object(), + model="mock", + model_schema=model_schema, + parameters={"max_tokens": 30}, + ) + + monkeypatch.setattr( + "core.app.apps.base_app_runner.ModelInstance", + lambda provider_model_bundle, model: SimpleNamespace(get_llm_num_tokens=lambda messages: 10), + ) + + assert runner.recalc_llm_max_tokens(model_config, prompt_messages=[]) == -1 + + def test_direct_output_streaming_publishes_chunks_and_end(self, monkeypatch): + runner = AppRunner() + queue = _QueueRecorder() + app_generate_entity = SimpleNamespace(model_conf=SimpleNamespace(model="mock"), stream=True) + + monkeypatch.setattr("core.app.apps.base_app_runner.time.sleep", lambda _: None) + + runner.direct_output( + queue_manager=queue, + app_generate_entity=app_generate_entity, + prompt_messages=[], + text="hi", + stream=True, + ) + + assert any(isinstance(event, QueueLLMChunkEvent) for event in queue.events) + assert isinstance(queue.events[-1], QueueMessageEndEvent) + + def test_handle_invoke_result_direct_publishes_end_event(self): + runner = AppRunner() + queue = _QueueRecorder() + llm_result = LLMResult( + model="mock", + prompt_messages=[], + message=AssistantPromptMessage(content="done"), + usage=LLMUsage.empty_usage(), + ) + + runner._handle_invoke_result( + invoke_result=llm_result, + queue_manager=queue, + stream=False, + ) + + assert isinstance(queue.events[-1], QueueMessageEndEvent) + + def test_handle_invoke_result_invalid_type_raises(self): + runner = AppRunner() + queue = _QueueRecorder() + + with pytest.raises(NotImplementedError): + runner._handle_invoke_result( + invoke_result=["unexpected"], + queue_manager=queue, + stream=True, + ) + + def test_organize_prompt_messages_simple_template(self, monkeypatch): + runner = AppRunner() + model_config = SimpleNamespace(mode="chat", stop=["STOP"]) + prompt_template_entity = PromptTemplateEntity( + prompt_type=PromptTemplateEntity.PromptType.SIMPLE, + simple_prompt_template="hello", + ) + + monkeypatch.setattr( + "core.app.apps.base_app_runner.SimplePromptTransform.get_prompt", + lambda self, **kwargs: (["simple-message"], ["simple-stop"]), + ) + + prompt_messages, stop = runner.organize_prompt_messages( + app_record=SimpleNamespace(mode=AppMode.CHAT.value), + model_config=model_config, + prompt_template_entity=prompt_template_entity, + inputs={}, + files=[], + query="q", + ) + + assert prompt_messages == ["simple-message"] + assert stop == ["simple-stop"] + + def test_organize_prompt_messages_advanced_completion_template(self, monkeypatch): + runner = AppRunner() + model_config = SimpleNamespace(mode="completion", stop=[""]) + captured: dict[str, object] = {} + prompt_template_entity = PromptTemplateEntity( + prompt_type=PromptTemplateEntity.PromptType.ADVANCED, + advanced_completion_prompt_template=AdvancedCompletionPromptTemplateEntity( + prompt="answer", + role_prefix=AdvancedCompletionPromptTemplateEntity.RolePrefixEntity(user="U", assistant="A"), + ), + ) + + def _fake_advanced_prompt(self, **kwargs): + captured.update(kwargs) + return ["advanced-completion-message"] + + monkeypatch.setattr("core.app.apps.base_app_runner.AdvancedPromptTransform.get_prompt", _fake_advanced_prompt) + + prompt_messages, stop = runner.organize_prompt_messages( + app_record=SimpleNamespace(mode=AppMode.CHAT.value), + model_config=model_config, + prompt_template_entity=prompt_template_entity, + inputs={}, + files=[], + query="q", + ) + + assert prompt_messages == ["advanced-completion-message"] + assert stop == [""] + memory_config = captured["memory_config"] + assert memory_config.role_prefix.user == "U" + assert memory_config.role_prefix.assistant == "A" + + def test_organize_prompt_messages_advanced_chat_template(self, monkeypatch): + runner = AppRunner() + model_config = SimpleNamespace(mode="chat", stop=[""]) + captured: dict[str, object] = {} + prompt_template_entity = PromptTemplateEntity( + prompt_type=PromptTemplateEntity.PromptType.ADVANCED, + advanced_chat_prompt_template=AdvancedChatPromptTemplateEntity( + messages=[ + AdvancedChatMessageEntity(text="hello", role=PromptMessageRole.USER), + AdvancedChatMessageEntity(text="world", role=PromptMessageRole.ASSISTANT), + ] + ), + ) + + def _fake_advanced_prompt(self, **kwargs): + captured.update(kwargs) + return ["advanced-chat-message"] + + monkeypatch.setattr("core.app.apps.base_app_runner.AdvancedPromptTransform.get_prompt", _fake_advanced_prompt) + + prompt_messages, stop = runner.organize_prompt_messages( + app_record=SimpleNamespace(mode=AppMode.CHAT.value), + model_config=model_config, + prompt_template_entity=prompt_template_entity, + inputs={}, + files=[], + query="q", + ) + + assert prompt_messages == ["advanced-chat-message"] + assert stop == [""] + assert len(captured["prompt_template"]) == 2 + + def test_organize_prompt_messages_advanced_missing_templates_raise(self): + runner = AppRunner() + + with pytest.raises(InvokeBadRequestError, match="Advanced completion prompt template is required"): + runner.organize_prompt_messages( + app_record=SimpleNamespace(mode=AppMode.CHAT.value), + model_config=SimpleNamespace(mode="completion", stop=[]), + prompt_template_entity=PromptTemplateEntity(prompt_type=PromptTemplateEntity.PromptType.ADVANCED), + inputs={}, + files=[], + ) + + with pytest.raises(InvokeBadRequestError, match="Advanced chat prompt template is required"): + runner.organize_prompt_messages( + app_record=SimpleNamespace(mode=AppMode.CHAT.value), + model_config=SimpleNamespace(mode="chat", stop=[]), + prompt_template_entity=PromptTemplateEntity(prompt_type=PromptTemplateEntity.PromptType.ADVANCED), + inputs={}, + files=[], + ) + + def test_handle_invoke_result_stream_routes_chunks_and_builds_message(self, monkeypatch): + runner = AppRunner() + queue = _QueueRecorder() + warning_logger = MagicMock() + monkeypatch.setattr("core.app.apps.base_app_runner._logger.warning", warning_logger) + + image_content = ImagePromptMessageContent( + url="https://example.com/image.png", format="png", mime_type="image/png" + ) + + def _stream(): + yield LLMResultChunk( + model="stream-model", + prompt_messages=[AssistantPromptMessage(content="prompt")], + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage.model_construct( + content=[ + "a", + TextPromptMessageContent(data="b"), + SimpleNamespace(data="c"), + image_content, + ] + ), + ), + ) + + runner._handle_invoke_result( + invoke_result=_stream(), + queue_manager=queue, + stream=True, + agent=False, + ) + + assert isinstance(queue.events[0], QueueLLMChunkEvent) + assert isinstance(queue.events[-1], QueueMessageEndEvent) + assert queue.events[-1].llm_result.message.content == "abc" + warning_logger.assert_called_once() + + def test_handle_invoke_result_stream_agent_mode_handles_multimodal_errors(self, monkeypatch): + runner = AppRunner() + queue = _QueueRecorder() + exception_logger = MagicMock() + monkeypatch.setattr("core.app.apps.base_app_runner._logger.exception", exception_logger) + + monkeypatch.setattr( + runner, + "_handle_multimodal_image_content", + MagicMock(side_effect=RuntimeError("failed to save image")), + ) + usage = LLMUsage.empty_usage() + + def _stream(): + yield LLMResultChunk( + model="agent-model", + prompt_messages=[AssistantPromptMessage(content="prompt")], + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage( + content=[ + ImagePromptMessageContent( + url="https://example.com/image.png", + format="png", + mime_type="image/png", + ), + TextPromptMessageContent(data="done"), + ] + ), + usage=usage, + ), + ) + + runner._handle_invoke_result_stream( + invoke_result=_stream(), + queue_manager=queue, + agent=True, + message_id="message-id", + user_id="user-id", + tenant_id="tenant-id", + ) + + assert isinstance(queue.events[0], QueueAgentMessageEvent) + assert isinstance(queue.events[-1], QueueMessageEndEvent) + assert queue.events[-1].llm_result.usage == usage + exception_logger.assert_called_once() + + def test_handle_multimodal_image_content_fallback_return_branch(self, monkeypatch): + runner = AppRunner() + + class _ToggleBool: + def __init__(self, values: list[bool]): + self._values = values + self._index = 0 + + def __bool__(self): + value = self._values[min(self._index, len(self._values) - 1)] + self._index += 1 + return value + + content = SimpleNamespace( + url=_ToggleBool([False, False]), + base64_data=_ToggleBool([True, False]), + mime_type="image/png", + ) + + db_session = SimpleNamespace(add=MagicMock(), commit=MagicMock(), refresh=MagicMock()) + monkeypatch.setattr("core.app.apps.base_app_runner.ToolFileManager", lambda: MagicMock()) + monkeypatch.setattr("core.app.apps.base_app_runner.db", SimpleNamespace(session=db_session)) + + queue_manager = SimpleNamespace(invoke_from=InvokeFrom.SERVICE_API, publish=MagicMock()) + + runner._handle_multimodal_image_content( + content=content, + message_id="message-id", + user_id="user-id", + tenant_id="tenant-id", + queue_manager=queue_manager, + ) + + db_session.add.assert_not_called() + queue_manager.publish.assert_not_called() + + def test_check_hosting_moderation_direct_output_called(self, monkeypatch): + runner = AppRunner() + queue = _QueueRecorder() + app_generate_entity = SimpleNamespace(stream=False) + + monkeypatch.setattr( + "core.app.apps.base_app_runner.HostingModerationFeature.check", + lambda self, application_generate_entity, prompt_messages: True, + ) + direct_output = MagicMock() + monkeypatch.setattr(runner, "direct_output", direct_output) + + result = runner.check_hosting_moderation( + application_generate_entity=app_generate_entity, + queue_manager=queue, + prompt_messages=[], + ) + + assert result is True + assert direct_output.called + + def test_fill_in_inputs_from_external_data_tools(self, monkeypatch): + runner = AppRunner() + monkeypatch.setattr( + "core.app.apps.base_app_runner.ExternalDataFetch.fetch", + lambda self, tenant_id, app_id, external_data_tools, inputs, query: {"foo": "bar"}, + ) + + result = runner.fill_in_inputs_from_external_data_tools( + tenant_id="tenant", + app_id="app", + external_data_tools=[], + inputs={}, + query="q", + ) + + assert result == {"foo": "bar"} + + def test_moderation_for_inputs_returns_result(self, monkeypatch): + runner = AppRunner() + monkeypatch.setattr( + "core.app.apps.base_app_runner.InputModeration.check", + lambda self, app_id, tenant_id, app_config, inputs, query, message_id, trace_manager: (True, {}, ""), + ) + app_generate_entity = SimpleNamespace(app_config=SimpleNamespace(), trace_manager=None) + + result = runner.moderation_for_inputs( + app_id="app", + tenant_id="tenant", + app_generate_entity=app_generate_entity, + inputs={}, + query="q", + message_id="msg", + ) + + assert result == (True, {}, "") + + def test_query_app_annotations_to_reply(self, monkeypatch): + runner = AppRunner() + monkeypatch.setattr( + "core.app.apps.base_app_runner.AnnotationReplyFeature.query", + lambda self, app_record, message, query, user_id, invoke_from: "reply", + ) + + response = runner.query_app_annotations_to_reply( + app_record=SimpleNamespace(), + message=SimpleNamespace(), + query="hello", + user_id="user", + invoke_from=InvokeFrom.WEB_APP, + ) + + assert response == "reply" diff --git a/api/tests/unit_tests/core/app/apps/test_exc.py b/api/tests/unit_tests/core/app/apps/test_exc.py new file mode 100644 index 0000000000..e41c78e89e --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/test_exc.py @@ -0,0 +1,7 @@ +from core.app.apps.exc import GenerateTaskStoppedError + + +class TestAppsExceptions: + def test_generate_task_stopped_error(self): + err = GenerateTaskStoppedError("stopped") + assert str(err) == "stopped" diff --git a/api/tests/unit_tests/core/app/apps/test_message_based_app_generator.py b/api/tests/unit_tests/core/app/apps/test_message_based_app_generator.py index 87b8dc51e7..1250ac5ecf 100644 --- a/api/tests/unit_tests/core/app/apps/test_message_based_app_generator.py +++ b/api/tests/unit_tests/core/app/apps/test_message_based_app_generator.py @@ -13,9 +13,11 @@ from core.app.app_config.entities import ( PromptTemplateEntity, ) from core.app.apps import message_based_app_generator +from core.app.apps.exc import GenerateTaskStoppedError from core.app.apps.message_based_app_generator import MessageBasedAppGenerator from core.app.entities.app_invoke_entities import ChatAppGenerateEntity, InvokeFrom from models.model import AppMode, Conversation, Message +from services.errors.app_model_config import AppModelConfigBrokenError class DummyModelConf: @@ -125,3 +127,55 @@ def test_init_generate_records_sets_conversation_fields_for_chat_entity(): assert entity.conversation_id == "generated-conversation-id" assert entity.is_new_conversation is True assert conversation.id == "generated-conversation-id" + + +class TestMessageBasedAppGeneratorExtras: + def test_handle_response_closed_file_raises_stopped(self, monkeypatch): + generator = MessageBasedAppGenerator() + + class _Pipeline: + def __init__(self, **kwargs) -> None: + _ = kwargs + + def process(self): + raise ValueError("I/O operation on closed file.") + + monkeypatch.setattr( + "core.app.apps.message_based_app_generator.EasyUIBasedGenerateTaskPipeline", + _Pipeline, + ) + + with pytest.raises(GenerateTaskStoppedError): + generator._handle_response( + application_generate_entity=_make_chat_generate_entity(_make_app_config(AppMode.CHAT)), + queue_manager=SimpleNamespace(), + conversation=SimpleNamespace(id="conv"), + message=SimpleNamespace(id="msg"), + user=SimpleNamespace(), + stream=False, + ) + + def test_get_app_model_config_requires_valid_config(self, monkeypatch): + generator = MessageBasedAppGenerator() + app_model = SimpleNamespace(id="app", app_model_config_id=None, app_model_config=None) + + with pytest.raises(AppModelConfigBrokenError): + generator._get_app_model_config(app_model, conversation=None) + + conversation = SimpleNamespace(app_model_config_id="missing-id") + monkeypatch.setattr( + message_based_app_generator, "db", SimpleNamespace(session=SimpleNamespace(scalar=lambda _: None)) + ) + + with pytest.raises(AppModelConfigBrokenError): + generator._get_app_model_config(app_model=SimpleNamespace(id="app"), conversation=conversation) + + def test_get_conversation_introduction_handles_missing_inputs(self): + app_config = _make_app_config(AppMode.CHAT) + app_config.additional_features.opening_statement = "Hello {{name}}" + entity = _make_chat_generate_entity(app_config) + entity.inputs = {} + + generator = MessageBasedAppGenerator() + + assert generator._get_conversation_introduction(entity) == "Hello {name}" diff --git a/api/tests/unit_tests/core/app/apps/test_message_based_app_queue_manager.py b/api/tests/unit_tests/core/app/apps/test_message_based_app_queue_manager.py new file mode 100644 index 0000000000..847ad0ce9b --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/test_message_based_app_queue_manager.py @@ -0,0 +1,65 @@ +from unittest.mock import Mock, patch + +import pytest + +from core.app.apps.base_app_queue_manager import PublishFrom +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.apps.message_based_app_queue_manager import MessageBasedAppQueueManager +from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.entities.queue_entities import QueueErrorEvent, QueueMessageEndEvent, QueueStopEvent + + +class TestMessageBasedAppQueueManager: + def test_publish_stops_on_terminal_events(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + mock_redis.setex.return_value = True + manager = MessageBasedAppQueueManager( + task_id="t1", + user_id="u1", + invoke_from=InvokeFrom.SERVICE_API, + conversation_id="c1", + app_mode="chat", + message_id="m1", + ) + + manager.stop_listen = Mock() + manager._is_stopped = Mock(return_value=False) + + manager._publish(QueueStopEvent(stopped_by=QueueStopEvent.StopBy.USER_MANUAL), Mock()) + manager.stop_listen.assert_called_once() + + def test_publish_raises_when_stopped(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + mock_redis.setex.return_value = True + manager = MessageBasedAppQueueManager( + task_id="t1", + user_id="u1", + invoke_from=InvokeFrom.SERVICE_API, + conversation_id="c1", + app_mode="chat", + message_id="m1", + ) + + manager._is_stopped = Mock(return_value=True) + + with pytest.raises(GenerateTaskStoppedError): + manager._publish(QueueErrorEvent(error=ValueError("boom")), PublishFrom.APPLICATION_MANAGER) + + def test_publish_enqueues_message_end(self): + with patch("core.app.apps.base_app_queue_manager.redis_client") as mock_redis: + mock_redis.setex.return_value = True + manager = MessageBasedAppQueueManager( + task_id="t1", + user_id="u1", + invoke_from=InvokeFrom.SERVICE_API, + conversation_id="c1", + app_mode="chat", + message_id="m1", + ) + + manager._is_stopped = Mock(return_value=False) + manager.stop_listen = Mock() + + manager._publish(QueueMessageEndEvent(), PublishFrom.TASK_PIPELINE) + + assert manager._q.qsize() == 1 diff --git a/api/tests/unit_tests/core/app/apps/test_message_generator.py b/api/tests/unit_tests/core/app/apps/test_message_generator.py new file mode 100644 index 0000000000..25377e633e --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/test_message_generator.py @@ -0,0 +1,29 @@ +from unittest.mock import Mock, patch + +from core.app.apps.message_generator import MessageGenerator +from models.model import AppMode + + +class TestMessageGenerator: + def test_get_response_topic(self): + channel = Mock() + channel.topic.return_value = "topic" + + with patch("core.app.apps.message_generator.get_pubsub_broadcast_channel", return_value=channel): + topic = MessageGenerator.get_response_topic(AppMode.WORKFLOW, "run-1") + + assert topic == "topic" + expected_key = MessageGenerator._make_channel_key(AppMode.WORKFLOW, "run-1") + channel.topic.assert_called_once_with(expected_key) + + def test_retrieve_events_passes_arguments(self): + with ( + patch("core.app.apps.message_generator.MessageGenerator.get_response_topic", return_value="topic"), + patch( + "core.app.apps.message_generator.stream_topic_events", return_value=iter([{"event": "ping"}]) + ) as mock_stream, + ): + events = list(MessageGenerator.retrieve_events(AppMode.WORKFLOW, "run-1", idle_timeout=1, ping_interval=2)) + + assert events == [{"event": "ping"}] + mock_stream.assert_called_once() diff --git a/api/tests/unit_tests/core/app/apps/test_pause_resume.py b/api/tests/unit_tests/core/app/apps/test_pause_resume.py index 44af89601c..2f73a8cda8 100644 --- a/api/tests/unit_tests/core/app/apps/test_pause_resume.py +++ b/api/tests/unit_tests/core/app/apps/test_pause_resume.py @@ -1,21 +1,18 @@ import sys import time -from pathlib import Path from types import ModuleType, SimpleNamespace from typing import Any -API_DIR = str(Path(__file__).resolve().parents[5]) -if API_DIR not in sys.path: - sys.path.insert(0, API_DIR) - import dify_graph.nodes.human_input.entities # noqa: F401 from core.app.apps.advanced_chat import app_generator as adv_app_gen_module from core.app.apps.workflow import app_generator as wf_app_gen_module from core.app.entities.app_invoke_entities import InvokeFrom from core.workflow.node_factory import DifyNodeFactory +from dify_graph.entities.base_node_data import BaseNodeData, RetryConfig +from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter from dify_graph.entities.pause_reason import SchedulingPause from dify_graph.entities.workflow_start_reason import WorkflowStartReason -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowNodeExecutionStatus from dify_graph.graph import Graph from dify_graph.graph_engine import GraphEngine from dify_graph.graph_engine.command_channels.in_memory_channel import InMemoryChannel @@ -27,7 +24,7 @@ from dify_graph.graph_events import ( NodeRunSucceededEvent, ) from dify_graph.node_events import NodeRunResult, PauseRequestedEvent -from dify_graph.nodes.base.entities import BaseNodeData, OutputVariableEntity, RetryConfig +from dify_graph.nodes.base.entities import OutputVariableEntity from dify_graph.nodes.base.node import Node from dify_graph.nodes.end.entities import EndNodeData from dify_graph.nodes.start.entities import StartNodeData @@ -47,11 +44,12 @@ if "core.ops.ops_trace_manager" not in sys.modules: class _StubToolNodeData(BaseNodeData): + type: NodeType = BuiltinNodeTypes.TOOL pause_on: bool = False class _StubToolNode(Node[_StubToolNodeData]): - node_type = NodeType.TOOL + node_type = BuiltinNodeTypes.TOOL @classmethod def version(cls) -> str: @@ -93,23 +91,24 @@ class _StubToolNode(Node[_StubToolNodeData]): def _patch_tool_node(mocker): original_create_node = DifyNodeFactory.create_node - def _patched_create_node(self, node_config: dict[str, object]) -> Node: - node_data = node_config.get("data", {}) - if isinstance(node_data, dict) and node_data.get("type") == NodeType.TOOL.value: + def _patched_create_node(self, node_config: dict[str, object] | NodeConfigDict) -> Node: + typed_node_config = NodeConfigDictAdapter.validate_python(node_config) + node_data = typed_node_config["data"] + if node_data.type == BuiltinNodeTypes.TOOL: return _StubToolNode( - id=str(node_config["id"]), - config=node_config, + id=str(typed_node_config["id"]), + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, ) - return original_create_node(self, node_config) + return original_create_node(self, typed_node_config) mocker.patch.object(DifyNodeFactory, "create_node", _patched_create_node) def _node_data(node_type: NodeType, data: BaseNodeData) -> dict[str, object]: node_data = data.model_dump() - node_data["type"] = node_type.value + node_data["type"] = str(node_type) return node_data @@ -125,11 +124,11 @@ def _build_graph_config(*, pause_on: str | None) -> dict[str, object]: ) nodes = [ - {"id": "start", "data": _node_data(NodeType.START, start_data)}, - {"id": "tool_a", "data": _node_data(NodeType.TOOL, tool_data_a)}, - {"id": "tool_b", "data": _node_data(NodeType.TOOL, tool_data_b)}, - {"id": "tool_c", "data": _node_data(NodeType.TOOL, tool_data_c)}, - {"id": "end", "data": _node_data(NodeType.END, end_data)}, + {"id": "start", "data": _node_data(BuiltinNodeTypes.START, start_data)}, + {"id": "tool_a", "data": _node_data(BuiltinNodeTypes.TOOL, tool_data_a)}, + {"id": "tool_b", "data": _node_data(BuiltinNodeTypes.TOOL, tool_data_b)}, + {"id": "tool_c", "data": _node_data(BuiltinNodeTypes.TOOL, tool_data_c)}, + {"id": "end", "data": _node_data(BuiltinNodeTypes.END, end_data)}, ] edges = [ {"source": "start", "target": "tool_a"}, @@ -158,7 +157,7 @@ def _build_graph(runtime_state: GraphRuntimeState, *, pause_on: str | None) -> G graph_runtime_state=runtime_state, ) - return Graph.init(graph_config=graph_config, node_factory=node_factory) + return Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") def _build_runtime_state(run_id: str) -> GraphRuntimeState: diff --git a/api/tests/unit_tests/core/app/apps/test_streaming_utils.py b/api/tests/unit_tests/core/app/apps/test_streaming_utils.py index 7b5447c01e..a7714c56ce 100644 --- a/api/tests/unit_tests/core/app/apps/test_streaming_utils.py +++ b/api/tests/unit_tests/core/app/apps/test_streaming_utils.py @@ -6,6 +6,7 @@ import queue import pytest from core.app.apps.message_based_app_generator import MessageBasedAppGenerator +from core.app.apps.streaming_utils import _normalize_terminal_events, stream_topic_events from core.app.entities.task_entities import StreamEvent from models.model import AppMode @@ -78,3 +79,30 @@ def test_retrieve_events_calls_on_subscribe_after_subscription(monkeypatch): assert event["event"] == StreamEvent.WORKFLOW_FINISHED.value with pytest.raises(StopIteration): next(generator) + + +def test_normalize_terminal_events_defaults(): + assert _normalize_terminal_events(None) == { + StreamEvent.WORKFLOW_FINISHED.value, + StreamEvent.WORKFLOW_PAUSED.value, + } + + +def test_stream_topic_events_emits_ping_and_idle_timeout(monkeypatch): + topic = FakeTopic() + times = [1000.0, 1000.0, 1001.0, 1001.0, 1002.0] + + def fake_time(): + return times.pop(0) + + monkeypatch.setattr("core.app.apps.streaming_utils.time.time", fake_time) + + generator = stream_topic_events( + topic=topic, + idle_timeout=10.0, + ping_interval=1.0, + ) + + assert next(generator) == StreamEvent.PING.value + # next receive yields None -> ping interval triggers + assert next(generator) == StreamEvent.PING.value diff --git a/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_core.py b/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_core.py new file mode 100644 index 0000000000..3f1dd14569 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_core.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +from datetime import datetime +from types import SimpleNamespace + +import pytest + +from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom +from core.app.entities.queue_entities import ( + QueueAgentLogEvent, + QueueIterationCompletedEvent, + QueueLoopCompletedEvent, + QueueTextChunkEvent, + QueueWorkflowPausedEvent, + QueueWorkflowStartedEvent, + QueueWorkflowSucceededEvent, +) +from dify_graph.entities.pause_reason import HumanInputRequired +from dify_graph.enums import BuiltinNodeTypes +from dify_graph.graph_events import ( + GraphRunPausedEvent, + GraphRunStartedEvent, + GraphRunSucceededEvent, + NodeRunAgentLogEvent, + NodeRunIterationSucceededEvent, + NodeRunLoopFailedEvent, + NodeRunStartedEvent, + NodeRunStreamChunkEvent, +) +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable + + +class TestWorkflowBasedAppRunner: + def test_resolve_user_from(self): + runner = WorkflowBasedAppRunner(queue_manager=SimpleNamespace(), app_id="app") + + assert runner._resolve_user_from(InvokeFrom.EXPLORE) == UserFrom.ACCOUNT + assert runner._resolve_user_from(InvokeFrom.DEBUGGER) == UserFrom.ACCOUNT + assert runner._resolve_user_from(InvokeFrom.WEB_APP) == UserFrom.END_USER + + def test_init_graph_validates_graph_structure(self): + runner = WorkflowBasedAppRunner(queue_manager=SimpleNamespace(), app_id="app") + + runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable.default()), + start_at=0.0, + ) + + with pytest.raises(ValueError, match="nodes or edges not found"): + runner._init_graph( + graph_config={}, + graph_runtime_state=runtime_state, + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + ) + + with pytest.raises(ValueError, match="nodes in workflow graph must be a list"): + runner._init_graph( + graph_config={"nodes": {}, "edges": []}, + graph_runtime_state=runtime_state, + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + ) + + with pytest.raises(ValueError, match="edges in workflow graph must be a list"): + runner._init_graph( + graph_config={"nodes": [], "edges": {}}, + graph_runtime_state=runtime_state, + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + ) + + def test_prepare_single_node_execution_requires_run(self): + runner = WorkflowBasedAppRunner(queue_manager=SimpleNamespace(), app_id="app") + + workflow = SimpleNamespace(environment_variables=[], graph_dict={}) + + with pytest.raises(ValueError, match="Neither single_iteration_run nor single_loop_run"): + runner._prepare_single_node_execution(workflow, None, None) + + def test_get_graph_and_variable_pool_for_single_node_run(self, monkeypatch): + runner = WorkflowBasedAppRunner(queue_manager=SimpleNamespace(), app_id="app") + graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable.default()), + start_at=0.0, + ) + + graph_config = { + "nodes": [{"id": "node-1", "data": {"type": "start", "version": "1"}}], + "edges": [], + } + workflow = SimpleNamespace(tenant_id="tenant", id="workflow", graph_dict=graph_config) + + monkeypatch.setattr( + "core.app.apps.workflow_app_runner.Graph.init", + lambda **kwargs: SimpleNamespace(), + ) + + class _NodeCls: + @staticmethod + def extract_variable_selector_to_variable_mapping(graph_config, config): + return {} + + from core.app.apps import workflow_app_runner + + monkeypatch.setattr( + workflow_app_runner, + "resolve_workflow_node_class", + lambda **_kwargs: _NodeCls, + ) + monkeypatch.setattr( + "core.app.apps.workflow_app_runner.load_into_variable_pool", + lambda **kwargs: None, + ) + monkeypatch.setattr( + "core.app.apps.workflow_app_runner.WorkflowEntry.mapping_user_inputs_to_variable_pool", + lambda **kwargs: None, + ) + + graph, variable_pool = runner._get_graph_and_variable_pool_for_single_node_run( + workflow=workflow, + node_id="node-1", + user_inputs={}, + graph_runtime_state=graph_runtime_state, + node_type_filter_key="iteration_id", + node_type_label="iteration", + ) + + assert graph is not None + assert variable_pool is graph_runtime_state.variable_pool + + def test_handle_graph_run_events_and_pause_notifications(self, monkeypatch): + published: list[object] = [] + + class _QueueManager: + def publish(self, event, publish_from): + published.append((event, publish_from)) + + runner = WorkflowBasedAppRunner(queue_manager=_QueueManager(), app_id="app") + graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable.default()), + start_at=0.0, + ) + graph_runtime_state.register_paused_node("node-1") + workflow_entry = SimpleNamespace(graph_engine=SimpleNamespace(graph_runtime_state=graph_runtime_state)) + + emails: list[dict] = [] + + class _Dispatch: + def apply_async(self, *, kwargs, queue): + emails.append({"kwargs": kwargs, "queue": queue}) + + monkeypatch.setattr( + "core.app.apps.workflow_app_runner.dispatch_human_input_email_task", + _Dispatch(), + ) + + reason = HumanInputRequired( + form_id="form", + form_content="content", + node_id="node-1", + node_title="Node", + ) + + runner._handle_event(workflow_entry, GraphRunStartedEvent()) + runner._handle_event(workflow_entry, GraphRunSucceededEvent(outputs={"ok": True})) + runner._handle_event(workflow_entry, GraphRunPausedEvent(reasons=[reason], outputs={})) + + assert any(isinstance(event, QueueWorkflowStartedEvent) for event, _ in published) + assert any(isinstance(event, QueueWorkflowSucceededEvent) for event, _ in published) + paused_event = next(event for event, _ in published if isinstance(event, QueueWorkflowPausedEvent)) + assert paused_event.paused_nodes == ["node-1"] + assert emails + + def test_handle_node_events_publishes_queue_events(self): + published: list[object] = [] + + class _QueueManager: + def publish(self, event, publish_from): + published.append(event) + + runner = WorkflowBasedAppRunner(queue_manager=_QueueManager(), app_id="app") + graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable.default()), + start_at=0.0, + ) + workflow_entry = SimpleNamespace(graph_engine=SimpleNamespace(graph_runtime_state=graph_runtime_state)) + + runner._handle_event( + workflow_entry, + NodeRunStartedEvent( + id="exec", + node_id="node", + node_type=BuiltinNodeTypes.START, + node_title="Start", + start_at=datetime.utcnow(), + ), + ) + runner._handle_event( + workflow_entry, + NodeRunStreamChunkEvent( + id="exec", + node_id="node", + node_type=BuiltinNodeTypes.START, + selector=["node", "text"], + chunk="hi", + is_final=False, + ), + ) + runner._handle_event( + workflow_entry, + NodeRunAgentLogEvent( + id="exec", + node_id="node", + node_type=BuiltinNodeTypes.START, + message_id="msg", + label="label", + node_execution_id="exec", + parent_id=None, + error=None, + status="done", + data={}, + metadata={}, + ), + ) + runner._handle_event( + workflow_entry, + NodeRunIterationSucceededEvent( + id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="Iter", + start_at=datetime.utcnow(), + inputs={}, + outputs={"ok": True}, + metadata={}, + steps=1, + ), + ) + runner._handle_event( + workflow_entry, + NodeRunLoopFailedEvent( + id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="Loop", + start_at=datetime.utcnow(), + inputs={}, + outputs={}, + metadata={}, + steps=1, + error="boom", + ), + ) + + assert any(isinstance(event, QueueTextChunkEvent) for event in published) + assert any(isinstance(event, QueueAgentLogEvent) for event in published) + assert any(isinstance(event, QueueIterationCompletedEvent) for event in published) + assert any(isinstance(event, QueueLoopCompletedEvent) for event in published) diff --git a/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_single_node.py b/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_single_node.py index 2e0715e974..178e26118e 100644 --- a/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_single_node.py +++ b/api/tests/unit_tests/core/app/apps/test_workflow_app_runner_single_node.py @@ -7,7 +7,9 @@ import pytest from core.app.apps.base_app_queue_manager import AppQueueManager from core.app.apps.workflow.app_runner import WorkflowAppRunner +from core.app.apps.workflow_app_runner import WorkflowBasedAppRunner from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity +from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from models.workflow import Workflow @@ -105,3 +107,57 @@ def test_run_uses_single_node_execution_branch( assert entry_kwargs["invoke_from"] == InvokeFrom.DEBUGGER assert entry_kwargs["variable_pool"] is variable_pool assert entry_kwargs["graph_runtime_state"] is graph_runtime_state + + +def test_single_node_run_validates_target_node_config(monkeypatch) -> None: + runner = WorkflowBasedAppRunner( + queue_manager=MagicMock(spec=AppQueueManager), + variable_loader=MagicMock(), + app_id="app", + ) + + workflow = MagicMock(spec=Workflow) + workflow.id = "workflow" + workflow.tenant_id = "tenant" + workflow.graph_dict = { + "nodes": [ + { + "id": "loop-node", + "data": { + "type": "loop", + "title": "Loop", + "loop_count": 1, + "break_conditions": [], + "logical_operator": "and", + }, + } + ], + "edges": [], + } + + _, _, graph_runtime_state = _make_graph_state() + seen_configs: list[object] = [] + original_validate_python = NodeConfigDictAdapter.validate_python + + def record_validate_python(value: object): + seen_configs.append(value) + return original_validate_python(value) + + monkeypatch.setattr(NodeConfigDictAdapter, "validate_python", record_validate_python) + + with ( + patch("core.app.apps.workflow_app_runner.DifyNodeFactory"), + patch("core.app.apps.workflow_app_runner.Graph.init", return_value=MagicMock()), + patch("core.app.apps.workflow_app_runner.load_into_variable_pool"), + patch("core.app.apps.workflow_app_runner.WorkflowEntry.mapping_user_inputs_to_variable_pool"), + ): + runner._get_graph_and_variable_pool_for_single_node_run( + workflow=workflow, + node_id="loop-node", + user_inputs={}, + graph_runtime_state=graph_runtime_state, + node_type_filter_key="loop_id", + node_type_label="loop", + ) + + assert seen_configs == [workflow.graph_dict["nodes"][0]] diff --git a/api/tests/unit_tests/core/app/apps/workflow/__init__.py b/api/tests/unit_tests/core/app/apps/workflow/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/app/apps/workflow/test_app_config_manager.py b/api/tests/unit_tests/core/app/apps/workflow/test_app_config_manager.py new file mode 100644 index 0000000000..f8dd6bf609 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/workflow/test_app_config_manager.py @@ -0,0 +1,61 @@ +from types import SimpleNamespace +from unittest.mock import patch + +from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager +from models.model import AppMode + + +class TestWorkflowAppConfigManager: + def test_get_app_config(self): + app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1", mode=AppMode.WORKFLOW.value) + workflow = SimpleNamespace(id="wf-1", features_dict={}) + + with ( + patch( + "core.app.apps.workflow.app_config_manager.SensitiveWordAvoidanceConfigManager.convert", + return_value=None, + ), + patch( + "core.app.apps.workflow.app_config_manager.WorkflowVariablesConfigManager.convert", + return_value=[], + ), + ): + app_config = WorkflowAppConfigManager.get_app_config(app_model, workflow) + + assert app_config.workflow_id == "wf-1" + assert app_config.app_mode == AppMode.WORKFLOW + + def test_config_validate_filters_keys(self): + def _add_key(key, value): + def _inner(*args, **kwargs): + # Support both positional and keyword arguments for config + if "config" in kwargs: + config = kwargs["config"] + elif len(args) > 0: + config = args[0] + else: + config = {} + config[key] = value + return config, [key] + + return _inner + + with ( + patch( + "core.app.apps.workflow.app_config_manager.FileUploadConfigManager.validate_and_set_defaults", + side_effect=_add_key("file_upload", 1), + ), + patch( + "core.app.apps.workflow.app_config_manager.TextToSpeechConfigManager.validate_and_set_defaults", + side_effect=_add_key("text_to_speech", 2), + ), + patch( + "core.app.apps.workflow.app_config_manager.SensitiveWordAvoidanceConfigManager.validate_and_set_defaults", + side_effect=_add_key("sensitive_word_avoidance", 3), + ), + ): + filtered = WorkflowAppConfigManager.config_validate(tenant_id="t1", config={}) + + assert filtered["file_upload"] == 1 + assert filtered["text_to_speech"] == 2 + assert filtered["sensitive_word_avoidance"] == 3 diff --git a/api/tests/unit_tests/core/app/apps/workflow/test_app_generator_extra.py b/api/tests/unit_tests/core/app/apps/workflow/test_app_generator_extra.py new file mode 100644 index 0000000000..09ad078a70 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/workflow/test_app_generator_extra.py @@ -0,0 +1,188 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import pytest + +from core.app.app_config.entities import AppAdditionalFeatures, WorkflowUIBasedAppConfig +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY, WorkflowAppGenerator +from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity +from core.ops.ops_trace_manager import TraceQueueManager +from models.model import AppMode + + +class TestWorkflowAppGeneratorValidation: + def test_should_prepare_user_inputs(self): + generator = WorkflowAppGenerator() + + assert generator._should_prepare_user_inputs({}) is True + assert generator._should_prepare_user_inputs({SKIP_PREPARE_USER_INPUTS_KEY: True}) is False + + def test_single_iteration_generate_validates_args(self): + generator = WorkflowAppGenerator() + + with pytest.raises(ValueError, match="node_id is required"): + generator.single_iteration_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="", + user=SimpleNamespace(), + args={"inputs": {}}, + streaming=False, + ) + + with pytest.raises(ValueError, match="inputs is required"): + generator.single_iteration_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="node", + user=SimpleNamespace(), + args={}, + streaming=False, + ) + + def test_single_loop_generate_validates_args(self): + generator = WorkflowAppGenerator() + + with pytest.raises(ValueError, match="node_id is required"): + generator.single_loop_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="", + user=SimpleNamespace(), + args=SimpleNamespace(inputs={}), + streaming=False, + ) + + with pytest.raises(ValueError, match="inputs is required"): + generator.single_loop_generate( + app_model=SimpleNamespace(), + workflow=SimpleNamespace(), + node_id="node", + user=SimpleNamespace(), + args=SimpleNamespace(inputs=None), + streaming=False, + ) + + +class TestWorkflowAppGeneratorHandleResponse: + def test_handle_response_closed_file_raises_stopped(self, monkeypatch): + generator = WorkflowAppGenerator() + + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.WORKFLOW, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + application_generate_entity = WorkflowAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + extras={}, + trace_manager=None, + workflow_execution_id="run-id", + call_depth=0, + ) + + class _Pipeline: + def __init__(self, **kwargs) -> None: + _ = kwargs + + def process(self): + raise ValueError("I/O operation on closed file.") + + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.WorkflowAppGenerateTaskPipeline", + _Pipeline, + ) + + with pytest.raises(GenerateTaskStoppedError): + generator._handle_response( + application_generate_entity=application_generate_entity, + workflow=SimpleNamespace(), + queue_manager=SimpleNamespace(), + user=SimpleNamespace(), + draft_var_saver_factory=lambda **kwargs: None, + stream=False, + ) + + +class TestWorkflowAppGeneratorGenerate: + def test_generate_skips_prepare_inputs_when_flag_set(self, monkeypatch): + generator = WorkflowAppGenerator() + + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.WORKFLOW, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.WorkflowAppConfigManager.get_app_config", + lambda app_model, workflow: app_config, + ) + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.FileUploadConfigManager.convert", + lambda features_dict, is_vision=False: None, + ) + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.file_factory.build_from_mappings", + lambda **kwargs: [], + ) + DummyTraceQueueManager = type( + "_DummyTraceQueueManager", + (TraceQueueManager,), + { + "__init__": lambda self, app_id=None, user_id=None: ( + setattr(self, "app_id", app_id) or setattr(self, "user_id", user_id) + ) + }, + ) + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.TraceQueueManager", + DummyTraceQueueManager, + ) + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.DifyCoreRepositoryFactory.create_workflow_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + lambda **kwargs: SimpleNamespace(), + ) + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.db", + SimpleNamespace(engine=object(), session=SimpleNamespace(close=lambda: None)), + ) + monkeypatch.setattr( + "core.app.apps.workflow.app_generator.sessionmaker", + lambda **kwargs: SimpleNamespace(), + ) + + prepare_inputs = pytest.fail + monkeypatch.setattr(generator, "_prepare_user_inputs", lambda **kwargs: prepare_inputs()) + + monkeypatch.setattr(generator, "_generate", lambda **kwargs: {"ok": True}) + + result = generator.generate( + app_model=SimpleNamespace(id="app", tenant_id="tenant"), + workflow=SimpleNamespace(features_dict={}), + user=SimpleNamespace(id="user", session_id="session"), + args={"inputs": {}, SKIP_PREPARE_USER_INPUTS_KEY: True}, + invoke_from=InvokeFrom.WEB_APP, + streaming=False, + call_depth=0, + ) + + assert result == {"ok": True} diff --git a/api/tests/unit_tests/core/app/apps/workflow/test_app_queue_manager.py b/api/tests/unit_tests/core/app/apps/workflow/test_app_queue_manager.py new file mode 100644 index 0000000000..6133be9867 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/workflow/test_app_queue_manager.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import pytest + +from core.app.apps.base_app_queue_manager import PublishFrom +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.apps.workflow.app_queue_manager import WorkflowAppQueueManager +from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.entities.queue_entities import QueueMessageEndEvent, QueuePingEvent + + +class TestWorkflowAppQueueManager: + def test_publish_stop_events_trigger_stop(self): + manager = WorkflowAppQueueManager( + task_id="task", + user_id="user", + invoke_from=InvokeFrom.DEBUGGER, + app_mode="workflow", + ) + manager._is_stopped = lambda: True + + with pytest.raises(GenerateTaskStoppedError): + manager._publish(QueueMessageEndEvent(llm_result=None), PublishFrom.APPLICATION_MANAGER) + + def test_publish_non_stop_event_does_not_raise(self): + manager = WorkflowAppQueueManager( + task_id="task", + user_id="user", + invoke_from=InvokeFrom.DEBUGGER, + app_mode="workflow", + ) + + manager._publish(QueuePingEvent(), PublishFrom.TASK_PIPELINE) diff --git a/api/tests/unit_tests/core/app/apps/workflow/test_errors.py b/api/tests/unit_tests/core/app/apps/workflow/test_errors.py new file mode 100644 index 0000000000..7461e06833 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/workflow/test_errors.py @@ -0,0 +1,9 @@ +from core.app.apps.workflow.errors import WorkflowPausedInBlockingModeError + + +class TestWorkflowErrors: + def test_workflow_paused_in_blocking_mode_error_attributes(self): + err = WorkflowPausedInBlockingModeError() + assert err.error_code == "workflow_paused_in_blocking_mode" + assert err.code == 400 + assert "blocking response mode" in err.description diff --git a/api/tests/unit_tests/core/app/apps/workflow/test_generate_response_converter.py b/api/tests/unit_tests/core/app/apps/workflow/test_generate_response_converter.py new file mode 100644 index 0000000000..62e94a7580 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/workflow/test_generate_response_converter.py @@ -0,0 +1,133 @@ +from collections.abc import Generator + +from core.app.apps.workflow.generate_response_converter import WorkflowAppGenerateResponseConverter +from core.app.entities.task_entities import ( + ErrorStreamResponse, + NodeFinishStreamResponse, + NodeStartStreamResponse, + PingStreamResponse, + WorkflowAppBlockingResponse, + WorkflowAppStreamResponse, +) +from dify_graph.enums import WorkflowExecutionStatus, WorkflowNodeExecutionStatus + + +class TestWorkflowGenerateResponseConverter: + def test_blocking_full_response(self): + blocking = WorkflowAppBlockingResponse( + task_id="t1", + workflow_run_id="r1", + data=WorkflowAppBlockingResponse.Data( + id="exec-1", + workflow_id="wf-1", + status=WorkflowExecutionStatus.SUCCEEDED, + outputs={"ok": True}, + error=None, + elapsed_time=1.2, + total_tokens=10, + total_steps=2, + created_at=1, + finished_at=2, + ), + ) + response = WorkflowAppGenerateResponseConverter.convert_blocking_full_response(blocking) + assert response["workflow_run_id"] == "r1" + + def test_stream_simple_response_node_events(self): + node_start = NodeStartStreamResponse( + task_id="t1", + workflow_run_id="r1", + data=NodeStartStreamResponse.Data( + id="e1", + node_id="n1", + node_type="answer", + title="Answer", + index=1, + created_at=1, + ), + ) + node_finish = NodeFinishStreamResponse( + task_id="t1", + workflow_run_id="r1", + data=NodeFinishStreamResponse.Data( + id="e1", + node_id="n1", + node_type="answer", + title="Answer", + index=1, + status=WorkflowNodeExecutionStatus.SUCCEEDED, + elapsed_time=0.1, + created_at=1, + finished_at=2, + ), + ) + + def stream() -> Generator[WorkflowAppStreamResponse, None, None]: + yield WorkflowAppStreamResponse(workflow_run_id="r1", stream_response=PingStreamResponse(task_id="t1")) + yield WorkflowAppStreamResponse(workflow_run_id="r1", stream_response=node_start) + yield WorkflowAppStreamResponse(workflow_run_id="r1", stream_response=node_finish) + yield WorkflowAppStreamResponse( + workflow_run_id="r1", stream_response=ErrorStreamResponse(task_id="t1", err=ValueError("boom")) + ) + + converted = list(WorkflowAppGenerateResponseConverter.convert_stream_simple_response(stream())) + assert converted[0] == "ping" + assert converted[1]["event"] == "node_started" + assert converted[2]["event"] == "node_finished" + assert converted[3]["event"] == "error" + + def test_convert_stream_simple_response_handles_ping_and_nodes(self): + def _gen(): + yield WorkflowAppStreamResponse(stream_response=PingStreamResponse(task_id="task")) + yield WorkflowAppStreamResponse( + workflow_run_id="run", + stream_response=NodeStartStreamResponse( + task_id="task", + workflow_run_id="run", + data=NodeStartStreamResponse.Data( + id="node-exec", + node_id="node", + node_type="start", + title="Start", + index=1, + created_at=1, + ), + ), + ) + yield WorkflowAppStreamResponse( + workflow_run_id="run", + stream_response=NodeFinishStreamResponse( + task_id="task", + workflow_run_id="run", + data=NodeFinishStreamResponse.Data( + id="node-exec", + node_id="node", + node_type="start", + title="Start", + index=1, + status=WorkflowNodeExecutionStatus.SUCCEEDED, + outputs={}, + created_at=1, + finished_at=2, + elapsed_time=1.0, + error=None, + ), + ), + ) + + chunks = list(WorkflowAppGenerateResponseConverter.convert_stream_simple_response(_gen())) + + assert chunks[0] == "ping" + assert chunks[1]["event"] == "node_started" + assert chunks[2]["event"] == "node_finished" + + def test_convert_stream_full_response_handles_error(self): + def _gen(): + yield WorkflowAppStreamResponse( + workflow_run_id="run", + stream_response=ErrorStreamResponse(task_id="task", err=ValueError("boom")), + ) + + chunks = list(WorkflowAppGenerateResponseConverter.convert_stream_full_response(_gen())) + + assert chunks[0]["event"] == "error" diff --git a/api/tests/unit_tests/core/app/apps/workflow/test_generate_task_pipeline_core.py b/api/tests/unit_tests/core/app/apps/workflow/test_generate_task_pipeline_core.py new file mode 100644 index 0000000000..f35710d207 --- /dev/null +++ b/api/tests/unit_tests/core/app/apps/workflow/test_generate_task_pipeline_core.py @@ -0,0 +1,868 @@ +from __future__ import annotations + +from contextlib import contextmanager +from datetime import datetime +from types import SimpleNamespace + +import pytest + +from core.app.app_config.entities import AppAdditionalFeatures, WorkflowUIBasedAppConfig +from core.app.apps.workflow.generate_task_pipeline import WorkflowAppGenerateTaskPipeline +from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity +from core.app.entities.queue_entities import ( + QueueAgentLogEvent, + QueueErrorEvent, + QueueHumanInputFormFilledEvent, + QueueHumanInputFormTimeoutEvent, + QueueIterationCompletedEvent, + QueueIterationNextEvent, + QueueIterationStartEvent, + QueueLoopCompletedEvent, + QueueLoopNextEvent, + QueueLoopStartEvent, + QueueNodeExceptionEvent, + QueueNodeFailedEvent, + QueueNodeRetryEvent, + QueueNodeStartedEvent, + QueueNodeSucceededEvent, + QueuePingEvent, + QueueStopEvent, + QueueTextChunkEvent, + QueueWorkflowFailedEvent, + QueueWorkflowPartialSuccessEvent, + QueueWorkflowPausedEvent, + QueueWorkflowStartedEvent, + QueueWorkflowSucceededEvent, +) +from core.app.entities.task_entities import ( + ErrorStreamResponse, + MessageAudioEndStreamResponse, + MessageAudioStreamResponse, + PingStreamResponse, + WorkflowFinishStreamResponse, + WorkflowPauseStreamResponse, + WorkflowStartStreamResponse, +) +from core.base.tts.app_generator_tts_publisher import AudioTrunk +from dify_graph.enums import BuiltinNodeTypes, WorkflowExecutionStatus +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from models.enums import CreatorUserRole +from models.model import AppMode, EndUser + + +def _make_pipeline(): + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.WORKFLOW, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + application_generate_entity = WorkflowAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + files=[], + user_id="user", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + trace_manager=None, + workflow_execution_id="run-id", + extras={}, + call_depth=0, + ) + workflow = SimpleNamespace(id="workflow-id", tenant_id="tenant", features_dict={}) + user = SimpleNamespace(id="user", session_id="session") + + pipeline = WorkflowAppGenerateTaskPipeline( + application_generate_entity=application_generate_entity, + workflow=workflow, + queue_manager=SimpleNamespace(invoke_from=InvokeFrom.WEB_APP, graph_runtime_state=None), + user=user, + stream=False, + draft_var_saver_factory=lambda **kwargs: None, + ) + + return pipeline + + +class TestWorkflowGenerateTaskPipeline: + def test_to_blocking_response_handles_pause(self): + pipeline = _make_pipeline() + + def _gen(): + yield WorkflowPauseStreamResponse( + task_id="task", + workflow_run_id="run", + data=WorkflowPauseStreamResponse.Data( + workflow_run_id="run", + status=WorkflowExecutionStatus.PAUSED, + outputs={}, + created_at=1, + elapsed_time=0.1, + total_tokens=0, + total_steps=0, + ), + ) + + response = pipeline._to_blocking_response(_gen()) + + assert response.data.status == WorkflowExecutionStatus.PAUSED + + def test_to_blocking_response_handles_finish(self): + pipeline = _make_pipeline() + + def _gen(): + yield WorkflowFinishStreamResponse( + task_id="task", + workflow_run_id="run", + data=WorkflowFinishStreamResponse.Data( + id="run", + workflow_id="workflow-id", + status=WorkflowExecutionStatus.SUCCEEDED, + outputs={"ok": True}, + error=None, + elapsed_time=1.0, + total_tokens=5, + total_steps=2, + created_at=1, + finished_at=2, + ), + ) + + response = pipeline._to_blocking_response(_gen()) + + assert response.data.outputs == {"ok": True} + + def test_listen_audio_msg_returns_audio_stream(self): + pipeline = _make_pipeline() + publisher = SimpleNamespace(check_and_get_audio=lambda: AudioTrunk(status="stream", audio="data")) + + response = pipeline._listen_audio_msg(publisher=publisher, task_id="task") + + assert isinstance(response, MessageAudioStreamResponse) + + def test_handle_ping_event(self): + pipeline = _make_pipeline() + pipeline._base_task_pipeline.ping_stream_response = lambda: PingStreamResponse(task_id="task") + + responses = list(pipeline._handle_ping_event(QueuePingEvent())) + + assert isinstance(responses[0], PingStreamResponse) + + def test_handle_error_event(self): + pipeline = _make_pipeline() + pipeline._base_task_pipeline.handle_error = lambda **kwargs: ValueError("boom") + pipeline._base_task_pipeline.error_to_stream_response = lambda err: err + + responses = list(pipeline._handle_error_event(QueueErrorEvent(error=ValueError("boom")))) + + assert isinstance(responses[0], ValueError) + + def test_handle_workflow_started_event_sets_run_id(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._workflow_response_converter.workflow_start_to_stream_response = lambda **kwargs: "started" + + @contextmanager + def _fake_session(): + yield SimpleNamespace() + + monkeypatch.setattr(pipeline, "_database_session", _fake_session) + monkeypatch.setattr(pipeline, "_save_workflow_app_log", lambda **kwargs: None) + + responses = list(pipeline._handle_workflow_started_event(QueueWorkflowStartedEvent())) + + assert pipeline._workflow_execution_id == "run-id" + assert responses == ["started"] + + def test_handle_node_succeeded_event_saves_output(self): + pipeline = _make_pipeline() + pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = lambda **kwargs: "done" + pipeline._save_output_for_event = lambda event, node_execution_id: None + pipeline._workflow_execution_id = "run-id" + + event = QueueNodeSucceededEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.START, + start_at=datetime.utcnow(), + inputs={}, + outputs={}, + process_data={}, + ) + + responses = list(pipeline._handle_node_succeeded_event(event)) + + assert responses == ["done"] + + def test_handle_workflow_failed_event_yields_error(self): + pipeline = _make_pipeline() + pipeline._workflow_execution_id = "run-id" + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._workflow_response_converter.workflow_finish_to_stream_response = lambda **kwargs: "finish" + pipeline._base_task_pipeline.handle_error = lambda **kwargs: ValueError("boom") + pipeline._base_task_pipeline.error_to_stream_response = lambda err: err + + responses = list( + pipeline._handle_workflow_failed_and_stop_events(QueueWorkflowFailedEvent(error="fail", exceptions_count=1)) + ) + + assert responses[0] == "finish" + + def test_handle_text_chunk_event_publishes_tts(self): + pipeline = _make_pipeline() + published: list[object] = [] + + class _Publisher: + def publish(self, message): + published.append(message) + + event = QueueTextChunkEvent(text="hi", from_variable_selector=["x"]) + queue_message = SimpleNamespace(event=event) + + responses = list( + pipeline._handle_text_chunk_event(event, tts_publisher=_Publisher(), queue_message=queue_message) + ) + + assert responses[0].data.text == "hi" + assert published == [queue_message] + + def test_dispatch_event_handles_node_failed(self): + pipeline = _make_pipeline() + pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = lambda **kwargs: "done" + + event = QueueNodeFailedEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.START, + start_at=datetime.utcnow(), + inputs={}, + outputs={}, + process_data={}, + error="err", + ) + + assert list(pipeline._dispatch_event(event)) == ["done"] + + def test_handle_stop_event_yields_finish(self): + pipeline = _make_pipeline() + pipeline._workflow_execution_id = "run-id" + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._workflow_response_converter.workflow_finish_to_stream_response = lambda **kwargs: "finish" + + responses = list( + pipeline._handle_workflow_failed_and_stop_events( + QueueStopEvent(stopped_by=QueueStopEvent.StopBy.USER_MANUAL) + ) + ) + + assert responses == ["finish"] + + def test_save_workflow_app_log_created_from(self): + pipeline = _make_pipeline() + pipeline._application_generate_entity.invoke_from = InvokeFrom.SERVICE_API + pipeline._user_id = "user" + added: list[object] = [] + + class _Session: + def add(self, item): + added.append(item) + + pipeline._save_workflow_app_log(session=_Session(), workflow_run_id="run-id") + + assert added + + def test_iteration_loop_and_human_input_handlers(self): + pipeline = _make_pipeline() + pipeline._workflow_execution_id = "run-id" + pipeline._workflow_response_converter.workflow_iteration_start_to_stream_response = lambda **kwargs: "iter" + pipeline._workflow_response_converter.workflow_iteration_next_to_stream_response = lambda **kwargs: "next" + pipeline._workflow_response_converter.workflow_iteration_completed_to_stream_response = lambda **kwargs: "done" + pipeline._workflow_response_converter.workflow_loop_start_to_stream_response = lambda **kwargs: "loop" + pipeline._workflow_response_converter.workflow_loop_next_to_stream_response = lambda **kwargs: "loop_next" + pipeline._workflow_response_converter.workflow_loop_completed_to_stream_response = lambda **kwargs: "loop_done" + pipeline._workflow_response_converter.human_input_form_filled_to_stream_response = lambda **kwargs: "filled" + pipeline._workflow_response_converter.human_input_form_timeout_to_stream_response = lambda **kwargs: "timeout" + pipeline._workflow_response_converter.handle_agent_log = lambda **kwargs: "log" + + iter_start = QueueIterationStartEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + iter_next = QueueIterationNextEvent( + index=1, + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + node_run_index=1, + ) + iter_done = QueueIterationCompletedEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + loop_start = QueueLoopStartEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + loop_next = QueueLoopNextEvent( + index=1, + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + node_run_index=1, + ) + loop_done = QueueLoopCompletedEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=datetime.utcnow(), + node_run_index=1, + ) + filled_event = QueueHumanInputFormFilledEvent( + node_execution_id="exec", + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="title", + rendered_content="content", + action_id="action", + action_text="action", + ) + timeout_event = QueueHumanInputFormTimeoutEvent( + node_id="node", + node_type=BuiltinNodeTypes.LLM, + node_title="title", + expiration_time=datetime.utcnow(), + ) + agent_event = QueueAgentLogEvent( + id="log", + label="label", + node_execution_id="exec", + parent_id=None, + error=None, + status="done", + data={}, + metadata={}, + node_id="node", + ) + + assert list(pipeline._handle_iteration_start_event(iter_start)) == ["iter"] + assert list(pipeline._handle_iteration_next_event(iter_next)) == ["next"] + assert list(pipeline._handle_iteration_completed_event(iter_done)) == ["done"] + assert list(pipeline._handle_loop_start_event(loop_start)) == ["loop"] + assert list(pipeline._handle_loop_next_event(loop_next)) == ["loop_next"] + assert list(pipeline._handle_loop_completed_event(loop_done)) == ["loop_done"] + assert list(pipeline._handle_human_input_form_filled_event(filled_event)) == ["filled"] + assert list(pipeline._handle_human_input_form_timeout_event(timeout_event)) == ["timeout"] + assert list(pipeline._handle_agent_log_event(agent_event)) == ["log"] + + def test_wrapper_process_stream_response_emits_audio_end(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._workflow_features_dict = { + "text_to_speech": {"enabled": True, "autoPlay": "enabled", "voice": "v", "language": "en"} + } + pipeline._process_stream_response = lambda **kwargs: iter([PingStreamResponse(task_id="task")]) + + class _Publisher: + def __init__(self, *args, **kwargs): + self.calls = 0 + + def check_and_get_audio(self): + self.calls += 1 + if self.calls == 1: + return AudioTrunk(status="stream", audio="data") + if self.calls == 2: + return None + return AudioTrunk(status="finish", audio="") + + def publish(self, message): + return None + + monkeypatch.setattr( + "core.app.apps.workflow.generate_task_pipeline.AppGeneratorTTSPublisher", + _Publisher, + ) + + responses = list(pipeline._wrapper_process_stream_response()) + + assert any(isinstance(item, MessageAudioStreamResponse) for item in responses) + assert any(isinstance(item, MessageAudioEndStreamResponse) for item in responses) + + def test_init_with_end_user_sets_role_and_system_user(self): + app_config = WorkflowUIBasedAppConfig( + tenant_id="tenant", + app_id="app", + app_mode=AppMode.WORKFLOW, + additional_features=AppAdditionalFeatures(), + variables=[], + workflow_id="workflow-id", + ) + application_generate_entity = WorkflowAppGenerateEntity.model_construct( + task_id="task", + app_config=app_config, + inputs={}, + files=[], + user_id="end-user-id", + stream=False, + invoke_from=InvokeFrom.WEB_APP, + trace_manager=None, + workflow_execution_id="run-id", + extras={}, + call_depth=0, + ) + workflow = SimpleNamespace(id="workflow-id", tenant_id="tenant", features_dict={}) + queue_manager = SimpleNamespace(invoke_from=InvokeFrom.WEB_APP, graph_runtime_state=None) + end_user = EndUser(tenant_id="tenant", type="session", name="user", session_id="session-id") + end_user.id = "end-user-id" + + pipeline = WorkflowAppGenerateTaskPipeline( + application_generate_entity=application_generate_entity, + workflow=workflow, + queue_manager=queue_manager, + user=end_user, + stream=False, + draft_var_saver_factory=lambda **kwargs: None, + ) + + assert pipeline._created_by_role == CreatorUserRole.END_USER + assert pipeline._workflow_system_variables.user_id == "session-id" + + def test_process_returns_stream_and_blocking_variants(self): + pipeline = _make_pipeline() + pipeline._base_task_pipeline.stream = True + pipeline._wrapper_process_stream_response = lambda **kwargs: iter([PingStreamResponse(task_id="task")]) + + stream_response = list(pipeline.process()) + assert len(stream_response) == 1 + assert stream_response[0].workflow_run_id is None + + pipeline._base_task_pipeline.stream = False + pipeline._wrapper_process_stream_response = lambda **kwargs: iter( + [ + WorkflowFinishStreamResponse( + task_id="task", + workflow_run_id="run-id", + data=WorkflowFinishStreamResponse.Data( + id="run-id", + workflow_id="workflow-id", + status=WorkflowExecutionStatus.SUCCEEDED, + outputs={}, + error=None, + elapsed_time=0.1, + total_tokens=0, + total_steps=0, + created_at=1, + finished_at=2, + ), + ) + ] + ) + + blocking_response = pipeline.process() + assert blocking_response.workflow_run_id == "run-id" + + def test_to_blocking_response_handles_error_and_unexpected_end(self): + pipeline = _make_pipeline() + + def _error_gen(): + yield ErrorStreamResponse(task_id="task", err=ValueError("boom")) + + with pytest.raises(ValueError, match="boom"): + pipeline._to_blocking_response(_error_gen()) + + def _unexpected_gen(): + yield PingStreamResponse(task_id="task") + + with pytest.raises(ValueError, match="queue listening stopped unexpectedly"): + pipeline._to_blocking_response(_unexpected_gen()) + + def test_to_stream_response_tracks_workflow_run_id(self): + pipeline = _make_pipeline() + + def _gen(): + yield WorkflowStartStreamResponse( + task_id="task", + workflow_run_id="run-id", + data=WorkflowStartStreamResponse.Data( + id="run-id", + workflow_id="workflow-id", + inputs={}, + created_at=1, + ), + ) + yield PingStreamResponse(task_id="task") + + stream_responses = list(pipeline._to_stream_response(_gen())) + assert stream_responses[0].workflow_run_id == "run-id" + assert stream_responses[1].workflow_run_id == "run-id" + + def test_listen_audio_msg_returns_none_without_publisher(self): + pipeline = _make_pipeline() + assert pipeline._listen_audio_msg(publisher=None, task_id="task") is None + + def test_wrapper_process_stream_response_without_tts(self): + pipeline = _make_pipeline() + pipeline._workflow_features_dict = {} + pipeline._process_stream_response = lambda **kwargs: iter([PingStreamResponse(task_id="task")]) + + responses = list(pipeline._wrapper_process_stream_response()) + assert responses == [PingStreamResponse(task_id="task")] + + def test_wrapper_process_stream_response_final_audio_none_then_finish(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._workflow_features_dict = { + "text_to_speech": {"enabled": True, "autoPlay": "enabled", "voice": "v", "language": "en"} + } + pipeline._process_stream_response = lambda **kwargs: iter([]) + + sleep_spy = [] + + class _Publisher: + def __init__(self, *args, **kwargs): + self.calls = 0 + + def check_and_get_audio(self): + self.calls += 1 + if self.calls == 1: + return None + return AudioTrunk(status="finish", audio="") + + def publish(self, message): + _ = message + + time_values = iter([0.0, 0.0, 0.2]) + monkeypatch.setattr("core.app.apps.workflow.generate_task_pipeline.time.time", lambda: next(time_values)) + monkeypatch.setattr( + "core.app.apps.workflow.generate_task_pipeline.time.sleep", lambda _: sleep_spy.append(True) + ) + monkeypatch.setattr( + "core.app.apps.workflow.generate_task_pipeline.AppGeneratorTTSPublisher", + _Publisher, + ) + + responses = list(pipeline._wrapper_process_stream_response()) + + assert sleep_spy + assert any(isinstance(item, MessageAudioEndStreamResponse) for item in responses) + + def test_wrapper_process_stream_response_handles_audio_exception(self, monkeypatch): + pipeline = _make_pipeline() + pipeline._workflow_features_dict = { + "text_to_speech": {"enabled": True, "autoPlay": "enabled", "voice": "v", "language": "en"} + } + pipeline._process_stream_response = lambda **kwargs: iter([]) + + class _Publisher: + def __init__(self, *args, **kwargs): + self.called = False + + def check_and_get_audio(self): + if not self.called: + self.called = True + raise RuntimeError("tts failure") + return AudioTrunk(status="finish", audio="") + + def publish(self, message): + _ = message + + logger_exception = [] + monkeypatch.setattr("core.app.apps.workflow.generate_task_pipeline.time.time", lambda: 0.0) + monkeypatch.setattr( + "core.app.apps.workflow.generate_task_pipeline.logger.exception", + lambda *args, **kwargs: logger_exception.append((args, kwargs)), + ) + monkeypatch.setattr( + "core.app.apps.workflow.generate_task_pipeline.AppGeneratorTTSPublisher", + _Publisher, + ) + + responses = list(pipeline._wrapper_process_stream_response()) + + assert logger_exception + assert any(isinstance(item, MessageAudioEndStreamResponse) for item in responses) + + def test_database_session_rolls_back_on_error(self, monkeypatch): + pipeline = _make_pipeline() + calls = {"commit": 0, "rollback": 0} + + class _Session: + def __init__(self, *args, **kwargs): + _ = args, kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def commit(self): + calls["commit"] += 1 + + def rollback(self): + calls["rollback"] += 1 + + monkeypatch.setattr("core.app.apps.workflow.generate_task_pipeline.Session", _Session) + monkeypatch.setattr("core.app.apps.workflow.generate_task_pipeline.db", SimpleNamespace(engine=object())) + + with pytest.raises(RuntimeError, match="db error"): + with pipeline._database_session(): + raise RuntimeError("db error") + + assert calls["commit"] == 0 + assert calls["rollback"] == 1 + + def test_node_retry_and_started_handlers_cover_none_and_value(self): + pipeline = _make_pipeline() + pipeline._workflow_execution_id = "run-id" + + retry_event = QueueNodeRetryEvent( + node_execution_id="exec", + node_id="node", + node_title="title", + node_type=BuiltinNodeTypes.LLM, + node_run_index=1, + start_at=datetime.utcnow(), + provider_type="provider", + provider_id="provider-id", + error="error", + retry_index=1, + ) + started_event = QueueNodeStartedEvent( + node_execution_id="exec", + node_id="node", + node_title="title", + node_type=BuiltinNodeTypes.LLM, + node_run_index=1, + start_at=datetime.utcnow(), + provider_type="provider", + provider_id="provider-id", + ) + + pipeline._workflow_response_converter.workflow_node_retry_to_stream_response = lambda **kwargs: None + assert list(pipeline._handle_node_retry_event(retry_event)) == [] + pipeline._workflow_response_converter.workflow_node_retry_to_stream_response = lambda **kwargs: "retry" + assert list(pipeline._handle_node_retry_event(retry_event)) == ["retry"] + + pipeline._workflow_response_converter.workflow_node_start_to_stream_response = lambda **kwargs: None + assert list(pipeline._handle_node_started_event(started_event)) == [] + pipeline._workflow_response_converter.workflow_node_start_to_stream_response = lambda **kwargs: "started" + assert list(pipeline._handle_node_started_event(started_event)) == ["started"] + + def test_handle_node_exception_event_saves_output(self): + pipeline = _make_pipeline() + saved_ids: list[str] = [] + pipeline._workflow_response_converter.workflow_node_finish_to_stream_response = lambda **kwargs: "failed" + pipeline._save_output_for_event = lambda event, node_execution_id: saved_ids.append(node_execution_id) + + event = QueueNodeExceptionEvent( + node_execution_id="exec-id", + node_id="node", + node_type=BuiltinNodeTypes.START, + start_at=datetime.utcnow(), + inputs={}, + outputs={}, + process_data={}, + error="boom", + ) + + responses = list(pipeline._handle_node_failed_events(event)) + assert responses == ["failed"] + assert saved_ids == ["exec-id"] + + def test_success_partial_and_pause_handlers(self): + pipeline = _make_pipeline() + pipeline._workflow_execution_id = "run-id" + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + + pipeline._workflow_response_converter.workflow_finish_to_stream_response = lambda **kwargs: "finish" + assert list(pipeline._handle_workflow_succeeded_event(QueueWorkflowSucceededEvent(outputs={}))) == ["finish"] + assert list( + pipeline._handle_workflow_partial_success_event( + QueueWorkflowPartialSuccessEvent(exceptions_count=2, outputs={}) + ) + ) == ["finish"] + + pipeline._workflow_response_converter.workflow_pause_to_stream_response = lambda **kwargs: [ + "pause-a", + "pause-b", + ] + pause_event = QueueWorkflowPausedEvent(reasons=[], outputs={}, paused_nodes=["node"]) + assert list(pipeline._handle_workflow_paused_event(pause_event)) == ["pause-a", "pause-b"] + + def test_text_chunk_handler_returns_empty_when_text_missing(self): + pipeline = _make_pipeline() + event = QueueTextChunkEvent.model_construct(text=None, from_variable_selector=None) + assert list(pipeline._handle_text_chunk_event(event)) == [] + + def test_dispatch_event_direct_failed_and_unhandled_paths(self): + pipeline = _make_pipeline() + pipeline._workflow_execution_id = "run-id" + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._handle_ping_event = lambda event, **kwargs: iter(["ping"]) + assert list(pipeline._dispatch_event(QueuePingEvent())) == ["ping"] + + pipeline._handle_workflow_failed_and_stop_events = lambda event, **kwargs: iter(["workflow-failed"]) + assert list(pipeline._dispatch_event(QueueWorkflowFailedEvent(error="failed", exceptions_count=1))) == [ + "workflow-failed" + ] + + assert list(pipeline._dispatch_event(SimpleNamespace())) == [] + + def test_process_stream_response_main_match_paths_and_cleanup(self): + pipeline = _make_pipeline() + pipeline._graph_runtime_state = GraphRuntimeState( + variable_pool=VariablePool(system_variables=SystemVariable(workflow_execution_id="run-id")), + start_at=0.0, + ) + pipeline._base_task_pipeline.queue_manager.listen = lambda: iter( + [ + SimpleNamespace(event=QueueWorkflowStartedEvent()), + SimpleNamespace(event=QueueTextChunkEvent(text="hello")), + SimpleNamespace(event=QueuePingEvent()), + SimpleNamespace(event=QueueErrorEvent(error="e")), + ] + ) + pipeline._handle_workflow_started_event = lambda event, **kwargs: iter(["started"]) + pipeline._handle_text_chunk_event = lambda event, **kwargs: iter(["text"]) + pipeline._dispatch_event = lambda event, **kwargs: iter(["dispatched"]) + pipeline._handle_error_event = lambda event, **kwargs: iter(["error"]) + publisher_calls: list[object] = [] + + class _Publisher: + def publish(self, message): + publisher_calls.append(message) + + responses = list(pipeline._process_stream_response(tts_publisher=_Publisher())) + assert responses == ["started", "text", "dispatched", "error"] + assert publisher_calls == [None] + + def test_process_stream_response_break_paths(self): + pipeline = _make_pipeline() + + pipeline._base_task_pipeline.queue_manager.listen = lambda: iter( + [SimpleNamespace(event=QueueWorkflowFailedEvent(error="fail", exceptions_count=1))] + ) + pipeline._handle_workflow_failed_and_stop_events = lambda event, **kwargs: iter(["failed"]) + assert list(pipeline._process_stream_response()) == ["failed"] + + pipeline._base_task_pipeline.queue_manager.listen = lambda: iter( + [SimpleNamespace(event=QueueWorkflowPausedEvent(reasons=[], outputs={}, paused_nodes=[]))] + ) + pipeline._handle_workflow_paused_event = lambda event, **kwargs: iter(["paused"]) + assert list(pipeline._process_stream_response()) == ["paused"] + + pipeline._base_task_pipeline.queue_manager.listen = lambda: iter( + [SimpleNamespace(event=QueueStopEvent(stopped_by=QueueStopEvent.StopBy.USER_MANUAL))] + ) + pipeline._handle_workflow_failed_and_stop_events = lambda event, **kwargs: iter(["stopped"]) + assert list(pipeline._process_stream_response()) == ["stopped"] + + def test_save_workflow_app_log_covers_invoke_from_variants(self): + pipeline = _make_pipeline() + pipeline._user_id = "user-id" + added: list[object] = [] + + class _Session: + def add(self, item): + added.append(item) + + pipeline._application_generate_entity.invoke_from = InvokeFrom.EXPLORE + pipeline._save_workflow_app_log(session=_Session(), workflow_run_id="run-id") + assert added[-1].created_from == "installed-app" + + pipeline._application_generate_entity.invoke_from = InvokeFrom.WEB_APP + pipeline._save_workflow_app_log(session=_Session(), workflow_run_id="run-id") + assert added[-1].created_from == "web-app" + + count_before = len(added) + pipeline._application_generate_entity.invoke_from = InvokeFrom.DEBUGGER + pipeline._save_workflow_app_log(session=_Session(), workflow_run_id="run-id") + assert len(added) == count_before + + pipeline._application_generate_entity.invoke_from = InvokeFrom.WEB_APP + pipeline._save_workflow_app_log(session=_Session(), workflow_run_id=None) + assert len(added) == count_before + + def test_save_output_for_event_writes_draft_variables(self, monkeypatch): + pipeline = _make_pipeline() + saver_calls: list[tuple[object, object]] = [] + captured_factory_args: dict[str, object] = {} + + class _Saver: + def save(self, process_data, outputs): + saver_calls.append((process_data, outputs)) + + def _factory(**kwargs): + captured_factory_args.update(kwargs) + return _Saver() + + class _Begin: + def __enter__(self): + return None + + def __exit__(self, exc_type, exc, tb): + return False + + class _Session: + def __init__(self, *args, **kwargs): + _ = args, kwargs + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def begin(self): + return _Begin() + + pipeline._draft_var_saver_factory = _factory + monkeypatch.setattr("core.app.apps.workflow.generate_task_pipeline.Session", _Session) + monkeypatch.setattr("core.app.apps.workflow.generate_task_pipeline.db", SimpleNamespace(engine=object())) + + event = QueueNodeSucceededEvent( + node_execution_id="exec-id", + node_id="node-id", + node_type=BuiltinNodeTypes.START, + in_loop_id="loop-id", + start_at=datetime.utcnow(), + process_data={"k": "v"}, + outputs={"out": 1}, + ) + pipeline._save_output_for_event(event=event, node_execution_id="exec-id") + + assert captured_factory_args["node_execution_id"] == "exec-id" + assert captured_factory_args["enclosing_node_id"] == "loop-id" + assert saver_calls == [({"k": "v"}, {"out": 1})] diff --git a/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py b/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py index 7d0e1d25f6..bdc889d941 100644 --- a/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py +++ b/api/tests/unit_tests/core/app/layers/test_conversation_variable_persist_layer.py @@ -4,7 +4,7 @@ from unittest.mock import Mock from core.app.layers.conversation_variable_persist_layer import ConversationVariablePersistenceLayer from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, NodeType, WorkflowNodeExecutionStatus from dify_graph.graph_engine.protocols.command_channel import CommandChannel from dify_graph.graph_events.node import NodeRunSucceededEvent from dify_graph.node_events import NodeRunResult @@ -78,7 +78,7 @@ def test_persists_conversation_variables_from_assigner_output(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(variable_pool, conversation_id), Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.VARIABLE_ASSIGNER, process_data=process_data) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.VARIABLE_ASSIGNER, process_data=process_data) layer.on_event(event) updater.update.assert_called_once_with(conversation_id=conversation_id, variable=variable) @@ -100,7 +100,7 @@ def test_skips_when_outputs_missing(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(variable_pool, conversation_id), Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.VARIABLE_ASSIGNER) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.VARIABLE_ASSIGNER) layer.on_event(event) updater.update.assert_not_called() @@ -112,7 +112,7 @@ def test_skips_non_assigner_nodes(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(MockReadOnlyVariablePool()), Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.LLM) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.LLM) layer.on_event(event) updater.update.assert_not_called() @@ -137,7 +137,7 @@ def test_skips_non_conversation_variables(): layer = ConversationVariablePersistenceLayer(updater) layer.initialize(_build_graph_runtime_state(variable_pool, conversation_id), Mock(spec=CommandChannel)) - event = _build_node_run_succeeded_event(node_type=NodeType.VARIABLE_ASSIGNER, process_data=process_data) + event = _build_node_run_succeeded_event(node_type=BuiltinNodeTypes.VARIABLE_ASSIGNER, process_data=process_data) layer.on_event(event) updater.update.assert_not_called() diff --git a/api/tests/unit_tests/core/app/task_pipeline/test_easy_ui_message_end_files.py b/api/tests/unit_tests/core/app/task_pipeline/test_easy_ui_message_end_files.py new file mode 100644 index 0000000000..582990c88a --- /dev/null +++ b/api/tests/unit_tests/core/app/task_pipeline/test_easy_ui_message_end_files.py @@ -0,0 +1,425 @@ +""" +Unit tests for EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response method. + +This test suite ensures that the files array is correctly populated in the message_end +SSE event, which is critical for vision/image chat responses to render correctly. + +Test Coverage: +- Files array populated when MessageFile records exist +- Files array is None when no MessageFile records exist +- Correct signed URL generation for LOCAL_FILE transfer method +- Correct URL handling for REMOTE_URL transfer method +- Correct URL handling for TOOL_FILE transfer method +- Proper file metadata formatting (filename, mime_type, size, extension) +""" + +import uuid +from unittest.mock import MagicMock, Mock, patch + +import pytest +from sqlalchemy.orm import Session + +from core.app.entities.task_entities import MessageEndStreamResponse +from core.app.task_pipeline.easy_ui_based_generate_task_pipeline import EasyUIBasedGenerateTaskPipeline +from dify_graph.file.enums import FileTransferMethod +from models.model import MessageFile, UploadFile + + +class TestMessageEndStreamResponseFiles: + """Test suite for files array population in message_end SSE event.""" + + @pytest.fixture + def mock_pipeline(self): + """Create a mock EasyUIBasedGenerateTaskPipeline instance.""" + pipeline = Mock(spec=EasyUIBasedGenerateTaskPipeline) + pipeline._message_id = str(uuid.uuid4()) + pipeline._task_state = Mock() + pipeline._task_state.metadata = Mock() + pipeline._task_state.metadata.model_dump = Mock(return_value={"test": "metadata"}) + pipeline._task_state.llm_result = Mock() + pipeline._task_state.llm_result.usage = Mock() + pipeline._application_generate_entity = Mock() + pipeline._application_generate_entity.task_id = str(uuid.uuid4()) + return pipeline + + @pytest.fixture + def mock_message_file_local(self): + """Create a mock MessageFile with LOCAL_FILE transfer method.""" + message_file = Mock(spec=MessageFile) + message_file.id = str(uuid.uuid4()) + message_file.message_id = str(uuid.uuid4()) + message_file.transfer_method = FileTransferMethod.LOCAL_FILE + message_file.upload_file_id = str(uuid.uuid4()) + message_file.url = None + message_file.type = "image" + return message_file + + @pytest.fixture + def mock_message_file_remote(self): + """Create a mock MessageFile with REMOTE_URL transfer method.""" + message_file = Mock(spec=MessageFile) + message_file.id = str(uuid.uuid4()) + message_file.message_id = str(uuid.uuid4()) + message_file.transfer_method = FileTransferMethod.REMOTE_URL + message_file.upload_file_id = None + message_file.url = "https://example.com/image.jpg" + message_file.type = "image" + return message_file + + @pytest.fixture + def mock_message_file_tool(self): + """Create a mock MessageFile with TOOL_FILE transfer method.""" + message_file = Mock(spec=MessageFile) + message_file.id = str(uuid.uuid4()) + message_file.message_id = str(uuid.uuid4()) + message_file.transfer_method = FileTransferMethod.TOOL_FILE + message_file.upload_file_id = None + message_file.url = "tool_file_123.png" + message_file.type = "image" + return message_file + + @pytest.fixture + def mock_upload_file(self, mock_message_file_local): + """Create a mock UploadFile.""" + upload_file = Mock(spec=UploadFile) + upload_file.id = mock_message_file_local.upload_file_id + upload_file.name = "test_image.png" + upload_file.mime_type = "image/png" + upload_file.size = 1024 + upload_file.extension = "png" + return upload_file + + def test_message_end_with_no_files(self, mock_pipeline): + """Test that files array is None when no MessageFile records exist.""" + # Arrange + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + mock_session.scalars.return_value.all.return_value = [] + + # Act + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + # Assert + assert isinstance(result, MessageEndStreamResponse) + assert result.files is None + assert result.id == mock_pipeline._message_id + assert result.metadata == {"test": "metadata"} + + def test_message_end_with_local_file(self, mock_pipeline, mock_message_file_local, mock_upload_file): + """Test that files array is populated correctly for LOCAL_FILE transfer method.""" + # Arrange + mock_message_file_local.message_id = mock_pipeline._message_id + + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + patch("core.app.task_pipeline.message_file_utils.file_helpers.get_signed_file_url") as mock_get_url, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock database queries + # First query: MessageFile + mock_message_files_result = Mock() + mock_message_files_result.all.return_value = [mock_message_file_local] + + # Second query: UploadFile (batch query to avoid N+1) + mock_upload_files_result = Mock() + mock_upload_files_result.all.return_value = [mock_upload_file] + + # Setup scalars to return different results for different queries + call_count = [0] # Use list to allow modification in nested function + + def scalars_side_effect(query): + call_count[0] += 1 + # First call is for MessageFile, second call is for UploadFile + if call_count[0] == 1: + return mock_message_files_result + else: + return mock_upload_files_result + + mock_session.scalars.side_effect = scalars_side_effect + mock_get_url.return_value = "https://example.com/signed-url?signature=abc123" + + # Act + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + # Assert + assert isinstance(result, MessageEndStreamResponse) + assert result.files is not None + assert len(result.files) == 1 + + file_dict = result.files[0] + assert file_dict["related_id"] == mock_message_file_local.id + assert file_dict["filename"] == "test_image.png" + assert file_dict["mime_type"] == "image/png" + assert file_dict["size"] == 1024 + assert file_dict["extension"] == ".png" + assert file_dict["type"] == "image" + assert file_dict["transfer_method"] == FileTransferMethod.LOCAL_FILE.value + assert "https://example.com/signed-url" in file_dict["url"] + assert file_dict["upload_file_id"] == mock_message_file_local.upload_file_id + assert file_dict["remote_url"] == "" + + # Verify database queries + # Should be called twice: once for MessageFile, once for UploadFile + assert mock_session.scalars.call_count == 2 + mock_get_url.assert_called_once_with(upload_file_id=str(mock_upload_file.id)) + + def test_message_end_with_remote_url(self, mock_pipeline, mock_message_file_remote): + """Test that files array is populated correctly for REMOTE_URL transfer method.""" + # Arrange + mock_message_file_remote.message_id = mock_pipeline._message_id + + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock database queries + mock_scalars_result = Mock() + mock_scalars_result.all.return_value = [mock_message_file_remote] + mock_session.scalars.return_value = mock_scalars_result + + # Act + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + # Assert + assert isinstance(result, MessageEndStreamResponse) + assert result.files is not None + assert len(result.files) == 1 + + file_dict = result.files[0] + assert file_dict["related_id"] == mock_message_file_remote.id + assert file_dict["filename"] == "image.jpg" + assert file_dict["url"] == "https://example.com/image.jpg" + assert file_dict["extension"] == ".jpg" + assert file_dict["type"] == "image" + assert file_dict["transfer_method"] == FileTransferMethod.REMOTE_URL.value + assert file_dict["remote_url"] == "https://example.com/image.jpg" + assert file_dict["upload_file_id"] == mock_message_file_remote.id + + # Verify only one query for message_files is made + mock_session.scalars.assert_called_once() + + def test_message_end_with_tool_file_http(self, mock_pipeline, mock_message_file_tool): + """Test that files array is populated correctly for TOOL_FILE with HTTP URL.""" + # Arrange + mock_message_file_tool.message_id = mock_pipeline._message_id + mock_message_file_tool.url = "https://example.com/tool_file.png" + + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock database queries + mock_scalars_result = Mock() + mock_scalars_result.all.return_value = [mock_message_file_tool] + mock_session.scalars.return_value = mock_scalars_result + + # Act + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + # Assert + assert isinstance(result, MessageEndStreamResponse) + assert result.files is not None + assert len(result.files) == 1 + + file_dict = result.files[0] + assert file_dict["url"] == "https://example.com/tool_file.png" + assert file_dict["filename"] == "tool_file.png" + assert file_dict["extension"] == ".png" + assert file_dict["transfer_method"] == FileTransferMethod.TOOL_FILE.value + + def test_message_end_with_tool_file_local(self, mock_pipeline, mock_message_file_tool): + """Test that files array is populated correctly for TOOL_FILE with local path.""" + # Arrange + mock_message_file_tool.message_id = mock_pipeline._message_id + mock_message_file_tool.url = "tool_file_123.png" + + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + patch("core.app.task_pipeline.message_file_utils.sign_tool_file") as mock_sign_tool, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock database queries + mock_scalars_result = Mock() + mock_scalars_result.all.return_value = [mock_message_file_tool] + mock_session.scalars.return_value = mock_scalars_result + + mock_sign_tool.return_value = "https://example.com/signed-tool-file.png?signature=xyz" + + # Act + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + # Assert + assert isinstance(result, MessageEndStreamResponse) + assert result.files is not None + assert len(result.files) == 1 + + file_dict = result.files[0] + assert "https://example.com/signed-tool-file.png" in file_dict["url"] + assert file_dict["filename"] == "tool_file_123.png" + assert file_dict["extension"] == ".png" + assert file_dict["transfer_method"] == FileTransferMethod.TOOL_FILE.value + + # Verify tool file signing was called + mock_sign_tool.assert_called_once_with(tool_file_id="tool_file_123", extension=".png") + + def test_message_end_with_tool_file_long_extension(self, mock_pipeline, mock_message_file_tool): + """Test that TOOL_FILE extensions longer than MAX_TOOL_FILE_EXTENSION_LENGTH fall back to .bin.""" + mock_message_file_tool.message_id = mock_pipeline._message_id + mock_message_file_tool.url = "tool_file_abc.verylongextension" + + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + patch("core.app.task_pipeline.message_file_utils.sign_tool_file") as mock_sign_tool, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + mock_scalars_result = Mock() + mock_scalars_result.all.return_value = [mock_message_file_tool] + mock_session.scalars.return_value = mock_scalars_result + mock_sign_tool.return_value = "https://example.com/signed.bin" + + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + assert result.files is not None + file_dict = result.files[0] + assert file_dict["extension"] == ".bin" + mock_sign_tool.assert_called_once_with(tool_file_id="tool_file_abc", extension=".bin") + + def test_message_end_with_multiple_files( + self, mock_pipeline, mock_message_file_local, mock_message_file_remote, mock_upload_file + ): + """Test that files array contains all MessageFile records when multiple exist.""" + # Arrange + mock_message_file_local.message_id = mock_pipeline._message_id + mock_message_file_remote.message_id = mock_pipeline._message_id + + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + patch("core.app.task_pipeline.message_file_utils.file_helpers.get_signed_file_url") as mock_get_url, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock database queries + # First query: MessageFile + mock_message_files_result = Mock() + mock_message_files_result.all.return_value = [mock_message_file_local, mock_message_file_remote] + + # Second query: UploadFile (batch query to avoid N+1) + mock_upload_files_result = Mock() + mock_upload_files_result.all.return_value = [mock_upload_file] + + # Setup scalars to return different results for different queries + call_count = [0] # Use list to allow modification in nested function + + def scalars_side_effect(query): + call_count[0] += 1 + # First call is for MessageFile, second call is for UploadFile + if call_count[0] == 1: + return mock_message_files_result + else: + return mock_upload_files_result + + mock_session.scalars.side_effect = scalars_side_effect + mock_get_url.return_value = "https://example.com/signed-url?signature=abc123" + + # Act + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + # Assert + assert isinstance(result, MessageEndStreamResponse) + assert result.files is not None + assert len(result.files) == 2 + + # Verify both files are present + file_ids = [f["related_id"] for f in result.files] + assert mock_message_file_local.id in file_ids + assert mock_message_file_remote.id in file_ids + + def test_message_end_with_local_file_no_upload_file(self, mock_pipeline, mock_message_file_local): + """Test fallback when UploadFile is not found for LOCAL_FILE.""" + # Arrange + mock_message_file_local.message_id = mock_pipeline._message_id + + with ( + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.db") as mock_db, + patch("core.app.task_pipeline.easy_ui_based_generate_task_pipeline.Session") as mock_session_class, + patch("core.app.task_pipeline.message_file_utils.file_helpers.get_signed_file_url") as mock_get_url, + ): + mock_engine = MagicMock() + mock_db.engine = mock_engine + + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + + # Mock database queries + # First query: MessageFile + mock_message_files_result = Mock() + mock_message_files_result.all.return_value = [mock_message_file_local] + + # Second query: UploadFile (batch query) - returns empty list (not found) + mock_upload_files_result = Mock() + mock_upload_files_result.all.return_value = [] # UploadFile not found + + # Setup scalars to return different results for different queries + call_count = [0] # Use list to allow modification in nested function + + def scalars_side_effect(query): + call_count[0] += 1 + # First call is for MessageFile, second call is for UploadFile + if call_count[0] == 1: + return mock_message_files_result + else: + return mock_upload_files_result + + mock_session.scalars.side_effect = scalars_side_effect + mock_get_url.return_value = "https://example.com/fallback-url?signature=def456" + + # Act + result = EasyUIBasedGenerateTaskPipeline._message_end_to_stream_response(mock_pipeline) + + # Assert + assert isinstance(result, MessageEndStreamResponse) + assert result.files is not None + assert len(result.files) == 1 + + file_dict = result.files[0] + assert "https://example.com/fallback-url" in file_dict["url"] + # Verify fallback URL was generated using upload_file_id from message_file + mock_get_url.assert_called_with(upload_file_id=str(mock_message_file_local.upload_file_id)) diff --git a/api/tests/unit_tests/core/base/test_app_generator_tts_publisher.py b/api/tests/unit_tests/core/base/test_app_generator_tts_publisher.py new file mode 100644 index 0000000000..3759b6aa37 --- /dev/null +++ b/api/tests/unit_tests/core/base/test_app_generator_tts_publisher.py @@ -0,0 +1,390 @@ +import base64 +import queue +from unittest.mock import MagicMock + +import pytest + +from core.base.tts.app_generator_tts_publisher import ( + AppGeneratorTTSPublisher, + AudioTrunk, + _invoice_tts, + _process_future, +) + +# ========================= +# Fixtures +# ========================= + + +@pytest.fixture +def mock_model_instance(mocker): + model = mocker.MagicMock() + model.invoke_tts.return_value = [b"audio1", b"audio2"] + model.get_tts_voices.return_value = [{"value": "voice1"}, {"value": "voice2"}] + return model + + +@pytest.fixture +def mock_model_manager(mocker, mock_model_instance): + manager = mocker.MagicMock() + manager.get_default_model_instance.return_value = mock_model_instance + mocker.patch( + "core.base.tts.app_generator_tts_publisher.ModelManager", + return_value=manager, + ) + return manager + + +@pytest.fixture(autouse=True) +def patch_threads(mocker): + """Prevent real threads from starting during tests""" + mocker.patch("threading.Thread.start", return_value=None) + + +# ========================= +# AudioTrunk Tests +# ========================= + + +class TestAudioTrunk: + def test_audio_trunk_initialization(self): + trunk = AudioTrunk("responding", b"data") + assert trunk.status == "responding" + assert trunk.audio == b"data" + + +# ========================= +# _invoice_tts Tests +# ========================= + + +class TestInvoiceTTS: + @pytest.mark.parametrize( + "text", + [None, "", " "], + ) + def test_invoice_tts_empty_or_none_returns_none(self, text, mock_model_instance): + result = _invoice_tts(text, mock_model_instance, "tenant", "voice1") + assert result is None + mock_model_instance.invoke_tts.assert_not_called() + + def test_invoice_tts_valid_text(self, mock_model_instance): + result = _invoice_tts(" hello ", mock_model_instance, "tenant", "voice1") + mock_model_instance.invoke_tts.assert_called_once_with( + content_text="hello", + user="responding_tts", + tenant_id="tenant", + voice="voice1", + ) + assert result == [b"audio1", b"audio2"] + + +# ========================= +# _process_future Tests +# ========================= + + +class TestProcessFuture: + def test_process_future_normal_flow(self): + future_queue = queue.Queue() + audio_queue = queue.Queue() + + future = MagicMock() + future.result.return_value = [b"abc"] + + future_queue.put(future) + future_queue.put(None) + + _process_future(future_queue, audio_queue) + + first = audio_queue.get() + assert first.status == "responding" + assert first.audio == base64.b64encode(b"abc") + + finish = audio_queue.get() + assert finish.status == "finish" + + def test_process_future_empty_result(self): + future_queue = queue.Queue() + audio_queue = queue.Queue() + + future = MagicMock() + future.result.return_value = None + + future_queue.put(future) + future_queue.put(None) + + _process_future(future_queue, audio_queue) + + finish = audio_queue.get() + assert finish.status == "finish" + + def test_process_future_exception(self, mocker): + future_queue = queue.Queue() + audio_queue = queue.Queue() + + future = MagicMock() + future.result.side_effect = Exception("error") + + future_queue.put(future) + + _process_future(future_queue, audio_queue) + + finish = audio_queue.get() + assert finish.status == "finish" + + +# ========================= +# AppGeneratorTTSPublisher Tests +# ========================= + + +class TestAppGeneratorTTSPublisher: + def test_initialization_valid_voice(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + assert publisher.voice == "voice1" + assert publisher.max_sentence == 2 + assert publisher.msg_text == "" + + def test_initialization_invalid_voice_fallback(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "invalid_voice") + assert publisher.voice == "voice1" + + def test_publish_puts_message_in_queue(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + message = MagicMock() + publisher.publish(message) + assert publisher._msg_queue.get() == message + + def test_check_and_get_audio_no_audio(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + result = publisher.check_and_get_audio() + assert result is None + + def test_check_and_get_audio_non_finish_event(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + trunk = AudioTrunk("responding", b"abc") + publisher._audio_queue.put(trunk) + + result = publisher.check_and_get_audio() + + assert result.status == "responding" + assert publisher._last_audio_event == trunk + + def test_check_and_get_audio_finish_event(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + finish_trunk = AudioTrunk("finish", b"") + publisher._audio_queue.put(finish_trunk) + + result = publisher.check_and_get_audio() + + assert result.status == "finish" + publisher.executor.shutdown.assert_called_once() + + def test_check_and_get_audio_cached_finish(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + publisher._last_audio_event = AudioTrunk("finish", b"") + + result = publisher.check_and_get_audio() + + assert result.status == "finish" + publisher.executor.shutdown.assert_called_once() + + @pytest.mark.parametrize( + ("text", "expected_sentences", "expected_remaining"), + [ + ("Hello world.", ["Hello world."], ""), + ("Hello world! How are you?", ["Hello world!", " How are you?"], ""), + ("No punctuation", [], "No punctuation"), + ("", [], ""), + ], + ) + def test_extract_sentence(self, mock_model_manager, text, expected_sentences, expected_remaining): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + sentences, remaining = publisher._extract_sentence(text) + assert sentences == expected_sentences + assert remaining == expected_remaining + + def test_runtime_handles_none_message_with_buffer(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + publisher.msg_text = "Hello." + + publisher._msg_queue.put(None) + publisher._runtime() + + publisher.executor.submit.assert_called_once() + + def test_runtime_handles_none_message_without_buffer(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + publisher.msg_text = " " + + publisher._msg_queue.put(None) + publisher._runtime() + + publisher.executor.submit.assert_not_called() + + def test_runtime_sentence_threshold_triggers_submit(self, mock_model_manager, mocker): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + + # Force sentence extraction to hit threshold condition + mocker.patch.object( + publisher, + "_extract_sentence", + return_value=(["Hello world.", " Second sentence."], ""), + ) + + from core.app.entities.queue_entities import QueueTextChunkEvent + + event = MagicMock() + event.event = MagicMock(spec=QueueTextChunkEvent) + event.event.text = "Hello world. Second sentence." + + publisher._msg_queue.put(event) + publisher._msg_queue.put(None) + + publisher._runtime() + + assert publisher.executor.submit.called + + def test_runtime_handles_text_chunk_event(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + + from core.app.entities.queue_entities import QueueTextChunkEvent + + event = MagicMock() + event.event = MagicMock(spec=QueueTextChunkEvent) + event.event.text = "Hello world." + + publisher._msg_queue.put(event) + publisher._msg_queue.put(None) + + publisher._runtime() + + assert publisher.executor.submit.called + + def test_runtime_handles_node_succeeded_event_with_output(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + + from core.app.entities.queue_entities import QueueNodeSucceededEvent + + event = MagicMock() + event.event = MagicMock(spec=QueueNodeSucceededEvent) + event.event.outputs = {"output": "Hello world."} + + publisher._msg_queue.put(event) + publisher._msg_queue.put(None) + + publisher._runtime() + + assert publisher.executor.submit.called + + def test_runtime_handles_node_succeeded_event_without_output(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + + from core.app.entities.queue_entities import QueueNodeSucceededEvent + + event = MagicMock() + event.event = MagicMock(spec=QueueNodeSucceededEvent) + event.event.outputs = None + + publisher._msg_queue.put(event) + publisher._msg_queue.put(None) + + publisher._runtime() + + publisher.executor.submit.assert_not_called() + + def test_runtime_handles_agent_message_event_list_content(self, mock_model_manager, mocker): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + + from core.app.entities.queue_entities import QueueAgentMessageEvent + from dify_graph.model_runtime.entities.llm_entities import LLMResultChunk, LLMResultChunkDelta + from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + ImagePromptMessageContent, + TextPromptMessageContent, + ) + + chunk = LLMResultChunk( + model="model", + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage( + content=[ + TextPromptMessageContent(data="Hello "), + ImagePromptMessageContent(format="png", mime_type="image/png", base64_data="a"), + ] + ), + ), + ) + event = MagicMock(event=QueueAgentMessageEvent(chunk=chunk)) + + mocker.patch.object(publisher, "_extract_sentence", return_value=([], "")) + + publisher._msg_queue.put(event) + publisher._msg_queue.put(None) + + publisher._runtime() + + assert publisher.msg_text == "Hello " + + def test_runtime_handles_agent_message_event_empty_content(self, mock_model_manager, mocker): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + + from core.app.entities.queue_entities import QueueAgentMessageEvent + from dify_graph.model_runtime.entities.llm_entities import LLMResultChunk, LLMResultChunkDelta + from dify_graph.model_runtime.entities.message_entities import AssistantPromptMessage + + chunk = LLMResultChunk( + model="model", + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage(content=""), + ), + ) + event = MagicMock(event=QueueAgentMessageEvent(chunk=chunk)) + + mocker.patch.object(publisher, "_extract_sentence", return_value=([], "")) + + publisher._msg_queue.put(event) + publisher._msg_queue.put(None) + + publisher._runtime() + + assert publisher.msg_text == "" + + def test_runtime_resets_msg_text_when_text_tmp_not_str(self, mock_model_manager, mocker): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher.executor = MagicMock() + + from core.app.entities.queue_entities import QueueTextChunkEvent + + event = MagicMock() + event.event = MagicMock(spec=QueueTextChunkEvent) + event.event.text = "Hello world. Another sentence." + + mocker.patch.object(publisher, "_extract_sentence", return_value=(["A.", "B."], None)) + + publisher._msg_queue.put(event) + publisher._msg_queue.put(None) + + publisher._runtime() + + assert publisher.msg_text == "" + + def test_runtime_exception_path(self, mock_model_manager): + publisher = AppGeneratorTTSPublisher("tenant", "voice1") + publisher._msg_queue = MagicMock() + publisher._msg_queue.get.side_effect = Exception("error") + + publisher._runtime() diff --git a/api/tests/unit_tests/core/callback_handler/test_agent_tool_callback_handler.py b/api/tests/unit_tests/core/callback_handler/test_agent_tool_callback_handler.py new file mode 100644 index 0000000000..4c1aa33540 --- /dev/null +++ b/api/tests/unit_tests/core/callback_handler/test_agent_tool_callback_handler.py @@ -0,0 +1,197 @@ +from unittest.mock import MagicMock + +import pytest + +import core.callback_handler.agent_tool_callback_handler as module + +# ----------------------------- +# Fixtures +# ----------------------------- + + +@pytest.fixture +def enable_debug(mocker): + mocker.patch.object(module.dify_config, "DEBUG", True) + + +@pytest.fixture +def disable_debug(mocker): + mocker.patch.object(module.dify_config, "DEBUG", False) + + +@pytest.fixture +def mock_print(mocker): + return mocker.patch("builtins.print") + + +@pytest.fixture +def handler(): + return module.DifyAgentCallbackHandler(color="blue") + + +# ----------------------------- +# get_colored_text Tests +# ----------------------------- + + +class TestGetColoredText: + @pytest.mark.parametrize( + ("color", "expected_code"), + [ + ("blue", "36;1"), + ("yellow", "33;1"), + ("pink", "38;5;200"), + ("green", "32;1"), + ("red", "31;1"), + ], + ) + def test_get_colored_text_valid_colors(self, color, expected_code): + text = "hello" + result = module.get_colored_text(text, color) + assert expected_code in result + assert text in result + assert result.endswith("\u001b[0m") + + def test_get_colored_text_invalid_color_raises(self): + with pytest.raises(KeyError): + module.get_colored_text("hello", "invalid") + + def test_get_colored_text_empty_string(self): + result = module.get_colored_text("", "green") + assert "\u001b[" in result + + +# ----------------------------- +# print_text Tests +# ----------------------------- + + +class TestPrintText: + def test_print_text_without_color(self, mock_print): + module.print_text("hello") + mock_print.assert_called_once_with("hello", end="", file=None) + + def test_print_text_with_color(self, mocker, mock_print): + mock_get_color = mocker.patch( + "core.callback_handler.agent_tool_callback_handler.get_colored_text", + return_value="colored_text", + ) + + module.print_text("hello", color="green") + + mock_get_color.assert_called_once_with("hello", "green") + mock_print.assert_called_once_with("colored_text", end="", file=None) + + def test_print_text_with_file_flush(self, mocker): + mock_file = MagicMock() + mock_print = mocker.patch("builtins.print") + + module.print_text("hello", file=mock_file) + + mock_print.assert_called_once_with("hello", end="", file=mock_file) + mock_file.flush.assert_called_once() + + def test_print_text_with_end_parameter(self, mock_print): + module.print_text("hello", end="\n") + mock_print.assert_called_once_with("hello", end="\n", file=None) + + +# ----------------------------- +# DifyAgentCallbackHandler Tests +# ----------------------------- + + +class TestDifyAgentCallbackHandler: + def test_init_default_color(self): + handler = module.DifyAgentCallbackHandler() + assert handler.color == "green" + assert handler.current_loop == 1 + + def test_on_tool_start_debug_enabled(self, handler, enable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + handler.on_tool_start("tool1", {"a": 1}) + + mock_print_text.assert_called() + + def test_on_tool_start_debug_disabled(self, handler, disable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + handler.on_tool_start("tool1", {"a": 1}) + + mock_print_text.assert_not_called() + + def test_on_tool_end_debug_enabled_and_trace(self, handler, enable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + mock_trace_manager = MagicMock() + + handler.on_tool_end( + tool_name="tool1", + tool_inputs={"a": 1}, + tool_outputs="output", + message_id="msg1", + timer=123, + trace_manager=mock_trace_manager, + ) + + assert mock_print_text.call_count >= 1 + mock_trace_manager.add_trace_task.assert_called_once() + + def test_on_tool_end_without_trace_manager(self, handler, enable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + handler.on_tool_end( + tool_name="tool1", + tool_inputs={}, + tool_outputs="output", + ) + + assert mock_print_text.call_count >= 1 + + def test_on_tool_error_debug_enabled(self, handler, enable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + handler.on_tool_error(Exception("error")) + + mock_print_text.assert_called_once() + + def test_on_tool_error_debug_disabled(self, handler, disable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + handler.on_tool_error(Exception("error")) + + mock_print_text.assert_not_called() + + @pytest.mark.parametrize("thought", ["thinking", ""]) + def test_on_agent_start(self, handler, enable_debug, mocker, thought): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + handler.on_agent_start(thought) + + mock_print_text.assert_called() + + def test_on_agent_finish_increments_loop(self, handler, enable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + current_loop = handler.current_loop + handler.on_agent_finish() + + assert handler.current_loop == current_loop + 1 + mock_print_text.assert_called() + + def test_on_datasource_start_debug_enabled(self, handler, enable_debug, mocker): + mock_print_text = mocker.patch("core.callback_handler.agent_tool_callback_handler.print_text") + + handler.on_datasource_start("ds1", {"x": 1}) + + mock_print_text.assert_called_once() + + def test_ignore_agent_property(self, disable_debug, handler): + assert handler.ignore_agent is True + + def test_ignore_chat_model_property(self, disable_debug, handler): + assert handler.ignore_chat_model is True + + def test_ignore_properties_when_debug_enabled(self, enable_debug, handler): + assert handler.ignore_agent is False + assert handler.ignore_chat_model is False diff --git a/api/tests/unit_tests/core/callback_handler/test_index_tool_callback_handler.py b/api/tests/unit_tests/core/callback_handler/test_index_tool_callback_handler.py new file mode 100644 index 0000000000..b37c4c57a1 --- /dev/null +++ b/api/tests/unit_tests/core/callback_handler/test_index_tool_callback_handler.py @@ -0,0 +1,162 @@ +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.callback_handler.index_tool_callback_handler import ( + DatasetIndexToolCallbackHandler, +) + + +@pytest.fixture +def mock_queue_manager(mocker): + return mocker.Mock() + + +@pytest.fixture +def handler(mock_queue_manager, mocker): + mocker.patch( + "core.callback_handler.index_tool_callback_handler.db", + ) + return DatasetIndexToolCallbackHandler( + queue_manager=mock_queue_manager, + app_id="app-1", + message_id="msg-1", + user_id="user-1", + invoke_from=mocker.Mock(), + ) + + +class TestOnQuery: + @pytest.mark.parametrize( + ("invoke_from", "expected_role"), + [ + (InvokeFrom.EXPLORE, "account"), + (InvokeFrom.DEBUGGER, "account"), + (InvokeFrom.WEB_APP, "end_user"), + ], + ) + def test_on_query_success_roles(self, mocker, mock_queue_manager, invoke_from, expected_role): + # Arrange + mock_db = mocker.patch("core.callback_handler.index_tool_callback_handler.db") + + handler = DatasetIndexToolCallbackHandler( + queue_manager=mock_queue_manager, + app_id="app-1", + message_id="msg-1", + user_id="user-1", + invoke_from=mocker.Mock(), + ) + + handler._invoke_from = invoke_from + + # Act + handler.on_query("test query", "dataset-1") + + # Assert + mock_db.session.add.assert_called_once() + dataset_query = mock_db.session.add.call_args.args[0] + assert dataset_query.created_by_role == expected_role + mock_db.session.commit.assert_called_once() + + def test_on_query_none_values(self, mocker, mock_queue_manager): + mock_db = mocker.patch("core.callback_handler.index_tool_callback_handler.db") + + handler = DatasetIndexToolCallbackHandler( + queue_manager=mock_queue_manager, + app_id=None, + message_id=None, + user_id=None, + invoke_from=None, + ) + + handler.on_query(None, None) + + mock_db.session.add.assert_called_once() + mock_db.session.commit.assert_called_once() + + +class TestOnToolEnd: + def test_on_tool_end_no_metadata(self, handler, mocker): + mock_db = mocker.patch("core.callback_handler.index_tool_callback_handler.db") + + document = mocker.Mock() + document.metadata = None + + handler.on_tool_end([document]) + + mock_db.session.commit.assert_not_called() + + def test_on_tool_end_dataset_document_not_found(self, handler, mocker): + mock_db = mocker.patch("core.callback_handler.index_tool_callback_handler.db") + mock_db.session.scalar.return_value = None + + document = mocker.Mock() + document.metadata = {"document_id": "doc-1", "doc_id": "node-1"} + + handler.on_tool_end([document]) + + mock_db.session.scalar.assert_called_once() + + def test_on_tool_end_parent_child_index_with_child(self, handler, mocker): + mock_db = mocker.patch("core.callback_handler.index_tool_callback_handler.db") + + mock_dataset_doc = mocker.Mock() + from core.callback_handler.index_tool_callback_handler import IndexStructureType + + mock_dataset_doc.doc_form = IndexStructureType.PARENT_CHILD_INDEX + mock_dataset_doc.dataset_id = "dataset-1" + mock_dataset_doc.id = "doc-1" + + mock_child_chunk = mocker.Mock() + mock_child_chunk.segment_id = "segment-1" + + mock_db.session.scalar.side_effect = [mock_dataset_doc, mock_child_chunk] + + document = mocker.Mock() + document.metadata = {"document_id": "doc-1", "doc_id": "node-1"} + + mock_query = mocker.Mock() + mock_db.session.query.return_value = mock_query + mock_query.where.return_value = mock_query + + handler.on_tool_end([document]) + + mock_query.update.assert_called_once() + mock_db.session.commit.assert_called_once() + + def test_on_tool_end_non_parent_child_index(self, handler, mocker): + mock_db = mocker.patch("core.callback_handler.index_tool_callback_handler.db") + + mock_dataset_doc = mocker.Mock() + mock_dataset_doc.doc_form = "OTHER" + + mock_db.session.scalar.return_value = mock_dataset_doc + + document = mocker.Mock() + document.metadata = { + "document_id": "doc-1", + "doc_id": "node-1", + "dataset_id": "dataset-1", + } + + mock_query = mocker.Mock() + mock_db.session.query.return_value = mock_query + mock_query.where.return_value = mock_query + + handler.on_tool_end([document]) + + mock_query.update.assert_called_once() + mock_db.session.commit.assert_called_once() + + def test_on_tool_end_empty_documents(self, handler): + handler.on_tool_end([]) + + +class TestReturnRetrieverResourceInfo: + def test_publish_called(self, handler, mock_queue_manager, mocker): + mock_event = mocker.patch("core.callback_handler.index_tool_callback_handler.QueueRetrieverResourcesEvent") + + resources = [mocker.Mock()] + + handler.return_retriever_resource_info(resources) + + mock_queue_manager.publish.assert_called_once() diff --git a/api/tests/unit_tests/core/callback_handler/test_workflow_tool_callback_handler.py b/api/tests/unit_tests/core/callback_handler/test_workflow_tool_callback_handler.py new file mode 100644 index 0000000000..131fb006ed --- /dev/null +++ b/api/tests/unit_tests/core/callback_handler/test_workflow_tool_callback_handler.py @@ -0,0 +1,184 @@ +from unittest.mock import MagicMock, call + +import pytest + +from core.callback_handler.workflow_tool_callback_handler import ( + DifyWorkflowCallbackHandler, +) + + +class DummyToolInvokeMessage: + """Lightweight dummy to simulate ToolInvokeMessage behavior.""" + + def __init__(self, json_value: str): + self._json_value = json_value + + def model_dump_json(self): + return self._json_value + + +@pytest.fixture +def handler(): + """Fixture to create handler instance with deterministic color.""" + instance = DifyWorkflowCallbackHandler() + instance.color = "blue" + return instance + + +@pytest.fixture +def mock_print_text(mocker): + """Mock print_text to avoid real stdout printing.""" + return mocker.patch("core.callback_handler.workflow_tool_callback_handler.print_text") + + +class TestDifyWorkflowCallbackHandler: + def test_on_tool_execution_single_output_success(self, handler, mock_print_text): + # Arrange + tool_name = "test_tool" + tool_inputs = {"a": 1} + message = DummyToolInvokeMessage('{"key": "value"}') + + # Act + results = list( + handler.on_tool_execution( + tool_name=tool_name, + tool_inputs=tool_inputs, + tool_outputs=[message], + ) + ) + + # Assert + assert results == [message] + assert mock_print_text.call_count == 4 + mock_print_text.assert_has_calls( + [ + call("\n[on_tool_execution]\n", color="blue"), + call("Tool: test_tool\n", color="blue"), + call( + "Outputs: " + message.model_dump_json()[:1000] + "\n", + color="blue", + ), + call("\n"), + ] + ) + + def test_on_tool_execution_multiple_outputs(self, handler, mock_print_text): + # Arrange + tool_name = "multi_tool" + outputs = [ + DummyToolInvokeMessage('{"id": 1}'), + DummyToolInvokeMessage('{"id": 2}'), + ] + + # Act + results = list( + handler.on_tool_execution( + tool_name=tool_name, + tool_inputs={}, + tool_outputs=outputs, + ) + ) + + # Assert + assert results == outputs + assert mock_print_text.call_count == 4 * len(outputs) + + def test_on_tool_execution_empty_iterable(self, handler, mock_print_text): + # Arrange + tool_name = "empty_tool" + + # Act + results = list( + handler.on_tool_execution( + tool_name=tool_name, + tool_inputs={}, + tool_outputs=[], + ) + ) + + # Assert + assert results == [] + mock_print_text.assert_not_called() + + @pytest.mark.parametrize( + ("invalid_outputs", "expected_exception"), + [ + (None, TypeError), + (123, TypeError), + ("not_iterable", AttributeError), + ], + ) + def test_on_tool_execution_invalid_outputs_type(self, handler, invalid_outputs, expected_exception): + # Arrange + tool_name = "invalid_tool" + + # Act & Assert + with pytest.raises(expected_exception): + list( + handler.on_tool_execution( + tool_name=tool_name, + tool_inputs={}, + tool_outputs=invalid_outputs, + ) + ) + + def test_on_tool_execution_long_json_truncation(self, handler, mock_print_text): + # Arrange + tool_name = "long_json_tool" + long_json = "x" * 1500 + message = DummyToolInvokeMessage(long_json) + + # Act + list( + handler.on_tool_execution( + tool_name=tool_name, + tool_inputs={}, + tool_outputs=[message], + ) + ) + + # Assert + expected_truncated = long_json[:1000] + mock_print_text.assert_any_call( + "Outputs: " + expected_truncated + "\n", + color="blue", + ) + + def test_on_tool_execution_model_dump_json_exception(self, handler, mock_print_text): + # Arrange + tool_name = "exception_tool" + bad_message = MagicMock() + bad_message.model_dump_json.side_effect = ValueError("JSON error") + + # Act & Assert + with pytest.raises(ValueError): + list( + handler.on_tool_execution( + tool_name=tool_name, + tool_inputs={}, + tool_outputs=[bad_message], + ) + ) + + # Ensure first two prints happened before failure + assert mock_print_text.call_count >= 2 + + def test_on_tool_execution_none_message_id_and_trace_manager(self, handler, mock_print_text): + # Arrange + tool_name = "optional_params_tool" + message = DummyToolInvokeMessage('{"data": "ok"}') + + # Act + results = list( + handler.on_tool_execution( + tool_name=tool_name, + tool_inputs={}, + tool_outputs=[message], + message_id=None, + timer=None, + trace_manager=None, + ) + ) + + assert results == [message] + assert mock_print_text.call_count == 4 diff --git a/api/tests/unit_tests/core/datasource/__base/test_datasource_plugin.py b/api/tests/unit_tests/core/datasource/__base/test_datasource_plugin.py new file mode 100644 index 0000000000..5482b4db52 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/__base/test_datasource_plugin.py @@ -0,0 +1,90 @@ +from unittest.mock import MagicMock, patch + +from configs import dify_config +from core.datasource.__base.datasource_plugin import DatasourcePlugin +from core.datasource.__base.datasource_runtime import DatasourceRuntime +from core.datasource.entities.datasource_entities import DatasourceEntity, DatasourceProviderType + + +class ConcreteDatasourcePlugin(DatasourcePlugin): + """ + Concrete implementation of DatasourcePlugin for testing purposes. + Since DatasourcePlugin is an ABC, we need a concrete class to instantiate it. + """ + + def datasource_provider_type(self) -> str: + return DatasourceProviderType.LOCAL_FILE + + +class TestDatasourcePlugin: + def test_init(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + runtime = MagicMock(spec=DatasourceRuntime) + icon = "test-icon.png" + + # Act + plugin = ConcreteDatasourcePlugin(entity=entity, runtime=runtime, icon=icon) + + # Assert + assert plugin.entity == entity + assert plugin.runtime == runtime + assert plugin.icon == icon + + def test_datasource_provider_type(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + runtime = MagicMock(spec=DatasourceRuntime) + icon = "test-icon.png" + plugin = ConcreteDatasourcePlugin(entity=entity, runtime=runtime, icon=icon) + + # Act + provider_type = plugin.datasource_provider_type() + # Call the base class method to ensure it's covered + base_provider_type = DatasourcePlugin.datasource_provider_type(plugin) + + # Assert + assert provider_type == DatasourceProviderType.LOCAL_FILE + assert base_provider_type == DatasourceProviderType.LOCAL_FILE + + def test_fork_datasource_runtime(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceEntity) + mock_entity_copy = MagicMock(spec=DatasourceEntity) + mock_entity.model_copy.return_value = mock_entity_copy + + runtime = MagicMock(spec=DatasourceRuntime) + new_runtime = MagicMock(spec=DatasourceRuntime) + icon = "test-icon.png" + + plugin = ConcreteDatasourcePlugin(entity=mock_entity, runtime=runtime, icon=icon) + + # Act + new_plugin = plugin.fork_datasource_runtime(new_runtime) + + # Assert + assert isinstance(new_plugin, ConcreteDatasourcePlugin) + assert new_plugin.entity == mock_entity_copy + assert new_plugin.runtime == new_runtime + assert new_plugin.icon == icon + mock_entity.model_copy.assert_called_once() + + def test_get_icon_url(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + runtime = MagicMock(spec=DatasourceRuntime) + icon = "test-icon.png" + tenant_id = "test-tenant-id" + + plugin = ConcreteDatasourcePlugin(entity=entity, runtime=runtime, icon=icon) + + # Mocking dify_config.CONSOLE_API_URL + with patch.object(dify_config, "CONSOLE_API_URL", "https://api.dify.ai"): + # Act + icon_url = plugin.get_icon_url(tenant_id) + + # Assert + expected_url = ( + f"https://api.dify.ai/console/api/workspaces/current/plugin/icon?tenant_id={tenant_id}&filename={icon}" + ) + assert icon_url == expected_url diff --git a/api/tests/unit_tests/core/datasource/__base/test_datasource_provider.py b/api/tests/unit_tests/core/datasource/__base/test_datasource_provider.py new file mode 100644 index 0000000000..6a3d21a33d --- /dev/null +++ b/api/tests/unit_tests/core/datasource/__base/test_datasource_provider.py @@ -0,0 +1,265 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.datasource.__base.datasource_plugin import DatasourcePlugin +from core.datasource.__base.datasource_provider import DatasourcePluginProviderController +from core.datasource.entities.datasource_entities import ( + DatasourceProviderEntityWithPlugin, + DatasourceProviderType, +) +from core.entities.provider_entities import ProviderConfig +from core.tools.errors import ToolProviderCredentialValidationError + + +class ConcreteDatasourcePluginProviderController(DatasourcePluginProviderController): + """ + Concrete implementation of DatasourcePluginProviderController for testing purposes. + """ + + def get_datasource(self, datasource_name: str) -> DatasourcePlugin: + return MagicMock(spec=DatasourcePlugin) + + +class TestDatasourcePluginProviderController: + def test_init(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + tenant_id = "test-tenant-id" + + # Act + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id=tenant_id) + + # Assert + assert controller.entity == mock_entity + assert controller.tenant_id == tenant_id + + def test_need_credentials(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + tenant_id = "test-tenant-id" + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id=tenant_id) + + # Case 1: credentials_schema is None + mock_entity.credentials_schema = None + assert controller.need_credentials is False + + # Case 2: credentials_schema is empty + mock_entity.credentials_schema = [] + assert controller.need_credentials is False + + # Case 3: credentials_schema has items + mock_entity.credentials_schema = [MagicMock()] + assert controller.need_credentials is True + + @patch("core.datasource.__base.datasource_provider.PluginToolManager") + def test_validate_credentials(self, mock_manager_class): + # Arrange + mock_manager = mock_manager_class.return_value + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.identity = MagicMock() + mock_entity.identity.name = "test-provider" + tenant_id = "test-tenant-id" + user_id = "test-user-id" + credentials = {"api_key": "secret"} + + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id=tenant_id) + + # Act: Successful validation + mock_manager.validate_datasource_credentials.return_value = True + controller._validate_credentials(user_id, credentials) + + mock_manager.validate_datasource_credentials.assert_called_once_with( + tenant_id=tenant_id, + user_id=user_id, + provider="test-provider", + credentials=credentials, + ) + + # Act: Failed validation + mock_manager.validate_datasource_credentials.return_value = False + with pytest.raises(ToolProviderCredentialValidationError, match="Invalid credentials"): + controller._validate_credentials(user_id, credentials) + + def test_provider_type(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act & Assert + assert controller.provider_type == DatasourceProviderType.LOCAL_FILE + + def test_validate_credentials_format_empty_schema(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + credentials = {} + + # Act & Assert (Should not raise anything) + controller.validate_credentials_format(credentials) + + def test_validate_credentials_format_unknown_credential(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.identity = MagicMock() + mock_entity.identity.name = "test-provider" + mock_entity.credentials_schema = [] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + credentials = {"unknown": "value"} + + # Act & Assert + with pytest.raises( + ToolProviderCredentialValidationError, match="credential unknown not found in provider test-provider" + ): + controller.validate_credentials_format(credentials) + + def test_validate_credentials_format_required_missing(self): + # Arrange + mock_config = MagicMock(spec=ProviderConfig) + mock_config.name = "api_key" + mock_config.required = True + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [mock_config] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act & Assert + with pytest.raises(ToolProviderCredentialValidationError, match="credential api_key is required"): + controller.validate_credentials_format({}) + + def test_validate_credentials_format_not_required_null(self): + # Arrange + mock_config = MagicMock(spec=ProviderConfig) + mock_config.name = "optional" + mock_config.required = False + mock_config.default = None + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [mock_config] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act & Assert + credentials = {"optional": None} + controller.validate_credentials_format(credentials) + assert credentials["optional"] is None + + def test_validate_credentials_format_type_mismatch_text(self): + # Arrange + mock_config = MagicMock(spec=ProviderConfig) + mock_config.name = "text_field" + mock_config.required = True + mock_config.type = ProviderConfig.Type.TEXT_INPUT + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [mock_config] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act & Assert + with pytest.raises(ToolProviderCredentialValidationError, match="credential text_field should be string"): + controller.validate_credentials_format({"text_field": 123}) + + def test_validate_credentials_format_select_validation(self): + # Arrange + mock_option = MagicMock() + mock_option.value = "opt1" + + mock_config = MagicMock(spec=ProviderConfig) + mock_config.name = "select_field" + mock_config.required = True + mock_config.type = ProviderConfig.Type.SELECT + mock_config.options = [mock_option] + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [mock_config] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Case 1: Value not string + with pytest.raises(ToolProviderCredentialValidationError, match="credential select_field should be string"): + controller.validate_credentials_format({"select_field": 123}) + + # Case 2: Options not list + mock_config.options = "invalid" + with pytest.raises( + ToolProviderCredentialValidationError, match="credential select_field options should be list" + ): + controller.validate_credentials_format({"select_field": "opt1"}) + + # Case 3: Value not in options + mock_config.options = [mock_option] + with pytest.raises(ToolProviderCredentialValidationError, match="credential select_field should be one of"): + controller.validate_credentials_format({"select_field": "invalid_opt"}) + + def test_get_datasource_base(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act + result = DatasourcePluginProviderController.get_datasource(controller, "test") + + # Assert + assert result is None + + def test_validate_credentials_format_hits_pop(self): + # Arrange + mock_config = MagicMock(spec=ProviderConfig) + mock_config.name = "valid_field" + mock_config.required = True + mock_config.type = ProviderConfig.Type.TEXT_INPUT + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [mock_config] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act + credentials = {"valid_field": "valid_value"} + controller.validate_credentials_format(credentials) + + # Assert + assert "valid_field" in credentials + assert credentials["valid_field"] == "valid_value" + + def test_validate_credentials_format_hits_continue(self): + # Arrange + mock_config = MagicMock(spec=ProviderConfig) + mock_config.name = "optional_field" + mock_config.required = False + mock_config.default = None + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [mock_config] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act + credentials = {"optional_field": None} + controller.validate_credentials_format(credentials) + + # Assert + assert credentials["optional_field"] is None + + def test_validate_credentials_format_default_values(self): + # Arrange + mock_config_text = MagicMock(spec=ProviderConfig) + mock_config_text.name = "text_def" + mock_config_text.required = False + mock_config_text.type = ProviderConfig.Type.TEXT_INPUT + mock_config_text.default = 123 # Int default, should be converted to str + + mock_config_other = MagicMock(spec=ProviderConfig) + mock_config_other.name = "other_def" + mock_config_other.required = False + mock_config_other.type = "OTHER" + mock_config_other.default = "fallback" + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.credentials_schema = [mock_config_text, mock_config_other] + controller = ConcreteDatasourcePluginProviderController(entity=mock_entity, tenant_id="test") + + # Act + credentials = {} + controller.validate_credentials_format(credentials) + + # Assert + assert credentials["text_def"] == "123" + assert credentials["other_def"] == "fallback" diff --git a/api/tests/unit_tests/core/datasource/__base/test_datasource_runtime.py b/api/tests/unit_tests/core/datasource/__base/test_datasource_runtime.py new file mode 100644 index 0000000000..2bca9155e9 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/__base/test_datasource_runtime.py @@ -0,0 +1,26 @@ +from core.app.entities.app_invoke_entities import InvokeFrom +from core.datasource.__base.datasource_runtime import DatasourceRuntime, FakeDatasourceRuntime +from core.datasource.entities.datasource_entities import DatasourceInvokeFrom + + +class TestDatasourceRuntime: + def test_init(self): + runtime = DatasourceRuntime( + tenant_id="test-tenant", + datasource_id="test-ds", + invoke_from=InvokeFrom.DEBUGGER, + datasource_invoke_from=DatasourceInvokeFrom.RAG_PIPELINE, + credentials={"key": "val"}, + runtime_parameters={"p": "v"}, + ) + assert runtime.tenant_id == "test-tenant" + assert runtime.datasource_id == "test-ds" + assert runtime.credentials["key"] == "val" + + def test_fake_datasource_runtime(self): + # This covers the FakeDatasourceRuntime class and its __init__ + runtime = FakeDatasourceRuntime() + assert runtime.tenant_id == "fake_tenant_id" + assert runtime.datasource_id == "fake_datasource_id" + assert runtime.invoke_from == InvokeFrom.DEBUGGER + assert runtime.datasource_invoke_from == DatasourceInvokeFrom.RAG_PIPELINE diff --git a/api/tests/unit_tests/core/datasource/entities/test_api_entities.py b/api/tests/unit_tests/core/datasource/entities/test_api_entities.py new file mode 100644 index 0000000000..9855b4040a --- /dev/null +++ b/api/tests/unit_tests/core/datasource/entities/test_api_entities.py @@ -0,0 +1,150 @@ +from core.datasource.entities.api_entities import DatasourceApiEntity, DatasourceProviderApiEntity +from core.datasource.entities.datasource_entities import DatasourceParameter +from core.tools.entities.common_entities import I18nObject + + +def test_datasource_api_entity(): + label = I18nObject(en_US="label", zh_Hans="标签") + description = I18nObject(en_US="desc", zh_Hans="描述") + + entity = DatasourceApiEntity( + author="author", name="name", label=label, description=description, labels=["l1", "l2"] + ) + + assert entity.author == "author" + assert entity.name == "name" + assert entity.label == label + assert entity.description == description + assert entity.labels == ["l1", "l2"] + assert entity.parameters is None + assert entity.output_schema is None + + +def test_datasource_provider_api_entity_defaults(): + description = I18nObject(en_US="desc", zh_Hans="描述") + label = I18nObject(en_US="label", zh_Hans="标签") + + entity = DatasourceProviderApiEntity( + id="id", author="author", name="name", description=description, icon="icon", label=label, type="type" + ) + + assert entity.id == "id" + assert entity.datasources == [] + assert entity.is_team_authorization is False + assert entity.allow_delete is True + assert entity.plugin_id == "" + assert entity.plugin_unique_identifier == "" + assert entity.labels == [] + + +def test_datasource_provider_api_entity_convert_none_to_empty_list(): + description = I18nObject(en_US="desc", zh_Hans="描述") + label = I18nObject(en_US="label", zh_Hans="标签") + + # Implicitly testing the field_validator "convert_none_to_empty_list" + entity = DatasourceProviderApiEntity( + id="id", + author="author", + name="name", + description=description, + icon="icon", + label=label, + type="type", + datasources=None, # type: ignore + ) + + assert entity.datasources == [] + + +def test_datasource_provider_api_entity_to_dict(): + description = I18nObject(en_US="desc", zh_Hans="描述") + label = I18nObject(en_US="label", zh_Hans="标签") + + # Create a parameter that should be converted + param = DatasourceParameter.get_simple_instance( + name="test_param", typ=DatasourceParameter.DatasourceParameterType.SYSTEM_FILES, required=True + ) + + ds_entity = DatasourceApiEntity( + author="author", name="ds_name", label=label, description=description, parameters=[param] + ) + + provider_entity = DatasourceProviderApiEntity( + id="id", + author="author", + name="name", + description=description, + icon="icon", + label=label, + type="type", + masked_credentials={"key": "masked"}, + datasources=[ds_entity], + labels=["l1"], + ) + + result = provider_entity.to_dict() + + assert result["id"] == "id" + assert result["author"] == "author" + assert result["name"] == "name" + assert result["description"] == description.to_dict() + assert result["icon"] == "icon" + assert result["label"] == label.to_dict() + assert result["type"] == "type" + assert result["team_credentials"] == {"key": "masked"} + assert result["is_team_authorization"] is False + assert result["allow_delete"] is True + assert result["labels"] == ["l1"] + + # Check if parameter type was converted from SYSTEM_FILES to files + assert result["datasources"][0]["parameters"][0]["type"] == "files" + + +def test_datasource_provider_api_entity_to_dict_no_params(): + description = I18nObject(en_US="desc", zh_Hans="描述") + label = I18nObject(en_US="label", zh_Hans="标签") + + ds_entity = DatasourceApiEntity( + author="author", name="ds_name", label=label, description=description, parameters=None + ) + + provider_entity = DatasourceProviderApiEntity( + id="id", + author="author", + name="name", + description=description, + icon="icon", + label=label, + type="type", + datasources=[ds_entity], + ) + + result = provider_entity.to_dict() + assert result["datasources"][0]["parameters"] is None + + +def test_datasource_provider_api_entity_to_dict_other_param_type(): + description = I18nObject(en_US="desc", zh_Hans="描述") + label = I18nObject(en_US="label", zh_Hans="标签") + + param = DatasourceParameter.get_simple_instance( + name="test_param", typ=DatasourceParameter.DatasourceParameterType.STRING, required=True + ) + + ds_entity = DatasourceApiEntity( + author="author", name="ds_name", label=label, description=description, parameters=[param] + ) + + provider_entity = DatasourceProviderApiEntity( + id="id", + author="author", + name="name", + description=description, + icon="icon", + label=label, + type="type", + datasources=[ds_entity], + ) + + result = provider_entity.to_dict() + assert result["datasources"][0]["parameters"][0]["type"] == "string" diff --git a/api/tests/unit_tests/core/datasource/entities/test_common_entities.py b/api/tests/unit_tests/core/datasource/entities/test_common_entities.py new file mode 100644 index 0000000000..0ee4928105 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/entities/test_common_entities.py @@ -0,0 +1,31 @@ +from core.datasource.entities.common_entities import I18nObject + + +def test_i18n_object_fallback(): + # Only en_US provided + obj = I18nObject(en_US="Hello") + assert obj.en_US == "Hello" + assert obj.zh_Hans == "Hello" + assert obj.pt_BR == "Hello" + assert obj.ja_JP == "Hello" + + # Some fields provided + obj = I18nObject(en_US="Hello", zh_Hans="你好") + assert obj.en_US == "Hello" + assert obj.zh_Hans == "你好" + assert obj.pt_BR == "Hello" + assert obj.ja_JP == "Hello" + + +def test_i18n_object_all_fields(): + obj = I18nObject(en_US="Hello", zh_Hans="你好", pt_BR="Olá", ja_JP="こんにちは") + assert obj.en_US == "Hello" + assert obj.zh_Hans == "你好" + assert obj.pt_BR == "Olá" + assert obj.ja_JP == "こんにちは" + + +def test_i18n_object_to_dict(): + obj = I18nObject(en_US="Hello", zh_Hans="你好", pt_BR="Olá", ja_JP="こんにちは") + expected_dict = {"en_US": "Hello", "zh_Hans": "你好", "pt_BR": "Olá", "ja_JP": "こんにちは"} + assert obj.to_dict() == expected_dict diff --git a/api/tests/unit_tests/core/datasource/entities/test_datasource_entities.py b/api/tests/unit_tests/core/datasource/entities/test_datasource_entities.py new file mode 100644 index 0000000000..a8c8d31537 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/entities/test_datasource_entities.py @@ -0,0 +1,275 @@ +from unittest.mock import patch + +import pytest + +from core.datasource.entities.datasource_entities import ( + DatasourceEntity, + DatasourceIdentity, + DatasourceInvokeMeta, + DatasourceLabel, + DatasourceMessage, + DatasourceParameter, + DatasourceProviderEntity, + DatasourceProviderEntityWithPlugin, + DatasourceProviderIdentity, + DatasourceProviderType, + GetOnlineDocumentPageContentRequest, + GetOnlineDocumentPageContentResponse, + GetWebsiteCrawlRequest, + OnlineDocumentInfo, + OnlineDocumentPage, + OnlineDocumentPageContent, + OnlineDocumentPagesMessage, + OnlineDriveBrowseFilesRequest, + OnlineDriveBrowseFilesResponse, + OnlineDriveDownloadFileRequest, + OnlineDriveFile, + OnlineDriveFileBucket, + WebsiteCrawlMessage, + WebSiteInfo, + WebSiteInfoDetail, +) +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolLabelEnum + + +def test_datasource_provider_type(): + assert DatasourceProviderType.value_of("online_document") == DatasourceProviderType.ONLINE_DOCUMENT + assert DatasourceProviderType.value_of("local_file") == DatasourceProviderType.LOCAL_FILE + + with pytest.raises(ValueError, match="invalid mode value invalid"): + DatasourceProviderType.value_of("invalid") + + +def test_datasource_parameter_type(): + param_type = DatasourceParameter.DatasourceParameterType.STRING + assert param_type.as_normal_type() == "string" + assert param_type.cast_value("test") == "test" + + param_type = DatasourceParameter.DatasourceParameterType.NUMBER + assert param_type.cast_value("123") == 123 + + +def test_datasource_parameter(): + param = DatasourceParameter.get_simple_instance( + name="test_param", + typ=DatasourceParameter.DatasourceParameterType.STRING, + required=True, + options=["opt1", "opt2"], + ) + assert param.name == "test_param" + assert param.type == DatasourceParameter.DatasourceParameterType.STRING + assert param.required is True + assert len(param.options) == 2 + assert param.options[0].value == "opt1" + + param_no_options = DatasourceParameter.get_simple_instance( + name="test_param_2", typ=DatasourceParameter.DatasourceParameterType.NUMBER, required=False + ) + assert param_no_options.options == [] + + # Test init_frontend_parameter + # For STRING, it should just return the value as is (or cast to str) + frontend_param = param.init_frontend_parameter("val") + assert frontend_param == "val" + + # Test parameter type methods + assert DatasourceParameter.DatasourceParameterType.STRING.as_normal_type() == "string" + assert DatasourceParameter.DatasourceParameterType.NUMBER.as_normal_type() == "number" + assert DatasourceParameter.DatasourceParameterType.SECRET_INPUT.as_normal_type() == "string" + + assert DatasourceParameter.DatasourceParameterType.NUMBER.cast_value("10.5") == 10.5 + assert DatasourceParameter.DatasourceParameterType.BOOLEAN.cast_value("true") is True + assert DatasourceParameter.DatasourceParameterType.FILES.cast_value(["f1", "f2"]) == ["f1", "f2"] + + +def test_datasource_identity(): + label = I18nObject(en_US="label", zh_Hans="标签") + identity = DatasourceIdentity(author="author", name="name", label=label, provider="provider", icon="icon") + assert identity.author == "author" + assert identity.name == "name" + assert identity.label == label + assert identity.provider == "provider" + assert identity.icon == "icon" + + +def test_datasource_entity(): + label = I18nObject(en_US="label", zh_Hans="标签") + identity = DatasourceIdentity(author="author", name="name", label=label, provider="provider") + description = I18nObject(en_US="desc", zh_Hans="描述") + + entity = DatasourceEntity( + identity=identity, + description=description, + parameters=None, # Should be handled by validator + ) + assert entity.parameters == [] + + param = DatasourceParameter.get_simple_instance("p1", DatasourceParameter.DatasourceParameterType.STRING, True) + entity_with_params = DatasourceEntity(identity=identity, description=description, parameters=[param]) + assert entity_with_params.parameters == [param] + + +def test_datasource_provider_identity(): + label = I18nObject(en_US="label", zh_Hans="标签") + description = I18nObject(en_US="desc", zh_Hans="描述") + identity = DatasourceProviderIdentity( + author="author", name="name", description=description, icon="icon.png", label=label, tags=[ToolLabelEnum.SEARCH] + ) + + assert identity.author == "author" + assert identity.name == "name" + assert identity.description == description + assert identity.icon == "icon.png" + assert identity.label == label + assert identity.tags == [ToolLabelEnum.SEARCH] + + # Test generate_datasource_icon_url + with patch("core.datasource.entities.datasource_entities.dify_config") as mock_config: + mock_config.CONSOLE_API_URL = "http://api.example.com" + url = identity.generate_datasource_icon_url("tenant123") + assert "http://api.example.com/console/api/workspaces/current/plugin/icon" in url + assert "tenant_id=tenant123" in url + assert "filename=icon.png" in url + + # Test hardcoded icon + identity.icon = "https://assets.dify.ai/images/File%20Upload.svg" + assert identity.generate_datasource_icon_url("tenant123") == identity.icon + + # Test with empty CONSOLE_API_URL + identity.icon = "test.png" + with patch("core.datasource.entities.datasource_entities.dify_config") as mock_config: + mock_config.CONSOLE_API_URL = None + url = identity.generate_datasource_icon_url("tenant123") + assert url.startswith("/console/api/workspaces/current/plugin/icon") + + +def test_datasource_provider_entity(): + label = I18nObject(en_US="label", zh_Hans="标签") + description = I18nObject(en_US="desc", zh_Hans="描述") + identity = DatasourceProviderIdentity( + author="author", name="name", description=description, icon="icon", label=label + ) + + entity = DatasourceProviderEntity( + identity=identity, + provider_type=DatasourceProviderType.ONLINE_DOCUMENT, + credentials_schema=[], + oauth_schema=None, + ) + assert entity.identity == identity + assert entity.provider_type == DatasourceProviderType.ONLINE_DOCUMENT + assert entity.credentials_schema == [] + + +def test_datasource_provider_entity_with_plugin(): + label = I18nObject(en_US="label", zh_Hans="标签") + description = I18nObject(en_US="desc", zh_Hans="描述") + identity = DatasourceProviderIdentity( + author="author", name="name", description=description, icon="icon", label=label + ) + + entity = DatasourceProviderEntityWithPlugin( + identity=identity, provider_type=DatasourceProviderType.ONLINE_DOCUMENT, datasources=[] + ) + assert entity.datasources == [] + + +def test_datasource_invoke_meta(): + meta = DatasourceInvokeMeta(time_cost=1.5, error="some error", tool_config={"k": "v"}) + assert meta.time_cost == 1.5 + assert meta.error == "some error" + assert meta.tool_config == {"k": "v"} + + d = meta.to_dict() + assert d == {"time_cost": 1.5, "error": "some error", "tool_config": {"k": "v"}} + + empty_meta = DatasourceInvokeMeta.empty() + assert empty_meta.time_cost == 0.0 + assert empty_meta.error is None + assert empty_meta.tool_config == {} + + error_meta = DatasourceInvokeMeta.error_instance("fatal error") + assert error_meta.time_cost == 0.0 + assert error_meta.error == "fatal error" + assert error_meta.tool_config == {} + + +def test_datasource_label(): + label_obj = I18nObject(en_US="label", zh_Hans="标签") + ds_label = DatasourceLabel(name="name", label=label_obj, icon="icon") + assert ds_label.name == "name" + assert ds_label.label == label_obj + assert ds_label.icon == "icon" + + +def test_online_document_models(): + page = OnlineDocumentPage( + page_id="p1", + page_name="name", + page_icon={"type": "emoji"}, + type="page", + last_edited_time="2023-01-01", + parent_id=None, + ) + assert page.page_id == "p1" + + info = OnlineDocumentInfo(workspace_id="w1", workspace_name="name", workspace_icon="icon", total=1, pages=[page]) + assert info.total == 1 + + msg = OnlineDocumentPagesMessage(result=[info]) + assert msg.result == [info] + + req = GetOnlineDocumentPageContentRequest(workspace_id="w1", page_id="p1", type="page") + assert req.workspace_id == "w1" + + content = OnlineDocumentPageContent(workspace_id="w1", page_id="p1", content="hello") + assert content.content == "hello" + + resp = GetOnlineDocumentPageContentResponse(result=content) + assert resp.result == content + + +def test_website_crawl_models(): + req = GetWebsiteCrawlRequest(crawl_parameters={"url": "http://test.com"}) + assert req.crawl_parameters == {"url": "http://test.com"} + + detail = WebSiteInfoDetail(source_url="http://test.com", content="content", title="title", description="desc") + assert detail.title == "title" + + info = WebSiteInfo(status="completed", web_info_list=[detail], total=1, completed=1) + assert info.status == "completed" + + msg = WebsiteCrawlMessage(result=info) + assert msg.result == info + + # Test default values + msg_default = WebsiteCrawlMessage() + assert msg_default.result.status == "" + assert msg_default.result.web_info_list == [] + + +def test_online_drive_models(): + file = OnlineDriveFile(id="f1", name="file.txt", size=100, type="file") + assert file.name == "file.txt" + + bucket = OnlineDriveFileBucket(bucket="b1", files=[file], is_truncated=False, next_page_parameters=None) + assert bucket.bucket == "b1" + + req = OnlineDriveBrowseFilesRequest(bucket="b1", prefix="folder1", max_keys=10, next_page_parameters=None) + assert req.prefix == "folder1" + + resp = OnlineDriveBrowseFilesResponse(result=[bucket]) + assert resp.result == [bucket] + + dl_req = OnlineDriveDownloadFileRequest(id="f1", bucket="b1") + assert dl_req.id == "f1" + + +def test_datasource_message(): + # Use proper dict for message to avoid Pydantic Union validation ambiguity/crashes + msg = DatasourceMessage(type="text", message={"text": "hello"}) + assert msg.message.text == "hello" + + msg_json = DatasourceMessage(type="json", message={"json_object": {"k": "v"}}) + assert msg_json.message.json_object == {"k": "v"} diff --git a/api/tests/unit_tests/core/datasource/local_file/test_local_file_plugin.py b/api/tests/unit_tests/core/datasource/local_file/test_local_file_plugin.py new file mode 100644 index 0000000000..5bf7362a8a --- /dev/null +++ b/api/tests/unit_tests/core/datasource/local_file/test_local_file_plugin.py @@ -0,0 +1,57 @@ +from unittest.mock import MagicMock + +from core.datasource.__base.datasource_runtime import DatasourceRuntime +from core.datasource.entities.datasource_entities import ( + DatasourceEntity, + DatasourceProviderType, +) +from core.datasource.local_file.local_file_plugin import LocalFileDatasourcePlugin + + +class TestLocalFileDatasourcePlugin: + def test_init(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceEntity) + mock_runtime = MagicMock(spec=DatasourceRuntime) + tenant_id = "test-tenant-id" + icon = "test-icon" + plugin_unique_identifier = "test-plugin-id" + + # Act + plugin = LocalFileDatasourcePlugin( + entity=mock_entity, + runtime=mock_runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + # Assert + assert plugin.tenant_id == tenant_id + assert plugin.plugin_unique_identifier == plugin_unique_identifier + assert plugin.entity == mock_entity + assert plugin.runtime == mock_runtime + assert plugin.icon == icon + + def test_datasource_provider_type(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceEntity) + mock_runtime = MagicMock(spec=DatasourceRuntime) + plugin = LocalFileDatasourcePlugin( + entity=mock_entity, runtime=mock_runtime, tenant_id="test", icon="test", plugin_unique_identifier="test" + ) + + # Act & Assert + assert plugin.datasource_provider_type() == DatasourceProviderType.LOCAL_FILE + + def test_get_icon_url(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceEntity) + mock_runtime = MagicMock(spec=DatasourceRuntime) + icon = "test-icon" + plugin = LocalFileDatasourcePlugin( + entity=mock_entity, runtime=mock_runtime, tenant_id="test", icon=icon, plugin_unique_identifier="test" + ) + + # Act & Assert + assert plugin.get_icon_url("any-tenant-id") == icon diff --git a/api/tests/unit_tests/core/datasource/local_file/test_local_file_provider.py b/api/tests/unit_tests/core/datasource/local_file/test_local_file_provider.py new file mode 100644 index 0000000000..af2369ac4e --- /dev/null +++ b/api/tests/unit_tests/core/datasource/local_file/test_local_file_provider.py @@ -0,0 +1,96 @@ +from unittest.mock import MagicMock + +import pytest + +from core.datasource.entities.datasource_entities import ( + DatasourceProviderEntityWithPlugin, + DatasourceProviderType, +) +from core.datasource.local_file.local_file_plugin import LocalFileDatasourcePlugin +from core.datasource.local_file.local_file_provider import LocalFileDatasourcePluginProviderController + + +class TestLocalFileDatasourcePluginProviderController: + def test_init(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + plugin_id = "test_plugin_id" + plugin_unique_identifier = "test_plugin_unique_identifier" + tenant_id = "test_tenant_id" + + # Act + controller = LocalFileDatasourcePluginProviderController( + entity=mock_entity, + plugin_id=plugin_id, + plugin_unique_identifier=plugin_unique_identifier, + tenant_id=tenant_id, + ) + + # Assert + assert controller.entity == mock_entity + assert controller.plugin_id == plugin_id + assert controller.plugin_unique_identifier == plugin_unique_identifier + assert controller.tenant_id == tenant_id + + def test_provider_type(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + controller = LocalFileDatasourcePluginProviderController( + entity=mock_entity, plugin_id="id", plugin_unique_identifier="unique_id", tenant_id="tenant" + ) + + # Act & Assert + assert controller.provider_type == DatasourceProviderType.LOCAL_FILE + + def test_validate_credentials(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + controller = LocalFileDatasourcePluginProviderController( + entity=mock_entity, plugin_id="id", plugin_unique_identifier="unique_id", tenant_id="tenant" + ) + + # Act & Assert + # Should not raise any exception + controller._validate_credentials("user_id", {"key": "value"}) + + def test_get_datasource_success(self): + # Arrange + mock_datasource_entity = MagicMock() + mock_datasource_entity.identity.name = "test_datasource" + + mock_entity = MagicMock() + mock_entity.datasources = [mock_datasource_entity] + mock_entity.identity.icon = "test_icon" + + plugin_unique_identifier = "test_plugin_unique_identifier" + tenant_id = "test_tenant_id" + + controller = LocalFileDatasourcePluginProviderController( + entity=mock_entity, plugin_id="id", plugin_unique_identifier=plugin_unique_identifier, tenant_id=tenant_id + ) + + # Act + datasource = controller.get_datasource("test_datasource") + + # Assert + assert isinstance(datasource, LocalFileDatasourcePlugin) + assert datasource.entity == mock_datasource_entity + assert datasource.tenant_id == tenant_id + assert datasource.icon == "test_icon" + assert datasource.plugin_unique_identifier == plugin_unique_identifier + + def test_get_datasource_not_found(self): + # Arrange + mock_datasource_entity = MagicMock() + mock_datasource_entity.identity.name = "other_datasource" + + mock_entity = MagicMock() + mock_entity.datasources = [mock_datasource_entity] + + controller = LocalFileDatasourcePluginProviderController( + entity=mock_entity, plugin_id="id", plugin_unique_identifier="unique_id", tenant_id="tenant" + ) + + # Act & Assert + with pytest.raises(ValueError, match="Datasource with name test_datasource not found"): + controller.get_datasource("test_datasource") diff --git a/api/tests/unit_tests/core/datasource/online_document/test_online_document_plugin.py b/api/tests/unit_tests/core/datasource/online_document/test_online_document_plugin.py new file mode 100644 index 0000000000..e3a217725a --- /dev/null +++ b/api/tests/unit_tests/core/datasource/online_document/test_online_document_plugin.py @@ -0,0 +1,151 @@ +from unittest.mock import MagicMock, patch + +from core.datasource.__base.datasource_runtime import DatasourceRuntime +from core.datasource.entities.datasource_entities import ( + DatasourceEntity, + DatasourceIdentity, + DatasourceProviderType, + GetOnlineDocumentPageContentRequest, +) +from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin + + +class TestOnlineDocumentDatasourcePlugin: + def test_init(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + runtime = MagicMock(spec=DatasourceRuntime) + tenant_id = "test_tenant" + icon = "test_icon" + plugin_unique_identifier = "test_plugin_id" + + # Act + plugin = OnlineDocumentDatasourcePlugin( + entity=entity, + runtime=runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + # Assert + assert plugin.entity == entity + assert plugin.runtime == runtime + assert plugin.tenant_id == tenant_id + assert plugin.icon == icon + assert plugin.plugin_unique_identifier == plugin_unique_identifier + + def test_get_online_document_pages(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + identity = MagicMock(spec=DatasourceIdentity) + entity.identity = identity + identity.provider = "test_provider" + identity.name = "test_name" + + runtime = MagicMock(spec=DatasourceRuntime) + runtime.credentials = {"api_key": "test_key"} + + tenant_id = "test_tenant" + icon = "test_icon" + plugin_unique_identifier = "test_plugin_id" + + plugin = OnlineDocumentDatasourcePlugin( + entity=entity, + runtime=runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + user_id = "test_user" + datasource_parameters = {"param": "value"} + provider_type = "test_type" + + mock_generator = MagicMock() + + # Patch PluginDatasourceManager to isolate plugin behavior from external dependencies + with patch("core.datasource.online_document.online_document_plugin.PluginDatasourceManager") as MockManager: + mock_manager_instance = MockManager.return_value + mock_manager_instance.get_online_document_pages.return_value = mock_generator + + # Act + result = plugin.get_online_document_pages( + user_id=user_id, datasource_parameters=datasource_parameters, provider_type=provider_type + ) + + # Assert + assert result == mock_generator + mock_manager_instance.get_online_document_pages.assert_called_once_with( + tenant_id=tenant_id, + user_id=user_id, + datasource_provider="test_provider", + datasource_name="test_name", + credentials=runtime.credentials, + datasource_parameters=datasource_parameters, + provider_type=provider_type, + ) + + def test_get_online_document_page_content(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + identity = MagicMock(spec=DatasourceIdentity) + entity.identity = identity + identity.provider = "test_provider" + identity.name = "test_name" + + runtime = MagicMock(spec=DatasourceRuntime) + runtime.credentials = {"api_key": "test_key"} + + tenant_id = "test_tenant" + icon = "test_icon" + plugin_unique_identifier = "test_plugin_id" + + plugin = OnlineDocumentDatasourcePlugin( + entity=entity, + runtime=runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + user_id = "test_user" + datasource_parameters = MagicMock(spec=GetOnlineDocumentPageContentRequest) + provider_type = "test_type" + + mock_generator = MagicMock() + + with patch("core.datasource.online_document.online_document_plugin.PluginDatasourceManager") as MockManager: + mock_manager_instance = MockManager.return_value + mock_manager_instance.get_online_document_page_content.return_value = mock_generator + + # Act + result = plugin.get_online_document_page_content( + user_id=user_id, datasource_parameters=datasource_parameters, provider_type=provider_type + ) + + # Assert + assert result == mock_generator + mock_manager_instance.get_online_document_page_content.assert_called_once_with( + tenant_id=tenant_id, + user_id=user_id, + datasource_provider="test_provider", + datasource_name="test_name", + credentials=runtime.credentials, + datasource_parameters=datasource_parameters, + provider_type=provider_type, + ) + + def test_datasource_provider_type(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + runtime = MagicMock(spec=DatasourceRuntime) + plugin = OnlineDocumentDatasourcePlugin( + entity=entity, runtime=runtime, tenant_id="test", icon="test", plugin_unique_identifier="test" + ) + + # Act + result = plugin.datasource_provider_type() + + # Assert + assert result == DatasourceProviderType.ONLINE_DOCUMENT diff --git a/api/tests/unit_tests/core/datasource/online_document/test_online_document_provider.py b/api/tests/unit_tests/core/datasource/online_document/test_online_document_provider.py new file mode 100644 index 0000000000..cfdd05e0b2 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/online_document/test_online_document_provider.py @@ -0,0 +1,100 @@ +from unittest.mock import MagicMock + +import pytest + +from core.datasource.entities.datasource_entities import ( + DatasourceEntity, + DatasourceProviderEntityWithPlugin, + DatasourceProviderType, +) +from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin +from core.datasource.online_document.online_document_provider import OnlineDocumentDatasourcePluginProviderController + + +class TestOnlineDocumentDatasourcePluginProviderController: + def test_init(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + plugin_id = "test_plugin_id" + plugin_unique_identifier = "test_plugin_uid" + tenant_id = "test_tenant_id" + + # Act + controller = OnlineDocumentDatasourcePluginProviderController( + entity=mock_entity, + plugin_id=plugin_id, + plugin_unique_identifier=plugin_unique_identifier, + tenant_id=tenant_id, + ) + + # Assert + assert controller.entity == mock_entity + assert controller.plugin_id == plugin_id + assert controller.plugin_unique_identifier == plugin_unique_identifier + assert controller.tenant_id == tenant_id + + def test_provider_type(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + controller = OnlineDocumentDatasourcePluginProviderController( + entity=mock_entity, plugin_id="test", plugin_unique_identifier="test", tenant_id="test" + ) + + # Assert + assert controller.provider_type == DatasourceProviderType.ONLINE_DOCUMENT + + def test_get_datasource_success(self): + # Arrange + from core.datasource.entities.datasource_entities import DatasourceIdentity + + mock_datasource_entity = MagicMock(spec=DatasourceEntity) + mock_datasource_entity.identity = MagicMock(spec=DatasourceIdentity) + mock_datasource_entity.identity.name = "target_datasource" + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.datasources = [mock_datasource_entity] + mock_entity.identity = MagicMock() + mock_entity.identity.icon = "test_icon" + + plugin_unique_identifier = "test_plugin_uid" + tenant_id = "test_tenant_id" + + controller = OnlineDocumentDatasourcePluginProviderController( + entity=mock_entity, + plugin_id="test_plugin_id", + plugin_unique_identifier=plugin_unique_identifier, + tenant_id=tenant_id, + ) + + # Act + result = controller.get_datasource("target_datasource") + + # Assert + assert isinstance(result, OnlineDocumentDatasourcePlugin) + assert result.entity == mock_datasource_entity + assert result.tenant_id == tenant_id + assert result.icon == "test_icon" + assert result.plugin_unique_identifier == plugin_unique_identifier + assert result.runtime.tenant_id == tenant_id + + def test_get_datasource_not_found(self): + # Arrange + from core.datasource.entities.datasource_entities import DatasourceIdentity + + mock_datasource_entity = MagicMock(spec=DatasourceEntity) + mock_datasource_entity.identity = MagicMock(spec=DatasourceIdentity) + mock_datasource_entity.identity.name = "other_datasource" + + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + mock_entity.datasources = [mock_datasource_entity] + + controller = OnlineDocumentDatasourcePluginProviderController( + entity=mock_entity, + plugin_id="test_plugin_id", + plugin_unique_identifier="test_plugin_uid", + tenant_id="test_tenant_id", + ) + + # Act & Assert + with pytest.raises(ValueError, match="Datasource with name missing_datasource not found"): + controller.get_datasource("missing_datasource") diff --git a/api/tests/unit_tests/core/datasource/online_drive/test_online_drive_plugin.py b/api/tests/unit_tests/core/datasource/online_drive/test_online_drive_plugin.py new file mode 100644 index 0000000000..6c8b644871 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/online_drive/test_online_drive_plugin.py @@ -0,0 +1,147 @@ +from unittest.mock import MagicMock, patch + +from core.datasource.__base.datasource_runtime import DatasourceRuntime +from core.datasource.entities.datasource_entities import ( + DatasourceEntity, + DatasourceIdentity, + DatasourceProviderType, + OnlineDriveBrowseFilesRequest, + OnlineDriveDownloadFileRequest, +) +from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin + + +class TestOnlineDriveDatasourcePlugin: + def test_init(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + runtime = MagicMock(spec=DatasourceRuntime) + tenant_id = "test_tenant" + icon = "test_icon" + plugin_unique_identifier = "test_plugin_id" + + # Act + plugin = OnlineDriveDatasourcePlugin( + entity=entity, + runtime=runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + # Assert + assert plugin.entity == entity + assert plugin.runtime == runtime + assert plugin.tenant_id == tenant_id + assert plugin.icon == icon + assert plugin.plugin_unique_identifier == plugin_unique_identifier + + def test_online_drive_browse_files(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + identity = MagicMock(spec=DatasourceIdentity) + entity.identity = identity + identity.provider = "test_provider" + identity.name = "test_name" + + runtime = MagicMock(spec=DatasourceRuntime) + runtime.credentials = {"token": "test_token"} + + tenant_id = "test_tenant" + icon = "test_icon" + plugin_unique_identifier = "test_plugin_id" + + plugin = OnlineDriveDatasourcePlugin( + entity=entity, + runtime=runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + user_id = "test_user" + request = MagicMock(spec=OnlineDriveBrowseFilesRequest) + provider_type = "test_type" + + mock_generator = MagicMock() + + with patch("core.datasource.online_drive.online_drive_plugin.PluginDatasourceManager") as MockManager: + mock_manager_instance = MockManager.return_value + mock_manager_instance.online_drive_browse_files.return_value = mock_generator + + # Act + result = plugin.online_drive_browse_files(user_id=user_id, request=request, provider_type=provider_type) + + # Assert + assert result == mock_generator + mock_manager_instance.online_drive_browse_files.assert_called_once_with( + tenant_id=tenant_id, + user_id=user_id, + datasource_provider="test_provider", + datasource_name="test_name", + credentials=runtime.credentials, + request=request, + provider_type=provider_type, + ) + + def test_online_drive_download_file(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + identity = MagicMock(spec=DatasourceIdentity) + entity.identity = identity + identity.provider = "test_provider" + identity.name = "test_name" + + runtime = MagicMock(spec=DatasourceRuntime) + runtime.credentials = {"token": "test_token"} + + tenant_id = "test_tenant" + icon = "test_icon" + plugin_unique_identifier = "test_plugin_id" + + plugin = OnlineDriveDatasourcePlugin( + entity=entity, + runtime=runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + user_id = "test_user" + request = MagicMock(spec=OnlineDriveDownloadFileRequest) + provider_type = "test_type" + + mock_generator = MagicMock() + + with patch("core.datasource.online_drive.online_drive_plugin.PluginDatasourceManager") as MockManager: + mock_manager_instance = MockManager.return_value + mock_manager_instance.online_drive_download_file.return_value = mock_generator + + # Act + result = plugin.online_drive_download_file(user_id=user_id, request=request, provider_type=provider_type) + + # Assert + assert result == mock_generator + mock_manager_instance.online_drive_download_file.assert_called_once_with( + tenant_id=tenant_id, + user_id=user_id, + datasource_provider="test_provider", + datasource_name="test_name", + credentials=runtime.credentials, + request=request, + provider_type=provider_type, + ) + + def test_datasource_provider_type(self): + # Arrange + entity = MagicMock(spec=DatasourceEntity) + runtime = MagicMock(spec=DatasourceRuntime) + plugin = OnlineDriveDatasourcePlugin( + entity=entity, runtime=runtime, tenant_id="test", icon="test", plugin_unique_identifier="test" + ) + + # Act + result = plugin.datasource_provider_type() + + # Assert + assert result == DatasourceProviderType.ONLINE_DRIVE diff --git a/api/tests/unit_tests/core/datasource/online_drive/test_online_drive_provider.py b/api/tests/unit_tests/core/datasource/online_drive/test_online_drive_provider.py new file mode 100644 index 0000000000..2824ddd8ed --- /dev/null +++ b/api/tests/unit_tests/core/datasource/online_drive/test_online_drive_provider.py @@ -0,0 +1,83 @@ +from unittest.mock import MagicMock + +import pytest + +from core.datasource.entities.datasource_entities import DatasourceProviderEntityWithPlugin, DatasourceProviderType +from core.datasource.online_drive.online_drive_plugin import OnlineDriveDatasourcePlugin +from core.datasource.online_drive.online_drive_provider import OnlineDriveDatasourcePluginProviderController + + +class TestOnlineDriveDatasourcePluginProviderController: + def test_init(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + plugin_id = "test_plugin_id" + plugin_unique_identifier = "test_plugin_unique_identifier" + tenant_id = "test_tenant_id" + + # Act + controller = OnlineDriveDatasourcePluginProviderController( + entity=mock_entity, + plugin_id=plugin_id, + plugin_unique_identifier=plugin_unique_identifier, + tenant_id=tenant_id, + ) + + # Assert + assert controller.entity == mock_entity + assert controller.plugin_id == plugin_id + assert controller.plugin_unique_identifier == plugin_unique_identifier + assert controller.tenant_id == tenant_id + + def test_provider_type(self): + # Arrange + mock_entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + controller = OnlineDriveDatasourcePluginProviderController( + entity=mock_entity, plugin_id="id", plugin_unique_identifier="unique_id", tenant_id="tenant" + ) + + # Act & Assert + assert controller.provider_type == DatasourceProviderType.ONLINE_DRIVE + + def test_get_datasource_success(self): + # Arrange + mock_datasource_entity = MagicMock() + mock_datasource_entity.identity.name = "test_datasource" + + mock_entity = MagicMock() + mock_entity.datasources = [mock_datasource_entity] + mock_entity.identity.icon = "test_icon" + + plugin_unique_identifier = "test_plugin_unique_identifier" + tenant_id = "test_tenant_id" + + controller = OnlineDriveDatasourcePluginProviderController( + entity=mock_entity, plugin_id="id", plugin_unique_identifier=plugin_unique_identifier, tenant_id=tenant_id + ) + + # Act + datasource = controller.get_datasource("test_datasource") + + # Assert + assert isinstance(datasource, OnlineDriveDatasourcePlugin) + assert datasource.entity == mock_datasource_entity + assert datasource.tenant_id == tenant_id + assert datasource.icon == "test_icon" + assert datasource.plugin_unique_identifier == plugin_unique_identifier + assert datasource.runtime.tenant_id == tenant_id + + def test_get_datasource_not_found(self): + # Arrange + mock_datasource_entity = MagicMock() + mock_datasource_entity.identity.name = "other_datasource" + + mock_entity = MagicMock() + mock_entity.datasources = [mock_datasource_entity] + + controller = OnlineDriveDatasourcePluginProviderController( + entity=mock_entity, plugin_id="id", plugin_unique_identifier="unique_id", tenant_id="tenant" + ) + + # Act & Assert + with pytest.raises(ValueError, match="Datasource with name test_datasource not found"): + controller.get_datasource("test_datasource") diff --git a/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py new file mode 100644 index 0000000000..a7c93242cd --- /dev/null +++ b/api/tests/unit_tests/core/datasource/test_datasource_file_manager.py @@ -0,0 +1,409 @@ +import base64 +import hashlib +import hmac +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from core.datasource.datasource_file_manager import DatasourceFileManager +from models.model import MessageFile, UploadFile +from models.tools import ToolFile + + +class TestDatasourceFileManager: + @patch("core.datasource.datasource_file_manager.time.time") + @patch("core.datasource.datasource_file_manager.os.urandom") + @patch("core.datasource.datasource_file_manager.dify_config") + def test_sign_file(self, mock_config, mock_urandom, mock_time): + # Setup + mock_config.FILES_URL = "http://localhost:5001" + mock_config.SECRET_KEY = "test_secret" + mock_time.return_value = 1700000000 + mock_urandom.return_value = b"1234567890abcdef" # 16 bytes + + datasource_file_id = "file_id_123" + extension = ".png" + + # Execute + signed_url = DatasourceFileManager.sign_file(datasource_file_id, extension) + + # Verify + assert signed_url.startswith("http://localhost:5001/files/datasources/file_id_123.png?") + assert "timestamp=1700000000" in signed_url + assert f"nonce={mock_urandom.return_value.hex()}" in signed_url + assert "sign=" in signed_url + + @patch("core.datasource.datasource_file_manager.time.time") + @patch("core.datasource.datasource_file_manager.os.urandom") + @patch("core.datasource.datasource_file_manager.dify_config") + def test_sign_file_empty_secret(self, mock_config, mock_urandom, mock_time): + # Setup + mock_config.FILES_URL = "http://localhost:5001" + mock_config.SECRET_KEY = None # Empty secret + mock_time.return_value = 1700000000 + mock_urandom.return_value = b"1234567890abcdef" + + # Execute + signed_url = DatasourceFileManager.sign_file("file_id", ".png") + assert "sign=" in signed_url + + @patch("core.datasource.datasource_file_manager.time.time") + @patch("core.datasource.datasource_file_manager.dify_config") + def test_verify_file(self, mock_config, mock_time): + # Setup + mock_config.SECRET_KEY = "test_secret" + mock_config.FILES_ACCESS_TIMEOUT = 300 + mock_time.return_value = 1700000000 + + datasource_file_id = "file_id_123" + timestamp = "1699999800" # 200 seconds ago + nonce = "some_nonce" + + # Manually calculate sign + data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" + secret_key = b"test_secret" + sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + encoded_sign = base64.urlsafe_b64encode(sign).decode() + + # Execute & Verify Success + assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is True + + # Verify Failure - Wrong Sign + assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, "wrong_sign") is False + + # Verify Failure - Timeout + mock_time.return_value = 1700000500 # 700 seconds after timestamp (300 is timeout) + assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is False + + @patch("core.datasource.datasource_file_manager.time.time") + @patch("core.datasource.datasource_file_manager.dify_config") + def test_verify_file_empty_secret(self, mock_config, mock_time): + # Setup + mock_config.SECRET_KEY = "" # Empty string secret + mock_config.FILES_ACCESS_TIMEOUT = 300 + mock_time.return_value = 1700000000 + + datasource_file_id = "file_id_123" + timestamp = "1699999800" + nonce = "some_nonce" + + # Calculate with empty secret + data_to_sign = f"file-preview|{datasource_file_id}|{timestamp}|{nonce}" + sign = hmac.new(b"", data_to_sign.encode(), hashlib.sha256).digest() + encoded_sign = base64.urlsafe_b64encode(sign).decode() + + assert DatasourceFileManager.verify_file(datasource_file_id, timestamp, nonce, encoded_sign) is True + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + @patch("core.datasource.datasource_file_manager.uuid4") + @patch("core.datasource.datasource_file_manager.dify_config") + def test_create_file_by_raw(self, mock_config, mock_uuid, mock_storage, mock_db): + # Setup + mock_uuid.return_value = MagicMock(hex="unique_hex") + mock_config.STORAGE_TYPE = "local" + + user_id = "user_123" + tenant_id = "tenant_456" + file_binary = b"fake binary data" + mimetype = "image/png" + + # Execute + upload_file = DatasourceFileManager.create_file_by_raw( + user_id=user_id, + tenant_id=tenant_id, + conversation_id=None, + file_binary=file_binary, + mimetype=mimetype, + filename="test.png", + ) + + # Verify + assert upload_file.tenant_id == tenant_id + assert upload_file.name == "test.png" + assert upload_file.size == len(file_binary) + assert upload_file.mime_type == mimetype + assert upload_file.key == f"datasources/{tenant_id}/unique_hex.png" + + mock_storage.save.assert_called_once_with(upload_file.key, file_binary) + mock_db.session.add.assert_called_once() + mock_db.session.commit.assert_called_once() + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + @patch("core.datasource.datasource_file_manager.uuid4") + @patch("core.datasource.datasource_file_manager.dify_config") + def test_create_file_by_raw_filename_no_extension(self, mock_config, mock_uuid, mock_storage, mock_db): + # Setup + mock_uuid.return_value = MagicMock(hex="unique_hex") + mock_config.STORAGE_TYPE = "local" + + user_id = "user_123" + tenant_id = "tenant_456" + file_binary = b"fake binary data" + mimetype = "image/png" + + # Execute + upload_file = DatasourceFileManager.create_file_by_raw( + user_id=user_id, + tenant_id=tenant_id, + conversation_id=None, + file_binary=file_binary, + mimetype=mimetype, + filename="test", # No extension + ) + + # Verify + assert upload_file.name == "test.png" # Should append extension + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + @patch("core.datasource.datasource_file_manager.uuid4") + @patch("core.datasource.datasource_file_manager.dify_config") + @patch("core.datasource.datasource_file_manager.guess_extension") + def test_create_file_by_raw_unknown_extension(self, mock_guess_ext, mock_config, mock_uuid, mock_storage, mock_db): + # Setup + mock_guess_ext.return_value = None # Cannot guess + mock_uuid.return_value = MagicMock(hex="unique_hex") + + # Execute + upload_file = DatasourceFileManager.create_file_by_raw( + user_id="user", + tenant_id="tenant", + conversation_id=None, + file_binary=b"data", + mimetype="application/x-unknown", + ) + + # Verify + assert upload_file.extension == ".bin" + assert upload_file.name == "unique_hex.bin" + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + @patch("core.datasource.datasource_file_manager.uuid4") + @patch("core.datasource.datasource_file_manager.dify_config") + def test_create_file_by_raw_no_filename(self, mock_config, mock_uuid, mock_storage, mock_db): + # Setup + mock_uuid.return_value = MagicMock(hex="unique_hex") + mock_config.STORAGE_TYPE = "local" + + # Execute + upload_file = DatasourceFileManager.create_file_by_raw( + user_id="user_123", + tenant_id="tenant_456", + conversation_id=None, + file_binary=b"data", + mimetype="application/pdf", + ) + + # Verify + assert upload_file.name == "unique_hex.pdf" + assert upload_file.extension == ".pdf" + + @patch("core.datasource.datasource_file_manager.ssrf_proxy") + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + @patch("core.datasource.datasource_file_manager.uuid4") + def test_create_file_by_url_mimetype_from_guess(self, mock_uuid, mock_storage, mock_db, mock_ssrf): + # Setup + mock_uuid.return_value = MagicMock(hex="unique_hex") + mock_response = MagicMock() + mock_response.content = b"bits" + mock_response.headers = {} # No content-type in headers + mock_ssrf.get.return_value = mock_response + + # Execute + tool_file = DatasourceFileManager.create_file_by_url( + user_id="user_123", tenant_id="tenant_456", file_url="https://example.com/photo.png" + ) + + # Verify + assert tool_file.mimetype == "image/png" # Guessed from .png in URL + + @patch("core.datasource.datasource_file_manager.ssrf_proxy") + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + @patch("core.datasource.datasource_file_manager.uuid4") + def test_create_file_by_url_mimetype_default(self, mock_uuid, mock_storage, mock_db, mock_ssrf): + # Setup + mock_uuid.return_value = MagicMock(hex="unique_hex") + mock_response = MagicMock() + mock_response.content = b"bits" + mock_response.headers = {} + mock_ssrf.get.return_value = mock_response + + # Execute + tool_file = DatasourceFileManager.create_file_by_url( + user_id="user_123", + tenant_id="tenant_456", + file_url="https://example.com/unknown", # No extension, no headers + ) + + # Verify + assert tool_file.mimetype == "application/octet-stream" + + @patch("core.datasource.datasource_file_manager.ssrf_proxy") + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + @patch("core.datasource.datasource_file_manager.uuid4") + def test_create_file_by_url_success(self, mock_uuid, mock_storage, mock_db, mock_ssrf): + # Setup + mock_uuid.return_value = MagicMock(hex="unique_hex") + mock_response = MagicMock() + mock_response.content = b"downloaded bits" + mock_response.headers = {"Content-Type": "image/jpeg"} + mock_ssrf.get.return_value = mock_response + + # Execute + tool_file = DatasourceFileManager.create_file_by_url( + user_id="user_123", tenant_id="tenant_456", file_url="https://example.com/photo.jpg" + ) + + # Verify + assert tool_file.mimetype == "image/jpeg" + assert tool_file.size == len(b"downloaded bits") + assert tool_file.file_key == "tools/tenant_456/unique_hex.jpg" + mock_storage.save.assert_called_once() + + @patch("core.datasource.datasource_file_manager.ssrf_proxy") + def test_create_file_by_url_timeout(self, mock_ssrf): + # Setup + mock_ssrf.get.side_effect = httpx.TimeoutException("Timeout") + + # Execute & Verify + with pytest.raises(ValueError, match="timeout when downloading file"): + DatasourceFileManager.create_file_by_url( + user_id="user_123", tenant_id="tenant_456", file_url="https://example.com/large.file" + ) + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + def test_get_file_binary(self, mock_storage, mock_db): + # Setup + mock_upload_file = MagicMock(spec=UploadFile) + mock_upload_file.key = "some_key" + mock_upload_file.mime_type = "image/png" + + mock_query = mock_db.session.query.return_value + mock_where = mock_query.where.return_value + mock_where.first.return_value = mock_upload_file + + mock_storage.load_once.return_value = b"file content" + + # Execute + result = DatasourceFileManager.get_file_binary("file_id") + + # Verify + assert result == (b"file content", "image/png") + + # Case: Not found + mock_where.first.return_value = None + assert DatasourceFileManager.get_file_binary("unknown") is None + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + def test_get_file_binary_by_message_file_id(self, mock_storage, mock_db): + # Setup + mock_message_file = MagicMock(spec=MessageFile) + mock_message_file.url = "http://localhost/files/tools/tool_id.png" + + mock_tool_file = MagicMock(spec=ToolFile) + mock_tool_file.file_key = "tool_key" + mock_tool_file.mimetype = "image/png" + + # Mock query sequence + def mock_query(model): + m = MagicMock() + if model == MessageFile: + m.where.return_value.first.return_value = mock_message_file + elif model == ToolFile: + m.where.return_value.first.return_value = mock_tool_file + return m + + mock_db.session.query.side_effect = mock_query + mock_storage.load_once.return_value = b"tool content" + + # Execute + result = DatasourceFileManager.get_file_binary_by_message_file_id("msg_file_id") + + # Verify + assert result == (b"tool content", "image/png") + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + def test_get_file_binary_by_message_file_id_with_extension(self, mock_storage, mock_db): + # Test that it correctly parses tool_id even with extension in URL + mock_message_file = MagicMock(spec=MessageFile) + mock_message_file.url = "http://localhost/files/tools/abcdef.png" + + mock_tool_file = MagicMock(spec=ToolFile) + mock_tool_file.id = "abcdef" + mock_tool_file.file_key = "tk" + mock_tool_file.mimetype = "image/png" + + def mock_query(model): + m = MagicMock() + if model == MessageFile: + m.where.return_value.first.return_value = mock_message_file + else: + m.where.return_value.first.return_value = mock_tool_file + return m + + mock_db.session.query.side_effect = mock_query + mock_storage.load_once.return_value = b"bits" + + result = DatasourceFileManager.get_file_binary_by_message_file_id("m") + assert result == (b"bits", "image/png") + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + def test_get_file_binary_by_message_file_id_failures(self, mock_storage, mock_db): + # Setup common mock + mock_query_obj = MagicMock() + mock_db.session.query.return_value = mock_query_obj + mock_query_obj.where.return_value.first.return_value = None + + # Case 1: Message file not found + assert DatasourceFileManager.get_file_binary_by_message_file_id("none") is None + + # Case 2: Message file found but tool file not found + mock_message_file = MagicMock(spec=MessageFile) + mock_message_file.url = None + + def mock_query_v2(model): + m = MagicMock() + if model == MessageFile: + m.where.return_value.first.return_value = mock_message_file + else: + m.where.return_value.first.return_value = None + return m + + mock_db.session.query.side_effect = mock_query_v2 + assert DatasourceFileManager.get_file_binary_by_message_file_id("msg_id") is None + + @patch("core.datasource.datasource_file_manager.db") + @patch("core.datasource.datasource_file_manager.storage") + def test_get_file_generator_by_upload_file_id(self, mock_storage, mock_db): + # Setup + mock_upload_file = MagicMock(spec=UploadFile) + mock_upload_file.key = "upload_key" + mock_upload_file.mime_type = "text/plain" + + mock_db.session.query.return_value.where.return_value.first.return_value = mock_upload_file + + mock_storage.load_stream.return_value = iter([b"chunk1", b"chunk2"]) + + # Execute + stream, mimetype = DatasourceFileManager.get_file_generator_by_upload_file_id("upload_id") + + # Verify + assert mimetype == "text/plain" + assert list(stream) == [b"chunk1", b"chunk2"] + + # Case: Not found + mock_db.session.query.return_value.where.return_value.first.return_value = None + stream, mimetype = DatasourceFileManager.get_file_generator_by_upload_file_id("none") + assert stream is None + assert mimetype is None diff --git a/api/tests/unit_tests/core/datasource/test_datasource_manager.py b/api/tests/unit_tests/core/datasource/test_datasource_manager.py index 52c91fb8c9..d5eeae912c 100644 --- a/api/tests/unit_tests/core/datasource/test_datasource_manager.py +++ b/api/tests/unit_tests/core/datasource/test_datasource_manager.py @@ -1,9 +1,15 @@ import types from collections.abc import Generator +import pytest + +from contexts.wrapper import RecyclableContextVar from core.datasource.datasource_manager import DatasourceManager -from core.datasource.entities.datasource_entities import DatasourceMessage +from core.datasource.entities.datasource_entities import DatasourceMessage, DatasourceProviderType +from core.datasource.errors import DatasourceProviderNotFoundError from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus +from dify_graph.file import File +from dify_graph.file.enums import FileTransferMethod, FileType from dify_graph.node_events import StreamChunkEvent, StreamCompletedEvent @@ -15,6 +21,22 @@ def _gen_messages_text_only(text: str) -> Generator[DatasourceMessage, None, Non ) +def _drain_generator(gen: Generator[DatasourceMessage, None, object]) -> tuple[list[DatasourceMessage], object | None]: + messages: list[DatasourceMessage] = [] + try: + while True: + messages.append(next(gen)) + except StopIteration as e: + return messages, e.value + + +def _invalidate_recyclable_contextvars() -> None: + """ + Ensure RecyclableContextVar.get() raises LookupError until reset by code under test. + """ + RecyclableContextVar.increment_thread_recycles() + + def test_get_icon_url_calls_runtime(mocker): fake_runtime = mocker.Mock() fake_runtime.get_icon_url.return_value = "https://icon" @@ -30,6 +52,119 @@ def test_get_icon_url_calls_runtime(mocker): DatasourceManager.get_datasource_runtime.assert_called_once() +def test_get_datasource_runtime_delegates_to_provider_controller(mocker): + provider_controller = mocker.Mock() + provider_controller.get_datasource.return_value = object() + mocker.patch.object(DatasourceManager, "get_datasource_plugin_provider", return_value=provider_controller) + + runtime = DatasourceManager.get_datasource_runtime( + provider_id="prov/x", + datasource_name="ds", + tenant_id="t1", + datasource_type=DatasourceProviderType.ONLINE_DOCUMENT, + ) + assert runtime is provider_controller.get_datasource.return_value + provider_controller.get_datasource.assert_called_once_with("ds") + + +@pytest.mark.parametrize( + ("datasource_type", "controller_path"), + [ + ( + DatasourceProviderType.ONLINE_DOCUMENT, + "core.datasource.datasource_manager.OnlineDocumentDatasourcePluginProviderController", + ), + ( + DatasourceProviderType.ONLINE_DRIVE, + "core.datasource.datasource_manager.OnlineDriveDatasourcePluginProviderController", + ), + ( + DatasourceProviderType.WEBSITE_CRAWL, + "core.datasource.datasource_manager.WebsiteCrawlDatasourcePluginProviderController", + ), + ( + DatasourceProviderType.LOCAL_FILE, + "core.datasource.datasource_manager.LocalFileDatasourcePluginProviderController", + ), + ], +) +def test_get_datasource_plugin_provider_creates_controller_and_caches(mocker, datasource_type, controller_path): + _invalidate_recyclable_contextvars() + + provider_entity = types.SimpleNamespace(declaration=object(), plugin_id="plugin", plugin_unique_identifier="uniq") + fetch = mocker.patch( + "core.datasource.datasource_manager.PluginDatasourceManager.fetch_datasource_provider", + return_value=provider_entity, + ) + ctrl_cls = mocker.patch(controller_path) + + first = DatasourceManager.get_datasource_plugin_provider( + provider_id=f"prov/{datasource_type.value}", + tenant_id="t1", + datasource_type=datasource_type, + ) + second = DatasourceManager.get_datasource_plugin_provider( + provider_id=f"prov/{datasource_type.value}", + tenant_id="t1", + datasource_type=datasource_type, + ) + + assert first is second + assert fetch.call_count == 1 + assert ctrl_cls.call_count == 1 + + +def test_get_datasource_plugin_provider_raises_when_provider_entity_missing(mocker): + _invalidate_recyclable_contextvars() + mocker.patch( + "core.datasource.datasource_manager.PluginDatasourceManager.fetch_datasource_provider", + return_value=None, + ) + + with pytest.raises(DatasourceProviderNotFoundError, match="plugin provider prov/notfound not found"): + DatasourceManager.get_datasource_plugin_provider( + provider_id="prov/notfound", + tenant_id="t1", + datasource_type=DatasourceProviderType.ONLINE_DOCUMENT, + ) + + +def test_get_datasource_plugin_provider_raises_for_unsupported_type(mocker): + _invalidate_recyclable_contextvars() + provider_entity = types.SimpleNamespace(declaration=object(), plugin_id="plugin", plugin_unique_identifier="uniq") + mocker.patch( + "core.datasource.datasource_manager.PluginDatasourceManager.fetch_datasource_provider", + return_value=provider_entity, + ) + + with pytest.raises(ValueError, match="Unsupported datasource type"): + DatasourceManager.get_datasource_plugin_provider( + provider_id="prov/x", + tenant_id="t1", + datasource_type=types.SimpleNamespace(), # not a DatasourceProviderType at runtime + ) + + +def test_get_datasource_plugin_provider_raises_when_controller_none(mocker): + _invalidate_recyclable_contextvars() + provider_entity = types.SimpleNamespace(declaration=object(), plugin_id="plugin", plugin_unique_identifier="uniq") + mocker.patch( + "core.datasource.datasource_manager.PluginDatasourceManager.fetch_datasource_provider", + return_value=provider_entity, + ) + mocker.patch( + "core.datasource.datasource_manager.OnlineDocumentDatasourcePluginProviderController", + return_value=None, + ) + + with pytest.raises(DatasourceProviderNotFoundError, match="Datasource provider prov/x not found"): + DatasourceManager.get_datasource_plugin_provider( + provider_id="prov/x", + tenant_id="t1", + datasource_type=DatasourceProviderType.ONLINE_DOCUMENT, + ) + + def test_stream_online_results_yields_messages_online_document(mocker): # stub runtime to yield a text message def _doc_messages(**_): @@ -60,6 +195,148 @@ def test_stream_online_results_yields_messages_online_document(mocker): assert msgs[0].message.text == "hello" +def test_stream_online_results_sets_credentials_and_returns_empty_dict_online_document(mocker): + class _Runtime: + def __init__(self) -> None: + self.runtime = types.SimpleNamespace(credentials=None) + + def get_online_document_page_content(self, **_kwargs): + yield from _gen_messages_text_only("hello") + + runtime = _Runtime() + mocker.patch.object(DatasourceManager, "get_datasource_runtime", return_value=runtime) + mocker.patch( + "core.datasource.datasource_manager.DatasourceProviderService.get_datasource_credentials", + return_value={"token": "t"}, + ) + + gen = DatasourceManager.stream_online_results( + user_id="u1", + datasource_name="ds", + datasource_type="online_document", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="cred", + datasource_param=types.SimpleNamespace(workspace_id="w", page_id="pg", type="t"), + online_drive_request=None, + ) + messages, final_value = _drain_generator(gen) + + assert runtime.runtime.credentials == {"token": "t"} + assert [m.message.text for m in messages] == ["hello"] + assert final_value == {} + + +def test_stream_online_results_raises_when_missing_params(mocker): + class _Runtime: + def __init__(self) -> None: + self.runtime = types.SimpleNamespace(credentials=None) + + def get_online_document_page_content(self, **_kwargs): + yield from _gen_messages_text_only("never") + + def online_drive_download_file(self, **_kwargs): + yield from _gen_messages_text_only("never") + + mocker.patch.object(DatasourceManager, "get_datasource_runtime", return_value=_Runtime()) + mocker.patch( + "core.datasource.datasource_manager.DatasourceProviderService.get_datasource_credentials", + return_value={}, + ) + + with pytest.raises(ValueError, match="datasource_param is required for ONLINE_DOCUMENT streaming"): + list( + DatasourceManager.stream_online_results( + user_id="u1", + datasource_name="ds", + datasource_type="online_document", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="", + datasource_param=None, + online_drive_request=None, + ) + ) + + with pytest.raises(ValueError, match="online_drive_request is required for ONLINE_DRIVE streaming"): + list( + DatasourceManager.stream_online_results( + user_id="u1", + datasource_name="ds", + datasource_type="online_drive", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="", + datasource_param=None, + online_drive_request=None, + ) + ) + + +def test_stream_online_results_yields_messages_and_returns_empty_dict_online_drive(mocker): + class _Runtime: + def __init__(self) -> None: + self.runtime = types.SimpleNamespace(credentials=None) + + def online_drive_download_file(self, **_kwargs): + yield from _gen_messages_text_only("drive") + + runtime = _Runtime() + mocker.patch.object(DatasourceManager, "get_datasource_runtime", return_value=runtime) + mocker.patch( + "core.datasource.datasource_manager.DatasourceProviderService.get_datasource_credentials", + return_value={"token": "t"}, + ) + + gen = DatasourceManager.stream_online_results( + user_id="u1", + datasource_name="ds", + datasource_type="online_drive", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="cred", + datasource_param=None, + online_drive_request=types.SimpleNamespace(id="fid", bucket="b"), + ) + messages, final_value = _drain_generator(gen) + + assert runtime.runtime.credentials == {"token": "t"} + assert [m.message.text for m in messages] == ["drive"] + assert final_value == {} + + +def test_stream_online_results_raises_for_unsupported_stream_type(mocker): + mocker.patch.object(DatasourceManager, "get_datasource_runtime", return_value=mocker.Mock()) + mocker.patch( + "core.datasource.datasource_manager.DatasourceProviderService.get_datasource_credentials", + return_value={}, + ) + + with pytest.raises(ValueError, match="Unsupported datasource type for streaming"): + list( + DatasourceManager.stream_online_results( + user_id="u1", + datasource_name="ds", + datasource_type="website_crawl", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="", + datasource_param=None, + online_drive_request=None, + ) + ) + + def test_stream_node_events_emits_events_online_document(mocker): # make manager's low-level stream produce TEXT only mocker.patch.object( @@ -93,6 +370,260 @@ def test_stream_node_events_emits_events_online_document(mocker): assert events[-1].node_run_result.status == WorkflowNodeExecutionStatus.SUCCEEDED +def test_stream_node_events_builds_file_and_variables_from_messages(mocker): + mocker.patch.object(DatasourceManager, "stream_online_results", return_value=_gen_messages_text_only("ignored")) + + def _transformed(**_kwargs): + yield DatasourceMessage( + type=DatasourceMessage.MessageType.IMAGE_LINK, + message=DatasourceMessage.TextMessage(text="/files/datasources/tool_file_1.png"), + meta={}, + ) + yield DatasourceMessage( + type=DatasourceMessage.MessageType.TEXT, + message=DatasourceMessage.TextMessage(text="hello"), + meta=None, + ) + yield DatasourceMessage( + type=DatasourceMessage.MessageType.LINK, + message=DatasourceMessage.TextMessage(text="http://example.com"), + meta=None, + ) + yield DatasourceMessage( + type=DatasourceMessage.MessageType.VARIABLE, + message=DatasourceMessage.VariableMessage(variable_name="v", variable_value="a", stream=True), + meta=None, + ) + yield DatasourceMessage( + type=DatasourceMessage.MessageType.VARIABLE, + message=DatasourceMessage.VariableMessage(variable_name="v", variable_value="b", stream=True), + meta=None, + ) + yield DatasourceMessage( + type=DatasourceMessage.MessageType.VARIABLE, + message=DatasourceMessage.VariableMessage(variable_name="x", variable_value=1, stream=False), + meta=None, + ) + yield DatasourceMessage( + type=DatasourceMessage.MessageType.JSON, + message=DatasourceMessage.JsonMessage(json_object={"k": "v"}), + meta=None, + ) + + mocker.patch( + "core.datasource.datasource_manager.DatasourceFileMessageTransformer.transform_datasource_invoke_messages", + side_effect=_transformed, + ) + + fake_tool_file = types.SimpleNamespace(mimetype="image/png") + + class _Session: + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + def scalar(self, _stmt): + return fake_tool_file + + mocker.patch("core.datasource.datasource_manager.session_factory.create_session", return_value=_Session()) + mocker.patch( + "core.datasource.datasource_manager.file_factory.get_file_type_by_mime_type", return_value=FileType.IMAGE + ) + built = File( + tenant_id="t1", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.TOOL_FILE, + related_id="tool_file_1", + extension=".png", + mime_type="image/png", + storage_key="k", + ) + build_from_mapping = mocker.patch( + "core.datasource.datasource_manager.file_factory.build_from_mapping", + return_value=built, + ) + + variable_pool = mocker.Mock() + + events = list( + DatasourceManager.stream_node_events( + node_id="nodeA", + user_id="u1", + datasource_name="ds", + datasource_type="online_document", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="", + parameters_for_log={"k": "v"}, + datasource_info={"info": "x"}, + variable_pool=variable_pool, + datasource_param=types.SimpleNamespace(workspace_id="w", page_id="pg", type="t"), + online_drive_request=None, + ) + ) + + build_from_mapping.assert_called_once() + variable_pool.add.assert_not_called() + + assert any(isinstance(e, StreamChunkEvent) and e.chunk == "hello" for e in events) + assert any(isinstance(e, StreamChunkEvent) and e.chunk.startswith("Link: http") for e in events) + assert any(isinstance(e, StreamChunkEvent) and e.selector == ["nodeA", "v"] and e.chunk == "a" for e in events) + assert any(isinstance(e, StreamChunkEvent) and e.selector == ["nodeA", "v"] and e.chunk == "b" for e in events) + assert isinstance(events[-2], StreamChunkEvent) + assert events[-2].is_final is True + + assert isinstance(events[-1], StreamCompletedEvent) + assert events[-1].node_run_result.outputs["v"] == "ab" + assert events[-1].node_run_result.outputs["x"] == 1 + + +def test_stream_node_events_raises_when_toolfile_missing(mocker): + mocker.patch.object(DatasourceManager, "stream_online_results", return_value=_gen_messages_text_only("ignored")) + + def _transformed(**_kwargs): + yield DatasourceMessage( + type=DatasourceMessage.MessageType.IMAGE_LINK, + message=DatasourceMessage.TextMessage(text="/files/datasources/missing.png"), + meta={}, + ) + + mocker.patch( + "core.datasource.datasource_manager.DatasourceFileMessageTransformer.transform_datasource_invoke_messages", + side_effect=_transformed, + ) + + class _Session: + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + def scalar(self, _stmt): + return None + + mocker.patch("core.datasource.datasource_manager.session_factory.create_session", return_value=_Session()) + + with pytest.raises(ValueError, match="ToolFile not found for file_id=missing, tenant_id=t1"): + list( + DatasourceManager.stream_node_events( + node_id="nodeA", + user_id="u1", + datasource_name="ds", + datasource_type="online_document", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="", + parameters_for_log={}, + datasource_info={}, + variable_pool=mocker.Mock(), + datasource_param=types.SimpleNamespace(workspace_id="w", page_id="pg", type="t"), + online_drive_request=None, + ) + ) + + +def test_stream_node_events_online_drive_sets_variable_pool_file_and_outputs(mocker): + mocker.patch.object(DatasourceManager, "stream_online_results", return_value=_gen_messages_text_only("ignored")) + + file_in = File( + tenant_id="t1", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.TOOL_FILE, + related_id="tf", + extension=".pdf", + mime_type="application/pdf", + storage_key="k", + ) + + def _transformed(**_kwargs): + yield DatasourceMessage( + type=DatasourceMessage.MessageType.FILE, + message=DatasourceMessage.FileMessage(file_marker="file_marker"), + meta={"file": file_in}, + ) + + mocker.patch( + "core.datasource.datasource_manager.DatasourceFileMessageTransformer.transform_datasource_invoke_messages", + side_effect=_transformed, + ) + + variable_pool = mocker.Mock() + events = list( + DatasourceManager.stream_node_events( + node_id="nodeA", + user_id="u1", + datasource_name="ds", + datasource_type="online_drive", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="", + parameters_for_log={}, + datasource_info={"k": "v"}, + variable_pool=variable_pool, + datasource_param=None, + online_drive_request=types.SimpleNamespace(id="id", bucket="b"), + ) + ) + + variable_pool.add.assert_called_once() + assert variable_pool.add.call_args[0][0] == ["nodeA", "file"] + assert variable_pool.add.call_args[0][1] == file_in + + completed = events[-1] + assert isinstance(completed, StreamCompletedEvent) + assert completed.node_run_result.outputs["file"] == file_in + assert completed.node_run_result.outputs["datasource_type"] == DatasourceProviderType.ONLINE_DRIVE + + +def test_stream_node_events_skips_file_build_for_non_online_types(mocker): + mocker.patch.object(DatasourceManager, "stream_online_results", return_value=_gen_messages_text_only("ignored")) + + def _transformed(**_kwargs): + yield DatasourceMessage( + type=DatasourceMessage.MessageType.IMAGE_LINK, + message=DatasourceMessage.TextMessage(text="/files/datasources/tool_file_1.png"), + meta={}, + ) + + mocker.patch( + "core.datasource.datasource_manager.DatasourceFileMessageTransformer.transform_datasource_invoke_messages", + side_effect=_transformed, + ) + build_from_mapping = mocker.patch("core.datasource.datasource_manager.file_factory.build_from_mapping") + + events = list( + DatasourceManager.stream_node_events( + node_id="nodeA", + user_id="u1", + datasource_name="ds", + datasource_type="website_crawl", + provider_id="p/x", + tenant_id="t1", + provider="prov", + plugin_id="plug", + credential_id="", + parameters_for_log={}, + datasource_info={}, + variable_pool=mocker.Mock(), + datasource_param=None, + online_drive_request=None, + ) + ) + + build_from_mapping.assert_not_called() + assert isinstance(events[-1], StreamCompletedEvent) + assert events[-1].node_run_result.outputs["file"] is None + + def test_get_upload_file_by_id_builds_file(mocker): # fake UploadFile row fake_row = types.SimpleNamespace( @@ -133,3 +664,27 @@ def test_get_upload_file_by_id_builds_file(mocker): f = DatasourceManager.get_upload_file_by_id(file_id="fid", tenant_id="t1") assert f.related_id == "fid" assert f.extension == ".txt" + + +def test_get_upload_file_by_id_raises_when_missing(mocker): + class _Q: + def where(self, *_args, **_kwargs): + return self + + def first(self): + return None + + class _S: + def __enter__(self): + return self + + def __exit__(self, *exc): + return False + + def query(self, *_): + return _Q() + + mocker.patch("core.datasource.datasource_manager.session_factory.create_session", return_value=_S()) + + with pytest.raises(ValueError, match="UploadFile not found for file_id=fid, tenant_id=t1"): + DatasourceManager.get_upload_file_by_id(file_id="fid", tenant_id="t1") diff --git a/api/tests/unit_tests/core/datasource/test_errors.py b/api/tests/unit_tests/core/datasource/test_errors.py new file mode 100644 index 0000000000..95986415b1 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/test_errors.py @@ -0,0 +1,64 @@ +from unittest.mock import MagicMock + +from core.datasource.entities.datasource_entities import DatasourceInvokeMeta +from core.datasource.errors import ( + DatasourceApiSchemaError, + DatasourceEngineInvokeError, + DatasourceInvokeError, + DatasourceNotFoundError, + DatasourceNotSupportedError, + DatasourceParameterValidationError, + DatasourceProviderCredentialValidationError, + DatasourceProviderNotFoundError, +) + + +class TestErrors: + def test_datasource_provider_not_found_error(self): + error = DatasourceProviderNotFoundError("Provider not found") + assert str(error) == "Provider not found" + assert isinstance(error, ValueError) + + def test_datasource_not_found_error(self): + error = DatasourceNotFoundError("Datasource not found") + assert str(error) == "Datasource not found" + assert isinstance(error, ValueError) + + def test_datasource_parameter_validation_error(self): + error = DatasourceParameterValidationError("Validation failed") + assert str(error) == "Validation failed" + assert isinstance(error, ValueError) + + def test_datasource_provider_credential_validation_error(self): + error = DatasourceProviderCredentialValidationError("Credential validation failed") + assert str(error) == "Credential validation failed" + assert isinstance(error, ValueError) + + def test_datasource_not_supported_error(self): + error = DatasourceNotSupportedError("Not supported") + assert str(error) == "Not supported" + assert isinstance(error, ValueError) + + def test_datasource_invoke_error(self): + error = DatasourceInvokeError("Invoke error") + assert str(error) == "Invoke error" + assert isinstance(error, ValueError) + + def test_datasource_api_schema_error(self): + error = DatasourceApiSchemaError("API schema error") + assert str(error) == "API schema error" + assert isinstance(error, ValueError) + + def test_datasource_engine_invoke_error(self): + mock_meta = MagicMock(spec=DatasourceInvokeMeta) + error = DatasourceEngineInvokeError(meta=mock_meta) + assert error.meta == mock_meta + assert isinstance(error, Exception) + + def test_datasource_engine_invoke_error_init(self): + # Test initialization with meta + meta = DatasourceInvokeMeta(time_cost=1.5, error="Engine failed") + error = DatasourceEngineInvokeError(meta=meta) + assert error.meta == meta + assert error.meta.time_cost == 1.5 + assert error.meta.error == "Engine failed" diff --git a/api/tests/unit_tests/core/datasource/test_file_upload.py b/api/tests/unit_tests/core/datasource/test_file_upload.py index ad86190e00..63b86e64fc 100644 --- a/api/tests/unit_tests/core/datasource/test_file_upload.py +++ b/api/tests/unit_tests/core/datasource/test_file_upload.py @@ -35,7 +35,7 @@ TEST COVERAGE OVERVIEW: - Tests hash consistency and determinism 6. Invalid Filename Handling (TestInvalidFilenameHandling) - - Validates rejection of filenames with invalid characters (/, \\, :, *, ?, ", <, >, |) + - Validates rejection of filenames with path separators (/, \\) - Tests filename length truncation (max 200 characters) - Prevents path traversal attacks - Handles edge cases like empty filenames @@ -535,30 +535,23 @@ class TestInvalidFilenameHandling: @pytest.mark.parametrize( "invalid_char", - ["/", "\\", ":", "*", "?", '"', "<", ">", "|"], + ["/", "\\"], ) def test_filename_contains_invalid_characters(self, invalid_char): """Test detection of invalid characters in filename. - Security-critical test that validates rejection of dangerous filename characters. + Security-critical test that validates rejection of path separators. These characters are blocked because they: - / and \\ : Directory separators, could enable path traversal - - : : Drive letter separator on Windows, reserved character - - * and ? : Wildcards, could cause issues in file operations - - " : Quote character, could break command-line operations - - < and > : Redirection operators, command injection risk - - | : Pipe operator, command injection risk Blocking these characters prevents: - Path traversal attacks (../../etc/passwd) - - Command injection - - File system corruption - - Cross-platform compatibility issues + - ZIP entry traversal issues + - Ambiguous path handling """ # Arrange - Create filename with invalid character filename = f"test{invalid_char}file.txt" - # Define complete list of invalid characters - invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act - Check if filename contains any invalid character has_invalid_char = any(c in filename for c in invalid_chars) @@ -570,7 +563,7 @@ class TestInvalidFilenameHandling: """Test that valid filenames pass validation.""" # Arrange filename = "valid_file-name_123.txt" - invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act has_invalid_char = any(c in filename for c in invalid_chars) @@ -578,6 +571,16 @@ class TestInvalidFilenameHandling: # Assert assert has_invalid_char is False + @pytest.mark.parametrize("safe_char", [":", "*", "?", '"', "<", ">", "|"]) + def test_filename_allows_safe_metadata_characters(self, safe_char): + """Test that non-separator punctuation remains allowed in filenames.""" + filename = f"candidate{safe_char}resume.txt" + invalid_chars = ["/", "\\"] + + has_invalid_char = any(c in filename for c in invalid_chars) + + assert has_invalid_char is False + def test_extremely_long_filename_truncation(self): """Test handling of extremely long filenames.""" # Arrange @@ -904,7 +907,7 @@ class TestFilenameValidation: """Test that filenames with spaces are handled correctly.""" # Arrange filename = "my document with spaces.pdf" - invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act - Check for invalid characters has_invalid = any(c in filename for c in invalid_chars) @@ -921,7 +924,7 @@ class TestFilenameValidation: "مستند.txt", # Arabic "ファイル.jpg", # Japanese ] - invalid_chars = ["/", "\\", ":", "*", "?", '"', "<", ">", "|"] + invalid_chars = ["/", "\\"] # Act & Assert - Unicode should be allowed for filename in unicode_filenames: diff --git a/api/tests/unit_tests/core/datasource/utils/test_message_transformer.py b/api/tests/unit_tests/core/datasource/utils/test_message_transformer.py new file mode 100644 index 0000000000..43f582feb7 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/utils/test_message_transformer.py @@ -0,0 +1,337 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.datasource.entities.datasource_entities import DatasourceMessage +from core.datasource.utils.message_transformer import DatasourceFileMessageTransformer +from dify_graph.file import File +from dify_graph.file.enums import FileTransferMethod, FileType +from models.tools import ToolFile + + +class TestDatasourceFileMessageTransformer: + def test_transform_text_and_link_messages(self): + # Setup + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.TEXT, message=DatasourceMessage.TextMessage(text="hello") + ), + DatasourceMessage( + type=DatasourceMessage.MessageType.LINK, + message=DatasourceMessage.TextMessage(text="https://example.com"), + ), + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 2 + assert result[0].type == DatasourceMessage.MessageType.TEXT + assert result[0].message.text == "hello" + assert result[1].type == DatasourceMessage.MessageType.LINK + assert result[1].message.text == "https://example.com" + + @patch("core.datasource.utils.message_transformer.ToolFileManager") + @patch("core.datasource.utils.message_transformer.guess_extension") + def test_transform_image_message_success(self, mock_guess_ext, mock_tool_file_manager_cls): + # Setup + mock_manager = mock_tool_file_manager_cls.return_value + mock_tool_file = MagicMock(spec=ToolFile) + mock_tool_file.id = "file_id_123" + mock_tool_file.mimetype = "image/png" + mock_manager.create_file_by_url.return_value = mock_tool_file + mock_guess_ext.return_value = ".png" + + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.IMAGE, + message=DatasourceMessage.TextMessage(text="https://example.com/image.png"), + meta={"some": "meta"}, + ) + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1", conversation_id="conv1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.IMAGE_LINK + assert result[0].message.text == "/files/datasources/file_id_123.png" + assert result[0].meta == {"some": "meta"} + mock_manager.create_file_by_url.assert_called_once_with( + user_id="user1", tenant_id="tenant1", file_url="https://example.com/image.png", conversation_id="conv1" + ) + + @patch("core.datasource.utils.message_transformer.ToolFileManager") + def test_transform_image_message_failure(self, mock_tool_file_manager_cls): + # Setup + mock_manager = mock_tool_file_manager_cls.return_value + mock_manager.create_file_by_url.side_effect = Exception("Download failed") + + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.IMAGE, + message=DatasourceMessage.TextMessage(text="https://example.com/image.png"), + ) + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.TEXT + assert "Failed to download image" in result[0].message.text + assert "Download failed" in result[0].message.text + + @patch("core.datasource.utils.message_transformer.ToolFileManager") + @patch("core.datasource.utils.message_transformer.guess_extension") + def test_transform_blob_message_image(self, mock_guess_ext, mock_tool_file_manager_cls): + # Setup + mock_manager = mock_tool_file_manager_cls.return_value + mock_tool_file = MagicMock(spec=ToolFile) + mock_tool_file.id = "blob_id_456" + mock_tool_file.mimetype = "image/jpeg" + mock_manager.create_file_by_raw.return_value = mock_tool_file + mock_guess_ext.return_value = ".jpg" + + blob_data = b"fake-image-bits" + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.BLOB, + message=DatasourceMessage.BlobMessage(blob=blob_data), + meta={"mime_type": "image/jpeg", "file_name": "test.jpg"}, + ) + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.IMAGE_LINK + assert result[0].message.text == "/files/datasources/blob_id_456.jpg" + mock_manager.create_file_by_raw.assert_called_once() + + @patch("core.datasource.utils.message_transformer.ToolFileManager") + @patch("core.datasource.utils.message_transformer.guess_extension") + @patch("core.datasource.utils.message_transformer.guess_type") + def test_transform_blob_message_binary_guess_mimetype( + self, mock_guess_type, mock_guess_ext, mock_tool_file_manager_cls + ): + # Setup + mock_manager = mock_tool_file_manager_cls.return_value + mock_tool_file = MagicMock(spec=ToolFile) + mock_tool_file.id = "blob_id_789" + mock_tool_file.mimetype = "application/pdf" + mock_manager.create_file_by_raw.return_value = mock_tool_file + mock_guess_type.return_value = ("application/pdf", None) + mock_guess_ext.return_value = ".pdf" + + blob_data = b"fake-pdf-bits" + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.BLOB, + message=DatasourceMessage.BlobMessage(blob=blob_data), + meta={"file_name": "test.pdf"}, + ) + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.BINARY_LINK + assert result[0].message.text == "/files/datasources/blob_id_789.pdf" + + def test_transform_blob_message_invalid_type(self): + # Setup + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.BLOB, message=DatasourceMessage.TextMessage(text="not a blob") + ) + ] + + # Execute & Verify + with pytest.raises(ValueError, match="unexpected message type"): + list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + def test_transform_file_tool_file_image(self): + # Setup + mock_file = MagicMock(spec=File) + mock_file.transfer_method = FileTransferMethod.TOOL_FILE + mock_file.related_id = "related_123" + mock_file.extension = ".png" + mock_file.type = FileType.IMAGE + + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.FILE, + message=DatasourceMessage.TextMessage(text="ignored"), + meta={"file": mock_file}, + ) + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.IMAGE_LINK + assert result[0].message.text == "/files/datasources/related_123.png" + + def test_transform_file_tool_file_binary(self): + # Setup + mock_file = MagicMock(spec=File) + mock_file.transfer_method = FileTransferMethod.TOOL_FILE + mock_file.related_id = "related_456" + mock_file.extension = ".txt" + mock_file.type = FileType.DOCUMENT + + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.FILE, + message=DatasourceMessage.TextMessage(text="ignored"), + meta={"file": mock_file}, + ) + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.LINK + assert result[0].message.text == "/files/datasources/related_456.txt" + + def test_transform_file_other_transfer_method(self): + # Setup + mock_file = MagicMock(spec=File) + mock_file.transfer_method = FileTransferMethod.REMOTE_URL + + msg = DatasourceMessage( + type=DatasourceMessage.MessageType.FILE, + message=DatasourceMessage.TextMessage(text="remote image"), + meta={"file": mock_file}, + ) + messages = [msg] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0] == msg + + def test_transform_other_message_type(self): + # JSON type is yielded by the default 'else' block or the 'yield message' at the end + msg = DatasourceMessage( + type=DatasourceMessage.MessageType.JSON, message=DatasourceMessage.JsonMessage(json_object={"k": "v"}) + ) + messages = [msg] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify + assert len(result) == 1 + assert result[0] == msg + + def test_get_datasource_file_url(self): + # Test with extension + url = DatasourceFileMessageTransformer.get_datasource_file_url("file1", ".jpg") + assert url == "/files/datasources/file1.jpg" + + # Test without extension + url = DatasourceFileMessageTransformer.get_datasource_file_url("file2", None) + assert url == "/files/datasources/file2.bin" + + def test_transform_blob_message_no_meta_filename(self): + # This tests line 70 where filename might be None + with patch("core.datasource.utils.message_transformer.ToolFileManager") as mock_tool_file_manager_cls: + mock_manager = mock_tool_file_manager_cls.return_value + mock_tool_file = MagicMock(spec=ToolFile) + mock_tool_file.id = "blob_id_no_name" + mock_tool_file.mimetype = "application/octet-stream" + mock_manager.create_file_by_raw.return_value = mock_tool_file + + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.BLOB, + message=DatasourceMessage.BlobMessage(blob=b"data"), + meta={}, # No mime_type, no file_name + ) + ] + + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.BINARY_LINK + assert result[0].message.text == "/files/datasources/blob_id_no_name.bin" + + @patch("core.datasource.utils.message_transformer.ToolFileManager") + def test_transform_image_message_not_text_message(self, mock_tool_file_manager_cls): + # This tests line 24-26 where it checks if message is instance of TextMessage + messages = [ + DatasourceMessage( + type=DatasourceMessage.MessageType.IMAGE, message=DatasourceMessage.BlobMessage(blob=b"not-text") + ) + ] + + # Execute + result = list( + DatasourceFileMessageTransformer.transform_datasource_invoke_messages( + messages=iter(messages), user_id="user1", tenant_id="tenant1" + ) + ) + + # Verify - should yield unchanged if it's not a TextMessage + assert len(result) == 1 + assert result[0].type == DatasourceMessage.MessageType.IMAGE + assert isinstance(result[0].message, DatasourceMessage.BlobMessage) diff --git a/api/tests/unit_tests/core/datasource/website_crawl/test_website_crawl_plugin.py b/api/tests/unit_tests/core/datasource/website_crawl/test_website_crawl_plugin.py new file mode 100644 index 0000000000..2945eb5523 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/website_crawl/test_website_crawl_plugin.py @@ -0,0 +1,101 @@ +from collections.abc import Generator +from unittest.mock import MagicMock, patch + +import pytest + +from core.datasource.__base.datasource_runtime import DatasourceRuntime +from core.datasource.entities.datasource_entities import ( + DatasourceEntity, + DatasourceProviderType, + WebsiteCrawlMessage, +) +from core.datasource.website_crawl.website_crawl_plugin import WebsiteCrawlDatasourcePlugin + + +class TestWebsiteCrawlDatasourcePlugin: + @pytest.fixture + def mock_entity(self): + entity = MagicMock(spec=DatasourceEntity) + entity.identity = MagicMock() + entity.identity.provider = "test-provider" + entity.identity.name = "test-name" + return entity + + @pytest.fixture + def mock_runtime(self): + runtime = MagicMock(spec=DatasourceRuntime) + runtime.credentials = {"api_key": "test-key"} + return runtime + + def test_init(self, mock_entity, mock_runtime): + # Arrange + tenant_id = "test-tenant-id" + icon = "test-icon" + plugin_unique_identifier = "test-plugin-id" + + # Act + plugin = WebsiteCrawlDatasourcePlugin( + entity=mock_entity, + runtime=mock_runtime, + tenant_id=tenant_id, + icon=icon, + plugin_unique_identifier=plugin_unique_identifier, + ) + + # Assert + assert plugin.tenant_id == tenant_id + assert plugin.plugin_unique_identifier == plugin_unique_identifier + assert plugin.entity == mock_entity + assert plugin.runtime == mock_runtime + assert plugin.icon == icon + + def test_datasource_provider_type(self, mock_entity, mock_runtime): + # Arrange + plugin = WebsiteCrawlDatasourcePlugin( + entity=mock_entity, runtime=mock_runtime, tenant_id="test", icon="test", plugin_unique_identifier="test" + ) + + # Act & Assert + assert plugin.datasource_provider_type() == DatasourceProviderType.WEBSITE_CRAWL + + def test_get_website_crawl(self, mock_entity, mock_runtime): + # Arrange + plugin = WebsiteCrawlDatasourcePlugin( + entity=mock_entity, + runtime=mock_runtime, + tenant_id="test-tenant-id", + icon="test-icon", + plugin_unique_identifier="test-plugin-id", + ) + + user_id = "test-user-id" + datasource_parameters = {"url": "https://example.com"} + provider_type = "firecrawl" + + mock_message = MagicMock(spec=WebsiteCrawlMessage) + + # Mock PluginDatasourceManager + with patch("core.datasource.website_crawl.website_crawl_plugin.PluginDatasourceManager") as mock_manager_class: + mock_manager = mock_manager_class.return_value + mock_manager.get_website_crawl.return_value = (msg for msg in [mock_message]) + + # Act + result = plugin.get_website_crawl( + user_id=user_id, datasource_parameters=datasource_parameters, provider_type=provider_type + ) + + # Assert + assert isinstance(result, Generator) + messages = list(result) + assert len(messages) == 1 + assert messages[0] == mock_message + + mock_manager.get_website_crawl.assert_called_once_with( + tenant_id="test-tenant-id", + user_id=user_id, + datasource_provider="test-provider", + datasource_name="test-name", + credentials={"api_key": "test-key"}, + datasource_parameters=datasource_parameters, + provider_type=provider_type, + ) diff --git a/api/tests/unit_tests/core/datasource/website_crawl/test_website_crawl_provider.py b/api/tests/unit_tests/core/datasource/website_crawl/test_website_crawl_provider.py new file mode 100644 index 0000000000..b7822ba800 --- /dev/null +++ b/api/tests/unit_tests/core/datasource/website_crawl/test_website_crawl_provider.py @@ -0,0 +1,95 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.datasource.__base.datasource_runtime import DatasourceRuntime +from core.datasource.entities.datasource_entities import ( + DatasourceProviderEntityWithPlugin, + DatasourceProviderType, +) +from core.datasource.website_crawl.website_crawl_provider import WebsiteCrawlDatasourcePluginProviderController + + +class TestWebsiteCrawlDatasourcePluginProviderController: + @pytest.fixture + def mock_entity(self): + entity = MagicMock(spec=DatasourceProviderEntityWithPlugin) + entity.datasources = [] + entity.identity = MagicMock() + entity.identity.icon = "test-icon" + return entity + + def test_init(self, mock_entity): + # Arrange + plugin_id = "test-plugin-id" + plugin_unique_identifier = "test-unique-id" + tenant_id = "test-tenant-id" + + # Act + controller = WebsiteCrawlDatasourcePluginProviderController( + entity=mock_entity, + plugin_id=plugin_id, + plugin_unique_identifier=plugin_unique_identifier, + tenant_id=tenant_id, + ) + + # Assert + assert controller.entity == mock_entity + assert controller.plugin_id == plugin_id + assert controller.plugin_unique_identifier == plugin_unique_identifier + assert controller.tenant_id == tenant_id + + def test_provider_type(self, mock_entity): + # Arrange + controller = WebsiteCrawlDatasourcePluginProviderController( + entity=mock_entity, plugin_id="test", plugin_unique_identifier="test", tenant_id="test" + ) + + # Act & Assert + assert controller.provider_type == DatasourceProviderType.WEBSITE_CRAWL + + def test_get_datasource_success(self, mock_entity): + # Arrange + datasource_name = "test-datasource" + tenant_id = "test-tenant-id" + plugin_unique_identifier = "test-unique-id" + + mock_datasource_entity = MagicMock() + mock_datasource_entity.identity = MagicMock() + mock_datasource_entity.identity.name = datasource_name + mock_entity.datasources = [mock_datasource_entity] + + controller = WebsiteCrawlDatasourcePluginProviderController( + entity=mock_entity, plugin_id="test", plugin_unique_identifier=plugin_unique_identifier, tenant_id=tenant_id + ) + + # Act + with patch( + "core.datasource.website_crawl.website_crawl_provider.WebsiteCrawlDatasourcePlugin" + ) as mock_plugin_class: + mock_plugin_instance = mock_plugin_class.return_value + result = controller.get_datasource(datasource_name) + + # Assert + assert result == mock_plugin_instance + mock_plugin_class.assert_called_once() + args, kwargs = mock_plugin_class.call_args + assert kwargs["entity"] == mock_datasource_entity + assert isinstance(kwargs["runtime"], DatasourceRuntime) + assert kwargs["runtime"].tenant_id == tenant_id + assert kwargs["tenant_id"] == tenant_id + assert kwargs["icon"] == "test-icon" + assert kwargs["plugin_unique_identifier"] == plugin_unique_identifier + + def test_get_datasource_not_found(self, mock_entity): + # Arrange + datasource_name = "non-existent" + mock_entity.datasources = [] + + controller = WebsiteCrawlDatasourcePluginProviderController( + entity=mock_entity, plugin_id="test", plugin_unique_identifier="test", tenant_id="test" + ) + + # Act & Assert + with pytest.raises(ValueError, match=f"Datasource with name {datasource_name} not found"): + controller.get_datasource(datasource_name) diff --git a/api/tests/unit_tests/core/entities/test_entities_agent_entities.py b/api/tests/unit_tests/core/entities/test_entities_agent_entities.py new file mode 100644 index 0000000000..2437602695 --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_agent_entities.py @@ -0,0 +1,9 @@ +from core.entities.agent_entities import PlanningStrategy + + +def test_planning_strategy_values_are_stable() -> None: + # Arrange / Act / Assert + assert PlanningStrategy.ROUTER.value == "router" + assert PlanningStrategy.REACT_ROUTER.value == "react_router" + assert PlanningStrategy.REACT.value == "react" + assert PlanningStrategy.FUNCTION_CALL.value == "function_call" diff --git a/api/tests/unit_tests/core/entities/test_entities_document_task.py b/api/tests/unit_tests/core/entities/test_entities_document_task.py new file mode 100644 index 0000000000..dd550930d7 --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_document_task.py @@ -0,0 +1,18 @@ +from core.entities.document_task import DocumentTask + + +def test_document_task_keeps_indexing_identifiers() -> None: + # Arrange + document_ids = ("doc-1", "doc-2") + + # Act + task = DocumentTask( + tenant_id="tenant-1", + dataset_id="dataset-1", + document_ids=document_ids, + ) + + # Assert + assert task.tenant_id == "tenant-1" + assert task.dataset_id == "dataset-1" + assert task.document_ids == document_ids diff --git a/api/tests/unit_tests/core/entities/test_entities_embedding_type.py b/api/tests/unit_tests/core/entities/test_entities_embedding_type.py new file mode 100644 index 0000000000..5a82fc4842 --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_embedding_type.py @@ -0,0 +1,7 @@ +from core.entities.embedding_type import EmbeddingInputType + + +def test_embedding_input_type_values_are_stable() -> None: + # Arrange / Act / Assert + assert EmbeddingInputType.DOCUMENT.value == "document" + assert EmbeddingInputType.QUERY.value == "query" diff --git a/api/tests/unit_tests/core/entities/test_entities_execution_extra_content.py b/api/tests/unit_tests/core/entities/test_entities_execution_extra_content.py new file mode 100644 index 0000000000..2e4f6d34fb --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_execution_extra_content.py @@ -0,0 +1,45 @@ +from core.entities.execution_extra_content import ( + ExecutionExtraContentDomainModel, + HumanInputContent, + HumanInputFormDefinition, + HumanInputFormSubmissionData, +) +from dify_graph.nodes.human_input.entities import FormInput, UserAction +from dify_graph.nodes.human_input.enums import FormInputType +from models.execution_extra_content import ExecutionContentType + + +def test_human_input_content_defaults_and_domain_alias() -> None: + # Arrange + form_definition = HumanInputFormDefinition( + form_id="form-1", + node_id="node-1", + node_title="Human Input", + form_content="Please confirm", + inputs=[FormInput(type=FormInputType.TEXT_INPUT, output_variable_name="answer")], + actions=[UserAction(id="confirm", title="Confirm")], + resolved_default_values={"answer": "yes"}, + expiration_time=1_700_000_000, + ) + submission_data = HumanInputFormSubmissionData( + node_id="node-1", + node_title="Human Input", + rendered_content="Please confirm", + action_id="confirm", + action_text="Confirm", + ) + + # Act + content = HumanInputContent( + workflow_run_id="workflow-run-1", + submitted=True, + form_definition=form_definition, + form_submission_data=submission_data, + ) + + # Assert + assert form_definition.model_config.get("frozen") is True + assert content.type == ExecutionContentType.HUMAN_INPUT + assert content.form_definition is form_definition + assert content.form_submission_data is submission_data + assert ExecutionExtraContentDomainModel is HumanInputContent diff --git a/api/tests/unit_tests/core/entities/test_entities_knowledge_entities.py b/api/tests/unit_tests/core/entities/test_entities_knowledge_entities.py new file mode 100644 index 0000000000..d25f20145f --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_knowledge_entities.py @@ -0,0 +1,45 @@ +from core.entities.knowledge_entities import ( + PipelineDataset, + PipelineDocument, + PipelineGenerateResponse, +) + + +def test_pipeline_dataset_normalizes_none_description() -> None: + # Arrange / Act + dataset = PipelineDataset( + id="dataset-1", + name="Dataset", + description=None, + chunk_structure="parent-child", + ) + + # Assert + assert dataset.description == "" + + +def test_pipeline_generate_response_builds_nested_models() -> None: + # Arrange + dataset = PipelineDataset( + id="dataset-1", + name="Dataset", + description="Knowledge base", + chunk_structure="parent-child", + ) + document = PipelineDocument( + id="doc-1", + position=1, + data_source_type="file", + data_source_info={"name": "spec.pdf"}, + name="spec.pdf", + indexing_status="completed", + enabled=True, + ) + + # Act + response = PipelineGenerateResponse(batch="batch-1", dataset=dataset, documents=[document]) + + # Assert + assert response.batch == "batch-1" + assert response.dataset.id == "dataset-1" + assert response.documents[0].id == "doc-1" diff --git a/api/tests/unit_tests/core/entities/test_entities_mcp_provider.py b/api/tests/unit_tests/core/entities/test_entities_mcp_provider.py new file mode 100644 index 0000000000..5449c63b45 --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_mcp_provider.py @@ -0,0 +1,450 @@ +from datetime import UTC, datetime +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.entities import mcp_provider as mcp_provider_module +from core.entities.mcp_provider import ( + DEFAULT_EXPIRES_IN, + DEFAULT_TOKEN_TYPE, + MCPProviderEntity, +) +from core.mcp.types import OAuthTokens + + +def _build_mcp_provider_entity() -> MCPProviderEntity: + now = datetime(2025, 1, 1, tzinfo=UTC) + return MCPProviderEntity( + id="provider-1", + provider_id="server-1", + name="Example MCP", + tenant_id="tenant-1", + user_id="user-1", + server_url="encrypted-server-url", + headers={}, + timeout=30, + sse_read_timeout=300, + authed=False, + credentials={}, + tools=[], + icon={"en_US": "icon.png"}, + created_at=now, + updated_at=now, + ) + + +def test_from_db_model_maps_fields() -> None: + # Arrange + now = datetime(2025, 1, 1, tzinfo=UTC) + db_provider = SimpleNamespace( + id="provider-1", + server_identifier="server-1", + name="Example MCP", + tenant_id="tenant-1", + user_id="user-1", + server_url="encrypted-server-url", + headers={"Authorization": "enc"}, + timeout=15, + sse_read_timeout=120, + authed=True, + credentials={"access_token": "enc-token"}, + tool_dict=[{"name": "search"}], + icon=None, + created_at=now, + updated_at=now, + ) + + # Act + entity = MCPProviderEntity.from_db_model(db_provider) + + # Assert + assert entity.provider_id == "server-1" + assert entity.tools == [{"name": "search"}] + assert entity.icon == "" + + +def test_redirect_url_uses_console_api_url(monkeypatch: pytest.MonkeyPatch) -> None: + # Arrange + entity = _build_mcp_provider_entity() + monkeypatch.setattr(mcp_provider_module.dify_config, "CONSOLE_API_URL", "https://console.example.com") + + # Act + redirect_url = entity.redirect_url + + # Assert + assert redirect_url == "https://console.example.com/console/api/mcp/oauth/callback" + + +def test_client_metadata_for_authorization_code_flow() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value={}): + # Act + metadata = entity.client_metadata + + # Assert + assert metadata.grant_types == ["refresh_token", "authorization_code"] + assert metadata.redirect_uris == [entity.redirect_url] + assert metadata.response_types == ["code"] + + +def test_client_metadata_for_client_credentials_flow() -> None: + # Arrange + entity = _build_mcp_provider_entity() + credentials = {"client_information": {"grant_types": ["client_credentials"]}} + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value=credentials): + # Act + metadata = entity.client_metadata + + # Assert + assert metadata.grant_types == ["refresh_token", "client_credentials"] + assert metadata.redirect_uris == [] + assert metadata.response_types == [] + + +def test_client_metadata_prefers_nested_authorization_code_grant_type() -> None: + # Arrange + entity = _build_mcp_provider_entity() + credentials = { + "grant_type": "client_credentials", + "client_information": {"grant_types": ["authorization_code"]}, + } + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value=credentials): + # Act + metadata = entity.client_metadata + + # Assert + assert metadata.grant_types == ["refresh_token", "authorization_code"] + assert metadata.redirect_uris == [entity.redirect_url] + assert metadata.response_types == ["code"] + + +def test_provider_icon_returns_icon_dict_as_is() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy(update={"icon": {"en_US": "icon.png"}}) + + # Act + icon = entity.provider_icon + + # Assert + assert icon == {"en_US": "icon.png"} + + +def test_provider_icon_uses_signed_url_for_plain_path() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy(update={"icon": "icons/mcp.png"}) + + with patch( + "core.entities.mcp_provider.file_helpers.get_signed_file_url", + return_value="https://signed.example.com/icons/mcp.png", + ) as mock_get_signed_url: + # Act + icon = entity.provider_icon + + # Assert + mock_get_signed_url.assert_called_once_with("icons/mcp.png") + assert icon == "https://signed.example.com/icons/mcp.png" + + +def test_to_api_response_without_sensitive_data_skips_auth_related_work() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy(update={"icon": {"en_US": "icon.png"}}) + + with patch.object(MCPProviderEntity, "masked_server_url", return_value="https://api.example.com/******"): + # Act + response = entity.to_api_response(include_sensitive=False) + + # Assert + assert response["author"] == "Anonymous" + assert response["masked_headers"] == {} + assert response["is_dynamic_registration"] is True + assert "authentication" not in response + + +def test_to_api_response_with_sensitive_data_includes_masked_values() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy( + update={ + "credentials": {"client_information": {"is_dynamic_registration": False}}, + "icon": {"en_US": "icon.png"}, + } + ) + + with patch.object(MCPProviderEntity, "masked_server_url", return_value="https://api.example.com/******"): + with patch.object(MCPProviderEntity, "masked_headers", return_value={"Authorization": "Be****"}): + with patch.object(MCPProviderEntity, "masked_credentials", return_value={"client_id": "cl****"}): + # Act + response = entity.to_api_response(user_name="Rajat", include_sensitive=True) + + # Assert + assert response["author"] == "Rajat" + assert response["masked_headers"] == {"Authorization": "Be****"} + assert response["authentication"] == {"client_id": "cl****"} + assert response["is_dynamic_registration"] is False + + +def test_retrieve_client_information_decrypts_nested_secret() -> None: + # Arrange + entity = _build_mcp_provider_entity() + credentials = {"client_information": {"client_id": "client-1", "encrypted_client_secret": "enc-secret"}} + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value=credentials): + with patch("core.entities.mcp_provider.encrypter.decrypt_token", return_value="plain-secret") as mock_decrypt: + # Act + client_info = entity.retrieve_client_information() + + # Assert + assert client_info is not None + assert client_info.client_id == "client-1" + assert client_info.client_secret == "plain-secret" + mock_decrypt.assert_called_once_with("tenant-1", "enc-secret") + + +def test_retrieve_client_information_returns_none_for_missing_data() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value={}): + # Act + result_empty = entity.retrieve_client_information() + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value={"client_information": "invalid"}): + # Act + result_invalid = entity.retrieve_client_information() + + # Assert + assert result_empty is None + assert result_invalid is None + + +def test_masked_server_url_hides_path_segments() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + with patch.object( + MCPProviderEntity, + "decrypt_server_url", + return_value="https://api.example.com/v1/mcp?query=1", + ): + # Act + masked_url = entity.masked_server_url() + + # Assert + assert masked_url == "https://api.example.com/******?query=1" + + +def test_mask_value_covers_short_and_long_values() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + # Act + short_masked = entity._mask_value("short") + long_masked = entity._mask_value("abcdefghijkl") + + # Assert + assert short_masked == "*****" + assert long_masked == "ab********kl" + + +def test_masked_headers_masks_all_decrypted_header_values() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + with patch.object(MCPProviderEntity, "decrypt_headers", return_value={"Authorization": "abcdefgh"}): + # Act + masked = entity.masked_headers() + + # Assert + assert masked == {"Authorization": "ab****gh"} + + +def test_masked_credentials_handles_nested_secret_fields() -> None: + # Arrange + entity = _build_mcp_provider_entity() + credentials = { + "client_information": { + "client_id": "client-id", + "encrypted_client_secret": "encrypted-value", + "client_secret": "plain-secret", + } + } + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value=credentials): + with patch("core.entities.mcp_provider.encrypter.decrypt_token", return_value="decrypted-secret"): + # Act + masked = entity.masked_credentials() + + # Assert + assert masked["client_id"] == "cl*****id" + assert masked["client_secret"] == "pl********et" + + +def test_masked_credentials_returns_empty_for_missing_client_information() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value={}): + # Act + masked_empty = entity.masked_credentials() + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value={"client_information": "invalid"}): + # Act + masked_invalid = entity.masked_credentials() + + # Assert + assert masked_empty == {} + assert masked_invalid == {} + + +def test_retrieve_tokens_returns_defaults_when_optional_fields_missing() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy(update={"credentials": {"token": "encrypted"}}) + + with patch.object( + MCPProviderEntity, + "decrypt_credentials", + return_value={"access_token": "token", "expires_in": "", "refresh_token": "refresh"}, + ): + # Act + tokens = entity.retrieve_tokens() + + # Assert + assert isinstance(tokens, OAuthTokens) + assert tokens.access_token == "token" + assert tokens.token_type == DEFAULT_TOKEN_TYPE + assert tokens.expires_in == DEFAULT_EXPIRES_IN + assert tokens.refresh_token == "refresh" + + +def test_retrieve_tokens_returns_none_when_access_token_missing() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy(update={"credentials": {"token": "encrypted"}}) + + with patch.object(MCPProviderEntity, "decrypt_credentials", return_value={"access_token": ""}) as mock_decrypt: + # Act + tokens = entity.retrieve_tokens() + + # Assert + mock_decrypt.assert_called_once() + assert tokens is None + + +def test_decrypt_server_url_delegates_to_encrypter() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + with patch("core.entities.mcp_provider.encrypter.decrypt_token", return_value="https://api.example.com") as mock: + # Act + decrypted = entity.decrypt_server_url() + + # Assert + mock.assert_called_once_with("tenant-1", "encrypted-server-url") + assert decrypted == "https://api.example.com" + + +def test_decrypt_authentication_injects_authorization_for_oauth() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy(update={"authed": True, "headers": {}}) + + with patch.object(MCPProviderEntity, "decrypt_headers", return_value={}): + with patch.object( + MCPProviderEntity, + "retrieve_tokens", + return_value=OAuthTokens(access_token="abc123", token_type="bearer"), + ): + # Act + headers = entity.decrypt_authentication() + + # Assert + assert headers["Authorization"] == "Bearer abc123" + + +def test_decrypt_authentication_does_not_overwrite_existing_headers() -> None: + # Arrange + entity = _build_mcp_provider_entity().model_copy( + update={"authed": True, "headers": {"Authorization": "encrypted-header"}} + ) + + with patch.object(MCPProviderEntity, "decrypt_headers", return_value={"Authorization": "existing"}): + with patch.object( + MCPProviderEntity, + "retrieve_tokens", + return_value=OAuthTokens(access_token="abc", token_type="bearer"), + ) as mock_tokens: + # Act + headers = entity.decrypt_authentication() + + # Assert + mock_tokens.assert_not_called() + assert headers == {"Authorization": "existing"} + + +def test_decrypt_dict_returns_empty_for_empty_input() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + # Act + decrypted = entity._decrypt_dict({}) + + # Assert + assert decrypted == {} + + +def test_decrypt_dict_returns_original_data_when_no_encrypted_fields() -> None: + # Arrange + entity = _build_mcp_provider_entity() + input_data = {"nested": {"k": "v"}, "count": 2, "empty": ""} + + # Act + result = entity._decrypt_dict(input_data) + + # Assert + assert result is input_data + + +def test_decrypt_dict_only_decrypts_top_level_string_values() -> None: + # Arrange + entity = _build_mcp_provider_entity() + decryptor = Mock() + decryptor.decrypt.return_value = {"api_key": "plain-key"} + + def _fake_create_provider_encrypter(*, tenant_id: str, config: list, cache): + assert tenant_id == "tenant-1" + assert any(item.name == "api_key" for item in config) + return decryptor, None + + with patch("core.tools.utils.encryption.create_provider_encrypter", side_effect=_fake_create_provider_encrypter): + # Act + result = entity._decrypt_dict( + { + "api_key": "encrypted-key", + "nested": {"client_id": "unchanged"}, + "empty": "", + "count": 2, + } + ) + + # Assert + decryptor.decrypt.assert_called_once_with({"api_key": "encrypted-key"}) + assert result["api_key"] == "plain-key" + assert result["nested"] == {"client_id": "unchanged"} + assert result["count"] == 2 + + +def test_decrypt_headers_and_credentials_delegate_to_decrypt_dict() -> None: + # Arrange + entity = _build_mcp_provider_entity() + + with patch.object(MCPProviderEntity, "_decrypt_dict", side_effect=[{"h": "v"}, {"c": "v"}]) as mock: + # Act + headers = entity.decrypt_headers() + credentials = entity.decrypt_credentials() + + # Assert + assert mock.call_count == 2 + assert headers == {"h": "v"} + assert credentials == {"c": "v"} diff --git a/api/tests/unit_tests/core/entities/test_entities_model_entities.py b/api/tests/unit_tests/core/entities/test_entities_model_entities.py new file mode 100644 index 0000000000..7a3d5e84ed --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_model_entities.py @@ -0,0 +1,92 @@ +"""Unit tests for model entity behavior and invariants. + +Covers DefaultModelEntity, DefaultModelProviderEntity, ModelStatus, +ProviderModelWithStatusEntity, and SimpleModelProviderEntity. Assumes i18n +labels are provided via I18nObject, model metadata aligns with FetchFrom and +ModelType expectations, and ProviderEntity/ConfigurateMethod interactions +drive provider mapping behavior. +""" + +import pytest + +from core.entities.model_entities import ( + DefaultModelEntity, + DefaultModelProviderEntity, + ModelStatus, + ProviderModelWithStatusEntity, + SimpleModelProviderEntity, +) +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import FetchFrom, ModelType +from dify_graph.model_runtime.entities.provider_entities import ConfigurateMethod, ProviderEntity + + +def _build_model_with_status(status: ModelStatus) -> ProviderModelWithStatusEntity: + return ProviderModelWithStatusEntity( + model="gpt-4", + label=I18nObject(en_US="GPT-4"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={}, + status=status, + ) + + +def test_simple_model_provider_entity_maps_from_provider_entity() -> None: + # Arrange + provider_entity = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + ) + + # Act + simple_provider = SimpleModelProviderEntity(provider_entity) + + # Assert + assert simple_provider.provider == "openai" + assert simple_provider.label.en_US == "OpenAI" + assert simple_provider.supported_model_types == [ModelType.LLM] + + +def test_provider_model_with_status_raises_for_known_error_statuses() -> None: + # Arrange + expectations = { + ModelStatus.NO_CONFIGURE: "Model is not configured", + ModelStatus.QUOTA_EXCEEDED: "Model quota has been exceeded", + ModelStatus.NO_PERMISSION: "No permission to use this model", + ModelStatus.DISABLED: "Model is disabled", + } + + for status, message in expectations.items(): + # Act / Assert + with pytest.raises(ValueError, match=message): + _build_model_with_status(status).raise_for_status() + + +def test_provider_model_with_status_allows_active_and_credential_removed() -> None: + # Arrange + active_model = _build_model_with_status(ModelStatus.ACTIVE) + removed_model = _build_model_with_status(ModelStatus.CREDENTIAL_REMOVED) + + # Act / Assert + active_model.raise_for_status() + removed_model.raise_for_status() + + +def test_default_model_entity_accepts_model_field_name() -> None: + # Arrange / Act + default_model = DefaultModelEntity( + model="gpt-4o-mini", + model_type=ModelType.LLM, + provider=DefaultModelProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + ), + ) + + # Assert + assert default_model.model == "gpt-4o-mini" + assert default_model.provider.provider == "openai" diff --git a/api/tests/unit_tests/core/entities/test_entities_parameter_entities.py b/api/tests/unit_tests/core/entities/test_entities_parameter_entities.py new file mode 100644 index 0000000000..20b7bf2a9f --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_parameter_entities.py @@ -0,0 +1,22 @@ +from core.entities.parameter_entities import ( + AppSelectorScope, + CommonParameterType, + ModelSelectorScope, + ToolSelectorScope, +) + + +def test_common_parameter_type_values_are_stable() -> None: + # Arrange / Act / Assert + assert CommonParameterType.SECRET_INPUT.value == "secret-input" + assert CommonParameterType.MODEL_SELECTOR.value == "model-selector" + assert CommonParameterType.DYNAMIC_SELECT.value == "dynamic-select" + assert CommonParameterType.ARRAY.value == "array" + assert CommonParameterType.OBJECT.value == "object" + + +def test_selector_scope_values_are_stable() -> None: + # Arrange / Act / Assert + assert AppSelectorScope.WORKFLOW.value == "workflow" + assert ModelSelectorScope.TEXT_EMBEDDING.value == "text-embedding" + assert ToolSelectorScope.BUILTIN.value == "builtin" diff --git a/api/tests/unit_tests/core/entities/test_entities_provider_configuration.py b/api/tests/unit_tests/core/entities/test_entities_provider_configuration.py new file mode 100644 index 0000000000..82f98d07a3 --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_provider_configuration.py @@ -0,0 +1,1850 @@ +from __future__ import annotations + +from contextlib import contextmanager +from types import SimpleNamespace +from typing import Any +from unittest.mock import Mock, patch + +import pytest + +from constants import HIDDEN_VALUE +from core.entities.model_entities import ModelStatus +from core.entities.provider_configuration import ProviderConfiguration, ProviderConfigurations +from core.entities.provider_entities import ( + CredentialConfiguration, + CustomConfiguration, + CustomModelConfiguration, + CustomProviderConfiguration, + ModelLoadBalancingConfiguration, + ModelSettings, + ProviderQuotaType, + QuotaConfiguration, + QuotaUnit, + RestrictModel, + SystemConfiguration, + SystemConfigurationStatus, +) +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType +from dify_graph.model_runtime.entities.provider_entities import ( + ConfigurateMethod, + CredentialFormSchema, + FieldModelSchema, + FormType, + ModelCredentialSchema, + ProviderCredentialSchema, + ProviderEntity, +) +from models.provider import ProviderType +from models.provider_ids import ModelProviderID + +_UNSET = object() + + +def _build_provider_configuration(*, provider_name: str = "openai") -> ProviderConfiguration: + provider_entity = ProviderEntity( + provider=provider_name, + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + ) + system_configuration = SystemConfiguration( + enabled=True, + credentials={"api_key": "test-key"}, + current_quota_type=ProviderQuotaType.TRIAL, + quota_configurations=[ + QuotaConfiguration( + quota_type=ProviderQuotaType.TRIAL, + quota_unit=QuotaUnit.TOKENS, + quota_limit=1_000, + quota_used=0, + is_valid=True, + restrict_models=[], + ) + ], + ) + with patch("core.entities.provider_configuration.original_provider_configurate_methods", {}): + return ProviderConfiguration( + tenant_id="tenant-1", + provider=provider_entity, + preferred_provider_type=ProviderType.SYSTEM, + using_provider_type=ProviderType.SYSTEM, + system_configuration=system_configuration, + custom_configuration=CustomConfiguration(provider=None, models=[]), + model_settings=[], + ) + + +def _build_ai_model(name: str, *, model_type: ModelType = ModelType.LLM) -> AIModelEntity: + return AIModelEntity( + model=name, + label=I18nObject(en_US=name), + model_type=model_type, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={}, + ) + + +def _exec_result( + *, + scalar_one_or_none: Any = _UNSET, + scalar: Any = _UNSET, + scalars_all: Any = _UNSET, + scalars_first: Any = _UNSET, +) -> Mock: + result = Mock() + if scalar_one_or_none is not _UNSET: + result.scalar_one_or_none.return_value = scalar_one_or_none + if scalar is not _UNSET: + result.scalar.return_value = scalar + if scalars_all is not _UNSET or scalars_first is not _UNSET: + scalars = Mock() + if scalars_all is not _UNSET: + scalars.all.return_value = scalars_all + if scalars_first is not _UNSET: + scalars.first.return_value = scalars_first + result.scalars.return_value = scalars + return result + + +@contextmanager +def _patched_session(session: Mock): + with patch("core.entities.provider_configuration.db") as mock_db: + mock_db.engine = Mock() + with patch("core.entities.provider_configuration.Session") as mock_session_cls: + mock_session_cls.return_value.__enter__.return_value = session + yield mock_session_cls + + +def _build_secret_provider_schema() -> ProviderCredentialSchema: + return ProviderCredentialSchema( + credential_form_schemas=[ + CredentialFormSchema( + variable="openai_api_key", + label=I18nObject(en_US="API Key"), + type=FormType.SECRET_INPUT, + ) + ] + ) + + +def _build_secret_model_schema() -> ModelCredentialSchema: + return ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model")), + credential_form_schemas=[ + CredentialFormSchema( + variable="openai_api_key", + label=I18nObject(en_US="API Key"), + type=FormType.SECRET_INPUT, + ) + ], + ) + + +def test_extract_secret_variables_returns_only_secret_inputs() -> None: + configuration = _build_provider_configuration() + credential_form_schemas = [ + CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key"), + type=FormType.SECRET_INPUT, + ), + CredentialFormSchema( + variable="endpoint", + label=I18nObject(en_US="Endpoint"), + type=FormType.TEXT_INPUT, + ), + ] + + secret_variables = configuration.extract_secret_variables(credential_form_schemas) + assert secret_variables == ["api_key"] + + +def test_obfuscated_credentials_masks_only_secret_fields() -> None: + configuration = _build_provider_configuration() + credential_form_schemas = [ + CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key"), + type=FormType.SECRET_INPUT, + ), + CredentialFormSchema( + variable="endpoint", + label=I18nObject(en_US="Endpoint"), + type=FormType.TEXT_INPUT, + ), + ] + + with patch( + "core.entities.provider_configuration.encrypter.obfuscated_token", + side_effect=lambda value: f"masked-{value[-2:]}", + ): + obfuscated = configuration.obfuscated_credentials( + credentials={"api_key": "sk-test-1234", "endpoint": "https://api.example.com"}, + credential_form_schemas=credential_form_schemas, + ) + + assert obfuscated["api_key"] == "masked-34" + assert obfuscated["endpoint"] == "https://api.example.com" + + +def test_provider_configurations_behave_like_keyed_container() -> None: + configuration = _build_provider_configuration() + provider_key = str(ModelProviderID("openai")) + configurations = ProviderConfigurations(tenant_id="tenant-1") + + configurations[provider_key] = configuration + + assert "openai" in configurations + assert configurations["openai"] is configuration + assert configurations.get("openai") is configuration + assert configurations.to_list() == [configuration] + assert list(configurations) == [(provider_key, configuration)] + + +def test_provider_configurations_get_models_forwards_filters() -> None: + configuration = _build_provider_configuration() + provider_key = str(ModelProviderID("openai")) + configurations = ProviderConfigurations(tenant_id="tenant-1") + configurations[provider_key] = configuration + expected_model = Mock() + + with patch.object(ProviderConfiguration, "get_provider_models", return_value=[expected_model]) as mock_get: + models = configurations.get_models(provider="openai", model_type=ModelType.LLM, only_active=True) + + mock_get.assert_called_once_with(ModelType.LLM, True) + assert models == [expected_model] + + +def test_provider_configurations_get_models_skips_non_matching_provider_filter() -> None: + configuration = _build_provider_configuration() + provider_key = str(ModelProviderID("openai")) + configurations = ProviderConfigurations(tenant_id="tenant-1") + configurations[provider_key] = configuration + + with patch.object(ProviderConfiguration, "get_provider_models", return_value=[Mock()]) as mock_get: + models = configurations.get_models(provider="anthropic", model_type=ModelType.LLM, only_active=True) + + assert models == [] + mock_get.assert_not_called() + + +def test_get_current_credentials_custom_provider_checks_current_credential() -> None: + configuration = _build_provider_configuration() + configuration.using_provider_type = ProviderType.CUSTOM + configuration.custom_configuration.provider = CustomProviderConfiguration( + credentials={"api_key": "provider-key"}, + current_credential_id="credential-1", + current_credential_name="Primary", + available_credentials=[], + ) + + with patch("core.helper.credential_utils.check_credential_policy_compliance") as mock_check: + credentials = configuration.get_current_credentials(ModelType.LLM, "gpt-4o") + + assert credentials == {"api_key": "provider-key"} + assert mock_check.call_count == 1 + assert mock_check.call_args.kwargs["credential_id"] == "credential-1" + assert mock_check.call_args.kwargs["provider"] == "openai" + + +def test_get_current_credentials_custom_provider_checks_all_available_credentials() -> None: + configuration = _build_provider_configuration() + configuration.using_provider_type = ProviderType.CUSTOM + configuration.custom_configuration.provider = CustomProviderConfiguration( + credentials={"api_key": "provider-key"}, + available_credentials=[ + CredentialConfiguration(credential_id="cred-1", credential_name="First"), + CredentialConfiguration(credential_id="cred-2", credential_name="Second"), + ], + ) + + with patch("core.helper.credential_utils.check_credential_policy_compliance") as mock_check: + credentials = configuration.get_current_credentials(ModelType.LLM, "gpt-4o") + + assert credentials == {"api_key": "provider-key"} + assert [c.kwargs["credential_id"] for c in mock_check.call_args_list] == ["cred-1", "cred-2"] + assert all(c.kwargs["provider"] == "openai" for c in mock_check.call_args_list) + + +def test_get_system_configuration_status_returns_none_when_current_quota_missing() -> None: + configuration = _build_provider_configuration() + configuration.system_configuration.current_quota_type = ProviderQuotaType.FREE + + status = configuration.get_system_configuration_status() + assert status is None + + +def test_get_provider_names_supports_legacy_and_full_plugin_id() -> None: + configuration = _build_provider_configuration() + configuration.provider.provider = "langgenius/openai/openai" + + provider_names = configuration._get_provider_names() + assert provider_names == ["langgenius/openai/openai", "openai"] + + +def test_generate_next_api_key_name_uses_highest_numeric_suffix() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalars.return_value.all.return_value = [ + SimpleNamespace(credential_name="API KEY 9"), + SimpleNamespace(credential_name="legacy"), + SimpleNamespace(credential_name=" API KEY 2 "), + ] + + name = configuration._generate_next_api_key_name(session=session, query_factory=lambda: Mock()) + assert name == "API KEY 10" + + +def test_generate_next_api_key_name_falls_back_to_default_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + + def _raise_query_error(): + raise RuntimeError("boom") + + name = configuration._generate_next_api_key_name(session=session, query_factory=_raise_query_error) + assert name == "API KEY 1" + + +def test_generate_provider_and_custom_model_names_delegate_to_shared_generator() -> None: + configuration = _build_provider_configuration() + + with patch.object(configuration, "_generate_next_api_key_name", return_value="API KEY 7") as mock_generator: + provider_name = configuration._generate_provider_credential_name(session=Mock()) + custom_model_name = configuration._generate_custom_model_credential_name( + model="gpt-4o", + model_type=ModelType.LLM, + session=Mock(), + ) + + assert provider_name == "API KEY 7" + assert custom_model_name == "API KEY 7" + assert mock_generator.call_count == 2 + + +def test_get_provider_credential_uses_specific_lookup_when_id_provided() -> None: + configuration = _build_provider_configuration() + + with patch.object(configuration, "_get_specific_provider_credential", return_value={"api_key": "***"}) as mock_get: + credential = configuration.get_provider_credential("credential-1") + + assert credential == {"api_key": "***"} + mock_get.assert_called_once_with("credential-1") + + +def test_validate_provider_credentials_handles_hidden_secret_value() -> None: + configuration = _build_provider_configuration() + configuration.provider.provider_credential_schema = ProviderCredentialSchema( + credential_form_schemas=[ + CredentialFormSchema( + variable="openai_api_key", + label=I18nObject(en_US="API Key"), + type=FormType.SECRET_INPUT, + ) + ] + ) + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace(encrypted_config="encrypted-old-key") + mock_factory = Mock() + mock_factory.provider_credentials_validate.return_value = {"openai_api_key": "restored-key", "region": "us"} + + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + with patch("core.entities.provider_configuration.encrypter.decrypt_token", return_value="restored-key"): + with patch( + "core.entities.provider_configuration.encrypter.encrypt_token", + side_effect=lambda tenant_id, value: f"enc::{value}", + ): + validated = configuration.validate_provider_credentials( + credentials={"openai_api_key": HIDDEN_VALUE, "region": "us"}, + credential_id="credential-1", + session=session, + ) + + assert validated["openai_api_key"] == "enc::restored-key" + assert validated["region"] == "us" + mock_factory.provider_credentials_validate.assert_called_once_with( + provider="openai", + credentials={"openai_api_key": "restored-key", "region": "us"}, + ) + + +def test_validate_provider_credentials_opens_session_when_not_passed() -> None: + configuration = _build_provider_configuration() + mock_session = Mock() + mock_factory = Mock() + mock_factory.provider_credentials_validate.return_value = {"region": "us"} + + with patch("core.entities.provider_configuration.Session") as mock_session_cls: + with patch("core.entities.provider_configuration.db") as mock_db: + mock_db.engine = Mock() + mock_session_cls.return_value.__enter__.return_value = mock_session + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + validated = configuration.validate_provider_credentials(credentials={"region": "us"}) + + assert validated == {"region": "us"} + mock_session_cls.assert_called_once() + + +def test_switch_preferred_provider_type_returns_early_when_no_change_or_unsupported() -> None: + configuration = _build_provider_configuration() + + with patch("core.entities.provider_configuration.Session") as mock_session_cls: + configuration.switch_preferred_provider_type(ProviderType.SYSTEM) + mock_session_cls.assert_not_called() + + configuration.preferred_provider_type = ProviderType.CUSTOM + configuration.system_configuration.enabled = False + with patch("core.entities.provider_configuration.Session") as mock_session_cls: + configuration.switch_preferred_provider_type(ProviderType.SYSTEM) + mock_session_cls.assert_not_called() + + +def test_switch_preferred_provider_type_updates_existing_record_with_session() -> None: + configuration = _build_provider_configuration() + configuration.preferred_provider_type = ProviderType.CUSTOM + session = Mock() + existing_record = SimpleNamespace(preferred_provider_type="custom") + session.execute.return_value.scalars.return_value.first.return_value = existing_record + + configuration.switch_preferred_provider_type(ProviderType.SYSTEM, session=session) + + assert existing_record.preferred_provider_type == ProviderType.SYSTEM.value + session.commit.assert_called_once() + + +def test_switch_preferred_provider_type_creates_record_when_missing() -> None: + configuration = _build_provider_configuration() + configuration.preferred_provider_type = ProviderType.SYSTEM + session = Mock() + session.execute.return_value.scalars.return_value.first.return_value = None + + configuration.switch_preferred_provider_type(ProviderType.CUSTOM, session=session) + + assert session.add.call_count == 1 + session.commit.assert_called_once() + + +def test_get_model_type_instance_and_schema_delegate_to_factory() -> None: + configuration = _build_provider_configuration() + mock_factory = Mock() + mock_model_type_instance = Mock() + mock_schema = _build_ai_model("gpt-4o") + mock_factory.get_model_type_instance.return_value = mock_model_type_instance + mock_factory.get_model_schema.return_value = mock_schema + + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + model_type_instance = configuration.get_model_type_instance(ModelType.LLM) + model_schema = configuration.get_model_schema(ModelType.LLM, "gpt-4o", {"api_key": "x"}) + + assert model_type_instance is mock_model_type_instance + assert model_schema is mock_schema + mock_factory.get_model_type_instance.assert_called_once_with(provider="openai", model_type=ModelType.LLM) + mock_factory.get_model_schema.assert_called_once_with( + provider="openai", + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"api_key": "x"}, + ) + + +def test_get_provider_model_returns_none_when_model_not_found() -> None: + configuration = _build_provider_configuration() + fake_model = SimpleNamespace(model="other-model") + + with patch.object(ProviderConfiguration, "get_provider_models", return_value=[fake_model]): + selected = configuration.get_provider_model(ModelType.LLM, "gpt-4o") + + assert selected is None + + +def test_get_provider_models_system_deduplicates_sorts_and_filters_active() -> None: + configuration = _build_provider_configuration() + configuration.provider.position = {"llm": ["b-model", "a-model"]} + configuration.model_settings = [ + ModelSettings(model="a-model", model_type=ModelType.LLM, enabled=False, load_balancing_configs=[]) + ] + provider_schema = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + models=[_build_ai_model("a-model"), _build_ai_model("b-model"), _build_ai_model("a-model")], + ) + mock_factory = Mock() + mock_factory.get_provider_schema.return_value = provider_schema + + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + all_models = configuration.get_provider_models(model_type=ModelType.LLM, only_active=False) + active_models = configuration.get_provider_models(model_type=ModelType.LLM, only_active=True) + + assert [model.model for model in all_models] == ["b-model", "a-model"] + assert [model.status for model in all_models] == [ModelStatus.ACTIVE, ModelStatus.DISABLED] + assert [model.model for model in active_models] == ["b-model"] + + +def test_get_custom_provider_models_sets_status_for_removed_credentials_and_invalid_lb_configs() -> None: + configuration = _build_provider_configuration() + configuration.using_provider_type = ProviderType.CUSTOM + configuration.custom_configuration.provider = CustomProviderConfiguration(credentials={"api_key": "provider-key"}) + configuration.custom_configuration.models = [ + CustomModelConfiguration( + model="custom-model", + model_type=ModelType.LLM, + credentials=None, + available_model_credentials=[CredentialConfiguration(credential_id="c-1", credential_name="first")], + ) + ] + provider_schema = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + models=[_build_ai_model("base-model")], + ) + model_setting_map = { + ModelType.LLM: { + "base-model": ModelSettings( + model="base-model", + model_type=ModelType.LLM, + enabled=True, + load_balancing_enabled=True, + load_balancing_configs=[ + ModelLoadBalancingConfiguration( + id="lb-base", + name="LB Base", + credentials={}, + credential_source_type="provider", + ) + ], + ), + "custom-model": ModelSettings( + model="custom-model", + model_type=ModelType.LLM, + enabled=True, + load_balancing_enabled=True, + load_balancing_configs=[ + ModelLoadBalancingConfiguration( + id="lb-custom", + name="LB Custom", + credentials={}, + credential_source_type="custom_model", + ) + ], + ), + } + } + + with patch.object(ProviderConfiguration, "get_model_schema", return_value=_build_ai_model("custom-model")): + models = configuration._get_custom_provider_models( + model_types=[ModelType.LLM], + provider_schema=provider_schema, + model_setting_map=model_setting_map, + ) + + status_map = {model.model: model.status for model in models} + invalid_lb_map = {model.model: model.has_invalid_load_balancing_configs for model in models} + assert status_map["base-model"] == ModelStatus.ACTIVE + assert status_map["custom-model"] == ModelStatus.CREDENTIAL_REMOVED + assert invalid_lb_map["base-model"] is True + assert invalid_lb_map["custom-model"] is True + + +def test_validator_adds_predefined_model_for_customizable_provider_with_restrictions() -> None: + provider = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.CUSTOMIZABLE_MODEL], + ) + system_configuration = SystemConfiguration( + enabled=True, + credentials={"api_key": "test-key"}, + current_quota_type=ProviderQuotaType.TRIAL, + quota_configurations=[ + QuotaConfiguration( + quota_type=ProviderQuotaType.TRIAL, + quota_unit=QuotaUnit.TOKENS, + quota_limit=100, + quota_used=0, + is_valid=True, + restrict_models=[ + RestrictModel(model="restricted", base_model_name="base-model", model_type=ModelType.LLM) + ], + ) + ], + ) + with patch("core.entities.provider_configuration.original_provider_configurate_methods", {}): + configuration = ProviderConfiguration( + tenant_id="tenant-1", + provider=provider, + preferred_provider_type=ProviderType.SYSTEM, + using_provider_type=ProviderType.SYSTEM, + system_configuration=system_configuration, + custom_configuration=CustomConfiguration(provider=None, models=[]), + model_settings=[], + ) + + assert ConfigurateMethod.PREDEFINED_MODEL in configuration.provider.configurate_methods + + +def test_get_current_credentials_system_handles_disable_and_restricted_base_model() -> None: + configuration = _build_provider_configuration() + configuration.model_settings = [ + ModelSettings(model="gpt-4o", model_type=ModelType.LLM, enabled=False, load_balancing_configs=[]) + ] + + with pytest.raises(ValueError, match="Model gpt-4o is disabled"): + configuration.get_current_credentials(ModelType.LLM, "gpt-4o") + + configuration.model_settings = [] + configuration.system_configuration.quota_configurations[0].restrict_models = [ + RestrictModel(model="gpt-4o", base_model_name="base-model", model_type=ModelType.LLM) + ] + credentials = configuration.get_current_credentials(ModelType.LLM, "gpt-4o") + assert credentials["base_model_name"] == "base-model" + + +def test_get_current_credentials_prefers_model_specific_custom_credentials() -> None: + configuration = _build_provider_configuration() + configuration.using_provider_type = ProviderType.CUSTOM + configuration.custom_configuration.models = [ + CustomModelConfiguration( + model="gpt-4o", + model_type=ModelType.LLM, + credentials={"api_key": "model-key"}, + ) + ] + configuration.custom_configuration.provider = CustomProviderConfiguration(credentials={"api_key": "provider-key"}) + + credentials = configuration.get_current_credentials(ModelType.LLM, "gpt-4o") + assert credentials == {"api_key": "model-key"} + + +def test_get_system_configuration_status_falsey_quota_returns_unsupported() -> None: + class _FalseyQuota: + quota_type = ProviderQuotaType.TRIAL + is_valid = True + + def __bool__(self) -> bool: + return False + + configuration = _build_provider_configuration() + configuration.system_configuration.quota_configurations = [_FalseyQuota()] # type: ignore[list-item] + assert configuration.get_system_configuration_status() == SystemConfigurationStatus.UNSUPPORTED + + +def test_get_provider_credential_default_uses_custom_provider_credentials() -> None: + configuration = _build_provider_configuration() + configuration.custom_configuration.provider = CustomProviderConfiguration(credentials={"api_key": "provider-key"}) + obfuscated = configuration.get_provider_credential() + assert obfuscated == {"api_key": "provider-key"} + + +def test_custom_configuration_availability_and_provider_record_helpers() -> None: + configuration = _build_provider_configuration() + assert not configuration.is_custom_configuration_available() + + configuration.custom_configuration.provider = CustomProviderConfiguration( + credentials={"api_key": "provider-key"}, + available_credentials=[CredentialConfiguration(credential_id="cred-1", credential_name="Main")], + ) + assert configuration.is_custom_configuration_available() + + configuration.custom_configuration.provider = None + configuration.custom_configuration.models = [ + CustomModelConfiguration(model="gpt-4o", model_type=ModelType.LLM, credentials={"api_key": "model-key"}) + ] + assert configuration.is_custom_configuration_available() + + session = Mock() + provider_record = SimpleNamespace(id="provider-1") + session.execute.return_value.scalar_one_or_none.return_value = provider_record + assert configuration._get_provider_record(session) is provider_record + + session.execute.return_value.scalar_one_or_none.return_value = None + assert configuration._get_provider_record(session) is None + + +def test_check_provider_credential_name_exists_and_model_setting_lookup() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = "existing-id" + assert configuration._check_provider_credential_name_exists("Main", session) + + session.execute.return_value.scalar_one_or_none.return_value = None + assert not configuration._check_provider_credential_name_exists("Main", session, exclude_id="cred-2") + + setting = SimpleNamespace(id="setting-1") + session.execute.return_value.scalars.return_value.first.return_value = setting + assert configuration._get_provider_model_setting(ModelType.LLM, "gpt-4o", session) is setting + + +def test_validate_provider_credentials_handles_invalid_original_json() -> None: + configuration = _build_provider_configuration() + configuration.provider.provider_credential_schema = _build_secret_provider_schema() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace(encrypted_config="{invalid-json") + mock_factory = Mock() + mock_factory.provider_credentials_validate.return_value = {"openai_api_key": "new-key"} + + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + with patch("core.entities.provider_configuration.encrypter.encrypt_token", return_value="enc-key"): + validated = configuration.validate_provider_credentials( + credentials={"openai_api_key": HIDDEN_VALUE}, + credential_id="cred-1", + session=session, + ) + + assert validated == {"openai_api_key": "enc-key"} + + +def test_generate_next_api_key_name_returns_default_when_no_records() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalars.return_value.all.return_value = [] + + name = configuration._generate_next_api_key_name(session=session, query_factory=lambda: Mock()) + assert name == "API KEY 1" + + +def test_create_provider_credential_creates_provider_record_when_missing() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.flush.side_effect = lambda: None + + with _patched_session(session): + with patch.object(ProviderConfiguration, "validate_provider_credentials", return_value={"api_key": "enc"}): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=None): + with patch.object( + ProviderConfiguration, + "_generate_provider_credential_name", + return_value="API KEY 2", + ): + with patch.object(ProviderConfiguration, "switch_preferred_provider_type") as mock_switch: + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.create_provider_credential({"api_key": "raw"}, None) + + assert session.add.call_count == 2 + session.commit.assert_called_once() + mock_cache.return_value.delete.assert_called_once() + mock_switch.assert_called_once_with(provider_type=ProviderType.CUSTOM, session=session) + + +def test_create_provider_credential_marks_existing_provider_as_valid() -> None: + configuration = _build_provider_configuration() + session = Mock() + provider_record = SimpleNamespace(is_valid=False) + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_provider_credential_name_exists", return_value=False): + with patch.object(ProviderConfiguration, "validate_provider_credentials", return_value={"api_key": "enc"}): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=provider_record): + configuration.create_provider_credential({"api_key": "raw"}, "Main") + + assert provider_record.is_valid is True + session.commit.assert_called_once() + + +def test_create_provider_credential_raises_when_duplicate_name_exists() -> None: + configuration = _build_provider_configuration() + session = Mock() + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_provider_credential_name_exists", return_value=True): + with pytest.raises(ValueError, match="already exists"): + configuration.create_provider_credential({"api_key": "raw"}, "Main") + + +def test_update_provider_credential_success_updates_and_invalidates_cache() -> None: + configuration = _build_provider_configuration() + session = Mock() + credential_record = SimpleNamespace(id="cred-1", encrypted_config="{}", credential_name="Old", updated_at=None) + provider_record = SimpleNamespace(id="provider-1", credential_id="cred-1") + session.execute.return_value.scalar_one_or_none.return_value = credential_record + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_provider_credential_name_exists", return_value=False): + with patch.object(ProviderConfiguration, "validate_provider_credentials", return_value={"api_key": "enc"}): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=provider_record): + with patch.object( + ProviderConfiguration, + "_update_load_balancing_configs_with_credential", + ) as mock_lb: + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.update_provider_credential( + credentials={"api_key": "raw"}, + credential_id="cred-1", + credential_name="New Name", + ) + + assert credential_record.credential_name == "New Name" + session.commit.assert_called_once() + mock_cache.return_value.delete.assert_called_once() + mock_lb.assert_called_once() + + +def test_update_provider_credential_raises_when_record_not_found() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + + with _patched_session(session): + with patch.object(ProviderConfiguration, "validate_provider_credentials", return_value={"api_key": "enc"}): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=None): + with pytest.raises(ValueError, match="Credential record not found"): + configuration.update_provider_credential({"api_key": "raw"}, "cred-1", None) + + +def test_update_load_balancing_configs_updates_all_matching_configs() -> None: + configuration = _build_provider_configuration() + session = Mock() + lb_config = SimpleNamespace(id="lb-1", encrypted_config="old", name="old", updated_at=None) + session.execute.return_value.scalars.return_value.all.return_value = [lb_config] + credential_record = SimpleNamespace(encrypted_config='{"api_key":"enc"}', credential_name="API KEY 3") + + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration._update_load_balancing_configs_with_credential( + credential_id="cred-1", + credential_record=credential_record, + credential_source="provider", + session=session, + ) + + assert lb_config.encrypted_config == '{"api_key":"enc"}' + assert lb_config.name == "API KEY 3" + mock_cache.return_value.delete.assert_called_once() + session.commit.assert_called_once() + + +def test_update_load_balancing_configs_returns_when_no_matching_configs() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalars.return_value.all.return_value = [] + + configuration._update_load_balancing_configs_with_credential( + credential_id="cred-1", + credential_record=SimpleNamespace(encrypted_config="{}", credential_name="Main"), + credential_source="provider", + session=session, + ) + + session.commit.assert_not_called() + + +def test_delete_provider_credential_removes_provider_record_when_last_credential() -> None: + configuration = _build_provider_configuration() + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + provider_record = SimpleNamespace(id="provider-1", credential_id="cred-1", updated_at=None) + session.execute.side_effect = [ + _exec_result(scalar_one_or_none=credential_record), + _exec_result(scalars_all=[]), + _exec_result(scalar=1), + ] + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=provider_record): + with patch.object(ProviderConfiguration, "switch_preferred_provider_type") as mock_switch: + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.delete_provider_credential("cred-1") + + assert any(call.args and call.args[0] is provider_record for call in session.delete.call_args_list) + mock_cache.return_value.delete.assert_called_once() + mock_switch.assert_called_once_with(provider_type=ProviderType.SYSTEM, session=session) + + +def test_delete_provider_credential_raises_when_not_found() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + + with _patched_session(session): + with pytest.raises(ValueError, match="Credential record not found"): + configuration.delete_provider_credential("cred-1") + + +def test_delete_provider_credential_unsets_active_credential_when_more_available() -> None: + configuration = _build_provider_configuration() + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + lb_config = SimpleNamespace(id="lb-1") + provider_record = SimpleNamespace(id="provider-1", credential_id="cred-1", updated_at=None) + session.execute.side_effect = [ + _exec_result(scalar_one_or_none=credential_record), + _exec_result(scalars_all=[lb_config]), + _exec_result(scalar=2), + ] + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=provider_record): + with patch.object(ProviderConfiguration, "switch_preferred_provider_type") as mock_switch: + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.delete_provider_credential("cred-1") + + assert provider_record.credential_id is None + assert mock_cache.return_value.delete.call_count == 2 + mock_switch.assert_called_once_with(provider_type=ProviderType.SYSTEM, session=session) + + +def test_switch_active_provider_credential_success_and_failures() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + with _patched_session(session): + with pytest.raises(ValueError, match="Credential record not found"): + configuration.switch_active_provider_credential("cred-1") + + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace(id="cred-1") + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=None): + with pytest.raises(ValueError, match="Provider record not found"): + configuration.switch_active_provider_credential("cred-1") + + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + provider_record = SimpleNamespace(id="provider-1", credential_id=None, updated_at=None) + session.execute.return_value.scalar_one_or_none.return_value = credential_record + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=provider_record): + with patch.object(ProviderConfiguration, "switch_preferred_provider_type") as mock_switch: + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.switch_active_provider_credential("cred-1") + + assert provider_record.credential_id == "cred-1" + mock_cache.return_value.delete.assert_called_once() + mock_switch.assert_called_once_with(ProviderType.CUSTOM, session=session) + + +def test_get_custom_model_record_supports_plugin_id_alias() -> None: + configuration = _build_provider_configuration(provider_name="langgenius/openai/openai") + session = Mock() + custom_model_record = SimpleNamespace(id="model-1") + session.execute.return_value.scalar_one_or_none.return_value = custom_model_record + + result = configuration._get_custom_model_record(ModelType.LLM, "gpt-4o", session) + assert result is custom_model_record + + +def test_get_specific_custom_model_credential_success_and_not_found() -> None: + configuration = _build_provider_configuration() + configuration.provider.model_credential_schema = _build_secret_model_schema() + session = Mock() + record = SimpleNamespace(id="cred-1", credential_name="Main", encrypted_config='{"openai_api_key":"enc"}') + session.execute.return_value.scalar_one_or_none.return_value = record + + with _patched_session(session): + with patch("core.entities.provider_configuration.encrypter.decrypt_token", return_value="raw"): + with patch.object(ProviderConfiguration, "obfuscated_credentials", return_value={"openai_api_key": "***"}): + response = configuration._get_specific_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + assert response["current_credential_id"] == "cred-1" + assert response["credentials"] == {"openai_api_key": "***"} + + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + with _patched_session(session): + with pytest.raises(ValueError, match="Credential with id cred-1 not found"): + configuration._get_specific_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace( + id="cred-1", + credential_name="Main", + encrypted_config="{invalid-json", + ) + with _patched_session(session): + invalid_json = configuration._get_specific_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + assert invalid_json["credentials"] == {} + + +def test_check_custom_model_credential_name_exists_respects_exclusion() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace(id="cred-1") + assert configuration._check_custom_model_credential_name_exists( + ModelType.LLM, "gpt-4o", "Main", session, exclude_id="other-id" + ) + + session.execute.return_value.scalar_one_or_none.return_value = None + assert not configuration._check_custom_model_credential_name_exists(ModelType.LLM, "gpt-4o", "Main", session) + + +def test_get_custom_model_credential_uses_specific_id_or_configuration_fallback() -> None: + configuration = _build_provider_configuration() + with patch.object( + ProviderConfiguration, + "_get_specific_custom_model_credential", + return_value={"current_credential_id": "cred-1"}, + ) as mock_specific: + result = configuration.get_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + assert result == {"current_credential_id": "cred-1"} + mock_specific.assert_called_once() + + configuration.provider.model_credential_schema = _build_secret_model_schema() + configuration.custom_configuration.models = [ + CustomModelConfiguration( + model="gpt-4o", + model_type=ModelType.LLM, + credentials={"openai_api_key": "raw"}, + current_credential_id="cred-1", + current_credential_name="Main", + ) + ] + with patch.object(ProviderConfiguration, "obfuscated_credentials", return_value={"openai_api_key": "***"}): + fallback = configuration.get_custom_model_credential(ModelType.LLM, "gpt-4o", None) + assert fallback == { + "current_credential_id": "cred-1", + "current_credential_name": "Main", + "credentials": {"openai_api_key": "***"}, + } + + configuration.custom_configuration.models = [] + assert configuration.get_custom_model_credential(ModelType.LLM, "gpt-4o", None) is None + + +def test_validate_custom_model_credentials_supports_hidden_reuse_and_sessionless_path() -> None: + configuration = _build_provider_configuration() + configuration.provider.model_credential_schema = _build_secret_model_schema() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace( + encrypted_config='{"openai_api_key":"enc"}' + ) + mock_factory = Mock() + mock_factory.model_credentials_validate.return_value = {"openai_api_key": "raw"} + + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + with patch("core.entities.provider_configuration.encrypter.decrypt_token", return_value="raw"): + with patch("core.entities.provider_configuration.encrypter.encrypt_token", return_value="enc-new"): + validated = configuration.validate_custom_model_credentials( + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"openai_api_key": HIDDEN_VALUE}, + credential_id="cred-1", + session=session, + ) + assert validated == {"openai_api_key": "enc-new"} + + session = Mock() + mock_factory = Mock() + mock_factory.model_credentials_validate.return_value = {"region": "us"} + with _patched_session(session): + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + validated = configuration.validate_custom_model_credentials( + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"region": "us"}, + ) + assert validated == {"region": "us"} + + +def test_create_update_delete_custom_model_credential_flow() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.flush.side_effect = lambda: None + provider_model_record = SimpleNamespace(id="model-1", credential_id="cred-1", updated_at=None) + credential_record = SimpleNamespace(id="cred-1", encrypted_config="{}", credential_name="Old", updated_at=None) + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_generate_custom_model_credential_name", return_value="API KEY 1"): + with patch.object( + ProviderConfiguration, + "validate_custom_model_credentials", + return_value={"openai_api_key": "enc"}, + ): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=None): + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.create_custom_model_credential(ModelType.LLM, "gpt-4o", {"k": "v"}, None) + assert session.add.call_count == 2 + assert mock_cache.return_value.delete.call_count == 1 + + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = credential_record + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_custom_model_credential_name_exists", return_value=False): + with patch.object( + ProviderConfiguration, + "validate_custom_model_credentials", + return_value={"openai_api_key": "enc2"}, + ): + with patch.object( + ProviderConfiguration, + "_get_custom_model_record", + return_value=provider_model_record, + ): + with patch.object( + ProviderConfiguration, + "_update_load_balancing_configs_with_credential", + ) as mock_lb: + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.update_custom_model_credential( + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"k": "v"}, + credential_name="New Name", + credential_id="cred-1", + ) + assert credential_record.credential_name == "New Name" + assert mock_cache.return_value.delete.call_count == 1 + mock_lb.assert_called_once() + + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + lb_config = SimpleNamespace(id="lb-1") + provider_model_record = SimpleNamespace(id="model-1", credential_id="cred-1", updated_at=None) + session.execute.side_effect = [ + _exec_result(scalar_one_or_none=credential_record), + _exec_result(scalars_all=[lb_config]), + _exec_result(scalar=2), + ] + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=provider_model_record): + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.delete_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + assert provider_model_record.credential_id is None + assert mock_cache.return_value.delete.call_count == 2 + + +def test_add_model_credential_to_model_and_switch_custom_model_credential() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + with _patched_session(session): + with pytest.raises(ValueError, match="Credential record not found"): + configuration.add_model_credential_to_model(ModelType.LLM, "gpt-4o", "cred-1") + + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + session.execute.return_value.scalar_one_or_none.return_value = credential_record + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=None): + configuration.add_model_credential_to_model(ModelType.LLM, "gpt-4o", "cred-1") + session.add.assert_called_once() + session.commit.assert_called_once() + + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + provider_model_record = SimpleNamespace(id="model-1", credential_id="cred-1", updated_at=None) + session.execute.return_value.scalar_one_or_none.return_value = credential_record + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=provider_model_record): + with pytest.raises(ValueError, match="Can't add same credential"): + configuration.add_model_credential_to_model(ModelType.LLM, "gpt-4o", "cred-1") + + session = Mock() + credential_record = SimpleNamespace(id="cred-2") + provider_model_record = SimpleNamespace(id="model-1", credential_id="cred-1", updated_at=None) + session.execute.return_value.scalar_one_or_none.return_value = credential_record + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=provider_model_record): + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.add_model_credential_to_model(ModelType.LLM, "gpt-4o", "cred-2") + assert provider_model_record.credential_id == "cred-2" + mock_cache.return_value.delete.assert_called_once() + + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + with _patched_session(session): + with pytest.raises(ValueError, match="Credential record not found"): + configuration.switch_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + session.execute.return_value.scalar_one_or_none.return_value = credential_record + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=None): + with pytest.raises(ValueError, match="custom model record not found"): + configuration.switch_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + provider_model_record = SimpleNamespace(id="model-1", credential_id=None, updated_at=None) + session.execute.return_value.scalar_one_or_none.return_value = credential_record + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=provider_model_record): + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.switch_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + assert provider_model_record.credential_id == "cred-1" + mock_cache.return_value.delete.assert_called_once() + + +def test_delete_custom_model_and_model_setting_methods() -> None: + configuration = _build_provider_configuration() + session = Mock() + provider_model_record = SimpleNamespace(id="model-1") + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=provider_model_record): + with patch("core.entities.provider_configuration.ProviderCredentialsCache") as mock_cache: + configuration.delete_custom_model(ModelType.LLM, "gpt-4o") + session.delete.assert_called_once_with(provider_model_record) + session.commit.assert_called_once() + mock_cache.return_value.delete.assert_called_once() + + session = Mock() + existing = SimpleNamespace(enabled=False, updated_at=None) + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=existing): + assert configuration.enable_model(ModelType.LLM, "gpt-4o") is existing + assert existing.enabled is True + + session = Mock() + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=None): + created = configuration.enable_model(ModelType.LLM, "gpt-4o") + assert created.enabled is True + + session = Mock() + existing = SimpleNamespace(enabled=True, load_balancing_enabled=True, updated_at=None) + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=existing): + assert configuration.disable_model(ModelType.LLM, "gpt-4o") is existing + assert existing.enabled is False + + session = Mock() + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=None): + created = configuration.disable_model(ModelType.LLM, "gpt-4o") + assert created.enabled is False + + session = Mock() + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=existing): + result = configuration.get_provider_model_setting(ModelType.LLM, "gpt-4o") + assert result is existing + + +def test_model_load_balancing_enable_disable_and_switch_preferred_provider_type_without_session() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar.return_value = 1 + with _patched_session(session): + with pytest.raises(ValueError, match="must be more than 1"): + configuration.enable_model_load_balancing(ModelType.LLM, "gpt-4o") + + session = Mock() + session.execute.return_value.scalar.return_value = 2 + existing = SimpleNamespace(load_balancing_enabled=False, updated_at=None) + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=existing): + result = configuration.enable_model_load_balancing(ModelType.LLM, "gpt-4o") + assert result is existing + assert existing.load_balancing_enabled is True + + session = Mock() + session.execute.return_value.scalar.return_value = 2 + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=None): + created = configuration.enable_model_load_balancing(ModelType.LLM, "gpt-4o") + assert created.load_balancing_enabled is True + + session = Mock() + existing = SimpleNamespace(load_balancing_enabled=True, updated_at=None) + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=existing): + result = configuration.disable_model_load_balancing(ModelType.LLM, "gpt-4o") + assert result is existing + assert existing.load_balancing_enabled is False + + session = Mock() + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_model_setting", return_value=None): + created = configuration.disable_model_load_balancing(ModelType.LLM, "gpt-4o") + assert created.load_balancing_enabled is False + + configuration.preferred_provider_type = ProviderType.SYSTEM + switch_session = Mock() + with _patched_session(switch_session): + switch_session.execute.return_value.scalars.return_value.first.return_value = None + configuration.switch_preferred_provider_type(ProviderType.CUSTOM) + assert any( + call.args and call.args[0].__class__.__name__ == "TenantPreferredModelProvider" + for call in switch_session.add.call_args_list + ) + switch_session.commit.assert_called() + + +def test_system_and_custom_provider_model_helpers_cover_remaining_skip_paths() -> None: + configuration = _build_provider_configuration() + provider_schema = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.CUSTOMIZABLE_MODEL], + models=[_build_ai_model("llm-model")], + ) + configuration.system_configuration.quota_configurations = [ + QuotaConfiguration( + quota_type=ProviderQuotaType.FREE, + quota_unit=QuotaUnit.TOKENS, + quota_limit=100, + quota_used=0, + is_valid=True, + restrict_models=[ + RestrictModel(model="target", base_model_name="base", model_type=ModelType.LLM), + ], + ), + QuotaConfiguration( + quota_type=ProviderQuotaType.TRIAL, + quota_unit=QuotaUnit.TOKENS, + quota_limit=100, + quota_used=0, + is_valid=True, + restrict_models=[ + RestrictModel(model="target", base_model_name="base", model_type=ModelType.LLM), + RestrictModel(model="error-model", base_model_name="base", model_type=ModelType.LLM), + RestrictModel(model="none-model", base_model_name="base", model_type=ModelType.LLM), + RestrictModel( + model="embed-model", + base_model_name="base", + model_type=ModelType.TEXT_EMBEDDING, + ), + ], + ), + ] + configuration.system_configuration.current_quota_type = ProviderQuotaType.TRIAL + + def _system_schema(*, model_type: ModelType, model: str, credentials: dict | None): + if model == "error-model": + raise RuntimeError("boom") + if model == "none-model": + return None + if model == "embed-model": + return _build_ai_model("embed-model", model_type=ModelType.TEXT_EMBEDDING) + return _build_ai_model("target") + + with patch( + "core.entities.provider_configuration.original_provider_configurate_methods", + {"openai": [ConfigurateMethod.CUSTOMIZABLE_MODEL]}, + ): + with patch.object(ProviderConfiguration, "get_model_schema", side_effect=_system_schema): + system_models = configuration._get_system_provider_models( + model_types=[ModelType.LLM], + provider_schema=provider_schema, + model_setting_map={ + ModelType.LLM: { + "target": ModelSettings( + model="target", + model_type=ModelType.LLM, + enabled=False, + load_balancing_configs=[], + ) + } + }, + ) + assert any(model.model == "target" and model.status == ModelStatus.DISABLED for model in system_models) + + configuration.using_provider_type = ProviderType.CUSTOM + configuration.custom_configuration.provider = CustomProviderConfiguration(credentials={"api_key": "provider-key"}) + configuration.custom_configuration.models = [ + CustomModelConfiguration( + model="skip-model-type", + model_type=ModelType.TEXT_EMBEDDING, + credentials={"k": "v"}, + ), + CustomModelConfiguration( + model="skip-unadded", + model_type=ModelType.LLM, + credentials={"k": "v"}, + unadded_to_model_list=True, + ), + CustomModelConfiguration( + model="skip-filter", + model_type=ModelType.LLM, + credentials={"k": "v"}, + ), + CustomModelConfiguration( + model="error-custom", + model_type=ModelType.LLM, + credentials={"k": "v"}, + ), + CustomModelConfiguration( + model="none-custom", + model_type=ModelType.LLM, + credentials={"k": "v"}, + ), + CustomModelConfiguration( + model="disabled-custom", + model_type=ModelType.LLM, + credentials={"k": "v"}, + ), + ] + + provider_schema = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + models=[_build_ai_model("base-disabled")], + ) + model_setting_map = { + ModelType.LLM: { + "base-disabled": ModelSettings( + model="base-disabled", + model_type=ModelType.LLM, + enabled=False, + load_balancing_enabled=True, + load_balancing_configs=[ModelLoadBalancingConfiguration(id="lb-1", name="lb", credentials={})], + ), + "disabled-custom": ModelSettings( + model="disabled-custom", + model_type=ModelType.LLM, + enabled=False, + load_balancing_enabled=False, + load_balancing_configs=[], + ), + } + } + + def _custom_schema(*, model_type: ModelType, model: str, credentials: dict | None): + if model == "error-custom": + raise RuntimeError("boom") + if model == "none-custom": + return None + return _build_ai_model(model) + + with patch.object(ProviderConfiguration, "get_model_schema", side_effect=_custom_schema): + custom_models = configuration._get_custom_provider_models( + model_types=[ModelType.LLM], + provider_schema=provider_schema, + model_setting_map=model_setting_map, + model="disabled-custom", + ) + assert any(model.model == "base-disabled" and model.status == ModelStatus.DISABLED for model in custom_models) + assert any(model.model == "disabled-custom" and model.status == ModelStatus.DISABLED for model in custom_models) + + +def test_get_current_credentials_skips_non_current_quota_restrictions() -> None: + configuration = _build_provider_configuration() + configuration.system_configuration.current_quota_type = ProviderQuotaType.TRIAL + configuration.system_configuration.quota_configurations = [ + QuotaConfiguration( + quota_type=ProviderQuotaType.FREE, + quota_unit=QuotaUnit.TOKENS, + quota_limit=100, + quota_used=0, + is_valid=True, + restrict_models=[ + RestrictModel(model="gpt-4o", base_model_name="free-base", model_type=ModelType.LLM), + ], + ), + QuotaConfiguration( + quota_type=ProviderQuotaType.TRIAL, + quota_unit=QuotaUnit.TOKENS, + quota_limit=100, + quota_used=0, + is_valid=True, + restrict_models=[ + RestrictModel(model="gpt-4o", base_model_name="trial-base", model_type=ModelType.LLM), + ], + ), + ] + + credentials = configuration.get_current_credentials(ModelType.LLM, "gpt-4o") + assert credentials["base_model_name"] == "trial-base" + + +def test_get_system_configuration_status_covers_disabled_and_quota_exceeded() -> None: + configuration = _build_provider_configuration() + configuration.system_configuration.enabled = False + assert configuration.get_system_configuration_status() == SystemConfigurationStatus.UNSUPPORTED + + configuration.system_configuration.enabled = True + configuration.system_configuration.quota_configurations = [ + QuotaConfiguration( + quota_type=ProviderQuotaType.TRIAL, + quota_unit=QuotaUnit.TOKENS, + quota_limit=100, + quota_used=100, + is_valid=False, + restrict_models=[], + ) + ] + configuration.system_configuration.current_quota_type = ProviderQuotaType.TRIAL + assert configuration.get_system_configuration_status() == SystemConfigurationStatus.QUOTA_EXCEEDED + + +def test_get_specific_provider_credential_decrypts_and_obfuscates_credentials() -> None: + configuration = _build_provider_configuration() + configuration.provider.provider_credential_schema = _build_secret_provider_schema() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace( + encrypted_config='{"openai_api_key":"enc-secret","region":"us"}' + ) + provider_record = SimpleNamespace(provider_name="aliased-openai") + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=provider_record): + with patch("core.entities.provider_configuration.encrypter.decrypt_token", return_value="raw-secret"): + with patch.object( + ProviderConfiguration, + "obfuscated_credentials", + side_effect=lambda credentials, credential_form_schemas: credentials, + ): + credentials = configuration._get_specific_provider_credential("cred-1") + + assert credentials == {"openai_api_key": "raw-secret", "region": "us"} + + +def test_get_specific_provider_credential_logs_when_decrypt_fails() -> None: + configuration = _build_provider_configuration() + configuration.provider.provider_credential_schema = _build_secret_provider_schema() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace( + encrypted_config='{"openai_api_key":"enc-secret"}' + ) + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=None): + with patch( + "core.entities.provider_configuration.encrypter.decrypt_token", + side_effect=RuntimeError("boom"), + ): + with patch("core.entities.provider_configuration.logger.exception") as mock_logger: + with patch.object( + ProviderConfiguration, + "obfuscated_credentials", + side_effect=lambda credentials, credential_form_schemas: credentials, + ): + credentials = configuration._get_specific_provider_credential("cred-1") + + assert credentials == {"openai_api_key": "enc-secret"} + mock_logger.assert_called_once() + + +def test_validate_provider_credentials_uses_empty_original_when_record_missing() -> None: + configuration = _build_provider_configuration() + configuration.provider.provider_credential_schema = _build_secret_provider_schema() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + mock_factory = Mock() + mock_factory.provider_credentials_validate.return_value = {"openai_api_key": "raw"} + + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + with patch("core.entities.provider_configuration.encrypter.encrypt_token", return_value="enc-new"): + validated = configuration.validate_provider_credentials( + credentials={"openai_api_key": HIDDEN_VALUE}, + credential_id="cred-1", + session=session, + ) + + assert validated == {"openai_api_key": "enc-new"} + + +def test_create_provider_credential_rolls_back_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.add.side_effect = RuntimeError("boom") + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_generate_provider_credential_name", return_value="API KEY 9"): + with patch.object(ProviderConfiguration, "validate_provider_credentials", return_value={"api_key": "enc"}): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=None): + with pytest.raises(RuntimeError, match="boom"): + configuration.create_provider_credential({"api_key": "raw"}, None) + + session.rollback.assert_called_once() + + +def test_update_provider_credential_raises_on_duplicate_name() -> None: + configuration = _build_provider_configuration() + session = Mock() + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_provider_credential_name_exists", return_value=True): + with pytest.raises(ValueError, match="already exists"): + configuration.update_provider_credential({"api_key": "raw"}, "cred-1", "Main") + + +def test_update_provider_credential_rolls_back_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace( + id="cred-1", + encrypted_config="{}", + credential_name="Main", + updated_at=None, + ) + session.commit.side_effect = RuntimeError("boom") + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_provider_credential_name_exists", return_value=False): + with patch.object(ProviderConfiguration, "validate_provider_credentials", return_value={"api_key": "enc"}): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=None): + with pytest.raises(RuntimeError, match="boom"): + configuration.update_provider_credential({"api_key": "raw"}, "cred-1", "Main") + + session.rollback.assert_called_once() + + +def test_delete_provider_credential_rolls_back_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.delete.side_effect = RuntimeError("boom") + session.execute.side_effect = [ + _exec_result(scalar_one_or_none=SimpleNamespace(id="cred-1")), + _exec_result(scalars_all=[]), + _exec_result(scalar=2), + ] + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=None): + with pytest.raises(RuntimeError, match="boom"): + configuration.delete_provider_credential("cred-1") + + session.rollback.assert_called_once() + + +def test_switch_active_provider_credential_rolls_back_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace(id="cred-1") + session.commit.side_effect = RuntimeError("boom") + provider_record = SimpleNamespace(id="provider-1", credential_id=None, updated_at=None) + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_provider_record", return_value=provider_record): + with pytest.raises(RuntimeError, match="boom"): + configuration.switch_active_provider_credential("cred-1") + + session.rollback.assert_called_once() + + +def test_get_specific_custom_model_credential_logs_when_decrypt_fails() -> None: + configuration = _build_provider_configuration() + configuration.provider.model_credential_schema = _build_secret_model_schema() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace( + id="cred-1", + credential_name="Main", + encrypted_config='{"openai_api_key":"enc-secret"}', + ) + + with _patched_session(session): + with patch("core.entities.provider_configuration.encrypter.decrypt_token", side_effect=RuntimeError("boom")): + with patch("core.entities.provider_configuration.logger.exception") as mock_logger: + with patch.object( + ProviderConfiguration, + "obfuscated_credentials", + side_effect=lambda credentials, credential_form_schemas: credentials, + ): + result = configuration._get_specific_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + assert result["credentials"] == {"openai_api_key": "enc-secret"} + mock_logger.assert_called_once() + + +def test_validate_custom_model_credentials_handles_invalid_original_json() -> None: + configuration = _build_provider_configuration() + configuration.provider.model_credential_schema = _build_secret_model_schema() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace(encrypted_config="{invalid-json") + mock_factory = Mock() + mock_factory.model_credentials_validate.return_value = {"openai_api_key": "raw"} + + with patch("core.entities.provider_configuration.ModelProviderFactory", return_value=mock_factory): + with patch("core.entities.provider_configuration.encrypter.encrypt_token", return_value="enc-new"): + validated = configuration.validate_custom_model_credentials( + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"openai_api_key": HIDDEN_VALUE}, + credential_id="cred-1", + session=session, + ) + + assert validated == {"openai_api_key": "enc-new"} + + +def test_create_custom_model_credential_raises_on_duplicate_name() -> None: + configuration = _build_provider_configuration() + session = Mock() + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_custom_model_credential_name_exists", return_value=True): + with pytest.raises(ValueError, match="already exists"): + configuration.create_custom_model_credential(ModelType.LLM, "gpt-4o", {"k": "v"}, "Main") + + +def test_create_custom_model_credential_rolls_back_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.add.side_effect = RuntimeError("boom") + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_generate_custom_model_credential_name", return_value="API KEY 4"): + with patch.object( + ProviderConfiguration, + "validate_custom_model_credentials", + return_value={"openai_api_key": "enc"}, + ): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=None): + with pytest.raises(RuntimeError, match="boom"): + configuration.create_custom_model_credential(ModelType.LLM, "gpt-4o", {"k": "v"}, None) + + session.rollback.assert_called_once() + + +def test_update_custom_model_credential_raises_on_duplicate_name() -> None: + configuration = _build_provider_configuration() + session = Mock() + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_custom_model_credential_name_exists", return_value=True): + with pytest.raises(ValueError, match="already exists"): + configuration.update_custom_model_credential( + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"k": "v"}, + credential_name="Main", + credential_id="cred-1", + ) + + +def test_update_custom_model_credential_raises_when_record_not_found() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_custom_model_credential_name_exists", return_value=False): + with patch.object(ProviderConfiguration, "validate_custom_model_credentials", return_value={"k": "v"}): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=None): + with pytest.raises(ValueError, match="Credential record not found"): + configuration.update_custom_model_credential( + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"k": "v"}, + credential_name="Main", + credential_id="cred-1", + ) + + +def test_update_custom_model_credential_rolls_back_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = SimpleNamespace( + id="cred-1", + encrypted_config="{}", + credential_name="Main", + updated_at=None, + ) + session.commit.side_effect = RuntimeError("boom") + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_check_custom_model_credential_name_exists", return_value=False): + with patch.object(ProviderConfiguration, "validate_custom_model_credentials", return_value={"k": "v"}): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=None): + with pytest.raises(RuntimeError, match="boom"): + configuration.update_custom_model_credential( + model_type=ModelType.LLM, + model="gpt-4o", + credentials={"k": "v"}, + credential_name="Main", + credential_id="cred-1", + ) + + session.rollback.assert_called_once() + + +def test_delete_custom_model_credential_raises_when_record_not_found() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.execute.return_value.scalar_one_or_none.return_value = None + + with _patched_session(session): + with pytest.raises(ValueError, match="Credential record not found"): + configuration.delete_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + +def test_delete_custom_model_credential_removes_custom_model_record_when_last_credential() -> None: + configuration = _build_provider_configuration() + session = Mock() + credential_record = SimpleNamespace(id="cred-1") + provider_model_record = SimpleNamespace(id="model-1", credential_id="cred-1", updated_at=None) + session.execute.side_effect = [ + _exec_result(scalar_one_or_none=credential_record), + _exec_result(scalars_all=[]), + _exec_result(scalar=1), + ] + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=provider_model_record): + configuration.delete_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + assert any(call.args and call.args[0] is provider_model_record for call in session.delete.call_args_list) + + +def test_delete_custom_model_credential_rolls_back_on_error() -> None: + configuration = _build_provider_configuration() + session = Mock() + session.delete.side_effect = RuntimeError("boom") + session.execute.side_effect = [ + _exec_result(scalar_one_or_none=SimpleNamespace(id="cred-1")), + _exec_result(scalars_all=[]), + _exec_result(scalar=2), + ] + + with _patched_session(session): + with patch.object(ProviderConfiguration, "_get_custom_model_record", return_value=None): + with pytest.raises(RuntimeError, match="boom"): + configuration.delete_custom_model_credential(ModelType.LLM, "gpt-4o", "cred-1") + + session.rollback.assert_called_once() + + +def test_get_custom_provider_models_skips_schema_models_with_mismatched_type() -> None: + configuration = _build_provider_configuration() + provider_schema = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM, ModelType.TEXT_EMBEDDING], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + models=[ + _build_ai_model("llm-model", model_type=ModelType.LLM), + _build_ai_model("embed-model", model_type=ModelType.TEXT_EMBEDDING), + ], + ) + + models = configuration._get_custom_provider_models( + model_types=[ModelType.LLM], + provider_schema=provider_schema, + model_setting_map={}, + ) + + assert any(model.model == "llm-model" for model in models) + assert all(model.model != "embed-model" for model in models) + + +def test_get_custom_provider_models_skips_custom_models_on_schema_error_or_none() -> None: + configuration = _build_provider_configuration() + configuration.custom_configuration.models = [ + CustomModelConfiguration(model="error-custom", model_type=ModelType.LLM, credentials={"k": "v"}), + CustomModelConfiguration(model="none-custom", model_type=ModelType.LLM, credentials={"k": "v"}), + CustomModelConfiguration(model="ok-custom", model_type=ModelType.LLM, credentials={"k": "v"}), + ] + provider_schema = ProviderEntity( + provider="openai", + label=I18nObject(en_US="OpenAI"), + supported_model_types=[ModelType.LLM], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + models=[], + ) + + def _schema(*, model_type: ModelType, model: str, credentials: dict | None): + if model == "error-custom": + raise RuntimeError("boom") + if model == "none-custom": + return None + return _build_ai_model(model) + + with patch("core.entities.provider_configuration.logger.warning") as mock_warning: + with patch.object(ProviderConfiguration, "get_model_schema", side_effect=_schema): + models = configuration._get_custom_provider_models( + model_types=[ModelType.LLM], + provider_schema=provider_schema, + model_setting_map={}, + ) + + assert mock_warning.call_count == 1 + assert any(model.model == "ok-custom" for model in models) + assert all(model.model != "none-custom" for model in models) diff --git a/api/tests/unit_tests/core/entities/test_entities_provider_entities.py b/api/tests/unit_tests/core/entities/test_entities_provider_entities.py new file mode 100644 index 0000000000..c5bfd05a1e --- /dev/null +++ b/api/tests/unit_tests/core/entities/test_entities_provider_entities.py @@ -0,0 +1,72 @@ +import pytest + +from core.entities.parameter_entities import AppSelectorScope +from core.entities.provider_entities import ( + BasicProviderConfig, + ModelSettings, + ProviderConfig, + ProviderQuotaType, +) +from core.tools.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import ModelType + + +def test_provider_quota_type_value_of_returns_enum_member() -> None: + # Arrange / Act + quota_type = ProviderQuotaType.value_of(ProviderQuotaType.TRIAL.value) + + # Assert + assert quota_type == ProviderQuotaType.TRIAL + + +def test_provider_quota_type_value_of_rejects_unknown_values() -> None: + # Arrange / Act / Assert + with pytest.raises(ValueError, match="No matching enum found"): + ProviderQuotaType.value_of("enterprise") + + +def test_basic_provider_config_type_value_of_handles_known_values() -> None: + # Arrange / Act + parameter_type = BasicProviderConfig.Type.value_of("text-input") + + # Assert + assert parameter_type == BasicProviderConfig.Type.TEXT_INPUT + + +def test_basic_provider_config_type_value_of_rejects_invalid_values() -> None: + # Arrange / Act / Assert + with pytest.raises(ValueError, match="invalid mode value"): + BasicProviderConfig.Type.value_of("unknown") + + +def test_provider_config_to_basic_provider_config_keeps_type_and_name() -> None: + # Arrange + provider_config = ProviderConfig( + type=BasicProviderConfig.Type.SELECT, + name="workspace", + scope=AppSelectorScope.ALL, + options=[ProviderConfig.Option(value="all", label=I18nObject(en_US="All"))], + ) + + # Act + basic_config = provider_config.to_basic_provider_config() + + # Assert + assert isinstance(basic_config, BasicProviderConfig) + assert basic_config.type == BasicProviderConfig.Type.SELECT + assert basic_config.name == "workspace" + + +def test_model_settings_accepts_model_field_name() -> None: + # Arrange / Act + settings = ModelSettings( + model="gpt-4o", + model_type=ModelType.LLM, + enabled=True, + load_balancing_enabled=False, + load_balancing_configs=[], + ) + + # Assert + assert settings.model == "gpt-4o" + assert settings.model_type == ModelType.LLM diff --git a/api/tests/unit_tests/core/extension/test_api_based_extension_requestor.py b/api/tests/unit_tests/core/extension/test_api_based_extension_requestor.py new file mode 100644 index 0000000000..399b531205 --- /dev/null +++ b/api/tests/unit_tests/core/extension/test_api_based_extension_requestor.py @@ -0,0 +1,137 @@ +import httpx +import pytest + +from core.extension.api_based_extension_requestor import APIBasedExtensionRequestor +from models.api_based_extension import APIBasedExtensionPoint + + +def test_request_success(mocker): + # Mock httpx.Client and its context manager + mock_client = mocker.MagicMock() + mock_client_instance = mock_client.__enter__.return_value + mocker.patch("httpx.Client", return_value=mock_client) + + mock_response = mocker.MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"result": "success"} + mock_client_instance.request.return_value = mock_response + + requestor = APIBasedExtensionRequestor(api_endpoint="http://example.com", api_key="test_key") + result = requestor.request(APIBasedExtensionPoint.PING, {"foo": "bar"}) + + assert result == {"result": "success"} + mock_client_instance.request.assert_called_once_with( + method="POST", + url="http://example.com", + json={"point": APIBasedExtensionPoint.PING.value, "params": {"foo": "bar"}}, + headers={"Content-Type": "application/json", "Authorization": "Bearer test_key"}, + ) + + +def test_request_with_ssrf_proxy(mocker): + # Mock dify_config + mocker.patch("configs.dify_config.SSRF_PROXY_HTTP_URL", "http://proxy:8080") + mocker.patch("configs.dify_config.SSRF_PROXY_HTTPS_URL", "https://proxy:8081") + + # Mock httpx.Client + mock_client = mocker.MagicMock() + mock_client_class = mocker.patch("httpx.Client", return_value=mock_client) + mock_client_instance = mock_client.__enter__.return_value + + # Mock response + mock_response = mocker.MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"result": "success"} + mock_client_instance.request.return_value = mock_response + + # Mock HTTPTransport + mock_transport = mocker.patch("httpx.HTTPTransport") + + requestor = APIBasedExtensionRequestor(api_endpoint="http://example.com", api_key="test_key") + requestor.request(APIBasedExtensionPoint.PING, {}) + + # Verify httpx.Client was called with mounts + mock_client_class.assert_called_once() + kwargs = mock_client_class.call_args.kwargs + assert "mounts" in kwargs + assert "http://" in kwargs["mounts"] + assert "https://" in kwargs["mounts"] + assert mock_transport.call_count == 2 + + +def test_request_with_only_one_proxy_config(mocker): + # Mock dify_config with only one proxy + mocker.patch("configs.dify_config.SSRF_PROXY_HTTP_URL", "http://proxy:8080") + mocker.patch("configs.dify_config.SSRF_PROXY_HTTPS_URL", None) + + # Mock httpx.Client + mock_client = mocker.MagicMock() + mock_client_class = mocker.patch("httpx.Client", return_value=mock_client) + mock_client_instance = mock_client.__enter__.return_value + + # Mock response + mock_response = mocker.MagicMock() + mock_response.status_code = 200 + mock_response.json.return_value = {"result": "success"} + mock_client_instance.request.return_value = mock_response + + requestor = APIBasedExtensionRequestor(api_endpoint="http://example.com", api_key="test_key") + requestor.request(APIBasedExtensionPoint.PING, {}) + + # Verify httpx.Client was called with mounts=None (default) + mock_client_class.assert_called_once() + kwargs = mock_client_class.call_args.kwargs + assert kwargs.get("mounts") is None + + +def test_request_timeout(mocker): + mock_client = mocker.MagicMock() + mock_client_instance = mock_client.__enter__.return_value + mocker.patch("httpx.Client", return_value=mock_client) + mock_client_instance.request.side_effect = httpx.TimeoutException("timeout") + + requestor = APIBasedExtensionRequestor(api_endpoint="http://example.com", api_key="test_key") + with pytest.raises(ValueError, match="request timeout"): + requestor.request(APIBasedExtensionPoint.PING, {}) + + +def test_request_connection_error(mocker): + mock_client = mocker.MagicMock() + mock_client_instance = mock_client.__enter__.return_value + mocker.patch("httpx.Client", return_value=mock_client) + mock_client_instance.request.side_effect = httpx.RequestError("error") + + requestor = APIBasedExtensionRequestor(api_endpoint="http://example.com", api_key="test_key") + with pytest.raises(ValueError, match="request connection error"): + requestor.request(APIBasedExtensionPoint.PING, {}) + + +def test_request_error_status_code(mocker): + mock_client = mocker.MagicMock() + mock_client_instance = mock_client.__enter__.return_value + mocker.patch("httpx.Client", return_value=mock_client) + + mock_response = mocker.MagicMock() + mock_response.status_code = 404 + mock_response.text = "Not Found" + mock_client_instance.request.return_value = mock_response + + requestor = APIBasedExtensionRequestor(api_endpoint="http://example.com", api_key="test_key") + with pytest.raises(ValueError, match="request error, status_code: 404, content: Not Found"): + requestor.request(APIBasedExtensionPoint.PING, {}) + + +def test_request_error_status_code_long_content(mocker): + mock_client = mocker.MagicMock() + mock_client_instance = mock_client.__enter__.return_value + mocker.patch("httpx.Client", return_value=mock_client) + + mock_response = mocker.MagicMock() + mock_response.status_code = 500 + mock_response.text = "A" * 200 # Testing truncation of content + mock_client_instance.request.return_value = mock_response + + requestor = APIBasedExtensionRequestor(api_endpoint="http://example.com", api_key="test_key") + expected_content = "A" * 100 + with pytest.raises(ValueError, match=f"request error, status_code: 500, content: {expected_content}"): + requestor.request(APIBasedExtensionPoint.PING, {}) diff --git a/api/tests/unit_tests/core/extension/test_extensible.py b/api/tests/unit_tests/core/extension/test_extensible.py new file mode 100644 index 0000000000..9bce0cd7c8 --- /dev/null +++ b/api/tests/unit_tests/core/extension/test_extensible.py @@ -0,0 +1,281 @@ +import json +import types +from unittest.mock import MagicMock, mock_open, patch + +import pytest + +from core.extension.extensible import Extensible + + +class TestExtensible: + def test_init(self): + tenant_id = "tenant_123" + config = {"key": "value"} + ext = Extensible(tenant_id, config) + assert ext.tenant_id == tenant_id + assert ext.config == config + + @patch("core.extension.extensible.importlib.util.find_spec") + @patch("core.extension.extensible.os.path.dirname") + @patch("core.extension.extensible.os.listdir") + @patch("core.extension.extensible.os.path.isdir") + @patch("core.extension.extensible.os.path.exists") + @patch("core.extension.extensible.Path.read_text") + @patch("core.extension.extensible.importlib.util.module_from_spec") + @patch("core.extension.extensible.sort_to_dict_by_position_map") + def test_scan_extensions_success( + self, + mock_sort, + mock_module_from_spec, + mock_read_text, + mock_exists, + mock_isdir, + mock_listdir, + mock_dirname, + mock_find_spec, + ): + # Setup + package_spec = MagicMock() + package_spec.origin = "/path/to/pkg/__init__.py" + + module_spec = MagicMock() + module_spec.loader = MagicMock() + + mock_find_spec.side_effect = [package_spec, module_spec] + mock_dirname.return_value = "/path/to/pkg" + + mock_listdir.side_effect = [ + ["ext1"], # package_dir + ["ext1.py", "__builtin__"], # subdir_path + ] + mock_isdir.return_value = True + + mock_exists.return_value = True + mock_read_text.return_value = "10" + + # Use types.ModuleType to avoid MagicMock __dict__ issues + mock_mod = types.ModuleType("ext1") + + class MockExtension(Extensible): + pass + + mock_mod.MockExtension = MockExtension + mock_module_from_spec.return_value = mock_mod + + mock_sort.side_effect = lambda position_map, data, name_func: data + + # Execute + results = Extensible.scan_extensions() + + # Assert + assert len(results) == 1 + assert results[0].name == "ext1" + assert results[0].position == 10 + assert results[0].builtin is True + assert results[0].extension_class == MockExtension + + @patch("core.extension.extensible.importlib.util.find_spec") + def test_scan_extensions_package_not_found(self, mock_find_spec): + mock_find_spec.return_value = None + with pytest.raises(ImportError, match="Could not find package"): + Extensible.scan_extensions() + + @patch("core.extension.extensible.importlib.util.find_spec") + @patch("core.extension.extensible.os.path.dirname") + @patch("core.extension.extensible.os.listdir") + @patch("core.extension.extensible.os.path.isdir") + def test_scan_extensions_skip_subdirs(self, mock_isdir, mock_listdir, mock_dirname, mock_find_spec): + package_spec = MagicMock() + package_spec.origin = "/path/to/pkg/__init__.py" + mock_find_spec.return_value = package_spec + mock_dirname.return_value = "/path/to/pkg" + + mock_listdir.side_effect = [["__pycache__", "not_a_dir", "missing_py_file"], []] + + mock_isdir.side_effect = [False, True] + + with patch("core.extension.extensible.sort_to_dict_by_position_map", return_value=[]): + results = Extensible.scan_extensions() + assert len(results) == 0 + + @patch("core.extension.extensible.importlib.util.find_spec") + @patch("core.extension.extensible.os.path.dirname") + @patch("core.extension.extensible.os.listdir") + @patch("core.extension.extensible.os.path.isdir") + @patch("core.extension.extensible.os.path.exists") + @patch("core.extension.extensible.importlib.util.module_from_spec") + def test_scan_extensions_not_builtin_success( + self, mock_module_from_spec, mock_exists, mock_isdir, mock_listdir, mock_dirname, mock_find_spec + ): + package_spec = MagicMock() + package_spec.origin = "/path/to/pkg/__init__.py" + + module_spec = MagicMock() + module_spec.loader = MagicMock() + + mock_find_spec.side_effect = [package_spec, module_spec] + mock_dirname.return_value = "/path/to/pkg" + + mock_listdir.side_effect = [["ext1"], ["ext1.py", "schema.json"]] + mock_isdir.return_value = True + + # exists checks: only schema.json needs to exist + mock_exists.return_value = True + + mock_mod = types.ModuleType("ext1") + + class MockExtension(Extensible): + pass + + mock_mod.MockExtension = MockExtension + mock_module_from_spec.return_value = mock_mod + + schema_content = json.dumps({"label": {"en": "Test"}, "form_schema": [{"name": "field1"}]}) + + with ( + patch("builtins.open", mock_open(read_data=schema_content)), + patch( + "core.extension.extensible.sort_to_dict_by_position_map", + side_effect=lambda position_map, data, name_func: data, + ), + ): + results = Extensible.scan_extensions() + + assert len(results) == 1 + assert results[0].name == "ext1" + assert results[0].builtin is False + assert results[0].label == {"en": "Test"} + + @patch("core.extension.extensible.importlib.util.find_spec") + @patch("core.extension.extensible.os.path.dirname") + @patch("core.extension.extensible.os.listdir") + @patch("core.extension.extensible.os.path.isdir") + @patch("core.extension.extensible.os.path.exists") + @patch("core.extension.extensible.importlib.util.module_from_spec") + def test_scan_extensions_not_builtin_missing_schema( + self, mock_module_from_spec, mock_exists, mock_isdir, mock_listdir, mock_dirname, mock_find_spec + ): + package_spec = MagicMock() + package_spec.origin = "/path/to/pkg/__init__.py" + + module_spec = MagicMock() + module_spec.loader = MagicMock() + + mock_find_spec.side_effect = [package_spec, module_spec] + mock_dirname.return_value = "/path/to/pkg" + + mock_listdir.side_effect = [["ext1"], ["ext1.py"]] + mock_isdir.return_value = True + + # exists: only schema.json checked, and return False + mock_exists.return_value = False + + mock_mod = types.ModuleType("ext1") + + class MockExtension(Extensible): + pass + + mock_mod.MockExtension = MockExtension + mock_module_from_spec.return_value = mock_mod + + with patch("core.extension.extensible.sort_to_dict_by_position_map", return_value=[]): + results = Extensible.scan_extensions() + + assert len(results) == 0 + + @patch("core.extension.extensible.importlib.util.find_spec") + @patch("core.extension.extensible.os.path.dirname") + @patch("core.extension.extensible.os.listdir") + @patch("core.extension.extensible.os.path.isdir") + @patch("core.extension.extensible.importlib.util.module_from_spec") + @patch("core.extension.extensible.os.path.exists") + def test_scan_extensions_no_extension_class( + self, mock_exists, mock_module_from_spec, mock_isdir, mock_listdir, mock_dirname, mock_find_spec + ): + package_spec = MagicMock() + package_spec.origin = "/path/to/pkg/__init__.py" + module_spec = MagicMock() + module_spec.loader = MagicMock() + + mock_find_spec.side_effect = [package_spec, module_spec] + mock_dirname.return_value = "/path/to/pkg" + + mock_listdir.side_effect = [["ext1"], ["ext1.py"]] + mock_isdir.return_value = True + + # Mock not builtin + mock_exists.return_value = False + + mock_mod = types.ModuleType("ext1") + mock_mod.SomeOtherClass = type("SomeOtherClass", (), {}) + mock_module_from_spec.return_value = mock_mod + + # We need to ensure we don't crash if checking schema (but we won't reach there because class not found) + + with patch("core.extension.extensible.sort_to_dict_by_position_map", return_value=[]): + results = Extensible.scan_extensions() + + assert len(results) == 0 + + @patch("core.extension.extensible.importlib.util.find_spec") + @patch("core.extension.extensible.os.path.dirname") + @patch("core.extension.extensible.os.listdir") + @patch("core.extension.extensible.os.path.isdir") + def test_scan_extensions_module_import_error(self, mock_isdir, mock_listdir, mock_dirname, mock_find_spec): + package_spec = MagicMock() + package_spec.origin = "/path/to/pkg/__init__.py" + + mock_find_spec.side_effect = [package_spec, None] # No module spec + mock_dirname.return_value = "/path/to/pkg" + + mock_listdir.side_effect = [["ext1"], ["ext1.py"]] + mock_isdir.return_value = True + + with pytest.raises(ImportError, match="Failed to load module"): + Extensible.scan_extensions() + + @patch("core.extension.extensible.importlib.util.find_spec") + def test_scan_extensions_general_exception(self, mock_find_spec): + mock_find_spec.side_effect = Exception("Unexpected error") + with pytest.raises(Exception, match="Unexpected error"): + Extensible.scan_extensions() + + @patch("core.extension.extensible.importlib.util.find_spec") + @patch("core.extension.extensible.os.path.dirname") + @patch("core.extension.extensible.os.listdir") + @patch("core.extension.extensible.os.path.isdir") + @patch("core.extension.extensible.os.path.exists") + @patch("core.extension.extensible.Path.read_text") + @patch("core.extension.extensible.importlib.util.module_from_spec") + def test_scan_extensions_builtin_without_position_file( + self, mock_module_from_spec, mock_read_text, mock_exists, mock_isdir, mock_listdir, mock_dirname, mock_find_spec + ): + package_spec = MagicMock() + package_spec.origin = "/path/to/pkg/__init__.py" + module_spec = MagicMock() + module_spec.loader = MagicMock() + + mock_find_spec.side_effect = [package_spec, module_spec] + mock_dirname.return_value = "/path/to/pkg" + mock_listdir.side_effect = [["ext1"], ["ext1.py", "__builtin__"]] + mock_isdir.return_value = True + + # builtin exists in listdir, but os.path.exists(builtin_file_path) returns False + mock_exists.return_value = False + + mock_mod = types.ModuleType("ext1") + + class MockExtension(Extensible): + pass + + mock_mod.MockExtension = MockExtension + mock_module_from_spec.return_value = mock_mod + + with patch( + "core.extension.extensible.sort_to_dict_by_position_map", + side_effect=lambda position_map, data, name_func: data, + ): + results = Extensible.scan_extensions() + + assert len(results) == 1 + assert results[0].position == 0 diff --git a/api/tests/unit_tests/core/extension/test_extension.py b/api/tests/unit_tests/core/extension/test_extension.py new file mode 100644 index 0000000000..4ad32d3840 --- /dev/null +++ b/api/tests/unit_tests/core/extension/test_extension.py @@ -0,0 +1,90 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.extension.extensible import ExtensionModule, ModuleExtension +from core.extension.extension import Extension + + +class TestExtension: + def setup_method(self): + # Reset the private class attribute before each test + Extension._Extension__module_extensions = {} + + def test_init(self): + # Mock scan_extensions for Moderation and ExternalDataTool + mock_mod_extensions = {"mod1": ModuleExtension(name="mod1")} + mock_ext_extensions = {"ext1": ModuleExtension(name="ext1")} + + extension = Extension() + + # We need to mock scan_extensions on the classes defined in Extension.module_classes + with ( + patch("core.extension.extension.Moderation.scan_extensions", return_value=mock_mod_extensions), + patch("core.extension.extension.ExternalDataTool.scan_extensions", return_value=mock_ext_extensions), + ): + extension.init() + + # Check if internal state is updated + internal_state = Extension._Extension__module_extensions + assert internal_state[ExtensionModule.MODERATION.value] == mock_mod_extensions + assert internal_state[ExtensionModule.EXTERNAL_DATA_TOOL.value] == mock_ext_extensions + + def test_module_extensions_success(self): + # Setup data + mock_extensions = {"name1": ModuleExtension(name="name1"), "name2": ModuleExtension(name="name2")} + Extension._Extension__module_extensions = {ExtensionModule.MODERATION.value: mock_extensions} + + extension = Extension() + result = extension.module_extensions(ExtensionModule.MODERATION.value) + + assert len(result) == 2 + assert any(e.name == "name1" for e in result) + assert any(e.name == "name2" for e in result) + + def test_module_extensions_not_found(self): + extension = Extension() + with pytest.raises(ValueError, match="Extension Module unknown not found"): + extension.module_extensions("unknown") + + def test_module_extension_success(self): + mock_ext = ModuleExtension(name="test_ext") + Extension._Extension__module_extensions = {ExtensionModule.MODERATION.value: {"test_ext": mock_ext}} + + extension = Extension() + result = extension.module_extension(ExtensionModule.MODERATION, "test_ext") + assert result == mock_ext + + def test_module_extension_module_not_found(self): + extension = Extension() + # ExtensionModule.MODERATION is "moderation" + with pytest.raises(ValueError, match="Extension Module moderation not found"): + extension.module_extension(ExtensionModule.MODERATION, "any") + + def test_module_extension_extension_not_found(self): + # We need a non-empty dict because 'if not module_extensions' in extension.py + # returns True for an empty dict, which raises the module not found error instead. + Extension._Extension__module_extensions = {ExtensionModule.MODERATION.value: {"other": MagicMock()}} + + extension = Extension() + with pytest.raises(ValueError, match="Extension unknown not found"): + extension.module_extension(ExtensionModule.MODERATION, "unknown") + + def test_extension_class_success(self): + class MockClass: + pass + + mock_ext = ModuleExtension(name="test_ext", extension_class=MockClass) + Extension._Extension__module_extensions = {ExtensionModule.MODERATION.value: {"test_ext": mock_ext}} + + extension = Extension() + result = extension.extension_class(ExtensionModule.MODERATION, "test_ext") + assert result == MockClass + + def test_extension_class_none(self): + mock_ext = ModuleExtension(name="test_ext", extension_class=None) + Extension._Extension__module_extensions = {ExtensionModule.MODERATION.value: {"test_ext": mock_ext}} + + extension = Extension() + with pytest.raises(AssertionError): + extension.extension_class(ExtensionModule.MODERATION, "test_ext") diff --git a/api/tests/unit_tests/core/external_data_tool/api/test_api.py b/api/tests/unit_tests/core/external_data_tool/api/test_api.py new file mode 100644 index 0000000000..1653124bd8 --- /dev/null +++ b/api/tests/unit_tests/core/external_data_tool/api/test_api.py @@ -0,0 +1,145 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.external_data_tool.api.api import ApiExternalDataTool +from models.api_based_extension import APIBasedExtensionPoint + + +def test_api_external_data_tool_name(): + assert ApiExternalDataTool.name == "api" + + +@patch("core.external_data_tool.api.api.db") +def test_validate_config_success(mock_db): + mock_extension = MagicMock() + mock_extension.id = "ext_id" + mock_extension.tenant_id = "tenant_id" + mock_db.session.scalar.return_value = mock_extension + + # Should not raise exception + ApiExternalDataTool.validate_config("tenant_id", {"api_based_extension_id": "ext_id"}) + + +def test_validate_config_missing_id(): + with pytest.raises(ValueError, match="api_based_extension_id is required"): + ApiExternalDataTool.validate_config("tenant_id", {}) + + +@patch("core.external_data_tool.api.api.db") +def test_validate_config_invalid_id(mock_db): + mock_db.session.scalar.return_value = None + + with pytest.raises(ValueError, match="api_based_extension_id is invalid"): + ApiExternalDataTool.validate_config("tenant_id", {"api_based_extension_id": "ext_id"}) + + +@pytest.fixture +def api_tool(): + # Use standard kwargs as it inherits from ExternalDataTool which is typically a Pydantic BaseModel + return ApiExternalDataTool( + tenant_id="tenant_id", app_id="app_id", variable="var1", config={"api_based_extension_id": "ext_id"} + ) + + +@patch("core.external_data_tool.api.api.db") +@patch("core.external_data_tool.api.api.encrypter") +@patch("core.external_data_tool.api.api.APIBasedExtensionRequestor") +def test_query_success(mock_requestor_class, mock_encrypter, mock_db, api_tool): + mock_extension = MagicMock() + mock_extension.id = "ext_id" + mock_extension.tenant_id = "tenant_id" + mock_extension.api_endpoint = "http://api" + mock_extension.api_key = "encrypted_key" + mock_db.session.scalar.return_value = mock_extension + mock_encrypter.decrypt_token.return_value = "decrypted_key" + + mock_requestor = mock_requestor_class.return_value + mock_requestor.request.return_value = {"result": "success_result"} + + res = api_tool.query({"input1": "value1"}, "query_str") + + assert res == "success_result" + + mock_requestor_class.assert_called_once_with(api_endpoint="http://api", api_key="decrypted_key") + mock_requestor.request.assert_called_once_with( + point=APIBasedExtensionPoint.APP_EXTERNAL_DATA_TOOL_QUERY, + params={"app_id": "app_id", "tool_variable": "var1", "inputs": {"input1": "value1"}, "query": "query_str"}, + ) + + +def test_query_missing_config(): + api_tool = ApiExternalDataTool(tenant_id="tenant_id", app_id="app_id", variable="var1") + api_tool.config = None # Force None + with pytest.raises(ValueError, match="config is required"): + api_tool.query({}, "") + + +def test_query_missing_extension_id(): + api_tool = ApiExternalDataTool(tenant_id="tenant_id", app_id="app_id", variable="var1", config={"dummy": "value"}) + with pytest.raises(AssertionError, match="api_based_extension_id is required"): + api_tool.query({}, "") + + +@patch("core.external_data_tool.api.api.db") +def test_query_invalid_extension(mock_db, api_tool): + mock_db.session.scalar.return_value = None + + with pytest.raises(ValueError, match=".*error: api_based_extension_id is invalid"): + api_tool.query({}, "") + + +@patch("core.external_data_tool.api.api.db") +@patch("core.external_data_tool.api.api.encrypter") +@patch("core.external_data_tool.api.api.APIBasedExtensionRequestor") +def test_query_requestor_init_error(mock_requestor_class, mock_encrypter, mock_db, api_tool): + mock_extension = MagicMock() + mock_extension.id = "ext_id" + mock_extension.tenant_id = "tenant_id" + mock_extension.api_endpoint = "http://api" + mock_extension.api_key = "encrypted_key" + mock_db.session.scalar.return_value = mock_extension + mock_encrypter.decrypt_token.return_value = "decrypted_key" + + mock_requestor_class.side_effect = Exception("init error") + + with pytest.raises(ValueError, match=".*error: init error"): + api_tool.query({}, "") + + +@patch("core.external_data_tool.api.api.db") +@patch("core.external_data_tool.api.api.encrypter") +@patch("core.external_data_tool.api.api.APIBasedExtensionRequestor") +def test_query_no_result_in_response(mock_requestor_class, mock_encrypter, mock_db, api_tool): + mock_extension = MagicMock() + mock_extension.id = "ext_id" + mock_extension.tenant_id = "tenant_id" + mock_extension.api_endpoint = "http://api" + mock_extension.api_key = "encrypted_key" + mock_db.session.scalar.return_value = mock_extension + mock_encrypter.decrypt_token.return_value = "decrypted_key" + + mock_requestor = mock_requestor_class.return_value + mock_requestor.request.return_value = {"other": "value"} + + with pytest.raises(ValueError, match=".*error: result not found in response"): + api_tool.query({}, "") + + +@patch("core.external_data_tool.api.api.db") +@patch("core.external_data_tool.api.api.encrypter") +@patch("core.external_data_tool.api.api.APIBasedExtensionRequestor") +def test_query_result_not_string(mock_requestor_class, mock_encrypter, mock_db, api_tool): + mock_extension = MagicMock() + mock_extension.id = "ext_id" + mock_extension.tenant_id = "tenant_id" + mock_extension.api_endpoint = "http://api" + mock_extension.api_key = "encrypted_key" + mock_db.session.scalar.return_value = mock_extension + mock_encrypter.decrypt_token.return_value = "decrypted_key" + + mock_requestor = mock_requestor_class.return_value + mock_requestor.request.return_value = {"result": 123} # Not a string + + with pytest.raises(ValueError, match=".*error: result is not string"): + api_tool.query({}, "") diff --git a/api/tests/unit_tests/core/external_data_tool/test_base.py b/api/tests/unit_tests/core/external_data_tool/test_base.py new file mode 100644 index 0000000000..216cda83c5 --- /dev/null +++ b/api/tests/unit_tests/core/external_data_tool/test_base.py @@ -0,0 +1,66 @@ +import pytest + +from core.extension.extensible import ExtensionModule +from core.external_data_tool.base import ExternalDataTool + + +class TestExternalDataTool: + def test_module_attribute(self): + assert ExternalDataTool.module == ExtensionModule.EXTERNAL_DATA_TOOL + + def test_init(self): + # Create a concrete subclass to test init + class ConcreteTool(ExternalDataTool): + @classmethod + def validate_config(cls, tenant_id: str, config: dict): + return super().validate_config(tenant_id, config) + + def query(self, inputs: dict, query: str | None = None) -> str: + return super().query(inputs, query) + + tool = ConcreteTool(tenant_id="tenant_1", app_id="app_1", variable="var_1", config={"key": "value"}) + assert tool.tenant_id == "tenant_1" + assert tool.app_id == "app_1" + assert tool.variable == "var_1" + assert tool.config == {"key": "value"} + + def test_init_without_config(self): + # Create a concrete subclass to test init + class ConcreteTool(ExternalDataTool): + @classmethod + def validate_config(cls, tenant_id: str, config: dict): + pass + + def query(self, inputs: dict, query: str | None = None) -> str: + return "" + + tool = ConcreteTool(tenant_id="tenant_1", app_id="app_1", variable="var_1") + assert tool.tenant_id == "tenant_1" + assert tool.app_id == "app_1" + assert tool.variable == "var_1" + assert tool.config is None + + def test_validate_config_raises_not_implemented(self): + class ConcreteTool(ExternalDataTool): + @classmethod + def validate_config(cls, tenant_id: str, config: dict): + return super().validate_config(tenant_id, config) + + def query(self, inputs: dict, query: str | None = None) -> str: + return "" + + with pytest.raises(NotImplementedError): + ConcreteTool.validate_config("tenant_1", {}) + + def test_query_raises_not_implemented(self): + class ConcreteTool(ExternalDataTool): + @classmethod + def validate_config(cls, tenant_id: str, config: dict): + pass + + def query(self, inputs: dict, query: str | None = None) -> str: + return super().query(inputs, query) + + tool = ConcreteTool(tenant_id="tenant_1", app_id="app_1", variable="var_1") + with pytest.raises(NotImplementedError): + tool.query({}) diff --git a/api/tests/unit_tests/core/external_data_tool/test_external_data_fetch.py b/api/tests/unit_tests/core/external_data_tool/test_external_data_fetch.py new file mode 100644 index 0000000000..86b461cf04 --- /dev/null +++ b/api/tests/unit_tests/core/external_data_tool/test_external_data_fetch.py @@ -0,0 +1,115 @@ +from unittest.mock import patch + +import pytest +from flask import Flask + +from core.app.app_config.entities import ExternalDataVariableEntity +from core.external_data_tool.external_data_fetch import ExternalDataFetch + + +class TestExternalDataFetch: + @pytest.fixture + def app(self): + app = Flask(__name__) + return app + + def test_fetch_success(self, app): + with app.app_context(): + fetcher = ExternalDataFetch() + + # Setup mocks + tool1 = ExternalDataVariableEntity(variable="var1", type="type1", config={"c1": "v1"}) + tool2 = ExternalDataVariableEntity(variable="var2", type="type2", config={"c2": "v2"}) + + external_data_tools = [tool1, tool2] + inputs = {"input_key": "input_value"} + query = "test query" + + with patch("core.external_data_tool.external_data_fetch.ExternalDataToolFactory") as MockFactory: + # Create distinct mock instances for each tool to ensure deterministic results + # This approach is robust regardless of thread scheduling order + from unittest.mock import MagicMock + + def factory_side_effect(*args, **kwargs): + variable = kwargs.get("variable") + mock_instance = MagicMock() + if variable == "var1": + mock_instance.query.return_value = "result1" + elif variable == "var2": + mock_instance.query.return_value = "result2" + return mock_instance + + MockFactory.side_effect = factory_side_effect + + result_inputs = fetcher.fetch( + tenant_id="tenant1", + app_id="app1", + external_data_tools=external_data_tools, + inputs=inputs, + query=query, + ) + + # Each tool gets its deterministic result regardless of thread completion order + assert result_inputs["var1"] == "result1" + assert result_inputs["var2"] == "result2" + assert result_inputs["input_key"] == "input_value" + assert len(result_inputs) == 3 + + # Verify factory calls + assert MockFactory.call_count == 2 + MockFactory.assert_any_call( + name="type1", tenant_id="tenant1", app_id="app1", variable="var1", config={"c1": "v1"} + ) + MockFactory.assert_any_call( + name="type2", tenant_id="tenant1", app_id="app1", variable="var2", config={"c2": "v2"} + ) + + def test_fetch_no_tools(self): + # We don't necessarily need app_context if there are no tools, + # but fetch calls current_app._get_current_object() only inside the loop. + # Wait, let's look at the code. + # for tool in external_data_tools: + # executor.submit(..., current_app._get_current_object(), ...) + # So if external_data_tools is empty, it shouldn't access current_app. + fetcher = ExternalDataFetch() + inputs = {"input_key": "input_value"} + result_inputs = fetcher.fetch( + tenant_id="tenant1", app_id="app1", external_data_tools=[], inputs=inputs, query="test query" + ) + assert result_inputs == inputs + assert result_inputs is not inputs # Should be a copy + + def test_fetch_with_none_variable(self, app): + with app.app_context(): + fetcher = ExternalDataFetch() + tool = ExternalDataVariableEntity(variable="var1", type="type1", config={}) + + # Patch _query_external_data_tool to return None variable + with patch.object(ExternalDataFetch, "_query_external_data_tool") as mock_query: + mock_query.return_value = (None, "some_result") + + result_inputs = fetcher.fetch( + tenant_id="t1", app_id="a1", external_data_tools=[tool], inputs={"in": "val"}, query="q" + ) + + assert "var1" not in result_inputs + assert result_inputs == {"in": "val"} + + def test_query_external_data_tool(self, app): + fetcher = ExternalDataFetch() + tool = ExternalDataVariableEntity(variable="var1", type="type1", config={"k": "v"}) + + with patch("core.external_data_tool.external_data_fetch.ExternalDataToolFactory") as MockFactory: + mock_factory_instance = MockFactory.return_value + mock_factory_instance.query.return_value = "query_result" + + var, res = fetcher._query_external_data_tool( + flask_app=app, tenant_id="t1", app_id="a1", external_data_tool=tool, inputs={"i": "v"}, query="q" + ) + + assert var == "var1" + assert res == "query_result" + MockFactory.assert_called_once_with( + name="type1", tenant_id="t1", app_id="a1", variable="var1", config={"k": "v"} + ) + mock_factory_instance.query.assert_called_once_with(inputs={"i": "v"}, query="q") diff --git a/api/tests/unit_tests/core/external_data_tool/test_factory.py b/api/tests/unit_tests/core/external_data_tool/test_factory.py new file mode 100644 index 0000000000..6bb384b0ac --- /dev/null +++ b/api/tests/unit_tests/core/external_data_tool/test_factory.py @@ -0,0 +1,58 @@ +from unittest.mock import MagicMock, patch + +from core.extension.extensible import ExtensionModule +from core.external_data_tool.factory import ExternalDataToolFactory + + +def test_external_data_tool_factory_init(): + with patch("core.external_data_tool.factory.code_based_extension") as mock_code_based_extension: + mock_extension_class = MagicMock() + mock_code_based_extension.extension_class.return_value = mock_extension_class + + name = "test_tool" + tenant_id = "tenant_123" + app_id = "app_456" + variable = "var_v" + config = {"key": "value"} + + factory = ExternalDataToolFactory(name, tenant_id, app_id, variable, config) + + mock_code_based_extension.extension_class.assert_called_once_with(ExtensionModule.EXTERNAL_DATA_TOOL, name) + mock_extension_class.assert_called_once_with( + tenant_id=tenant_id, app_id=app_id, variable=variable, config=config + ) + + +def test_external_data_tool_factory_validate_config(): + with patch("core.external_data_tool.factory.code_based_extension") as mock_code_based_extension: + mock_extension_class = MagicMock() + mock_code_based_extension.extension_class.return_value = mock_extension_class + + name = "test_tool" + tenant_id = "tenant_123" + config = {"key": "value"} + + ExternalDataToolFactory.validate_config(name, tenant_id, config) + + mock_code_based_extension.extension_class.assert_called_once_with(ExtensionModule.EXTERNAL_DATA_TOOL, name) + mock_extension_class.validate_config.assert_called_once_with(tenant_id, config) + + +def test_external_data_tool_factory_query(): + with patch("core.external_data_tool.factory.code_based_extension") as mock_code_based_extension: + mock_extension_class = MagicMock() + mock_extension_instance = MagicMock() + mock_extension_class.return_value = mock_extension_instance + mock_code_based_extension.extension_class.return_value = mock_extension_class + + mock_extension_instance.query.return_value = "query_result" + + factory = ExternalDataToolFactory("name", "tenant", "app", "var", {}) + + inputs = {"input_key": "input_value"} + query = "search_query" + + result = factory.query(inputs, query) + + assert result == "query_result" + mock_extension_instance.query.assert_called_once_with(inputs, query) diff --git a/api/tests/unit_tests/core/llm_generator/output_parser/test_rule_config_generator.py b/api/tests/unit_tests/core/llm_generator/output_parser/test_rule_config_generator.py new file mode 100644 index 0000000000..b2783bdf99 --- /dev/null +++ b/api/tests/unit_tests/core/llm_generator/output_parser/test_rule_config_generator.py @@ -0,0 +1,103 @@ +import pytest + +from core.llm_generator.output_parser.errors import OutputParserError +from core.llm_generator.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser +from core.llm_generator.prompts import ( + RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE, + RULE_CONFIG_PROMPT_GENERATE_TEMPLATE, + RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE, +) + + +class TestRuleConfigGeneratorOutputParser: + def test_get_format_instructions(self): + parser = RuleConfigGeneratorOutputParser() + instructions = parser.get_format_instructions() + assert instructions == ( + RULE_CONFIG_PROMPT_GENERATE_TEMPLATE, + RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE, + RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE, + ) + + def test_parse_success(self): + parser = RuleConfigGeneratorOutputParser() + text = """ +```json +{ + "prompt": "This is a prompt", + "variables": ["var1", "var2"], + "opening_statement": "Hello!" +} +``` +""" + result = parser.parse(text) + assert result["prompt"] == "This is a prompt" + assert result["variables"] == ["var1", "var2"] + assert result["opening_statement"] == "Hello!" + + def test_parse_invalid_json(self): + parser = RuleConfigGeneratorOutputParser() + text = "invalid json" + with pytest.raises(OutputParserError) as excinfo: + parser.parse(text) + assert "Parsing text" in str(excinfo.value) + assert "could not find json block in the output" in str(excinfo.value) + + def test_parse_missing_keys(self): + parser = RuleConfigGeneratorOutputParser() + text = """ +```json +{ + "prompt": "This is a prompt", + "variables": ["var1", "var2"] +} +``` +""" + with pytest.raises(OutputParserError) as excinfo: + parser.parse(text) + assert "expected key `opening_statement` to be present" in str(excinfo.value) + + def test_parse_wrong_type_prompt(self): + parser = RuleConfigGeneratorOutputParser() + text = """ +```json +{ + "prompt": 123, + "variables": ["var1", "var2"], + "opening_statement": "Hello!" +} +``` +""" + with pytest.raises(OutputParserError) as excinfo: + parser.parse(text) + assert "Expected 'prompt' to be a string" in str(excinfo.value) + + def test_parse_wrong_type_variables(self): + parser = RuleConfigGeneratorOutputParser() + text = """ +```json +{ + "prompt": "This is a prompt", + "variables": "not a list", + "opening_statement": "Hello!" +} +``` +""" + with pytest.raises(OutputParserError) as excinfo: + parser.parse(text) + assert "Expected 'variables' to be a list" in str(excinfo.value) + + def test_parse_wrong_type_opening_statement(self): + parser = RuleConfigGeneratorOutputParser() + text = """ +```json +{ + "prompt": "This is a prompt", + "variables": ["var1", "var2"], + "opening_statement": 123 +} +``` +""" + with pytest.raises(OutputParserError) as excinfo: + parser.parse(text) + assert "Expected 'opening_statement' to be a str" in str(excinfo.value) diff --git a/api/tests/unit_tests/core/llm_generator/output_parser/test_structured_output.py b/api/tests/unit_tests/core/llm_generator/output_parser/test_structured_output.py new file mode 100644 index 0000000000..46c9dc6f9c --- /dev/null +++ b/api/tests/unit_tests/core/llm_generator/output_parser/test_structured_output.py @@ -0,0 +1,402 @@ +import json +from unittest.mock import MagicMock, patch + +import pytest + +from core.llm_generator.output_parser.errors import OutputParserError +from core.llm_generator.output_parser.structured_output import ( + ResponseFormat, + _handle_native_json_schema, + _handle_prompt_based_schema, + _parse_structured_output, + _prepare_schema_for_model, + _set_response_format, + convert_boolean_to_string, + invoke_llm_with_structured_output, + remove_additional_properties, +) +from core.model_manager import ModelInstance +from dify_graph.model_runtime.entities.llm_entities import ( + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, + LLMResultWithStructuredOutput, + LLMUsage, +) +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + SystemPromptMessage, + TextPromptMessageContent, + UserPromptMessage, +) +from dify_graph.model_runtime.entities.model_entities import AIModelEntity, ParameterRule, ParameterType + + +class TestStructuredOutput: + def test_remove_additional_properties(self): + schema = { + "type": "object", + "properties": {"name": {"type": "string"}, "age": {"type": "integer"}}, + "additionalProperties": False, + "nested": {"type": "object", "additionalProperties": True}, + "items": [{"type": "object", "additionalProperties": False}], + } + remove_additional_properties(schema) + assert "additionalProperties" not in schema + assert "additionalProperties" not in schema["nested"] + assert "additionalProperties" not in schema["items"][0] + + # Test with non-dict input + remove_additional_properties(None) # Should not raise + remove_additional_properties([]) # Should not raise + + def test_convert_boolean_to_string(self): + schema = { + "type": "object", + "properties": { + "is_active": {"type": "boolean"}, + "tags": {"type": "array", "items": {"type": "boolean"}}, + "list_schema": [{"type": "boolean"}], + }, + } + convert_boolean_to_string(schema) + assert schema["properties"]["is_active"]["type"] == "string" + assert schema["properties"]["tags"]["items"]["type"] == "string" + assert schema["properties"]["list_schema"][0]["type"] == "string" + + # Test with non-dict input + convert_boolean_to_string(None) # Should not raise + convert_boolean_to_string([]) # Should not raise + + def test_parse_structured_output_valid(self): + text = '{"key": "value"}' + assert _parse_structured_output(text) == {"key": "value"} + + def test_parse_structured_output_non_dict_valid_json(self): + # Even if it's valid JSON, if it's not a dict, it should try repair or fail + text = '["a", "b"]' + with patch("json_repair.loads") as mock_repair: + mock_repair.return_value = {"key": "value"} + assert _parse_structured_output(text) == {"key": "value"} + + def test_parse_structured_output_not_dict_fail_via_validate(self): + # Force TypeAdapter to return a non-dict to trigger line 292 + with patch("pydantic.TypeAdapter.validate_json") as mock_validate: + mock_validate.return_value = ["a list"] + with pytest.raises(OutputParserError) as excinfo: + _parse_structured_output('["a list"]') + assert "Failed to parse structured output" in str(excinfo.value) + + def test_parse_structured_output_repair_success(self): + text = "{'key': 'value'}" # Invalid JSON (single quotes) + # json_repair should handle this + assert _parse_structured_output(text) == {"key": "value"} + + def test_parse_structured_output_repair_list(self): + # Deepseek-r1 case: result is a list containing a dict + text = '[{"key": "value"}]' + assert _parse_structured_output(text) == {"key": "value"} + + def test_parse_structured_output_repair_list_no_dict(self): + # Deepseek-r1 case: result is a list with NO dict + text = "[1, 2, 3]" + assert _parse_structured_output(text) == {} + + def test_parse_structured_output_repair_fail(self): + text = "not a json at all" + with patch("json_repair.loads") as mock_repair: + mock_repair.return_value = "still not a dict or list" + with pytest.raises(OutputParserError): + _parse_structured_output(text) + + def test_set_response_format(self): + # Test JSON + params = {} + rules = [ + ParameterRule( + name="response_format", + label={"en_US": ""}, + type=ParameterType.STRING, + help={"en_US": ""}, + options=[ResponseFormat.JSON], + ) + ] + _set_response_format(params, rules) + assert params["response_format"] == ResponseFormat.JSON + + # Test JSON_OBJECT + params = {} + rules = [ + ParameterRule( + name="response_format", + label={"en_US": ""}, + type=ParameterType.STRING, + help={"en_US": ""}, + options=[ResponseFormat.JSON_OBJECT], + ) + ] + _set_response_format(params, rules) + assert params["response_format"] == ResponseFormat.JSON_OBJECT + + def test_handle_native_json_schema(self): + provider = "openai" + model_schema = MagicMock(spec=AIModelEntity) + model_schema.model = "gpt-4" + structured_output_schema = {"type": "object"} + model_parameters = {} + rules = [ + ParameterRule( + name="response_format", + label={"en_US": ""}, + type=ParameterType.STRING, + help={"en_US": ""}, + options=[ResponseFormat.JSON_SCHEMA], + ) + ] + + updated_params = _handle_native_json_schema( + provider, model_schema, structured_output_schema, model_parameters, rules + ) + + assert "json_schema" in updated_params + assert json.loads(updated_params["json_schema"]) == {"schema": {"type": "object"}, "name": "llm_response"} + assert updated_params["response_format"] == ResponseFormat.JSON_SCHEMA + + def test_handle_native_json_schema_no_format_rule(self): + provider = "openai" + model_schema = MagicMock(spec=AIModelEntity) + model_schema.model = "gpt-4" + structured_output_schema = {"type": "object"} + model_parameters = {} + rules = [] + + updated_params = _handle_native_json_schema( + provider, model_schema, structured_output_schema, model_parameters, rules + ) + + assert "json_schema" in updated_params + assert "response_format" not in updated_params + + def test_handle_prompt_based_schema_with_system_prompt(self): + prompt_messages = [ + SystemPromptMessage(content="Existing system prompt"), + UserPromptMessage(content="User question"), + ] + schema = {"type": "object"} + + result = _handle_prompt_based_schema(prompt_messages, schema) + + assert len(result) == 2 + assert isinstance(result[0], SystemPromptMessage) + assert "Existing system prompt" in result[0].content + assert json.dumps(schema) in result[0].content + assert isinstance(result[1], UserPromptMessage) + + def test_handle_prompt_based_schema_without_system_prompt(self): + prompt_messages = [UserPromptMessage(content="User question")] + schema = {"type": "object"} + + result = _handle_prompt_based_schema(prompt_messages, schema) + + assert len(result) == 2 + assert isinstance(result[0], SystemPromptMessage) + assert json.dumps(schema) in result[0].content + assert isinstance(result[1], UserPromptMessage) + + def test_prepare_schema_for_model_gemini(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.model = "gemini-1.5-pro" + schema = {"type": "object", "additionalProperties": False} + + result = _prepare_schema_for_model("google", model_schema, schema) + assert "additionalProperties" not in result + + def test_prepare_schema_for_model_ollama(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.model = "llama3" + schema = {"type": "object"} + + result = _prepare_schema_for_model("ollama", model_schema, schema) + assert result == schema + + def test_prepare_schema_for_model_default(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.model = "gpt-4" + schema = {"type": "object"} + + result = _prepare_schema_for_model("openai", model_schema, schema) + assert result == {"schema": schema, "name": "llm_response"} + + def test_invoke_llm_with_structured_output_no_stream_native(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.support_structure_output = True + model_schema.parameter_rules = [ + ParameterRule( + name="response_format", + label={"en_US": ""}, + type=ParameterType.STRING, + help={"en_US": ""}, + options=[ResponseFormat.JSON_SCHEMA], + ) + ] + model_schema.model = "gpt-4o" + + model_instance = MagicMock(spec=ModelInstance) + mock_result = MagicMock(spec=LLMResult) + mock_result.message = AssistantPromptMessage(content='{"result": "success"}') + mock_result.model = "gpt-4o" + mock_result.usage = LLMUsage.empty_usage() + mock_result.system_fingerprint = "fp_native" + mock_result.prompt_messages = [UserPromptMessage(content="hi")] + + model_instance.invoke_llm.return_value = mock_result + + result = invoke_llm_with_structured_output( + provider="openai", + model_schema=model_schema, + model_instance=model_instance, + prompt_messages=[UserPromptMessage(content="hi")], + json_schema={"type": "object"}, + stream=False, + ) + + assert isinstance(result, LLMResultWithStructuredOutput) + assert result.structured_output == {"result": "success"} + assert result.system_fingerprint == "fp_native" + + def test_invoke_llm_with_structured_output_no_stream_prompt_based(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.support_structure_output = False + model_schema.parameter_rules = [ + ParameterRule( + name="response_format", + label={"en_US": ""}, + type=ParameterType.STRING, + help={"en_US": ""}, + options=[ResponseFormat.JSON], + ) + ] + model_schema.model = "claude-3" + + model_instance = MagicMock(spec=ModelInstance) + mock_result = MagicMock(spec=LLMResult) + mock_result.message = AssistantPromptMessage(content='{"result": "success"}') + mock_result.model = "claude-3" + mock_result.usage = LLMUsage.empty_usage() + mock_result.system_fingerprint = "fp_prompt" + mock_result.prompt_messages = [] + + model_instance.invoke_llm.return_value = mock_result + + result = invoke_llm_with_structured_output( + provider="anthropic", + model_schema=model_schema, + model_instance=model_instance, + prompt_messages=[UserPromptMessage(content="hi")], + json_schema={"type": "object"}, + stream=False, + ) + + assert isinstance(result, LLMResultWithStructuredOutput) + assert result.structured_output == {"result": "success"} + assert result.system_fingerprint == "fp_prompt" + + def test_invoke_llm_with_structured_output_no_string_error(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.support_structure_output = False + model_schema.parameter_rules = [] + + model_instance = MagicMock(spec=ModelInstance) + mock_result = MagicMock(spec=LLMResult) + mock_result.message = AssistantPromptMessage(content=[TextPromptMessageContent(data="not a string")]) + + model_instance.invoke_llm.return_value = mock_result + + with pytest.raises(OutputParserError) as excinfo: + invoke_llm_with_structured_output( + provider="anthropic", + model_schema=model_schema, + model_instance=model_instance, + prompt_messages=[], + json_schema={}, + stream=False, + ) + assert "Failed to parse structured output, LLM result is not a string" in str(excinfo.value) + + def test_invoke_llm_with_structured_output_stream(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.support_structure_output = False + model_schema.parameter_rules = [] + model_schema.model = "gpt-4" + + model_instance = MagicMock(spec=ModelInstance) + + # Mock chunks + chunk1 = MagicMock(spec=LLMResultChunk) + chunk1.delta = LLMResultChunkDelta( + index=0, message=AssistantPromptMessage(content='{"key": '), usage=LLMUsage.empty_usage() + ) + chunk1.prompt_messages = [UserPromptMessage(content="hi")] + chunk1.system_fingerprint = "fp1" + + chunk2 = MagicMock(spec=LLMResultChunk) + chunk2.delta = LLMResultChunkDelta(index=0, message=AssistantPromptMessage(content='"value"}')) + chunk2.prompt_messages = [UserPromptMessage(content="hi")] + chunk2.system_fingerprint = "fp1" + + chunk3 = MagicMock(spec=LLMResultChunk) + chunk3.delta = LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage( + content=[ + TextPromptMessageContent(data=" "), + ] + ), + ) + chunk3.prompt_messages = [UserPromptMessage(content="hi")] + chunk3.system_fingerprint = "fp1" + + event4 = MagicMock() + event4.delta = LLMResultChunkDelta(index=0, message=AssistantPromptMessage(content="")) + + model_instance.invoke_llm.return_value = [chunk1, chunk2, chunk3, event4] + + generator = invoke_llm_with_structured_output( + provider="openai", + model_schema=model_schema, + model_instance=model_instance, + prompt_messages=[UserPromptMessage(content="hi")], + json_schema={}, + stream=True, + ) + + chunks = list(generator) + assert len(chunks) == 5 + assert chunks[-1].structured_output == {"key": "value"} + assert chunks[-1].system_fingerprint == "fp1" + assert chunks[-1].prompt_messages == [UserPromptMessage(content="hi")] + + def test_invoke_llm_with_structured_output_stream_no_id_events(self): + model_schema = MagicMock(spec=AIModelEntity) + model_schema.support_structure_output = False + model_schema.parameter_rules = [] + model_schema.model = "gpt-4" + + model_instance = MagicMock(spec=ModelInstance) + model_instance.invoke_llm.return_value = [] + + generator = invoke_llm_with_structured_output( + provider="openai", + model_schema=model_schema, + model_instance=model_instance, + prompt_messages=[], + json_schema={}, + stream=True, + ) + + with pytest.raises(OutputParserError): + list(generator) + + def test_parse_structured_output_empty_string(self): + with pytest.raises(OutputParserError): + _parse_structured_output("") diff --git a/api/tests/unit_tests/core/llm_generator/test_llm_generator.py b/api/tests/unit_tests/core/llm_generator/test_llm_generator.py new file mode 100644 index 0000000000..5b7640696f --- /dev/null +++ b/api/tests/unit_tests/core/llm_generator/test_llm_generator.py @@ -0,0 +1,589 @@ +import json +from unittest.mock import MagicMock, patch + +import pytest + +from core.app.app_config.entities import ModelConfig +from core.llm_generator.entities import RuleCodeGeneratePayload, RuleGeneratePayload, RuleStructuredOutputPayload +from core.llm_generator.llm_generator import LLMGenerator +from dify_graph.model_runtime.entities.llm_entities import LLMMode, LLMResult +from dify_graph.model_runtime.errors.invoke import InvokeAuthorizationError, InvokeError + + +class TestLLMGenerator: + @pytest.fixture + def mock_model_instance(self): + with patch("core.llm_generator.llm_generator.ModelManager") as mock_manager: + instance = MagicMock() + mock_manager.return_value.get_default_model_instance.return_value = instance + mock_manager.return_value.get_model_instance.return_value = instance + yield instance + + @pytest.fixture + def model_config_entity(self): + return ModelConfig(provider="openai", name="gpt-4", mode=LLMMode.CHAT, completion_params={"temperature": 0.7}) + + def test_generate_conversation_name_success(self, mock_model_instance): + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = json.dumps({"Your Output": "Test Conversation Name"}) + mock_model_instance.invoke_llm.return_value = mock_response + + with patch("core.llm_generator.llm_generator.TraceQueueManager") as mock_trace: + name = LLMGenerator.generate_conversation_name("tenant_id", "test query") + assert name == "Test Conversation Name" + mock_trace.assert_called_once() + + def test_generate_conversation_name_truncated(self, mock_model_instance): + long_query = "a" * 2100 + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = json.dumps({"Your Output": "Short Name"}) + mock_model_instance.invoke_llm.return_value = mock_response + + with patch("core.llm_generator.llm_generator.TraceQueueManager"): + name = LLMGenerator.generate_conversation_name("tenant_id", long_query) + assert name == "Short Name" + + def test_generate_conversation_name_empty_answer(self, mock_model_instance): + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "" + mock_model_instance.invoke_llm.return_value = mock_response + + name = LLMGenerator.generate_conversation_name("tenant_id", "test query") + assert name == "" + + def test_generate_conversation_name_json_repair(self, mock_model_instance): + mock_response = MagicMock() + # Invalid JSON that json_repair can fix + mock_response.message.get_text_content.return_value = "{'Your Output': 'Repaired Name'}" + mock_model_instance.invoke_llm.return_value = mock_response + + with patch("core.llm_generator.llm_generator.TraceQueueManager"): + name = LLMGenerator.generate_conversation_name("tenant_id", "test query") + assert name == "Repaired Name" + + def test_generate_conversation_name_not_dict_result(self, mock_model_instance): + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '["not a dict"]' + mock_model_instance.invoke_llm.return_value = mock_response + with patch("core.llm_generator.llm_generator.TraceQueueManager"): + name = LLMGenerator.generate_conversation_name("tenant_id", "test query") + assert name == "test query" + + def test_generate_conversation_name_no_output_in_dict(self, mock_model_instance): + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"something": "else"}' + mock_model_instance.invoke_llm.return_value = mock_response + with patch("core.llm_generator.llm_generator.TraceQueueManager"): + name = LLMGenerator.generate_conversation_name("tenant_id", "test query") + assert name == "test query" + + def test_generate_conversation_name_long_output(self, mock_model_instance): + long_output = "a" * 100 + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = json.dumps({"Your Output": long_output}) + mock_model_instance.invoke_llm.return_value = mock_response + + with patch("core.llm_generator.llm_generator.TraceQueueManager"): + name = LLMGenerator.generate_conversation_name("tenant_id", "test query") + assert len(name) == 78 # 75 + "..." + assert name.endswith("...") + + def test_generate_suggested_questions_after_answer_success(self, mock_model_instance): + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '["Question 1?", "Question 2?"]' + mock_model_instance.invoke_llm.return_value = mock_response + + questions = LLMGenerator.generate_suggested_questions_after_answer("tenant_id", "histories") + assert len(questions) == 2 + assert questions[0] == "Question 1?" + + def test_generate_suggested_questions_after_answer_auth_error(self, mock_model_instance): + with patch("core.llm_generator.llm_generator.ModelManager") as mock_manager: + mock_manager.return_value.get_default_model_instance.side_effect = InvokeAuthorizationError("Auth failed") + questions = LLMGenerator.generate_suggested_questions_after_answer("tenant_id", "histories") + assert questions == [] + + def test_generate_suggested_questions_after_answer_invoke_error(self, mock_model_instance): + mock_model_instance.invoke_llm.side_effect = InvokeError("Invoke failed") + questions = LLMGenerator.generate_suggested_questions_after_answer("tenant_id", "histories") + assert questions == [] + + def test_generate_suggested_questions_after_answer_exception(self, mock_model_instance): + mock_model_instance.invoke_llm.side_effect = Exception("Random error") + questions = LLMGenerator.generate_suggested_questions_after_answer("tenant_id", "histories") + assert questions == [] + + def test_generate_rule_config_no_variable_success(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=True + ) + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "Generated Prompt" + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert result["prompt"] == "Generated Prompt" + assert result["error"] == "" + + def test_generate_rule_config_no_variable_invoke_error(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=True + ) + mock_model_instance.invoke_llm.side_effect = InvokeError("Invoke failed") + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert "Failed to generate rule config" in result["error"] + + def test_generate_rule_config_no_variable_exception(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=True + ) + mock_model_instance.invoke_llm.side_effect = Exception("Random error") + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert "Failed to generate rule config" in result["error"] + assert "Random error" in result["error"] + + def test_generate_rule_config_with_variable_success(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=False + ) + # Mocking 3 calls for invoke_llm + mock_res1 = MagicMock() + mock_res1.message.get_text_content.return_value = "Step 1 Prompt" + + mock_res2 = MagicMock() + mock_res2.message.get_text_content.return_value = '"var1", "var2"' + + mock_res3 = MagicMock() + mock_res3.message.get_text_content.return_value = "Opening Statement" + + mock_model_instance.invoke_llm.side_effect = [mock_res1, mock_res2, mock_res3] + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert result["prompt"] == "Step 1 Prompt" + assert result["variables"] == ["var1", "var2"] + assert result["opening_statement"] == "Opening Statement" + assert result["error"] == "" + + def test_generate_rule_config_with_variable_step1_error(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=False + ) + mock_model_instance.invoke_llm.side_effect = InvokeError("Step 1 Failed") + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert "Failed to generate prefix prompt" in result["error"] + + def test_generate_rule_config_with_variable_step2_error(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=False + ) + mock_res1 = MagicMock() + mock_res1.message.get_text_content.return_value = "Step 1 Prompt" + + # Step 2 fails + mock_model_instance.invoke_llm.side_effect = [mock_res1, InvokeError("Step 2 Failed"), MagicMock()] + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert "Failed to generate variables" in result["error"] + + def test_generate_rule_config_with_variable_step3_error(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=False + ) + mock_res1 = MagicMock() + mock_res1.message.get_text_content.return_value = "Step 1 Prompt" + + mock_res2 = MagicMock() + mock_res2.message.get_text_content.return_value = '"var1"' + + # Step 3 fails + mock_model_instance.invoke_llm.side_effect = [mock_res1, mock_res2, InvokeError("Step 3 Failed")] + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert "Failed to generate conversation opener" in result["error"] + + def test_generate_rule_config_with_variable_exception(self, mock_model_instance, model_config_entity): + payload = RuleGeneratePayload( + instruction="test instruction", model_config=model_config_entity, no_variable=False + ) + # Mock any step to throw Exception + mock_model_instance.invoke_llm.side_effect = Exception("Unexpected multi-step error") + + result = LLMGenerator.generate_rule_config("tenant_id", payload) + assert "Failed to handle unexpected exception" in result["error"] + assert "Unexpected multi-step error" in result["error"] + + def test_generate_code_python_success(self, mock_model_instance, model_config_entity): + payload = RuleCodeGeneratePayload( + instruction="print hello", code_language="python", model_config=model_config_entity + ) + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "print('hello')" + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.generate_code("tenant_id", payload) + assert result["code"] == "print('hello')" + assert result["language"] == "python" + + def test_generate_code_javascript_success(self, mock_model_instance, model_config_entity): + payload = RuleCodeGeneratePayload( + instruction="console log hello", code_language="javascript", model_config=model_config_entity + ) + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "console.log('hello')" + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.generate_code("tenant_id", payload) + assert result["code"] == "console.log('hello')" + assert result["language"] == "javascript" + + def test_generate_code_invoke_error(self, mock_model_instance, model_config_entity): + payload = RuleCodeGeneratePayload(instruction="error", code_language="python", model_config=model_config_entity) + mock_model_instance.invoke_llm.side_effect = InvokeError("Invoke failed") + + result = LLMGenerator.generate_code("tenant_id", payload) + assert "Failed to generate code" in result["error"] + + def test_generate_code_exception(self, mock_model_instance, model_config_entity): + payload = RuleCodeGeneratePayload(instruction="error", code_language="python", model_config=model_config_entity) + mock_model_instance.invoke_llm.side_effect = Exception("Random error") + + result = LLMGenerator.generate_code("tenant_id", payload) + assert "An unexpected error occurred" in result["error"] + + def test_generate_qa_document_success(self, mock_model_instance): + mock_response = MagicMock(spec=LLMResult) + mock_response.message = MagicMock() + mock_response.message.get_text_content.return_value = "QA Document Content" + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.generate_qa_document("tenant_id", "query", "English") + assert result == "QA Document Content" + + def test_generate_qa_document_type_error(self, mock_model_instance): + mock_model_instance.invoke_llm.return_value = "Not an LLMResult" + + with pytest.raises(TypeError, match="Expected LLMResult when stream=False"): + LLMGenerator.generate_qa_document("tenant_id", "query", "English") + + def test_generate_structured_output_success(self, mock_model_instance, model_config_entity): + payload = RuleStructuredOutputPayload(instruction="generate schema", model_config=model_config_entity) + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"type": "object", "properties": {}}' + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.generate_structured_output("tenant_id", payload) + parsed_output = json.loads(result["output"]) + assert parsed_output["type"] == "object" + assert result["error"] == "" + + def test_generate_structured_output_json_repair(self, mock_model_instance, model_config_entity): + payload = RuleStructuredOutputPayload(instruction="generate schema", model_config=model_config_entity) + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "{'type': 'object'}" + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.generate_structured_output("tenant_id", payload) + parsed_output = json.loads(result["output"]) + assert parsed_output["type"] == "object" + + def test_generate_structured_output_not_dict_or_list(self, mock_model_instance, model_config_entity): + payload = RuleStructuredOutputPayload(instruction="generate schema", model_config=model_config_entity) + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "true" # parsed as bool + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.generate_structured_output("tenant_id", payload) + assert "An unexpected error occurred" in result["error"] + assert "Failed to parse structured output" in result["error"] + + def test_generate_structured_output_invoke_error(self, mock_model_instance, model_config_entity): + payload = RuleStructuredOutputPayload(instruction="error", model_config=model_config_entity) + mock_model_instance.invoke_llm.side_effect = InvokeError("Invoke failed") + + result = LLMGenerator.generate_structured_output("tenant_id", payload) + assert "Failed to generate JSON Schema" in result["error"] + + def test_generate_structured_output_exception(self, mock_model_instance, model_config_entity): + payload = RuleStructuredOutputPayload(instruction="error", model_config=model_config_entity) + mock_model_instance.invoke_llm.side_effect = Exception("Random error") + + result = LLMGenerator.generate_structured_output("tenant_id", payload) + assert "An unexpected error occurred" in result["error"] + + def test_instruction_modify_legacy_no_last_run(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session.query") as mock_query: + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = None + + # Mock __instruction_modify_common call via invoke_llm + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"modified": "prompt"}' + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current", "instruction", model_config_entity, "ideal" + ) + assert result == {"modified": "prompt"} + + def test_instruction_modify_legacy_with_last_run(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session.query") as mock_query: + last_run = MagicMock() + last_run.query = "q" + last_run.answer = "a" + last_run.error = "e" + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = last_run + + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"modified": "prompt"}' + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current", "instruction", model_config_entity, "ideal" + ) + assert result == {"modified": "prompt"} + + def test_instruction_modify_workflow_app_not_found(self): + with patch("extensions.ext_database.db.session") as mock_session: + mock_session.return_value.query.return_value.where.return_value.first.return_value = None + with pytest.raises(ValueError, match="App not found."): + LLMGenerator.instruction_modify_workflow("t", "f", "n", "c", "i", MagicMock(), "o", MagicMock()) + + def test_instruction_modify_workflow_no_workflow(self): + with patch("extensions.ext_database.db.session") as mock_session: + mock_session.return_value.query.return_value.where.return_value.first.return_value = MagicMock() + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = None + with pytest.raises(ValueError, match="Workflow not found for the given app model."): + LLMGenerator.instruction_modify_workflow("t", "f", "n", "c", "i", MagicMock(), "o", workflow_service) + + def test_instruction_modify_workflow_success(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session") as mock_session: + mock_session.return_value.query.return_value.where.return_value.first.return_value = MagicMock() + workflow = MagicMock() + workflow.graph_dict = {"graph": {"nodes": [{"id": "node_id", "data": {"type": "llm"}}]}} + + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = workflow + + last_run = MagicMock() + last_run.node_type = "llm" + last_run.status = "s" + last_run.error = "e" + # Return regular values, not Mocks + last_run.execution_metadata_dict = {"agent_log": [{"status": "s", "error": "e", "data": {}}]} + last_run.load_full_inputs.return_value = {"in": "val"} + + workflow_service.get_node_last_run.return_value = last_run + + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"modified": "workflow"}' + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.instruction_modify_workflow( + "tenant_id", + "flow_id", + "node_id", + "current", + "instruction", + model_config_entity, + "ideal", + workflow_service, + ) + assert result == {"modified": "workflow"} + + def test_instruction_modify_workflow_no_last_run_fallback(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session") as mock_session: + mock_session.return_value.query.return_value.where.return_value.first.return_value = MagicMock() + workflow = MagicMock() + workflow.graph_dict = {"graph": {"nodes": [{"id": "node_id", "data": {"type": "code"}}]}} + + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = workflow + workflow_service.get_node_last_run.return_value = None + + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"modified": "fallback"}' + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.instruction_modify_workflow( + "tenant_id", + "flow_id", + "node_id", + "current", + "instruction", + model_config_entity, + "ideal", + workflow_service, + ) + assert result == {"modified": "fallback"} + + def test_instruction_modify_workflow_node_type_fallback(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session") as mock_session: + mock_session.return_value.query.return_value.where.return_value.first.return_value = MagicMock() + workflow = MagicMock() + # Cause exception in node_type logic + workflow.graph_dict = {"graph": {"nodes": []}} + + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = workflow + workflow_service.get_node_last_run.return_value = None + + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"modified": "fallback"}' + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.instruction_modify_workflow( + "tenant_id", + "flow_id", + "node_id", + "current", + "instruction", + model_config_entity, + "ideal", + workflow_service, + ) + assert result == {"modified": "fallback"} + + def test_instruction_modify_workflow_empty_agent_log(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session") as mock_session: + mock_session.return_value.query.return_value.where.return_value.first.return_value = MagicMock() + workflow = MagicMock() + workflow.graph_dict = {"graph": {"nodes": [{"id": "node_id", "data": {"type": "llm"}}]}} + + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = workflow + + last_run = MagicMock() + last_run.node_type = "llm" + last_run.status = "s" + last_run.error = "e" + # Return regular empty list, not a Mock + last_run.execution_metadata_dict = {"agent_log": []} + last_run.load_full_inputs.return_value = {} + + workflow_service.get_node_last_run.return_value = last_run + + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"modified": "workflow"}' + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.instruction_modify_workflow( + "tenant_id", + "flow_id", + "node_id", + "current", + "instruction", + model_config_entity, + "ideal", + workflow_service, + ) + assert result == {"modified": "workflow"} + + def test_instruction_modify_common_placeholders(self, mock_model_instance, model_config_entity): + # Testing placeholders replacement via instruction_modify_legacy for convenience + with patch("extensions.ext_database.db.session.query") as mock_query: + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = None + + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"ok": true}' + mock_model_instance.invoke_llm.return_value = mock_response + + instruction = "Test {{#last_run#}} and {{#current#}} and {{#error_message#}}" + LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current_val", instruction, model_config_entity, "ideal" + ) + + # Verify the call to invoke_llm contains replaced instruction + args, kwargs = mock_model_instance.invoke_llm.call_args + prompt_messages = kwargs["prompt_messages"] + user_msg = prompt_messages[1].content + user_msg_dict = json.loads(user_msg) + assert "null" in user_msg_dict["instruction"] # because last_run is None and current is current_val etc. + assert "current_val" in user_msg_dict["instruction"] + + def test_instruction_modify_common_no_braces(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session.query") as mock_query: + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = None + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "No braces here" + mock_model_instance.invoke_llm.return_value = mock_response + result = LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current", "instruction", model_config_entity, "ideal" + ) + assert "An unexpected error occurred" in result["error"] + assert "Could not find a valid JSON object" in result["error"] + + def test_instruction_modify_common_not_dict(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session.query") as mock_query: + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = None + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "[1, 2, 3]" + mock_model_instance.invoke_llm.return_value = mock_response + result = LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current", "instruction", model_config_entity, "ideal" + ) + # The exception message is "Expected a JSON object, but got list" + assert "An unexpected error occurred" in result["error"] + + def test_instruction_modify_common_other_node_type(self, mock_model_instance, model_config_entity): + with patch("core.llm_generator.llm_generator.ModelManager") as mock_manager: + instance = MagicMock() + mock_manager.return_value.get_model_instance.return_value = instance + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = '{"ok": true}' + instance.invoke_llm.return_value = mock_response + + with patch("extensions.ext_database.db.session") as mock_session: + mock_session.return_value.query.return_value.where.return_value.first.return_value = MagicMock() + workflow = MagicMock() + workflow.graph_dict = {"graph": {"nodes": [{"id": "node_id", "data": {"type": "other"}}]}} + + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = workflow + workflow_service.get_node_last_run.return_value = None + + LLMGenerator.instruction_modify_workflow( + "tenant_id", + "flow_id", + "node_id", + "current", + "instruction", + model_config_entity, + "ideal", + workflow_service, + ) + + def test_instruction_modify_common_invoke_error(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session.query") as mock_query: + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = None + mock_model_instance.invoke_llm.side_effect = InvokeError("Invoke Failed") + + result = LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current", "instruction", model_config_entity, "ideal" + ) + assert "Failed to generate code" in result["error"] + + def test_instruction_modify_common_exception(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session.query") as mock_query: + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = None + mock_model_instance.invoke_llm.side_effect = Exception("Random error") + + result = LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current", "instruction", model_config_entity, "ideal" + ) + assert "An unexpected error occurred" in result["error"] + + def test_instruction_modify_common_json_error(self, mock_model_instance, model_config_entity): + with patch("extensions.ext_database.db.session.query") as mock_query: + mock_query.return_value.where.return_value.order_by.return_value.first.return_value = None + + mock_response = MagicMock() + mock_response.message.get_text_content.return_value = "No JSON here" + mock_model_instance.invoke_llm.return_value = mock_response + + result = LLMGenerator.instruction_modify_legacy( + "tenant_id", "flow_id", "current", "instruction", model_config_entity, "ideal" + ) + assert "An unexpected error occurred" in result["error"] diff --git a/api/tests/unit_tests/core/logging/test_filters.py b/api/tests/unit_tests/core/logging/test_filters.py index 7c2767266f..a8b186ac8a 100644 --- a/api/tests/unit_tests/core/logging/test_filters.py +++ b/api/tests/unit_tests/core/logging/test_filters.py @@ -82,6 +82,68 @@ class TestTraceContextFilter: assert log_record.trace_id == "5b8aa5a2d2c872e8321cf37308d69df2" assert log_record.span_id == "051581bf3bb55c45" + def test_otel_context_invalid_trace_id(self, log_record): + from core.logging.filters import TraceContextFilter + + mock_span = mock.MagicMock() + mock_context = mock.MagicMock() + mock_context.trace_id = 0 + mock_context.is_valid = True + mock_span.get_span_context.return_value = mock_context + + # Use mocks for base context to ensure we can test the fallback + with ( + mock.patch("opentelemetry.trace.get_current_span", return_value=mock_span), + mock.patch("opentelemetry.trace.span.INVALID_TRACE_ID", 0), + mock.patch("core.logging.filters.get_trace_id", return_value=""), + ): + filter = TraceContextFilter() + filter.filter(log_record) + assert log_record.trace_id == "" + + def test_otel_context_invalid_span_id(self, log_record): + from core.logging.filters import TraceContextFilter + + mock_span = mock.MagicMock() + mock_context = mock.MagicMock() + mock_context.trace_id = 0x5B8AA5A2D2C872E8321CF37308D69DF2 + mock_context.span_id = 0 + mock_context.is_valid = True + mock_span.get_span_context.return_value = mock_context + + with ( + mock.patch("opentelemetry.trace.get_current_span", return_value=mock_span), + mock.patch("opentelemetry.trace.span.INVALID_TRACE_ID", 0), + mock.patch("opentelemetry.trace.span.INVALID_SPAN_ID", 0), + ): + filter = TraceContextFilter() + filter.filter(log_record) + assert log_record.trace_id == "5b8aa5a2d2c872e8321cf37308d69df2" + assert log_record.span_id == "" + + def test_otel_context_span_none(self, log_record): + from core.logging.filters import TraceContextFilter + + with ( + mock.patch("opentelemetry.trace.get_current_span", return_value=None), + mock.patch("core.logging.filters.get_trace_id", return_value=""), + ): + filter = TraceContextFilter() + filter.filter(log_record) + assert log_record.trace_id == "" + + def test_otel_context_exception(self, log_record): + from core.logging.filters import TraceContextFilter + + # Trigger exception in OTEL block + with ( + mock.patch("opentelemetry.trace.get_current_span", side_effect=Exception), + mock.patch("core.logging.filters.get_trace_id", return_value=""), + ): + filter = TraceContextFilter() + filter.filter(log_record) + assert log_record.trace_id == "" + class TestIdentityContextFilter: def test_sets_empty_identity_without_request_context(self, log_record): @@ -114,3 +176,119 @@ class TestIdentityContextFilter: result = filter.filter(log_record) assert result is True assert log_record.tenant_id == "" + + def test_sets_empty_identity_unauthenticated(self, log_record): + from core.logging.filters import IdentityContextFilter + + mock_user = mock.MagicMock() + mock_user.is_authenticated = False + + with ( + mock.patch("flask.has_request_context", return_value=True), + mock.patch("flask_login.current_user", mock_user), + ): + filter = IdentityContextFilter() + filter.filter(log_record) + assert log_record.user_id == "" + + def test_sets_identity_for_account(self, log_record): + from core.logging.filters import IdentityContextFilter + + class MockAccount: + pass + + mock_user = MockAccount() + mock_user.id = "account_id" + mock_user.current_tenant_id = "tenant_id" + mock_user.is_authenticated = True + + with ( + mock.patch("flask.has_request_context", return_value=True), + mock.patch("models.Account", MockAccount), + mock.patch("flask_login.current_user", mock_user), + ): + filter = IdentityContextFilter() + filter.filter(log_record) + + assert log_record.tenant_id == "tenant_id" + assert log_record.user_id == "account_id" + assert log_record.user_type == "account" + + def test_sets_identity_for_account_no_tenant(self, log_record): + from core.logging.filters import IdentityContextFilter + + class MockAccount: + pass + + mock_user = MockAccount() + mock_user.id = "account_id" + mock_user.current_tenant_id = None + mock_user.is_authenticated = True + + with ( + mock.patch("flask.has_request_context", return_value=True), + mock.patch("models.Account", MockAccount), + mock.patch("flask_login.current_user", mock_user), + ): + filter = IdentityContextFilter() + filter.filter(log_record) + + assert log_record.tenant_id == "" + assert log_record.user_id == "account_id" + assert log_record.user_type == "account" + + def test_sets_identity_for_end_user(self, log_record): + from core.logging.filters import IdentityContextFilter + + class MockEndUser: + pass + + class AnotherClass: + pass + + mock_user = MockEndUser() + mock_user.id = "end_user_id" + mock_user.tenant_id = "tenant_id" + mock_user.type = "custom_type" + mock_user.is_authenticated = True + + with ( + mock.patch("flask.has_request_context", return_value=True), + mock.patch("models.model.EndUser", MockEndUser), + mock.patch("models.Account", AnotherClass), + mock.patch("flask_login.current_user", mock_user), + ): + filter = IdentityContextFilter() + filter.filter(log_record) + + assert log_record.tenant_id == "tenant_id" + assert log_record.user_id == "end_user_id" + assert log_record.user_type == "custom_type" + + def test_sets_identity_for_end_user_default_type(self, log_record): + from core.logging.filters import IdentityContextFilter + + class MockEndUser: + pass + + class AnotherClass: + pass + + mock_user = MockEndUser() + mock_user.id = "end_user_id" + mock_user.tenant_id = "tenant_id" + mock_user.type = None + mock_user.is_authenticated = True + + with ( + mock.patch("flask.has_request_context", return_value=True), + mock.patch("models.model.EndUser", MockEndUser), + mock.patch("models.Account", AnotherClass), + mock.patch("flask_login.current_user", mock_user), + ): + filter = IdentityContextFilter() + filter.filter(log_record) + + assert log_record.tenant_id == "tenant_id" + assert log_record.user_id == "end_user_id" + assert log_record.user_type == "end_user" diff --git a/api/tests/unit_tests/core/mcp/auth/test_auth_flow.py b/api/tests/unit_tests/core/mcp/auth/test_auth_flow.py index 60f37b6de0..abf3c60fe0 100644 --- a/api/tests/unit_tests/core/mcp/auth/test_auth_flow.py +++ b/api/tests/unit_tests/core/mcp/auth/test_auth_flow.py @@ -1,27 +1,39 @@ """Unit tests for MCP OAuth authentication flow.""" +import json from unittest.mock import Mock, patch +import httpx import pytest +from pydantic import ValidationError from core.entities.mcp_provider import MCPProviderEntity +from core.helper import ssrf_proxy from core.mcp.auth.auth_flow import ( OAUTH_STATE_EXPIRY_SECONDS, OAUTH_STATE_REDIS_KEY_PREFIX, OAuthCallbackState, _create_secure_redis_state, + _parse_token_response, _retrieve_redis_state, auth, + build_oauth_authorization_server_metadata_discovery_urls, + build_protected_resource_metadata_discovery_urls, check_support_resource_discovery, + client_credentials_flow, + discover_oauth_authorization_server_metadata, discover_oauth_metadata, + discover_protected_resource_metadata, exchange_authorization, generate_pkce_challenge, + get_effective_scope, handle_callback, refresh_authorization, register_client, start_authorization, ) from core.mcp.entities import AuthActionType, AuthResult +from core.mcp.error import MCPRefreshTokenError from core.mcp.types import ( LATEST_PROTOCOL_VERSION, OAuthClientInformation, @@ -764,3 +776,555 @@ class TestAuthOrchestration: auth(mock_provider, authorization_code="auth-code") assert "Existing OAuth client information is required" in str(exc_info.value) + + def test_generate_pkce_challenge(self): + verifier, challenge = generate_pkce_challenge() + assert verifier + assert challenge + assert "=" not in verifier + assert "=" not in challenge + + def test_build_protected_resource_metadata_discovery_urls(self): + # Case 1: WWW-Auth URL provided + urls = build_protected_resource_metadata_discovery_urls( + "https://auth.example.com/prm", "https://api.example.com" + ) + assert "https://auth.example.com/prm" in urls + assert "https://api.example.com/.well-known/oauth-protected-resource" in urls + + # Case 2: No WWW-Auth URL, with path + urls = build_protected_resource_metadata_discovery_urls(None, "https://api.example.com/v1") + assert "https://api.example.com/.well-known/oauth-protected-resource/v1" in urls + assert "https://api.example.com/.well-known/oauth-protected-resource" in urls + + # Case 3: No path + urls = build_protected_resource_metadata_discovery_urls(None, "https://api.example.com") + assert urls == ["https://api.example.com/.well-known/oauth-protected-resource"] + + def test_build_oauth_authorization_server_metadata_discovery_urls(self): + # Case 1: with auth_server_url + urls = build_oauth_authorization_server_metadata_discovery_urls( + "https://auth.example.com", "https://api.example.com" + ) + assert "https://auth.example.com/.well-known/oauth-authorization-server" in urls + assert "https://auth.example.com/.well-known/openid-configuration" in urls + + # Case 2: with path + urls = build_oauth_authorization_server_metadata_discovery_urls(None, "https://api.example.com/tenant") + assert "https://api.example.com/.well-known/oauth-authorization-server/tenant" in urls + assert "https://api.example.com/tenant/.well-known/openid-configuration" in urls + + @patch("core.helper.ssrf_proxy.get") + def test_discover_protected_resource_metadata(self, mock_get): + # Success + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "resource": "https://api.example.com", + "authorization_servers": ["https://auth"], + } + mock_get.return_value = mock_response + result = discover_protected_resource_metadata(None, "https://api.example.com") + assert result is not None + assert result.resource == "https://api.example.com" + + # 404 then Success + res404 = Mock() + res404.status_code = 404 + mock_get.side_effect = [res404, mock_response] + result = discover_protected_resource_metadata(None, "https://api.example.com/path") + assert result is not None + assert result.resource == "https://api.example.com" + + # Error handling + mock_get.side_effect = httpx.RequestError("Error") + result = discover_protected_resource_metadata(None, "https://api.example.com") + assert result is None + + @patch("core.helper.ssrf_proxy.get") + def test_discover_oauth_authorization_server_metadata(self, mock_get): + # Success + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "authorization_endpoint": "https://auth.example.com/auth", + "token_endpoint": "https://auth.example.com/token", + "response_types_supported": ["code"], + } + mock_get.return_value = mock_response + result = discover_oauth_authorization_server_metadata(None, "https://api.example.com") + assert result is not None + assert result.authorization_endpoint == "https://auth.example.com/auth" + + # 404 + res404 = Mock() + res404.status_code = 404 + mock_get.side_effect = [res404, mock_response] + result = discover_oauth_authorization_server_metadata(None, "https://api.example.com/tenant") + assert result is not None + assert result.authorization_endpoint == "https://auth.example.com/auth" + + # ValidationError + mock_response.json.return_value = {"invalid": "data"} + mock_get.side_effect = None + mock_get.return_value = mock_response + result = discover_oauth_authorization_server_metadata(None, "https://api.example.com") + assert result is None + + def test_get_effective_scope(self): + prm = ProtectedResourceMetadata( + resource="https://api.example.com", + authorization_servers=["https://auth"], + scopes_supported=["read", "write"], + ) + asm = OAuthMetadata( + authorization_endpoint="https://auth.example.com/auth", + token_endpoint="https://auth.example.com/token", + response_types_supported=["code"], + scopes_supported=["openid", "profile"], + ) + + # 1. WWW-Auth priority + assert get_effective_scope("scope1", prm, asm, "client") == "scope1" + # 2. PRM priority + assert get_effective_scope(None, prm, asm, "client") == "read write" + # 3. ASM priority + assert get_effective_scope(None, None, asm, "client") == "openid profile" + # 4. Client configured + assert get_effective_scope(None, None, None, "client") == "client" + + @patch("core.mcp.auth.auth_flow.redis_client") + def test_redis_state_management(self, mock_redis): + state_data = OAuthCallbackState( + provider_id="p1", + tenant_id="t1", + server_url="https://api", + metadata=None, + client_information=OAuthClientInformation(client_id="c1"), + code_verifier="cv", + redirect_uri="https://re", + ) + + # Create + state_key = _create_secure_redis_state(state_data) + assert state_key + mock_redis.setex.assert_called_once() + + # Retrieve Success + mock_redis.get.return_value = state_data.model_dump_json() + retrieved = _retrieve_redis_state(state_key) + assert retrieved.provider_id == "p1" + mock_redis.delete.assert_called_once() + + # Retrieve Failure - Not found + mock_redis.get.return_value = None + with pytest.raises(ValueError, match="expired or does not exist"): + _retrieve_redis_state("absent") + + # Retrieve Failure - Invalid JSON + mock_redis.get.return_value = "invalid" + with pytest.raises(ValueError, match="Invalid state parameter"): + _retrieve_redis_state("invalid") + + @patch("core.mcp.auth.auth_flow._retrieve_redis_state") + @patch("core.mcp.auth.auth_flow.exchange_authorization") + def test_handle_callback(self, mock_exchange, mock_retrieve): + state = Mock(spec=OAuthCallbackState) + state.server_url = "https://api" + state.metadata = None + state.client_information = Mock() + state.code_verifier = "cv" + state.redirect_uri = "https://re" + mock_retrieve.return_value = state + + tokens = Mock(spec=OAuthTokens) + mock_exchange.return_value = tokens + + s, t = handle_callback("key", "code") + assert s == state + assert t == tokens + + @patch("core.helper.ssrf_proxy.get") + def test_check_support_resource_discovery(self, mock_get): + # Case 1: authorization_servers (plural) + res = Mock() + res.status_code = 200 + res.json.return_value = {"authorization_servers": ["https://auth1"]} + mock_get.return_value = res + supported, url = check_support_resource_discovery("https://api") + assert supported is True + assert url == "https://auth1" + + # Case 2: authorization_server_url (singular alias) + res.json.return_value = {"authorization_server_url": ["https://auth2"]} + supported, url = check_support_resource_discovery("https://api") + assert supported is True + assert url == "https://auth2" + + # Case 3: Missing fields + res.json.return_value = {"nothing": []} + supported, url = check_support_resource_discovery("https://api") + assert supported is False + + # Case 4: 404 + res.status_code = 404 + supported, url = check_support_resource_discovery("https://api") + assert supported is False + + # Case 5: RequestError + mock_get.side_effect = httpx.RequestError("Error") + supported, url = check_support_resource_discovery("https://api") + assert supported is False + + def test_discover_oauth_metadata(self): + with patch("core.mcp.auth.auth_flow.discover_protected_resource_metadata") as mock_prm: + with patch("core.mcp.auth.auth_flow.discover_oauth_authorization_server_metadata") as mock_asm: + mock_prm.return_value = ProtectedResourceMetadata( + resource="https://api", authorization_servers=["https://auth"] + ) + mock_asm.return_value = Mock(spec=OAuthMetadata) + + asm, prm, hint = discover_oauth_metadata("https://api") + assert asm == mock_asm.return_value + assert prm == mock_prm.return_value + mock_asm.assert_called_with("https://auth", "https://api", None) + + def test_start_authorization(self): + metadata = OAuthMetadata( + authorization_endpoint="https://auth/authorize", + token_endpoint="https://auth/token", + response_types_supported=["code"], + ) + client_info = OAuthClientInformation(client_id="c1") + + with patch("core.mcp.auth.auth_flow._create_secure_redis_state") as mock_create: + mock_create.return_value = "state-key" + + # Success with scope + url, verifier = start_authorization("https://api", metadata, client_info, "https://re", "p1", "t1", "read") + assert "scope=read" in url + assert "state=state-key" in url + + # Success without metadata + url, verifier = start_authorization("https://api", None, client_info, "https://re", "p1", "t1") + assert "https://api/authorize" in url + + # Failure: incompatible auth server + metadata.response_types_supported = ["implicit"] + with pytest.raises(ValueError, match="Incompatible auth server"): + start_authorization("https://api", metadata, client_info, "https://re", "p1", "t1") + + def test_parse_token_response(self): + # Case 1: JSON + res = Mock() + res.headers = {"content-type": "application/json"} + res.json.return_value = {"access_token": "at", "token_type": "Bearer"} + tokens = _parse_token_response(res) + assert tokens.access_token == "at" + + # Case 2: Form-urlencoded + res.headers = {"content-type": "application/x-www-form-urlencoded"} + res.text = "access_token=at2&token_type=Bearer" + tokens = _parse_token_response(res) + assert tokens.access_token == "at2" + + # Case 3: No content-type, but JSON + res.headers = {} + res.json.return_value = {"access_token": "at3", "token_type": "Bearer"} + tokens = _parse_token_response(res) + assert tokens.access_token == "at3" + + # Case 4: No content-type, not JSON, but Form + res.json.side_effect = json.JSONDecodeError("msg", "doc", 0) + res.text = "access_token=at4&token_type=Bearer" + tokens = _parse_token_response(res) + assert tokens.access_token == "at4" + + # Case 5: Validation Error fallback + res.json.side_effect = ValidationError.from_exception_data("error", []) + res.text = "access_token=at5&token_type=Bearer" + tokens = _parse_token_response(res) + assert tokens.access_token == "at5" + + @patch("core.helper.ssrf_proxy.post") + def test_exchange_authorization(self, mock_post): + client_info = OAuthClientInformation(client_id="c1", client_secret="s1") + metadata = OAuthMetadata( + authorization_endpoint="https://auth/authorize", + token_endpoint="https://auth/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ) + + # Success + res = Mock() + res.is_success = True + res.headers = {"content-type": "application/json"} + res.json.return_value = {"access_token": "at", "token_type": "Bearer"} + mock_post.return_value = res + + tokens = exchange_authorization("https://api", metadata, client_info, "code", "verifier", "https://re") + assert tokens.access_token == "at" + + # Failure: Unsupported grant type + metadata.grant_types_supported = ["client_credentials"] + with pytest.raises(ValueError, match="Incompatible auth server"): + exchange_authorization("https://api", metadata, client_info, "code", "verifier", "https://re") + + # Failure: HTTP error + metadata.grant_types_supported = ["authorization_code"] + res.is_success = False + res.status_code = 400 + with pytest.raises(ValueError, match="Token exchange failed"): + exchange_authorization("https://api", metadata, client_info, "code", "verifier", "https://re") + + @patch("core.helper.ssrf_proxy.post") + def test_refresh_authorization(self, mock_post): + # Case 1: with client_secret + client_info = OAuthClientInformation(client_id="c1", client_secret="s1") + + # Success + res = Mock() + res.is_success = True + res.headers = {"content-type": "application/json"} + res.json.return_value = {"access_token": "at_new", "token_type": "Bearer"} + mock_post.return_value = res + + tokens = refresh_authorization("https://api", None, client_info, "rt") + assert tokens.access_token == "at_new" + assert mock_post.call_args[1]["data"]["client_secret"] == "s1" + + # Failure: MaxRetriesExceededError + mock_post.side_effect = ssrf_proxy.MaxRetriesExceededError("Too many retries") + with pytest.raises(MCPRefreshTokenError): + refresh_authorization("https://api", None, client_info, "rt") + + # Failure: HTTP error + mock_post.side_effect = None + res.is_success = False + res.text = "error_msg" + with pytest.raises(MCPRefreshTokenError, match="error_msg"): + refresh_authorization("https://api", None, client_info, "rt") + + # Failure: Incompatible metadata + metadata = OAuthMetadata( + authorization_endpoint="https://auth/auth", + token_endpoint="https://auth/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ) + with pytest.raises(ValueError, match="Incompatible auth server"): + refresh_authorization("https://api", metadata, client_info, "rt") + + @patch("core.helper.ssrf_proxy.post") + def test_client_credentials_flow(self, mock_post): + client_info = OAuthClientInformation(client_id="c1", client_secret="s1") + + # Success with secret + res = Mock() + res.is_success = True + res.headers = {"content-type": "application/json"} + res.json.return_value = {"access_token": "at_cc", "token_type": "Bearer"} + mock_post.return_value = res + + tokens = client_credentials_flow("https://api", None, client_info, "read") + assert tokens.access_token == "at_cc" + args, kwargs = mock_post.call_args + assert "Authorization" in kwargs["headers"] + + # Success without secret + client_info_no_secret = OAuthClientInformation(client_id="c2") + tokens = client_credentials_flow("https://api", None, client_info_no_secret) + args, kwargs = mock_post.call_args + assert kwargs["data"]["client_id"] == "c2" + + # Failure: Incompatible metadata + metadata = OAuthMetadata( + authorization_endpoint="https://auth/auth", + token_endpoint="https://auth/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ) + with pytest.raises(ValueError, match="Incompatible auth server"): + client_credentials_flow("https://api", metadata, client_info) + + # Failure: HTTP error + res.is_success = False + res.status_code = 401 + res.text = "Unauthorized" + with pytest.raises(ValueError, match="Client credentials token request failed"): + client_credentials_flow("https://api", None, client_info) + + @patch("core.helper.ssrf_proxy.post") + def test_register_client(self, mock_post): + # Case 1: Success with metadata + metadata = OAuthMetadata( + authorization_endpoint="https://auth/auth", + token_endpoint="https://auth/token", + registration_endpoint="https://auth/register", + response_types_supported=["code"], + ) + client_metadata = OAuthClientMetadata(client_name="Dify", redirect_uris=["https://re"]) + + res = Mock() + res.is_success = True + res.json.return_value = { + "client_id": "c_new", + "client_secret": "s_new", + "client_name": "Dify", + "redirect_uris": ["https://re"], + } + mock_post.return_value = res + + info = register_client("https://api", metadata, client_metadata) + assert info.client_id == "c_new" + + # Case 2: Success without metadata + info = register_client("https://api", None, client_metadata) + assert mock_post.call_args[0][0] == "https://api/register" + + # Case 3: Metadata provided but no endpoint + metadata.registration_endpoint = None + with pytest.raises(ValueError, match="does not support dynamic client registration"): + register_client("https://api", metadata, client_metadata) + + # Failure: HTTP + res.is_success = False + res.raise_for_status = Mock() + res.status_code = 400 + # If is_success is false, it should call raise_for_status + register_client("https://api", None, client_metadata) + res.raise_for_status.assert_called_once() + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + def test_auth_orchestration_failures(self, mock_discover): + provider = Mock(spec=MCPProviderEntity) + provider.decrypt_server_url.return_value = "https://api" + provider.id = "p1" + provider.tenant_id = "t1" + + # Case 1: No server metadata + mock_discover.return_value = (None, None, None) + with pytest.raises(ValueError, match="Failed to discover OAuth metadata"): + auth(provider) + + # Case 2: No client info, exchange code provided + asm = OAuthMetadata( + authorization_endpoint="https://auth/auth", + token_endpoint="https://auth/token", + response_types_supported=["code"], + ) + mock_discover.return_value = (asm, None, None) + provider.retrieve_client_information.return_value = None + with pytest.raises(ValueError, match="Existing OAuth client information is required"): + auth(provider, authorization_code="code") + + # Case 3: CLIENT_CREDENTIALS but client must provide info + asm.grant_types_supported = ["client_credentials"] + with pytest.raises(ValueError, match="requires client_id and client_secret"): + auth(provider) + + # Case 4: Client registration fails + asm.grant_types_supported = ["authorization_code"] + with patch("core.mcp.auth.auth_flow.register_client") as mock_reg: + mock_reg.side_effect = httpx.RequestError("Reg failed") + with pytest.raises(ValueError, match="Could not register OAuth client"): + auth(provider) + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + def test_auth_orchestration_client_credentials(self, mock_discover): + provider = Mock(spec=MCPProviderEntity) + provider.decrypt_server_url.return_value = "https://api" + provider.id = "p1" + provider.tenant_id = "t1" + provider.retrieve_client_information.return_value = OAuthClientInformation(client_id="c1", client_secret="s1") + provider.decrypt_credentials.return_value = {"scope": "read"} + + asm = OAuthMetadata( + authorization_endpoint="https://auth/auth", + token_endpoint="https://auth/token", + response_types_supported=["code"], + grant_types_supported=["client_credentials"], + ) + mock_discover.return_value = (asm, None, None) + + with patch("core.mcp.auth.auth_flow.client_credentials_flow") as mock_cc: + mock_cc.return_value = OAuthTokens(access_token="at_cc", token_type="Bearer") + + result = auth(provider) + assert result.response == {"result": "success"} + assert result.actions[0].action_type == AuthActionType.SAVE_TOKENS + assert result.actions[0].data["grant_type"] == "client_credentials" + + # Failure in CC flow + mock_cc.side_effect = ValueError("CC Failed") + with pytest.raises(ValueError, match="Client credentials flow failed"): + auth(provider) + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + def test_auth_orchestration_authorization_code(self, mock_discover): + provider = Mock(spec=MCPProviderEntity) + provider.decrypt_server_url.return_value = "https://api" + provider.id = "p1" + provider.tenant_id = "t1" + provider.retrieve_client_information.return_value = OAuthClientInformation(client_id="c1") + provider.decrypt_credentials.return_value = {} + + asm = OAuthMetadata( + authorization_endpoint="https://auth/auth", + token_endpoint="https://auth/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ) + mock_discover.return_value = (asm, None, None) + + # Case 1: Exchange code + with patch("core.mcp.auth.auth_flow._retrieve_redis_state") as mock_retrieve: + state = Mock(spec=OAuthCallbackState) + state.code_verifier = "cv" + state.redirect_uri = "https://re" + mock_retrieve.return_value = state + + with patch("core.mcp.auth.auth_flow.exchange_authorization") as mock_exchange: + mock_exchange.return_value = OAuthTokens(access_token="at_code", token_type="Bearer") + + # Success + result = auth(provider, authorization_code="code", state_param="sp") + assert result.response == {"result": "success"} + + # Missing state_param + with pytest.raises(ValueError, match="State parameter is required"): + auth(provider, authorization_code="code") + + # Missing verifier in state + state.code_verifier = None + with pytest.raises(ValueError, match="Missing code_verifier"): + auth(provider, authorization_code="code", state_param="sp") + + # Invalid state + mock_retrieve.side_effect = ValueError("Invalid") + with pytest.raises(ValueError, match="Invalid state parameter"): + auth(provider, authorization_code="code", state_param="sp") + + @patch("core.mcp.auth.auth_flow.discover_oauth_metadata") + def test_auth_orchestration_refresh_failure(self, mock_discover): + provider = Mock(spec=MCPProviderEntity) + provider.decrypt_server_url.return_value = "https://api" + provider.id = "p1" + provider.tenant_id = "t1" + provider.retrieve_client_information.return_value = OAuthClientInformation(client_id="c1") + provider.decrypt_credentials.return_value = {} + provider.retrieve_tokens.return_value = OAuthTokens(access_token="at", token_type="Bearer", refresh_token="rt") + + asm = OAuthMetadata( + authorization_endpoint="https://auth/auth", + token_endpoint="https://auth/token", + response_types_supported=["code"], + grant_types_supported=["authorization_code"], + ) + mock_discover.return_value = (asm, None, None) + + with patch("core.mcp.auth.auth_flow.refresh_authorization") as mock_refresh: + mock_refresh.side_effect = ValueError("Refresh Failed") + with pytest.raises(ValueError, match="Could not refresh OAuth tokens"): + auth(provider) diff --git a/api/tests/unit_tests/core/mcp/client/test_sse.py b/api/tests/unit_tests/core/mcp/client/test_sse.py index 490a647025..e6eeb6cd59 100644 --- a/api/tests/unit_tests/core/mcp/client/test_sse.py +++ b/api/tests/unit_tests/core/mcp/client/test_sse.py @@ -322,3 +322,475 @@ def test_sse_client_concurrent_access(): assert len(received_messages) == 10 for i in range(10): assert f"message_{i}" in received_messages + + +class TestStatusClasses: + """Tests for _StatusReady and _StatusError data containers.""" + + def test_status_ready_stores_endpoint(self): + from core.mcp.client.sse_client import _StatusReady + + status = _StatusReady("http://example.com/messages/") + assert status.endpoint_url == "http://example.com/messages/" + + def test_status_error_stores_exception(self): + from core.mcp.client.sse_client import _StatusError + + exc = ValueError("bad endpoint") + status = _StatusError(exc) + assert status.exc is exc + + +class TestSSETransportInit: + """Tests for SSETransport default and explicit init values.""" + + def test_defaults(self): + from core.mcp.client.sse_client import SSETransport + + t = SSETransport("http://example.com/sse") + assert t.url == "http://example.com/sse" + assert t.headers == {} + assert t.timeout == 5.0 + assert t.sse_read_timeout == 60.0 + assert t.endpoint_url is None + assert t.event_source is None + + def test_explicit_headers_not_mutated(self): + from core.mcp.client.sse_client import SSETransport + + hdrs = {"X-Foo": "bar"} + t = SSETransport("http://example.com/sse", headers=hdrs) + assert t.headers is hdrs + + +class TestHandleEndpointEvent: + """Tests for SSETransport._handle_endpoint_event covering the invalid-origin branch.""" + + def test_invalid_origin_puts_status_error(self): + from core.mcp.client.sse_client import SSETransport, _StatusError + + transport = SSETransport("http://example.com/sse") + status_queue: queue.Queue = queue.Queue() + + # Provide a full URL with a different origin so urljoin keeps it as-is + transport._handle_endpoint_event("http://evil.com/messages/", status_queue) + + result = status_queue.get_nowait() + assert isinstance(result, _StatusError) + assert "does not match" in str(result.exc) + + def test_valid_origin_puts_status_ready(self): + from core.mcp.client.sse_client import SSETransport, _StatusReady + + transport = SSETransport("http://example.com/sse") + status_queue: queue.Queue = queue.Queue() + + transport._handle_endpoint_event("/messages/?session_id=abc", status_queue) + + result = status_queue.get_nowait() + assert isinstance(result, _StatusReady) + assert "example.com" in result.endpoint_url + + +class TestHandleSSEEvent: + """Tests for SSETransport._handle_sse_event covering all match branches.""" + + def _make_sse(self, event_type: str, data: str): + sse = Mock() + sse.event = event_type + sse.data = data + return sse + + def test_message_event_dispatched(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + read_queue: queue.Queue = queue.Queue() + status_queue: queue.Queue = queue.Queue() + + valid_msg = '{"jsonrpc": "2.0", "id": 1, "method": "ping"}' + transport._handle_sse_event(self._make_sse("message", valid_msg), read_queue, status_queue) + + item = read_queue.get_nowait() + assert hasattr(item, "message") + + def test_unknown_event_logs_warning_and_does_nothing(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + read_queue: queue.Queue = queue.Queue() + status_queue: queue.Queue = queue.Queue() + + transport._handle_sse_event(self._make_sse("ping", "{}"), read_queue, status_queue) + + assert read_queue.empty() + assert status_queue.empty() + + +class TestSSEReader: + """Tests for SSETransport.sse_reader exception branches.""" + + def test_read_error_closes_cleanly(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + read_queue: queue.Queue = queue.Queue() + status_queue: queue.Queue = queue.Queue() + + event_source = Mock() + event_source.iter_sse.side_effect = httpx.ReadError("connection reset") + + transport.sse_reader(event_source, read_queue, status_queue) + + # Finally block always puts None as sentinel + sentinel = read_queue.get_nowait() + assert sentinel is None + + def test_generic_exception_puts_exc_then_none(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + read_queue: queue.Queue = queue.Queue() + status_queue: queue.Queue = queue.Queue() + + boom = RuntimeError("unexpected!") + event_source = Mock() + event_source.iter_sse.side_effect = boom + + transport.sse_reader(event_source, read_queue, status_queue) + + exc_item = read_queue.get_nowait() + assert exc_item is boom + + sentinel = read_queue.get_nowait() + assert sentinel is None + + +class TestSendMessage: + """Tests for SSETransport._send_message.""" + + def _make_session_message(self): + msg_json = '{"jsonrpc": "2.0", "id": 1, "method": "ping"}' + msg = types.JSONRPCMessage.model_validate_json(msg_json) + return types.SessionMessage(msg) + + def test_sends_post_and_raises_for_status(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + + mock_response = Mock() + mock_response.status_code = 200 + mock_client = Mock() + mock_client.post.return_value = mock_response + + session_msg = self._make_session_message() + transport._send_message(mock_client, "http://example.com/messages/", session_msg) + + mock_client.post.assert_called_once() + mock_response.raise_for_status.assert_called_once() + + +class TestPostWriter: + """Tests for SSETransport.post_writer exception branches.""" + + def _make_session_message(self): + msg_json = '{"jsonrpc": "2.0", "id": 1, "method": "ping"}' + msg = types.JSONRPCMessage.model_validate_json(msg_json) + return types.SessionMessage(msg) + + def test_none_message_exits_loop(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + write_queue: queue.Queue = queue.Queue() + write_queue.put(None) # Signal shutdown immediately + + mock_client = Mock() + transport.post_writer(mock_client, "http://example.com/messages/", write_queue) + + # Should put final None sentinel + sentinel = write_queue.get_nowait() + assert sentinel is None + + def test_exception_in_message_put_back_to_queue(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + write_queue: queue.Queue = queue.Queue() + + exc = ValueError("some error") + write_queue.put(exc) # Exception goes in first + write_queue.put(None) # Then shutdown signal + + mock_client = Mock() + transport.post_writer(mock_client, "http://example.com/messages/", write_queue) + + # The exception should be re-queued, then None from loop exit, then None from finally + item1 = write_queue.get_nowait() + assert isinstance(item1, Exception) + + def test_read_error_shuts_down_cleanly(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + write_queue: queue.Queue = queue.Queue() + + session_msg = self._make_session_message() + write_queue.put(session_msg) + + mock_response = Mock() + mock_response.status_code = 200 + mock_client = Mock() + mock_client.post.side_effect = httpx.ReadError("connection dropped") + + # post_writer calls _send_message which calls client.post → ReadError propagates + # The ReadError is raised inside _send_message → propagates out of the while loop + transport.post_writer(mock_client, "http://example.com/messages/", write_queue) + + # finally always puts None + sentinel = write_queue.get_nowait() + assert sentinel is None + + def test_generic_exception_puts_exc_in_queue(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + write_queue: queue.Queue = queue.Queue() + + session_msg = self._make_session_message() + write_queue.put(session_msg) + + mock_client = Mock() + boom = RuntimeError("boom") + mock_client.post.side_effect = boom + + transport.post_writer(mock_client, "http://example.com/messages/", write_queue) + + exc_item = write_queue.get_nowait() + assert isinstance(exc_item, Exception) + + sentinel = write_queue.get_nowait() + assert sentinel is None + + def test_queue_empty_timeout_continues_loop(self): + """Cover the 'except queue.Empty: continue' branch (line 188) in post_writer.""" + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + write_queue: queue.Queue = queue.Queue() + + mock_client = Mock() + + # Patch queue.Queue.get so it raises Empty first, then returns None (shutdown) + call_count = {"n": 0} + original_get = write_queue.get + + def patched_get(*args, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + raise queue.Empty + + write_queue.get = patched_get # type: ignore[method-assign] + + transport.post_writer(mock_client, "http://example.com/messages/", write_queue) + + # finally always puts None sentinel + sentinel = write_queue.get_nowait() + assert sentinel is None + assert call_count["n"] >= 2 # Empty on first, None on second (and possibly more retries) + + +class TestWaitForEndpoint: + """Tests for SSETransport._wait_for_endpoint edge cases.""" + + def test_raises_on_empty_queue(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + status_queue: queue.Queue = queue.Queue() # empty + + with pytest.raises(ValueError, match="failed to get endpoint URL"): + transport._wait_for_endpoint(status_queue) + + def test_raises_status_error_exception(self): + from core.mcp.client.sse_client import SSETransport, _StatusError + + transport = SSETransport("http://example.com/sse") + status_queue: queue.Queue = queue.Queue() + + exc = ValueError("malicious endpoint") + status_queue.put(_StatusError(exc)) + + with pytest.raises(ValueError, match="malicious endpoint"): + transport._wait_for_endpoint(status_queue) + + def test_raises_on_unknown_status_type(self): + from core.mcp.client.sse_client import SSETransport + + transport = SSETransport("http://example.com/sse") + status_queue: queue.Queue = queue.Queue() + + # Put an object that is neither _StatusReady nor _StatusError + status_queue.put("unexpected_value") + + with pytest.raises(ValueError, match="failed to get endpoint URL"): + transport._wait_for_endpoint(status_queue) + + +class TestSSEClientRuntimeError: + """Test sse_client context manager handles RuntimeError on close().""" + + def test_runtime_error_on_close_is_suppressed(self): + """Ensure RuntimeError raised by event_source.response.close() is caught.""" + test_url = "http://test.example/sse" + + class MockSSEEvent: + def __init__(self, event_type: str, data: str): + self.event = event_type + self.data = data + + endpoint_event = MockSSEEvent("endpoint", "/messages/?session_id=test-123") + + with patch("core.mcp.client.sse_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + with patch("core.mcp.client.sse_client.ssrf_proxy_sse_connect") as mock_sc: + mock_client = Mock() + mock_cf.return_value.__enter__.return_value = mock_client + + mock_es = Mock() + mock_es.response.raise_for_status.return_value = None + mock_es.iter_sse.return_value = [endpoint_event] + # Make close() raise RuntimeError to exercise line 307-308 + mock_es.response.close.side_effect = RuntimeError("already closed") + mock_sc.return_value.__enter__.return_value = mock_es + + # Should NOT raise even though close() raises RuntimeError + with contextlib.suppress(Exception): + with sse_client(test_url) as (rq, wq): + pass + + +class TestStandaloneSendMessage: + """Tests for the module-level send_message() function.""" + + def _make_session_message(self): + msg_json = '{"jsonrpc": "2.0", "id": 1, "method": "ping"}' + msg = types.JSONRPCMessage.model_validate_json(msg_json) + return types.SessionMessage(msg) + + def test_send_message_success(self): + from core.mcp.client.sse_client import send_message + + mock_response = Mock() + mock_response.status_code = 200 + mock_http_client = Mock() + mock_http_client.post.return_value = mock_response + + session_msg = self._make_session_message() + send_message(mock_http_client, "http://example.com/messages/", session_msg) + + mock_http_client.post.assert_called_once() + mock_response.raise_for_status.assert_called_once() + + def test_send_message_raises_on_http_error(self): + from core.mcp.client.sse_client import send_message + + mock_http_client = Mock() + mock_http_client.post.side_effect = httpx.ConnectError("refused") + + session_msg = self._make_session_message() + + with pytest.raises(httpx.ConnectError): + send_message(mock_http_client, "http://example.com/messages/", session_msg) + + def test_send_message_raises_for_status_failure(self): + from core.mcp.client.sse_client import send_message + + mock_response = Mock() + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Not Found", request=Mock(), response=Mock(status_code=404) + ) + mock_http_client = Mock() + mock_http_client.post.return_value = mock_response + + session_msg = self._make_session_message() + + with pytest.raises(httpx.HTTPStatusError): + send_message(mock_http_client, "http://example.com/messages/", session_msg) + + +class TestReadMessages: + """Tests for the module-level read_messages() generator.""" + + def _make_mock_sse_event(self, event_type: str, data: str): + ev = Mock() + ev.event = event_type + ev.data = data + return ev + + def test_valid_message_event_yields_session_message(self): + from core.mcp.client.sse_client import read_messages + + valid_json = '{"jsonrpc": "2.0", "id": 1, "method": "ping"}' + mock_sse_event = self._make_mock_sse_event("message", valid_json) + + mock_client = Mock() + mock_client.events.return_value = [mock_sse_event] + + results = list(read_messages(mock_client)) + assert len(results) == 1 + assert hasattr(results[0], "message") + + def test_invalid_json_yields_exception(self): + from core.mcp.client.sse_client import read_messages + + mock_sse_event = self._make_mock_sse_event("message", "{not valid json}") + + mock_client = Mock() + mock_client.events.return_value = [mock_sse_event] + + results = list(read_messages(mock_client)) + assert len(results) == 1 + assert isinstance(results[0], Exception) + + def test_non_message_event_is_skipped(self): + from core.mcp.client.sse_client import read_messages + + mock_sse_event = self._make_mock_sse_event("endpoint", "/messages/") + + mock_client = Mock() + mock_client.events.return_value = [mock_sse_event] + + results = list(read_messages(mock_client)) + # Non-message events produce no output + assert results == [] + + def test_outer_exception_yields_exc(self): + from core.mcp.client.sse_client import read_messages + + boom = RuntimeError("stream broken") + mock_client = Mock() + mock_client.events.side_effect = boom + + results = list(read_messages(mock_client)) + assert len(results) == 1 + assert results[0] is boom + + def test_multiple_events_mixed(self): + from core.mcp.client.sse_client import read_messages + + valid_json = '{"jsonrpc": "2.0", "id": 2, "result": {}}' + events = [ + self._make_mock_sse_event("endpoint", "/messages/"), + self._make_mock_sse_event("message", valid_json), + self._make_mock_sse_event("message", "{bad json}"), + ] + + mock_client = Mock() + mock_client.events.return_value = events + + results = list(read_messages(mock_client)) + # endpoint is skipped; 1 valid SessionMessage + 1 Exception + assert len(results) == 2 + assert hasattr(results[0], "message") + assert isinstance(results[1], Exception) diff --git a/api/tests/unit_tests/core/mcp/client/test_streamable_http.py b/api/tests/unit_tests/core/mcp/client/test_streamable_http.py index 9a30a35a49..81f8da9a62 100644 --- a/api/tests/unit_tests/core/mcp/client/test_streamable_http.py +++ b/api/tests/unit_tests/core/mcp/client/test_streamable_http.py @@ -4,14 +4,39 @@ Tests for the StreamableHTTP client transport. Contains tests for only the client side of the StreamableHTTP transport. """ +import json import queue import threading import time +from contextlib import contextmanager +from datetime import timedelta from typing import Any -from unittest.mock import Mock, patch +from unittest.mock import MagicMock, Mock, patch + +import httpx +import pytest +from httpx_sse import ServerSentEvent from core.mcp import types -from core.mcp.client.streamable_client import streamablehttp_client +from core.mcp.client.streamable_client import ( + LAST_EVENT_ID, + MCP_SESSION_ID, + RequestContext, + ResumptionError, + StreamableHTTPError, + StreamableHTTPTransport, + streamablehttp_client, +) +from core.mcp.types import ( + ClientMessageMetadata, + ErrorData, + JSONRPCError, + JSONRPCMessage, + JSONRPCNotification, + JSONRPCRequest, + JSONRPCResponse, + SessionMessage, +) # Test constants SERVER_NAME = "test_streamable_http_server" @@ -448,3 +473,1169 @@ def test_streamablehttp_client_resumption_token_handling(): assert write_queue is not None except Exception: pass # Expected due to mocking + + +# ── helpers ─────────────────────────────────────────────────────────────────── + + +def _make_request_msg(method: str = "ping", req_id: int = 1) -> JSONRPCMessage: + return JSONRPCMessage(root=JSONRPCRequest(jsonrpc="2.0", id=req_id, method=method)) + + +def _make_response_msg(req_id: int = 1, result: dict | None = None) -> JSONRPCMessage: + return JSONRPCMessage(root=JSONRPCResponse(jsonrpc="2.0", id=req_id, result=result or {})) + + +def _make_error_msg(req_id: int = 1, code: int = -32600) -> JSONRPCMessage: + return JSONRPCMessage(root=JSONRPCError(jsonrpc="2.0", id=req_id, error=ErrorData(code=code, message="err"))) + + +def _make_notification_msg(method: str = "notifications/initialized") -> JSONRPCMessage: + return JSONRPCMessage(root=JSONRPCNotification(jsonrpc="2.0", method=method)) + + +def _make_sse_mock(event: str = "message", data: str = "", sse_id: str = "") -> ServerSentEvent: + # Use real ServerSentEvent since StreamableHTTPTransport requires its structure + return ServerSentEvent(event=event, data=data, id=sse_id, retry=None) + + +def _new_transport(url: str = "http://example.com/mcp", **kwargs) -> StreamableHTTPTransport: + return StreamableHTTPTransport(url, **kwargs) + + +# ── StreamableHTTPTransport.__init__ ───────────────────────────────────────── + + +class TestStreamableHTTPTransportInit: + def test_defaults(self): + t = _new_transport() + assert t.url == "http://example.com/mcp" + assert t.headers == {} + assert t.timeout == 30 + assert t.sse_read_timeout == 300 + assert t.session_id is None + assert t.stop_event is not None + assert t._active_responses == [] + + def test_timedelta_timeout_and_sse_read_timeout(self): + t = _new_transport(timeout=timedelta(seconds=10), sse_read_timeout=timedelta(seconds=120)) + assert t.timeout == 10.0 + assert t.sse_read_timeout == 120.0 + + def test_custom_headers_merged_into_request_headers(self): + t = _new_transport(headers={"Authorization": "Bearer tok"}) + assert t.request_headers["Authorization"] == "Bearer tok" + assert "Accept" in t.request_headers + assert "content-type" in t.request_headers + + +# ── _update_headers_with_session ───────────────────────────────────────────── + + +class TestUpdateHeadersWithSession: + def test_no_session_id_returns_copy_without_session_header(self): + t = _new_transport() + t.session_id = None + result = t._update_headers_with_session({"X-Foo": "bar"}) + assert result == {"X-Foo": "bar"} + assert MCP_SESSION_ID not in result + + def test_with_session_id_adds_header(self): + t = _new_transport() + t.session_id = "sess-abc" + result = t._update_headers_with_session({"X-Foo": "bar"}) + assert result[MCP_SESSION_ID] == "sess-abc" + assert result["X-Foo"] == "bar" + + +# ── _register_response / _unregister_response / close_active_responses ──────── + + +class TestResponseRegistry: + def test_register_and_unregister(self): + t = _new_transport() + resp = MagicMock(spec=httpx.Response) + t._register_response(resp) + assert resp in t._active_responses + t._unregister_response(resp) + assert resp not in t._active_responses + + def test_unregister_not_registered_does_not_raise(self): + t = _new_transport() + resp = MagicMock(spec=httpx.Response) + t._unregister_response(resp) # Should swallow ValueError silently + + def test_close_active_responses_calls_close(self): + t = _new_transport() + resp1 = MagicMock(spec=httpx.Response) + resp2 = MagicMock(spec=httpx.Response) + t._register_response(resp1) + t._register_response(resp2) + t.close_active_responses() + resp1.close.assert_called_once() + resp2.close.assert_called_once() + assert t._active_responses == [] + + def test_close_active_responses_swallows_runtime_error(self): + t = _new_transport() + resp = MagicMock(spec=httpx.Response) + resp.close.side_effect = RuntimeError("already closed") + t._register_response(resp) + t.close_active_responses() # Should not raise + + +# ── _is_initialization_request / _is_initialized_notification ──────────────── + + +class TestMessageClassifiers: + def test_is_initialization_request_true(self): + t = _new_transport() + assert t._is_initialization_request(_make_request_msg("initialize")) is True + + def test_is_initialization_request_false_other_method(self): + t = _new_transport() + assert t._is_initialization_request(_make_request_msg("tools/list")) is False + + def test_is_initialization_request_false_not_request(self): + t = _new_transport() + assert t._is_initialization_request(_make_response_msg()) is False + + def test_is_initialized_notification_true(self): + t = _new_transport() + assert t._is_initialized_notification(_make_notification_msg("notifications/initialized")) is True + + def test_is_initialized_notification_false_other_method(self): + t = _new_transport() + assert t._is_initialized_notification(_make_notification_msg("notifications/cancelled")) is False + + def test_is_initialized_notification_false_not_notification(self): + t = _new_transport() + assert t._is_initialized_notification(_make_request_msg("notifications/initialized")) is False + + +# ── _maybe_extract_session_id_from_response ─────────────────────────────────── + + +class TestMaybeExtractSessionIdNew: + def test_extracts_session_id_when_present(self): + t = _new_transport() + resp = MagicMock() + resp.headers = {MCP_SESSION_ID: "new-session-99"} + t._maybe_extract_session_id_from_response(resp) + assert t.session_id == "new-session-99" + + def test_no_session_id_header_leaves_none(self): + t = _new_transport() + resp = MagicMock() + resp.headers = MagicMock() + resp.headers.get = MagicMock(return_value=None) + t._maybe_extract_session_id_from_response(resp) + assert t.session_id is None + + +# ── _handle_sse_event ───────────────────────────────────────────────────────── + + +class TestHandleSseEventNew: + def test_message_event_response_returns_true(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + sse = _make_sse_mock("message", json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}})) + assert t._handle_sse_event(sse, q) is True + assert isinstance(q.get_nowait(), SessionMessage) + + def test_message_event_error_returns_true(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + data = json.dumps({"jsonrpc": "2.0", "id": 1, "error": {"code": -32600, "message": "bad"}}) + sse = _make_sse_mock("message", data) + assert t._handle_sse_event(sse, q) is True + + def test_message_event_notification_returns_false(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + data = json.dumps({"jsonrpc": "2.0", "method": "notifications/something"}) + sse = _make_sse_mock("message", data) + assert t._handle_sse_event(sse, q) is False + assert isinstance(q.get_nowait(), SessionMessage) + + def test_message_event_empty_data_returns_false(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + sse = _make_sse_mock("message", " ") + assert t._handle_sse_event(sse, q) is False + assert q.empty() + + def test_message_event_invalid_json_puts_exception(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + sse = _make_sse_mock("message", "{bad json}") + assert t._handle_sse_event(sse, q) is False + assert isinstance(q.get_nowait(), Exception) + + def test_message_event_replaces_original_request_id(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + sse = _make_sse_mock("message", data, sse_id="") + t._handle_sse_event(sse, q, original_request_id=999) + item = q.get_nowait() + assert isinstance(item, SessionMessage) + assert item.message.root.id == 999 + + def test_message_event_calls_resumption_callback_when_sse_id_present(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + sse = _make_sse_mock("message", data, sse_id="token-abc") + callback = MagicMock() + t._handle_sse_event(sse, q, resumption_callback=callback) + callback.assert_called_once_with("token-abc") + + def test_message_event_no_callback_when_no_sse_id(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + sse = _make_sse_mock("message", data, sse_id="") + callback = MagicMock() + t._handle_sse_event(sse, q, resumption_callback=callback) + callback.assert_not_called() + + def test_ping_event_returns_false(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + sse = _make_sse_mock("ping", "") + assert t._handle_sse_event(sse, q) is False + assert q.empty() + + def test_unknown_event_returns_false(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + sse = _make_sse_mock("custom_event", "{}") + assert t._handle_sse_event(sse, q) is False + assert q.empty() + + +# ── handle_get_stream ───────────────────────────────────────────────────────── + + +class TestHandleGetStreamNew: + def test_skips_when_no_session_id(self): + t = _new_transport() + t.session_id = None + q: queue.Queue = queue.Queue() + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect") as mock_connect: + t.handle_get_stream(MagicMock(), q) + mock_connect.assert_not_called() + + def test_handles_messages_via_sse(self): + t = _new_transport() + t.session_id = "sess-1" + q: queue.Queue = queue.Queue() + + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + mock_sse_event = _make_sse_mock("message", data) + + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_event_source = MagicMock() + mock_event_source.response = mock_response + mock_event_source.iter_sse.return_value = [mock_sse_event] + + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect") as mock_connect: + mock_connect.return_value.__enter__.return_value = mock_event_source + t.handle_get_stream(MagicMock(), q) + + assert isinstance(q.get_nowait(), SessionMessage) + + def test_stops_when_stop_event_set(self): + t = _new_transport() + t.session_id = "sess-1" + t.stop_event.set() + q: queue.Queue = queue.Queue() + + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + mock_sse_event = _make_sse_mock("message", data) + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_event_source = MagicMock() + mock_event_source.response = mock_response + mock_event_source.iter_sse.return_value = [mock_sse_event] + + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect") as mock_connect: + mock_connect.return_value.__enter__.return_value = mock_event_source + t.handle_get_stream(MagicMock(), q) + + assert q.empty() + + def test_exception_when_not_stopped_is_logged(self): + t = _new_transport() + t.session_id = "sess-1" + q: queue.Queue = queue.Queue() + + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect") as mock_connect: + mock_connect.side_effect = Exception("connection error") + t.handle_get_stream(MagicMock(), q) # Should not raise + + def test_exception_when_stopped_is_suppressed(self): + t = _new_transport() + t.session_id = "sess-1" + t.stop_event.set() + q: queue.Queue = queue.Queue() + + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect") as mock_connect: + mock_connect.side_effect = Exception("connection error") + t.handle_get_stream(MagicMock(), q) # Should not raise or log + + +# ── _handle_resumption_request ──────────────────────────────────────────────── + + +class TestHandleResumptionRequestNew: + def _make_ctx(self, transport, q, resumption_token="token-123", message=None) -> RequestContext: + if message is None: + message = _make_request_msg("tools/list", req_id=42) + session_msg = SessionMessage(message) + metadata = None + if resumption_token: + metadata = MagicMock(spec=ClientMessageMetadata) + metadata.resumption_token = resumption_token + metadata.on_resumption_token_update = MagicMock() + return RequestContext( + client=MagicMock(), + headers=transport.request_headers, + session_id=transport.session_id, + session_message=session_msg, + metadata=metadata, + server_to_client_queue=q, + sse_read_timeout=60, + ) + + def test_raises_resumption_error_without_token(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + metadata = MagicMock(spec=ClientMessageMetadata) + metadata.resumption_token = None + ctx = RequestContext( + client=MagicMock(), + headers=t.request_headers, + session_id=None, + session_message=SessionMessage(_make_request_msg()), + metadata=metadata, + server_to_client_queue=q, + sse_read_timeout=60, + ) + with pytest.raises(ResumptionError): + t._handle_resumption_request(ctx) + + def test_raises_resumption_error_without_metadata(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = RequestContext( + client=MagicMock(), + headers=t.request_headers, + session_id=None, + session_message=SessionMessage(_make_request_msg()), + metadata=None, + server_to_client_queue=q, + sse_read_timeout=60, + ) + with pytest.raises(ResumptionError): + t._handle_resumption_request(ctx) + + def test_sets_last_event_id_header(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q, resumption_token="resume-999") + + captured_headers: dict = {} + data = json.dumps({"jsonrpc": "2.0", "id": 42, "result": {}}) + mock_sse_event = _make_sse_mock("message", data) + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_event_source = MagicMock() + mock_event_source.response = mock_response + mock_event_source.iter_sse.return_value = [mock_sse_event] + + def fake_connect(url, headers, **kwargs): + captured_headers.update(headers) + + @contextmanager + def _ctx(): + yield mock_event_source + + return _ctx() + + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect", side_effect=fake_connect): + t._handle_resumption_request(ctx) + + assert captured_headers.get(LAST_EVENT_ID) == "resume-999" + + def test_stops_when_response_complete(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q, message=_make_request_msg("tools/list", 42)) + + data1 = json.dumps({"jsonrpc": "2.0", "id": 42, "result": {}}) + data2 = json.dumps({"jsonrpc": "2.0", "id": 43, "result": {}}) + sse1 = _make_sse_mock("message", data1) + sse2 = _make_sse_mock("message", data2) + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_event_source = MagicMock() + mock_event_source.response = mock_response + mock_event_source.iter_sse.return_value = [sse1, sse2] + + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect") as mock_connect: + mock_connect.return_value.__enter__.return_value = mock_event_source + t._handle_resumption_request(ctx) + + # Only the first event was processed (loop breaks on completion) + assert q.qsize() == 1 + + def test_stops_when_stop_event_set(self): + t = _new_transport() + t.stop_event.set() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q) + + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + mock_sse_event = _make_sse_mock("message", data) + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_event_source = MagicMock() + mock_event_source.response = mock_response + mock_event_source.iter_sse.return_value = [mock_sse_event] + + with patch("core.mcp.client.streamable_client.ssrf_proxy_sse_connect") as mock_connect: + mock_connect.return_value.__enter__.return_value = mock_event_source + t._handle_resumption_request(ctx) + + assert q.empty() + + +# ── _handle_post_request ────────────────────────────────────────────────────── + + +class TestHandlePostRequestNew: + def _make_ctx(self, transport, q, message=None) -> RequestContext: + if message is None: + message = _make_request_msg("tools/list", 1) + return RequestContext( + client=MagicMock(), + headers=transport.request_headers, + session_id=transport.session_id, + session_message=SessionMessage(message), + metadata=None, + server_to_client_queue=q, + sse_read_timeout=60, + ) + + def _stream_ctx(self, mock_response): + @contextmanager + def _stream(*args, **kwargs): + yield mock_response + + return _stream + + def test_202_returns_immediately_no_queue(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q) + mock_resp = MagicMock() + mock_resp.status_code = 202 + ctx.client.stream = self._stream_ctx(mock_resp) + t._handle_post_request(ctx) + assert q.empty() + + def test_204_returns_immediately_no_queue(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q) + mock_resp = MagicMock() + mock_resp.status_code = 204 + ctx.client.stream = self._stream_ctx(mock_resp) + t._handle_post_request(ctx) + assert q.empty() + + def test_404_sends_session_terminated_error_for_request(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + msg = _make_request_msg("tools/list", 77) + ctx = self._make_ctx(t, q, message=msg) + mock_resp = MagicMock() + mock_resp.status_code = 404 + ctx.client.stream = self._stream_ctx(mock_resp) + t._handle_post_request(ctx) + item = q.get_nowait() + assert isinstance(item, SessionMessage) + assert isinstance(item.message.root, JSONRPCError) + assert item.message.root.id == 77 + + def test_404_for_notification_no_error_sent(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + msg = _make_notification_msg("some/notification") + ctx = self._make_ctx(t, q, message=msg) + mock_resp = MagicMock() + mock_resp.status_code = 404 + ctx.client.stream = self._stream_ctx(mock_resp) + t._handle_post_request(ctx) + assert q.empty() + + def test_json_response_puts_session_message(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q) + + response_data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {"ok": True}}).encode() + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"content-type": "application/json"} + mock_resp.raise_for_status.return_value = None + mock_resp.read.return_value = response_data + ctx.client.stream = self._stream_ctx(mock_resp) + + t._handle_post_request(ctx) + assert isinstance(q.get_nowait(), SessionMessage) + + def test_json_response_invalid_json_puts_exception(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q) + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"content-type": "application/json"} + mock_resp.raise_for_status.return_value = None + mock_resp.read.return_value = b"{bad json!" + ctx.client.stream = self._stream_ctx(mock_resp) + + t._handle_post_request(ctx) + assert isinstance(q.get_nowait(), Exception) + + def test_unexpected_content_type_puts_value_error(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q) + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"content-type": "text/plain"} + mock_resp.raise_for_status.return_value = None + ctx.client.stream = self._stream_ctx(mock_resp) + + t._handle_post_request(ctx) + item = q.get_nowait() + assert isinstance(item, ValueError) + assert "Unexpected content type" in str(item) + + def test_initialization_request_extracts_session_id(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + msg = _make_request_msg("initialize", 1) + ctx = self._make_ctx(t, q, message=msg) + + response_data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}).encode() + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = MagicMock() + headers_dict = {"content-type": "application/json", MCP_SESSION_ID: "new-sid"} + mock_resp.headers.__getitem__ = lambda self, k: headers_dict[k] + mock_resp.headers.get = lambda k, default=None: headers_dict.get(k, default) + mock_resp.raise_for_status.return_value = None + mock_resp.read.return_value = response_data + ctx.client.stream = self._stream_ctx(mock_resp) + + t._handle_post_request(ctx) + assert t.session_id == "new-sid" + + def test_notification_skips_response_processing(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + msg = _make_notification_msg("notifications/something") + ctx = self._make_ctx(t, q, message=msg) + + response_data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}).encode() + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"content-type": "application/json"} + mock_resp.raise_for_status.return_value = None + mock_resp.read.return_value = response_data + ctx.client.stream = self._stream_ctx(mock_resp) + + t._handle_post_request(ctx) + assert q.empty() + + def test_sse_response_handles_stream(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._make_ctx(t, q) + + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + mock_sse_event = _make_sse_mock("message", data) + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"content-type": "text/event-stream"} + mock_resp.raise_for_status.return_value = None + ctx.client.stream = self._stream_ctx(mock_resp) + + with patch("core.mcp.client.streamable_client.EventSource") as MockEventSource: + mock_es_instance = MagicMock() + mock_es_instance.iter_sse.return_value = [mock_sse_event] + MockEventSource.return_value = mock_es_instance + t._handle_post_request(ctx) + + assert isinstance(q.get_nowait(), SessionMessage) + + +# ── _handle_json_response ───────────────────────────────────────────────────── + + +class TestHandleJsonResponseNew: + def test_valid_json_puts_session_message(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}).encode() + mock_response = MagicMock() + mock_response.read.return_value = data + t._handle_json_response(mock_response, q) + assert isinstance(q.get_nowait(), SessionMessage) + + def test_invalid_json_puts_exception(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + mock_response = MagicMock() + mock_response.read.return_value = b"{ invalid }" + t._handle_json_response(mock_response, q) + assert isinstance(q.get_nowait(), Exception) + + +# ── _handle_sse_response ────────────────────────────────────────────────────── + + +class TestHandleSseResponseNew: + def _ctx(self, transport, q) -> RequestContext: + return RequestContext( + client=MagicMock(), + headers=transport.request_headers, + session_id=None, + session_message=SessionMessage(_make_request_msg()), + metadata=None, + server_to_client_queue=q, + sse_read_timeout=60, + ) + + def test_processes_sse_events(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._ctx(t, q) + + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + mock_sse_event = _make_sse_mock("message", data) + mock_response = MagicMock() + + with patch("core.mcp.client.streamable_client.EventSource") as MockEventSource: + mock_es_instance = MagicMock() + mock_es_instance.iter_sse.return_value = [mock_sse_event] + MockEventSource.return_value = mock_es_instance + t._handle_sse_response(mock_response, ctx) + + assert isinstance(q.get_nowait(), SessionMessage) + + def test_stops_when_stop_event_set(self): + t = _new_transport() + t.stop_event.set() + q: queue.Queue = queue.Queue() + ctx = self._ctx(t, q) + + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + mock_sse_event = _make_sse_mock("message", data) + mock_response = MagicMock() + + with patch("core.mcp.client.streamable_client.EventSource") as MockEventSource: + mock_es_instance = MagicMock() + mock_es_instance.iter_sse.return_value = [mock_sse_event] + MockEventSource.return_value = mock_es_instance + t._handle_sse_response(mock_response, ctx) + + assert q.empty() + + def test_stops_when_complete(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._ctx(t, q) + + data1 = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + data2 = json.dumps({"jsonrpc": "2.0", "id": 2, "result": {}}) + sse1 = _make_sse_mock("message", data1) + sse2 = _make_sse_mock("message", data2) + mock_response = MagicMock() + + with patch("core.mcp.client.streamable_client.EventSource") as MockEventSource: + mock_es_instance = MagicMock() + mock_es_instance.iter_sse.return_value = [sse1, sse2] + MockEventSource.return_value = mock_es_instance + t._handle_sse_response(mock_response, ctx) + + assert q.qsize() == 1 # Only the first completion item + + def test_exception_outside_stop_puts_to_queue(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + ctx = self._ctx(t, q) + mock_response = MagicMock() + + with patch("core.mcp.client.streamable_client.EventSource") as MockEventSource: + MockEventSource.side_effect = RuntimeError("EventSource error") + t._handle_sse_response(mock_response, ctx) + + assert isinstance(q.get_nowait(), Exception) + + def test_exception_suppressed_when_stopped(self): + t = _new_transport() + t.stop_event.set() + q: queue.Queue = queue.Queue() + ctx = self._ctx(t, q) + mock_response = MagicMock() + + with patch("core.mcp.client.streamable_client.EventSource") as MockEventSource: + MockEventSource.side_effect = RuntimeError("EventSource error") + t._handle_sse_response(mock_response, ctx) + + assert q.empty() + + def test_with_metadata_resumption_callback(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + metadata = MagicMock(spec=ClientMessageMetadata) + callback = MagicMock() + metadata.on_resumption_token_update = callback + + ctx = RequestContext( + client=MagicMock(), + headers=t.request_headers, + session_id=None, + session_message=SessionMessage(_make_request_msg()), + metadata=metadata, + server_to_client_queue=q, + sse_read_timeout=60, + ) + + data = json.dumps({"jsonrpc": "2.0", "id": 1, "result": {}}) + sse = _make_sse_mock("message", data, sse_id="resume-token") + mock_response = MagicMock() + + with patch("core.mcp.client.streamable_client.EventSource") as MockEventSource: + mock_es_instance = MagicMock() + mock_es_instance.iter_sse.return_value = [sse] + MockEventSource.return_value = mock_es_instance + t._handle_sse_response(mock_response, ctx) + + callback.assert_called_once_with("resume-token") + + +# ── _handle_unexpected_content_type ────────────────────────────────────────── + + +class TestHandleUnexpectedContentTypeNew: + def test_puts_value_error_with_message(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + t._handle_unexpected_content_type("text/html", q) + item = q.get_nowait() + assert isinstance(item, ValueError) + assert "text/html" in str(item) + + +# ── _send_session_terminated_error ──────────────────────────────────────────── + + +class TestSendSessionTerminatedErrorNew: + def test_puts_jsonrpc_error(self): + t = _new_transport() + q: queue.Queue = queue.Queue() + t._send_session_terminated_error(q, 42) + item = q.get_nowait() + assert isinstance(item, SessionMessage) + assert isinstance(item.message.root, JSONRPCError) + assert item.message.root.id == 42 + assert item.message.root.error.code == 32600 + assert "terminated" in item.message.root.error.message.lower() + + +# ── post_writer ─────────────────────────────────────────────────────────────── + + +class TestPostWriterNew: + def test_none_message_exits_loop(self): + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + c2s.put(None) + t.post_writer(MagicMock(), c2s, s2c, MagicMock()) + + def test_stop_event_exits_loop(self): + t = _new_transport() + t.stop_event.set() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + t.post_writer(MagicMock(), c2s, s2c, MagicMock()) + + def test_initialized_notification_calls_start_get_stream(self): + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + start_get_stream = MagicMock() + + notif_msg = _make_notification_msg("notifications/initialized") + c2s.put(SessionMessage(notif_msg)) + c2s.put(None) + + with patch.object(t, "_handle_post_request"): + t.post_writer(MagicMock(), c2s, s2c, start_get_stream) + + start_get_stream.assert_called_once() + + def test_resumption_message_calls_handle_resumption_request(self): + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + start_get_stream = MagicMock() + + msg = SessionMessage(_make_request_msg("tools/list", 10)) + metadata = MagicMock(spec=ClientMessageMetadata) + metadata.resumption_token = "resume-abc" + msg.metadata = metadata + c2s.put(msg) + c2s.put(None) + + with patch.object(t, "_handle_resumption_request") as mock_resumption: + t.post_writer(MagicMock(), c2s, s2c, start_get_stream) + + mock_resumption.assert_called_once() + + def test_regular_message_calls_handle_post_request(self): + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + + msg = SessionMessage(_make_request_msg("tools/list", 5)) + c2s.put(msg) + c2s.put(None) + + with patch.object(t, "_handle_post_request") as mock_post: + t.post_writer(MagicMock(), c2s, s2c, MagicMock()) + + mock_post.assert_called_once() + + def test_exception_in_handler_put_to_s2c_when_not_stopped(self): + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + + msg = SessionMessage(_make_request_msg("tools/list", 5)) + c2s.put(msg) + c2s.put(None) + + boom = RuntimeError("oops") + with patch.object(t, "_handle_post_request", side_effect=boom): + t.post_writer(MagicMock(), c2s, s2c, MagicMock()) + + item = s2c.get_nowait() + assert item is boom + + def test_exception_suppressed_when_stopped(self): + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + + msg = SessionMessage(_make_request_msg("tools/list", 5)) + c2s.put(msg) + c2s.put(None) + t.stop_event.set() + + boom = RuntimeError("oops") + with patch.object(t, "_handle_post_request", side_effect=boom): + t.post_writer(MagicMock(), c2s, s2c, MagicMock()) + + assert s2c.empty() + + def test_queue_empty_timeout_continues_loop(self): + """Cover the 'except queue.Empty: continue' branch in post_writer.""" + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + call_count = {"n": 0} + + original_get = c2s.get + + def patched_get(*args, **kwargs): + call_count["n"] += 1 + if call_count["n"] == 1: + raise queue.Empty + + c2s.get = patched_get # type: ignore[method-assign] + t.post_writer(MagicMock(), c2s, s2c, MagicMock()) + assert call_count["n"] >= 2 + + def test_non_client_metadata_treated_as_none(self): + """session_message.metadata that's not ClientMessageMetadata → metadata is None.""" + t = _new_transport() + c2s: queue.Queue = queue.Queue() + s2c: queue.Queue = queue.Queue() + + msg = SessionMessage(_make_request_msg("tools/list", 5)) + msg.metadata = "not-a-client-metadata" + c2s.put(msg) + c2s.put(None) + + with patch.object(t, "_handle_post_request") as mock_post: + t.post_writer(MagicMock(), c2s, s2c, MagicMock()) + + ctx = mock_post.call_args[0][0] + assert ctx.metadata is None + + +# ── terminate_session ───────────────────────────────────────────────────────── + + +class TestTerminateSessionNew: + def test_no_session_id_skips(self): + t = _new_transport() + t.session_id = None + mock_client = MagicMock() + t.terminate_session(mock_client) + mock_client.delete.assert_not_called() + + def test_200_response_is_success(self): + t = _new_transport() + t.session_id = "sess-1" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.delete.return_value = mock_response + t.terminate_session(mock_client) + mock_client.delete.assert_called_once() + + def test_405_does_not_raise(self): + t = _new_transport() + t.session_id = "sess-1" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 405 + mock_client.delete.return_value = mock_response + t.terminate_session(mock_client) # Should not raise + + def test_non_200_logs_warning_does_not_raise(self): + t = _new_transport() + t.session_id = "sess-1" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 500 + mock_client.delete.return_value = mock_response + t.terminate_session(mock_client) # Should not raise + + def test_exception_is_swallowed(self): + t = _new_transport() + t.session_id = "sess-1" + mock_client = MagicMock() + mock_client.delete.side_effect = httpx.ConnectError("refused") + t.terminate_session(mock_client) # Should not raise + + +# ── get_session_id ──────────────────────────────────────────────────────────── + + +class TestGetSessionIdNew: + def test_returns_none_when_no_session(self): + t = _new_transport() + assert t.get_session_id() is None + + def test_returns_session_id_when_set(self): + t = _new_transport() + t.session_id = "my-session" + assert t.get_session_id() == "my-session" + + +# ── streamablehttp_client context manager ───────────────────────────────────── + + +class TestStreamablehttpClientContextManagerNew: + def test_yields_queues_and_callback(self): + from core.mcp.client.streamable_client import streamablehttp_client + + with patch("core.mcp.client.streamable_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + mock_client = MagicMock() + mock_cf.return_value.__enter__.return_value = mock_client + + with patch("core.mcp.client.streamable_client.ThreadPoolExecutor") as mock_exec: + mock_executor = MagicMock() + mock_exec.return_value = mock_executor + + with streamablehttp_client("http://example.com/mcp") as (s2c, c2s, get_sid): + assert s2c is not None + assert c2s is not None + assert callable(get_sid) + + def test_terminate_on_close_false_does_not_delete(self): + from core.mcp.client.streamable_client import streamablehttp_client + + with patch("core.mcp.client.streamable_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + mock_client = MagicMock() + mock_cf.return_value.__enter__.return_value = mock_client + + with patch("core.mcp.client.streamable_client.ThreadPoolExecutor") as mock_exec: + mock_executor = MagicMock() + mock_exec.return_value = mock_executor + + with streamablehttp_client("http://example.com/mcp", terminate_on_close=False) as (s2c, c2s, get_sid): + pass + mock_client.delete.assert_not_called() + + def test_queue_cleanup_on_outer_exception(self): + """Verify cleanup in finally block runs even when create_ssrf raises.""" + from core.mcp.client.streamable_client import streamablehttp_client + + with patch("core.mcp.client.streamable_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + mock_cf.side_effect = RuntimeError("connection failed") + + with pytest.raises(RuntimeError): + with streamablehttp_client("http://example.com/mcp"): + pass # pragma: no cover + + def test_timedelta_args_accepted(self): + from core.mcp.client.streamable_client import streamablehttp_client + + with patch("core.mcp.client.streamable_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + mock_client = MagicMock() + mock_cf.return_value.__enter__.return_value = mock_client + + with patch("core.mcp.client.streamable_client.ThreadPoolExecutor") as mock_exec: + mock_executor = MagicMock() + mock_exec.return_value = mock_executor + + with streamablehttp_client( + "http://example.com/mcp", + timeout=timedelta(seconds=15), + sse_read_timeout=timedelta(seconds=60), + ) as (s2c, c2s, get_sid): + assert callable(get_sid) + + def test_start_get_stream_submits_to_executor(self): + """When context starts, post_writer is submitted to executor.""" + from core.mcp.client.streamable_client import streamablehttp_client + + with patch("core.mcp.client.streamable_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + mock_client = MagicMock() + mock_cf.return_value.__enter__.return_value = mock_client + + submitted_calls = [] + + with patch("core.mcp.client.streamable_client.ThreadPoolExecutor") as mock_exec: + mock_executor = MagicMock() + + def capture_submit(fn, *args, **kwargs): + submitted_calls.append((fn, args)) + + mock_executor.submit.side_effect = capture_submit + mock_exec.return_value = mock_executor + + with streamablehttp_client("http://example.com/mcp") as (s2c, c2s, get_sid): + pass + + # post_writer was submitted + assert len(submitted_calls) >= 1 + + def test_cleanup_puts_none_sentinels_to_queues(self): + """After context exit, None sentinels are put into both queues.""" + from core.mcp.client.streamable_client import streamablehttp_client + + with patch("core.mcp.client.streamable_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + mock_client = MagicMock() + mock_cf.return_value.__enter__.return_value = mock_client + + with patch("core.mcp.client.streamable_client.ThreadPoolExecutor") as mock_exec: + mock_executor = MagicMock() + mock_exec.return_value = mock_executor + + with streamablehttp_client("http://example.com/mcp") as (s2c, c2s, get_sid): + pass + + # After context exit, None sentinel should be in c2s queue from cleanup + val = c2s.get_nowait() + assert val is None + + def test_terminate_called_when_session_id_set(self): + """When session_id is set and terminate_on_close=True, terminate_session is called.""" + from core.mcp.client.streamable_client import streamablehttp_client + + with patch("core.mcp.client.streamable_client.create_ssrf_proxy_mcp_http_client") as mock_cf: + mock_client = MagicMock() + mock_cf.return_value.__enter__.return_value = mock_client + + mock_delete_resp = MagicMock() + mock_delete_resp.status_code = 200 + mock_client.delete.return_value = mock_delete_resp + + with patch("core.mcp.client.streamable_client.ThreadPoolExecutor") as mock_exec: + mock_executor = MagicMock() + mock_exec.return_value = mock_executor + + with patch("core.mcp.client.streamable_client.StreamableHTTPTransport") as MockTransport: + mock_transport = MockTransport.return_value + mock_transport.request_headers = { + "Accept": "application/json, text/event-stream", + "content-type": "application/json", + } + mock_transport.timeout = 30 + mock_transport.sse_read_timeout = 300 + mock_transport.session_id = "active-session" + mock_transport.stop_event = MagicMock() + mock_transport.get_session_id = MagicMock(return_value="active-session") + + with streamablehttp_client("http://example.com/mcp", terminate_on_close=True) as ( + s2c, + c2s, + get_sid, + ): + pass + + mock_transport.terminate_session.assert_called_once_with(mock_client) + + +# ── Exception hierarchy ─────────────────────────────────────────────────────── + + +class TestExceptionHierarchyNew: + def test_streamable_http_error_is_exception(self): + err = StreamableHTTPError("test") + assert isinstance(err, Exception) + + def test_resumption_error_is_streamable_http_error(self): + err = ResumptionError("test") + assert isinstance(err, StreamableHTTPError) + assert isinstance(err, Exception) + + +# ── RequestContext dataclass ────────────────────────────────────────────────── + + +class TestRequestContextNew: + def test_creation(self): + import queue + + q: queue.Queue = queue.Queue() + ctx = RequestContext( + client=MagicMock(), + headers={"X-Test": "val"}, + session_id="sid", + session_message=SessionMessage(_make_request_msg()), + metadata=None, + server_to_client_queue=q, + sse_read_timeout=30.0, + ) + assert ctx.session_id == "sid" + assert ctx.sse_read_timeout == 30.0 + assert ctx.metadata is None diff --git a/api/tests/unit_tests/core/mcp/session/test_base_session.py b/api/tests/unit_tests/core/mcp/session/test_base_session.py new file mode 100644 index 0000000000..1dd916bcf1 --- /dev/null +++ b/api/tests/unit_tests/core/mcp/session/test_base_session.py @@ -0,0 +1,617 @@ +import queue +import time +from concurrent.futures import Future, ThreadPoolExecutor +from datetime import timedelta +from typing import Union +from unittest.mock import MagicMock, patch + +import pytest +from httpx import HTTPStatusError, Request, Response +from pydantic import BaseModel, ConfigDict, RootModel + +from core.mcp.error import MCPAuthError, MCPConnectionError +from core.mcp.session.base_session import BaseSession, RequestResponder +from core.mcp.types import ( + CancelledNotification, + ClientNotification, + ClientRequest, + ErrorData, + JSONRPCError, + JSONRPCMessage, + JSONRPCNotification, + JSONRPCResponse, + Notification, + RequestParams, + SessionMessage, +) +from core.mcp.types import ( + Request as MCPRequest, +) + + +class MockRequestParams(RequestParams): + name: str = "default" + model_config = ConfigDict(extra="allow") + + +class MockRequest(MCPRequest[MockRequestParams, str]): + method: str = "test/request" + params: MockRequestParams = MockRequestParams() + + +class MockResult(BaseModel): + result: str + + +class MockNotificationParams(BaseModel): + message: str + + +class MockNotification(Notification[MockNotificationParams, str]): + method: str = "test/notification" + params: MockNotificationParams + + +class ReceiveRequest(RootModel[Union[MockRequest, ClientRequest]]): + pass + + +class ReceiveNotification(RootModel[Union[CancelledNotification, MockNotification, JSONRPCNotification]]): + pass + + +class MockSession(BaseSession[MockRequest, MockNotification, MockResult, ReceiveRequest, ReceiveNotification]): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.received_requests = [] + self.received_notifications = [] + self.handled_incoming = [] + + def _received_request(self, responder): + self.received_requests.append(responder) + + def _received_notification(self, notification): + self.received_notifications.append(notification) + + def _handle_incoming(self, item): + self.handled_incoming.append(item) + + +@pytest.fixture +def streams(): + return queue.Queue(), queue.Queue() + + +@pytest.mark.timeout(5) +def test_request_responder_respond(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + on_complete = MagicMock() + request = ReceiveRequest(MockRequest(method="test", params=MockRequestParams(name="test"))) + + responder = RequestResponder( + request_id=1, request_meta=None, request=request, session=session, on_complete=on_complete + ) + + with pytest.raises(RuntimeError, match="RequestResponder must be used as a context manager"): + responder.respond(MockResult(result="ok")) + + with responder as r: + r.respond(MockResult(result="ok")) + with pytest.raises(AssertionError, match="Request already responded to"): + r.respond(MockResult(result="error")) + + assert responder.completed is True + on_complete.assert_called_once_with(responder) + + msg = write_stream.get_nowait() + assert isinstance(msg.message.root, JSONRPCResponse) + assert msg.message.root.result == {"result": "ok"} + + +@pytest.mark.timeout(5) +def test_request_responder_cancel(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + on_complete = MagicMock() + request = ReceiveRequest(MockRequest(method="test", params=MockRequestParams(name="test"))) + + responder = RequestResponder( + request_id=1, request_meta=None, request=request, session=session, on_complete=on_complete + ) + + with pytest.raises(RuntimeError, match="RequestResponder must be used as a context manager"): + responder.cancel() + + with responder as r: + r.cancel() + + assert responder.completed is True + on_complete.assert_called_once_with(responder) + + msg = write_stream.get_nowait() + assert isinstance(msg.message.root, JSONRPCError) + assert msg.message.root.error.message == "Request cancelled" + + +@pytest.mark.timeout(10) +def test_base_session_lifecycle(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session as s: + assert isinstance(s, MockSession) + assert s._executor is not None + assert s._receiver_future is not None + + session._receiver_future.result(timeout=5.0) + assert session._receiver_future.done() + + +@pytest.mark.timeout(5) +def test_send_request_success(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + def mock_response(): + try: + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + response = JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"result": "hello world"}) + read_stream.put(SessionMessage(message=JSONRPCMessage(response))) + except Exception: + pass + + import threading + + t = threading.Thread(target=mock_response, daemon=True) + t.start() + + with session: + result = session.send_request(request, MockResult) + assert result.result == "hello world" + t.join(timeout=1) + + +@pytest.mark.timeout(5) +def test_send_request_retry_loop_coverage(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + def mock_delayed_response(): + try: + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + time.sleep(0.2) + response = JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"result": "slow"}) + read_stream.put(SessionMessage(message=JSONRPCMessage(response))) + except: + pass + + import threading + + t = threading.Thread(target=mock_delayed_response, daemon=True) + t.start() + + with session: + result = session.send_request(request, MockResult, request_read_timeout_seconds=timedelta(seconds=0.1)) + assert result.result == "slow" + t.join(timeout=1) + + +@pytest.mark.timeout(5) +def test_send_request_jsonrpc_error(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + def mock_error(): + try: + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + error = JSONRPCError(jsonrpc="2.0", id=req_id, error=ErrorData(code=-32000, message="Error")) + read_stream.put(SessionMessage(message=JSONRPCMessage(error))) + except: + pass + + import threading + + t = threading.Thread(target=mock_error, daemon=True) + t.start() + + with session: + with pytest.raises(MCPConnectionError) as exc: + session.send_request(request, MockResult) + assert exc.value.args[0].message == "Error" + t.join(timeout=1) + + +@pytest.mark.timeout(5) +def test_send_request_auth_error(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + def mock_error(): + try: + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + error = JSONRPCError(jsonrpc="2.0", id=req_id, error=ErrorData(code=401, message="Unauthorized")) + read_stream.put(SessionMessage(message=JSONRPCMessage(error))) + except: + pass + + import threading + + t = threading.Thread(target=mock_error, daemon=True) + t.start() + + with session: + with pytest.raises(MCPAuthError): + session.send_request(request, MockResult) + t.join(timeout=1) + + +@pytest.mark.timeout(5) +def test_send_request_http_status_error_coverage(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + def mock_direct_http_error(): + try: + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + # To cover line 263 in base_session.py, we MUST put non-401 HTTPStatusError + # DIRECTLY into response_streams, as _receive_loop would convert it to JSONRPCError. + response = Response(status_code=403, request=Request("GET", "http://test")) + error = HTTPStatusError("Forbidden", request=response.request, response=response) + session._response_streams[req_id].put(error) + except: + pass + + import threading + + t = threading.Thread(target=mock_direct_http_error, daemon=True) + t.start() + + # We still need the session for request ID generation and queue setup + with session: + with pytest.raises(MCPConnectionError) as exc: + session.send_request(request, MockResult) + assert exc.value.args[0].code == 403 + t.join(timeout=1) + + +@pytest.mark.timeout(5) +def test_send_request_http_status_auth_error(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + def mock_error(): + try: + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + response = Response(status_code=401, request=Request("GET", "http://test")) + error = HTTPStatusError("Unauthorized", request=response.request, response=response) + read_stream.put(error) + except: + pass + + import threading + + t = threading.Thread(target=mock_error, daemon=True) + t.start() + + with session: + with pytest.raises(MCPAuthError): + session.send_request(request, MockResult) + t.join(timeout=1) + + +@pytest.mark.timeout(5) +def test_send_notification(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + notification = MockNotification(method="notify", params=MockNotificationParams(message="hi")) + + session.send_notification(notification, related_request_id="rel-1") + + msg = write_stream.get_nowait() + assert isinstance(msg.message.root, JSONRPCNotification) + assert msg.message.root.method == "notify" + assert msg.message.root.params == {"message": "hi"} + assert msg.metadata.related_request_id == "rel-1" + + +@pytest.mark.timeout(10) +def test_receive_loop_request(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session: + req_payload = {"jsonrpc": "2.0", "id": 1, "method": "test/request", "params": {"name": "test"}} + read_stream.put(SessionMessage(message=JSONRPCMessage.model_validate(req_payload))) + + for _ in range(30): + if session.received_requests: + break + time.sleep(0.1) + + assert len(session.received_requests) == 1 + responder = session.received_requests[0] + assert responder.request_id == 1 + assert responder.request.root.method == "test/request" + + +@pytest.mark.timeout(10) +def test_receive_loop_notification(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session: + notif_payload = {"jsonrpc": "2.0", "method": "test/notification", "params": {"message": "hello"}} + read_stream.put(SessionMessage(message=JSONRPCMessage.model_validate(notif_payload))) + + for _ in range(30): + if session.received_notifications: + break + time.sleep(0.1) + + assert len(session.received_notifications) == 1 + assert isinstance(session.received_notifications[0].root, MockNotification) + assert session.received_notifications[0].root.method == "test/notification" + + +@pytest.mark.timeout(15) +def test_receive_loop_cancel_notification(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ClientNotification) + + with session: + req_payload = {"jsonrpc": "2.0", "id": "req-1", "method": "test/request", "params": {"name": "test"}} + read_stream.put(SessionMessage(message=JSONRPCMessage.model_validate(req_payload))) + + for _ in range(30): + if "req-1" in session._in_flight: + break + time.sleep(0.1) + + assert "req-1" in session._in_flight + responder = session._in_flight["req-1"] + + with responder: + cancel_payload = {"jsonrpc": "2.0", "method": "notifications/cancelled", "params": {"requestId": "req-1"}} + read_stream.put(SessionMessage(message=JSONRPCMessage.model_validate(cancel_payload))) + + for _ in range(30): + if responder.completed: + break + time.sleep(0.1) + + assert responder.completed is True + msg = write_stream.get(timeout=2) + assert isinstance(msg.message.root, JSONRPCError) + assert msg.message.root.id == "req-1" + + +@pytest.mark.timeout(10) +def test_receive_loop_exception(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session: + read_stream.put(Exception("Unexpected error")) + for _ in range(30): + if any(isinstance(x, Exception) for x in session.handled_incoming): + break + time.sleep(0.1) + + assert any(isinstance(x, Exception) and str(x) == "Unexpected error" for x in session.handled_incoming) + + +@pytest.mark.timeout(10) +def test_receive_loop_http_status_error(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session: + session._request_id = 1 + resp_queue = queue.Queue() + session._response_streams[0] = resp_queue + + response = Response(status_code=401, request=Request("GET", "http://test")) + # Using 401 specifically as _receive_loop preserves it + error = HTTPStatusError("Unauthorized", request=response.request, response=response) + read_stream.put(error) + + got = resp_queue.get(timeout=2) + assert isinstance(got, HTTPStatusError) + + +@pytest.mark.timeout(10) +def test_receive_loop_http_status_error_non_401(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session: + session._request_id = 1 + resp_queue = queue.Queue() + session._response_streams[0] = resp_queue + + response = Response(status_code=500, request=Request("GET", "http://test")) + error = HTTPStatusError("Server Error", request=response.request, response=response) + read_stream.put(error) + + got = resp_queue.get(timeout=2) + assert isinstance(got, JSONRPCError) + assert got.error.code == 500 + + +@pytest.mark.timeout(5) +def test_check_receiver_status_fail(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + executor = ThreadPoolExecutor(max_workers=1) + + def raise_err(): + raise RuntimeError("Receiver failed") + + future = executor.submit(raise_err) + session._receiver_future = future + + try: + future.result() + except: + pass + + with pytest.raises(RuntimeError, match="Receiver failed"): + session.check_receiver_status() + executor.shutdown() + + +@pytest.mark.timeout(10) +def test_receive_loop_unknown_request_id(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session: + resp = JSONRPCResponse(jsonrpc="2.0", id=999, result={"ok": True}) + read_stream.put(SessionMessage(message=JSONRPCMessage(resp))) + + for _ in range(30): + if any(isinstance(x, RuntimeError) and "Server Error" in str(x) for x in session.handled_incoming): + break + time.sleep(0.1) + + assert any("Server Error" in str(x) for x in session.handled_incoming) + + +@pytest.mark.timeout(10) +def test_receive_loop_http_error_unknown_id(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with session: + response = Response(status_code=401, request=Request("GET", "http://test")) + error = HTTPStatusError("Unauthorized", request=response.request, response=response) + read_stream.put(error) + + for _ in range(30): + if any(isinstance(x, RuntimeError) and "unknown request ID" in str(x) for x in session.handled_incoming): + break + time.sleep(0.1) + + assert any("unknown request ID" in str(x) for x in session.handled_incoming) + + +@pytest.mark.timeout(10) +def test_receive_loop_validation_error_notification(streams): + from core.mcp.session.base_session import logger + + with patch.object(logger, "warning") as mock_warning: + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, RootModel[MockNotification]) + + with session: + notif_payload = {"jsonrpc": "2.0", "method": "bad", "params": {"some": "data"}} + read_stream.put(SessionMessage(message=JSONRPCMessage.model_validate(notif_payload))) + time.sleep(1.0) + + assert mock_warning.called + + +@pytest.mark.timeout(5) +def test_send_request_none_response(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + def mock_none(): + try: + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + session._response_streams[req_id].put(None) + except: + pass + + import threading + + t = threading.Thread(target=mock_none, daemon=True) + t.start() + + with session: + with pytest.raises(MCPConnectionError) as exc: + session.send_request(request, MockResult) + assert exc.value.args[0].message == "No response received" + t.join(timeout=1) + + +@pytest.mark.timeout(15) +def test_session_exit_timeout(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + mock_future = MagicMock(spec=Future) + mock_future.result.side_effect = TimeoutError() + mock_future.done.return_value = False + + session._receiver_future = mock_future + session._executor = MagicMock(spec=ThreadPoolExecutor) + + session.__exit__(None, None, None) + + mock_future.cancel.assert_called_once() + session._executor.shutdown.assert_called_once_with(wait=False) + + +@pytest.mark.timeout(10) +def test_receive_loop_fatal_exception(streams): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + with patch.object(read_stream, "get", side_effect=RuntimeError("Fatal loop error")): + with patch("core.mcp.session.base_session.logger") as mock_logger: + with pytest.raises(RuntimeError, match="Fatal loop error"): + with session: + pass + mock_logger.exception.assert_called_with("Error in message processing loop") + + +@pytest.mark.timeout(5) +def test_receive_loop_empty_coverage(streams): + with patch("core.mcp.session.base_session.DEFAULT_RESPONSE_READ_TIMEOUT", 0.1): + read_stream, write_stream = streams + session = MockSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + with session: + time.sleep(0.3) + + +@pytest.mark.timeout(2) +def test_base_methods_noop(streams): + read_stream, write_stream = streams + session = BaseSession(read_stream, write_stream, ReceiveRequest, ReceiveNotification) + + session._received_request(MagicMock()) + session._received_notification(MagicMock()) + session.send_progress_notification("token", 0.5) + session._handle_incoming(MagicMock()) + + +@pytest.mark.timeout(5) +def test_send_request_session_timeout_retry_6(streams): + read_stream, write_stream = streams + session = MockSession( + read_stream, write_stream, ReceiveRequest, ReceiveNotification, read_timeout_seconds=timedelta(seconds=0.1) + ) + + request = MockRequest(method="test", params=MockRequestParams(name="world")) + + with patch.object(session, "check_receiver_status", side_effect=[None, RuntimeError("timeout_broken")]): + with pytest.raises(RuntimeError, match="timeout_broken"): + session.send_request(request, MockResult) diff --git a/api/tests/unit_tests/core/mcp/session/test_client_session.py b/api/tests/unit_tests/core/mcp/session/test_client_session.py new file mode 100644 index 0000000000..c7b9d3cfa9 --- /dev/null +++ b/api/tests/unit_tests/core/mcp/session/test_client_session.py @@ -0,0 +1,576 @@ +import queue +from unittest.mock import MagicMock + +import pytest +from pydantic import AnyUrl + +from core.mcp import types +from core.mcp.session.base_session import RequestResponder, SessionMessage +from core.mcp.session.client_session import ( + ClientSession, + _default_list_roots_callback, + _default_logging_callback, + _default_message_handler, + _default_sampling_callback, +) + + +@pytest.fixture +def streams(): + return queue.Queue(), queue.Queue() + + +def test_client_session_init(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + assert session._client_info.name == "Dify" + assert session._sampling_callback == _default_sampling_callback + assert session._list_roots_callback == _default_list_roots_callback + assert session._logging_callback == _default_logging_callback + assert session._message_handler == _default_message_handler + + +def test_client_session_init_custom(streams): + read_stream, write_stream = streams + sampling_cb = MagicMock() + list_roots_cb = MagicMock() + logging_cb = MagicMock() + msg_handler = MagicMock() + client_info = types.Implementation(name="Custom", version="1.0") + + session = ClientSession( + read_stream, + write_stream, + sampling_callback=sampling_cb, + list_roots_callback=list_roots_cb, + logging_callback=logging_cb, + message_handler=msg_handler, + client_info=client_info, + ) + + assert session._client_info == client_info + assert session._sampling_callback == sampling_cb + assert session._list_roots_callback == list_roots_cb + assert session._logging_callback == logging_cb + assert session._message_handler == msg_handler + + +def test_initialize_success(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + expected_result = types.InitializeResult( + protocolVersion=types.LATEST_PROTOCOL_VERSION, + capabilities=types.ServerCapabilities(), + serverInfo=types.Implementation(name="test-server", version="1.0"), + ) + + def mock_server(): + # Handle initialize request + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result=expected_result.model_dump()) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + # Expect initialized notification + notif = write_stream.get(timeout=2) + assert notif.message.root.method == "notifications/initialized" + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.initialize() + assert result.protocolVersion == types.LATEST_PROTOCOL_VERSION + assert result.serverInfo.name == "test-server" + + t.join(timeout=1) + + +def test_initialize_custom_capabilities(streams): + read_stream, write_stream = streams + session = ClientSession( + read_stream, write_stream, sampling_callback=lambda c, p: None, list_roots_callback=lambda c: None + ) + + def mock_server(): + msg = write_stream.get(timeout=2) + params = msg.message.root.params + # Check that capabilities are set because we provided custom callbacks + assert params["capabilities"]["sampling"] is not None + assert params["capabilities"]["roots"]["listChanged"] is True + + req_id = msg.message.root.id + resp = types.JSONRPCResponse( + jsonrpc="2.0", + id=req_id, + result={ + "protocolVersion": types.LATEST_PROTOCOL_VERSION, + "capabilities": {}, + "serverInfo": {"name": "test", "version": "1.0"}, + }, + ) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + write_stream.get(timeout=2) # initialized notif + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + session.initialize() + t.join(timeout=1) + + +def test_initialize_unsupported_version(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + req_id = msg.message.root.id + resp = types.JSONRPCResponse( + jsonrpc="2.0", + id=req_id, + result={ + "protocolVersion": "0.0.1", # Unsupported + "capabilities": {}, + "serverInfo": {"name": "test", "version": "1.0"}, + }, + ) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + with pytest.raises(RuntimeError, match="Unsupported protocol version"): + session.initialize() + t.join(timeout=1) + + +def test_send_ping(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "ping" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + session.send_ping() + t.join(timeout=1) + + +def test_send_progress_notification(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + session.send_progress_notification(progress_token="token", progress=50.0, total=100.0) + + msg = write_stream.get_nowait() + assert msg.message.root.method == "notifications/progress" + assert msg.message.root.params["progressToken"] == "token" + assert msg.message.root.params["progress"] == 50.0 + assert msg.message.root.params["total"] == 100.0 + + +def test_set_logging_level(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "logging/setLevel" + assert msg.message.root.params["level"] == "debug" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + session.set_logging_level("debug") + t.join(timeout=1) + + +def test_list_resources(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "resources/list" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"resources": []}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.list_resources() + assert result.resources == [] + t.join(timeout=1) + + +def test_list_resource_templates(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "resources/templates/list" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"resourceTemplates": []}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.list_resource_templates() + assert result.resourceTemplates == [] + t.join(timeout=1) + + +def test_read_resource(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + uri = AnyUrl("file:///test") + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "resources/read" + assert msg.message.root.params["uri"] == str(uri) + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"contents": []}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.read_resource(uri) + assert result.contents == [] + t.join(timeout=1) + + +def test_subscribe_resource(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + uri = AnyUrl("file:///test") + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "resources/subscribe" + assert msg.message.root.params["uri"] == str(uri) + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + session.subscribe_resource(uri) + t.join(timeout=1) + + +def test_unsubscribe_resource(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + uri = AnyUrl("file:///test") + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "resources/unsubscribe" + assert msg.message.root.params["uri"] == str(uri) + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + session.unsubscribe_resource(uri) + t.join(timeout=1) + + +def test_call_tool(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "tools/call" + assert msg.message.root.params["name"] == "test-tool" + assert msg.message.root.params["arguments"] == {"arg": 1} + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"content": [], "isError": False}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.call_tool("test-tool", arguments={"arg": 1}) + assert result.isError is False + t.join(timeout=1) + + +def test_list_prompts(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "prompts/list" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"prompts": []}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.list_prompts() + assert result.prompts == [] + t.join(timeout=1) + + +def test_get_prompt(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "prompts/get" + assert msg.message.root.params["name"] == "test-prompt" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"messages": []}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.get_prompt("test-prompt") + assert result.messages == [] + t.join(timeout=1) + + +def test_complete(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + ref = types.PromptReference(type="ref/prompt", name="test") + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "completion/complete" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"completion": {"values": [], "hasMore": False}}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.complete(ref, argument={"name": "val", "value": "x"}) + assert result.completion.hasMore is False + t.join(timeout=1) + + +def test_list_tools(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + def mock_server(): + msg = write_stream.get(timeout=2) + assert msg.message.root.method == "tools/list" + req_id = msg.message.root.id + resp = types.JSONRPCResponse(jsonrpc="2.0", id=req_id, result={"tools": []}) + read_stream.put(SessionMessage(message=types.JSONRPCMessage(resp))) + + import threading + + t = threading.Thread(target=mock_server, daemon=True) + t.start() + + with session: + result = session.list_tools() + assert result.tools == [] + t.join(timeout=1) + + +def test_send_roots_list_changed(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + session.send_roots_list_changed() + + msg = write_stream.get_nowait() + assert msg.message.root.method == "notifications/roots/list_changed" + + +def test_received_request_sampling(streams): + read_stream, write_stream = streams + sampling_cb = MagicMock( + return_value=types.CreateMessageResult( + role="assistant", content=types.TextContent(type="text", text="hello"), model="gpt-4" + ) + ) + session = ClientSession(read_stream, write_stream, sampling_callback=sampling_cb) + + req = types.ServerRequest( + root=types.CreateMessageRequest( + method="sampling/createMessage", params=types.CreateMessageRequestParams(messages=[], maxTokens=100) + ) + ) + + responder = RequestResponder(request_id=1, request_meta=None, request=req, session=session, on_complete=MagicMock()) + + session._received_request(responder) + + msg = write_stream.get_nowait() + assert msg.message.root.result["model"] == "gpt-4" + sampling_cb.assert_called_once() + + +def test_received_request_list_roots(streams): + read_stream, write_stream = streams + list_roots_cb = MagicMock(return_value=types.ListRootsResult(roots=[])) + session = ClientSession(read_stream, write_stream, list_roots_callback=list_roots_cb) + + req = types.ServerRequest(root=types.ListRootsRequest(method="roots/list")) + + responder = RequestResponder(request_id=1, request_meta=None, request=req, session=session, on_complete=MagicMock()) + + session._received_request(responder) + + msg = write_stream.get_nowait() + assert msg.message.root.result["roots"] == [] + list_roots_cb.assert_called_once() + + +def test_received_request_ping(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + req = types.ServerRequest(root=types.PingRequest(method="ping")) + + responder = RequestResponder(request_id=1, request_meta=None, request=req, session=session, on_complete=MagicMock()) + + session._received_request(responder) + + msg = write_stream.get_nowait() + assert msg.message.root.result == {} + + +def test_handle_incoming(streams): + read_stream, write_stream = streams + msg_handler = MagicMock() + session = ClientSession(read_stream, write_stream, message_handler=msg_handler) + + item = MagicMock() + session._handle_incoming(item) + msg_handler.assert_called_once_with(item) + + +def test_received_notification_logging(streams): + read_stream, write_stream = streams + logging_cb = MagicMock() + session = ClientSession(read_stream, write_stream, logging_callback=logging_cb) + + notif = types.ServerNotification( + root=types.LoggingMessageNotification( + method="notifications/message", + params=types.LoggingMessageNotificationParams(level="info", data={"msg": "test"}), + ) + ) + + session._received_notification(notif) + logging_cb.assert_called_once() + assert logging_cb.call_args[0][0].level == "info" + + +def test_default_message_handler(): + # Exception case + with pytest.raises(ValueError, match="test error"): + _default_message_handler(Exception("test error")) + + # Notification case - should do nothing + _default_message_handler(MagicMock(spec=types.ServerNotification)) + + # RequestResponder case - should do nothing + _default_message_handler(MagicMock(spec=RequestResponder)) + + +def test_default_sampling_callback(): + ctx = MagicMock() + params = MagicMock() + res = _default_sampling_callback(ctx, params) + assert res.code == types.INVALID_REQUEST + assert "not supported" in res.message + + +def test_default_list_roots_callback(): + ctx = MagicMock() + res = _default_list_roots_callback(ctx) + assert res.code == types.INVALID_REQUEST + assert "not supported" in res.message + + +def test_default_logging_callback(): + params = MagicMock() + _default_logging_callback(params) # Should do nothing + + +def test_received_notification_unknown(streams): + read_stream, write_stream = streams + session = ClientSession(read_stream, write_stream) + + # Use a notification type that is NOT LoggingMessageNotification + notif = types.ServerNotification( + root=types.ResourceListChangedNotification(method="notifications/resources/list_changed") + ) + + session._received_notification(notif) + # Should just pass (case _:) diff --git a/api/tests/unit_tests/core/mcp/test_mcp_client.py b/api/tests/unit_tests/core/mcp/test_mcp_client.py index c0420d3371..c245b4a77e 100644 --- a/api/tests/unit_tests/core/mcp/test_mcp_client.py +++ b/api/tests/unit_tests/core/mcp/test_mcp_client.py @@ -2,13 +2,16 @@ from contextlib import ExitStack from types import TracebackType -from unittest.mock import Mock, patch +from unittest.mock import MagicMock, Mock, patch import pytest +from sqlalchemy.orm import Session -from core.mcp.error import MCPConnectionError +from core.entities.mcp_provider import MCPProviderEntity +from core.mcp.auth_client import MCPClientWithAuthRetry +from core.mcp.error import MCPAuthError, MCPConnectionError from core.mcp.mcp_client import MCPClient -from core.mcp.types import CallToolResult, ListToolsResult, TextContent, Tool, ToolAnnotations +from core.mcp.types import CallToolResult, ListToolsResult, OAuthTokens, TextContent, Tool, ToolAnnotations class TestMCPClient: @@ -380,3 +383,256 @@ class TestMCPClient: timeout=30.0, sse_read_timeout=60.0, ) + + +class TestMCPClientWithAuthRetry: + """Test suite for MCPClientWithAuthRetry.""" + + @pytest.fixture + def mock_provider(self): + provider = MagicMock(spec=MCPProviderEntity) + provider.id = "test-provider-id" + provider.tenant_id = "test-tenant-id" + provider.retrieve_tokens.return_value = OAuthTokens( + access_token="new-token", + token_type="Bearer", + expires_in=3600, + refresh_token="refresh-token", + ) + return provider + + @pytest.fixture + def auth_client(self, mock_provider): + client = MCPClientWithAuthRetry( + server_url="http://test.example.com", + headers={"Authorization": "Bearer old-token"}, + provider_entity=mock_provider, + authorization_code="test-code", + by_server_id=True, + ) + return client + + def test_init(self, mock_provider): + """Test initialization.""" + client = MCPClientWithAuthRetry( + server_url="http://test.example.com", + headers={"Authorization": "Bearer test"}, + timeout=30.0, + provider_entity=mock_provider, + authorization_code="initial-code", + by_server_id=True, + ) + + assert client.server_url == "http://test.example.com" + assert client.headers == {"Authorization": "Bearer test"} + assert client.timeout == 30.0 + assert client.provider_entity == mock_provider + assert client.authorization_code == "initial-code" + assert client.by_server_id is True + assert client._has_retried is False + + @patch("core.mcp.auth_client.db") + @patch("core.mcp.auth_client.Session") + @patch("services.tools.mcp_tools_manage_service.MCPToolManageService") + def test_handle_auth_error_success( + self, mock_service_class, mock_session_class, mock_db, auth_client, mock_provider + ): + mock_session = MagicMock(spec=Session) + mock_session_class.return_value.__enter__.return_value = mock_session + + mock_service = mock_service_class.return_value + new_provider = MagicMock(spec=MCPProviderEntity) + new_provider.retrieve_tokens.return_value = OAuthTokens( + access_token="new-access-token", + token_type="Bearer", + expires_in=3600, + refresh_token="new-refresh-token", + ) + mock_service.get_provider_entity.return_value = new_provider + + # MCPAuthError parses resource_metadata and scope from www_authenticate_header + www_auth = 'Bearer resource_metadata="http://meta", scope="read"' + error = MCPAuthError("Auth failed", www_authenticate_header=www_auth) + + auth_client._handle_auth_error(error) + + # Verify service calls - error.resource_metadata_url and error.scope_hint are parsed from header + mock_service.auth_with_actions.assert_called_once_with( + mock_provider, + "test-code", + resource_metadata_url="http://meta", + scope_hint="read", + ) + mock_service.get_provider_entity.assert_called_once_with( + mock_provider.id, mock_provider.tenant_id, by_server_id=True + ) + + # Verify client updates + assert auth_client.headers["Authorization"] == "Bearer new-access-token" + assert auth_client.authorization_code is None + assert auth_client._has_retried is True + assert auth_client.provider_entity == new_provider + + def test_handle_auth_error_no_provider(self, auth_client): + """Test auth error handling when no provider entity is set.""" + auth_client.provider_entity = None + error = MCPAuthError("Auth failed") + + with pytest.raises(MCPAuthError) as exc_info: + auth_client._handle_auth_error(error) + + assert exc_info.value == error + + def test_handle_auth_error_already_retried(self, auth_client): + """Test auth error handling when already retried.""" + auth_client._has_retried = True + error = MCPAuthError("Auth failed") + + with pytest.raises(MCPAuthError) as exc_info: + auth_client._handle_auth_error(error) + + assert exc_info.value == error + + @patch("core.mcp.auth_client.db") + @patch("core.mcp.auth_client.Session") + @patch("services.tools.mcp_tools_manage_service.MCPToolManageService") + def test_handle_auth_error_no_token( + self, mock_service_class, mock_session_class, mock_db, auth_client, mock_provider + ): + """Test auth error handling when no token is received.""" + mock_session_class.return_value.__enter__.return_value = MagicMock() + mock_service = mock_service_class.return_value + + new_provider = MagicMock(spec=MCPProviderEntity) + new_provider.retrieve_tokens.return_value = None + mock_service.get_provider_entity.return_value = new_provider + + error = MCPAuthError("Auth failed") + + with pytest.raises(MCPAuthError) as exc_info: + auth_client._handle_auth_error(error) + + assert "Authentication failed - no token received" in str(exc_info.value) + + @patch("core.mcp.auth_client.db") + @patch("core.mcp.auth_client.Session") + @patch("services.tools.mcp_tools_manage_service.MCPToolManageService") + def test_handle_auth_error_generic_exception(self, mock_service_class, mock_session_class, mock_db, auth_client): + """Test auth error handling when a generic exception occurs.""" + mock_session_class.side_effect = Exception("DB error") + + error = MCPAuthError("Auth failed") + + with pytest.raises(MCPAuthError) as exc_info: + auth_client._handle_auth_error(error) + + assert "Authentication retry failed: DB error" in str(exc_info.value) + + @patch("core.mcp.auth_client.db") + @patch("core.mcp.auth_client.Session") + @patch("services.tools.mcp_tools_manage_service.MCPToolManageService") + def test_handle_auth_error_mcp_auth_error_propagation( + self, mock_service_class, mock_session_class, mock_db, auth_client + ): + """Test that MCPAuthError during refresh is propagated as is.""" + mock_session_class.return_value.__enter__.return_value = MagicMock() + mock_service = mock_service_class.return_value + mock_service.auth_with_actions.side_effect = MCPAuthError("Refresh failed") + + error = MCPAuthError("Initial auth failed") + + with pytest.raises(MCPAuthError) as exc_info: + auth_client._handle_auth_error(error) + + assert "Refresh failed" in str(exc_info.value) + + def test_execute_with_retry_success_first_try(self, auth_client): + """Test execution success on first try.""" + mock_func = MagicMock(return_value="success") + + result = auth_client._execute_with_retry(mock_func, "arg1", kwarg1="val1") + + assert result == "success" + mock_func.assert_called_once_with("arg1", kwarg1="val1") + assert auth_client._has_retried is False + + @patch.object(MCPClientWithAuthRetry, "_handle_auth_error") + @patch.object(MCPClientWithAuthRetry, "_initialize") + def test_execute_with_retry_success_on_retry_initialized(self, mock_initialize, mock_handle_auth, auth_client): + """Test execution success on retry after auth error when client was already initialized.""" + mock_func = MagicMock() + mock_func.side_effect = [MCPAuthError("Auth failed"), "success"] + + auth_client._initialized = True + auth_client._exit_stack = MagicMock() + + result = auth_client._execute_with_retry(mock_func, "arg") + + assert result == "success" + assert mock_func.call_count == 2 + mock_handle_auth.assert_called_once() + mock_initialize.assert_called_once() + auth_client._exit_stack.close.assert_called_once() + assert auth_client._has_retried is False + + @patch.object(MCPClientWithAuthRetry, "_handle_auth_error") + @patch.object(MCPClientWithAuthRetry, "_initialize") + def test_execute_with_retry_success_on_retry_not_initialized(self, mock_initialize, mock_handle_auth, auth_client): + """Test retry when client was NOT initialized (skips cleanup/re-init).""" + mock_func = MagicMock() + mock_func.side_effect = [MCPAuthError("Auth failed"), "result"] + + auth_client._initialized = False + + result = auth_client._execute_with_retry(mock_func, "arg") + + assert result == "result" + assert mock_func.call_count == 2 + mock_handle_auth.assert_called_once() + mock_initialize.assert_not_called() + assert auth_client._has_retried is False + + @patch.object(MCPClientWithAuthRetry, "_handle_auth_error") + def test_execute_with_retry_failure_on_retry(self, mock_handle_auth, auth_client): + """Test execution failure even after retry.""" + mock_func = MagicMock() + mock_func.side_effect = [MCPAuthError("First fail"), MCPAuthError("Second fail")] + + with pytest.raises(MCPAuthError) as exc_info: + auth_client._execute_with_retry(mock_func, "arg") + + assert "Second fail" in str(exc_info.value) + assert mock_func.call_count == 2 + mock_handle_auth.assert_called_once() + assert auth_client._has_retried is False + + @patch.object(MCPClientWithAuthRetry, "_execute_with_retry") + def test_auth_client_context_manager_enter(self, mock_execute_retry, auth_client): + """Test context manager __enter__.""" + auth_client.__enter__() + + mock_execute_retry.assert_called_once() + func = mock_execute_retry.call_args[0][0] + + with patch("core.mcp.mcp_client.MCPClient.__enter__") as mock_base_enter: + result = func() + assert result == auth_client + mock_base_enter.assert_called_once() + + @patch.object(MCPClientWithAuthRetry, "_execute_with_retry") + def test_auth_client_list_tools(self, mock_execute_retry, auth_client): + """Test list_tools with retry.""" + auth_client.list_tools() + + mock_execute_retry.assert_called_once() + assert mock_execute_retry.call_args[0][0].__name__ == "list_tools" + + @patch.object(MCPClientWithAuthRetry, "_execute_with_retry") + def test_auth_client_invoke_tool(self, mock_execute_retry, auth_client): + """Test invoke_tool with retry.""" + auth_client.invoke_tool("test-tool", {"arg": "val"}) + + mock_execute_retry.assert_called_once() + assert mock_execute_retry.call_args[0][0].__name__ == "invoke_tool" + assert mock_execute_retry.call_args[0][1] == "test-tool" + assert mock_execute_retry.call_args[0][2] == {"arg": "val"} diff --git a/api/tests/unit_tests/core/memory/test_token_buffer_memory.py b/api/tests/unit_tests/core/memory/test_token_buffer_memory.py new file mode 100644 index 0000000000..5ecfe01808 --- /dev/null +++ b/api/tests/unit_tests/core/memory/test_token_buffer_memory.py @@ -0,0 +1,969 @@ +"""Comprehensive unit tests for core/memory/token_buffer_memory.py""" + +from unittest.mock import MagicMock, patch +from uuid import uuid4 + +import pytest + +from core.memory.token_buffer_memory import TokenBufferMemory +from dify_graph.model_runtime.entities import ( + AssistantPromptMessage, + ImagePromptMessageContent, + PromptMessageRole, + TextPromptMessageContent, + UserPromptMessage, +) +from models.model import AppMode + +# --------------------------------------------------------------------------- +# Helpers / shared fixtures +# --------------------------------------------------------------------------- + + +def _make_conversation(mode: AppMode = AppMode.CHAT) -> MagicMock: + """Return a minimal Conversation mock.""" + conv = MagicMock() + conv.id = str(uuid4()) + conv.mode = mode + conv.model_config = {} + return conv + + +def _make_model_instance() -> MagicMock: + """Return a ModelInstance mock whose token counter returns a constant.""" + mi = MagicMock() + mi.get_llm_num_tokens.return_value = 100 + return mi + + +def _make_message(answer: str = "hello", answer_tokens: int = 5) -> MagicMock: + msg = MagicMock() + msg.id = str(uuid4()) + msg.query = "user query" + msg.answer = answer + msg.answer_tokens = answer_tokens + msg.workflow_run_id = str(uuid4()) + msg.created_at = MagicMock() + return msg + + +# =========================================================================== +# Tests for __init__ and workflow_run_repo property +# =========================================================================== + + +class TestInit: + def test_init_stores_conversation_and_model_instance(self): + conv = _make_conversation() + mi = _make_model_instance() + mem = TokenBufferMemory(conversation=conv, model_instance=mi) + assert mem.conversation is conv + assert mem.model_instance is mi + assert mem._workflow_run_repo is None + + def test_workflow_run_repo_is_created_lazily(self): + conv = _make_conversation() + mi = _make_model_instance() + mem = TokenBufferMemory(conversation=conv, model_instance=mi) + + mock_repo = MagicMock() + with ( + patch("core.memory.token_buffer_memory.sessionmaker") as mock_sm, + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.DifyAPIRepositoryFactory.create_api_workflow_run_repository", + return_value=mock_repo, + ), + ): + mock_db.engine = MagicMock() + repo = mem.workflow_run_repo + assert repo is mock_repo + assert mem._workflow_run_repo is mock_repo + + def test_workflow_run_repo_cached_after_first_access(self): + conv = _make_conversation() + mi = _make_model_instance() + mem = TokenBufferMemory(conversation=conv, model_instance=mi) + + existing_repo = MagicMock() + mem._workflow_run_repo = existing_repo + + with patch( + "core.memory.token_buffer_memory.DifyAPIRepositoryFactory.create_api_workflow_run_repository" + ) as mock_factory: + repo = mem.workflow_run_repo + mock_factory.assert_not_called() + assert repo is existing_repo + + +# =========================================================================== +# Tests for _build_prompt_message_with_files +# =========================================================================== + + +class TestBuildPromptMessageWithFiles: + """Tests for the private _build_prompt_message_with_files method.""" + + # ------------------------------------------------------------------ + # Mode: CHAT / AGENT_CHAT / COMPLETION (simple branch) + # ------------------------------------------------------------------ + + @pytest.mark.parametrize("mode", [AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION]) + def test_chat_mode_no_files_user_message(self, mode): + """When file_extra_config is falsy or app_record is None → plain UserPromptMessage.""" + conv = _make_conversation(mode) + mi = _make_model_instance() + mem = TokenBufferMemory(conversation=conv, model_instance=mi) + + with patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, # falsy → file_objs = [] + ): + result = mem._build_prompt_message_with_files( + message_files=[], + text_content="hello", + message=_make_message(), + app_record=MagicMock(), + is_user_message=True, + ) + + assert isinstance(result, UserPromptMessage) + assert result.content == "hello" + + @pytest.mark.parametrize("mode", [AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION]) + def test_chat_mode_no_files_assistant_message(self, mode): + """Plain AssistantPromptMessage when no files and is_user_message=False.""" + conv = _make_conversation(mode) + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + with patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ): + result = mem._build_prompt_message_with_files( + message_files=[], + text_content="ai reply", + message=_make_message(), + app_record=None, + is_user_message=False, + ) + + assert isinstance(result, AssistantPromptMessage) + assert result.content == "ai reply" + + @pytest.mark.parametrize("mode", [AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION]) + def test_chat_mode_with_files_user_message(self, mode): + """When files are present, returns UserPromptMessage with list content.""" + conv = _make_conversation(mode) + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + mock_file_extra_config = MagicMock() + mock_file_extra_config.image_config = None # no detail override + + mock_file_obj = MagicMock() + # Must be a real entity so Pydantic's tagged union discriminator can validate it + real_image_content = ImagePromptMessageContent( + url="http://example.com/img.png", format="png", mime_type="image/png" + ) + + mock_message_file = MagicMock() + mock_app_record = MagicMock() + mock_app_record.tenant_id = "tenant-1" + + with ( + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=mock_file_extra_config, + ), + patch( + "core.memory.token_buffer_memory.file_factory.build_from_message_file", + return_value=mock_file_obj, + ), + patch( + "core.memory.token_buffer_memory.file_manager.to_prompt_message_content", + return_value=real_image_content, + ), + ): + result = mem._build_prompt_message_with_files( + message_files=[mock_message_file], + text_content="user text", + message=_make_message(), + app_record=mock_app_record, + is_user_message=True, + ) + + assert isinstance(result, UserPromptMessage) + assert isinstance(result.content, list) + # Last element should be TextPromptMessageContent + assert isinstance(result.content[-1], TextPromptMessageContent) + assert result.content[-1].data == "user text" + + @pytest.mark.parametrize("mode", [AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION]) + def test_chat_mode_with_files_assistant_message(self, mode): + """When files are present, returns AssistantPromptMessage with list content.""" + conv = _make_conversation(mode) + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + mock_file_extra_config = MagicMock() + mock_file_extra_config.image_config = None + + mock_file_obj = MagicMock() + real_image_content = ImagePromptMessageContent( + url="http://example.com/img.png", format="png", mime_type="image/png" + ) + mock_app_record = MagicMock() + mock_app_record.tenant_id = "tenant-1" + + with ( + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=mock_file_extra_config, + ), + patch( + "core.memory.token_buffer_memory.file_factory.build_from_message_file", + return_value=mock_file_obj, + ), + patch( + "core.memory.token_buffer_memory.file_manager.to_prompt_message_content", + return_value=real_image_content, + ), + ): + result = mem._build_prompt_message_with_files( + message_files=[MagicMock()], + text_content="ai text", + message=_make_message(), + app_record=mock_app_record, + is_user_message=False, + ) + + assert isinstance(result, AssistantPromptMessage) + assert isinstance(result.content, list) + + @pytest.mark.parametrize("mode", [AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION]) + def test_chat_mode_with_files_image_detail_overridden(self, mode): + """When image_config.detail is set, detail is taken from config.""" + conv = _make_conversation(mode) + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + mock_image_config = MagicMock() + mock_image_config.detail = ImagePromptMessageContent.DETAIL.LOW + + mock_file_extra_config = MagicMock() + mock_file_extra_config.image_config = mock_image_config + + mock_app_record = MagicMock() + mock_app_record.tenant_id = "tenant-1" + + real_image_content = ImagePromptMessageContent( + url="http://example.com/img.png", format="png", mime_type="image/png" + ) + + with ( + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=mock_file_extra_config, + ), + patch( + "core.memory.token_buffer_memory.file_factory.build_from_message_file", + return_value=MagicMock(), + ), + patch( + "core.memory.token_buffer_memory.file_manager.to_prompt_message_content", + return_value=real_image_content, + ) as mock_to_prompt, + ): + mem._build_prompt_message_with_files( + message_files=[MagicMock()], + text_content="user text", + message=_make_message(), + app_record=mock_app_record, + is_user_message=True, + ) + # Ensure the LOW detail was passed through + mock_to_prompt.assert_called_once_with( + mock_to_prompt.call_args[0][0], image_detail_config=ImagePromptMessageContent.DETAIL.LOW + ) + + @pytest.mark.parametrize("mode", [AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.COMPLETION]) + def test_chat_mode_app_record_none_returns_empty_file_objs(self, mode): + """app_record=None path → file_objs stays empty → plain messages.""" + conv = _make_conversation(mode) + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + mock_file_extra_config = MagicMock() + + with patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=mock_file_extra_config, + ): + result = mem._build_prompt_message_with_files( + message_files=[MagicMock()], + text_content="hello", + message=_make_message(), + app_record=None, # <-- forces the else branch → file_objs = [] + is_user_message=True, + ) + + assert isinstance(result, UserPromptMessage) + assert result.content == "hello" + + # ------------------------------------------------------------------ + # Mode: ADVANCED_CHAT / WORKFLOW + # ------------------------------------------------------------------ + + @pytest.mark.parametrize("mode", [AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + def test_workflow_mode_no_app_raises(self, mode): + """Raises ValueError when conversation.app is falsy.""" + conv = _make_conversation(mode) + conv.app = None + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + with pytest.raises(ValueError, match="App not found for conversation"): + mem._build_prompt_message_with_files( + message_files=[], + text_content="text", + message=_make_message(), + app_record=MagicMock(), + is_user_message=True, + ) + + @pytest.mark.parametrize("mode", [AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + def test_workflow_mode_no_workflow_run_id_raises(self, mode): + """Raises ValueError when message.workflow_run_id is falsy.""" + conv = _make_conversation(mode) + conv.app = MagicMock() + + message = _make_message() + message.workflow_run_id = None # force missing + + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + with pytest.raises(ValueError, match="Workflow run ID not found"): + mem._build_prompt_message_with_files( + message_files=[], + text_content="text", + message=message, + app_record=MagicMock(), + is_user_message=True, + ) + + @pytest.mark.parametrize("mode", [AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + def test_workflow_mode_workflow_run_not_found_raises(self, mode): + """Raises ValueError when workflow_run_repo returns None.""" + conv = _make_conversation(mode) + mock_app = MagicMock() + conv.app = mock_app + + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + mem._workflow_run_repo = MagicMock() + mem._workflow_run_repo.get_workflow_run_by_id.return_value = None + + with pytest.raises(ValueError, match="Workflow run not found"): + mem._build_prompt_message_with_files( + message_files=[], + text_content="text", + message=_make_message(), + app_record=MagicMock(), + is_user_message=True, + ) + + @pytest.mark.parametrize("mode", [AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + def test_workflow_mode_workflow_not_found_raises(self, mode): + """Raises ValueError when Workflow lookup returns None.""" + conv = _make_conversation(mode) + conv.app = MagicMock() + + mock_workflow_run = MagicMock() + mock_workflow_run.workflow_id = str(uuid4()) + + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + mem._workflow_run_repo = MagicMock() + mem._workflow_run_repo.get_workflow_run_by_id.return_value = mock_workflow_run + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + ): + mock_db.session.scalar.return_value = None # workflow not found + + with pytest.raises(ValueError, match="Workflow not found"): + mem._build_prompt_message_with_files( + message_files=[], + text_content="text", + message=_make_message(), + app_record=MagicMock(), + is_user_message=True, + ) + + @pytest.mark.parametrize("mode", [AppMode.ADVANCED_CHAT, AppMode.WORKFLOW]) + def test_workflow_mode_success_no_files_user(self, mode): + """Happy path: workflow mode, no message files → plain UserPromptMessage.""" + conv = _make_conversation(mode) + conv.app = MagicMock() + + mock_workflow_run = MagicMock() + mock_workflow_run.workflow_id = str(uuid4()) + + mock_workflow = MagicMock() + mock_workflow.features_dict = {} + + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + mem._workflow_run_repo = MagicMock() + mem._workflow_run_repo.get_workflow_run_by_id.return_value = mock_workflow_run + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ), + ): + mock_db.session.scalar.return_value = mock_workflow + + result = mem._build_prompt_message_with_files( + message_files=[], + text_content="wf text", + message=_make_message(), + app_record=MagicMock(), + is_user_message=True, + ) + + assert isinstance(result, UserPromptMessage) + assert result.content == "wf text" + + # ------------------------------------------------------------------ + # Invalid mode + # ------------------------------------------------------------------ + + def test_invalid_mode_raises_assertion(self): + """Any unknown AppMode raises AssertionError.""" + conv = _make_conversation() + conv.mode = "unknown_mode" # not in any set + mem = TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + with pytest.raises(AssertionError, match="Invalid app mode"): + mem._build_prompt_message_with_files( + message_files=[], + text_content="text", + message=_make_message(), + app_record=MagicMock(), + is_user_message=True, + ) + + +# =========================================================================== +# Tests for get_history_prompt_messages +# =========================================================================== + + +class TestGetHistoryPromptMessages: + """Tests for get_history_prompt_messages.""" + + def _make_memory(self, mode: AppMode = AppMode.CHAT) -> TokenBufferMemory: + conv = _make_conversation(mode) + conv.app = MagicMock() + return TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + def test_returns_empty_when_no_messages(self): + mem = self._make_memory() + with patch("core.memory.token_buffer_memory.db") as mock_db: + mock_db.session.scalars.return_value.all.return_value = [] + result = mem.get_history_prompt_messages() + assert result == [] + + def test_skips_first_message_without_answer(self): + """The newest message (index 0 after extraction) without answer and tokens==0 is skipped.""" + mem = self._make_memory() + + msg_no_answer = _make_message(answer="", answer_tokens=0) + msg_no_answer.parent_message_id = None # ensures extract_thread_messages returns it + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg_no_answer], + ), + ): + mock_db.session.scalars.return_value.all.side_effect = [ + [msg_no_answer], # first call: messages query + [], # second call: user files query (never hit, but safe) + ] + result = mem.get_history_prompt_messages() + + assert result == [] + + def test_message_with_answer_not_skipped(self): + """A message with a non-empty answer is NOT popped.""" + mem = self._make_memory() + + msg = _make_message(answer="some answer", answer_tokens=10) + msg.parent_message_id = None + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg], + ), + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ), + ): + # user files query → empty; assistant files query → empty + mock_db.session.scalars.return_value.all.return_value = [] + result = mem.get_history_prompt_messages() + + assert len(result) == 2 # one user + one assistant + + def test_message_limit_default_is_500(self): + """When message_limit is None the stmt is limited to 500.""" + mem = self._make_memory() + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch("core.memory.token_buffer_memory.select") as mock_select, + patch("core.memory.token_buffer_memory.extract_thread_messages", return_value=[]), + ): + mock_stmt = MagicMock() + mock_select.return_value.where.return_value.order_by.return_value = mock_stmt + mock_stmt.limit.return_value = mock_stmt + mock_db.session.scalars.return_value.all.return_value = [] + + mem.get_history_prompt_messages(message_limit=None) + mock_stmt.limit.assert_called_with(500) + + def test_message_limit_clipped_to_500(self): + """A message_limit > 500 is clamped to 500.""" + mem = self._make_memory() + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch("core.memory.token_buffer_memory.select") as mock_select, + patch("core.memory.token_buffer_memory.extract_thread_messages", return_value=[]), + ): + mock_stmt = MagicMock() + mock_select.return_value.where.return_value.order_by.return_value = mock_stmt + mock_stmt.limit.return_value = mock_stmt + mock_db.session.scalars.return_value.all.return_value = [] + + mem.get_history_prompt_messages(message_limit=9999) + mock_stmt.limit.assert_called_with(500) + + def test_message_limit_positive_used(self): + """A positive message_limit < 500 is used as-is.""" + mem = self._make_memory() + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch("core.memory.token_buffer_memory.select") as mock_select, + patch("core.memory.token_buffer_memory.extract_thread_messages", return_value=[]), + ): + mock_stmt = MagicMock() + mock_select.return_value.where.return_value.order_by.return_value = mock_stmt + mock_stmt.limit.return_value = mock_stmt + mock_db.session.scalars.return_value.all.return_value = [] + + mem.get_history_prompt_messages(message_limit=10) + mock_stmt.limit.assert_called_with(10) + + def test_message_limit_zero_uses_default(self): + """message_limit=0 triggers the else branch → default 500.""" + mem = self._make_memory() + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch("core.memory.token_buffer_memory.select") as mock_select, + patch("core.memory.token_buffer_memory.extract_thread_messages", return_value=[]), + ): + mock_stmt = MagicMock() + mock_select.return_value.where.return_value.order_by.return_value = mock_stmt + mock_stmt.limit.return_value = mock_stmt + mock_db.session.scalars.return_value.all.return_value = [] + + mem.get_history_prompt_messages(message_limit=0) + mock_stmt.limit.assert_called_with(500) + + def test_user_files_cause_build_with_files_call(self): + """When user_files is non-empty _build_prompt_message_with_files is invoked.""" + mem = self._make_memory() + msg = _make_message() + msg.parent_message_id = None + + mock_user_file = MagicMock() + mock_user_prompt = UserPromptMessage(content="from build") + mock_assistant_prompt = AssistantPromptMessage(content="answer") + + call_count = {"n": 0} + + def scalars_side_effect(stmt): + r = MagicMock() + if call_count["n"] == 0: + # messages query + r.all.return_value = [msg] + elif call_count["n"] == 1: + # user files + r.all.return_value = [mock_user_file] + else: + # assistant files + r.all.return_value = [] + call_count["n"] += 1 + return r + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg], + ), + patch.object( + mem, + "_build_prompt_message_with_files", + side_effect=[mock_user_prompt, mock_assistant_prompt], + ) as mock_build, + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ), + ): + mock_db.session.scalars.side_effect = scalars_side_effect + result = mem.get_history_prompt_messages() + + assert mock_build.call_count >= 1 + # First call should be user message + first_call_kwargs = mock_build.call_args_list[0][1] + assert first_call_kwargs["is_user_message"] is True + + def test_assistant_files_cause_build_with_files_call(self): + """When assistant_files is non-empty, build is called with is_user_message=False.""" + mem = self._make_memory() + msg = _make_message() + msg.parent_message_id = None + + mock_assistant_file = MagicMock() + mock_user_prompt = UserPromptMessage(content="query") + mock_assistant_prompt = AssistantPromptMessage(content="built") + + call_count = {"n": 0} + + def scalars_side_effect(stmt): + r = MagicMock() + if call_count["n"] == 0: + r.all.return_value = [msg] + elif call_count["n"] == 1: + r.all.return_value = [] # no user files + else: + r.all.return_value = [mock_assistant_file] + call_count["n"] += 1 + return r + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg], + ), + patch.object( + mem, + "_build_prompt_message_with_files", + return_value=mock_assistant_prompt, + ) as mock_build, + ): + mock_db.session.scalars.side_effect = scalars_side_effect + result = mem.get_history_prompt_messages() + + mock_build.assert_called_once() + call_kwargs = mock_build.call_args[1] + assert call_kwargs["is_user_message"] is False + + def test_token_pruning_removes_oldest_messages(self): + """If tokens exceed limit, oldest messages are removed until within limit.""" + conv = _make_conversation() + conv.app = MagicMock() + + # Model returns tokens that decrease only after removing pairs + token_values = [3000, 1500] # first call over limit, second within + mi = MagicMock() + mi.get_llm_num_tokens.side_effect = token_values + + mem = TokenBufferMemory(conversation=conv, model_instance=mi) + + msg = _make_message() + msg.parent_message_id = None + + call_count = {"n": 0} + + def scalars_side_effect(stmt): + r = MagicMock() + if call_count["n"] == 0: + r.all.return_value = [msg] + else: + r.all.return_value = [] + call_count["n"] += 1 + return r + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg], + ), + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ), + ): + mock_db.session.scalars.side_effect = scalars_side_effect + result = mem.get_history_prompt_messages(max_token_limit=2000) + + # After pruning, we should have fewer than the 2 initial messages + assert len(result) <= 1 + + def test_token_pruning_stops_at_single_message(self): + """Pruning stops when only 1 message remains (to prevent empty list).""" + conv = _make_conversation() + conv.app = MagicMock() + + # Always over limit + mi = MagicMock() + mi.get_llm_num_tokens.return_value = 99999 + + mem = TokenBufferMemory(conversation=conv, model_instance=mi) + + msg = _make_message() + msg.parent_message_id = None + + call_count = {"n": 0} + + def scalars_side_effect(stmt): + r = MagicMock() + if call_count["n"] == 0: + r.all.return_value = [msg] + else: + r.all.return_value = [] + call_count["n"] += 1 + return r + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg], + ), + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ), + ): + mock_db.session.scalars.side_effect = scalars_side_effect + result = mem.get_history_prompt_messages(max_token_limit=1) + + # At least 1 message should remain + assert len(result) >= 1 + + def test_no_pruning_when_within_limit(self): + """When tokens ≤ limit, no pruning occurs.""" + mem = self._make_memory() + mem.model_instance.get_llm_num_tokens.return_value = 50 # well under default 2000 + + msg = _make_message() + msg.parent_message_id = None + + call_count = {"n": 0} + + def scalars_side_effect(stmt): + r = MagicMock() + if call_count["n"] == 0: + r.all.return_value = [msg] + else: + r.all.return_value = [] + call_count["n"] += 1 + return r + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg], + ), + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ), + ): + mock_db.session.scalars.side_effect = scalars_side_effect + result = mem.get_history_prompt_messages(max_token_limit=2000) + + assert len(result) == 2 # user + assistant + + def test_plain_user_and_assistant_messages_returned(self): + """Without files, plain UserPromptMessage and AssistantPromptMessage appear.""" + mem = self._make_memory() + + msg = _make_message(answer="My answer") + msg.query = "My query" + msg.parent_message_id = None + + call_count = {"n": 0} + + def scalars_side_effect(stmt): + r = MagicMock() + if call_count["n"] == 0: + r.all.return_value = [msg] + else: + r.all.return_value = [] + call_count["n"] += 1 + return r + + with ( + patch("core.memory.token_buffer_memory.db") as mock_db, + patch( + "core.memory.token_buffer_memory.extract_thread_messages", + return_value=[msg], + ), + patch( + "core.memory.token_buffer_memory.FileUploadConfigManager.convert", + return_value=None, + ), + ): + mock_db.session.scalars.side_effect = scalars_side_effect + result = mem.get_history_prompt_messages() + + assert len(result) == 2 + user_msg, ai_msg = result + assert isinstance(user_msg, UserPromptMessage) + assert user_msg.content == "My query" + assert isinstance(ai_msg, AssistantPromptMessage) + assert ai_msg.content == "My answer" + + +# =========================================================================== +# Tests for get_history_prompt_text +# =========================================================================== + + +class TestGetHistoryPromptText: + """Tests for get_history_prompt_text.""" + + def _make_memory(self) -> TokenBufferMemory: + conv = _make_conversation() + conv.app = MagicMock() + return TokenBufferMemory(conversation=conv, model_instance=_make_model_instance()) + + def test_empty_messages_returns_empty_string(self): + mem = self._make_memory() + with patch.object(mem, "get_history_prompt_messages", return_value=[]): + result = mem.get_history_prompt_text() + assert result == "" + + def test_user_and_assistant_messages_formatted(self): + mem = self._make_memory() + messages = [ + UserPromptMessage(content="Hello"), + AssistantPromptMessage(content="World"), + ] + with patch.object(mem, "get_history_prompt_messages", return_value=messages): + result = mem.get_history_prompt_text(human_prefix="H", ai_prefix="A") + assert result == "H: Hello\nA: World" + + def test_custom_prefixes_applied(self): + mem = self._make_memory() + messages = [ + UserPromptMessage(content="Hi"), + AssistantPromptMessage(content="Bye"), + ] + with patch.object(mem, "get_history_prompt_messages", return_value=messages): + result = mem.get_history_prompt_text(human_prefix="Human", ai_prefix="Bot") + assert "Human: Hi" in result + assert "Bot: Bye" in result + + def test_list_content_with_text_and_image(self): + """List content: TextPromptMessageContent → text; ImagePromptMessageContent → [image].""" + mem = self._make_memory() + messages = [ + UserPromptMessage( + content=[ + TextPromptMessageContent(data="caption"), + ImagePromptMessageContent(url="http://img", format="png", mime_type="image/png"), + ] + ), + ] + with patch.object(mem, "get_history_prompt_messages", return_value=messages): + result = mem.get_history_prompt_text() + assert "caption" in result + assert "[image]" in result + + def test_list_content_text_only(self): + mem = self._make_memory() + messages = [ + UserPromptMessage(content=[TextPromptMessageContent(data="just text")]), + ] + with patch.object(mem, "get_history_prompt_messages", return_value=messages): + result = mem.get_history_prompt_text() + assert "just text" in result + + def test_list_content_image_only(self): + mem = self._make_memory() + messages = [ + UserPromptMessage( + content=[ + ImagePromptMessageContent(url="http://img", format="jpg", mime_type="image/jpeg"), + ] + ), + ] + with patch.object(mem, "get_history_prompt_messages", return_value=messages): + result = mem.get_history_prompt_text() + assert "[image]" in result + + def test_unknown_role_skipped(self): + """Messages with a role that is not USER or ASSISTANT are skipped.""" + mem = self._make_memory() + + # Create a mock message with a SYSTEM role + system_msg = MagicMock() + system_msg.role = PromptMessageRole.SYSTEM + system_msg.content = "system instruction" + + user_msg = UserPromptMessage(content="hi") + + with patch.object(mem, "get_history_prompt_messages", return_value=[system_msg, user_msg]): + result = mem.get_history_prompt_text() + + assert "system instruction" not in result + assert "Human: hi" in result + + def test_passes_max_token_limit_and_message_limit(self): + """Parameters are forwarded to get_history_prompt_messages.""" + mem = self._make_memory() + with patch.object(mem, "get_history_prompt_messages", return_value=[]) as mock_get: + mem.get_history_prompt_text(max_token_limit=500, message_limit=10) + mock_get.assert_called_once_with(max_token_limit=500, message_limit=10) + + def test_multiple_messages_joined_by_newline(self): + mem = self._make_memory() + messages = [ + UserPromptMessage(content="Q1"), + AssistantPromptMessage(content="A1"), + UserPromptMessage(content="Q2"), + AssistantPromptMessage(content="A2"), + ] + with patch.object(mem, "get_history_prompt_messages", return_value=messages): + result = mem.get_history_prompt_text() + lines = result.split("\n") + assert len(lines) == 4 + assert lines[0] == "Human: Q1" + assert lines[1] == "Assistant: A1" + assert lines[2] == "Human: Q2" + assert lines[3] == "Assistant: A2" + + def test_assistant_list_content_formatted(self): + """AssistantPromptMessage with list content is also handled.""" + mem = self._make_memory() + messages = [ + AssistantPromptMessage( + content=[ + TextPromptMessageContent(data="response text"), + ImagePromptMessageContent(url="http://img2", format="png", mime_type="image/png"), + ] + ), + ] + with patch.object(mem, "get_history_prompt_messages", return_value=messages): + result = mem.get_history_prompt_text() + assert "response text" in result + assert "[image]" in result diff --git a/api/tests/unit_tests/core/ops/aliyun_trace/data_exporter/test_traceclient.py b/api/tests/unit_tests/core/ops/aliyun_trace/data_exporter/test_traceclient.py new file mode 100644 index 0000000000..acb43d4036 --- /dev/null +++ b/api/tests/unit_tests/core/ops/aliyun_trace/data_exporter/test_traceclient.py @@ -0,0 +1,326 @@ +import time +import uuid +from datetime import datetime +from unittest.mock import MagicMock, patch + +import httpx +import pytest +from opentelemetry.sdk.trace import ReadableSpan +from opentelemetry.trace import SpanKind, Status, StatusCode + +from core.ops.aliyun_trace.data_exporter.traceclient import ( + INVALID_SPAN_ID, + SpanBuilder, + TraceClient, + build_endpoint, + convert_datetime_to_nanoseconds, + convert_string_to_id, + convert_to_span_id, + convert_to_trace_id, + create_link, + generate_span_id, +) +from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData + + +@pytest.fixture +def trace_client_factory(): + """Factory fixture for creating TraceClient instances with automatic cleanup.""" + clients_to_shutdown = [] + + def _factory(**kwargs): + client = TraceClient(**kwargs) + clients_to_shutdown.append(client) + return client + + yield _factory + + # Cleanup: shutdown all created clients + for client in clients_to_shutdown: + client.shutdown() + + +class TestTraceClient: + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + @patch("core.ops.aliyun_trace.data_exporter.traceclient.socket.gethostname") + def test_init(self, mock_gethostname, mock_exporter_class, trace_client_factory): + mock_gethostname.return_value = "test-host" + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + + assert client.endpoint == "http://test-endpoint" + assert client.max_queue_size == 1000 + assert client.schedule_delay_sec == 5 + assert client.done is False + assert client.worker_thread.is_alive() + + client.shutdown() + assert client.done is True + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_export(self, mock_exporter_class, trace_client_factory): + mock_exporter = mock_exporter_class.return_value + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + spans = [MagicMock(spec=ReadableSpan)] + client.export(spans) + mock_exporter.export.assert_called_once_with(spans) + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.httpx.head") + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_api_check_success(self, mock_exporter_class, mock_head, trace_client_factory): + mock_response = MagicMock() + mock_response.status_code = 405 + mock_head.return_value = mock_response + + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + assert client.api_check() is True + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.httpx.head") + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_api_check_failure_status(self, mock_exporter_class, mock_head, trace_client_factory): + mock_response = MagicMock() + mock_response.status_code = 500 + mock_head.return_value = mock_response + + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + assert client.api_check() is False + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.httpx.head") + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_api_check_exception(self, mock_exporter_class, mock_head, trace_client_factory): + mock_head.side_effect = httpx.RequestError("Connection error") + + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + with pytest.raises(ValueError, match="AliyunTrace API check failed: Connection error"): + client.api_check() + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_get_project_url(self, mock_exporter_class, trace_client_factory): + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + assert client.get_project_url() == "https://arms.console.aliyun.com/#/llm" + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_add_span(self, mock_exporter_class, trace_client_factory): + client = trace_client_factory( + service_name="test-service", + endpoint="http://test-endpoint", + max_export_batch_size=2, + ) + + # Test add None + client.add_span(None) + assert len(client.queue) == 0 + + # Test add valid SpanData + span_data = SpanData( + name="test-span", + trace_id=123, + span_id=456, + parent_span_id=None, + start_time=1000, + end_time=2000, + status=Status(StatusCode.OK), + span_kind=SpanKind.INTERNAL, + ) + + mock_span = MagicMock(spec=ReadableSpan) + client.span_builder.build_span = MagicMock(return_value=mock_span) + + with patch.object(client.condition, "notify") as mock_notify: + client.add_span(span_data) + assert len(client.queue) == 1 + mock_notify.assert_not_called() + + client.add_span(span_data) + assert len(client.queue) == 2 + mock_notify.assert_called_once() + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + @patch("core.ops.aliyun_trace.data_exporter.traceclient.logger") + def test_add_span_queue_full(self, mock_logger, mock_exporter_class, trace_client_factory): + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint", max_queue_size=1) + + span_data = SpanData( + name="test-span", + trace_id=123, + span_id=456, + parent_span_id=None, + start_time=1000, + end_time=2000, + status=Status(StatusCode.OK), + span_kind=SpanKind.INTERNAL, + ) + mock_span = MagicMock(spec=ReadableSpan) + client.span_builder.build_span = MagicMock(return_value=mock_span) + + client.add_span(span_data) + assert len(client.queue) == 1 + + client.add_span(span_data) + assert len(client.queue) == 1 + mock_logger.warning.assert_called_with("Queue is full, likely spans will be dropped.") + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_export_batch_error(self, mock_exporter_class, trace_client_factory): + mock_exporter = mock_exporter_class.return_value + mock_exporter.export.side_effect = Exception("Export failed") + + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + mock_span = MagicMock(spec=ReadableSpan) + client.queue.append(mock_span) + + with patch("core.ops.aliyun_trace.data_exporter.traceclient.logger") as mock_logger: + client._export_batch() + mock_logger.warning.assert_called() + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_worker_loop(self, mock_exporter_class, trace_client_factory): + # We need to test the wait timeout in _worker + # But _worker runs in a thread. Let's mock condition.wait. + client = trace_client_factory( + service_name="test-service", + endpoint="http://test-endpoint", + schedule_delay_sec=0.1, + ) + + with patch.object(client.condition, "wait") as mock_wait: + # Let it run for a bit then shut down + time.sleep(0.2) + client.shutdown() + # mock_wait might have been called + assert mock_wait.called or client.done + + @patch("core.ops.aliyun_trace.data_exporter.traceclient.OTLPSpanExporter") + def test_shutdown_flushes(self, mock_exporter_class, trace_client_factory): + mock_exporter = mock_exporter_class.return_value + client = trace_client_factory(service_name="test-service", endpoint="http://test-endpoint") + + mock_span = MagicMock(spec=ReadableSpan) + client.queue.append(mock_span) + + client.shutdown() + # Should have called export twice (once in worker/export_batch, once in shutdown) + # or at least once if worker was waiting + assert mock_exporter.export.called + assert mock_exporter.shutdown.called + + +class TestSpanBuilder: + def test_build_span(self): + resource = MagicMock() + builder = SpanBuilder(resource) + + span_data = SpanData( + name="test-span", + trace_id=123, + span_id=456, + parent_span_id=789, + start_time=1000, + end_time=2000, + status=Status(StatusCode.OK), + span_kind=SpanKind.INTERNAL, + attributes={"attr1": "val1"}, + events=[], + links=[], + ) + + span = builder.build_span(span_data) + assert isinstance(span, ReadableSpan) + assert span.name == "test-span" + assert span.context.trace_id == 123 + assert span.context.span_id == 456 + assert span.parent.span_id == 789 + assert span.resource == resource + assert span.attributes == {"attr1": "val1"} + + def test_build_span_no_parent(self): + resource = MagicMock() + builder = SpanBuilder(resource) + + span_data = SpanData( + name="test-span", + trace_id=123, + span_id=456, + parent_span_id=None, + start_time=1000, + end_time=2000, + status=Status(StatusCode.OK), + span_kind=SpanKind.INTERNAL, + ) + + span = builder.build_span(span_data) + assert span.parent is None + + +def test_create_link(): + trace_id_str = "0123456789abcdef0123456789abcdef" + link = create_link(trace_id_str) + assert link.context.trace_id == int(trace_id_str, 16) + assert link.context.span_id == INVALID_SPAN_ID + + with pytest.raises(ValueError, match="Invalid trace ID format"): + create_link("invalid-hex") + + +def test_generate_span_id(): + # Test normal generation + span_id = generate_span_id() + assert isinstance(span_id, int) + assert span_id != INVALID_SPAN_ID + + # Test retry loop + with patch("core.ops.aliyun_trace.data_exporter.traceclient.random.getrandbits") as mock_rand: + mock_rand.side_effect = [INVALID_SPAN_ID, 999] + span_id = generate_span_id() + assert span_id == 999 + assert mock_rand.call_count == 2 + + +def test_convert_to_trace_id(): + uid = str(uuid.uuid4()) + trace_id = convert_to_trace_id(uid) + assert trace_id == uuid.UUID(uid).int + + with pytest.raises(ValueError, match="UUID cannot be None"): + convert_to_trace_id(None) + + with pytest.raises(ValueError, match="Invalid UUID input"): + convert_to_trace_id("not-a-uuid") + + +def test_convert_string_to_id(): + assert convert_string_to_id("test") > 0 + # Test with None string + with patch("core.ops.aliyun_trace.data_exporter.traceclient.generate_span_id") as mock_gen: + mock_gen.return_value = 12345 + assert convert_string_to_id(None) == 12345 + + +def test_convert_to_span_id(): + uid = str(uuid.uuid4()) + span_id = convert_to_span_id(uid, "test-type") + assert isinstance(span_id, int) + + with pytest.raises(ValueError, match="UUID cannot be None"): + convert_to_span_id(None, "test") + + with pytest.raises(ValueError, match="Invalid UUID input"): + convert_to_span_id("not-a-uuid", "test") + + +def test_convert_datetime_to_nanoseconds(): + dt = datetime(2023, 1, 1, 12, 0, 0) + ns = convert_datetime_to_nanoseconds(dt) + assert ns == int(dt.timestamp() * 1e9) + assert convert_datetime_to_nanoseconds(None) is None + + +def test_build_endpoint(): + license_key = "abc" + + # CMS 2.0 endpoint + url1 = "https://log.aliyuncs.com" + assert build_endpoint(url1, license_key) == "https://log.aliyuncs.com/adapt_abc/api/v1/traces" + + # XTrace endpoint + url2 = "https://example.com" + assert build_endpoint(url2, license_key) == "https://example.com/adapt_abc/api/otlp/traces" diff --git a/api/tests/unit_tests/core/ops/aliyun_trace/entities/test_aliyun_trace_entity.py b/api/tests/unit_tests/core/ops/aliyun_trace/entities/test_aliyun_trace_entity.py new file mode 100644 index 0000000000..2fcb927e0c --- /dev/null +++ b/api/tests/unit_tests/core/ops/aliyun_trace/entities/test_aliyun_trace_entity.py @@ -0,0 +1,88 @@ +import pytest +from opentelemetry import trace as trace_api +from opentelemetry.sdk.trace import Event +from opentelemetry.trace import SpanKind, Status, StatusCode +from pydantic import ValidationError + +from core.ops.aliyun_trace.entities.aliyun_trace_entity import SpanData, TraceMetadata + + +class TestTraceMetadata: + def test_trace_metadata_init(self): + links = [trace_api.Link(context=trace_api.SpanContext(0, 0, False))] + metadata = TraceMetadata( + trace_id=123, workflow_span_id=456, session_id="session_1", user_id="user_1", links=links + ) + assert metadata.trace_id == 123 + assert metadata.workflow_span_id == 456 + assert metadata.session_id == "session_1" + assert metadata.user_id == "user_1" + assert metadata.links == links + + +class TestSpanData: + def test_span_data_init_required_fields(self): + span_data = SpanData(trace_id=123, span_id=456, name="test_span", start_time=1000, end_time=2000) + assert span_data.trace_id == 123 + assert span_data.span_id == 456 + assert span_data.name == "test_span" + assert span_data.start_time == 1000 + assert span_data.end_time == 2000 + + # Check defaults + assert span_data.parent_span_id is None + assert span_data.attributes == {} + assert span_data.events == [] + assert span_data.links == [] + assert span_data.status.status_code == StatusCode.UNSET + assert span_data.span_kind == SpanKind.INTERNAL + + def test_span_data_with_optional_fields(self): + event = Event(name="event_1", timestamp=1500) + link = trace_api.Link(context=trace_api.SpanContext(0, 0, False)) + status = Status(StatusCode.OK) + + span_data = SpanData( + trace_id=123, + parent_span_id=111, + span_id=456, + name="test_span", + attributes={"key": "value"}, + events=[event], + links=[link], + status=status, + start_time=1000, + end_time=2000, + span_kind=SpanKind.SERVER, + ) + + assert span_data.parent_span_id == 111 + assert span_data.attributes == {"key": "value"} + assert span_data.events == [event] + assert span_data.links == [link] + assert span_data.status.status_code == status.status_code + assert span_data.span_kind == SpanKind.SERVER + + def test_span_data_missing_required_fields(self): + with pytest.raises(ValidationError): + SpanData( + trace_id=123, + # span_id missing + name="test_span", + start_time=1000, + end_time=2000, + ) + + def test_span_data_arbitrary_types_allowed(self): + # opentelemetry.trace.Status and Event are "arbitrary types" for Pydantic + # This test ensures they are accepted thanks to model_config + status = Status(StatusCode.ERROR, description="error occurred") + event = Event(name="exception", timestamp=1234, attributes={"exception.type": "ValueError"}) + + span_data = SpanData( + trace_id=123, span_id=456, name="test_span", status=status, events=[event], start_time=1000, end_time=2000 + ) + + assert span_data.status.status_code == status.status_code + assert span_data.status.description == status.description + assert span_data.events == [event] diff --git a/api/tests/unit_tests/core/ops/aliyun_trace/entities/test_semconv.py b/api/tests/unit_tests/core/ops/aliyun_trace/entities/test_semconv.py new file mode 100644 index 0000000000..3961555b9a --- /dev/null +++ b/api/tests/unit_tests/core/ops/aliyun_trace/entities/test_semconv.py @@ -0,0 +1,68 @@ +from core.ops.aliyun_trace.entities.semconv import ( + ACS_ARMS_SERVICE_FEATURE, + GEN_AI_COMPLETION, + GEN_AI_FRAMEWORK, + GEN_AI_INPUT_MESSAGE, + GEN_AI_OUTPUT_MESSAGE, + GEN_AI_PROMPT, + GEN_AI_PROVIDER_NAME, + GEN_AI_REQUEST_MODEL, + GEN_AI_RESPONSE_FINISH_REASON, + GEN_AI_SESSION_ID, + GEN_AI_SPAN_KIND, + GEN_AI_USAGE_INPUT_TOKENS, + GEN_AI_USAGE_OUTPUT_TOKENS, + GEN_AI_USAGE_TOTAL_TOKENS, + GEN_AI_USER_ID, + GEN_AI_USER_NAME, + INPUT_VALUE, + OUTPUT_VALUE, + RETRIEVAL_DOCUMENT, + RETRIEVAL_QUERY, + TOOL_DESCRIPTION, + TOOL_NAME, + TOOL_PARAMETERS, + GenAISpanKind, +) + + +def test_constants(): + assert ACS_ARMS_SERVICE_FEATURE == "acs.arms.service.feature" + assert GEN_AI_SESSION_ID == "gen_ai.session.id" + assert GEN_AI_USER_ID == "gen_ai.user.id" + assert GEN_AI_USER_NAME == "gen_ai.user.name" + assert GEN_AI_SPAN_KIND == "gen_ai.span.kind" + assert GEN_AI_FRAMEWORK == "gen_ai.framework" + assert INPUT_VALUE == "input.value" + assert OUTPUT_VALUE == "output.value" + assert RETRIEVAL_QUERY == "retrieval.query" + assert RETRIEVAL_DOCUMENT == "retrieval.document" + assert GEN_AI_REQUEST_MODEL == "gen_ai.request.model" + assert GEN_AI_PROVIDER_NAME == "gen_ai.provider.name" + assert GEN_AI_USAGE_INPUT_TOKENS == "gen_ai.usage.input_tokens" + assert GEN_AI_USAGE_OUTPUT_TOKENS == "gen_ai.usage.output_tokens" + assert GEN_AI_USAGE_TOTAL_TOKENS == "gen_ai.usage.total_tokens" + assert GEN_AI_PROMPT == "gen_ai.prompt" + assert GEN_AI_COMPLETION == "gen_ai.completion" + assert GEN_AI_RESPONSE_FINISH_REASON == "gen_ai.response.finish_reason" + assert GEN_AI_INPUT_MESSAGE == "gen_ai.input.messages" + assert GEN_AI_OUTPUT_MESSAGE == "gen_ai.output.messages" + assert TOOL_NAME == "tool.name" + assert TOOL_DESCRIPTION == "tool.description" + assert TOOL_PARAMETERS == "tool.parameters" + + +def test_gen_ai_span_kind_enum(): + assert GenAISpanKind.CHAIN == "CHAIN" + assert GenAISpanKind.RETRIEVER == "RETRIEVER" + assert GenAISpanKind.RERANKER == "RERANKER" + assert GenAISpanKind.LLM == "LLM" + assert GenAISpanKind.EMBEDDING == "EMBEDDING" + assert GenAISpanKind.TOOL == "TOOL" + assert GenAISpanKind.AGENT == "AGENT" + assert GenAISpanKind.TASK == "TASK" + + # Verify iteration works (covers the class definition) + kinds = list(GenAISpanKind) + assert len(kinds) == 8 + assert "LLM" in kinds diff --git a/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace.py b/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace.py new file mode 100644 index 0000000000..dfd61acfa7 --- /dev/null +++ b/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace.py @@ -0,0 +1,647 @@ +from __future__ import annotations + +from datetime import UTC, datetime +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest +from opentelemetry.trace import Link, SpanContext, SpanKind, Status, StatusCode, TraceFlags + +import core.ops.aliyun_trace.aliyun_trace as aliyun_trace_module +from core.ops.aliyun_trace.aliyun_trace import AliyunDataTrace +from core.ops.aliyun_trace.entities.semconv import ( + GEN_AI_COMPLETION, + GEN_AI_INPUT_MESSAGE, + GEN_AI_OUTPUT_MESSAGE, + GEN_AI_PROMPT, + GEN_AI_REQUEST_MODEL, + GEN_AI_RESPONSE_FINISH_REASON, + GEN_AI_USAGE_TOTAL_TOKENS, + RETRIEVAL_DOCUMENT, + RETRIEVAL_QUERY, + TOOL_DESCRIPTION, + TOOL_NAME, + TOOL_PARAMETERS, + GenAISpanKind, +) +from core.ops.entities.config_entity import AliyunConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + WorkflowTraceInfo, +) +from dify_graph.entities import WorkflowNodeExecution +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey + + +class RecordingTraceClient: + def __init__(self, service_name: str = "service", endpoint: str = "endpoint"): + self.service_name = service_name + self.endpoint = endpoint + self.added_spans: list[object] = [] + + def add_span(self, span) -> None: + self.added_spans.append(span) + + def api_check(self) -> bool: + return True + + def get_project_url(self) -> str: + return "project-url" + + +def _dt() -> datetime: + return datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + + +def _make_link(trace_id: int = 1, span_id: int = 2) -> Link: + context = SpanContext( + trace_id=trace_id, + span_id=span_id, + is_remote=False, + trace_flags=TraceFlags.SAMPLED, + ) + return Link(context) + + +def _make_workflow_trace_info(**overrides) -> WorkflowTraceInfo: + defaults = { + "workflow_id": "workflow-id", + "tenant_id": "tenant-id", + "workflow_run_id": "00000000-0000-0000-0000-000000000001", + "workflow_run_elapsed_time": 1.0, + "workflow_run_status": "succeeded", + "workflow_run_inputs": {"sys.query": "hello"}, + "workflow_run_outputs": {"answer": "world"}, + "workflow_run_version": "v1", + "total_tokens": 1, + "file_list": [], + "query": "hello", + "metadata": {"conversation_id": "conv", "user_id": "u", "app_id": "app"}, + "message_id": None, + "start_time": _dt(), + "end_time": _dt(), + "trace_id": "550e8400-e29b-41d4-a716-446655440000", + } + defaults.update(overrides) + return WorkflowTraceInfo(**defaults) + + +def _make_message_trace_info(**overrides) -> MessageTraceInfo: + defaults = { + "conversation_model": "chat", + "message_tokens": 1, + "answer_tokens": 2, + "total_tokens": 3, + "conversation_mode": "chat", + "metadata": {"conversation_id": "conv", "ls_model_name": "m", "ls_provider": "p"}, + "message_id": "00000000-0000-0000-0000-000000000002", + "message_data": SimpleNamespace(from_account_id="acc", from_end_user_id=None), + "inputs": {"prompt": "hi"}, + "outputs": "ok", + "start_time": _dt(), + "end_time": _dt(), + "error": None, + "trace_id": "550e8400-e29b-41d4-a716-446655440000", + } + defaults.update(overrides) + return MessageTraceInfo(**defaults) + + +def _make_dataset_retrieval_trace_info(**overrides) -> DatasetRetrievalTraceInfo: + defaults = { + "metadata": {"conversation_id": "conv", "user_id": "u"}, + "message_id": "00000000-0000-0000-0000-000000000003", + "message_data": SimpleNamespace(), + "inputs": "q", + "documents": [SimpleNamespace()], + "start_time": _dt(), + "end_time": _dt(), + "trace_id": "550e8400-e29b-41d4-a716-446655440000", + } + defaults.update(overrides) + return DatasetRetrievalTraceInfo(**defaults) + + +def _make_tool_trace_info(**overrides) -> ToolTraceInfo: + defaults = { + "tool_name": "tool", + "tool_inputs": {"x": 1}, + "tool_outputs": "out", + "tool_config": {"desc": "d"}, + "tool_parameters": {}, + "time_cost": 0.1, + "metadata": {"conversation_id": "conv", "user_id": "u"}, + "message_id": "00000000-0000-0000-0000-000000000004", + "message_data": SimpleNamespace(), + "inputs": {"i": "v"}, + "outputs": {"o": "v"}, + "start_time": _dt(), + "end_time": _dt(), + "error": None, + "trace_id": "550e8400-e29b-41d4-a716-446655440000", + } + defaults.update(overrides) + return ToolTraceInfo(**defaults) + + +def _make_suggested_question_trace_info(**overrides) -> SuggestedQuestionTraceInfo: + defaults = { + "suggested_question": ["q1", "q2"], + "level": "info", + "total_tokens": 1, + "metadata": {"conversation_id": "conv", "user_id": "u", "ls_model_name": "m", "ls_provider": "p"}, + "message_id": "00000000-0000-0000-0000-000000000005", + "inputs": {"i": 1}, + "start_time": _dt(), + "end_time": _dt(), + "error": None, + "trace_id": "550e8400-e29b-41d4-a716-446655440000", + } + defaults.update(overrides) + return SuggestedQuestionTraceInfo(**defaults) + + +@pytest.fixture +def trace_instance(monkeypatch: pytest.MonkeyPatch) -> AliyunDataTrace: + monkeypatch.setattr(aliyun_trace_module, "build_endpoint", lambda base_url, license_key: "built-endpoint") + monkeypatch.setattr(aliyun_trace_module, "TraceClient", RecordingTraceClient) + # Mock get_service_account_with_tenant to avoid DB errors + monkeypatch.setattr(AliyunDataTrace, "get_service_account_with_tenant", lambda self, app_id: MagicMock()) + + config = AliyunConfig(app_name="app", license_key="k", endpoint="https://example.com") + trace = AliyunDataTrace(config) + return trace + + +def test_init_builds_endpoint_and_client(monkeypatch: pytest.MonkeyPatch): + build_endpoint = MagicMock(return_value="built") + trace_client_cls = MagicMock() + monkeypatch.setattr(aliyun_trace_module, "build_endpoint", build_endpoint) + monkeypatch.setattr(aliyun_trace_module, "TraceClient", trace_client_cls) + + config = AliyunConfig(app_name="my-app", license_key="license", endpoint="https://example.com") + trace = AliyunDataTrace(config) + + build_endpoint.assert_called_once_with("https://example.com", "license") + trace_client_cls.assert_called_once_with(service_name="my-app", endpoint="built") + assert trace.trace_config == config + + +def test_trace_dispatches_to_correct_methods(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + workflow_trace = MagicMock() + message_trace = MagicMock() + suggested_question_trace = MagicMock() + dataset_retrieval_trace = MagicMock() + tool_trace = MagicMock() + monkeypatch.setattr(trace_instance, "workflow_trace", workflow_trace) + monkeypatch.setattr(trace_instance, "message_trace", message_trace) + monkeypatch.setattr(trace_instance, "suggested_question_trace", suggested_question_trace) + monkeypatch.setattr(trace_instance, "dataset_retrieval_trace", dataset_retrieval_trace) + monkeypatch.setattr(trace_instance, "tool_trace", tool_trace) + + trace_instance.trace(_make_workflow_trace_info()) + workflow_trace.assert_called_once() + + trace_instance.trace(_make_message_trace_info()) + message_trace.assert_called_once() + + trace_instance.trace(_make_suggested_question_trace_info()) + suggested_question_trace.assert_called_once() + + trace_instance.trace(_make_dataset_retrieval_trace_info()) + dataset_retrieval_trace.assert_called_once() + + trace_instance.trace(_make_tool_trace_info()) + tool_trace.assert_called_once() + + # Branches that do nothing but should be covered + trace_instance.trace(ModerationTraceInfo(flagged=False, action="allow", preset_response="", query="", metadata={})) + trace_instance.trace(GenerateNameTraceInfo(tenant_id="t", metadata={})) + + +def test_api_check_delegates(trace_instance: AliyunDataTrace): + trace_instance.trace_client.api_check = MagicMock(return_value=False) + assert trace_instance.api_check() is False + + +def test_get_project_url_success(trace_instance: AliyunDataTrace): + assert trace_instance.get_project_url() == "project-url" + + +def test_get_project_url_error(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(trace_instance.trace_client, "get_project_url", MagicMock(side_effect=Exception("boom"))) + logger_mock = MagicMock() + monkeypatch.setattr(aliyun_trace_module, "logger", logger_mock) + + with pytest.raises(ValueError, match=r"Aliyun get project url failed: boom"): + trace_instance.get_project_url() + logger_mock.info.assert_called() + + +def test_workflow_trace_adds_workflow_and_node_spans(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_trace_id", lambda _: 111) + monkeypatch.setattr( + aliyun_trace_module, "convert_to_span_id", lambda _, span_type: {"workflow": 222}.get(span_type, 0) + ) + monkeypatch.setattr(aliyun_trace_module, "create_links_from_trace_id", lambda _: []) + + add_workflow_span = MagicMock() + get_workflow_node_executions = MagicMock(return_value=[MagicMock(), MagicMock()]) + build_workflow_node_span = MagicMock(side_effect=["span-1", "span-2"]) + monkeypatch.setattr(trace_instance, "add_workflow_span", add_workflow_span) + monkeypatch.setattr(trace_instance, "get_workflow_node_executions", get_workflow_node_executions) + monkeypatch.setattr(trace_instance, "build_workflow_node_span", build_workflow_node_span) + + trace_info = _make_workflow_trace_info( + trace_id="abcd", metadata={"conversation_id": "c", "user_id": "u", "app_id": "app"} + ) + trace_instance.workflow_trace(trace_info) + + add_workflow_span.assert_called_once() + passed_trace_metadata = add_workflow_span.call_args.args[1] + assert passed_trace_metadata.trace_id == 111 + assert passed_trace_metadata.workflow_span_id == 222 + assert passed_trace_metadata.session_id == "c" + assert passed_trace_metadata.user_id == "u" + assert passed_trace_metadata.links == [] + + assert trace_instance.trace_client.added_spans == ["span-1", "span-2"] + + +def test_message_trace_returns_early_if_no_message_data(trace_instance: AliyunDataTrace): + trace_info = _make_message_trace_info(message_data=None) + trace_instance.message_trace(trace_info) + assert trace_instance.trace_client.added_spans == [] + + +def test_message_trace_creates_message_and_llm_spans(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_trace_id", lambda _: 10) + monkeypatch.setattr( + aliyun_trace_module, + "convert_to_span_id", + lambda _, span_type: {"message": 20, "llm": 30}.get(span_type, 0), + ) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + monkeypatch.setattr(aliyun_trace_module, "get_user_id_from_message_data", lambda _: "user") + monkeypatch.setattr(aliyun_trace_module, "create_links_from_trace_id", lambda _: []) + + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "create_status_from_error", lambda _: status) + + trace_info = _make_message_trace_info( + metadata={"conversation_id": "conv", "ls_model_name": "model", "ls_provider": "provider"}, + message_tokens=7, + answer_tokens=11, + total_tokens=18, + outputs="completion", + ) + trace_instance.message_trace(trace_info) + + assert len(trace_instance.trace_client.added_spans) == 2 + message_span, llm_span = trace_instance.trace_client.added_spans + + assert message_span.name == "message" + assert message_span.trace_id == 10 + assert message_span.parent_span_id is None + assert message_span.span_id == 20 + assert message_span.span_kind == SpanKind.SERVER + assert message_span.status == status + assert message_span.attributes["gen_ai.span.kind"] == GenAISpanKind.CHAIN + + assert llm_span.name == "llm" + assert llm_span.parent_span_id == 20 + assert llm_span.span_id == 30 + assert llm_span.status == status + assert llm_span.attributes[GEN_AI_REQUEST_MODEL] == "model" + assert llm_span.attributes[GEN_AI_USAGE_TOTAL_TOKENS] == "18" + + +def test_dataset_retrieval_trace_returns_early_if_no_message_data(trace_instance: AliyunDataTrace): + trace_info = _make_dataset_retrieval_trace_info(message_data=None) + trace_instance.dataset_retrieval_trace(trace_info) + assert trace_instance.trace_client.added_spans == [] + + +def test_dataset_retrieval_trace_creates_span(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_trace_id", lambda _: 1) + monkeypatch.setattr( + aliyun_trace_module, "convert_to_span_id", lambda _, span_type: {"message": 2}.get(span_type, 0) + ) + monkeypatch.setattr(aliyun_trace_module, "generate_span_id", lambda: 3) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + monkeypatch.setattr(aliyun_trace_module, "create_links_from_trace_id", lambda _: []) + monkeypatch.setattr(aliyun_trace_module, "extract_retrieval_documents", lambda _: [{"doc": "d"}]) + + trace_instance.dataset_retrieval_trace(_make_dataset_retrieval_trace_info(inputs="query")) + assert len(trace_instance.trace_client.added_spans) == 1 + span = trace_instance.trace_client.added_spans[0] + assert span.name == "dataset_retrieval" + assert span.attributes[RETRIEVAL_QUERY] == "query" + assert span.attributes[RETRIEVAL_DOCUMENT] == '[{"doc": "d"}]' + + +def test_tool_trace_returns_early_if_no_message_data(trace_instance: AliyunDataTrace): + trace_info = _make_tool_trace_info(message_data=None) + trace_instance.tool_trace(trace_info) + assert trace_instance.trace_client.added_spans == [] + + +def test_tool_trace_creates_span(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_trace_id", lambda _: 10) + monkeypatch.setattr( + aliyun_trace_module, "convert_to_span_id", lambda _, span_type: {"message": 20}.get(span_type, 0) + ) + monkeypatch.setattr(aliyun_trace_module, "generate_span_id", lambda: 30) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + monkeypatch.setattr(aliyun_trace_module, "create_links_from_trace_id", lambda _: []) + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "create_status_from_error", lambda _: status) + + trace_instance.tool_trace( + _make_tool_trace_info( + tool_name="my-tool", + tool_inputs={"a": 1}, + tool_config={"description": "x"}, + inputs={"i": 1}, + ) + ) + + assert len(trace_instance.trace_client.added_spans) == 1 + span = trace_instance.trace_client.added_spans[0] + assert span.name == "my-tool" + assert span.status == status + assert span.attributes[TOOL_NAME] == "my-tool" + assert span.attributes[TOOL_DESCRIPTION] == '{"description": "x"}' + + +def test_get_workflow_node_executions_requires_app_id(trace_instance: AliyunDataTrace): + trace_info = _make_workflow_trace_info(metadata={"conversation_id": "c"}) + with pytest.raises(ValueError, match="No app_id found in trace_info metadata"): + trace_instance.get_workflow_node_executions(trace_info) + + +def test_get_workflow_node_executions_builds_repo_and_fetches( + trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch +): + trace_info = _make_workflow_trace_info(metadata={"app_id": "app", "conversation_id": "c", "user_id": "u"}) + + account = object() + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", MagicMock(return_value=account)) + monkeypatch.setattr(aliyun_trace_module, "sessionmaker", MagicMock()) + monkeypatch.setattr(aliyun_trace_module, "db", SimpleNamespace(engine="engine")) + + repo = MagicMock() + repo.get_by_workflow_run.return_value = ["node1"] + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr(aliyun_trace_module, "DifyCoreRepositoryFactory", mock_factory) + + result = trace_instance.get_workflow_node_executions(trace_info) + assert result == ["node1"] + repo.get_by_workflow_run.assert_called_once_with(workflow_run_id=trace_info.workflow_run_id) + + +def test_build_workflow_node_span_routes_llm_type(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + node_execution = MagicMock(spec=WorkflowNodeExecution) + trace_info = _make_workflow_trace_info() + trace_metadata = MagicMock() + + monkeypatch.setattr(trace_instance, "build_workflow_llm_span", MagicMock(return_value="llm")) + + node_execution.node_type = BuiltinNodeTypes.LLM + assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "llm" + + +def test_build_workflow_node_span_routes_knowledge_retrieval_type( + trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch +): + node_execution = MagicMock(spec=WorkflowNodeExecution) + trace_info = _make_workflow_trace_info() + trace_metadata = MagicMock() + + monkeypatch.setattr(trace_instance, "build_workflow_retrieval_span", MagicMock(return_value="retrieval")) + + node_execution.node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL + assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "retrieval" + + +def test_build_workflow_node_span_routes_tool_type(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + node_execution = MagicMock(spec=WorkflowNodeExecution) + trace_info = _make_workflow_trace_info() + trace_metadata = MagicMock() + + monkeypatch.setattr(trace_instance, "build_workflow_tool_span", MagicMock(return_value="tool")) + + node_execution.node_type = BuiltinNodeTypes.TOOL + assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "tool" + + +def test_build_workflow_node_span_routes_code_type(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + node_execution = MagicMock(spec=WorkflowNodeExecution) + trace_info = _make_workflow_trace_info() + trace_metadata = MagicMock() + + monkeypatch.setattr(trace_instance, "build_workflow_task_span", MagicMock(return_value="task")) + + node_execution.node_type = BuiltinNodeTypes.CODE + assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) == "task" + + +def test_build_workflow_node_span_handles_errors( + trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +): + node_execution = MagicMock(spec=WorkflowNodeExecution) + trace_info = _make_workflow_trace_info() + trace_metadata = MagicMock() + + monkeypatch.setattr(trace_instance, "build_workflow_task_span", MagicMock(side_effect=RuntimeError("boom"))) + node_execution.node_type = BuiltinNodeTypes.CODE + + assert trace_instance.build_workflow_node_span(node_execution, trace_info, trace_metadata) is None + assert "Error occurred in build_workflow_node_span" in caplog.text + + +def test_build_workflow_task_span(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_span_id", lambda _, __: 9) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "get_workflow_node_status", lambda _: status) + + trace_metadata = SimpleNamespace(trace_id=1, workflow_span_id=2, session_id="s", user_id="u", links=[]) + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node-id" + node_execution.title = "title" + node_execution.inputs = {"a": 1} + node_execution.outputs = {"b": 2} + node_execution.created_at = _dt() + node_execution.finished_at = _dt() + + span = trace_instance.build_workflow_task_span(_make_workflow_trace_info(), node_execution, trace_metadata) + assert span.trace_id == 1 + assert span.span_id == 9 + assert span.status.status_code == StatusCode.OK + assert span.attributes["gen_ai.span.kind"] == GenAISpanKind.TASK + + +def test_build_workflow_tool_span(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_span_id", lambda _, __: 9) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "get_workflow_node_status", lambda _: status) + + trace_metadata = SimpleNamespace(trace_id=1, workflow_span_id=2, session_id="s", user_id="u", links=[_make_link()]) + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node-id" + node_execution.title = "my-tool" + node_execution.inputs = {"a": 1} + node_execution.outputs = {"b": 2} + node_execution.created_at = _dt() + node_execution.finished_at = _dt() + node_execution.metadata = {WorkflowNodeExecutionMetadataKey.TOOL_INFO: {"k": "v"}} + + span = trace_instance.build_workflow_tool_span(_make_workflow_trace_info(), node_execution, trace_metadata) + assert span.attributes[TOOL_NAME] == "my-tool" + assert span.attributes[TOOL_DESCRIPTION] == '{"k": "v"}' + assert span.attributes[TOOL_PARAMETERS] == '{"a": 1}' + assert span.status.status_code == StatusCode.OK + + # Cover metadata is None and inputs is None + node_execution.metadata = None + node_execution.inputs = None + span2 = trace_instance.build_workflow_tool_span(_make_workflow_trace_info(), node_execution, trace_metadata) + assert span2.attributes[TOOL_DESCRIPTION] == "{}" + assert span2.attributes[TOOL_PARAMETERS] == "{}" + + +def test_build_workflow_retrieval_span(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_span_id", lambda _, __: 9) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "get_workflow_node_status", lambda _: status) + monkeypatch.setattr( + aliyun_trace_module, "format_retrieval_documents", lambda docs: [{"formatted": True}] if docs else [] + ) + + trace_metadata = SimpleNamespace(trace_id=1, workflow_span_id=2, session_id="s", user_id="u", links=[]) + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node-id" + node_execution.title = "retrieval" + node_execution.inputs = {"query": "q"} + node_execution.outputs = {"result": [{"doc": "d"}]} + node_execution.created_at = _dt() + node_execution.finished_at = _dt() + + span = trace_instance.build_workflow_retrieval_span(_make_workflow_trace_info(), node_execution, trace_metadata) + assert span.attributes[RETRIEVAL_QUERY] == "q" + assert span.attributes[RETRIEVAL_DOCUMENT] == '[{"formatted": true}]' + + # Cover empty inputs/outputs + node_execution.inputs = None + node_execution.outputs = None + span2 = trace_instance.build_workflow_retrieval_span(_make_workflow_trace_info(), node_execution, trace_metadata) + assert span2.attributes[RETRIEVAL_QUERY] == "" + assert span2.attributes[RETRIEVAL_DOCUMENT] == "[]" + + +def test_build_workflow_llm_span(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_span_id", lambda _, __: 9) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "get_workflow_node_status", lambda _: status) + monkeypatch.setattr(aliyun_trace_module, "format_input_messages", lambda _: "in") + monkeypatch.setattr(aliyun_trace_module, "format_output_messages", lambda _: "out") + + trace_metadata = SimpleNamespace(trace_id=1, workflow_span_id=2, session_id="s", user_id="u", links=[]) + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node-id" + node_execution.title = "llm" + node_execution.process_data = { + "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}, + "prompts": ["p"], + "model_name": "m", + "model_provider": "p1", + } + node_execution.outputs = {"text": "t", "finish_reason": "stop"} + node_execution.created_at = _dt() + node_execution.finished_at = _dt() + + span = trace_instance.build_workflow_llm_span(_make_workflow_trace_info(), node_execution, trace_metadata) + assert span.attributes[GEN_AI_USAGE_TOTAL_TOKENS] == "3" + assert span.attributes[GEN_AI_REQUEST_MODEL] == "m" + assert span.attributes[GEN_AI_PROMPT] == '["p"]' + assert span.attributes[GEN_AI_COMPLETION] == "t" + assert span.attributes[GEN_AI_RESPONSE_FINISH_REASON] == "stop" + assert span.attributes[GEN_AI_INPUT_MESSAGE] == "in" + assert span.attributes[GEN_AI_OUTPUT_MESSAGE] == "out" + + # Cover usage from outputs if not in process_data + node_execution.process_data = {"prompts": []} + node_execution.outputs = {"usage": {"total_tokens": 10}, "text": ""} + span2 = trace_instance.build_workflow_llm_span(_make_workflow_trace_info(), node_execution, trace_metadata) + assert span2.attributes[GEN_AI_USAGE_TOTAL_TOKENS] == "10" + + +def test_add_workflow_span(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr( + aliyun_trace_module, "convert_to_span_id", lambda _, span_type: {"message": 20}.get(span_type, 0) + ) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "create_status_from_error", lambda _: status) + + trace_metadata = SimpleNamespace(trace_id=1, workflow_span_id=2, session_id="s", user_id="u", links=[]) + + # CASE 1: With message_id + trace_info = _make_workflow_trace_info( + message_id="msg-1", workflow_run_inputs={"sys.query": "hi"}, workflow_run_outputs={"ans": "ok"} + ) + trace_instance.add_workflow_span(trace_info, trace_metadata) + + assert len(trace_instance.trace_client.added_spans) == 2 + message_span = trace_instance.trace_client.added_spans[0] + workflow_span = trace_instance.trace_client.added_spans[1] + + assert message_span.name == "message" + assert message_span.span_kind == SpanKind.SERVER + assert message_span.parent_span_id is None + + assert workflow_span.name == "workflow" + assert workflow_span.span_kind == SpanKind.INTERNAL + assert workflow_span.parent_span_id == 20 + + trace_instance.trace_client.added_spans.clear() + + # CASE 2: Without message_id + trace_info_no_msg = _make_workflow_trace_info(message_id=None) + trace_instance.add_workflow_span(trace_info_no_msg, trace_metadata) + assert len(trace_instance.trace_client.added_spans) == 1 + span = trace_instance.trace_client.added_spans[0] + assert span.name == "workflow" + assert span.span_kind == SpanKind.SERVER + assert span.parent_span_id is None + + +def test_suggested_question_trace(trace_instance: AliyunDataTrace, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(aliyun_trace_module, "convert_to_trace_id", lambda _: 10) + monkeypatch.setattr( + aliyun_trace_module, + "convert_to_span_id", + lambda _, span_type: {"message": 20, "suggested_question": 21}.get(span_type, 0), + ) + monkeypatch.setattr(aliyun_trace_module, "convert_datetime_to_nanoseconds", lambda _: 123) + monkeypatch.setattr(aliyun_trace_module, "create_links_from_trace_id", lambda _: []) + status = Status(StatusCode.OK) + monkeypatch.setattr(aliyun_trace_module, "create_status_from_error", lambda _: status) + + trace_info = _make_suggested_question_trace_info(suggested_question=["how?"]) + trace_instance.suggested_question_trace(trace_info) + + assert len(trace_instance.trace_client.added_spans) == 1 + span = trace_instance.trace_client.added_spans[0] + assert span.name == "suggested_question" + assert span.attributes[GEN_AI_COMPLETION] == '["how?"]' diff --git a/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace_utils.py b/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace_utils.py new file mode 100644 index 0000000000..763fc90710 --- /dev/null +++ b/api/tests/unit_tests/core/ops/aliyun_trace/test_aliyun_trace_utils.py @@ -0,0 +1,275 @@ +import json +from unittest.mock import MagicMock + +from opentelemetry.trace import Link, StatusCode + +from core.ops.aliyun_trace.entities.semconv import ( + GEN_AI_FRAMEWORK, + GEN_AI_SESSION_ID, + GEN_AI_SPAN_KIND, + GEN_AI_USER_ID, + INPUT_VALUE, + OUTPUT_VALUE, +) +from core.ops.aliyun_trace.utils import ( + create_common_span_attributes, + create_links_from_trace_id, + create_status_from_error, + extract_retrieval_documents, + format_input_messages, + format_output_messages, + format_retrieval_documents, + get_user_id_from_message_data, + get_workflow_node_status, + serialize_json_data, +) +from core.rag.models.document import Document +from dify_graph.entities import WorkflowNodeExecution +from dify_graph.enums import WorkflowNodeExecutionStatus +from models import EndUser + + +def test_get_user_id_from_message_data_no_end_user(monkeypatch): + message_data = MagicMock() + message_data.from_account_id = "account_id" + message_data.from_end_user_id = None + + assert get_user_id_from_message_data(message_data) == "account_id" + + +def test_get_user_id_from_message_data_with_end_user(monkeypatch): + message_data = MagicMock() + message_data.from_account_id = "account_id" + message_data.from_end_user_id = "end_user_id" + + end_user_data = MagicMock(spec=EndUser) + end_user_data.session_id = "session_id" + + mock_query = MagicMock() + mock_query.where.return_value.first.return_value = end_user_data + + mock_session = MagicMock() + mock_session.query.return_value = mock_query + + from core.ops.aliyun_trace.utils import db + + monkeypatch.setattr(db, "session", mock_session) + + assert get_user_id_from_message_data(message_data) == "session_id" + + +def test_get_user_id_from_message_data_end_user_not_found(monkeypatch): + message_data = MagicMock() + message_data.from_account_id = "account_id" + message_data.from_end_user_id = "end_user_id" + + mock_query = MagicMock() + mock_query.where.return_value.first.return_value = None + + mock_session = MagicMock() + mock_session.query.return_value = mock_query + + from core.ops.aliyun_trace.utils import db + + monkeypatch.setattr(db, "session", mock_session) + + assert get_user_id_from_message_data(message_data) == "account_id" + + +def test_create_status_from_error(): + # Case OK + status_ok = create_status_from_error(None) + assert status_ok.status_code == StatusCode.OK + + # Case Error + status_err = create_status_from_error("some error") + assert status_err.status_code == StatusCode.ERROR + assert status_err.description == "some error" + + +def test_get_workflow_node_status(): + node_execution = MagicMock(spec=WorkflowNodeExecution) + + # SUCCEEDED + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + status = get_workflow_node_status(node_execution) + assert status.status_code == StatusCode.OK + + # FAILED + node_execution.status = WorkflowNodeExecutionStatus.FAILED + node_execution.error = "node fail" + status = get_workflow_node_status(node_execution) + assert status.status_code == StatusCode.ERROR + assert status.description == "node fail" + + # EXCEPTION + node_execution.status = WorkflowNodeExecutionStatus.EXCEPTION + node_execution.error = "node exception" + status = get_workflow_node_status(node_execution) + assert status.status_code == StatusCode.ERROR + assert status.description == "node exception" + + # UNSET/OTHER + node_execution.status = WorkflowNodeExecutionStatus.RUNNING + status = get_workflow_node_status(node_execution) + assert status.status_code == StatusCode.UNSET + + +def test_create_links_from_trace_id(monkeypatch): + # Mock create_link + mock_link = MagicMock(spec=Link) + import core.ops.aliyun_trace.data_exporter.traceclient + + monkeypatch.setattr(core.ops.aliyun_trace.data_exporter.traceclient, "create_link", lambda trace_id_str: mock_link) + + # Trace ID None + assert create_links_from_trace_id(None) == [] + + # Trace ID Present + links = create_links_from_trace_id("trace_id") + assert len(links) == 1 + assert links[0] == mock_link + + +def test_extract_retrieval_documents(): + doc1 = MagicMock(spec=Document) + doc1.page_content = "content1" + doc1.metadata = {"dataset_id": "ds1", "doc_id": "di1", "document_id": "dd1", "score": 0.9} + + doc2 = MagicMock(spec=Document) + doc2.page_content = "content2" + doc2.metadata = {"dataset_id": "ds2"} # Missing some keys + + documents = [doc1, doc2] + extracted = extract_retrieval_documents(documents) + + assert len(extracted) == 2 + assert extracted[0]["content"] == "content1" + assert extracted[0]["metadata"]["dataset_id"] == "ds1" + assert extracted[0]["score"] == 0.9 + + assert extracted[1]["content"] == "content2" + assert extracted[1]["metadata"]["dataset_id"] == "ds2" + assert extracted[1]["metadata"]["doc_id"] is None + assert extracted[1]["score"] is None + + +def test_serialize_json_data(): + data = {"a": 1} + # Test ensure_ascii default (False) + assert serialize_json_data(data) == json.dumps(data, ensure_ascii=False) + # Test ensure_ascii True + assert serialize_json_data(data, ensure_ascii=True) == json.dumps(data, ensure_ascii=True) + + +def test_create_common_span_attributes(): + attrs = create_common_span_attributes( + session_id="s1", user_id="u1", span_kind="kind1", framework="fw1", inputs="in1", outputs="out1" + ) + assert attrs[GEN_AI_SESSION_ID] == "s1" + assert attrs[GEN_AI_USER_ID] == "u1" + assert attrs[GEN_AI_SPAN_KIND] == "kind1" + assert attrs[GEN_AI_FRAMEWORK] == "fw1" + assert attrs[INPUT_VALUE] == "in1" + assert attrs[OUTPUT_VALUE] == "out1" + + +def test_format_retrieval_documents(): + # Not a list + assert format_retrieval_documents("not a list") == [] + + # Valid list + docs = [ + {"metadata": {"score": 0.8, "document_id": "doc1", "source": "src1"}, "content": "c1", "title": "t1"}, + { + "metadata": {"_source": "src2", "doc_metadata": {"extra": "val"}}, + "content": "c2", + # Missing title + }, + "not a dict", # Should be skipped + ] + formatted = format_retrieval_documents(docs) + + assert len(formatted) == 2 + assert formatted[0]["document"]["content"] == "c1" + assert formatted[0]["document"]["metadata"]["title"] == "t1" + assert formatted[0]["document"]["metadata"]["source"] == "src1" + assert formatted[0]["document"]["score"] == 0.8 + assert formatted[0]["document"]["id"] == "doc1" + + assert formatted[1]["document"]["content"] == "c2" + assert formatted[1]["document"]["metadata"]["source"] == "src2" + assert formatted[1]["document"]["metadata"]["extra"] == "val" + assert "title" not in formatted[1]["document"]["metadata"] + assert formatted[1]["document"]["score"] == 0.0 # Default + + # Exception handling + # We can trigger an exception by passing something that causes an error in the loop logic, + # but the try/except covers the whole function. + # Passing a list that contains something that throws when calling .get() - though dicts won't. + # Let's mock a dict that raises on get. + class BadDict: + def get(self, *args, **kwargs): + raise Exception("boom") + + assert format_retrieval_documents([BadDict()]) == [] + + +def test_format_input_messages(): + # Not a dict + assert format_input_messages(None) == serialize_json_data([]) + + # No prompts + assert format_input_messages({}) == serialize_json_data([]) + + # Valid prompts + process_data = { + "prompts": [ + {"role": "user", "text": "hello"}, + {"role": "assistant", "text": "hi"}, + {"role": "system", "text": "be helpful"}, + {"role": "tool", "text": "result"}, + {"role": "invalid", "text": "skip me"}, + "not a dict", + {"role": "user", "text": ""}, # Empty text, should be skipped? Code says `if text: message = ...` + ] + } + result = format_input_messages(process_data) + result_list = json.loads(result) + + assert len(result_list) == 4 + assert result_list[0]["role"] == "user" + assert result_list[0]["parts"][0]["content"] == "hello" + assert result_list[1]["role"] == "assistant" + assert result_list[2]["role"] == "system" + assert result_list[3]["role"] == "tool" + + # Exception path + assert format_input_messages({"prompts": [None]}) == serialize_json_data([]) + + +def test_format_output_messages(): + # Not a dict + assert format_output_messages(None) == serialize_json_data([]) + + # No text + assert format_output_messages({"finish_reason": "stop"}) == serialize_json_data([]) + + # Valid + outputs = {"text": "done", "finish_reason": "length"} + result = format_output_messages(outputs) + result_list = json.loads(result) + assert len(result_list) == 1 + assert result_list[0]["role"] == "assistant" + assert result_list[0]["parts"][0]["content"] == "done" + assert result_list[0]["finish_reason"] == "length" + + # Invalid finish reason + outputs2 = {"text": "done", "finish_reason": "unknown"} + result2 = format_output_messages(outputs2) + result_list2 = json.loads(result2) + assert result_list2[0]["finish_reason"] == "stop" + + # Exception path + # Trigger exception in serialize_json_data by passing non-serializable + assert format_output_messages({"text": MagicMock()}) == serialize_json_data([]) diff --git a/api/tests/unit_tests/core/ops/arize_phoenix_trace/test_arize_phoenix_trace.py b/api/tests/unit_tests/core/ops/arize_phoenix_trace/test_arize_phoenix_trace.py new file mode 100644 index 0000000000..1cee2f5b68 --- /dev/null +++ b/api/tests/unit_tests/core/ops/arize_phoenix_trace/test_arize_phoenix_trace.py @@ -0,0 +1,398 @@ +from datetime import UTC, datetime, timedelta +from unittest.mock import MagicMock, patch + +import pytest +from opentelemetry.sdk.trace import Tracer +from opentelemetry.semconv.trace import SpanAttributes as OTELSpanAttributes +from opentelemetry.trace import StatusCode + +from core.ops.arize_phoenix_trace.arize_phoenix_trace import ( + ArizePhoenixDataTrace, + datetime_to_nanos, + error_to_string, + safe_json_dumps, + set_span_status, + setup_tracer, + wrap_span_metadata, +) +from core.ops.entities.config_entity import ArizeConfig, PhoenixConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + WorkflowTraceInfo, +) + +# --- Helpers --- + + +def _dt(): + return datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + + +def _make_workflow_info(**kwargs): + defaults = { + "workflow_id": "w1", + "tenant_id": "t1", + "workflow_run_id": "r1", + "workflow_run_elapsed_time": 1.0, + "workflow_run_status": "succeeded", + "workflow_run_inputs": {"in": "val"}, + "workflow_run_outputs": {"out": "val"}, + "workflow_run_version": "1.0", + "total_tokens": 10, + "file_list": ["f1"], + "query": "hi", + "metadata": {"app_id": "app1"}, + "start_time": _dt(), + "end_time": _dt() + timedelta(seconds=1), + } + defaults.update(kwargs) + return WorkflowTraceInfo(**defaults) + + +def _make_message_info(**kwargs): + defaults = { + "conversation_model": "chat", + "message_tokens": 5, + "answer_tokens": 5, + "total_tokens": 10, + "conversation_mode": "chat", + "metadata": {"app_id": "app1"}, + "inputs": {"in": "val"}, + "outputs": "val", + "start_time": _dt(), + "end_time": _dt(), + "message_id": "m1", + } + defaults.update(kwargs) + return MessageTraceInfo(**defaults) + + +# --- Utility Function Tests --- + + +def test_datetime_to_nanos(): + dt = _dt() + expected = int(dt.timestamp() * 1_000_000_000) + assert datetime_to_nanos(dt) == expected + + with patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.datetime") as mock_dt: + mock_now = MagicMock() + mock_now.timestamp.return_value = 1704110400.0 + mock_dt.now.return_value = mock_now + assert datetime_to_nanos(None) == 1704110400000000000 + + +def test_error_to_string(): + try: + raise ValueError("boom") + except ValueError as e: + err = e + + res = error_to_string(err) + assert "ValueError: boom" in res + assert "traceback" in res.lower() or "line" in res.lower() + + assert error_to_string("str error") == "str error" + assert error_to_string(None) == "Empty Stack Trace" + + +def test_set_span_status(): + span = MagicMock() + # OK + set_span_status(span, None) + span.set_status.assert_called() + assert span.set_status.call_args[0][0].status_code == StatusCode.OK + + # Error Exception + span.reset_mock() + set_span_status(span, ValueError("fail")) + assert span.set_status.call_args[0][0].status_code == StatusCode.ERROR + span.record_exception.assert_called() + + # Error String + span.reset_mock() + set_span_status(span, "fail-str") + assert span.set_status.call_args[0][0].status_code == StatusCode.ERROR + span.add_event.assert_called() + + # repr branch + class SilentError: + def __str__(self): + return "" + + def __repr__(self): + return "SilentErrorRepr" + + span.reset_mock() + set_span_status(span, SilentError()) + assert span.add_event.call_args[1]["attributes"][OTELSpanAttributes.EXCEPTION_MESSAGE] == "SilentErrorRepr" + + +def test_safe_json_dumps(): + assert safe_json_dumps({"a": _dt()}) == '{"a": "2024-01-01 00:00:00+00:00"}' + + +def test_wrap_span_metadata(): + res = wrap_span_metadata({"a": 1}, b=2) + assert res == {"a": 1, "b": 2, "created_from": "Dify"} + + +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.GrpcOTLPSpanExporter") +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.trace_sdk.TracerProvider") +def test_setup_tracer_arize(mock_provider, mock_exporter): + config = ArizeConfig(endpoint="http://a.com", api_key="k", space_id="s", project="p") + setup_tracer(config) + mock_exporter.assert_called_once() + assert mock_exporter.call_args[1]["endpoint"] == "http://a.com/v1" + + +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.HttpOTLPSpanExporter") +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.trace_sdk.TracerProvider") +def test_setup_tracer_phoenix(mock_provider, mock_exporter): + config = PhoenixConfig(endpoint="http://p.com", project="p") + setup_tracer(config) + mock_exporter.assert_called_once() + assert mock_exporter.call_args[1]["endpoint"] == "http://p.com/v1/traces" + + +def test_setup_tracer_exception(): + config = ArizeConfig(endpoint="http://a.com", project="p") + with patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.urlparse", side_effect=Exception("boom")): + with pytest.raises(Exception, match="boom"): + setup_tracer(config) + + +# --- ArizePhoenixDataTrace Class Tests --- + + +@pytest.fixture +def trace_instance(): + with patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.setup_tracer") as mock_setup: + mock_tracer = MagicMock(spec=Tracer) + mock_processor = MagicMock() + mock_setup.return_value = (mock_tracer, mock_processor) + config = ArizeConfig(endpoint="http://a.com", api_key="k", space_id="s", project="p") + return ArizePhoenixDataTrace(config) + + +def test_trace_dispatch(trace_instance): + with ( + patch.object(trace_instance, "workflow_trace") as m1, + patch.object(trace_instance, "message_trace") as m2, + patch.object(trace_instance, "moderation_trace") as m3, + patch.object(trace_instance, "suggested_question_trace") as m4, + patch.object(trace_instance, "dataset_retrieval_trace") as m5, + patch.object(trace_instance, "tool_trace") as m6, + patch.object(trace_instance, "generate_name_trace") as m7, + ): + trace_instance.trace(_make_workflow_info()) + m1.assert_called() + + trace_instance.trace(_make_message_info()) + m2.assert_called() + + trace_instance.trace(ModerationTraceInfo(flagged=True, action="a", preset_response="p", query="q", metadata={})) + m3.assert_called() + + trace_instance.trace(SuggestedQuestionTraceInfo(suggested_question=[], total_tokens=0, level="i", metadata={})) + m4.assert_called() + + trace_instance.trace(DatasetRetrievalTraceInfo(metadata={})) + m5.assert_called() + + trace_instance.trace( + ToolTraceInfo( + tool_name="t", + tool_inputs={}, + tool_outputs="o", + metadata={}, + tool_config={}, + time_cost=1, + tool_parameters={}, + ) + ) + m6.assert_called() + + trace_instance.trace(GenerateNameTraceInfo(tenant_id="t", metadata={})) + m7.assert_called() + + +def test_trace_exception(trace_instance): + with patch.object(trace_instance, "workflow_trace", side_effect=RuntimeError("fail")): + with pytest.raises(RuntimeError): + trace_instance.trace(_make_workflow_info()) + + +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.sessionmaker") +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.DifyCoreRepositoryFactory") +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.db") +def test_workflow_trace_full(mock_db, mock_repo_factory, mock_sessionmaker, trace_instance): + mock_db.engine = MagicMock() + info = _make_workflow_info() + repo = MagicMock() + mock_repo_factory.create_workflow_node_execution_repository.return_value = repo + + node1 = MagicMock() + node1.node_type = "llm" + node1.status = "succeeded" + node1.inputs = {"q": "hi"} + node1.outputs = {"a": "bye", "usage": {"total_tokens": 5}} + node1.created_at = _dt() + node1.elapsed_time = 1.0 + node1.process_data = { + "prompts": [{"role": "user", "content": "hi"}], + "model_provider": "openai", + "model_name": "gpt-4", + } + node1.metadata = {"k": "v"} + node1.title = "title" + node1.id = "n1" + node1.error = None + + repo.get_by_workflow_run.return_value = [node1] + + with patch.object(trace_instance, "get_service_account_with_tenant"): + trace_instance.workflow_trace(info) + + assert trace_instance.tracer.start_span.call_count >= 2 + + +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.db") +def test_workflow_trace_no_app_id(mock_db, trace_instance): + mock_db.engine = MagicMock() + info = _make_workflow_info() + info.metadata = {} + with pytest.raises(ValueError, match="No app_id found in trace_info metadata"): + trace_instance.workflow_trace(info) + + +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.db") +def test_message_trace_success(mock_db, trace_instance): + mock_db.engine = MagicMock() + info = _make_message_info() + info.message_data = MagicMock() + info.message_data.from_account_id = "acc1" + info.message_data.from_end_user_id = None + info.message_data.query = "q" + info.message_data.answer = "a" + info.message_data.status = "s" + info.message_data.model_id = "m" + info.message_data.model_provider = "p" + info.message_data.message_metadata = "{}" + info.message_data.error = None + info.error = None + + trace_instance.message_trace(info) + assert trace_instance.tracer.start_span.call_count >= 1 + + +@patch("core.ops.arize_phoenix_trace.arize_phoenix_trace.db") +def test_message_trace_with_error(mock_db, trace_instance): + mock_db.engine = MagicMock() + info = _make_message_info() + info.message_data = MagicMock() + info.message_data.from_account_id = "acc1" + info.message_data.from_end_user_id = None + info.message_data.query = "q" + info.message_data.answer = "a" + info.message_data.status = "s" + info.message_data.model_id = "m" + info.message_data.model_provider = "p" + info.message_data.message_metadata = "{}" + info.message_data.error = "processing failed" + info.error = "message error" + + trace_instance.message_trace(info) + assert trace_instance.tracer.start_span.call_count >= 1 + + +def test_trace_methods_return_early_with_no_message_data(trace_instance): + info = MagicMock() + info.message_data = None + + trace_instance.moderation_trace(info) + trace_instance.suggested_question_trace(info) + trace_instance.dataset_retrieval_trace(info) + trace_instance.tool_trace(info) + trace_instance.generate_name_trace(info) + + assert trace_instance.tracer.start_span.call_count == 0 + + +def test_moderation_trace_ok(trace_instance): + info = ModerationTraceInfo(flagged=True, action="a", preset_response="p", query="q", metadata={}) + info.message_data = MagicMock() + info.message_data.error = None + trace_instance.moderation_trace(info) + # root span (1) + moderation span (1) = 2 + assert trace_instance.tracer.start_span.call_count >= 1 + + +def test_suggested_question_trace_ok(trace_instance): + info = SuggestedQuestionTraceInfo(suggested_question=["?"], total_tokens=1, level="i", metadata={}) + info.message_data = MagicMock() + info.error = None + trace_instance.suggested_question_trace(info) + assert trace_instance.tracer.start_span.call_count >= 1 + + +def test_dataset_retrieval_trace_ok(trace_instance): + info = DatasetRetrievalTraceInfo(documents=[], metadata={}) + info.message_data = MagicMock() + info.error = None + trace_instance.dataset_retrieval_trace(info) + assert trace_instance.tracer.start_span.call_count >= 1 + + +def test_tool_trace_ok(trace_instance): + info = ToolTraceInfo( + tool_name="t", tool_inputs={}, tool_outputs="o", metadata={}, tool_config={}, time_cost=1, tool_parameters={} + ) + info.message_data = MagicMock() + info.error = None + trace_instance.tool_trace(info) + assert trace_instance.tracer.start_span.call_count >= 1 + + +def test_generate_name_trace_ok(trace_instance): + info = GenerateNameTraceInfo(tenant_id="t", metadata={}) + info.message_data = MagicMock() + info.message_data.error = None + trace_instance.generate_name_trace(info) + assert trace_instance.tracer.start_span.call_count >= 1 + + +def test_get_project_url_phoenix(trace_instance): + trace_instance.arize_phoenix_config = PhoenixConfig(endpoint="http://p.com", project="p") + assert "p.com/projects/" in trace_instance.get_project_url() + + +def test_set_attribute_none_logic(trace_instance): + # Test role can be None + attrs = trace_instance._construct_llm_attributes([{"role": None, "content": "hi"}]) + assert "llm.input_messages.0.message.role" not in attrs + + # Test tool call id can be None + tool_call_none_id = {"id": None, "function": {"name": "f1"}} + attrs = trace_instance._construct_llm_attributes([{"role": "assistant", "tool_calls": [tool_call_none_id]}]) + assert "llm.input_messages.0.message.tool_calls.0.tool_call.id" not in attrs + + +def test_construct_llm_attributes_dict_branch(trace_instance): + attrs = trace_instance._construct_llm_attributes({"prompt": "hi"}) + assert '"prompt": "hi"' in attrs["llm.input_messages.0.message.content"] + assert attrs["llm.input_messages.0.message.role"] == "user" + + +def test_api_check_success(trace_instance): + assert trace_instance.api_check() is True + + +def test_ensure_root_span_basic(trace_instance): + trace_instance.ensure_root_span("tid") + assert "tid" in trace_instance.dify_trace_ids diff --git a/api/tests/unit_tests/core/ops/langfuse_trace/test_langfuse_trace.py b/api/tests/unit_tests/core/ops/langfuse_trace/test_langfuse_trace.py new file mode 100644 index 0000000000..0ff135562c --- /dev/null +++ b/api/tests/unit_tests/core/ops/langfuse_trace/test_langfuse_trace.py @@ -0,0 +1,698 @@ +import collections +import logging +from datetime import UTC, datetime, timedelta +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from core.ops.entities.config_entity import LangfuseConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + TraceTaskName, + WorkflowTraceInfo, +) +from core.ops.langfuse_trace.entities.langfuse_trace_entity import ( + LangfuseGeneration, + LangfuseSpan, + LangfuseTrace, + LevelEnum, + UnitEnum, +) +from core.ops.langfuse_trace.langfuse_trace import LangFuseDataTrace +from dify_graph.enums import BuiltinNodeTypes +from models import EndUser +from models.enums import MessageStatus + + +def _dt() -> datetime: + return datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + + +@pytest.fixture +def langfuse_config(): + return LangfuseConfig(public_key="pk-123", secret_key="sk-123", host="https://cloud.langfuse.com") + + +@pytest.fixture +def trace_instance(langfuse_config, monkeypatch): + # Mock Langfuse client to avoid network calls + mock_client = MagicMock() + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.Langfuse", lambda **kwargs: mock_client) + + instance = LangFuseDataTrace(langfuse_config) + return instance + + +def test_init(langfuse_config, monkeypatch): + mock_langfuse = MagicMock() + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.Langfuse", mock_langfuse) + monkeypatch.setenv("FILES_URL", "http://test.url") + + instance = LangFuseDataTrace(langfuse_config) + + mock_langfuse.assert_called_once_with( + public_key=langfuse_config.public_key, + secret_key=langfuse_config.secret_key, + host=langfuse_config.host, + ) + assert instance.file_base_url == "http://test.url" + + +def test_trace_dispatch(trace_instance, monkeypatch): + methods = [ + "workflow_trace", + "message_trace", + "moderation_trace", + "suggested_question_trace", + "dataset_retrieval_trace", + "tool_trace", + "generate_name_trace", + ] + mocks = {method: MagicMock() for method in methods} + for method, m in mocks.items(): + monkeypatch.setattr(trace_instance, method, m) + + # WorkflowTraceInfo + info = MagicMock(spec=WorkflowTraceInfo) + trace_instance.trace(info) + mocks["workflow_trace"].assert_called_once_with(info) + + # MessageTraceInfo + info = MagicMock(spec=MessageTraceInfo) + trace_instance.trace(info) + mocks["message_trace"].assert_called_once_with(info) + + # ModerationTraceInfo + info = MagicMock(spec=ModerationTraceInfo) + trace_instance.trace(info) + mocks["moderation_trace"].assert_called_once_with(info) + + # SuggestedQuestionTraceInfo + info = MagicMock(spec=SuggestedQuestionTraceInfo) + trace_instance.trace(info) + mocks["suggested_question_trace"].assert_called_once_with(info) + + # DatasetRetrievalTraceInfo + info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_instance.trace(info) + mocks["dataset_retrieval_trace"].assert_called_once_with(info) + + # ToolTraceInfo + info = MagicMock(spec=ToolTraceInfo) + trace_instance.trace(info) + mocks["tool_trace"].assert_called_once_with(info) + + # GenerateNameTraceInfo + info = MagicMock(spec=GenerateNameTraceInfo) + trace_instance.trace(info) + mocks["generate_name_trace"].assert_called_once_with(info) + + +def test_workflow_trace_with_message_id(trace_instance, monkeypatch): + # Setup trace info + trace_info = WorkflowTraceInfo( + workflow_id="wf-1", + tenant_id="tenant-1", + workflow_run_id="run-1", + workflow_run_elapsed_time=1.0, + workflow_run_status="succeeded", + workflow_run_inputs={"input": "hi"}, + workflow_run_outputs={"output": "hello"}, + workflow_run_version="1.0", + message_id="msg-1", + conversation_id="conv-1", + total_tokens=100, + file_list=[], + query="hi", + start_time=_dt(), + end_time=_dt() + timedelta(seconds=1), + trace_id="trace-1", + metadata={"app_id": "app-1", "user_id": "user-1"}, + workflow_app_log_id="log-1", + error="", + ) + + # Mock DB and Repositories + mock_session = MagicMock() + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.sessionmaker", lambda bind: lambda: mock_session) + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.db", MagicMock(engine="engine")) + + # Mock node executions + node_llm = MagicMock() + node_llm.id = "node-llm" + node_llm.title = "LLM Node" + node_llm.node_type = BuiltinNodeTypes.LLM + node_llm.status = "succeeded" + node_llm.process_data = { + "model_mode": "chat", + "model_name": "gpt-4", + "model_provider": "openai", + "usage": {"prompt_tokens": 10, "completion_tokens": 20}, + } + node_llm.inputs = {"prompts": "p"} + node_llm.outputs = {"text": "t"} + node_llm.created_at = _dt() + node_llm.elapsed_time = 0.5 + node_llm.metadata = {"foo": "bar"} + + node_other = MagicMock() + node_other.id = "node-other" + node_other.title = "Other Node" + node_other.node_type = BuiltinNodeTypes.CODE + node_other.status = "failed" + node_other.process_data = None + node_other.inputs = {"code": "print"} + node_other.outputs = {"result": "ok"} + node_other.created_at = None # Trigger datetime.now() branch + node_other.elapsed_time = 0.2 + node_other.metadata = None + + repo = MagicMock() + repo.get_by_workflow_run.return_value = [node_llm, node_other] + + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.DifyCoreRepositoryFactory", mock_factory) + + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + # Track calls to add_trace, add_span, add_generation + trace_instance.add_trace = MagicMock() + trace_instance.add_span = MagicMock() + trace_instance.add_generation = MagicMock() + + trace_instance.workflow_trace(trace_info) + + # Verify add_trace (Workflow Level) + trace_instance.add_trace.assert_called_once() + trace_data = trace_instance.add_trace.call_args[1]["langfuse_trace_data"] + assert trace_data.id == "trace-1" + assert trace_data.name == TraceTaskName.MESSAGE_TRACE + assert "message" in trace_data.tags + assert "workflow" in trace_data.tags + + # Verify add_span (Workflow Run Span) + assert trace_instance.add_span.call_count >= 1 + # First span should be workflow run span because message_id is present + workflow_span = trace_instance.add_span.call_args_list[0][1]["langfuse_span_data"] + assert workflow_span.id == "run-1" + assert workflow_span.name == TraceTaskName.WORKFLOW_TRACE + + # Verify Generation for LLM node + trace_instance.add_generation.assert_called_once() + gen_data = trace_instance.add_generation.call_args[1]["langfuse_generation_data"] + assert gen_data.id == "node-llm" + assert gen_data.usage.input == 10 + assert gen_data.usage.output == 20 + + # Verify normal span for Other node + # Second add_span call + other_span = trace_instance.add_span.call_args_list[1][1]["langfuse_span_data"] + assert other_span.id == "node-other" + assert other_span.level == LevelEnum.ERROR + + +def test_workflow_trace_no_message_id(trace_instance, monkeypatch): + trace_info = WorkflowTraceInfo( + workflow_id="wf-1", + tenant_id="tenant-1", + workflow_run_id="run-1", + workflow_run_elapsed_time=1.0, + workflow_run_status="succeeded", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_version="1.0", + total_tokens=0, + file_list=[], + query="", + message_id=None, + conversation_id="conv-1", + start_time=_dt(), + end_time=_dt(), + trace_id=None, # Should fallback to workflow_run_id + metadata={"app_id": "app-1"}, + workflow_app_log_id="log-1", + error="", + ) + + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.sessionmaker", lambda bind: lambda: MagicMock()) + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.db", MagicMock(engine="engine")) + repo = MagicMock() + repo.get_by_workflow_run.return_value = [] + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.DifyCoreRepositoryFactory", mock_factory) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_trace = MagicMock() + trace_instance.workflow_trace(trace_info) + + trace_instance.add_trace.assert_called_once() + trace_data = trace_instance.add_trace.call_args[1]["langfuse_trace_data"] + assert trace_data.id == "run-1" + assert trace_data.name == TraceTaskName.WORKFLOW_TRACE + + +def test_workflow_trace_missing_app_id(trace_instance, monkeypatch): + trace_info = WorkflowTraceInfo( + workflow_id="wf-1", + tenant_id="tenant-1", + workflow_run_id="run-1", + workflow_run_elapsed_time=1.0, + workflow_run_status="succeeded", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_version="1.0", + total_tokens=0, + file_list=[], + query="", + message_id=None, + conversation_id="conv-1", + start_time=_dt(), + end_time=_dt(), + metadata={}, # Missing app_id + workflow_app_log_id="log-1", + error="", + ) + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.sessionmaker", lambda bind: lambda: MagicMock()) + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.db", MagicMock(engine="engine")) + + with pytest.raises(ValueError, match="No app_id found in trace_info metadata"): + trace_instance.workflow_trace(trace_info) + + +def test_message_trace_basic(trace_instance, monkeypatch): + message_data = MagicMock() + message_data.id = "msg-1" + message_data.from_account_id = "acc-1" + message_data.from_end_user_id = None + message_data.provider_response_latency = 0.5 + message_data.conversation_id = "conv-1" + message_data.total_price = 0.01 + message_data.model_id = "gpt-4" + message_data.answer = "hello" + message_data.status = MessageStatus.NORMAL + message_data.error = None + + trace_info = MessageTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs={"query": "hi"}, + outputs={"answer": "hello"}, + message_tokens=10, + answer_tokens=20, + total_tokens=30, + start_time=_dt(), + end_time=_dt() + timedelta(seconds=1), + trace_id="trace-1", + metadata={"foo": "bar"}, + conversation_mode="chat", + conversation_model="gpt-4", + file_list=[], + error=None, + ) + + trace_instance.add_trace = MagicMock() + trace_instance.add_generation = MagicMock() + + trace_instance.message_trace(trace_info) + + trace_instance.add_trace.assert_called_once() + trace_instance.add_generation.assert_called_once() + + gen_data = trace_instance.add_generation.call_args[0][0] + assert gen_data.name == "llm" + assert gen_data.usage.total == 30 + + +def test_message_trace_with_end_user(trace_instance, monkeypatch): + message_data = MagicMock() + message_data.id = "msg-1" + message_data.from_account_id = "acc-1" + message_data.from_end_user_id = "end-user-1" + message_data.conversation_id = "conv-1" + message_data.status = MessageStatus.NORMAL + message_data.model_id = "gpt-4" + message_data.error = "" + message_data.answer = "hello" + message_data.total_price = 0.0 + message_data.provider_response_latency = 0.1 + + trace_info = MessageTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs={}, + outputs={}, + message_tokens=0, + answer_tokens=0, + total_tokens=0, + start_time=_dt(), + end_time=_dt(), + metadata={}, + conversation_mode="chat", + conversation_model="gpt-4", + file_list=[], + error=None, + ) + + # Mock DB session for EndUser lookup + mock_end_user = MagicMock(spec=EndUser) + mock_end_user.session_id = "session-id-123" + + mock_query = MagicMock() + mock_query.where.return_value.first.return_value = mock_end_user + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.db.session.query", lambda model: mock_query) + + trace_instance.add_trace = MagicMock() + trace_instance.add_generation = MagicMock() + + trace_instance.message_trace(trace_info) + + trace_data = trace_instance.add_trace.call_args[1]["langfuse_trace_data"] + assert trace_data.user_id == "session-id-123" + assert trace_data.metadata["user_id"] == "session-id-123" + + +def test_message_trace_none_data(trace_instance): + trace_info = SimpleNamespace(message_data=None, file_list=[], metadata={}) + trace_instance.add_trace = MagicMock() + trace_instance.message_trace(trace_info) + trace_instance.add_trace.assert_not_called() + + +def test_moderation_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + + trace_info = ModerationTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs={"q": "hi"}, + action="stop", + flagged=True, + preset_response="blocked", + start_time=None, + end_time=None, + metadata={"foo": "bar"}, + trace_id="trace-1", + query="hi", + ) + + trace_instance.add_span = MagicMock() + trace_instance.moderation_trace(trace_info) + + trace_instance.add_span.assert_called_once() + span_data = trace_instance.add_span.call_args[1]["langfuse_span_data"] + assert span_data.name == TraceTaskName.MODERATION_TRACE + assert span_data.output["flagged"] is True + + +def test_suggested_question_trace(trace_instance): + message_data = MagicMock() + message_data.status = MessageStatus.NORMAL + message_data.error = None + + trace_info = SuggestedQuestionTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs="hi", + suggested_question=["q1"], + total_tokens=10, + level="info", + start_time=_dt(), + end_time=_dt(), + metadata={}, + trace_id="trace-1", + ) + + trace_instance.add_generation = MagicMock() + trace_instance.suggested_question_trace(trace_info) + + trace_instance.add_generation.assert_called_once() + gen_data = trace_instance.add_generation.call_args[1]["langfuse_generation_data"] + assert gen_data.name == TraceTaskName.SUGGESTED_QUESTION_TRACE + assert gen_data.usage.unit == UnitEnum.CHARACTERS + + +def test_dataset_retrieval_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + message_data.updated_at = _dt() + + trace_info = DatasetRetrievalTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs="query", + documents=[{"id": "doc1"}], + start_time=None, + end_time=None, + metadata={}, + trace_id="trace-1", + ) + + trace_instance.add_span = MagicMock() + trace_instance.dataset_retrieval_trace(trace_info) + + trace_instance.add_span.assert_called_once() + span_data = trace_instance.add_span.call_args[1]["langfuse_span_data"] + assert span_data.name == TraceTaskName.DATASET_RETRIEVAL_TRACE + assert span_data.output["documents"] == [{"id": "doc1"}] + + +def test_tool_trace(trace_instance): + trace_info = ToolTraceInfo( + message_id="msg-1", + message_data=MagicMock(), + inputs={}, + outputs={}, + tool_name="my_tool", + tool_inputs={"a": 1}, + tool_outputs="result_string", + time_cost=0.1, + start_time=_dt(), + end_time=_dt(), + metadata={}, + trace_id="trace-1", + tool_config={}, + tool_parameters={}, + error="some error", + ) + + trace_instance.add_span = MagicMock() + trace_instance.tool_trace(trace_info) + + trace_instance.add_span.assert_called_once() + span_data = trace_instance.add_span.call_args[1]["langfuse_span_data"] + assert span_data.name == "my_tool" + assert span_data.level == LevelEnum.ERROR + + +def test_generate_name_trace(trace_instance): + trace_info = GenerateNameTraceInfo( + inputs={"q": "hi"}, + outputs={"name": "new"}, + tenant_id="tenant-1", + conversation_id="conv-1", + start_time=_dt(), + end_time=_dt(), + metadata={"m": 1}, + ) + + trace_instance.add_trace = MagicMock() + trace_instance.add_span = MagicMock() + + trace_instance.generate_name_trace(trace_info) + + trace_instance.add_trace.assert_called_once() + trace_instance.add_span.assert_called_once() + + trace_data = trace_instance.add_trace.call_args[1]["langfuse_trace_data"] + assert trace_data.name == TraceTaskName.GENERATE_NAME_TRACE + assert trace_data.user_id == "tenant-1" + + span_data = trace_instance.add_span.call_args[1]["langfuse_span_data"] + assert span_data.trace_id == "conv-1" + + +def test_add_trace_success(trace_instance): + data = LangfuseTrace(id="t1", name="trace") + trace_instance.add_trace(data) + trace_instance.langfuse_client.trace.assert_called_once() + + +def test_add_trace_error(trace_instance): + trace_instance.langfuse_client.trace.side_effect = Exception("error") + data = LangfuseTrace(id="t1", name="trace") + with pytest.raises(ValueError, match="LangFuse Failed to create trace: error"): + trace_instance.add_trace(data) + + +def test_add_span_success(trace_instance): + data = LangfuseSpan(id="s1", name="span", trace_id="t1") + trace_instance.add_span(data) + trace_instance.langfuse_client.span.assert_called_once() + + +def test_add_span_error(trace_instance): + trace_instance.langfuse_client.span.side_effect = Exception("error") + data = LangfuseSpan(id="s1", name="span", trace_id="t1") + with pytest.raises(ValueError, match="LangFuse Failed to create span: error"): + trace_instance.add_span(data) + + +def test_update_span(trace_instance): + span = MagicMock() + data = LangfuseSpan(id="s1", name="span", trace_id="t1") + trace_instance.update_span(span, data) + span.end.assert_called_once() + + +def test_add_generation_success(trace_instance): + data = LangfuseGeneration(id="g1", name="gen", trace_id="t1") + trace_instance.add_generation(data) + trace_instance.langfuse_client.generation.assert_called_once() + + +def test_add_generation_error(trace_instance): + trace_instance.langfuse_client.generation.side_effect = Exception("error") + data = LangfuseGeneration(id="g1", name="gen", trace_id="t1") + with pytest.raises(ValueError, match="LangFuse Failed to create generation: error"): + trace_instance.add_generation(data) + + +def test_update_generation(trace_instance): + gen = MagicMock() + data = LangfuseGeneration(id="g1", name="gen", trace_id="t1") + trace_instance.update_generation(gen, data) + gen.end.assert_called_once() + + +def test_api_check_success(trace_instance): + trace_instance.langfuse_client.auth_check.return_value = True + assert trace_instance.api_check() is True + + +def test_api_check_error(trace_instance): + trace_instance.langfuse_client.auth_check.side_effect = Exception("fail") + with pytest.raises(ValueError, match="LangFuse API check failed: fail"): + trace_instance.api_check() + + +def test_get_project_key_success(trace_instance): + mock_data = MagicMock() + mock_data.id = "proj-1" + trace_instance.langfuse_client.client.projects.get.return_value = MagicMock(data=[mock_data]) + assert trace_instance.get_project_key() == "proj-1" + + +def test_get_project_key_error(trace_instance): + trace_instance.langfuse_client.client.projects.get.side_effect = Exception("fail") + with pytest.raises(ValueError, match="LangFuse get project key failed: fail"): + trace_instance.get_project_key() + + +def test_moderation_trace_none(trace_instance): + trace_info = ModerationTraceInfo( + message_id="m", + message_data=None, + inputs={}, + action="s", + flagged=False, + preset_response="", + query="", + metadata={}, + ) + trace_instance.add_span = MagicMock() + trace_instance.moderation_trace(trace_info) + trace_instance.add_span.assert_not_called() + + +def test_suggested_question_trace_none(trace_instance): + trace_info = SuggestedQuestionTraceInfo( + message_id="m", message_data=None, inputs={}, suggested_question=[], total_tokens=0, level="i", metadata={} + ) + trace_instance.add_generation = MagicMock() + trace_instance.suggested_question_trace(trace_info) + trace_instance.add_generation.assert_not_called() + + +def test_dataset_retrieval_trace_none(trace_instance): + trace_info = DatasetRetrievalTraceInfo(message_id="m", message_data=None, inputs={}, documents=[], metadata={}) + trace_instance.add_span = MagicMock() + trace_instance.dataset_retrieval_trace(trace_info) + trace_instance.add_span.assert_not_called() + + +def test_langfuse_trace_entity_with_list_dict_input(): + # To cover lines 29-31 in langfuse_trace_entity.py + # We need to mock replace_text_with_content or just check if it works + # Actually replace_text_with_content is imported from core.ops.utils + data = LangfuseTrace(id="t1", name="n", input=[{"text": "hello"}]) + assert isinstance(data.input, list) + assert data.input[0]["content"] == "hello" + + +def test_workflow_trace_handles_usage_extraction_error(trace_instance, monkeypatch, caplog): + # Setup trace info to trigger LLM node usage extraction + trace_info = WorkflowTraceInfo( + workflow_id="wf-1", + tenant_id="t", + workflow_run_id="r", + workflow_run_elapsed_time=1.0, + workflow_run_status="s", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_version="1", + total_tokens=0, + file_list=[], + query="", + message_id=None, + conversation_id="c", + start_time=_dt(), + end_time=_dt(), + metadata={"app_id": "app-1"}, + workflow_app_log_id="l", + error="", + ) + + node = MagicMock() + node.id = "n1" + node.title = "LLM Node" + node.node_type = BuiltinNodeTypes.LLM + node.status = "succeeded" + + class BadDict(collections.UserDict): + def get(self, key, default=None): + if key == "usage": + raise Exception("Usage extraction failed") + return super().get(key, default) + + node.process_data = BadDict({"model_mode": "chat", "model_name": "gpt-4", "usage": True, "prompts": ["p"]}) + node.created_at = _dt() + node.elapsed_time = 0.1 + node.metadata = {} + node.outputs = {} + + repo = MagicMock() + repo.get_by_workflow_run.return_value = [node] + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.DifyCoreRepositoryFactory", mock_factory) + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.sessionmaker", lambda bind: lambda: MagicMock()) + monkeypatch.setattr("core.ops.langfuse_trace.langfuse_trace.db", MagicMock(engine="engine")) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_trace = MagicMock() + trace_instance.add_generation = MagicMock() + + with caplog.at_level(logging.ERROR): + trace_instance.workflow_trace(trace_info) + + assert "Failed to extract usage" in caplog.text + trace_instance.add_generation.assert_called_once() diff --git a/api/tests/unit_tests/core/ops/langsmith_trace/test_langsmith_trace.py b/api/tests/unit_tests/core/ops/langsmith_trace/test_langsmith_trace.py new file mode 100644 index 0000000000..f656f7435f --- /dev/null +++ b/api/tests/unit_tests/core/ops/langsmith_trace/test_langsmith_trace.py @@ -0,0 +1,608 @@ +import collections +from datetime import datetime, timedelta +from unittest.mock import MagicMock + +import pytest + +from core.ops.entities.config_entity import LangSmithConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + TraceTaskName, + WorkflowTraceInfo, +) +from core.ops.langsmith_trace.entities.langsmith_trace_entity import ( + LangSmithRunModel, + LangSmithRunType, + LangSmithRunUpdateModel, +) +from core.ops.langsmith_trace.langsmith_trace import LangSmithDataTrace +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey +from models import EndUser + + +def _dt() -> datetime: + return datetime(2024, 1, 1, 0, 0, 0) + + +@pytest.fixture +def langsmith_config(): + return LangSmithConfig(api_key="ls-123", project="default", endpoint="https://api.smith.langchain.com") + + +@pytest.fixture +def trace_instance(langsmith_config, monkeypatch): + # Mock LangSmith client + mock_client = MagicMock() + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.Client", lambda **kwargs: mock_client) + + instance = LangSmithDataTrace(langsmith_config) + return instance + + +def test_init(langsmith_config, monkeypatch): + mock_client_class = MagicMock() + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.Client", mock_client_class) + monkeypatch.setenv("FILES_URL", "http://test.url") + + instance = LangSmithDataTrace(langsmith_config) + + mock_client_class.assert_called_once_with(api_key=langsmith_config.api_key, api_url=langsmith_config.endpoint) + assert instance.langsmith_key == langsmith_config.api_key + assert instance.project_name == langsmith_config.project + assert instance.file_base_url == "http://test.url" + + +def test_trace_dispatch(trace_instance, monkeypatch): + methods = [ + "workflow_trace", + "message_trace", + "moderation_trace", + "suggested_question_trace", + "dataset_retrieval_trace", + "tool_trace", + "generate_name_trace", + ] + mocks = {method: MagicMock() for method in methods} + for method, m in mocks.items(): + monkeypatch.setattr(trace_instance, method, m) + + # WorkflowTraceInfo + info = MagicMock(spec=WorkflowTraceInfo) + trace_instance.trace(info) + mocks["workflow_trace"].assert_called_once_with(info) + + # MessageTraceInfo + info = MagicMock(spec=MessageTraceInfo) + trace_instance.trace(info) + mocks["message_trace"].assert_called_once_with(info) + + # ModerationTraceInfo + info = MagicMock(spec=ModerationTraceInfo) + trace_instance.trace(info) + mocks["moderation_trace"].assert_called_once_with(info) + + # SuggestedQuestionTraceInfo + info = MagicMock(spec=SuggestedQuestionTraceInfo) + trace_instance.trace(info) + mocks["suggested_question_trace"].assert_called_once_with(info) + + # DatasetRetrievalTraceInfo + info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_instance.trace(info) + mocks["dataset_retrieval_trace"].assert_called_once_with(info) + + # ToolTraceInfo + info = MagicMock(spec=ToolTraceInfo) + trace_instance.trace(info) + mocks["tool_trace"].assert_called_once_with(info) + + # GenerateNameTraceInfo + info = MagicMock(spec=GenerateNameTraceInfo) + trace_instance.trace(info) + mocks["generate_name_trace"].assert_called_once_with(info) + + +def test_workflow_trace(trace_instance, monkeypatch): + # Setup trace info + workflow_data = MagicMock() + workflow_data.created_at = _dt() + workflow_data.finished_at = _dt() + timedelta(seconds=1) + + trace_info = WorkflowTraceInfo( + tenant_id="tenant-1", + workflow_id="wf-1", + workflow_run_id="run-1", + workflow_run_inputs={"input": "hi"}, + workflow_run_outputs={"output": "hello"}, + workflow_run_status="succeeded", + workflow_run_version="1.0", + workflow_run_elapsed_time=1.0, + total_tokens=100, + file_list=[], + query="hi", + message_id="msg-1", + conversation_id="conv-1", + start_time=_dt(), + end_time=_dt() + timedelta(seconds=1), + trace_id="trace-1", + metadata={"app_id": "app-1"}, + workflow_app_log_id="log-1", + error="", + workflow_data=workflow_data, + ) + + # Mock dependencies + mock_session = MagicMock() + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.sessionmaker", lambda bind: lambda: mock_session) + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.db", MagicMock(engine="engine")) + + # Mock node executions + node_llm = MagicMock() + node_llm.id = "node-llm" + node_llm.title = "LLM Node" + node_llm.node_type = BuiltinNodeTypes.LLM + node_llm.status = "succeeded" + node_llm.process_data = { + "model_mode": "chat", + "model_name": "gpt-4", + "model_provider": "openai", + "usage": {"prompt_tokens": 10, "completion_tokens": 20}, + } + node_llm.inputs = {"prompts": "p"} + node_llm.outputs = {"text": "t"} + node_llm.created_at = _dt() + node_llm.elapsed_time = 0.5 + node_llm.metadata = {WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 30} + + node_other = MagicMock() + node_other.id = "node-other" + node_other.title = "Tool Node" + node_other.node_type = BuiltinNodeTypes.TOOL + node_other.status = "succeeded" + node_other.process_data = None + node_other.inputs = {"tool_input": "val"} + node_other.outputs = {"tool_output": "val"} + node_other.created_at = None # Trigger datetime.now() + node_other.elapsed_time = 0.2 + node_other.metadata = {} + + node_retrieval = MagicMock() + node_retrieval.id = "node-retrieval" + node_retrieval.title = "Retrieval Node" + node_retrieval.node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL + node_retrieval.status = "succeeded" + node_retrieval.process_data = None + node_retrieval.inputs = {"query": "val"} + node_retrieval.outputs = {"results": "val"} + node_retrieval.created_at = _dt() + node_retrieval.elapsed_time = 0.2 + node_retrieval.metadata = {} + + repo = MagicMock() + repo.get_by_workflow_run.return_value = [node_llm, node_other, node_retrieval] + + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.DifyCoreRepositoryFactory", mock_factory) + + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_run = MagicMock() + + trace_instance.workflow_trace(trace_info) + + # Verify add_run calls + # 1. message run (id="msg-1") + # 2. workflow run (id="run-1") + # 3. node llm run (id="node-llm") + # 4. node other run (id="node-other") + # 5. node retrieval run (id="node-retrieval") + assert trace_instance.add_run.call_count == 5 + + call_args = [call[0][0] for call in trace_instance.add_run.call_args_list] + + assert call_args[0].id == "msg-1" + assert call_args[0].name == TraceTaskName.MESSAGE_TRACE + + assert call_args[1].id == "run-1" + assert call_args[1].name == TraceTaskName.WORKFLOW_TRACE + assert call_args[1].parent_run_id == "msg-1" + + assert call_args[2].id == "node-llm" + assert call_args[2].run_type == LangSmithRunType.llm + + assert call_args[3].id == "node-other" + assert call_args[3].run_type == LangSmithRunType.tool + + assert call_args[4].id == "node-retrieval" + assert call_args[4].run_type == LangSmithRunType.retriever + + +def test_workflow_trace_no_start_time(trace_instance, monkeypatch): + workflow_data = MagicMock() + workflow_data.created_at = _dt() + workflow_data.finished_at = _dt() + timedelta(seconds=1) + + trace_info = WorkflowTraceInfo( + tenant_id="tenant-1", + workflow_id="wf-1", + workflow_run_id="run-1", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_status="succeeded", + workflow_run_version="1.0", + workflow_run_elapsed_time=1.0, + total_tokens=10, + file_list=[], + query="hi", + message_id="msg-1", + conversation_id="conv-1", + start_time=None, + end_time=None, + trace_id="trace-1", + metadata={"app_id": "app-1"}, + workflow_app_log_id="log-1", + error="", + workflow_data=workflow_data, + ) + + mock_session = MagicMock() + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.sessionmaker", lambda bind: lambda: mock_session) + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.db", MagicMock(engine="engine")) + repo = MagicMock() + repo.get_by_workflow_run.return_value = [] + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.DifyCoreRepositoryFactory", mock_factory) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_run = MagicMock() + trace_instance.workflow_trace(trace_info) + assert trace_instance.add_run.called + + +def test_workflow_trace_missing_app_id(trace_instance, monkeypatch): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.trace_id = "trace-1" + trace_info.message_id = None + trace_info.workflow_run_id = "run-1" + trace_info.start_time = None + trace_info.workflow_data = MagicMock() + trace_info.workflow_data.created_at = _dt() + trace_info.metadata = {} # Empty metadata + trace_info.workflow_app_log_id = "log-1" + trace_info.file_list = [] + trace_info.total_tokens = 0 + trace_info.workflow_run_inputs = {} + trace_info.workflow_run_outputs = {} + trace_info.error = "" + + mock_session = MagicMock() + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.sessionmaker", lambda bind: lambda: mock_session) + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.db", MagicMock(engine="engine")) + + with pytest.raises(ValueError, match="No app_id found in trace_info metadata"): + trace_instance.workflow_trace(trace_info) + + +def test_message_trace(trace_instance, monkeypatch): + message_data = MagicMock() + message_data.id = "msg-1" + message_data.from_account_id = "acc-1" + message_data.from_end_user_id = "end-user-1" + message_data.answer = "hello answer" + + trace_info = MessageTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs={"input": "hi"}, + outputs={"answer": "hello"}, + message_tokens=10, + answer_tokens=20, + total_tokens=30, + start_time=_dt(), + end_time=_dt() + timedelta(seconds=1), + trace_id="trace-1", + metadata={"foo": "bar"}, + conversation_mode="chat", + conversation_model="gpt-4", + file_list=[], + error=None, + message_file_data=MagicMock(url="file-url"), + ) + + # Mock EndUser lookup + mock_end_user = MagicMock(spec=EndUser) + mock_end_user.session_id = "session-id-123" + mock_query = MagicMock() + mock_query.where.return_value.first.return_value = mock_end_user + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.db.session.query", lambda model: mock_query) + + trace_instance.add_run = MagicMock() + + trace_instance.message_trace(trace_info) + + # 1. message run + # 2. llm run + assert trace_instance.add_run.call_count == 2 + + call_args = [call[0][0] for call in trace_instance.add_run.call_args_list] + assert call_args[0].id == "msg-1" + assert call_args[0].extra["metadata"]["end_user_id"] == "session-id-123" + assert call_args[1].parent_run_id == "msg-1" + assert call_args[1].name == "llm" + + +def test_message_trace_no_data(trace_instance): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.message_data = None + trace_info.file_list = [] + trace_info.message_file_data = None + trace_info.metadata = {} + trace_instance.add_run = MagicMock() + trace_instance.message_trace(trace_info) + trace_instance.add_run.assert_not_called() + + +def test_moderation_trace_no_data(trace_instance): + trace_info = MagicMock(spec=ModerationTraceInfo) + trace_info.message_data = None + trace_instance.add_run = MagicMock() + trace_instance.moderation_trace(trace_info) + trace_instance.add_run.assert_not_called() + + +def test_suggested_question_trace_no_data(trace_instance): + trace_info = MagicMock(spec=SuggestedQuestionTraceInfo) + trace_info.message_data = None + trace_instance.add_run = MagicMock() + trace_instance.suggested_question_trace(trace_info) + trace_instance.add_run.assert_not_called() + + +def test_dataset_retrieval_trace_no_data(trace_instance): + trace_info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_info.message_data = None + trace_instance.add_run = MagicMock() + trace_instance.dataset_retrieval_trace(trace_info) + trace_instance.add_run.assert_not_called() + + +def test_moderation_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + message_data.updated_at = _dt() + + trace_info = ModerationTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs={"q": "hi"}, + action="stop", + flagged=True, + preset_response="blocked", + start_time=None, + end_time=None, + metadata={}, + trace_id="trace-1", + query="hi", + ) + + trace_instance.add_run = MagicMock() + trace_instance.moderation_trace(trace_info) + trace_instance.add_run.assert_called_once() + assert trace_instance.add_run.call_args[0][0].name == TraceTaskName.MODERATION_TRACE + + +def test_suggested_question_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + message_data.updated_at = _dt() + + trace_info = SuggestedQuestionTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs="hi", + suggested_question=["q1"], + total_tokens=10, + level="info", + start_time=None, + end_time=None, + metadata={}, + trace_id="trace-1", + ) + + trace_instance.add_run = MagicMock() + trace_instance.suggested_question_trace(trace_info) + trace_instance.add_run.assert_called_once() + assert trace_instance.add_run.call_args[0][0].name == TraceTaskName.SUGGESTED_QUESTION_TRACE + + +def test_dataset_retrieval_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + message_data.updated_at = _dt() + + trace_info = DatasetRetrievalTraceInfo( + message_id="msg-1", + message_data=message_data, + inputs="query", + documents=[{"id": "doc1"}], + start_time=None, + end_time=None, + metadata={}, + trace_id="trace-1", + ) + + trace_instance.add_run = MagicMock() + trace_instance.dataset_retrieval_trace(trace_info) + trace_instance.add_run.assert_called_once() + assert trace_instance.add_run.call_args[0][0].name == TraceTaskName.DATASET_RETRIEVAL_TRACE + + +def test_tool_trace(trace_instance): + trace_info = ToolTraceInfo( + message_id="msg-1", + message_data=MagicMock(), + inputs={}, + outputs={}, + tool_name="my_tool", + tool_inputs={"a": 1}, + tool_outputs="result", + time_cost=0.1, + start_time=_dt(), + end_time=_dt(), + metadata={}, + trace_id="trace-1", + tool_config={}, + tool_parameters={}, + file_url="http://file", + ) + + trace_instance.add_run = MagicMock() + trace_instance.tool_trace(trace_info) + trace_instance.add_run.assert_called_once() + assert trace_instance.add_run.call_args[0][0].name == "my_tool" + + +def test_generate_name_trace(trace_instance): + trace_info = GenerateNameTraceInfo( + inputs={"q": "hi"}, + outputs={"name": "new"}, + tenant_id="tenant-1", + conversation_id="conv-1", + start_time=None, + end_time=None, + metadata={}, + trace_id="trace-1", + ) + + trace_instance.add_run = MagicMock() + trace_instance.generate_name_trace(trace_info) + trace_instance.add_run.assert_called_once() + assert trace_instance.add_run.call_args[0][0].name == TraceTaskName.GENERATE_NAME_TRACE + + +def test_add_run_success(trace_instance): + run_data = LangSmithRunModel( + id="run-1", name="test", inputs={}, outputs={}, run_type=LangSmithRunType.tool, start_time=_dt() + ) + trace_instance.project_id = "proj-1" + trace_instance.add_run(run_data) + trace_instance.langsmith_client.create_run.assert_called_once() + args, kwargs = trace_instance.langsmith_client.create_run.call_args + assert kwargs["session_id"] == "proj-1" + + +def test_add_run_error(trace_instance): + run_data = LangSmithRunModel(id="run-1", name="test", run_type=LangSmithRunType.tool, start_time=_dt()) + trace_instance.langsmith_client.create_run.side_effect = Exception("failed") + with pytest.raises(ValueError, match="LangSmith Failed to create run: failed"): + trace_instance.add_run(run_data) + + +def test_update_run_success(trace_instance): + update_data = LangSmithRunUpdateModel(run_id="run-1", outputs={"out": "val"}) + trace_instance.update_run(update_data) + trace_instance.langsmith_client.update_run.assert_called_once() + + +def test_update_run_error(trace_instance): + update_data = LangSmithRunUpdateModel(run_id="run-1") + trace_instance.langsmith_client.update_run.side_effect = Exception("failed") + with pytest.raises(ValueError, match="LangSmith Failed to update run: failed"): + trace_instance.update_run(update_data) + + +def test_workflow_trace_usage_extraction_error(trace_instance, monkeypatch, caplog): + workflow_data = MagicMock() + workflow_data.created_at = _dt() + workflow_data.finished_at = _dt() + timedelta(seconds=1) + + trace_info = WorkflowTraceInfo( + tenant_id="tenant-1", + workflow_id="wf-1", + workflow_run_id="run-1", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_status="succeeded", + workflow_run_version="1.0", + workflow_run_elapsed_time=1.0, + total_tokens=100, + file_list=[], + query="hi", + message_id="msg-1", + conversation_id="conv-1", + start_time=_dt(), + end_time=_dt(), + trace_id="trace-1", + metadata={"app_id": "app-1"}, + workflow_app_log_id="log-1", + error="", + workflow_data=workflow_data, + ) + + class BadDict(collections.UserDict): + def get(self, key, default=None): + if key == "usage": + raise Exception("Usage extraction failed") + return super().get(key, default) + + node_llm = MagicMock() + node_llm.id = "node-llm" + node_llm.title = "LLM Node" + node_llm.node_type = BuiltinNodeTypes.LLM + node_llm.status = "succeeded" + node_llm.process_data = BadDict({"model_mode": "chat", "model_name": "gpt-4", "usage": True, "prompts": ["p"]}) + node_llm.inputs = {} + node_llm.outputs = {} + node_llm.created_at = _dt() + node_llm.elapsed_time = 0.5 + node_llm.metadata = {} + + repo = MagicMock() + repo.get_by_workflow_run.return_value = [node_llm] + + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.DifyCoreRepositoryFactory", mock_factory) + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.sessionmaker", lambda bind: lambda: MagicMock()) + monkeypatch.setattr("core.ops.langsmith_trace.langsmith_trace.db", MagicMock(engine="engine")) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_run = MagicMock() + + import logging + + with caplog.at_level(logging.ERROR): + trace_instance.workflow_trace(trace_info) + + assert "Failed to extract usage" in caplog.text + + +def test_api_check_success(trace_instance): + assert trace_instance.api_check() is True + assert trace_instance.langsmith_client.create_project.called + assert trace_instance.langsmith_client.delete_project.called + + +def test_api_check_error(trace_instance): + trace_instance.langsmith_client.create_project.side_effect = Exception("error") + with pytest.raises(ValueError, match="LangSmith API check failed: error"): + trace_instance.api_check() + + +def test_get_project_url_success(trace_instance): + trace_instance.langsmith_client.get_run_url.return_value = "https://smith.langchain.com/o/org/p/proj/r/run" + url = trace_instance.get_project_url() + assert url == "https://smith.langchain.com/o/org/p/proj" + + +def test_get_project_url_error(trace_instance): + trace_instance.langsmith_client.get_run_url.side_effect = Exception("error") + with pytest.raises(ValueError, match="LangSmith get run url failed: error"): + trace_instance.get_project_url() diff --git a/api/tests/unit_tests/core/ops/mlflow_trace/test_mlflow_trace.py b/api/tests/unit_tests/core/ops/mlflow_trace/test_mlflow_trace.py new file mode 100644 index 0000000000..cccedaa08c --- /dev/null +++ b/api/tests/unit_tests/core/ops/mlflow_trace/test_mlflow_trace.py @@ -0,0 +1,1019 @@ +"""Comprehensive tests for core.ops.mlflow_trace.mlflow_trace module.""" + +from __future__ import annotations + +import json +import os +from datetime import UTC, datetime +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from core.ops.entities.config_entity import DatabricksConfig, MLflowConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + WorkflowTraceInfo, +) +from core.ops.mlflow_trace.mlflow_trace import MLflowDataTrace, datetime_to_nanoseconds +from dify_graph.enums import BuiltinNodeTypes + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +def _dt() -> datetime: + return datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + + +def _make_workflow_trace_info(**overrides) -> WorkflowTraceInfo: + defaults = { + "workflow_id": "wf-id", + "tenant_id": "tenant", + "workflow_run_id": "run-1", + "workflow_run_elapsed_time": 1.0, + "workflow_run_status": "succeeded", + "workflow_run_inputs": {"key": "val"}, + "workflow_run_outputs": {"answer": "42"}, + "workflow_run_version": "v1", + "total_tokens": 10, + "file_list": [], + "query": "hello", + "metadata": {"user_id": "u1", "conversation_id": "c1"}, + "start_time": _dt(), + "end_time": _dt(), + } + defaults.update(overrides) + return WorkflowTraceInfo(**defaults) + + +def _make_message_trace_info(**overrides) -> MessageTraceInfo: + defaults = { + "conversation_model": "chat", + "message_tokens": 5, + "answer_tokens": 10, + "total_tokens": 15, + "conversation_mode": "chat", + "metadata": {"conversation_id": "c1", "from_account_id": "a1"}, + "message_id": "msg-1", + "message_data": SimpleNamespace( + model_provider="openai", + model_id="gpt-4", + total_price=0.01, + answer="response text", + ), + "inputs": {"prompt": "hi"}, + "outputs": "ok", + "start_time": _dt(), + "end_time": _dt(), + "error": None, + } + defaults.update(overrides) + return MessageTraceInfo(**defaults) + + +def _make_tool_trace_info(**overrides) -> ToolTraceInfo: + defaults = { + "tool_name": "my_tool", + "tool_inputs": {"x": 1}, + "tool_outputs": "output", + "tool_config": {"desc": "d"}, + "tool_parameters": {"p": "v"}, + "time_cost": 0.5, + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "inputs": {"i": "v"}, + "outputs": {"o": "v"}, + "start_time": _dt(), + "end_time": _dt(), + "error": None, + } + defaults.update(overrides) + return ToolTraceInfo(**defaults) + + +def _make_moderation_trace_info(**overrides) -> ModerationTraceInfo: + defaults = { + "flagged": False, + "action": "allow", + "preset_response": "", + "query": "test", + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + } + defaults.update(overrides) + return ModerationTraceInfo(**defaults) + + +def _make_dataset_retrieval_trace_info(**overrides) -> DatasetRetrievalTraceInfo: + defaults = { + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "message_data": SimpleNamespace(), + "inputs": "query", + "documents": [{"content": "doc"}], + "start_time": _dt(), + "end_time": _dt(), + } + defaults.update(overrides) + return DatasetRetrievalTraceInfo(**defaults) + + +def _make_suggested_question_trace_info(**overrides) -> SuggestedQuestionTraceInfo: + defaults = { + "suggested_question": ["q1", "q2"], + "level": "info", + "total_tokens": 5, + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "message_data": SimpleNamespace(created_at=_dt(), updated_at=_dt()), + "inputs": {"i": 1}, + "start_time": _dt(), + "end_time": _dt(), + "error": None, + } + defaults.update(overrides) + return SuggestedQuestionTraceInfo(**defaults) + + +def _make_generate_name_trace_info(**overrides) -> GenerateNameTraceInfo: + defaults = { + "tenant_id": "t1", + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "inputs": {"i": 1}, + "outputs": {"name": "test"}, + "start_time": _dt(), + "end_time": _dt(), + } + defaults.update(overrides) + return GenerateNameTraceInfo(**defaults) + + +def _make_node(**overrides): + """Create a mock workflow node execution row.""" + defaults = { + "id": "node-1", + "tenant_id": "t1", + "app_id": "app-1", + "title": "Node Title", + "node_type": BuiltinNodeTypes.CODE, + "status": "succeeded", + "inputs": '{"key": "value"}', + "outputs": '{"result": "ok"}', + "created_at": _dt(), + "elapsed_time": 1.0, + "process_data": None, + "execution_metadata": None, + } + defaults.update(overrides) + return SimpleNamespace(**defaults) + + +# ── Fixtures ───────────────────────────────────────────────────────────────── + + +@pytest.fixture +def mock_mlflow(): + with patch("core.ops.mlflow_trace.mlflow_trace.mlflow") as mock: + yield mock + + +@pytest.fixture +def mock_tracing(): + """Patch all MLflow tracing functions used by the module.""" + with ( + patch("core.ops.mlflow_trace.mlflow_trace.start_span_no_context") as mock_start, + patch("core.ops.mlflow_trace.mlflow_trace.update_current_trace") as mock_update, + patch("core.ops.mlflow_trace.mlflow_trace.set_span_in_context") as mock_set, + patch("core.ops.mlflow_trace.mlflow_trace.detach_span_from_context") as mock_detach, + ): + yield { + "start": mock_start, + "update": mock_update, + "set": mock_set, + "detach": mock_detach, + } + + +@pytest.fixture +def mock_db(): + with patch("core.ops.mlflow_trace.mlflow_trace.db") as mock: + yield mock + + +@pytest.fixture +def trace_instance(mock_mlflow): + """Create an MLflowDataTrace using a basic MLflowConfig (no auth).""" + config = MLflowConfig(tracking_uri="http://localhost:5000", experiment_id="0") + return MLflowDataTrace(config) + + +# ── datetime_to_nanoseconds ───────────────────────────────────────────────── + + +class TestDatetimeToNanoseconds: + def test_none_returns_none(self): + assert datetime_to_nanoseconds(None) is None + + def test_converts_datetime(self): + dt = datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + expected = int(dt.timestamp() * 1_000_000_000) + assert datetime_to_nanoseconds(dt) == expected + + +# ── __init__ / setup ───────────────────────────────────────────────────────── + + +class TestInit: + def test_mlflow_config_no_auth(self, mock_mlflow): + config = MLflowConfig(tracking_uri="http://localhost:5000", experiment_id="0") + trace = MLflowDataTrace(config) + mock_mlflow.set_tracking_uri.assert_called_with("http://localhost:5000") + mock_mlflow.set_experiment.assert_called_with(experiment_id="0") + assert trace.get_project_url() == "http://localhost:5000/#/experiments/0/traces" + assert os.environ["MLFLOW_ENABLE_ASYNC_TRACE_LOGGING"] == "true" + + def test_mlflow_config_with_auth(self, mock_mlflow): + config = MLflowConfig( + tracking_uri="http://localhost:5000", + experiment_id="1", + username="user", + password="pass", + ) + MLflowDataTrace(config) + assert os.environ["MLFLOW_TRACKING_USERNAME"] == "user" + assert os.environ["MLFLOW_TRACKING_PASSWORD"] == "pass" + + def test_databricks_oauth(self, mock_mlflow): + config = DatabricksConfig( + host="https://db.com/", + experiment_id="42", + client_id="cid", + client_secret="csec", + ) + trace = MLflowDataTrace(config) + assert os.environ["DATABRICKS_HOST"] == "https://db.com/" + assert os.environ["DATABRICKS_CLIENT_ID"] == "cid" + assert os.environ["DATABRICKS_CLIENT_SECRET"] == "csec" + mock_mlflow.set_tracking_uri.assert_called_with("databricks") + # Trailing slash stripped + assert trace.get_project_url() == "https://db.com/ml/experiments/42/traces" + + def test_databricks_pat(self, mock_mlflow): + config = DatabricksConfig( + host="https://db.com", + experiment_id="1", + personal_access_token="pat", + ) + trace = MLflowDataTrace(config) + assert os.environ["DATABRICKS_TOKEN"] == "pat" + assert "db.com/ml/experiments/1/traces" in trace.get_project_url() + + def test_databricks_no_creds_raises(self, mock_mlflow): + config = DatabricksConfig(host="https://db.com", experiment_id="1") + with pytest.raises(ValueError, match="Either Databricks token"): + MLflowDataTrace(config) + + +# ── trace dispatcher ──────────────────────────────────────────────────────── + + +class TestTraceDispatcher: + def test_dispatches_workflow(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "workflow_trace") as mock_wt: + trace_instance.trace(_make_workflow_trace_info()) + mock_wt.assert_called_once() + + def test_dispatches_message(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "message_trace") as mock_mt: + trace_instance.trace(_make_message_trace_info()) + mock_mt.assert_called_once() + + def test_dispatches_tool(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "tool_trace") as mock_tt: + trace_instance.trace(_make_tool_trace_info()) + mock_tt.assert_called_once() + + def test_dispatches_moderation(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "moderation_trace") as mock_mod: + trace_instance.trace(_make_moderation_trace_info(message_data=SimpleNamespace(created_at=_dt()))) + mock_mod.assert_called_once() + + def test_dispatches_dataset_retrieval(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "dataset_retrieval_trace") as mock_dr: + trace_instance.trace(_make_dataset_retrieval_trace_info()) + mock_dr.assert_called_once() + + def test_dispatches_suggested_question(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "suggested_question_trace") as mock_sq: + trace_instance.trace(_make_suggested_question_trace_info()) + mock_sq.assert_called_once() + + def test_dispatches_generate_name(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "generate_name_trace") as mock_gn: + trace_instance.trace(_make_generate_name_trace_info()) + mock_gn.assert_called_once() + + def test_reraises_exception(self, trace_instance, mock_tracing, mock_db): + with patch.object(trace_instance, "workflow_trace", side_effect=RuntimeError("boom")): + with pytest.raises(RuntimeError, match="boom"): + trace_instance.trace(_make_workflow_trace_info()) + + +# ── workflow_trace ─────────────────────────────────────────────────────────── + + +class TestWorkflowTrace: + def test_basic_workflow_no_nodes(self, trace_instance, mock_tracing, mock_db): + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [] + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + + trace_info = _make_workflow_trace_info(conversation_id="sess-1") + trace_instance.workflow_trace(trace_info) + + # Workflow span started and ended + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + + def test_workflow_filters_sys_inputs_and_adds_query(self, trace_instance, mock_tracing, mock_db): + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [] + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + + trace_info = _make_workflow_trace_info( + workflow_run_inputs={"sys.app_id": "x", "user_input": "hi"}, + query="hello", + ) + trace_instance.workflow_trace(trace_info) + + call_kwargs = mock_tracing["start"].call_args + inputs = call_kwargs.kwargs["inputs"] + assert "sys.app_id" not in inputs + assert inputs["user_input"] == "hi" + assert inputs["query"] == "hello" + + def test_workflow_with_llm_node(self, trace_instance, mock_tracing, mock_db): + llm_node = _make_node( + node_type=BuiltinNodeTypes.LLM, + process_data=json.dumps( + { + "prompts": [{"role": "user", "text": "hi"}], + "model_name": "gpt-4", + "model_provider": "openai", + "finish_reason": "stop", + "usage": {"prompt_tokens": 5, "completion_tokens": 10, "total_tokens": 15}, + } + ), + outputs='{"text": "hello world"}', + ) + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [llm_node] + + workflow_span = MagicMock() + node_span = MagicMock() + mock_tracing["start"].side_effect = [workflow_span, node_span] + mock_tracing["set"].return_value = "token" + + trace_instance.workflow_trace(_make_workflow_trace_info()) + assert mock_tracing["start"].call_count == 2 + node_span.end.assert_called_once() + workflow_span.end.assert_called_once() + + def test_workflow_with_question_classifier_node(self, trace_instance, mock_tracing, mock_db): + qc_node = _make_node( + node_type=BuiltinNodeTypes.QUESTION_CLASSIFIER, + process_data=json.dumps( + { + "prompts": "classify this", + "model_name": "gpt-4", + "model_provider": "openai", + } + ), + ) + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [qc_node] + workflow_span = MagicMock() + node_span = MagicMock() + mock_tracing["start"].side_effect = [workflow_span, node_span] + mock_tracing["set"].return_value = "token" + + trace_instance.workflow_trace(_make_workflow_trace_info()) + assert mock_tracing["start"].call_count == 2 + + def test_workflow_with_http_request_node(self, trace_instance, mock_tracing, mock_db): + http_node = _make_node( + node_type=BuiltinNodeTypes.HTTP_REQUEST, + process_data='{"url": "https://api.com"}', + ) + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [http_node] + workflow_span = MagicMock() + node_span = MagicMock() + mock_tracing["start"].side_effect = [workflow_span, node_span] + mock_tracing["set"].return_value = "token" + + trace_instance.workflow_trace(_make_workflow_trace_info()) + # HTTP_REQUEST uses process_data as inputs + node_start_call = mock_tracing["start"].call_args_list[1] + assert node_start_call.kwargs["inputs"] == '{"url": "https://api.com"}' + + def test_workflow_with_knowledge_retrieval_node(self, trace_instance, mock_tracing, mock_db): + kr_node = _make_node( + node_type=BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, + outputs=json.dumps( + { + "result": [ + {"content": "doc1", "metadata": {"source": "s1"}}, + {"content": "doc2", "metadata": {}}, + ] + } + ), + ) + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [kr_node] + workflow_span = MagicMock() + node_span = MagicMock() + mock_tracing["start"].side_effect = [workflow_span, node_span] + mock_tracing["set"].return_value = "token" + + trace_instance.workflow_trace(_make_workflow_trace_info()) + # outputs should be parsed to Document objects + end_call = node_span.end.call_args + outputs = end_call.kwargs["outputs"] + assert len(outputs) == 2 + + def test_workflow_with_failed_node(self, trace_instance, mock_tracing, mock_db): + failed_node = _make_node(status="failed") + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [failed_node] + workflow_span = MagicMock() + node_span = MagicMock() + mock_tracing["start"].side_effect = [workflow_span, node_span] + mock_tracing["set"].return_value = "token" + + trace_instance.workflow_trace(_make_workflow_trace_info()) + node_span.set_status.assert_called_once() + node_span.add_event.assert_called_once() + + def test_workflow_with_workflow_error(self, trace_instance, mock_tracing, mock_db): + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [] + workflow_span = MagicMock() + mock_tracing["start"].return_value = workflow_span + mock_tracing["set"].return_value = "token" + + trace_info = _make_workflow_trace_info(error="workflow failed") + trace_instance.workflow_trace(trace_info) + workflow_span.set_status.assert_called_once() + workflow_span.add_event.assert_called_once() + # Still ends the span via finally + workflow_span.end.assert_called_once() + + def test_workflow_node_no_inputs_no_outputs(self, trace_instance, mock_tracing, mock_db): + node = _make_node(inputs=None, outputs=None) + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [node] + workflow_span = MagicMock() + node_span = MagicMock() + mock_tracing["start"].side_effect = [workflow_span, node_span] + mock_tracing["set"].return_value = "token" + + trace_instance.workflow_trace(_make_workflow_trace_info()) + node_call = mock_tracing["start"].call_args_list[1] + assert node_call.kwargs["inputs"] == {} + end_call = node_span.end.call_args + assert end_call.kwargs["outputs"] == {} + + def test_workflow_no_user_id_no_conversation_id(self, trace_instance, mock_tracing, mock_db): + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [] + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + + trace_info = _make_workflow_trace_info( + metadata={}, + conversation_id=None, + ) + trace_instance.workflow_trace(trace_info) + # _set_trace_metadata still called with empty metadata + mock_tracing["update"].assert_called_once() + + def test_workflow_empty_query(self, trace_instance, mock_tracing, mock_db): + """When query is empty string, it's falsy so no query key added.""" + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = [] + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + + trace_info = _make_workflow_trace_info(query="") + trace_instance.workflow_trace(trace_info) + call_kwargs = mock_tracing["start"].call_args + inputs = call_kwargs.kwargs["inputs"] + assert "query" not in inputs + + +# ── _parse_llm_inputs_and_attributes ───────────────────────────────────────── + + +class TestParseLlmInputsAndAttributes: + def test_none_process_data(self, trace_instance): + node = _make_node(process_data=None) + inputs, attrs = trace_instance._parse_llm_inputs_and_attributes(node) + assert inputs == {} + assert attrs == {} + + def test_invalid_json(self, trace_instance): + node = _make_node(process_data="not json") + inputs, attrs = trace_instance._parse_llm_inputs_and_attributes(node) + assert inputs == {} + assert attrs == {} + + def test_valid_process_data_with_usage(self, trace_instance): + node = _make_node( + process_data=json.dumps( + { + "prompts": [{"role": "user", "text": "hi"}], + "model_name": "gpt-4", + "model_provider": "openai", + "finish_reason": "stop", + "usage": {"prompt_tokens": 5, "completion_tokens": 10, "total_tokens": 15}, + } + ) + ) + inputs, attrs = trace_instance._parse_llm_inputs_and_attributes(node) + assert isinstance(inputs, list) + assert attrs["model_name"] == "gpt-4" + assert "usage" in attrs + + def test_valid_process_data_without_usage(self, trace_instance): + node = _make_node( + process_data=json.dumps( + { + "prompts": "simple prompt", + "model_name": "gpt-3.5", + } + ) + ) + inputs, attrs = trace_instance._parse_llm_inputs_and_attributes(node) + assert inputs == "simple prompt" + assert attrs["model_name"] == "gpt-3.5" + + +# ── _parse_knowledge_retrieval_outputs ─────────────────────────────────────── + + +class TestParseKnowledgeRetrievalOutputs: + def test_with_results(self, trace_instance): + outputs = {"result": [{"content": "c1", "metadata": {"s": "1"}}]} + docs = trace_instance._parse_knowledge_retrieval_outputs(outputs) + assert len(docs) == 1 + assert docs[0].page_content == "c1" + + def test_empty_result(self, trace_instance): + outputs = {"result": []} + result = trace_instance._parse_knowledge_retrieval_outputs(outputs) + assert result == outputs + + def test_no_result_key(self, trace_instance): + outputs = {"other": "data"} + result = trace_instance._parse_knowledge_retrieval_outputs(outputs) + assert result == outputs + + def test_result_not_list(self, trace_instance): + outputs = {"result": "not a list"} + result = trace_instance._parse_knowledge_retrieval_outputs(outputs) + assert result == outputs + + +# ── message_trace ──────────────────────────────────────────────────────────── + + +class TestMessageTrace: + def test_returns_early_if_no_message_data(self, trace_instance, mock_tracing, mock_db): + trace_info = _make_message_trace_info(message_data=None) + trace_instance.message_trace(trace_info) + mock_tracing["start"].assert_not_called() + + def test_basic_message_trace(self, trace_instance, mock_tracing, mock_db): + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + mock_db.session.query.return_value.where.return_value.first.return_value = None + + trace_instance.message_trace(_make_message_trace_info()) + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + + def test_message_trace_with_error(self, trace_instance, mock_tracing, mock_db): + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + mock_db.session.query.return_value.where.return_value.first.return_value = None + + trace_info = _make_message_trace_info(error="something broke") + trace_instance.message_trace(trace_info) + span.set_status.assert_called_once() + span.add_event.assert_called_once() + + def test_message_trace_with_file_data(self, trace_instance, mock_tracing, mock_db, monkeypatch): + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + mock_db.session.query.return_value.where.return_value.first.return_value = None + monkeypatch.setenv("FILES_URL", "http://files.test") + + file_data = SimpleNamespace(url="path/to/file.png") + trace_info = _make_message_trace_info( + message_file_data=file_data, + file_list=["existing_file.txt"], + ) + trace_instance.message_trace(trace_info) + call_kwargs = mock_tracing["start"].call_args + attrs = call_kwargs.kwargs["attributes"] + assert "http://files.test/path/to/file.png" in attrs["file_list"] + assert "existing_file.txt" in attrs["file_list"] + + def test_message_trace_file_list_none(self, trace_instance, mock_tracing, mock_db): + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + mock_db.session.query.return_value.where.return_value.first.return_value = None + + trace_info = _make_message_trace_info(file_list=None, message_file_data=None) + trace_instance.message_trace(trace_info) + mock_tracing["start"].assert_called_once() + + def test_message_trace_with_end_user(self, trace_instance, mock_tracing, mock_db): + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + + end_user = MagicMock() + end_user.session_id = "session-xyz" + mock_db.session.query.return_value.where.return_value.first.return_value = end_user + + trace_info = _make_message_trace_info( + metadata={"from_end_user_id": "eu-1", "conversation_id": "c1"}, + ) + trace_instance.message_trace(trace_info) + # update_current_trace called with user id from EndUser + mock_tracing["update"].assert_called_once() + + def test_message_trace_with_no_conversation_id(self, trace_instance, mock_tracing, mock_db): + span = MagicMock() + mock_tracing["start"].return_value = span + mock_tracing["set"].return_value = "token" + mock_db.session.query.return_value.where.return_value.first.return_value = None + + trace_info = _make_message_trace_info( + metadata={"from_account_id": "acc-1"}, + ) + trace_instance.message_trace(trace_info) + mock_tracing["update"].assert_called_once() + + +# ── _get_message_user_id ───────────────────────────────────────────────────── + + +class TestGetMessageUserId: + def test_returns_end_user_session_id(self, trace_instance, mock_db): + end_user = MagicMock() + end_user.session_id = "session-1" + mock_db.session.query.return_value.where.return_value.first.return_value = end_user + result = trace_instance._get_message_user_id({"from_end_user_id": "eu-1"}) + assert result == "session-1" + + def test_returns_account_id_when_no_end_user(self, trace_instance, mock_db): + mock_db.session.query.return_value.where.return_value.first.return_value = None + result = trace_instance._get_message_user_id({"from_end_user_id": "eu-1", "from_account_id": "acc-1"}) + assert result == "acc-1" + + def test_returns_account_id_when_no_end_user_id(self, trace_instance, mock_db): + result = trace_instance._get_message_user_id({"from_account_id": "acc-1"}) + assert result == "acc-1" + + def test_returns_none_when_nothing(self, trace_instance, mock_db): + result = trace_instance._get_message_user_id({}) + assert result is None + + +# ── tool_trace ─────────────────────────────────────────────────────────────── + + +class TestToolTrace: + def test_basic_tool_trace(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_instance.tool_trace(_make_tool_trace_info()) + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + span.set_status.assert_not_called() + + def test_tool_trace_with_error(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_instance.tool_trace(_make_tool_trace_info(error="tool failed")) + span.set_status.assert_called_once() + span.add_event.assert_called_once() + span.end.assert_called_once() + + +# ── moderation_trace ───────────────────────────────────────────────────────── + + +class TestModerationTrace: + def test_returns_early_if_no_message_data(self, trace_instance, mock_tracing): + trace_info = _make_moderation_trace_info(message_data=None) + trace_instance.moderation_trace(trace_info) + mock_tracing["start"].assert_not_called() + + def test_basic_moderation_trace(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_info = _make_moderation_trace_info( + message_data=SimpleNamespace(created_at=_dt()), + start_time=_dt(), + end_time=_dt(), + ) + trace_instance.moderation_trace(trace_info) + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + end_kwargs = span.end.call_args.kwargs["outputs"] + assert end_kwargs["action"] == "allow" + assert end_kwargs["flagged"] is False + + def test_moderation_uses_message_data_created_at_if_no_start_time(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_info = _make_moderation_trace_info( + message_data=SimpleNamespace(created_at=_dt()), + start_time=None, + end_time=_dt(), + ) + trace_instance.moderation_trace(trace_info) + mock_tracing["start"].assert_called_once() + + +# ── dataset_retrieval_trace ────────────────────────────────────────────────── + + +class TestDatasetRetrievalTrace: + def test_returns_early_if_no_message_data(self, trace_instance, mock_tracing): + trace_info = _make_dataset_retrieval_trace_info(message_data=None) + trace_instance.dataset_retrieval_trace(trace_info) + mock_tracing["start"].assert_not_called() + + def test_basic_dataset_retrieval_trace(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_instance.dataset_retrieval_trace(_make_dataset_retrieval_trace_info()) + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + + +# ── suggested_question_trace ───────────────────────────────────────────────── + + +class TestSuggestedQuestionTrace: + def test_returns_early_if_no_message_data(self, trace_instance, mock_tracing): + trace_info = _make_suggested_question_trace_info(message_data=None) + trace_instance.suggested_question_trace(trace_info) + mock_tracing["start"].assert_not_called() + + def test_basic_suggested_question_trace(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_instance.suggested_question_trace(_make_suggested_question_trace_info()) + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + + def test_suggested_question_with_error(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_info = _make_suggested_question_trace_info(error="failed") + trace_instance.suggested_question_trace(trace_info) + span.set_status.assert_called_once() + span.add_event.assert_called_once() + + def test_uses_message_data_times_when_no_start_end(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_info = _make_suggested_question_trace_info( + start_time=None, + end_time=None, + ) + trace_instance.suggested_question_trace(trace_info) + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + + +# ── generate_name_trace ────────────────────────────────────────────────────── + + +class TestGenerateNameTrace: + def test_basic_generate_name_trace(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["start"].return_value = span + + trace_instance.generate_name_trace(_make_generate_name_trace_info()) + mock_tracing["start"].assert_called_once() + span.end.assert_called_once() + + +# ── _get_workflow_nodes ────────────────────────────────────────────────────── + + +class TestGetWorkflowNodes: + def test_queries_db(self, trace_instance, mock_db): + mock_db.session.query.return_value.filter.return_value.order_by.return_value.all.return_value = ["n1", "n2"] + result = trace_instance._get_workflow_nodes("run-1") + assert result == ["n1", "n2"] + + +# ── _get_node_span_type ───────────────────────────────────────────────────── + + +class TestGetNodeSpanType: + @pytest.mark.parametrize( + ("node_type", "expected_contains"), + [ + (BuiltinNodeTypes.LLM, "LLM"), + (BuiltinNodeTypes.QUESTION_CLASSIFIER, "LLM"), + (BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "RETRIEVER"), + (BuiltinNodeTypes.TOOL, "TOOL"), + (BuiltinNodeTypes.CODE, "TOOL"), + (BuiltinNodeTypes.HTTP_REQUEST, "TOOL"), + (BuiltinNodeTypes.AGENT, "AGENT"), + ], + ) + def test_mapped_types(self, trace_instance, node_type, expected_contains): + result = trace_instance._get_node_span_type(node_type) + assert expected_contains in str(result) + + def test_unknown_type_returns_chain(self, trace_instance): + result = trace_instance._get_node_span_type("unknown_node") + assert result == "CHAIN" + + +# ── _set_trace_metadata ───────────────────────────────────────────────────── + + +class TestSetTraceMetadata: + def test_sets_and_detaches(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["set"].return_value = "token" + + trace_instance._set_trace_metadata(span, {"key": "val"}) + mock_tracing["set"].assert_called_once_with(span) + mock_tracing["update"].assert_called_once_with(metadata={"key": "val"}) + mock_tracing["detach"].assert_called_once_with("token") + + def test_detaches_even_on_error(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["set"].return_value = "token" + mock_tracing["update"].side_effect = RuntimeError("fail") + + with pytest.raises(RuntimeError): + trace_instance._set_trace_metadata(span, {}) + mock_tracing["detach"].assert_called_once_with("token") + + def test_no_detach_when_token_is_none(self, trace_instance, mock_tracing): + span = MagicMock() + mock_tracing["set"].return_value = None + + trace_instance._set_trace_metadata(span, {}) + mock_tracing["detach"].assert_not_called() + + +# ── _parse_prompts ─────────────────────────────────────────────────────────── + + +class TestParsePrompts: + def test_string_input(self, trace_instance): + assert trace_instance._parse_prompts("hello") == "hello" + + def test_dict_input(self, trace_instance): + result = trace_instance._parse_prompts({"role": "user", "text": "hi"}) + assert result == {"role": "user", "content": "hi"} + + def test_list_input(self, trace_instance): + prompts = [ + {"role": "user", "text": "hi"}, + {"role": "assistant", "text": "hello"}, + ] + result = trace_instance._parse_prompts(prompts) + assert len(result) == 2 + assert result[0]["role"] == "user" + + def test_none_input(self, trace_instance): + assert trace_instance._parse_prompts(None) is None + + def test_int_passthrough(self, trace_instance): + assert trace_instance._parse_prompts(42) == 42 + + +# ── _parse_single_message ─────────────────────────────────────────────────── + + +class TestParseSingleMessage: + def test_basic_message(self, trace_instance): + result = trace_instance._parse_single_message({"role": "user", "text": "hello"}) + assert result == {"role": "user", "content": "hello"} + + def test_default_role(self, trace_instance): + result = trace_instance._parse_single_message({"text": "hello"}) + assert result["role"] == "user" + + def test_with_tool_calls(self, trace_instance): + item = { + "role": "assistant", + "text": "", + "tool_calls": [{"id": "tc1", "function": {"name": "fn"}}], + } + result = trace_instance._parse_single_message(item) + assert "tool_calls" in result + + def test_tool_role_ignores_tool_calls(self, trace_instance): + item = { + "role": "tool", + "text": "result", + "tool_calls": [{"id": "tc1"}], + } + result = trace_instance._parse_single_message(item) + assert "tool_calls" not in result + + def test_with_files(self, trace_instance): + item = {"role": "user", "text": "look", "files": ["f1.png"]} + result = trace_instance._parse_single_message(item) + assert result["files"] == ["f1.png"] + + def test_no_files(self, trace_instance): + result = trace_instance._parse_single_message({"role": "user", "text": "hi"}) + assert "files" not in result + + +# ── _resolve_tool_call_ids ─────────────────────────────────────────────────── + + +class TestResolveToolCallIds: + def test_resolves_tool_call_ids(self, trace_instance): + messages = [ + { + "role": "assistant", + "content": "", + "tool_calls": [{"id": "tc1"}, {"id": "tc2"}], + }, + {"role": "tool", "content": "result1"}, + {"role": "tool", "content": "result2"}, + ] + result = trace_instance._resolve_tool_call_ids(messages) + assert result[1]["tool_call_id"] == "tc1" + assert result[2]["tool_call_id"] == "tc2" + + def test_no_tool_calls(self, trace_instance): + messages = [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "hello"}, + ] + result = trace_instance._resolve_tool_call_ids(messages) + assert "tool_call_id" not in result[0] + assert "tool_call_id" not in result[1] + + def test_tool_message_no_ids_available(self, trace_instance): + """Tool message with no preceding tool_calls should not crash.""" + messages = [ + {"role": "tool", "content": "result"}, + ] + result = trace_instance._resolve_tool_call_ids(messages) + assert "tool_call_id" not in result[0] + + +# ── api_check ──────────────────────────────────────────────────────────────── + + +class TestApiCheck: + def test_success(self, trace_instance, mock_mlflow): + mock_mlflow.search_experiments.return_value = [] + assert trace_instance.api_check() is True + + def test_failure(self, trace_instance, mock_mlflow): + mock_mlflow.search_experiments.side_effect = ConnectionError("refused") + with pytest.raises(ValueError, match="MLflow connection failed"): + trace_instance.api_check() + + +# ── get_project_url ────────────────────────────────────────────────────────── + + +class TestGetProjectUrl: + def test_returns_url(self, trace_instance): + assert "experiments" in trace_instance.get_project_url() diff --git a/api/tests/unit_tests/core/ops/opik_trace/test_opik_trace.py b/api/tests/unit_tests/core/ops/opik_trace/test_opik_trace.py new file mode 100644 index 0000000000..b2cb7d5109 --- /dev/null +++ b/api/tests/unit_tests/core/ops/opik_trace/test_opik_trace.py @@ -0,0 +1,678 @@ +import collections +import logging +from datetime import UTC, datetime, timedelta +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from core.ops.entities.config_entity import OpikConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + TraceTaskName, + WorkflowTraceInfo, +) +from core.ops.opik_trace.opik_trace import OpikDataTrace, prepare_opik_uuid, wrap_dict, wrap_metadata +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey +from models import EndUser +from models.enums import MessageStatus + + +def _dt() -> datetime: + return datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + + +@pytest.fixture +def opik_config(): + return OpikConfig( + project="test-project", workspace="test-workspace", url="https://cloud.opik.com/api/", api_key="api-key-123" + ) + + +@pytest.fixture +def trace_instance(opik_config, monkeypatch): + mock_client = MagicMock() + monkeypatch.setattr("core.ops.opik_trace.opik_trace.Opik", lambda **kwargs: mock_client) + + instance = OpikDataTrace(opik_config) + return instance + + +def test_wrap_dict(): + assert wrap_dict("input", {"a": 1}) == {"a": 1} + assert wrap_dict("input", "hello") == {"input": "hello"} + + +def test_wrap_metadata(): + assert wrap_metadata({"a": 1}, b=2) == {"a": 1, "b": 2, "created_from": "dify"} + + +def test_prepare_opik_uuid(): + # Test with valid datetime and uuid string + dt = datetime(2024, 1, 1) + uuid_str = "b3e8e918-472e-4b69-8051-12502c34fc07" + result = prepare_opik_uuid(dt, uuid_str) + assert result is not None + # We won't test the exact uuid7 value but just that it returns a string id + + # Test with None dt and uuid_str + result = prepare_opik_uuid(None, None) + assert result is not None + + +def test_init(opik_config, monkeypatch): + mock_opik = MagicMock() + monkeypatch.setattr("core.ops.opik_trace.opik_trace.Opik", mock_opik) + monkeypatch.setenv("FILES_URL", "http://test.url") + + instance = OpikDataTrace(opik_config) + + mock_opik.assert_called_once_with( + project_name=opik_config.project, + workspace=opik_config.workspace, + host=opik_config.url, + api_key=opik_config.api_key, + ) + assert instance.file_base_url == "http://test.url" + assert instance.project == opik_config.project + + +def test_trace_dispatch(trace_instance, monkeypatch): + methods = [ + "workflow_trace", + "message_trace", + "moderation_trace", + "suggested_question_trace", + "dataset_retrieval_trace", + "tool_trace", + "generate_name_trace", + ] + mocks = {method: MagicMock() for method in methods} + for method, m in mocks.items(): + monkeypatch.setattr(trace_instance, method, m) + + # WorkflowTraceInfo + info = MagicMock(spec=WorkflowTraceInfo) + trace_instance.trace(info) + mocks["workflow_trace"].assert_called_once_with(info) + + # MessageTraceInfo + info = MagicMock(spec=MessageTraceInfo) + trace_instance.trace(info) + mocks["message_trace"].assert_called_once_with(info) + + # ModerationTraceInfo + info = MagicMock(spec=ModerationTraceInfo) + trace_instance.trace(info) + mocks["moderation_trace"].assert_called_once_with(info) + + # SuggestedQuestionTraceInfo + info = MagicMock(spec=SuggestedQuestionTraceInfo) + trace_instance.trace(info) + mocks["suggested_question_trace"].assert_called_once_with(info) + + # DatasetRetrievalTraceInfo + info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_instance.trace(info) + mocks["dataset_retrieval_trace"].assert_called_once_with(info) + + # ToolTraceInfo + info = MagicMock(spec=ToolTraceInfo) + trace_instance.trace(info) + mocks["tool_trace"].assert_called_once_with(info) + + # GenerateNameTraceInfo + info = MagicMock(spec=GenerateNameTraceInfo) + trace_instance.trace(info) + mocks["generate_name_trace"].assert_called_once_with(info) + + +def test_workflow_trace_with_message_id(trace_instance, monkeypatch): + # Define constants for better readability + WORKFLOW_ID = "fb05c7cd-6cec-4add-8a84-df03a408b4ce" + WORKFLOW_RUN_ID = "33c67568-7a8a-450e-8916-a5f135baeaef" + MESSAGE_ID = "04ec3956-85f3-488a-8539-1017251dc8c6" + CONVERSATION_ID = "d3d01066-23ae-4830-9ce4-eb5640b42a7e" + TRACE_ID = "bf26d929-6f15-4c2f-9abc-761c217056f3" + WORKFLOW_APP_LOG_ID = "ca0e018e-edd4-43fb-a05a-ea001ca8ef4b" + LLM_NODE_ID = "80d7dfa8-08f4-4ab7-aa37-0ca7d27207e3" + CODE_NODE_ID = "b9cd9a7b-c534-4aa9-b5da-efd454140900" + + trace_info = WorkflowTraceInfo( + workflow_id=WORKFLOW_ID, + tenant_id="tenant-1", + workflow_run_id=WORKFLOW_RUN_ID, + workflow_run_elapsed_time=1.0, + workflow_run_status="succeeded", + workflow_run_inputs={"input": "hi"}, + workflow_run_outputs={"output": "hello"}, + workflow_run_version="1.0", + message_id=MESSAGE_ID, + conversation_id=CONVERSATION_ID, + total_tokens=100, + file_list=[], + query="hi", + start_time=_dt(), + end_time=_dt() + timedelta(seconds=1), + trace_id=TRACE_ID, + metadata={"app_id": "app-1", "user_id": "user-1"}, + workflow_app_log_id=WORKFLOW_APP_LOG_ID, + error="", + ) + + mock_session = MagicMock() + monkeypatch.setattr("core.ops.opik_trace.opik_trace.sessionmaker", lambda bind: lambda: mock_session) + monkeypatch.setattr("core.ops.opik_trace.opik_trace.db", MagicMock(engine="engine")) + + node_llm = MagicMock() + node_llm.id = LLM_NODE_ID + node_llm.title = "LLM Node" + node_llm.node_type = BuiltinNodeTypes.LLM + node_llm.status = "succeeded" + node_llm.process_data = { + "model_mode": "chat", + "model_name": "gpt-4", + "model_provider": "openai", + "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}, + } + node_llm.inputs = {"prompts": "p"} + node_llm.outputs = {"text": "t"} + node_llm.created_at = _dt() + node_llm.elapsed_time = 0.5 + node_llm.metadata = {"foo": "bar"} + + node_other = MagicMock() + node_other.id = CODE_NODE_ID + node_other.title = "Other Node" + node_other.node_type = BuiltinNodeTypes.CODE + node_other.status = "failed" + node_other.process_data = None + node_other.inputs = {"code": "print"} + node_other.outputs = {"result": "ok"} + node_other.created_at = None + node_other.elapsed_time = 0.2 + node_other.metadata = {WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS.value: 10} + + repo = MagicMock() + repo.get_by_workflow_run.return_value = [node_llm, node_other] + + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.opik_trace.opik_trace.DifyCoreRepositoryFactory", mock_factory) + + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_trace = MagicMock() + trace_instance.add_span = MagicMock() + + trace_instance.workflow_trace(trace_info) + + trace_instance.add_trace.assert_called_once() + trace_data = trace_instance.add_trace.call_args[1].get("opik_trace_data", trace_instance.add_trace.call_args[0][0]) + assert trace_data["name"] == TraceTaskName.MESSAGE_TRACE + assert "message" in trace_data["tags"] + assert "workflow" in trace_data["tags"] + + assert trace_instance.add_span.call_count >= 1 + + +def test_workflow_trace_no_message_id(trace_instance, monkeypatch): + # Define constants for better readability + WORKFLOW_ID = "f0708b36-b1d7-42b3-a876-1d01b7d8f1a3" + WORKFLOW_RUN_ID = "d42ec285-c2fd-4248-8866-5c9386b101ac" + CONVERSATION_ID = "88a17f2e-9436-4472-bab9-4b1601d5af3c" + WORKFLOW_APP_LOG_ID = "41780d0d-ffba-4220-bc0c-401e4c89cdfb" + + trace_info = WorkflowTraceInfo( + workflow_id=WORKFLOW_ID, + tenant_id="tenant-1", + workflow_run_id=WORKFLOW_RUN_ID, + workflow_run_elapsed_time=1.0, + workflow_run_status="succeeded", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_version="1.0", + total_tokens=0, + file_list=[], + query="", + message_id=None, + conversation_id=CONVERSATION_ID, + start_time=_dt(), + end_time=_dt(), + trace_id=None, + metadata={"app_id": "app-1"}, + workflow_app_log_id=WORKFLOW_APP_LOG_ID, + error="", + ) + + monkeypatch.setattr("core.ops.opik_trace.opik_trace.sessionmaker", lambda bind: lambda: MagicMock()) + monkeypatch.setattr("core.ops.opik_trace.opik_trace.db", MagicMock(engine="engine")) + repo = MagicMock() + repo.get_by_workflow_run.return_value = [] + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.opik_trace.opik_trace.DifyCoreRepositoryFactory", mock_factory) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_trace = MagicMock() + trace_instance.workflow_trace(trace_info) + + trace_instance.add_trace.assert_called_once() + + +def test_workflow_trace_missing_app_id(trace_instance, monkeypatch): + trace_info = WorkflowTraceInfo( + workflow_id="5745f1b8-f8e6-4859-8110-996acb6c8d6a", + tenant_id="tenant-1", + workflow_run_id="46f53304-1659-464b-bee5-116585f0bec8", + workflow_run_elapsed_time=1.0, + workflow_run_status="succeeded", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_version="1.0", + total_tokens=0, + file_list=[], + query="", + message_id=None, + conversation_id="83f86b89-caef-4de8-a0f9-f164eddae1ea", + start_time=_dt(), + end_time=_dt(), + metadata={}, + workflow_app_log_id="339760b2-4b94-4532-8c81-133a97e4680e", + error="", + ) + monkeypatch.setattr("core.ops.opik_trace.opik_trace.sessionmaker", lambda bind: lambda: MagicMock()) + monkeypatch.setattr("core.ops.opik_trace.opik_trace.db", MagicMock(engine="engine")) + + with pytest.raises(ValueError, match="No app_id found in trace_info metadata"): + trace_instance.workflow_trace(trace_info) + + +def test_message_trace_basic(trace_instance, monkeypatch): + # Define constants for better readability + MESSAGE_DATA_ID = "e3a26712-8cac-4a25-94a4-a3bff21ee3ab" + CONVERSATION_ID = "9d3f3751-7521-4c19-9307-20e3cf6789a3" + MESSAGE_TRACE_ID = "710ace2f-bca8-41be-858c-54da42742a77" + OPIT_TRACE_ID = "f7dfd978-0d10-4549-8abf-00f2cbc49d2c" + + message_data = MagicMock() + message_data.id = MESSAGE_DATA_ID + message_data.from_account_id = "acc-1" + message_data.from_end_user_id = None + message_data.provider_response_latency = 0.5 + message_data.conversation_id = CONVERSATION_ID + message_data.total_price = 0.01 + message_data.model_id = "gpt-4" + message_data.answer = "hello" + message_data.status = MessageStatus.NORMAL + message_data.error = None + + trace_info = MessageTraceInfo( + message_id=MESSAGE_TRACE_ID, + message_data=message_data, + inputs={"query": "hi"}, + outputs={"answer": "hello"}, + message_tokens=10, + answer_tokens=20, + total_tokens=30, + start_time=_dt(), + end_time=_dt() + timedelta(seconds=1), + trace_id=OPIT_TRACE_ID, + metadata={"foo": "bar"}, + conversation_mode="chat", + conversation_model="gpt-4", + file_list=[], + error=None, + message_file_data=MagicMock(url="test.png"), + ) + + trace_instance.add_trace = MagicMock(return_value=MagicMock(id="trace_id_1")) + trace_instance.add_span = MagicMock() + + trace_instance.message_trace(trace_info) + + trace_instance.add_trace.assert_called_once() + trace_instance.add_span.assert_called_once() + + +def test_message_trace_with_end_user(trace_instance, monkeypatch): + message_data = MagicMock() + message_data.id = "85411059-79fb-4deb-a76c-c2e215f1b97e" + message_data.from_account_id = "acc-1" + message_data.from_end_user_id = "end-user-1" + message_data.conversation_id = "7d9f96d8-3be2-4e93-9c0e-922ff98dccc6" + message_data.status = MessageStatus.NORMAL + message_data.model_id = "gpt-4" + message_data.error = "" + message_data.answer = "hello" + message_data.total_price = 0.0 + message_data.provider_response_latency = 0.1 + + trace_info = MessageTraceInfo( + message_id="6bff35c7-33b7-4acb-ba21-44569a0327d0", + message_data=message_data, + inputs={}, + outputs={}, + message_tokens=0, + answer_tokens=0, + total_tokens=0, + start_time=_dt(), + end_time=_dt(), + metadata={}, + conversation_mode="chat", + conversation_model="gpt-4", + file_list=["url1"], + error=None, + ) + + mock_end_user = MagicMock(spec=EndUser) + mock_end_user.session_id = "session-id-123" + + mock_query = MagicMock() + mock_query.where.return_value.first.return_value = mock_end_user + monkeypatch.setattr("core.ops.opik_trace.opik_trace.db.session.query", lambda model: mock_query) + + trace_instance.add_trace = MagicMock(return_value=MagicMock(id="trace_id_2")) + trace_instance.add_span = MagicMock() + + trace_instance.message_trace(trace_info) + + trace_data = trace_instance.add_trace.call_args[0][0] + assert trace_data["metadata"]["user_id"] == "acc-1" + assert trace_data["metadata"]["end_user_id"] == "session-id-123" + + +def test_message_trace_none_data(trace_instance): + trace_info = SimpleNamespace(message_data=None, file_list=[], message_file_data=None, metadata={}) + trace_instance.add_trace = MagicMock() + trace_instance.message_trace(trace_info) + trace_instance.add_trace.assert_not_called() + + +def test_moderation_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + message_data.updated_at = _dt() + + trace_info = ModerationTraceInfo( + message_id="489d0dfd-065c-4106-8f9c-daded296c92d", + message_data=message_data, + inputs={"q": "hi"}, + action="stop", + flagged=True, + preset_response="blocked", + start_time=None, + end_time=None, + metadata={"foo": "bar"}, + trace_id="6f16cf18-9f4b-4955-8b6b-43cfa10978fc", + query="hi", + ) + + trace_instance.add_span = MagicMock() + trace_instance.moderation_trace(trace_info) + + trace_instance.add_span.assert_called_once() + span_data = trace_instance.add_span.call_args[0][0] + assert span_data["name"] == TraceTaskName.MODERATION_TRACE + assert span_data["output"]["flagged"] is True + + +def test_moderation_trace_none(trace_instance): + trace_info = ModerationTraceInfo( + message_id="cd732e4e-37f1-4c7e-8c64-820308bedcbf", + message_data=None, + inputs={}, + action="s", + flagged=False, + preset_response="", + query="", + metadata={}, + ) + trace_instance.add_span = MagicMock() + trace_instance.moderation_trace(trace_info) + trace_instance.add_span.assert_not_called() + + +def test_suggested_question_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + message_data.updated_at = _dt() + + trace_info = SuggestedQuestionTraceInfo( + message_id="7de55bda-a91d-477e-98ab-85c53c438469", + message_data=message_data, + inputs="hi", + suggested_question=["q1"], + total_tokens=10, + level="info", + start_time=_dt(), + end_time=_dt(), + metadata={}, + trace_id="a6687292-68c7-42ba-ae51-285579944d7b", + ) + + trace_instance.add_span = MagicMock() + trace_instance.suggested_question_trace(trace_info) + + trace_instance.add_span.assert_called_once() + span_data = trace_instance.add_span.call_args[0][0] + assert span_data["name"] == TraceTaskName.SUGGESTED_QUESTION_TRACE + + +def test_suggested_question_trace_none(trace_instance): + trace_info = SuggestedQuestionTraceInfo( + message_id="23696fc5-7e7f-46ec-bce8-1adc3c7f297d", + message_data=None, + inputs={}, + suggested_question=[], + total_tokens=0, + level="i", + metadata={}, + ) + trace_instance.add_span = MagicMock() + trace_instance.suggested_question_trace(trace_info) + trace_instance.add_span.assert_not_called() + + +def test_dataset_retrieval_trace(trace_instance): + message_data = MagicMock() + message_data.created_at = _dt() + message_data.updated_at = _dt() + + trace_info = DatasetRetrievalTraceInfo( + message_id="3e1a819f-c391-4950-adfd-96f82e5419a1", + message_data=message_data, + inputs="query", + documents=[{"id": "doc1"}], + start_time=None, + end_time=None, + metadata={}, + trace_id="41361000-e9be-4d11-b5e4-ab27ce0817d6", + ) + + trace_instance.add_span = MagicMock() + trace_instance.dataset_retrieval_trace(trace_info) + + trace_instance.add_span.assert_called_once() + span_data = trace_instance.add_span.call_args[0][0] + assert span_data["name"] == TraceTaskName.DATASET_RETRIEVAL_TRACE + + +def test_dataset_retrieval_trace_none(trace_instance): + trace_info = DatasetRetrievalTraceInfo( + message_id="35d6d44c-bccb-4e6e-8bd8-859257723ea8", message_data=None, inputs={}, documents=[], metadata={} + ) + trace_instance.add_span = MagicMock() + trace_instance.dataset_retrieval_trace(trace_info) + trace_instance.add_span.assert_not_called() + + +def test_tool_trace(trace_instance): + trace_info = ToolTraceInfo( + message_id="99db92c4-2254-496a-b5cc-18153315ce35", + message_data=MagicMock(), + inputs={}, + outputs={}, + tool_name="my_tool", + tool_inputs={"a": 1}, + tool_outputs="result_string", + time_cost=0.1, + start_time=_dt(), + end_time=_dt(), + metadata={}, + trace_id="a15a5fcb-7ffd-4458-8330-208f4cb1f796", + tool_config={}, + tool_parameters={}, + error="some error", + ) + + trace_instance.add_span = MagicMock() + trace_instance.tool_trace(trace_info) + + trace_instance.add_span.assert_called_once() + span_data = trace_instance.add_span.call_args[0][0] + assert span_data["name"] == "my_tool" + + +def test_generate_name_trace(trace_instance): + trace_info = GenerateNameTraceInfo( + inputs={"q": "hi"}, + outputs={"name": "new"}, + tenant_id="tenant-1", + conversation_id="271fe28f-6b86-416b-8d6b-bbbbfa9db791", + start_time=_dt(), + end_time=_dt(), + metadata={"921f010e-6878-4831-ae6b-271bf68c56fb": 1}, + ) + + trace_instance.add_trace = MagicMock(return_value=MagicMock(id="trace_id_3")) + trace_instance.add_span = MagicMock() + + trace_instance.generate_name_trace(trace_info) + + trace_instance.add_trace.assert_called_once() + trace_instance.add_span.assert_called_once() + + trace_data = trace_instance.add_trace.call_args[0][0] + assert trace_data["name"] == TraceTaskName.GENERATE_NAME_TRACE + + span_data = trace_instance.add_span.call_args[0][0] + assert span_data["trace_id"] == "trace_id_3" + + +def test_add_trace_success(trace_instance): + trace_data = {"id": "t1", "name": "trace"} + trace_instance.opik_client.trace.return_value = MagicMock(id="t1") + trace = trace_instance.add_trace(trace_data) + trace_instance.opik_client.trace.assert_called_once() + assert trace.id == "t1" + + +def test_add_trace_error(trace_instance): + trace_instance.opik_client.trace.side_effect = Exception("error") + trace_data = {"id": "t1", "name": "trace"} + with pytest.raises(ValueError, match="Opik Failed to create trace: error"): + trace_instance.add_trace(trace_data) + + +def test_add_span_success(trace_instance): + span_data = {"id": "s1", "name": "span", "trace_id": "t1"} + trace_instance.add_span(span_data) + trace_instance.opik_client.span.assert_called_once() + + +def test_add_span_error(trace_instance): + trace_instance.opik_client.span.side_effect = Exception("error") + span_data = {"id": "s1", "name": "span", "trace_id": "t1"} + with pytest.raises(ValueError, match="Opik Failed to create span: error"): + trace_instance.add_span(span_data) + + +def test_api_check_success(trace_instance): + trace_instance.opik_client.auth_check.return_value = True + assert trace_instance.api_check() is True + + +def test_api_check_error(trace_instance): + trace_instance.opik_client.auth_check.side_effect = Exception("fail") + with pytest.raises(ValueError, match="Opik API check failed: fail"): + trace_instance.api_check() + + +def test_get_project_url_success(trace_instance): + trace_instance.opik_client.get_project_url.return_value = "http://project.url" + assert trace_instance.get_project_url() == "http://project.url" + trace_instance.opik_client.get_project_url.assert_called_once_with(project_name=trace_instance.project) + + +def test_get_project_url_error(trace_instance): + trace_instance.opik_client.get_project_url.side_effect = Exception("fail") + with pytest.raises(ValueError, match="Opik get run url failed: fail"): + trace_instance.get_project_url() + + +def test_workflow_trace_usage_extraction_error_fixed(trace_instance, monkeypatch, caplog): + trace_info = WorkflowTraceInfo( + workflow_id="86a52565-4a6b-4a1b-9bfd-98e4595e70de", + tenant_id="66e8e918-472e-4b69-8051-12502c34fc07", + workflow_run_id="8403965c-3344-4d22-a8fe-d8d55cee64d9", + workflow_run_elapsed_time=1.0, + workflow_run_status="s", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_version="1", + total_tokens=0, + file_list=[], + query="", + message_id=None, + conversation_id="7a02cb9d-6949-4c59-a89d-f25bbc881e0e", + start_time=_dt(), + end_time=_dt(), + metadata={"app_id": "77e8e918-472e-4b69-8051-12502c34fc07"}, + workflow_app_log_id="82268424-e193-476c-a6db-f473388ee5fe", + error="", + ) + + node = MagicMock() + node.id = "88e8e918-472e-4b69-8051-12502c34fc07" + node.title = "LLM Node" + node.node_type = BuiltinNodeTypes.LLM + node.status = "succeeded" + + class BadDict(collections.UserDict): + def get(self, key, default=None): + if key == "usage": + raise Exception("Usage extraction failed") + return super().get(key, default) + + node.process_data = BadDict({"model_mode": "chat", "model_name": "gpt-4", "usage": True, "prompts": ["p"]}) + node.created_at = _dt() + node.elapsed_time = 0.1 + node.metadata = {} + node.outputs = {} + + repo = MagicMock() + repo.get_by_workflow_run.return_value = [node] + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + monkeypatch.setattr("core.ops.opik_trace.opik_trace.DifyCoreRepositoryFactory", mock_factory) + monkeypatch.setattr("core.ops.opik_trace.opik_trace.sessionmaker", lambda bind: lambda: MagicMock()) + monkeypatch.setattr("core.ops.opik_trace.opik_trace.db", MagicMock(engine="engine")) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.add_trace = MagicMock() + trace_instance.add_span = MagicMock() + + with caplog.at_level(logging.ERROR): + trace_instance.workflow_trace(trace_info) + + assert "Failed to extract usage" in caplog.text + assert trace_instance.add_span.call_count >= 1 + # Verify that at least one of the spans is for the LLM Node + span_names = [call.args[0]["name"] for call in trace_instance.add_span.call_args_list] + assert "LLM Node" in span_names diff --git a/api/tests/unit_tests/core/ops/tencent_trace/test_client.py b/api/tests/unit_tests/core/ops/tencent_trace/test_client.py new file mode 100644 index 0000000000..870c18e53e --- /dev/null +++ b/api/tests/unit_tests/core/ops/tencent_trace/test_client.py @@ -0,0 +1,583 @@ +"""Tests for the TencentTraceClient helpers that drive tracing and metrics.""" + +from __future__ import annotations + +import sys +import types +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest +from opentelemetry.sdk.trace import Event +from opentelemetry.trace import Status, StatusCode + +from core.ops.tencent_trace import client as client_module +from core.ops.tencent_trace.client import TencentTraceClient, _get_opentelemetry_sdk_version +from core.ops.tencent_trace.entities.tencent_trace_entity import SpanData + +metric_reader_instances: list[DummyMetricReader] = [] +meter_provider_instances: list[DummyMeterProvider] = [] + + +class DummyHistogram: + """Placeholder histogram type used by the stubbed metric stack.""" + + +class AggregationTemporality: + DELTA = "delta" + + +class DummyMeter: + def __init__(self) -> None: + self.created: list[tuple[dict[str, object], MagicMock]] = [] + + def create_histogram(self, **kwargs: object) -> MagicMock: + hist = MagicMock(name=f"hist-{kwargs.get('name')}") + self.created.append((kwargs, hist)) + return hist + + +class DummyMeterProvider: + def __init__(self, resource: object, metric_readers: list[object]) -> None: + self.resource = resource + self.metric_readers = metric_readers + self.meter = DummyMeter() + self.shutdown = MagicMock(name="meter_provider_shutdown") + meter_provider_instances.append(self) + + def get_meter(self, name: str, version: str) -> DummyMeter: + return self.meter + + +class DummyMetricReader: + def __init__(self, exporter: object, export_interval_millis: int) -> None: + self.exporter = exporter + self.export_interval_millis = export_interval_millis + self.shutdown = MagicMock(name="metric_reader_shutdown") + metric_reader_instances.append(self) + + +class DummyGrpcMetricExporter: + def __init__(self, **kwargs: object) -> None: + self.kwargs = kwargs + + +class DummyHttpMetricExporter: + def __init__(self, **kwargs: object) -> None: + self.kwargs = kwargs + + +class DummyJsonMetricExporter: + def __init__(self, **kwargs: object) -> None: + self.kwargs = kwargs + + +class DummyJsonMetricExporterNoTemporality: + """Exporter that rejects preferred_temporality to exercise fallback.""" + + def __init__(self, **kwargs: object) -> None: + if "preferred_temporality" in kwargs: + raise RuntimeError("unsupported preferred_temporality") + self.kwargs = kwargs + + +def _add_stub_modules(monkeypatch: pytest.MonkeyPatch) -> None: + """Drop fake metric modules into sys.modules so the client imports resolve.""" + + metrics_module = types.ModuleType("opentelemetry.sdk.metrics") + metrics_module.Histogram = DummyHistogram + metrics_module.MeterProvider = DummyMeterProvider + monkeypatch.setitem(sys.modules, "opentelemetry.sdk.metrics", metrics_module) + + metrics_export_module = types.ModuleType("opentelemetry.sdk.metrics.export") + metrics_export_module.AggregationTemporality = AggregationTemporality + metrics_export_module.PeriodicExportingMetricReader = DummyMetricReader + monkeypatch.setitem(sys.modules, "opentelemetry.sdk.metrics.export", metrics_export_module) + + grpc_module = types.ModuleType("opentelemetry.exporter.otlp.proto.grpc.metric_exporter") + grpc_module.OTLPMetricExporter = DummyGrpcMetricExporter + monkeypatch.setitem(sys.modules, "opentelemetry.exporter.otlp.proto.grpc.metric_exporter", grpc_module) + + http_module = types.ModuleType("opentelemetry.exporter.otlp.proto.http.metric_exporter") + http_module.OTLPMetricExporter = DummyHttpMetricExporter + monkeypatch.setitem(sys.modules, "opentelemetry.exporter.otlp.proto.http.metric_exporter", http_module) + + http_json_module = types.ModuleType("opentelemetry.exporter.otlp.http.json.metric_exporter") + http_json_module.OTLPMetricExporter = DummyJsonMetricExporter + monkeypatch.setitem(sys.modules, "opentelemetry.exporter.otlp.http.json.metric_exporter", http_json_module) + + legacy_json_module = types.ModuleType("opentelemetry.exporter.otlp.json.metric_exporter") + legacy_json_module.OTLPMetricExporter = DummyJsonMetricExporter + monkeypatch.setitem(sys.modules, "opentelemetry.exporter.otlp.json.metric_exporter", legacy_json_module) + + +@pytest.fixture(autouse=True) +def stub_metric_modules(monkeypatch: pytest.MonkeyPatch) -> None: + metric_reader_instances.clear() + meter_provider_instances.clear() + _add_stub_modules(monkeypatch) + + +@pytest.fixture(autouse=True) +def patch_core_components(monkeypatch: pytest.MonkeyPatch) -> dict[str, object]: + span_exporter = MagicMock(name="span_exporter") + monkeypatch.setattr(client_module, "OTLPSpanExporter", MagicMock(return_value=span_exporter)) + + span_processor = MagicMock(name="span_processor") + monkeypatch.setattr(client_module, "BatchSpanProcessor", MagicMock(return_value=span_processor)) + + tracer = MagicMock(name="tracer") + span = MagicMock(name="span") + tracer.start_span.return_value = span + + tracer_provider = MagicMock(name="tracer_provider") + tracer_provider.get_tracer.return_value = tracer + tracer_provider.shutdown = MagicMock(name="tracer_provider_shutdown") + monkeypatch.setattr(client_module, "TracerProvider", MagicMock(return_value=tracer_provider)) + + resource = MagicMock(name="resource") + monkeypatch.setattr(client_module, "Resource", MagicMock(return_value=resource)) + + logger_mock = MagicMock(name="tencent_logger") + monkeypatch.setattr(client_module, "logger", logger_mock) + + trace_api_stub = SimpleNamespace( + set_span_in_context=MagicMock(name="set_span_in_context", return_value="trace-context"), + NonRecordingSpan=MagicMock(name="non_recording_span", side_effect=lambda ctx: f"non-{ctx}"), + ) + monkeypatch.setattr(client_module, "trace_api", trace_api_stub) + + fake_config = SimpleNamespace( + project=SimpleNamespace(version="test"), + COMMIT_SHA="sha", + DEPLOY_ENV="dev", + EDITION="cloud", + ) + monkeypatch.setattr(client_module, "dify_config", fake_config) + + monkeypatch.setattr(client_module.socket, "gethostname", lambda: "fake-host") + monkeypatch.setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "") + + return { + "span_exporter": span_exporter, + "span_processor": span_processor, + "tracer": tracer, + "span": span, + "tracer_provider": tracer_provider, + "logger": logger_mock, + "trace_api": trace_api_stub, + } + + +def _build_client() -> TencentTraceClient: + return TencentTraceClient( + service_name="service", + endpoint="https://trace.example.com:4317", + token="token", + ) + + +def test_get_opentelemetry_sdk_version_reads_install(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(client_module, "version", lambda pkg: "2.0.0") + assert _get_opentelemetry_sdk_version() == "2.0.0" + + +def test_get_opentelemetry_sdk_version_falls_back(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(client_module, "version", MagicMock(side_effect=RuntimeError("boom"))) + assert _get_opentelemetry_sdk_version() == "1.27.0" + + +@pytest.mark.parametrize( + ("endpoint", "expected"), + [ + ( + "https://example.com:9090", + ("example.com:9090", False, "example.com", 9090), + ), + ( + "http://localhost", + ("localhost:4317", True, "localhost", 4317), + ), + ( + "example.com:bad", + ("example.com:4317", False, "example.com", 4317), + ), + ], +) +def test_resolve_grpc_target_parsable_variants(endpoint: str, expected: tuple[str, bool, str, int]) -> None: + assert TencentTraceClient._resolve_grpc_target(endpoint) == expected + + +def test_resolve_grpc_target_handles_errors() -> None: + assert TencentTraceClient._resolve_grpc_target(123) == ("localhost:4317", True, "localhost", 4317) + + +@pytest.mark.parametrize( + ("method", "attr_name", "args"), + [ + ("record_llm_duration", "hist_llm_duration", (0.3, {"foo": object()})), + ("record_token_usage", "hist_token_usage", (5, "input", "chat", "gpt", "gpt", "addr", "dify")), + ("record_time_to_first_token", "hist_time_to_first_token", (0.4, "dify", "gpt")), + ("record_time_to_generate", "hist_time_to_generate", (0.6, "dify", "gpt")), + ("record_trace_duration", "hist_trace_duration", (1.0, {"meta": object()})), + ], +) +def test_record_methods_call_histograms(method: str, attr_name: str, args: tuple[object, ...]) -> None: + client = _build_client() + hist_mock = MagicMock(name=attr_name) + setattr(client, attr_name, hist_mock) + + getattr(client, method)(*args) + hist_mock.record.assert_called_once() + + +def test_record_methods_skip_when_histogram_missing() -> None: + client = _build_client() + client.hist_llm_duration = None + client.record_llm_duration(0.1) + + client.hist_token_usage = None + client.record_token_usage(1, "go", "chat", "model", "model", "addr", "provider") + + client.hist_time_to_first_token = None + client.record_time_to_first_token(0.2, "prov", "model") + + client.hist_time_to_generate = None + client.record_time_to_generate(0.3, "prov", "model") + + client.hist_trace_duration = None + client.record_trace_duration(0.5) + + +def test_record_llm_duration_handles_exceptions(patch_core_components: dict[str, object]) -> None: + client = _build_client() + client.hist_llm_duration = MagicMock(name="hist_llm_duration") + client.hist_llm_duration.record.side_effect = RuntimeError("boom") + + client.record_llm_duration(0.2) + logger = patch_core_components["logger"] + logger.debug.assert_called() + + +def test_create_and_export_span_sets_attributes(patch_core_components: dict[str, object]) -> None: + client = _build_client() + span = patch_core_components["span"] + span.get_span_context.return_value = "ctx" + + data = SpanData( + trace_id=1, + parent_span_id=None, + span_id=2, + name="span", + attributes={"key": "value"}, + events=[Event(name="evt", attributes={"k": "v"}, timestamp=123)], + status=Status(StatusCode.OK), + start_time=10, + end_time=20, + ) + + client._create_and_export_span(data) + span.set_attributes.assert_called_once() + span.add_event.assert_called_once() + span.set_status.assert_called_once() + span.end.assert_called_once_with(end_time=20) + assert client.span_contexts[2] == "ctx" + + +def test_create_and_export_span_uses_parent_context(patch_core_components: dict[str, object]) -> None: + client = _build_client() + client.span_contexts[10] = "existing" + span = patch_core_components["span"] + span.get_span_context.return_value = "child" + + data = SpanData( + trace_id=1, + parent_span_id=10, + span_id=11, + name="span", + attributes={}, + events=[], + start_time=0, + end_time=1, + ) + + client._create_and_export_span(data) + trace_api = patch_core_components["trace_api"] + trace_api.NonRecordingSpan.assert_called_once_with("existing") + trace_api.set_span_in_context.assert_called_once() + + +def test_create_and_export_span_exception_logs_error(patch_core_components: dict[str, object]) -> None: + client = _build_client() + span = patch_core_components["span"] + span.get_span_context.return_value = "ctx" + client.tracer.start_span.side_effect = RuntimeError("boom") + + client._create_and_export_span( + SpanData( + trace_id=1, + parent_span_id=None, + span_id=2, + name="span", + attributes={}, + events=[], + start_time=0, + end_time=1, + ) + ) + logger = patch_core_components["logger"] + logger.exception.assert_called_once() + + +def test_api_check_connects_successfully(monkeypatch: pytest.MonkeyPatch) -> None: + client = _build_client() + + monkeypatch.setattr( + TencentTraceClient, + "_resolve_grpc_target", + MagicMock(return_value=("host:123", False, "host", 123)), + ) + + socket_mock = MagicMock() + socket_instance = MagicMock() + socket_instance.connect_ex.return_value = 0 + socket_mock.return_value = socket_instance + monkeypatch.setattr(client_module.socket, "socket", socket_mock) + + assert client.api_check() + socket_instance.connect_ex.assert_called_once() + + +def test_api_check_returns_false_and_handles_local(monkeypatch: pytest.MonkeyPatch) -> None: + client = _build_client() + + monkeypatch.setattr( + TencentTraceClient, + "_resolve_grpc_target", + MagicMock(return_value=("host:123", False, "host", 123)), + ) + + socket_mock = MagicMock() + socket_instance = MagicMock() + socket_instance.connect_ex.return_value = 1 + socket_mock.return_value = socket_instance + monkeypatch.setattr(client_module.socket, "socket", socket_mock) + + assert not client.api_check() + + monkeypatch.setattr( + TencentTraceClient, + "_resolve_grpc_target", + MagicMock(return_value=("localhost:4317", True, "localhost", 4317)), + ) + socket_instance.connect_ex.return_value = 1 + assert client.api_check() + + +def test_api_check_handles_exceptions(monkeypatch: pytest.MonkeyPatch) -> None: + client = TencentTraceClient("svc", "https://localhost", "token") + + monkeypatch.setattr(client_module.socket, "socket", MagicMock(side_effect=RuntimeError("boom"))) + assert client.api_check() + + +def test_get_project_url() -> None: + client = _build_client() + assert client.get_project_url() == "https://console.cloud.tencent.com/apm" + + +def test_shutdown_flushes_all_components(patch_core_components: dict[str, object]) -> None: + client = _build_client() + span_processor = patch_core_components["span_processor"] + tracer_provider = patch_core_components["tracer_provider"] + + client.shutdown() + span_processor.force_flush.assert_called_once() + span_processor.shutdown.assert_called_once() + tracer_provider.shutdown.assert_called_once() + + meter_provider = meter_provider_instances[-1] + metric_reader = metric_reader_instances[-1] + meter_provider.shutdown.assert_called_once() + metric_reader.shutdown.assert_called_once() + + +def test_shutdown_logs_when_meter_provider_fails(patch_core_components: dict[str, object]) -> None: + client = _build_client() + meter_provider = meter_provider_instances[-1] + meter_provider.shutdown.side_effect = RuntimeError("boom") + client.metric_reader.shutdown.side_effect = RuntimeError("boom") + + client.shutdown() + logger = patch_core_components["logger"] + logger.debug.assert_any_call( + "[Tencent APM] Error shutting down meter provider", + exc_info=True, + ) + logger.debug.assert_any_call( + "[Tencent APM] Error shutting down metric reader", + exc_info=True, + ) + + +def test_metrics_initialization_failure_sets_histogram_attributes(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(DummyMeterProvider, "__init__", MagicMock(side_effect=RuntimeError("err"))) + client = _build_client() + + assert client.meter is None + assert client.meter_provider is None + assert client.hist_llm_duration is None + assert client.hist_token_usage is None + assert client.hist_time_to_first_token is None + assert client.hist_time_to_generate is None + assert client.hist_trace_duration is None + assert client.metric_reader is None + + +def test_add_span_logs_exception(monkeypatch: pytest.MonkeyPatch, patch_core_components: dict[str, object]) -> None: + client = _build_client() + monkeypatch.setattr(client, "_create_and_export_span", MagicMock(side_effect=RuntimeError("boom"))) + + client.add_span( + SpanData( + trace_id=1, + parent_span_id=None, + span_id=2, + name="span", + attributes={}, + events=[], + start_time=0, + end_time=1, + ) + ) + + logger = patch_core_components["logger"] + logger.exception.assert_called_once() + + +def test_create_and_export_span_converts_attribute_types(patch_core_components: dict[str, object]) -> None: + client = _build_client() + span = patch_core_components["span"] + span.get_span_context.return_value = "ctx" + + data = SpanData.model_construct( + trace_id=1, + parent_span_id=None, + span_id=2, + name="span", + attributes={"num": 5, "flag": True, "pi": 3.14, "text": "value"}, + events=[], + links=[], + status=Status(StatusCode.OK), + start_time=0, + end_time=1, + ) + + client._create_and_export_span(data) + (attrs,) = span.set_attributes.call_args.args + assert attrs["num"] == 5 + assert attrs["flag"] is True + assert attrs["pi"] == 3.14 + assert attrs["text"] == "value" + + +def test_record_llm_duration_converts_attributes() -> None: + client = _build_client() + hist_mock = MagicMock(name="hist_llm_duration") + client.hist_llm_duration = hist_mock + + client.record_llm_duration(0.3, {"foo": object(), "bar": 2}) + _, attrs = hist_mock.record.call_args.args + assert isinstance(attrs["foo"], str) + assert attrs["bar"] == 2 + + +def test_record_trace_duration_converts_attributes() -> None: + client = _build_client() + hist_mock = MagicMock(name="hist_trace_duration") + client.hist_trace_duration = hist_mock + + client.record_trace_duration(1.0, {"meta": object(), "ok": True}) + _, attrs = hist_mock.record.call_args.args + assert isinstance(attrs["meta"], str) + assert attrs["ok"] is True + + +@pytest.mark.parametrize( + ("method", "attr_name", "args"), + [ + ("record_token_usage", "hist_token_usage", (5, "input", "chat", "gpt", "gpt", "addr", "dify")), + ("record_time_to_first_token", "hist_time_to_first_token", (0.4, "dify", "gpt")), + ("record_time_to_generate", "hist_time_to_generate", (0.6, "dify", "gpt")), + ("record_trace_duration", "hist_trace_duration", (1.0, {"meta": object()})), + ], +) +def test_record_methods_handle_exceptions( + method: str, attr_name: str, args: tuple[object, ...], patch_core_components: dict[str, object] +) -> None: + client = _build_client() + hist_mock = MagicMock(name=attr_name) + hist_mock.record.side_effect = RuntimeError("boom") + setattr(client, attr_name, hist_mock) + + getattr(client, method)(*args) + logger = patch_core_components["logger"] + logger.debug.assert_called() + + +def test_metrics_initializes_grpc_metric_exporter() -> None: + client = _build_client() + metric_reader = metric_reader_instances[-1] + + assert isinstance(metric_reader.exporter, DummyGrpcMetricExporter) + assert metric_reader.export_interval_millis == client.metrics_export_interval_sec * 1000 + assert metric_reader.exporter.kwargs["endpoint"] == "trace.example.com:4317" + assert metric_reader.exporter.kwargs["insecure"] is False + assert metric_reader.exporter.kwargs["headers"]["authorization"] == "Bearer token" + + +def test_metrics_initializes_http_protobuf_metric_exporter(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "http/protobuf") + client = _build_client() + metric_reader = metric_reader_instances[-1] + + assert isinstance(metric_reader.exporter, DummyHttpMetricExporter) + assert metric_reader.export_interval_millis == client.metrics_export_interval_sec * 1000 + assert metric_reader.exporter.kwargs["endpoint"] == client.endpoint + assert metric_reader.exporter.kwargs["headers"]["authorization"] == "Bearer token" + + +def test_metrics_initializes_http_json_metric_exporter(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "http/json") + client = _build_client() + metric_reader = metric_reader_instances[-1] + + assert isinstance(metric_reader.exporter, DummyJsonMetricExporter) + assert metric_reader.export_interval_millis == client.metrics_export_interval_sec * 1000 + assert metric_reader.exporter.kwargs["endpoint"] == client.endpoint + assert metric_reader.exporter.kwargs["headers"]["authorization"] == "Bearer token" + assert "preferred_temporality" in metric_reader.exporter.kwargs + + +def test_metrics_http_json_metric_exporter_falls_back_without_temporality(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "http/json") + exporter_module = sys.modules["opentelemetry.exporter.otlp.http.json.metric_exporter"] + monkeypatch.setattr(exporter_module, "OTLPMetricExporter", DummyJsonMetricExporterNoTemporality) + _ = _build_client() + metric_reader = metric_reader_instances[-1] + + assert isinstance(metric_reader.exporter, DummyJsonMetricExporterNoTemporality) + assert "preferred_temporality" not in metric_reader.exporter.kwargs + + +def test_metrics_http_json_uses_http_fallback_when_no_json_exporter(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("OTEL_EXPORTER_OTLP_PROTOCOL", "http/json") + + def _fail_import(mod_path: str) -> types.ModuleType: + raise ModuleNotFoundError(mod_path) + + monkeypatch.setattr(client_module.importlib, "import_module", _fail_import) + + _ = _build_client() + metric_reader = metric_reader_instances[-1] + assert isinstance(metric_reader.exporter, DummyHttpMetricExporter) diff --git a/api/tests/unit_tests/core/ops/tencent_trace/test_span_builder.py b/api/tests/unit_tests/core/ops/tencent_trace/test_span_builder.py new file mode 100644 index 0000000000..a0b6d52720 --- /dev/null +++ b/api/tests/unit_tests/core/ops/tencent_trace/test_span_builder.py @@ -0,0 +1,359 @@ +from datetime import datetime +from unittest.mock import MagicMock, patch + +from opentelemetry.trace import StatusCode + +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + MessageTraceInfo, + ToolTraceInfo, + WorkflowTraceInfo, +) +from core.ops.tencent_trace.entities.semconv import ( + GEN_AI_IS_ENTRY, + GEN_AI_IS_STREAMING_REQUEST, + GEN_AI_MODEL_NAME, + GEN_AI_SPAN_KIND, + GEN_AI_USAGE_INPUT_TOKENS, + INPUT_VALUE, + RETRIEVAL_DOCUMENT, + RETRIEVAL_QUERY, + TOOL_DESCRIPTION, + TOOL_NAME, + TOOL_PARAMETERS, + GenAISpanKind, +) +from core.ops.tencent_trace.span_builder import TencentSpanBuilder +from core.rag.models.document import Document +from dify_graph.entities import WorkflowNodeExecution +from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus + + +class TestTencentSpanBuilder: + def test_get_time_nanoseconds(self): + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_datetime_to_nanoseconds") as mock_convert: + mock_convert.return_value = 123456789 + dt = datetime.now() + result = TencentSpanBuilder._get_time_nanoseconds(dt) + assert result == 123456789 + mock_convert.assert_called_once_with(dt) + + def test_build_workflow_spans(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.workflow_run_id = "run_id" + trace_info.error = None + trace_info.start_time = datetime.now() + trace_info.end_time = datetime.now() + trace_info.workflow_run_inputs = {"sys.query": "hello"} + trace_info.workflow_run_outputs = {"answer": "world"} + trace_info.metadata = {"conversation_id": "conv_id"} + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.side_effect = [1, 2] # workflow_span_id, message_span_id + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + spans = TencentSpanBuilder.build_workflow_spans(trace_info, 123, "user_1") + + assert len(spans) == 2 + assert spans[0].name == "message" + assert spans[0].span_id == 2 + assert spans[1].name == "workflow" + assert spans[1].span_id == 1 + assert spans[1].parent_span_id == 2 + + def test_build_workflow_spans_no_message(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.workflow_run_id = "run_id" + trace_info.error = "some error" + trace_info.start_time = datetime.now() + trace_info.end_time = datetime.now() + trace_info.workflow_run_inputs = {} + trace_info.workflow_run_outputs = {} + trace_info.metadata = {} # No conversation_id + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 1 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + spans = TencentSpanBuilder.build_workflow_spans(trace_info, 123, "user_1") + + assert len(spans) == 1 + assert spans[0].name == "workflow" + assert spans[0].status.status_code == StatusCode.ERROR + assert spans[0].status.description == "some error" + assert spans[0].attributes[GEN_AI_IS_ENTRY] == "true" + + def test_build_workflow_llm_span(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {"conversation_id": "conv_id"} + + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node_id" + node_execution.created_at = datetime.now() + node_execution.finished_at = datetime.now() + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + node_execution.process_data = { + "model_name": "gpt-4", + "model_provider": "openai", + "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30, "time_to_first_token": 0.5}, + "prompts": ["hello"], + } + node_execution.outputs = {"text": "world"} + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 456 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_workflow_llm_span(123, 1, trace_info, node_execution) + + assert span.name == "GENERATION" + assert span.attributes[GEN_AI_MODEL_NAME] == "gpt-4" + assert span.attributes[GEN_AI_IS_STREAMING_REQUEST] == "true" + assert span.attributes[GEN_AI_USAGE_INPUT_TOKENS] == "10" + + def test_build_workflow_llm_span_usage_in_outputs(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {} + + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node_id" + node_execution.created_at = datetime.now() + node_execution.finished_at = datetime.now() + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + node_execution.process_data = {} + node_execution.outputs = { + "text": "world", + "usage": {"prompt_tokens": 15, "completion_tokens": 25, "total_tokens": 40}, + } + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 456 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_workflow_llm_span(123, 1, trace_info, node_execution) + + assert span.attributes[GEN_AI_USAGE_INPUT_TOKENS] == "15" + assert GEN_AI_IS_STREAMING_REQUEST not in span.attributes + + def test_build_message_span_standalone(self): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.message_id = "msg_id" + trace_info.error = None + trace_info.start_time = datetime.now() + trace_info.end_time = datetime.now() + trace_info.inputs = {"q": "hi"} + trace_info.outputs = "hello" + trace_info.metadata = {"conversation_id": "conv_id"} + trace_info.is_streaming_request = True + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 789 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_message_span(trace_info, 123, "user_1") + + assert span.name == "message" + assert span.attributes[GEN_AI_IS_STREAMING_REQUEST] == "true" + assert span.attributes[INPUT_VALUE] == str(trace_info.inputs) + + def test_build_message_span_standalone_with_error(self): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.message_id = "msg_id" + trace_info.error = "some error" + trace_info.start_time = datetime.now() + trace_info.end_time = datetime.now() + trace_info.inputs = None + trace_info.outputs = None + trace_info.metadata = {} + trace_info.is_streaming_request = False + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 789 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_message_span(trace_info, 123, "user_1") + + assert span.status.status_code == StatusCode.ERROR + assert span.status.description == "some error" + assert span.attributes[INPUT_VALUE] == "" + + def test_build_tool_span(self): + trace_info = MagicMock(spec=ToolTraceInfo) + trace_info.message_id = "msg_id" + trace_info.tool_name = "search" + trace_info.error = "tool error" + trace_info.start_time = datetime.now() + trace_info.end_time = datetime.now() + trace_info.tool_parameters = {"p": 1} + trace_info.tool_inputs = {"i": 2} + trace_info.tool_outputs = "result" + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 101 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_tool_span(trace_info, 123, 1) + + assert span.name == "search" + assert span.status.status_code == StatusCode.ERROR + assert span.attributes[TOOL_NAME] == "search" + + def test_build_retrieval_span(self): + trace_info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_info.message_id = "msg_id" + trace_info.inputs = "query" + trace_info.error = None + trace_info.start_time = datetime.now() + trace_info.end_time = datetime.now() + + doc = Document( + page_content="content", metadata={"dataset_id": "d1", "doc_id": "di1", "document_id": "du1", "score": 0.9} + ) + trace_info.documents = [doc] + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 202 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_retrieval_span(trace_info, 123, 1) + + assert span.name == "retrieval" + assert span.attributes[RETRIEVAL_QUERY] == "query" + assert "content" in span.attributes[RETRIEVAL_DOCUMENT] + + def test_build_retrieval_span_with_error(self): + trace_info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_info.message_id = "msg_id" + trace_info.inputs = "" + trace_info.error = "retrieval failed" + trace_info.start_time = datetime.now() + trace_info.end_time = datetime.now() + trace_info.documents = [] + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 202 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_retrieval_span(trace_info, 123, 1) + + assert span.status.status_code == StatusCode.ERROR + assert span.status.description == "retrieval failed" + + def test_get_workflow_node_status(self): + node = MagicMock(spec=WorkflowNodeExecution) + + node.status = WorkflowNodeExecutionStatus.SUCCEEDED + assert TencentSpanBuilder._get_workflow_node_status(node).status_code == StatusCode.OK + + node.status = WorkflowNodeExecutionStatus.FAILED + node.error = "fail" + status = TencentSpanBuilder._get_workflow_node_status(node) + assert status.status_code == StatusCode.ERROR + assert status.description == "fail" + + node.status = WorkflowNodeExecutionStatus.EXCEPTION + node.error = "exc" + status = TencentSpanBuilder._get_workflow_node_status(node) + assert status.status_code == StatusCode.ERROR + assert status.description == "exc" + + node.status = WorkflowNodeExecutionStatus.RUNNING + assert TencentSpanBuilder._get_workflow_node_status(node).status_code == StatusCode.UNSET + + def test_build_workflow_retrieval_span(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {"conversation_id": "conv_id"} + + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node_id" + node_execution.title = "my retrieval" + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + node_execution.inputs = {"query": "q1"} + node_execution.outputs = {"result": [{"content": "c1"}]} + node_execution.created_at = datetime.now() + node_execution.finished_at = datetime.now() + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 303 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_workflow_retrieval_span(123, 1, trace_info, node_execution) + + assert span.name == "my retrieval" + assert span.attributes[RETRIEVAL_QUERY] == "q1" + assert "c1" in span.attributes[RETRIEVAL_DOCUMENT] + + def test_build_workflow_retrieval_span_empty(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {} + + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node_id" + node_execution.title = "my retrieval" + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + node_execution.inputs = {} + node_execution.outputs = {} + node_execution.created_at = datetime.now() + node_execution.finished_at = datetime.now() + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 303 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_workflow_retrieval_span(123, 1, trace_info, node_execution) + + assert span.attributes[RETRIEVAL_QUERY] == "" + assert span.attributes[RETRIEVAL_DOCUMENT] == "" + + def test_build_workflow_tool_span(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node_id" + node_execution.title = "my tool" + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + node_execution.metadata = {WorkflowNodeExecutionMetadataKey.TOOL_INFO: {"info": "some"}} + node_execution.inputs = {"param": "val"} + node_execution.outputs = {"res": "ok"} + node_execution.created_at = datetime.now() + node_execution.finished_at = datetime.now() + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 404 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_workflow_tool_span(123, 1, trace_info, node_execution) + + assert span.name == "my tool" + assert span.attributes[TOOL_NAME] == "my tool" + assert "some" in span.attributes[TOOL_DESCRIPTION] + + def test_build_workflow_tool_span_no_metadata(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node_id" + node_execution.title = "my tool" + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + node_execution.metadata = None + node_execution.inputs = None + node_execution.outputs = {"res": "ok"} + node_execution.created_at = datetime.now() + node_execution.finished_at = datetime.now() + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 404 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_workflow_tool_span(123, 1, trace_info, node_execution) + + assert span.attributes[TOOL_DESCRIPTION] == "{}" + assert span.attributes[TOOL_PARAMETERS] == "{}" + + def test_build_workflow_task_span(self): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {"conversation_id": "conv_id"} + + node_execution = MagicMock(spec=WorkflowNodeExecution) + node_execution.id = "node_id" + node_execution.title = "my task" + node_execution.status = WorkflowNodeExecutionStatus.SUCCEEDED + node_execution.inputs = {"in": 1} + node_execution.outputs = {"out": 2} + node_execution.created_at = datetime.now() + node_execution.finished_at = datetime.now() + + with patch("core.ops.tencent_trace.utils.TencentTraceUtils.convert_to_span_id") as mock_convert_id: + mock_convert_id.return_value = 505 + with patch.object(TencentSpanBuilder, "_get_time_nanoseconds", return_value=100): + span = TencentSpanBuilder.build_workflow_task_span(123, 1, trace_info, node_execution) + + assert span.name == "my task" + assert span.attributes[GEN_AI_SPAN_KIND] == GenAISpanKind.TASK.value diff --git a/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace.py b/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace.py new file mode 100644 index 0000000000..f259e4639f --- /dev/null +++ b/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace.py @@ -0,0 +1,647 @@ +import logging +from unittest.mock import MagicMock, patch + +import pytest + +from core.ops.entities.config_entity import TencentConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + WorkflowTraceInfo, +) +from core.ops.tencent_trace.tencent_trace import TencentDataTrace +from dify_graph.entities import WorkflowNodeExecution +from dify_graph.enums import BuiltinNodeTypes +from models import Account, App, TenantAccountJoin + +logger = logging.getLogger(__name__) + + +@pytest.fixture +def tencent_config(): + return TencentConfig(service_name="test-service", endpoint="https://test-endpoint", token="test-token") + + +@pytest.fixture +def mock_trace_client(): + with patch("core.ops.tencent_trace.tencent_trace.TencentTraceClient") as mock: + yield mock + + +@pytest.fixture +def mock_span_builder(): + with patch("core.ops.tencent_trace.tencent_trace.TencentSpanBuilder") as mock: + yield mock + + +@pytest.fixture +def mock_trace_utils(): + with patch("core.ops.tencent_trace.tencent_trace.TencentTraceUtils") as mock: + yield mock + + +@pytest.fixture +def tencent_data_trace(tencent_config, mock_trace_client): + return TencentDataTrace(tencent_config) + + +class TestTencentDataTrace: + def test_init(self, tencent_config, mock_trace_client): + trace = TencentDataTrace(tencent_config) + mock_trace_client.assert_called_once_with( + service_name=tencent_config.service_name, + endpoint=tencent_config.endpoint, + token=tencent_config.token, + metrics_export_interval_sec=5, + ) + assert trace.trace_client == mock_trace_client.return_value + + def test_trace_dispatch(self, tencent_data_trace): + methods = [ + ( + WorkflowTraceInfo( + workflow_id="wf", + tenant_id="t", + workflow_run_id="run", + workflow_run_elapsed_time=1.0, + workflow_run_status="s", + workflow_run_inputs={}, + workflow_run_outputs={}, + workflow_run_version="v", + total_tokens=0, + file_list=[], + query="", + metadata={}, + ), + "workflow_trace", + ), + ( + MessageTraceInfo( + message_id="msg", + message_data={}, + inputs={}, + outputs={}, + start_time=None, + end_time=None, + conversation_mode="chat", + conversation_model="gpt-3.5-turbo", + message_tokens=0, + answer_tokens=0, + total_tokens=0, + metadata={}, + ), + "message_trace", + ), + ( + ModerationTraceInfo( + flagged=False, action="a", preset_response="p", query="q", metadata={}, message_id="m" + ), + None, + ), # Pass + ( + SuggestedQuestionTraceInfo( + suggested_question=[], + level="l", + total_tokens=0, + metadata={}, + message_id="m", + message_data={}, + inputs={}, + start_time=None, + end_time=None, + ), + "suggested_question_trace", + ), + ( + DatasetRetrievalTraceInfo( + metadata={}, + message_id="m", + message_data={}, + inputs={}, + documents=[], + start_time=None, + end_time=None, + ), + "dataset_retrieval_trace", + ), + ( + ToolTraceInfo( + tool_name="t", + tool_inputs={}, + tool_outputs="", + tool_config={}, + tool_parameters={}, + time_cost=0, + metadata={}, + message_id="m", + inputs={}, + outputs={}, + start_time=None, + end_time=None, + ), + "tool_trace", + ), + ( + GenerateNameTraceInfo( + tenant_id="t", metadata={}, message_id="m", inputs={}, outputs={}, start_time=None, end_time=None + ), + None, + ), # Pass + ] + + for trace_info, method_name in methods: + if method_name: + with patch.object(tencent_data_trace, method_name) as mock_method: + tencent_data_trace.trace(trace_info) + mock_method.assert_called_once_with(trace_info) + else: + tencent_data_trace.trace(trace_info) + + def test_api_check(self, tencent_data_trace): + tencent_data_trace.trace_client.api_check.return_value = True + assert tencent_data_trace.api_check() is True + tencent_data_trace.trace_client.api_check.assert_called_once() + + def test_get_project_url(self, tencent_data_trace): + tencent_data_trace.trace_client.get_project_url.return_value = "http://url" + assert tencent_data_trace.get_project_url() == "http://url" + tencent_data_trace.trace_client.get_project_url.assert_called_once() + + def test_workflow_trace(self, tencent_data_trace, mock_trace_utils, mock_span_builder): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.workflow_run_id = "run-id" + trace_info.trace_id = "parent-trace-id" + + mock_trace_utils.convert_to_trace_id.return_value = 123 + mock_trace_utils.create_link.return_value = "link" + + with patch.object(tencent_data_trace, "_get_user_id", return_value="user-1"): + with patch.object(tencent_data_trace, "_process_workflow_nodes") as mock_proc: + with patch.object(tencent_data_trace, "_record_workflow_trace_duration") as mock_dur: + mock_span_builder.build_workflow_spans.return_value = [MagicMock(), MagicMock()] + + tencent_data_trace.workflow_trace(trace_info) + + mock_trace_utils.convert_to_trace_id.assert_called_once_with("run-id") + mock_trace_utils.create_link.assert_called_once_with("parent-trace-id") + mock_span_builder.build_workflow_spans.assert_called_once() + assert tencent_data_trace.trace_client.add_span.call_count == 2 + mock_proc.assert_called_once_with(trace_info, 123) + mock_dur.assert_called_once_with(trace_info) + + def test_workflow_trace_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.workflow_run_id = "run-id" + + with patch( + "core.ops.tencent_trace.tencent_trace.TencentTraceUtils.convert_to_trace_id", side_effect=Exception("error") + ): + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace.workflow_trace(trace_info) + mock_log.assert_called_once_with("[Tencent APM] Failed to process workflow trace") + + def test_message_trace(self, tencent_data_trace, mock_trace_utils, mock_span_builder): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.message_id = "msg-id" + trace_info.trace_id = "parent-trace-id" + + mock_trace_utils.convert_to_trace_id.return_value = 123 + mock_trace_utils.create_link.return_value = "link" + + with patch.object(tencent_data_trace, "_get_user_id", return_value="user-1"): + with patch.object(tencent_data_trace, "_record_message_llm_metrics") as mock_metrics: + with patch.object(tencent_data_trace, "_record_message_trace_duration") as mock_dur: + mock_span_builder.build_message_span.return_value = MagicMock() + + tencent_data_trace.message_trace(trace_info) + + mock_trace_utils.convert_to_trace_id.assert_called_once_with("msg-id") + mock_trace_utils.create_link.assert_called_once_with("parent-trace-id") + mock_span_builder.build_message_span.assert_called_once() + tencent_data_trace.trace_client.add_span.assert_called_once() + mock_metrics.assert_called_once_with(trace_info) + mock_dur.assert_called_once_with(trace_info) + + def test_message_trace_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + + with patch( + "core.ops.tencent_trace.tencent_trace.TencentTraceUtils.convert_to_trace_id", side_effect=Exception("error") + ): + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace.message_trace(trace_info) + mock_log.assert_called_once_with("[Tencent APM] Failed to process message trace") + + def test_tool_trace(self, tencent_data_trace, mock_trace_utils, mock_span_builder): + trace_info = MagicMock(spec=ToolTraceInfo) + trace_info.message_id = "msg-id" + + mock_trace_utils.convert_to_span_id.return_value = 456 + mock_trace_utils.convert_to_trace_id.return_value = 123 + + tencent_data_trace.tool_trace(trace_info) + + mock_trace_utils.convert_to_span_id.assert_called_once_with("msg-id", "message") + mock_trace_utils.convert_to_trace_id.assert_called_once_with("msg-id") + mock_span_builder.build_tool_span.assert_called_once_with(trace_info, 123, 456) + tencent_data_trace.trace_client.add_span.assert_called_once() + + def test_tool_trace_no_msg_id(self, tencent_data_trace): + trace_info = MagicMock(spec=ToolTraceInfo) + trace_info.message_id = None + + tencent_data_trace.tool_trace(trace_info) + tencent_data_trace.trace_client.add_span.assert_not_called() + + def test_tool_trace_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=ToolTraceInfo) + trace_info.message_id = "msg-id" + + with patch( + "core.ops.tencent_trace.tencent_trace.TencentTraceUtils.convert_to_span_id", side_effect=Exception("error") + ): + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace.tool_trace(trace_info) + mock_log.assert_called_once_with("[Tencent APM] Failed to process tool trace") + + def test_dataset_retrieval_trace(self, tencent_data_trace, mock_trace_utils, mock_span_builder): + trace_info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_info.message_id = "msg-id" + + mock_trace_utils.convert_to_span_id.return_value = 456 + mock_trace_utils.convert_to_trace_id.return_value = 123 + + tencent_data_trace.dataset_retrieval_trace(trace_info) + + mock_trace_utils.convert_to_span_id.assert_called_once_with("msg-id", "message") + mock_trace_utils.convert_to_trace_id.assert_called_once_with("msg-id") + mock_span_builder.build_retrieval_span.assert_called_once_with(trace_info, 123, 456) + tencent_data_trace.trace_client.add_span.assert_called_once() + + def test_dataset_retrieval_trace_no_msg_id(self, tencent_data_trace): + trace_info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_info.message_id = None + + tencent_data_trace.dataset_retrieval_trace(trace_info) + tencent_data_trace.trace_client.add_span.assert_not_called() + + def test_dataset_retrieval_trace_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=DatasetRetrievalTraceInfo) + trace_info.message_id = "msg-id" + + with patch( + "core.ops.tencent_trace.tencent_trace.TencentTraceUtils.convert_to_span_id", side_effect=Exception("error") + ): + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace.dataset_retrieval_trace(trace_info) + mock_log.assert_called_once_with("[Tencent APM] Failed to process dataset retrieval trace") + + def test_suggested_question_trace(self, tencent_data_trace): + trace_info = MagicMock(spec=SuggestedQuestionTraceInfo) + with patch("core.ops.tencent_trace.tencent_trace.logger.info") as mock_log: + tencent_data_trace.suggested_question_trace(trace_info) + mock_log.assert_called_once_with("[Tencent APM] Processing suggested question trace") + + def test_suggested_question_trace_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=SuggestedQuestionTraceInfo) + with patch("core.ops.tencent_trace.tencent_trace.logger.info", side_effect=Exception("error")): + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace.suggested_question_trace(trace_info) + mock_log.assert_called_once_with("[Tencent APM] Failed to process suggested question trace") + + def test_process_workflow_nodes(self, tencent_data_trace, mock_trace_utils): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.workflow_run_id = "run-id" + mock_trace_utils.convert_to_span_id.return_value = 111 + + node1 = MagicMock(spec=WorkflowNodeExecution) + node1.id = "n1" + node1.node_type = BuiltinNodeTypes.LLM + node2 = MagicMock(spec=WorkflowNodeExecution) + node2.id = "n2" + node2.node_type = BuiltinNodeTypes.TOOL + + with patch.object(tencent_data_trace, "_get_workflow_node_executions", return_value=[node1, node2]): + with patch.object(tencent_data_trace, "_build_workflow_node_span", side_effect=["span1", "span2"]): + with patch.object(tencent_data_trace, "_record_llm_metrics") as mock_metrics: + tencent_data_trace._process_workflow_nodes(trace_info, 123) + + assert tencent_data_trace.trace_client.add_span.call_count == 2 + mock_metrics.assert_called_once_with(node1) + + def test_process_workflow_nodes_node_exception(self, tencent_data_trace, mock_trace_utils): + trace_info = MagicMock(spec=WorkflowTraceInfo) + mock_trace_utils.convert_to_span_id.return_value = 111 + + node = MagicMock(spec=WorkflowNodeExecution) + node.id = "n1" + + with patch.object(tencent_data_trace, "_get_workflow_node_executions", return_value=[node]): + with patch.object(tencent_data_trace, "_build_workflow_node_span", side_effect=Exception("node error")): + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace._process_workflow_nodes(trace_info, 123) + # The exception should be caught by the outer handler since convert_to_span_id is called first + mock_log.assert_called_once_with("[Tencent APM] Failed to process workflow nodes") + + def test_process_workflow_nodes_exception(self, tencent_data_trace, mock_trace_utils): + trace_info = MagicMock(spec=WorkflowTraceInfo) + mock_trace_utils.convert_to_span_id.side_effect = Exception("outer error") + + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace._process_workflow_nodes(trace_info, 123) + mock_log.assert_called_once_with("[Tencent APM] Failed to process workflow nodes") + + def test_build_workflow_node_span(self, tencent_data_trace, mock_span_builder): + trace_info = MagicMock(spec=WorkflowTraceInfo) + + nodes = [ + (BuiltinNodeTypes.LLM, mock_span_builder.build_workflow_llm_span), + (BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, mock_span_builder.build_workflow_retrieval_span), + (BuiltinNodeTypes.TOOL, mock_span_builder.build_workflow_tool_span), + (BuiltinNodeTypes.CODE, mock_span_builder.build_workflow_task_span), + ] + + for node_type, builder_method in nodes: + node = MagicMock(spec=WorkflowNodeExecution) + node.node_type = node_type + builder_method.return_value = "span" + + result = tencent_data_trace._build_workflow_node_span(node, 123, trace_info, 456) + + assert result == "span" + builder_method.assert_called_once_with(123, 456, trace_info, node) + + def test_build_workflow_node_span_exception(self, tencent_data_trace, mock_span_builder): + node = MagicMock(spec=WorkflowNodeExecution) + node.node_type = BuiltinNodeTypes.LLM + node.id = "n1" + mock_span_builder.build_workflow_llm_span.side_effect = Exception("error") + + with patch("core.ops.tencent_trace.tencent_trace.logger.debug") as mock_log: + result = tencent_data_trace._build_workflow_node_span(node, 123, MagicMock(), 456) + assert result is None + mock_log.assert_called_once() + + def test_get_workflow_node_executions(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {"app_id": "app-1"} + trace_info.workflow_run_id = "run-1" + + app = MagicMock(spec=App) + app.id = "app-1" + app.created_by = "user-1" + + account = MagicMock(spec=Account) + account.id = "user-1" + + tenant_join = MagicMock(spec=TenantAccountJoin) + tenant_join.tenant_id = "tenant-1" + + mock_executions = [MagicMock()] + + with patch("core.ops.tencent_trace.tencent_trace.db") as mock_db: + mock_db.engine = "engine" + with patch("core.ops.tencent_trace.tencent_trace.Session") as mock_session_ctx: + session = mock_session_ctx.return_value.__enter__.return_value + session.scalar.side_effect = [app, account] + session.query.return_value.filter_by.return_value.first.return_value = tenant_join + + with patch( + "core.ops.tencent_trace.tencent_trace.SQLAlchemyWorkflowNodeExecutionRepository" + ) as mock_repo: + mock_repo.return_value.get_by_workflow_run.return_value = mock_executions + + results = tencent_data_trace._get_workflow_node_executions(trace_info) + + assert results == mock_executions + account.set_tenant_id.assert_called_once_with("tenant-1") + + def test_get_workflow_node_executions_no_app_id(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {} + + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + results = tencent_data_trace._get_workflow_node_executions(trace_info) + assert results == [] + mock_log.assert_called_once() + + def test_get_workflow_node_executions_app_not_found(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.metadata = {"app_id": "app-1"} + + with patch("core.ops.tencent_trace.tencent_trace.db") as mock_db: + mock_db.init_app = MagicMock() # Ensure init_app is mocked + mock_db.engine = "engine" + with patch("core.ops.tencent_trace.tencent_trace.Session") as mock_session_ctx: + session = mock_session_ctx.return_value.__enter__.return_value + session.scalar.return_value = None + + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + results = tencent_data_trace._get_workflow_node_executions(trace_info) + assert results == [] + mock_log.assert_called_once() + + def test_get_user_id_workflow(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.tenant_id = "tenant-1" + trace_info.metadata = {"user_id": "user-1"} + + with patch("core.ops.tencent_trace.tencent_trace.sessionmaker", side_effect=Exception("Database error")): + with patch("core.ops.tencent_trace.tencent_trace.db") as mock_db: + mock_db.init_app = MagicMock() + mock_db.engine = MagicMock() + + user_id = tencent_data_trace._get_user_id(trace_info) + assert user_id == "unknown" + + def test_get_user_id_only_user_id(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.metadata = {"user_id": "user-1"} + + user_id = tencent_data_trace._get_user_id(trace_info) + assert user_id == "user-1" + + def test_get_user_id_anonymous(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.metadata = {} + + user_id = tencent_data_trace._get_user_id(trace_info) + assert user_id == "anonymous" + + def test_get_user_id_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.tenant_id = "t" + trace_info.metadata = {"user_id": "u"} + + with patch("core.ops.tencent_trace.tencent_trace.sessionmaker", side_effect=Exception("error")): + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + user_id = tencent_data_trace._get_user_id(trace_info) + assert user_id == "unknown" + mock_log.assert_called_once_with("[Tencent APM] Failed to get user ID") + + def test_record_llm_metrics_usage_in_process_data(self, tencent_data_trace): + node = MagicMock(spec=WorkflowNodeExecution) + node.process_data = { + "usage": { + "latency": 2.5, + "time_to_first_token": 0.5, + "time_to_generate": 2.0, + "prompt_tokens": 10, + "completion_tokens": 20, + }, + "model_provider": "openai", + "model_name": "gpt-4", + "model_mode": "chat", + } + node.outputs = {} + + tencent_data_trace._record_llm_metrics(node) + + tencent_data_trace.trace_client.record_llm_duration.assert_called_once() + tencent_data_trace.trace_client.record_time_to_first_token.assert_called_once() + tencent_data_trace.trace_client.record_time_to_generate.assert_called_once() + assert tencent_data_trace.trace_client.record_token_usage.call_count == 2 + + def test_record_llm_metrics_usage_in_outputs(self, tencent_data_trace): + node = MagicMock(spec=WorkflowNodeExecution) + node.process_data = {} + node.outputs = {"usage": {"latency": 1.0, "prompt_tokens": 5}} + + tencent_data_trace._record_llm_metrics(node) + tencent_data_trace.trace_client.record_llm_duration.assert_called_once() + tencent_data_trace.trace_client.record_token_usage.assert_called_once() + + def test_record_llm_metrics_exception(self, tencent_data_trace): + node = MagicMock(spec=WorkflowNodeExecution) + node.process_data = None + node.outputs = None + + with patch("core.ops.tencent_trace.tencent_trace.logger.debug") as mock_log: + tencent_data_trace._record_llm_metrics(node) + # Should not crash + + def test_record_message_llm_metrics(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.metadata = {"ls_provider": "openai", "ls_model_name": "gpt-4"} + trace_info.message_data = {"provider_response_latency": 1.1} + trace_info.is_streaming_request = True + trace_info.gen_ai_server_time_to_first_token = 0.2 + trace_info.llm_streaming_time_to_generate = 0.9 + trace_info.message_tokens = 15 + trace_info.answer_tokens = 25 + + tencent_data_trace._record_message_llm_metrics(trace_info) + + tencent_data_trace.trace_client.record_llm_duration.assert_called_once() + tencent_data_trace.trace_client.record_time_to_first_token.assert_called_once() + tencent_data_trace.trace_client.record_time_to_generate.assert_called_once() + assert tencent_data_trace.trace_client.record_token_usage.call_count == 2 + + def test_record_message_llm_metrics_object_data(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.metadata = {} + msg_data = MagicMock() + msg_data.provider_response_latency = 1.1 + msg_data.model_provider = "anthropic" + msg_data.model_id = "claude" + trace_info.message_data = msg_data + trace_info.is_streaming_request = False + + tencent_data_trace._record_message_llm_metrics(trace_info) + tencent_data_trace.trace_client.record_llm_duration.assert_called_once() + + def test_record_message_llm_metrics_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.metadata = None + + with patch("core.ops.tencent_trace.tencent_trace.logger.debug") as mock_log: + tencent_data_trace._record_message_llm_metrics(trace_info) + # Should not crash + + def test_record_workflow_trace_duration(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + from datetime import datetime, timedelta + + now = datetime.now() + trace_info.start_time = now + trace_info.end_time = now + timedelta(seconds=3) + trace_info.workflow_run_status = "succeeded" + trace_info.conversation_id = "conv-1" + + # Mock the record_trace_duration method to capture arguments + with patch.object(tencent_data_trace.trace_client, "record_trace_duration") as mock_record: + tencent_data_trace._record_workflow_trace_duration(trace_info) + + # Assert the method was called once + mock_record.assert_called_once() + + # Extract arguments passed to the method + args, kwargs = mock_record.call_args + + # Validate the duration argument + assert args[0] == 3.0 + + # Validate the attributes dict in kwargs + attributes = kwargs["attributes"] if "attributes" in kwargs else args[1] if len(args) > 1 else {} + assert attributes["conversation_mode"] == "workflow" + assert attributes["has_conversation"] == "true" + + def test_record_workflow_trace_duration_fallback(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.start_time = None + trace_info.workflow_run_elapsed_time = 4.5 + trace_info.workflow_run_status = "failed" + trace_info.conversation_id = None + + with patch.object(tencent_data_trace.trace_client, "record_trace_duration") as mock_record: + tencent_data_trace._record_workflow_trace_duration(trace_info) + mock_record.assert_called_once() + args, kwargs = mock_record.call_args + assert args[0] == 4.5 + # Check attributes dict (either in kwargs or as second positional arg) + attributes = kwargs["attributes"] if "attributes" in kwargs else args[1] if len(args) > 1 else {} + assert attributes["has_conversation"] == "false" + + def test_record_workflow_trace_duration_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=WorkflowTraceInfo) + trace_info.start_time = MagicMock() # This might cause total_seconds() to fail if not mocked right + + with patch("core.ops.tencent_trace.tencent_trace.logger.debug") as mock_log: + tencent_data_trace._record_workflow_trace_duration(trace_info) + + def test_record_message_trace_duration(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + from datetime import datetime, timedelta + + now = datetime.now() + trace_info.start_time = now + trace_info.end_time = now + timedelta(seconds=2) + trace_info.conversation_mode = "chat" + trace_info.is_streaming_request = True + + tencent_data_trace._record_message_trace_duration(trace_info) + tencent_data_trace.trace_client.record_trace_duration.assert_called_once_with( + 2.0, {"conversation_mode": "chat", "stream": "true"} + ) + + def test_record_message_trace_duration_exception(self, tencent_data_trace): + trace_info = MagicMock(spec=MessageTraceInfo) + trace_info.start_time = None + + with patch("core.ops.tencent_trace.tencent_trace.logger.debug") as mock_log: + tencent_data_trace._record_message_trace_duration(trace_info) + + def test_del(self, tencent_data_trace): + client = tencent_data_trace.trace_client + tencent_data_trace.__del__() + client.shutdown.assert_called_once() + + def test_del_exception(self, tencent_data_trace): + tencent_data_trace.trace_client.shutdown.side_effect = Exception("error") + with patch("core.ops.tencent_trace.tencent_trace.logger.exception") as mock_log: + tencent_data_trace.__del__() + mock_log.assert_called_once_with("[Tencent APM] Failed to shutdown trace client during cleanup") diff --git a/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace_utils.py b/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace_utils.py new file mode 100644 index 0000000000..ef28d18e20 --- /dev/null +++ b/api/tests/unit_tests/core/ops/tencent_trace/test_tencent_trace_utils.py @@ -0,0 +1,106 @@ +"""Unit tests for Tencent APM tracing utilities.""" + +from __future__ import annotations + +import hashlib +import uuid +from datetime import UTC, datetime +from unittest.mock import patch + +import pytest +from opentelemetry.trace import Link, TraceFlags + +from core.ops.tencent_trace.utils import TencentTraceUtils + + +def test_convert_to_trace_id_with_valid_uuid() -> None: + uuid_str = "12345678-1234-5678-1234-567812345678" + assert TencentTraceUtils.convert_to_trace_id(uuid_str) == uuid.UUID(uuid_str).int + + +def test_convert_to_trace_id_uses_uuid4_when_none() -> None: + expected_uuid = uuid.UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa") + with patch("core.ops.tencent_trace.utils.uuid.uuid4", return_value=expected_uuid) as uuid4_mock: + assert TencentTraceUtils.convert_to_trace_id(None) == expected_uuid.int + uuid4_mock.assert_called_once() + + +def test_convert_to_trace_id_raises_value_error_for_invalid_uuid() -> None: + with pytest.raises(ValueError, match=r"^Invalid UUID input:"): + TencentTraceUtils.convert_to_trace_id("not-a-uuid") + + +def test_convert_to_span_id_is_deterministic_and_sensitive_to_type() -> None: + uuid_str = "12345678-1234-5678-1234-567812345678" + span_type = "llm" + + uuid_obj = uuid.UUID(uuid_str) + combined_key = f"{uuid_obj.hex}-{span_type}" + hash_bytes = hashlib.sha256(combined_key.encode("utf-8")).digest() + expected = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False) + + assert TencentTraceUtils.convert_to_span_id(uuid_str, span_type) == expected + assert TencentTraceUtils.convert_to_span_id(uuid_str, "other") != expected + + +def test_convert_to_span_id_uses_uuid4_when_none() -> None: + expected_uuid = uuid.UUID("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb") + with patch("core.ops.tencent_trace.utils.uuid.uuid4", return_value=expected_uuid) as uuid4_mock: + span_id = TencentTraceUtils.convert_to_span_id(None, "workflow") + assert isinstance(span_id, int) + uuid4_mock.assert_called_once() + + +def test_convert_to_span_id_raises_value_error_for_invalid_uuid() -> None: + with pytest.raises(ValueError, match=r"^Invalid UUID input:"): + TencentTraceUtils.convert_to_span_id("bad-uuid", "span") + + +def test_generate_span_id_skips_invalid_span_id() -> None: + with patch( + "core.ops.tencent_trace.utils.random.getrandbits", + side_effect=[TencentTraceUtils.INVALID_SPAN_ID, 42], + ) as bits_mock: + assert TencentTraceUtils.generate_span_id() == 42 + assert bits_mock.call_count == 2 + + +def test_convert_datetime_to_nanoseconds_accepts_datetime() -> None: + start_time = datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + expected = int(start_time.timestamp() * 1e9) + assert TencentTraceUtils.convert_datetime_to_nanoseconds(start_time) == expected + + +def test_convert_datetime_to_nanoseconds_uses_now_when_none() -> None: + fixed = datetime(2024, 1, 2, 3, 4, 5, tzinfo=UTC) + expected = int(fixed.timestamp() * 1e9) + + with patch("core.ops.tencent_trace.utils.datetime") as datetime_mock: + datetime_mock.now.return_value = fixed + assert TencentTraceUtils.convert_datetime_to_nanoseconds(None) == expected + datetime_mock.now.assert_called_once() + + +@pytest.mark.parametrize( + ("trace_id_str", "expected_trace_id"), + [ + ("0" * 31 + "1", int("0" * 31 + "1", 16)), + (str(uuid.UUID("cccccccc-cccc-cccc-cccc-cccccccccccc")), uuid.UUID("cccccccc-cccc-cccc-cccc-cccccccccccc").int), + ], +) +def test_create_link_accepts_hex_or_uuid(trace_id_str: str, expected_trace_id: int) -> None: + link = TencentTraceUtils.create_link(trace_id_str) + assert isinstance(link, Link) + assert link.context.trace_id == expected_trace_id + assert link.context.span_id == TencentTraceUtils.INVALID_SPAN_ID + assert link.context.is_remote is False + assert link.context.trace_flags == TraceFlags(TraceFlags.SAMPLED) + + +@pytest.mark.parametrize("trace_id_str", ["g" * 32, "not-a-uuid", None]) +def test_create_link_falls_back_to_uuid4(trace_id_str: object) -> None: + fallback_uuid = uuid.UUID("dddddddd-dddd-dddd-dddd-dddddddddddd") + with patch("core.ops.tencent_trace.utils.uuid.uuid4", return_value=fallback_uuid) as uuid4_mock: + link = TencentTraceUtils.create_link(trace_id_str) # type: ignore[arg-type] + assert link.context.trace_id == fallback_uuid.int + uuid4_mock.assert_called_once() diff --git a/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py b/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py index 32389b4d64..49d6b698ef 100644 --- a/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py +++ b/api/tests/unit_tests/core/ops/test_arize_phoenix_trace.py @@ -1,29 +1,29 @@ from openinference.semconv.trace import OpenInferenceSpanKindValues from core.ops.arize_phoenix_trace.arize_phoenix_trace import _NODE_TYPE_TO_SPAN_KIND, _get_node_span_kind -from dify_graph.enums import NodeType +from dify_graph.enums import BUILT_IN_NODE_TYPES, BuiltinNodeTypes class TestGetNodeSpanKind: """Tests for _get_node_span_kind helper.""" def test_all_node_types_are_mapped_correctly(self): - """Ensure every NodeType enum member is mapped to the correct span kind.""" + """Ensure every built-in node type is mapped to the correct span kind.""" # Mappings for node types that have a specialised span kind. special_mappings = { - NodeType.LLM: OpenInferenceSpanKindValues.LLM, - NodeType.KNOWLEDGE_RETRIEVAL: OpenInferenceSpanKindValues.RETRIEVER, - NodeType.TOOL: OpenInferenceSpanKindValues.TOOL, - NodeType.AGENT: OpenInferenceSpanKindValues.AGENT, + BuiltinNodeTypes.LLM: OpenInferenceSpanKindValues.LLM, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: OpenInferenceSpanKindValues.RETRIEVER, + BuiltinNodeTypes.TOOL: OpenInferenceSpanKindValues.TOOL, + BuiltinNodeTypes.AGENT: OpenInferenceSpanKindValues.AGENT, } - # Test that every NodeType enum member is mapped to the correct span kind. + # Test that every built-in node type is mapped to the correct span kind. # Node types not in `special_mappings` should default to CHAIN. - for node_type in NodeType: + for node_type in BUILT_IN_NODE_TYPES: expected_span_kind = special_mappings.get(node_type, OpenInferenceSpanKindValues.CHAIN) actual_span_kind = _get_node_span_kind(node_type) assert actual_span_kind == expected_span_kind, ( - f"NodeType.{node_type.name} was mapped to {actual_span_kind}, but {expected_span_kind} was expected." + f"Node type {node_type!r} was mapped to {actual_span_kind}, but {expected_span_kind} was expected." ) def test_unknown_string_defaults_to_chain(self): diff --git a/api/tests/unit_tests/core/ops/test_base_trace_instance.py b/api/tests/unit_tests/core/ops/test_base_trace_instance.py new file mode 100644 index 0000000000..a8bee7dfa7 --- /dev/null +++ b/api/tests/unit_tests/core/ops/test_base_trace_instance.py @@ -0,0 +1,112 @@ +from unittest.mock import MagicMock + +import pytest +from sqlalchemy.orm import Session + +from core.ops.base_trace_instance import BaseTraceInstance +from core.ops.entities.config_entity import BaseTracingConfig +from core.ops.entities.trace_entity import BaseTraceInfo +from models import Account, App, TenantAccountJoin + + +class ConcreteTraceInstance(BaseTraceInstance): + def __init__(self, trace_config: BaseTracingConfig): + super().__init__(trace_config) + + def trace(self, trace_info: BaseTraceInfo): + super().trace(trace_info) + + +@pytest.fixture +def mock_db_session(monkeypatch): + mock_session = MagicMock(spec=Session) + mock_session.__enter__.return_value = mock_session + mock_session.__exit__.return_value = None + + mock_session_class = MagicMock(return_value=mock_session) + + monkeypatch.setattr("core.ops.base_trace_instance.Session", mock_session_class) + monkeypatch.setattr("core.ops.base_trace_instance.db", MagicMock()) + return mock_session + + +def test_get_service_account_with_tenant_app_not_found(mock_db_session): + mock_db_session.scalar.return_value = None + + config = MagicMock(spec=BaseTracingConfig) + instance = ConcreteTraceInstance(config) + + with pytest.raises(ValueError, match="App with id some_app_id not found"): + instance.get_service_account_with_tenant("some_app_id") + + +def test_get_service_account_with_tenant_no_creator(mock_db_session): + mock_app = MagicMock(spec=App) + mock_app.id = "some_app_id" + mock_app.created_by = None + mock_db_session.scalar.return_value = mock_app + + config = MagicMock(spec=BaseTracingConfig) + instance = ConcreteTraceInstance(config) + + with pytest.raises(ValueError, match="App with id some_app_id has no creator"): + instance.get_service_account_with_tenant("some_app_id") + + +def test_get_service_account_with_tenant_creator_not_found(mock_db_session): + mock_app = MagicMock(spec=App) + mock_app.id = "some_app_id" + mock_app.created_by = "creator_id" + + # First call to scalar returns app, second returns None (for account) + mock_db_session.scalar.side_effect = [mock_app, None] + + config = MagicMock(spec=BaseTracingConfig) + instance = ConcreteTraceInstance(config) + + with pytest.raises(ValueError, match="Creator account with id creator_id not found for app some_app_id"): + instance.get_service_account_with_tenant("some_app_id") + + +def test_get_service_account_with_tenant_tenant_not_found(mock_db_session): + mock_app = MagicMock(spec=App) + mock_app.id = "some_app_id" + mock_app.created_by = "creator_id" + + mock_account = MagicMock(spec=Account) + mock_account.id = "creator_id" + + mock_db_session.scalar.side_effect = [mock_app, mock_account] + + # session.query(TenantAccountJoin).filter_by(...).first() returns None + mock_db_session.query.return_value.filter_by.return_value.first.return_value = None + + config = MagicMock(spec=BaseTracingConfig) + instance = ConcreteTraceInstance(config) + + with pytest.raises(ValueError, match="Current tenant not found for account creator_id"): + instance.get_service_account_with_tenant("some_app_id") + + +def test_get_service_account_with_tenant_success(mock_db_session): + mock_app = MagicMock(spec=App) + mock_app.id = "some_app_id" + mock_app.created_by = "creator_id" + + mock_account = MagicMock(spec=Account) + mock_account.id = "creator_id" + mock_account.set_tenant_id = MagicMock() + + mock_db_session.scalar.side_effect = [mock_app, mock_account] + + mock_tenant_join = MagicMock(spec=TenantAccountJoin) + mock_tenant_join.tenant_id = "tenant_id" + mock_db_session.query.return_value.filter_by.return_value.first.return_value = mock_tenant_join + + config = MagicMock(spec=BaseTracingConfig) + instance = ConcreteTraceInstance(config) + + result = instance.get_service_account_with_tenant("some_app_id") + + assert result == mock_account + mock_account.set_tenant_id.assert_called_once_with("tenant_id") diff --git a/api/tests/unit_tests/core/ops/test_opik_trace.py b/api/tests/unit_tests/core/ops/test_opik_trace.py new file mode 100644 index 0000000000..7660967183 --- /dev/null +++ b/api/tests/unit_tests/core/ops/test_opik_trace.py @@ -0,0 +1,329 @@ +"""Tests for OpikDataTrace workflow_trace changes. + +Covers: +- _seed_to_uuid4 helper: produces valid UUID4 strings deterministically +- prepare_opik_uuid helper: basic contract +- workflow_trace without message_id now creates a root span parented to None +- workflow_trace without message_id: node spans parent to root_span_id (not workflow_app_log_id) +- workflow_trace with message_id still creates root span keyed on workflow_run_id (unchanged path) +""" + +from __future__ import annotations + +import uuid +from datetime import datetime +from unittest.mock import MagicMock, patch + +from core.ops.entities.trace_entity import TraceTaskName, WorkflowTraceInfo +from core.ops.opik_trace.opik_trace import OpikDataTrace, _seed_to_uuid4, prepare_opik_uuid + +# A stable UUID4 used as the workflow_run_id throughout all tests. +_WORKFLOW_RUN_ID = "a3f1b2c4-d5e6-4f78-9a0b-c1d2e3f4a5b6" + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_workflow_trace_info( + *, + message_id: str | None = None, + workflow_app_log_id: str | None = None, + workflow_run_id: str = _WORKFLOW_RUN_ID, +) -> WorkflowTraceInfo: + """Return a minimal WorkflowTraceInfo suitable for unit testing.""" + return WorkflowTraceInfo( + message_id=message_id, + workflow_id="wf-id", + tenant_id="tenant-id", + workflow_run_id=workflow_run_id, + workflow_app_log_id=workflow_app_log_id, + workflow_run_elapsed_time=1.5, + workflow_run_status="succeeded", + workflow_run_inputs={"query": "hello"}, + workflow_run_outputs={"result": "world"}, + workflow_run_version="1", + total_tokens=42, + file_list=[], + query="hello", + start_time=datetime(2025, 1, 1, 12, 0, 0), + end_time=datetime(2025, 1, 1, 12, 0, 1), + metadata={"app_id": "app-abc"}, + conversation_id=None, + ) + + +def _make_opik_trace_instance() -> OpikDataTrace: + """Construct an OpikDataTrace with the Opik SDK client mocked out.""" + with patch("core.ops.opik_trace.opik_trace.Opik"): + from core.ops.entities.config_entity import OpikConfig + + config = OpikConfig(api_key="key", project="test-project", url="https://www.comet.com/opik/api/") + instance = OpikDataTrace(config) + + instance.add_trace = MagicMock(return_value=MagicMock(id="mock-trace-id")) + instance.add_span = MagicMock() + instance.get_service_account_with_tenant = MagicMock(return_value=MagicMock()) + return instance + + +# --------------------------------------------------------------------------- +# _seed_to_uuid4 +# --------------------------------------------------------------------------- + + +class TestSeedToUuid4: + def test_returns_valid_uuid4_string(self): + result = _seed_to_uuid4("some-arbitrary-seed") + parsed = uuid.UUID(result) + assert parsed.version == 4 + + def test_is_deterministic(self): + assert _seed_to_uuid4("seed-abc") == _seed_to_uuid4("seed-abc") + + def test_different_seeds_give_different_results(self): + assert _seed_to_uuid4("seed-1") != _seed_to_uuid4("seed-2") + + def test_workflow_run_id_with_root_suffix_is_valid_uuid4(self): + """The primary use-case: deriving a root-span UUID from workflow_run_id + '-root'.""" + seed = _WORKFLOW_RUN_ID + "-root" + result = _seed_to_uuid4(seed) + parsed = uuid.UUID(result) + assert parsed.version == 4 + + def test_seed_and_seed_root_produce_different_uuids(self): + """Root span UUID must differ from the base workflow UUID to avoid ID collisions.""" + base = _seed_to_uuid4(_WORKFLOW_RUN_ID) + with_root = _seed_to_uuid4(_WORKFLOW_RUN_ID + "-root") + assert base != with_root + + +# --------------------------------------------------------------------------- +# prepare_opik_uuid +# --------------------------------------------------------------------------- + + +class TestPrepareOpikUuid: + def test_is_deterministic(self): + dt = datetime(2025, 6, 15, 10, 30, 0) + uid = str(uuid.uuid4()) + assert prepare_opik_uuid(dt, uid) == prepare_opik_uuid(dt, uid) + + def test_different_uuids_give_different_results(self): + dt = datetime(2025, 6, 15, 10, 30, 0) + assert prepare_opik_uuid(dt, str(uuid.uuid4())) != prepare_opik_uuid(dt, str(uuid.uuid4())) + + def test_none_datetime_does_not_raise(self): + assert prepare_opik_uuid(None, str(uuid.uuid4())) is not None + + def test_none_uuid_does_not_raise(self): + assert prepare_opik_uuid(datetime(2025, 1, 1), None) is not None + + +# --------------------------------------------------------------------------- +# workflow_trace — no message_id (new code path) +# --------------------------------------------------------------------------- + + +class TestWorkflowTraceWithoutMessageId: + def _run(self, trace_info: WorkflowTraceInfo, node_executions: list | None = None): + instance = _make_opik_trace_instance() + fake_repo = MagicMock() + fake_repo.get_by_workflow_run.return_value = node_executions or [] + + with ( + patch("core.ops.opik_trace.opik_trace.db") as mock_db, + patch("core.ops.opik_trace.opik_trace.sessionmaker"), + patch( + "core.ops.opik_trace.opik_trace.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + return_value=fake_repo, + ), + ): + mock_db.engine = MagicMock() + instance.workflow_trace(trace_info) + + return instance + + def _expected_root_span_id(self, trace_info: WorkflowTraceInfo): + return prepare_opik_uuid( + trace_info.start_time, + _seed_to_uuid4(trace_info.workflow_run_id + "-root"), + ) + + def test_root_span_is_created(self): + trace_info = _make_workflow_trace_info(message_id=None) + instance = self._run(trace_info) + assert instance.add_span.called + + def test_root_span_id_matches_expected(self): + trace_info = _make_workflow_trace_info(message_id=None) + instance = self._run(trace_info) + + expected = self._expected_root_span_id(trace_info) + root_span_kwargs = instance.add_span.call_args_list[0][0][0] + assert root_span_kwargs["id"] == expected + + def test_root_span_has_no_parent(self): + trace_info = _make_workflow_trace_info(message_id=None) + instance = self._run(trace_info) + + root_span_kwargs = instance.add_span.call_args_list[0][0][0] + assert root_span_kwargs["parent_span_id"] is None + + def test_trace_name_is_workflow_trace(self): + """Without message_id, the Opik trace itself should be named WORKFLOW_TRACE.""" + trace_info = _make_workflow_trace_info(message_id=None) + instance = self._run(trace_info) + + trace_kwargs = instance.add_trace.call_args_list[0][0][0] + assert trace_kwargs["name"] == TraceTaskName.WORKFLOW_TRACE + + def test_root_span_name_is_workflow_trace(self): + trace_info = _make_workflow_trace_info(message_id=None) + instance = self._run(trace_info) + + root_span_kwargs = instance.add_span.call_args_list[0][0][0] + assert root_span_kwargs["name"] == TraceTaskName.WORKFLOW_TRACE + + def test_root_span_has_workflow_tag(self): + trace_info = _make_workflow_trace_info(message_id=None) + instance = self._run(trace_info) + + root_span_kwargs = instance.add_span.call_args_list[0][0][0] + assert "workflow" in root_span_kwargs["tags"] + + def test_node_execution_spans_are_parented_to_root(self): + """Node spans must use root_span_id as parent, not any other ID.""" + trace_info = _make_workflow_trace_info(message_id=None) + expected_root_span_id = self._expected_root_span_id(trace_info) + + node_exec = MagicMock() + node_exec.id = str(uuid.uuid4()) + node_exec.title = "LLM Node" + node_exec.node_type = "llm" + node_exec.status = "succeeded" + node_exec.process_data = {} + node_exec.inputs = {"prompt": "hi"} + node_exec.outputs = {"text": "hello"} + node_exec.created_at = datetime(2025, 1, 1, 12, 0, 0) + node_exec.elapsed_time = 0.5 + node_exec.metadata = {} + + instance = self._run(trace_info, node_executions=[node_exec]) + + # call_args_list[0] = root span, [1] = node execution span + assert instance.add_span.call_count == 2 + node_span_kwargs = instance.add_span.call_args_list[1][0][0] + assert node_span_kwargs["parent_span_id"] == expected_root_span_id + + def test_node_span_not_parented_to_workflow_app_log_id(self): + """Old behaviour derived parent from workflow_app_log_id; that must no longer apply.""" + trace_info = _make_workflow_trace_info( + message_id=None, + workflow_app_log_id=str(uuid.uuid4()), + ) + + node_exec = MagicMock() + node_exec.id = str(uuid.uuid4()) + node_exec.title = "Tool Node" + node_exec.node_type = "tool" + node_exec.status = "succeeded" + node_exec.process_data = {} + node_exec.inputs = {} + node_exec.outputs = {} + node_exec.created_at = datetime(2025, 1, 1, 12, 0, 0) + node_exec.elapsed_time = 0.2 + node_exec.metadata = {} + + instance = self._run(trace_info, node_executions=[node_exec]) + + old_parent_id = prepare_opik_uuid(trace_info.start_time, trace_info.workflow_app_log_id) + node_span_kwargs = instance.add_span.call_args_list[1][0][0] + assert node_span_kwargs["parent_span_id"] != old_parent_id + + def test_root_span_id_differs_from_trace_id(self): + """The root span must have a different ID from the Opik trace to maintain correct hierarchy.""" + trace_info = _make_workflow_trace_info(message_id=None) + dify_trace_id = trace_info.trace_id or trace_info.workflow_run_id + opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id) + root_span_id = self._expected_root_span_id(trace_info) + assert root_span_id != opik_trace_id + + +# --------------------------------------------------------------------------- +# workflow_trace — with message_id (unchanged path, guard against regression) +# --------------------------------------------------------------------------- + + +class TestWorkflowTraceWithMessageId: + _MESSAGE_ID = str(uuid.uuid4()) + + def _run(self, trace_info: WorkflowTraceInfo, node_executions: list | None = None): + instance = _make_opik_trace_instance() + fake_repo = MagicMock() + fake_repo.get_by_workflow_run.return_value = node_executions or [] + + with ( + patch("core.ops.opik_trace.opik_trace.db") as mock_db, + patch("core.ops.opik_trace.opik_trace.sessionmaker"), + patch( + "core.ops.opik_trace.opik_trace.DifyCoreRepositoryFactory.create_workflow_node_execution_repository", + return_value=fake_repo, + ), + ): + mock_db.engine = MagicMock() + instance.workflow_trace(trace_info) + + return instance + + def test_trace_name_is_message_trace(self): + """With message_id, the Opik trace should be named MESSAGE_TRACE.""" + trace_info = _make_workflow_trace_info(message_id=self._MESSAGE_ID) + instance = self._run(trace_info) + + trace_kwargs = instance.add_trace.call_args_list[0][0][0] + assert trace_kwargs["name"] == TraceTaskName.MESSAGE_TRACE + + def test_root_span_uses_workflow_run_id_directly(self): + """When message_id is set, root_span_id = prepare_opik_uuid(start_time, workflow_run_id).""" + trace_info = _make_workflow_trace_info(message_id=self._MESSAGE_ID) + instance = self._run(trace_info) + + expected_root_span_id = prepare_opik_uuid(trace_info.start_time, trace_info.workflow_run_id) + root_span_kwargs = instance.add_span.call_args_list[0][0][0] + assert root_span_kwargs["id"] == expected_root_span_id + + def test_root_span_id_differs_from_no_message_id_case(self): + """The two branches must produce different root span IDs for the same workflow_run_id.""" + id_with_message = prepare_opik_uuid( + datetime(2025, 1, 1, 12, 0, 0), + _WORKFLOW_RUN_ID, + ) + id_without_message = prepare_opik_uuid( + datetime(2025, 1, 1, 12, 0, 0), + _seed_to_uuid4(_WORKFLOW_RUN_ID + "-root"), + ) + assert id_with_message != id_without_message + + def test_node_spans_parented_to_workflow_run_root_span(self): + """Node spans must still parent to root_span_id derived from workflow_run_id.""" + trace_info = _make_workflow_trace_info(message_id=self._MESSAGE_ID) + expected_root_span_id = prepare_opik_uuid(trace_info.start_time, trace_info.workflow_run_id) + + node_exec = MagicMock() + node_exec.id = str(uuid.uuid4()) + node_exec.title = "LLM" + node_exec.node_type = "llm" + node_exec.status = "succeeded" + node_exec.process_data = {} + node_exec.inputs = {} + node_exec.outputs = {} + node_exec.created_at = datetime(2025, 1, 1, 12, 0, 0) + node_exec.elapsed_time = 0.3 + node_exec.metadata = {} + + instance = self._run(trace_info, node_executions=[node_exec]) + + node_span_kwargs = instance.add_span.call_args_list[1][0][0] + assert node_span_kwargs["parent_span_id"] == expected_root_span_id diff --git a/api/tests/unit_tests/core/ops/test_ops_trace_manager.py b/api/tests/unit_tests/core/ops/test_ops_trace_manager.py new file mode 100644 index 0000000000..2d325ccb0e --- /dev/null +++ b/api/tests/unit_tests/core/ops/test_ops_trace_manager.py @@ -0,0 +1,576 @@ +import contextlib +import json +import queue +from datetime import datetime, timedelta +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from core.ops.ops_trace_manager import ( + OpsTraceManager, + TraceQueueManager, + TraceTask, + TraceTaskName, +) + + +class DummyConfig: + def __init__(self, **kwargs): + self._data = kwargs + + def model_dump(self): + return dict(self._data) + + +class DummyTraceInstance: + instances: list["DummyTraceInstance"] = [] + + def __init__(self, config): + self.config = config + DummyTraceInstance.instances.append(self) + + def api_check(self): + return True + + def get_project_key(self): + return "fake-key" + + def get_project_url(self): + return "https://project.fake" + + +FAKE_PROVIDER_ENTRY = { + "config_class": DummyConfig, + "secret_keys": ["secret_value"], + "other_keys": ["other_value"], + "trace_instance": DummyTraceInstance, +} + + +class FakeProviderMap: + def __init__(self, data): + self._data = data + + def __getitem__(self, key): + if key in self._data: + return self._data[key] + raise KeyError(f"Unsupported tracing provider: {key}") + + +class DummyTimer: + def __init__(self, interval, function): + self.interval = interval + self.function = function + self.name = "" + self.daemon = False + self.started = False + + def start(self): + self.started = True + + def is_alive(self): + return False + + +class FakeMessageFile: + def __init__(self): + self.url = "path/to/file" + self.id = "file-id" + self.type = "document" + self.created_by_role = "role" + self.created_by = "user" + + +def make_message_data(**overrides): + created_at = datetime(2025, 2, 20, 12, 0, 0) + base = { + "id": "msg-id", + "conversation_id": "conv-id", + "created_at": created_at, + "updated_at": created_at + timedelta(seconds=3), + "message": "hello", + "provider_response_latency": 1, + "message_tokens": 5, + "answer_tokens": 7, + "answer": "world", + "error": "", + "status": "complete", + "model_provider": "provider", + "model_id": "model", + "from_end_user_id": "end-user", + "from_account_id": "account", + "agent_based": False, + "workflow_run_id": "workflow-run", + "from_source": "source", + "message_metadata": json.dumps({"usage": {"time_to_first_token": 1, "time_to_generate": 2}}), + "agent_thoughts": [], + "query": "sample-query", + "inputs": "sample-input", + } + base.update(overrides) + + class MessageData: + def __init__(self, data): + self.__dict__.update(data) + + def to_dict(self): + return dict(self.__dict__) + + return MessageData(base) + + +def make_agent_thought(tool_name, created_at): + return SimpleNamespace( + tools=[tool_name], + created_at=created_at, + tool_meta={ + tool_name: { + "tool_config": {"foo": "bar"}, + "time_cost": 5, + "error": "", + "tool_parameters": {"x": 1}, + } + }, + ) + + +def make_workflow_run(): + return SimpleNamespace( + workflow_id="wf-1", + tenant_id="tenant", + id="run-id", + elapsed_time=10, + status="finished", + inputs_dict={"sys.file": ["f1"], "query": "search"}, + outputs_dict={"out": "value"}, + version="3", + error=None, + total_tokens=12, + workflow_run_id="run-id", + created_at=datetime(2025, 2, 20, 10, 0, 0), + finished_at=datetime(2025, 2, 20, 10, 0, 5), + triggered_from="user", + app_id="app-id", + to_dict=lambda self=None: {"run": "value"}, + ) + + +def configure_db_query(session, *, message_file=None, workflow_app_log=None): + def _side_effect(model): + query = MagicMock() + query.filter_by.return_value.first.return_value = None + if message_file and model.__name__ == "MessageFile": + query.filter_by.return_value.first.return_value = message_file + if workflow_app_log and model.__name__ == "WorkflowAppLog": + query.filter_by.return_value.first.return_value = workflow_app_log + return query + + session.query.side_effect = _side_effect + + +class DummySessionContext: + scalar_values = [] + + def __init__(self, engine): + self._values = list(self.scalar_values) + self._index = 0 + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + return False + + def scalar(self, *args, **kwargs): + if self._index >= len(self._values): + return None + value = self._values[self._index] + self._index += 1 + return value + + +@pytest.fixture(autouse=True) +def patch_provider_map(monkeypatch): + monkeypatch.setattr( + "core.ops.ops_trace_manager.provider_config_map", FakeProviderMap({"dummy": FAKE_PROVIDER_ENTRY}) + ) + OpsTraceManager.ops_trace_instances_cache.clear() + OpsTraceManager.decrypted_configs_cache.clear() + + +@pytest.fixture(autouse=True) +def patch_timer_and_current_app(monkeypatch): + monkeypatch.setattr("core.ops.ops_trace_manager.threading.Timer", DummyTimer) + monkeypatch.setattr("core.ops.ops_trace_manager.trace_manager_queue", queue.Queue()) + monkeypatch.setattr("core.ops.ops_trace_manager.trace_manager_timer", None) + + class FakeApp: + def app_context(self): + return contextlib.nullcontext() + + fake_current = MagicMock() + fake_current._get_current_object.return_value = FakeApp() + monkeypatch.setattr("core.ops.ops_trace_manager.current_app", fake_current) + + +@pytest.fixture(autouse=True) +def patch_sqlalchemy_session(monkeypatch): + monkeypatch.setattr("core.ops.ops_trace_manager.Session", DummySessionContext) + + +@pytest.fixture +def encryption_mocks(monkeypatch): + encrypt_mock = MagicMock(side_effect=lambda tenant, value: f"enc-{value}") + batch_decrypt_mock = MagicMock(side_effect=lambda tenant, values: [f"dec-{value}" for value in values]) + obfuscate_mock = MagicMock(side_effect=lambda value: f"ob-{value}") + monkeypatch.setattr("core.ops.ops_trace_manager.encrypt_token", encrypt_mock) + monkeypatch.setattr("core.ops.ops_trace_manager.batch_decrypt_token", batch_decrypt_mock) + monkeypatch.setattr("core.ops.ops_trace_manager.obfuscated_token", obfuscate_mock) + return encrypt_mock, batch_decrypt_mock, obfuscate_mock + + +@pytest.fixture +def mock_db(monkeypatch): + session = MagicMock() + session.scalars.return_value.all.return_value = ["chat"] + db_mock = MagicMock() + db_mock.session = session + db_mock.engine = MagicMock() + monkeypatch.setattr("core.ops.ops_trace_manager.db", db_mock) + return session + + +@pytest.fixture +def workflow_repo_fixture(monkeypatch): + repo = MagicMock() + repo.get_workflow_run_by_id_without_tenant.return_value = make_workflow_run() + monkeypatch.setattr(TraceTask, "_get_workflow_run_repo", classmethod(lambda cls: repo)) + return repo + + +@pytest.fixture +def trace_task_message(monkeypatch, mock_db): + message_data = make_message_data() + monkeypatch.setattr("core.ops.ops_trace_manager.get_message_data", lambda msg_id: message_data) + configure_db_query(mock_db, message_file=FakeMessageFile(), workflow_app_log=SimpleNamespace(id="log-id")) + return message_data + + +def test_encrypt_tracing_config_handles_star_and_encrypt(encryption_mocks): + encrypted = OpsTraceManager.encrypt_tracing_config( + "tenant", + "dummy", + {"secret_value": "value", "other_value": "info"}, + current_trace_config={"secret_value": "keep"}, + ) + assert encrypted["secret_value"] == "enc-value" + assert encrypted["other_value"] == "info" + + +def test_encrypt_tracing_config_preserves_star(encryption_mocks): + encrypted = OpsTraceManager.encrypt_tracing_config( + "tenant", + "dummy", + {"secret_value": "*", "other_value": "info"}, + current_trace_config={"secret_value": "keep"}, + ) + assert encrypted["secret_value"] == "keep" + + +def test_decrypt_tracing_config_caches(encryption_mocks): + _, decrypt_mock, _ = encryption_mocks + payload = {"secret_value": "enc", "other_value": "info"} + first = OpsTraceManager.decrypt_tracing_config("tenant", "dummy", payload) + second = OpsTraceManager.decrypt_tracing_config("tenant", "dummy", payload) + assert first == second + assert decrypt_mock.call_count == 1 + + +def test_obfuscated_decrypt_token(encryption_mocks): + _, _, obfuscate_mock = encryption_mocks + result = OpsTraceManager.obfuscated_decrypt_token("dummy", {"secret_value": "value", "other_value": "info"}) + assert "secret_value" in result + assert result["secret_value"] == "ob-value" + obfuscate_mock.assert_called_once() + + +def test_get_decrypted_tracing_config_returns_config(encryption_mocks, mock_db): + trace_config_data = SimpleNamespace(tracing_config={"secret_value": "enc", "other_value": "info"}) + mock_db.query.return_value.where.return_value.first.return_value = trace_config_data + app = SimpleNamespace(id="app-id", tenant_id="tenant") + mock_db.scalar.return_value = app + + decrypted = OpsTraceManager.get_decrypted_tracing_config("app-id", "dummy") + assert decrypted["other_value"] == "info" + + +def test_get_decrypted_tracing_config_missing_trace_config(mock_db): + mock_db.query.return_value.where.return_value.first.return_value = None + assert OpsTraceManager.get_decrypted_tracing_config("app-id", "dummy") is None + + +def test_get_decrypted_tracing_config_raises_for_missing_app(mock_db): + trace_config_data = SimpleNamespace(tracing_config={"secret_value": "enc"}) + mock_db.query.return_value.where.return_value.first.return_value = trace_config_data + mock_db.scalar.return_value = None + with pytest.raises(ValueError, match="App not found"): + OpsTraceManager.get_decrypted_tracing_config("app-id", "dummy") + + +def test_get_decrypted_tracing_config_raises_for_none_config(mock_db): + trace_config_data = SimpleNamespace(tracing_config=None) + mock_db.query.return_value.where.return_value.first.return_value = trace_config_data + mock_db.scalar.return_value = SimpleNamespace(tenant_id="tenant") + with pytest.raises(ValueError, match="Tracing config cannot be None"): + OpsTraceManager.get_decrypted_tracing_config("app-id", "dummy") + + +def test_get_ops_trace_instance_handles_none_app(mock_db): + mock_db.query.return_value.where.return_value.first.return_value = None + assert OpsTraceManager.get_ops_trace_instance("app-id") is None + + +def test_get_ops_trace_instance_returns_none_when_disabled(mock_db, monkeypatch): + app = SimpleNamespace(id="app-id", tracing=json.dumps({"enabled": False})) + mock_db.query.return_value.where.return_value.first.return_value = app + assert OpsTraceManager.get_ops_trace_instance("app-id") is None + + +def test_get_ops_trace_instance_invalid_provider(mock_db, monkeypatch): + app = SimpleNamespace(id="app-id", tracing=json.dumps({"enabled": True, "tracing_provider": "missing"})) + mock_db.query.return_value.where.return_value.first.return_value = app + monkeypatch.setattr("core.ops.ops_trace_manager.provider_config_map", FakeProviderMap({})) + assert OpsTraceManager.get_ops_trace_instance("app-id") is None + + +def test_get_ops_trace_instance_success(monkeypatch, mock_db): + app = SimpleNamespace(id="app-id", tracing=json.dumps({"enabled": True, "tracing_provider": "dummy"})) + mock_db.query.return_value.where.return_value.first.return_value = app + monkeypatch.setattr( + "core.ops.ops_trace_manager.OpsTraceManager.get_decrypted_tracing_config", + classmethod(lambda cls, aid, provider: {"secret_value": "decrypted", "other_value": "info"}), + ) + instance = OpsTraceManager.get_ops_trace_instance("app-id") + assert instance is not None + cached_instance = OpsTraceManager.get_ops_trace_instance("app-id") + assert instance is cached_instance + + +def test_get_app_config_through_message_id_returns_none(mock_db): + mock_db.scalar.return_value = None + assert OpsTraceManager.get_app_config_through_message_id("m") is None + + +def test_get_app_config_through_message_id_prefers_override(mock_db): + message = SimpleNamespace(conversation_id="conv") + conversation = SimpleNamespace(app_model_config_id=None, override_model_configs={"foo": "bar"}) + app_config = SimpleNamespace(id="config-id") + mock_db.scalar.side_effect = [message, conversation] + result = OpsTraceManager.get_app_config_through_message_id("m") + assert result == {"foo": "bar"} + + +def test_get_app_config_through_message_id_app_model_config(mock_db): + message = SimpleNamespace(conversation_id="conv") + conversation = SimpleNamespace(app_model_config_id="cfg", override_model_configs=None) + mock_db.scalar.side_effect = [message, conversation, SimpleNamespace(id="cfg")] + result = OpsTraceManager.get_app_config_through_message_id("m") + assert result.id == "cfg" + + +def test_update_app_tracing_config_invalid_provider(mock_db, monkeypatch): + mock_db.query.return_value.where.return_value.first.return_value = None + with pytest.raises(ValueError, match="Invalid tracing provider"): + OpsTraceManager.update_app_tracing_config("app", True, "bad") + with pytest.raises(ValueError, match="App not found"): + OpsTraceManager.update_app_tracing_config("app", True, None) + + +def test_update_app_tracing_config_success(mock_db): + app = SimpleNamespace(id="app-id", tracing="{}") + mock_db.query.return_value.where.return_value.first.return_value = app + OpsTraceManager.update_app_tracing_config("app-id", True, "dummy") + assert app.tracing is not None + mock_db.commit.assert_called_once() + + +def test_get_app_tracing_config_errors_when_missing(mock_db): + mock_db.query.return_value.where.return_value.first.return_value = None + with pytest.raises(ValueError, match="App not found"): + OpsTraceManager.get_app_tracing_config("app") + + +def test_get_app_tracing_config_returns_defaults(mock_db): + mock_db.query.return_value.where.return_value.first.return_value = SimpleNamespace(tracing=None) + assert OpsTraceManager.get_app_tracing_config("app-id") == {"enabled": False, "tracing_provider": None} + + +def test_get_app_tracing_config_returns_payload(mock_db): + payload = {"enabled": True, "tracing_provider": "dummy"} + mock_db.query.return_value.where.return_value.first.return_value = SimpleNamespace(tracing=json.dumps(payload)) + assert OpsTraceManager.get_app_tracing_config("app-id") == payload + + +def test_check_and_project_helpers(monkeypatch): + monkeypatch.setattr( + "core.ops.ops_trace_manager.provider_config_map", + FakeProviderMap( + { + "dummy": { + "config_class": DummyConfig, + "trace_instance": type( + "Trace", + (), + { + "__init__": lambda self, cfg: None, + "api_check": lambda self: True, + "get_project_key": lambda self: "key", + "get_project_url": lambda self: "url", + }, + ), + "secret_keys": [], + "other_keys": [], + } + } + ), + ) + assert OpsTraceManager.check_trace_config_is_effective({}, "dummy") + assert OpsTraceManager.get_trace_config_project_key({}, "dummy") == "key" + assert OpsTraceManager.get_trace_config_project_url({}, "dummy") == "url" + + +def test_trace_task_conversation_and_extract(monkeypatch): + task = TraceTask(trace_type=TraceTaskName.CONVERSATION_TRACE, message_id="msg") + assert task.conversation_trace(foo="bar") == {"foo": "bar"} + assert task._extract_streaming_metrics(make_message_data(message_metadata="not json")) == {} + + +def test_trace_task_message_trace(trace_task_message, mock_db): + task = TraceTask(trace_type=TraceTaskName.MESSAGE_TRACE, message_id="msg-id") + result = task.message_trace("msg-id") + assert result.message_id == "msg-id" + + +def test_trace_task_workflow_trace(workflow_repo_fixture, mock_db): + DummySessionContext.scalar_values = ["wf-app-log", "message-ref"] + execution = SimpleNamespace(id_="run-id") + task = TraceTask( + trace_type=TraceTaskName.WORKFLOW_TRACE, workflow_execution=execution, conversation_id="conv", user_id="user" + ) + result = task.workflow_trace(workflow_run_id="run-id", conversation_id="conv", user_id="user") + assert result.workflow_run_id == "run-id" + assert result.workflow_id == "wf-1" + + +def test_trace_task_moderation_trace(trace_task_message): + task = TraceTask(trace_type=TraceTaskName.MODERATION_TRACE, message_id="msg-id") + moderation_result = SimpleNamespace(action="block", preset_response="no", query="q", flagged=True) + timer = {"start": 1, "end": 2} + result = task.moderation_trace("msg-id", timer, moderation_result=moderation_result, inputs={"src": "payload"}) + assert result.flagged is True + assert result.message_id == "log-id" + + +def test_trace_task_suggested_question_trace(trace_task_message): + task = TraceTask(trace_type=TraceTaskName.SUGGESTED_QUESTION_TRACE, message_id="msg-id") + timer = {"start": 1, "end": 2} + result = task.suggested_question_trace("msg-id", timer, suggested_question=["q1"]) + assert result.message_id == "log-id" + assert "suggested_question" in result.__dict__ + + +def test_trace_task_dataset_retrieval_trace(trace_task_message): + task = TraceTask(trace_type=TraceTaskName.DATASET_RETRIEVAL_TRACE, message_id="msg-id") + timer = {"start": 1, "end": 2} + mock_doc = SimpleNamespace(model_dump=lambda: {"doc": "value"}) + result = task.dataset_retrieval_trace("msg-id", timer, documents=[mock_doc]) + assert result.documents == [{"doc": "value"}] + + +def test_trace_task_tool_trace(monkeypatch, mock_db): + custom_message = make_message_data(agent_thoughts=[make_agent_thought("tool-a", datetime(2025, 2, 20, 12, 1, 0))]) + monkeypatch.setattr("core.ops.ops_trace_manager.get_message_data", lambda _: custom_message) + configure_db_query(mock_db, message_file=FakeMessageFile()) + task = TraceTask(trace_type=TraceTaskName.TOOL_TRACE, message_id="msg-id") + timer = {"start": 1, "end": 5} + result = task.tool_trace("msg-id", timer, tool_name="tool-a", tool_inputs={"foo": 1}, tool_outputs="result") + assert result.tool_name == "tool-a" + assert result.time_cost == 5 + + +def test_trace_task_generate_name_trace(): + task = TraceTask(trace_type=TraceTaskName.GENERATE_NAME_TRACE, conversation_id="conv-id") + timer = {"start": 1, "end": 2} + assert task.generate_name_trace("conv-id", timer, tenant_id=None) == {} + result = task.generate_name_trace( + "conv-id", timer, tenant_id="tenant", generate_conversation_name="name", inputs="q" + ) + assert result.outputs == "name" + assert result.tenant_id == "tenant" + + +def test_extract_streaming_metrics_invalid_json(): + task = TraceTask(trace_type=TraceTaskName.MESSAGE_TRACE, message_id="msg-id") + fake_message = make_message_data(message_metadata="invalid") + assert task._extract_streaming_metrics(fake_message) == {} + + +def test_trace_queue_manager_add_and_collect(monkeypatch): + monkeypatch.setattr( + "core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", classmethod(lambda cls, aid: True) + ) + manager = TraceQueueManager(app_id="app-id", user_id="user") + task = TraceTask(trace_type=TraceTaskName.CONVERSATION_TRACE) + manager.add_trace_task(task) + tasks = manager.collect_tasks() + assert tasks == [task] + + +def test_trace_queue_manager_run_invokes_send(monkeypatch): + monkeypatch.setattr( + "core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", classmethod(lambda cls, aid: True) + ) + manager = TraceQueueManager(app_id="app-id", user_id="user") + task = TraceTask(trace_type=TraceTaskName.CONVERSATION_TRACE) + called = {} + + def fake_collect(): + return [task] + + def fake_send(tasks): + called["tasks"] = tasks + + monkeypatch.setattr(TraceQueueManager, "collect_tasks", lambda self: fake_collect()) + monkeypatch.setattr(TraceQueueManager, "send_to_celery", lambda self, t: fake_send(t)) + manager.run() + assert called["tasks"] == [task] + + +def test_trace_queue_manager_send_to_celery(monkeypatch): + monkeypatch.setattr( + "core.ops.ops_trace_manager.OpsTraceManager.get_ops_trace_instance", classmethod(lambda cls, aid: True) + ) + storage_save = MagicMock() + process_delay = MagicMock() + monkeypatch.setattr("core.ops.ops_trace_manager.storage.save", storage_save) + monkeypatch.setattr("core.ops.ops_trace_manager.process_trace_tasks.delay", process_delay) + monkeypatch.setattr("core.ops.ops_trace_manager.uuid4", MagicMock(return_value=SimpleNamespace(hex="file-123"))) + + manager = TraceQueueManager(app_id="app-id", user_id="user") + + class DummyTraceInfo: + def model_dump(self): + return {"trace": "info"} + + class DummyTask: + def __init__(self): + self.app_id = "app-id" + + def execute(self): + return DummyTraceInfo() + + task = DummyTask() + manager.send_to_celery([task]) + storage_save.assert_called_once() + process_delay.assert_called_once_with({"file_id": "file-123", "app_id": "app-id"}) diff --git a/api/tests/unit_tests/core/ops/test_utils.py b/api/tests/unit_tests/core/ops/test_utils.py index e1084001b7..8a89422782 100644 --- a/api/tests/unit_tests/core/ops/test_utils.py +++ b/api/tests/unit_tests/core/ops/test_utils.py @@ -1,9 +1,20 @@ import re from datetime import datetime +from unittest.mock import MagicMock, patch import pytest -from core.ops.utils import generate_dotted_order, validate_project_name, validate_url, validate_url_with_path +from core.ops.utils import ( + filter_none_values, + generate_dotted_order, + get_message_data, + measure_time, + replace_text_with_content, + validate_integer_id, + validate_project_name, + validate_url, + validate_url_with_path, +) class TestValidateUrl: @@ -187,3 +198,92 @@ class TestGenerateDottedOrder: result = generate_dotted_order(run_id, start_time, None) assert "." not in result + + def test_dotted_order_with_string_start_time(self): + """Test dotted_order generation with string start_time.""" + start_time = "2025-12-23T04:19:55.111000" + run_id = "test-run-id" + result = generate_dotted_order(run_id, start_time) + + assert result == "20251223T041955111000Ztest-run-id" + + +class TestFilterNoneValues: + """Test cases for filter_none_values function""" + + def test_filter_none_values(self): + data = {"a": 1, "b": None, "c": "test", "d": datetime(2025, 1, 1, 12, 0, 0)} + result = filter_none_values(data) + assert result == {"a": 1, "c": "test", "d": "2025-01-01T12:00:00"} + + def test_filter_none_values_empty(self): + assert filter_none_values({}) == {} + + +class TestGetMessageData: + """Test cases for get_message_data function""" + + @patch("core.ops.utils.db") + @patch("core.ops.utils.Message") + @patch("core.ops.utils.select") + def test_get_message_data(self, mock_select, mock_message, mock_db): + mock_scalar = mock_db.session.scalar + mock_msg_instance = MagicMock() + mock_scalar.return_value = mock_msg_instance + + result = get_message_data("message-id") + + assert result == mock_msg_instance + mock_select.assert_called_once() + mock_scalar.assert_called_once() + + +class TestMeasureTime: + """Test cases for measure_time function""" + + def test_measure_time(self): + with measure_time() as timing_info: + assert "start" in timing_info + assert isinstance(timing_info["start"], datetime) + assert timing_info["end"] is None + + assert timing_info["end"] is not None + assert isinstance(timing_info["end"], datetime) + assert timing_info["end"] >= timing_info["start"] + + +class TestReplaceTextWithContent: + """Test cases for replace_text_with_content function""" + + def test_replace_text_with_content_dict(self): + data = {"text": "hello", "other": "world"} + assert replace_text_with_content(data) == {"content": "hello", "other": "world"} + + def test_replace_text_with_content_nested(self): + data = {"text": "v1", "nested": {"text": "v2", "list": [{"text": "v3"}]}} + expected = {"content": "v1", "nested": {"content": "v2", "list": [{"content": "v3"}]}} + assert replace_text_with_content(data) == expected + + def test_replace_text_with_content_list(self): + data = [{"text": "v1"}, "v2"] + assert replace_text_with_content(data) == [{"content": "v1"}, "v2"] + + def test_replace_text_with_content_primitive(self): + assert replace_text_with_content(123) == 123 + assert replace_text_with_content("text") == "text" + + +class TestValidateIntegerId: + """Test cases for validate_integer_id function""" + + def test_valid_integer_id(self): + assert validate_integer_id("123") == "123" + assert validate_integer_id(" 456 ") == "456" + + def test_invalid_integer_id_raises_error(self): + with pytest.raises(ValueError, match="ID must be a valid integer"): + validate_integer_id("abc") + + def test_empty_integer_id_raises_error(self): + with pytest.raises(ValueError, match="ID must be a valid integer"): + validate_integer_id("") diff --git a/api/tests/unit_tests/core/ops/weave_trace/test_weave_trace.py b/api/tests/unit_tests/core/ops/weave_trace/test_weave_trace.py new file mode 100644 index 0000000000..8057bbbad5 --- /dev/null +++ b/api/tests/unit_tests/core/ops/weave_trace/test_weave_trace.py @@ -0,0 +1,1196 @@ +"""Comprehensive tests for core.ops.weave_trace.weave_trace module.""" + +from __future__ import annotations + +from datetime import UTC, datetime, timedelta +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from weave.trace_server.trace_server_interface import TraceStatus + +from core.ops.entities.config_entity import WeaveConfig +from core.ops.entities.trace_entity import ( + DatasetRetrievalTraceInfo, + GenerateNameTraceInfo, + MessageTraceInfo, + ModerationTraceInfo, + SuggestedQuestionTraceInfo, + ToolTraceInfo, + TraceTaskName, + WorkflowTraceInfo, +) +from core.ops.weave_trace.entities.weave_trace_entity import WeaveTraceModel +from core.ops.weave_trace.weave_trace import WeaveDataTrace +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +def _dt() -> datetime: + return datetime(2024, 1, 1, 0, 0, 0, tzinfo=UTC) + + +def _make_weave_config(**overrides) -> WeaveConfig: + defaults = { + "api_key": "wv-api-key", + "project": "my-project", + "entity": "my-entity", + "host": None, + } + defaults.update(overrides) + return WeaveConfig(**defaults) + + +def _make_workflow_trace_info(**overrides) -> WorkflowTraceInfo: + defaults = { + "workflow_id": "wf-id", + "tenant_id": "tenant-1", + "workflow_run_id": "run-1", + "workflow_run_elapsed_time": 1.0, + "workflow_run_status": "succeeded", + "workflow_run_inputs": {"key": "val"}, + "workflow_run_outputs": {"answer": "42"}, + "workflow_run_version": "v1", + "total_tokens": 10, + "file_list": [], + "query": "hello", + "metadata": {"user_id": "u1", "app_id": "app-1"}, + "start_time": _dt(), + "end_time": _dt() + timedelta(seconds=1), + } + defaults.update(overrides) + return WorkflowTraceInfo(**defaults) + + +def _make_message_trace_info(**overrides) -> MessageTraceInfo: + msg_data = MagicMock() + msg_data.id = "msg-1" + msg_data.from_account_id = "acc-1" + msg_data.from_end_user_id = None + defaults = { + "conversation_model": "chat", + "message_tokens": 5, + "answer_tokens": 10, + "total_tokens": 15, + "conversation_mode": "chat", + "metadata": {"conversation_id": "c1"}, + "message_id": "msg-1", + "message_data": msg_data, + "inputs": {"prompt": "hi"}, + "outputs": "ok", + "start_time": _dt(), + "end_time": _dt() + timedelta(seconds=1), + "error": None, + } + defaults.update(overrides) + return MessageTraceInfo(**defaults) + + +def _make_moderation_trace_info(**overrides) -> ModerationTraceInfo: + defaults = { + "flagged": False, + "action": "allow", + "preset_response": "", + "query": "test", + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + } + defaults.update(overrides) + return ModerationTraceInfo(**defaults) + + +def _make_suggested_question_trace_info(**overrides) -> SuggestedQuestionTraceInfo: + defaults = { + "suggested_question": ["q1", "q2"], + "level": "info", + "total_tokens": 5, + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "message_data": SimpleNamespace(created_at=_dt(), updated_at=_dt()), + "inputs": {"i": 1}, + "start_time": _dt(), + "end_time": _dt() + timedelta(seconds=1), + "error": None, + } + defaults.update(overrides) + return SuggestedQuestionTraceInfo(**defaults) + + +def _make_dataset_retrieval_trace_info(**overrides) -> DatasetRetrievalTraceInfo: + msg_data = MagicMock() + msg_data.created_at = _dt() + msg_data.updated_at = _dt() + defaults = { + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "message_data": msg_data, + "inputs": "query", + "documents": [{"content": "doc"}], + "start_time": _dt(), + "end_time": _dt() + timedelta(seconds=1), + } + defaults.update(overrides) + return DatasetRetrievalTraceInfo(**defaults) + + +def _make_tool_trace_info(**overrides) -> ToolTraceInfo: + defaults = { + "tool_name": "my_tool", + "tool_inputs": {"x": 1}, + "tool_outputs": "output", + "tool_config": {"desc": "d"}, + "tool_parameters": {"p": "v"}, + "time_cost": 0.5, + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "inputs": {"i": "v"}, + "outputs": {"o": "v"}, + "start_time": _dt(), + "end_time": _dt() + timedelta(seconds=1), + "error": None, + } + defaults.update(overrides) + return ToolTraceInfo(**defaults) + + +def _make_generate_name_trace_info(**overrides) -> GenerateNameTraceInfo: + defaults = { + "tenant_id": "t1", + "metadata": {"user_id": "u1"}, + "message_id": "msg-1", + "inputs": {"i": 1}, + "outputs": {"name": "test"}, + "start_time": _dt(), + "end_time": _dt() + timedelta(seconds=1), + } + defaults.update(overrides) + return GenerateNameTraceInfo(**defaults) + + +def _make_node(**overrides): + """Create a mock workflow node execution object.""" + defaults = { + "id": "node-1", + "title": "Node Title", + "node_type": BuiltinNodeTypes.CODE, + "status": "succeeded", + "inputs": {"key": "value"}, + "outputs": {"result": "ok"}, + "created_at": _dt(), + "elapsed_time": 1.0, + "process_data": None, + "metadata": {}, + } + defaults.update(overrides) + return SimpleNamespace(**defaults) + + +# ── Fixtures ───────────────────────────────────────────────────────────────── + + +@pytest.fixture +def mock_wandb(): + with patch("core.ops.weave_trace.weave_trace.wandb") as mock: + mock.login.return_value = True + yield mock + + +@pytest.fixture +def mock_weave(): + with patch("core.ops.weave_trace.weave_trace.weave") as mock: + client = MagicMock() + client.entity = "my-entity" + client.project = "my-project" + mock.init.return_value = client + yield mock, client + + +@pytest.fixture +def trace_instance(mock_wandb, mock_weave): + """Create a WeaveDataTrace instance with mocked wandb/weave.""" + _, weave_client = mock_weave + config = _make_weave_config() + instance = WeaveDataTrace(config) + return instance + + +@pytest.fixture +def trace_instance_with_host(mock_wandb, mock_weave): + """Create a WeaveDataTrace instance with host configured.""" + _, weave_client = mock_weave + config = _make_weave_config(host="https://my.wandb.host") + instance = WeaveDataTrace(config) + return instance + + +# ── TestInit ───────────────────────────────────────────────────────────────── + + +class TestInit: + def test_init_without_host(self, mock_wandb, mock_weave): + """Test __init__ calls wandb.login without host.""" + mock_w, weave_client = mock_weave + config = _make_weave_config(host=None) + instance = WeaveDataTrace(config) + + mock_wandb.login.assert_called_once_with(key="wv-api-key", verify=True, relogin=True) + mock_w.init.assert_called_once_with(project_name="my-entity/my-project") + assert instance.weave_api_key == "wv-api-key" + assert instance.project_name == "my-project" + assert instance.entity == "my-entity" + assert instance.calls == {} + + def test_init_with_host(self, mock_wandb, mock_weave): + """Test __init__ calls wandb.login with host.""" + config = _make_weave_config(host="https://my.wandb.host") + instance = WeaveDataTrace(config) + + mock_wandb.login.assert_called_once_with( + key="wv-api-key", verify=True, relogin=True, host="https://my.wandb.host" + ) + assert instance.host == "https://my.wandb.host" + + def test_init_without_entity(self, mock_wandb, mock_weave): + """Test __init__ initializes weave without entity prefix when entity is None.""" + mock_w, weave_client = mock_weave + config = _make_weave_config(entity=None) + instance = WeaveDataTrace(config) + + mock_w.init.assert_called_once_with(project_name="my-project") + + def test_init_login_failure_raises(self, mock_wandb, mock_weave): + """Test __init__ raises ValueError when wandb.login returns False.""" + mock_wandb.login.return_value = False + config = _make_weave_config() + + with pytest.raises(ValueError, match="Weave login failed"): + WeaveDataTrace(config) + + def test_init_files_url_from_env(self, mock_wandb, mock_weave, monkeypatch): + """Test FILES_URL is read from environment.""" + monkeypatch.setenv("FILES_URL", "http://files.example.com") + config = _make_weave_config() + instance = WeaveDataTrace(config) + assert instance.file_base_url == "http://files.example.com" + + def test_init_files_url_default(self, mock_wandb, mock_weave, monkeypatch): + """Test FILES_URL defaults to http://127.0.0.1:5001.""" + monkeypatch.delenv("FILES_URL", raising=False) + config = _make_weave_config() + instance = WeaveDataTrace(config) + assert instance.file_base_url == "http://127.0.0.1:5001" + + def test_project_id_set_correctly(self, trace_instance): + """Test that project_id is set from weave_client entity/project.""" + assert trace_instance.project_id == "my-entity/my-project" + + +# ── TestGetProjectUrl ───────────────────────────────────────────────────────── + + +class TestGetProjectUrl: + def test_get_project_url_with_entity(self, trace_instance): + """Returns wandb URL with entity/project.""" + url = trace_instance.get_project_url() + assert url == "https://wandb.ai/my-entity/my-project" + + def test_get_project_url_without_entity(self, mock_wandb, mock_weave): + """Returns wandb URL with project only when entity is None.""" + config = _make_weave_config(entity=None) + instance = WeaveDataTrace(config) + url = instance.get_project_url() + assert url == "https://wandb.ai/my-project" + + def test_get_project_url_exception_raises(self, trace_instance, monkeypatch): + """Raises ValueError when exception occurs in get_project_url.""" + monkeypatch.setattr(trace_instance, "entity", None) + monkeypatch.setattr(trace_instance, "project_name", None) + # Force an error by making string formatting fail + with patch("core.ops.weave_trace.weave_trace.logger") as mock_logger: + # Simulate exception via property + original_entity = trace_instance.entity + trace_instance.entity = None + trace_instance.project_name = None + url = trace_instance.get_project_url() + assert "https://wandb.ai/" in url + + +# ── TestTraceDispatcher ───────────────────────────────────────────────────── + + +class TestTraceDispatcher: + def test_dispatches_workflow_trace(self, trace_instance): + with patch.object(trace_instance, "workflow_trace") as mock_wt: + trace_instance.trace(_make_workflow_trace_info()) + mock_wt.assert_called_once() + + def test_dispatches_message_trace(self, trace_instance): + with patch.object(trace_instance, "message_trace") as mock_mt: + trace_instance.trace(_make_message_trace_info()) + mock_mt.assert_called_once() + + def test_dispatches_moderation_trace(self, trace_instance): + with patch.object(trace_instance, "moderation_trace") as mock_mod: + msg_data = MagicMock() + msg_data.created_at = _dt() + trace_instance.trace(_make_moderation_trace_info(message_data=msg_data)) + mock_mod.assert_called_once() + + def test_dispatches_suggested_question_trace(self, trace_instance): + with patch.object(trace_instance, "suggested_question_trace") as mock_sq: + trace_instance.trace(_make_suggested_question_trace_info()) + mock_sq.assert_called_once() + + def test_dispatches_dataset_retrieval_trace(self, trace_instance): + with patch.object(trace_instance, "dataset_retrieval_trace") as mock_dr: + trace_instance.trace(_make_dataset_retrieval_trace_info()) + mock_dr.assert_called_once() + + def test_dispatches_tool_trace(self, trace_instance): + with patch.object(trace_instance, "tool_trace") as mock_tool: + trace_instance.trace(_make_tool_trace_info()) + mock_tool.assert_called_once() + + def test_dispatches_generate_name_trace(self, trace_instance): + with patch.object(trace_instance, "generate_name_trace") as mock_gn: + trace_instance.trace(_make_generate_name_trace_info()) + mock_gn.assert_called_once() + + +# ── TestNormalizeTime ───────────────────────────────────────────────────────── + + +class TestNormalizeTime: + def test_none_returns_utc_now(self, trace_instance): + now_before = datetime.now(UTC) + result = trace_instance._normalize_time(None) + now_after = datetime.now(UTC) + assert result.tzinfo is not None + assert now_before <= result <= now_after + + def test_naive_datetime_gets_utc(self, trace_instance): + naive = datetime(2024, 6, 15, 12, 0, 0) + result = trace_instance._normalize_time(naive) + assert result.tzinfo == UTC + assert result.year == 2024 + assert result.month == 6 + + def test_aware_datetime_unchanged(self, trace_instance): + aware = datetime(2024, 6, 15, 12, 0, 0, tzinfo=UTC) + result = trace_instance._normalize_time(aware) + assert result == aware + assert result.tzinfo == UTC + + +# ── TestStartCall ───────────────────────────────────────────────────────────── + + +class TestStartCall: + def test_start_call_basic(self, trace_instance): + """Test basic start_call stores call metadata.""" + run = WeaveTraceModel( + id="run-1", + op="test-op", + inputs={"key": "val"}, + attributes={"trace_id": "t-1", "start_time": _dt()}, + ) + trace_instance.start_call(run) + + assert "run-1" in trace_instance.calls + assert trace_instance.calls["run-1"]["trace_id"] == "t-1" + assert trace_instance.calls["run-1"]["parent_id"] is None + trace_instance.weave_client.server.call_start.assert_called_once() + + def test_start_call_with_parent(self, trace_instance): + """Test start_call records parent_run_id.""" + run = WeaveTraceModel( + id="child-1", + op="child-op", + inputs={}, + attributes={"trace_id": "t-1", "start_time": _dt()}, + ) + trace_instance.start_call(run, parent_run_id="parent-1") + + assert trace_instance.calls["child-1"]["parent_id"] == "parent-1" + + def test_start_call_none_inputs_becomes_empty_dict(self, trace_instance): + """Test that None inputs is normalized to {}.""" + run = WeaveTraceModel( + id="run-2", + op="op", + inputs=None, + attributes={"trace_id": "t-2", "start_time": _dt()}, + ) + trace_instance.start_call(run) + call_args = trace_instance.weave_client.server.call_start.call_args + req = call_args[0][0] + assert req.start.inputs == {} + + def test_start_call_non_dict_inputs_becomes_str_dict(self, trace_instance): + """Test that non-dict inputs is wrapped as string.""" + run = WeaveTraceModel( + id="run-3", + op="op", + inputs="some string input", + attributes={"trace_id": "t-3", "start_time": _dt()}, + ) + trace_instance.start_call(run) + call_args = trace_instance.weave_client.server.call_start.call_args + req = call_args[0][0] + # String inputs gets converted by validator to a dict + assert isinstance(req.start.inputs, dict) + + def test_start_call_none_attributes_becomes_empty_dict(self, trace_instance): + """Test that None attributes is handled properly.""" + run = WeaveTraceModel( + id="run-4", + op="op", + inputs={}, + attributes=None, + ) + trace_instance.start_call(run) + # trace_id should fall back to run_data.id + assert trace_instance.calls["run-4"]["trace_id"] == "run-4" + + def test_start_call_non_dict_attributes_becomes_dict(self, trace_instance): + """Test that non-dict attributes is wrapped.""" + run = WeaveTraceModel( + id="run-5", + op="op", + inputs={}, + attributes=None, + ) + # Manually override after construction + run.attributes = "some-attr-string" + trace_instance.start_call(run) + call_args = trace_instance.weave_client.server.call_start.call_args + req = call_args[0][0] + assert isinstance(req.start.attributes, dict) + assert req.start.attributes == {"attributes": "some-attr-string"} + + def test_start_call_trace_id_falls_back_to_run_id(self, trace_instance): + """When trace_id not in attributes, falls back to run_data.id.""" + run = WeaveTraceModel( + id="run-6", + op="op", + inputs={}, + attributes={"start_time": _dt()}, + ) + trace_instance.start_call(run) + assert trace_instance.calls["run-6"]["trace_id"] == "run-6" + + +# ── TestFinishCall ────────────────────────────────────────────────────────── + + +class TestFinishCall: + def _setup_call(self, trace_instance, run_id="run-1", trace_id="t-1"): + """Helper: register a call so finish_call can find it.""" + trace_instance.calls[run_id] = {"trace_id": trace_id, "parent_id": None} + + def test_finish_call_success(self, trace_instance): + """Test finish_call sends call_end with SUCCESS status.""" + self._setup_call(trace_instance) + run = WeaveTraceModel( + id="run-1", + op="op", + inputs={}, + outputs={"result": "ok"}, + attributes={"start_time": _dt(), "end_time": _dt() + timedelta(seconds=1)}, + exception=None, + ) + trace_instance.finish_call(run) + trace_instance.weave_client.server.call_end.assert_called_once() + call_args = trace_instance.weave_client.server.call_end.call_args + req = call_args[0][0] + assert req.end.summary["status_counts"][TraceStatus.SUCCESS] == 1 + assert req.end.summary["status_counts"][TraceStatus.ERROR] == 0 + assert req.end.exception is None + + def test_finish_call_with_error(self, trace_instance): + """Test finish_call sends call_end with ERROR status when exception is set.""" + self._setup_call(trace_instance) + run = WeaveTraceModel( + id="run-1", + op="op", + inputs={}, + outputs={}, + attributes={"start_time": _dt(), "end_time": _dt() + timedelta(seconds=1)}, + exception="Something broke", + ) + trace_instance.finish_call(run) + call_args = trace_instance.weave_client.server.call_end.call_args + req = call_args[0][0] + assert req.end.summary["status_counts"][TraceStatus.ERROR] == 1 + assert req.end.summary["status_counts"][TraceStatus.SUCCESS] == 0 + assert req.end.exception == "Something broke" + + def test_finish_call_missing_id_raises(self, trace_instance): + """Test finish_call raises ValueError when call id not found.""" + run = WeaveTraceModel( + id="nonexistent", + op="op", + inputs={}, + ) + with pytest.raises(ValueError, match="Call with id nonexistent not found"): + trace_instance.finish_call(run) + + def test_finish_call_elapsed_negative_clamped_to_zero(self, trace_instance): + """Test that negative elapsed time is clamped to 0.""" + self._setup_call(trace_instance) + run = WeaveTraceModel( + id="run-1", + op="op", + inputs={}, + attributes={ + "start_time": _dt() + timedelta(seconds=5), + "end_time": _dt(), # end before start + }, + ) + trace_instance.finish_call(run) + call_args = trace_instance.weave_client.server.call_end.call_args + req = call_args[0][0] + assert req.end.summary["weave"]["latency_ms"] == 0 + + def test_finish_call_none_attributes(self, trace_instance): + """Test finish_call handles None attributes.""" + self._setup_call(trace_instance) + run = WeaveTraceModel( + id="run-1", + op="op", + inputs={}, + attributes=None, + ) + trace_instance.finish_call(run) + trace_instance.weave_client.server.call_end.assert_called_once() + + def test_finish_call_non_dict_attributes(self, trace_instance): + """Test finish_call handles non-dict attributes.""" + self._setup_call(trace_instance) + run = WeaveTraceModel( + id="run-1", + op="op", + inputs={}, + attributes=None, + ) + run.attributes = "some string attr" + trace_instance.finish_call(run) + trace_instance.weave_client.server.call_end.assert_called_once() + + +# ── TestWorkflowTrace ───────────────────────────────────────────────────────── + + +class TestWorkflowTrace: + def _setup_repo(self, monkeypatch, nodes=None): + """Helper to patch session/repo dependencies.""" + if nodes is None: + nodes = [] + + repo = MagicMock() + repo.get_by_workflow_run.return_value = nodes + + mock_factory = MagicMock() + mock_factory.create_workflow_node_execution_repository.return_value = repo + + monkeypatch.setattr("core.ops.weave_trace.weave_trace.DifyCoreRepositoryFactory", mock_factory) + monkeypatch.setattr("core.ops.weave_trace.weave_trace.sessionmaker", lambda bind: MagicMock()) + monkeypatch.setattr("core.ops.weave_trace.weave_trace.db", MagicMock(engine="engine")) + return repo + + def test_workflow_trace_no_nodes_no_message_id(self, trace_instance, monkeypatch): + """Workflow trace with no nodes and no message_id.""" + self._setup_repo(monkeypatch, nodes=[]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None) + trace_instance.workflow_trace(trace_info) + + # Only workflow run: start_call and finish_call each called once + assert trace_instance.start_call.call_count == 1 + assert trace_instance.finish_call.call_count == 1 + + def test_workflow_trace_with_message_id(self, trace_instance, monkeypatch): + """Workflow trace with message_id creates both message and workflow runs.""" + self._setup_repo(monkeypatch, nodes=[]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id="msg-1") + trace_instance.workflow_trace(trace_info) + + # message run + workflow run = 2 start_call / finish_call + assert trace_instance.start_call.call_count == 2 + assert trace_instance.finish_call.call_count == 2 + + def test_workflow_trace_with_node_execution(self, trace_instance, monkeypatch): + """Workflow trace iterates node executions and creates node runs.""" + node = _make_node( + id="node-1", + node_type=BuiltinNodeTypes.CODE, + inputs={"k": "v"}, + outputs={"r": "ok"}, + elapsed_time=0.5, + created_at=_dt(), + metadata={WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 5}, + ) + self._setup_repo(monkeypatch, nodes=[node]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None) + trace_instance.workflow_trace(trace_info) + + # workflow run + node run = 2 calls + assert trace_instance.start_call.call_count == 2 + + def test_workflow_trace_with_llm_node(self, trace_instance, monkeypatch): + """LLM node uses process_data prompts as inputs.""" + node = _make_node( + node_type=BuiltinNodeTypes.LLM, + process_data={ + "prompts": [{"role": "user", "content": "hi"}], + "model_mode": "chat", + "model_provider": "openai", + "model_name": "gpt-4", + }, + inputs={"key": "val"}, + ) + self._setup_repo(monkeypatch, nodes=[node]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None) + trace_instance.workflow_trace(trace_info) + + # Check node start_call was called with prompts input + node_call_args = trace_instance.start_call.call_args_list[-1] + node_run = node_call_args[0][0] + # WeaveTraceModel validator wraps list prompts into {"messages": [...]} + # The key "messages" should be present (validator transforms the list) + assert "messages" in node_run.inputs + + def test_workflow_trace_with_non_llm_node_uses_inputs(self, trace_instance, monkeypatch): + """Non-LLM node uses node_execution.inputs directly.""" + node = _make_node( + node_type=BuiltinNodeTypes.TOOL, + inputs={"tool_input": "val"}, + process_data=None, + ) + self._setup_repo(monkeypatch, nodes=[node]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None) + trace_instance.workflow_trace(trace_info) + + # node run inputs should be from node.inputs; validator adds usage_metadata + file_list + node_call_args = trace_instance.start_call.call_args_list[-1] + node_run = node_call_args[0][0] + assert node_run.inputs.get("tool_input") == "val" + + def test_workflow_trace_missing_app_id_raises(self, trace_instance, monkeypatch): + """Raises ValueError when app_id is missing from metadata.""" + monkeypatch.setattr("core.ops.weave_trace.weave_trace.sessionmaker", lambda bind: MagicMock()) + monkeypatch.setattr("core.ops.weave_trace.weave_trace.db", MagicMock(engine="engine")) + + trace_info = _make_workflow_trace_info( + message_id=None, + metadata={"user_id": "u1"}, # no app_id + ) + + with pytest.raises(ValueError, match="No app_id found in trace_info metadata"): + trace_instance.workflow_trace(trace_info) + + def test_workflow_trace_start_time_none_defaults_to_now(self, trace_instance, monkeypatch): + """start_time defaults to datetime.now() when None.""" + self._setup_repo(monkeypatch, nodes=[]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None, start_time=None) + trace_instance.workflow_trace(trace_info) + + assert trace_instance.start_call.call_count == 1 + + def test_workflow_trace_node_created_at_none(self, trace_instance, monkeypatch): + """Node with created_at=None uses datetime.now().""" + node = _make_node(created_at=None, elapsed_time=0.5) + self._setup_repo(monkeypatch, nodes=[node]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None) + trace_instance.workflow_trace(trace_info) + assert trace_instance.start_call.call_count == 2 + + def test_workflow_trace_chat_mode_llm_node_adds_provider(self, trace_instance, monkeypatch): + """Chat mode LLM node adds ls_provider and ls_model_name to attributes.""" + node = _make_node( + node_type=BuiltinNodeTypes.LLM, + process_data={"model_mode": "chat", "model_provider": "openai", "model_name": "gpt-4", "prompts": []}, + ) + self._setup_repo(monkeypatch, nodes=[node]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + start_calls = [] + + def capture_start(run, parent_run_id=None): + start_calls.append((run, parent_run_id)) + + trace_instance.start_call = capture_start + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None) + trace_instance.workflow_trace(trace_info) + + # Last start call is the node run + node_run, _ = start_calls[-1] + assert node_run.attributes.get("ls_provider") == "openai" + assert node_run.attributes.get("ls_model_name") == "gpt-4" + + def test_workflow_trace_nodes_sorted_by_created_at(self, trace_instance, monkeypatch): + """Nodes are sorted by created_at before processing.""" + node1 = _make_node(id="node-b", created_at=_dt() + timedelta(seconds=2)) + node2 = _make_node(id="node-a", created_at=_dt()) + self._setup_repo(monkeypatch, nodes=[node1, node2]) + monkeypatch.setattr(trace_instance, "get_service_account_with_tenant", lambda app_id: MagicMock()) + + processed_ids = [] + + def capture_start(run, parent_run_id=None): + processed_ids.append(run.id) + + trace_instance.start_call = capture_start + trace_instance.finish_call = MagicMock() + + trace_info = _make_workflow_trace_info(message_id=None) + trace_instance.workflow_trace(trace_info) + + # First call = workflow run, then node-a, then node-b + assert processed_ids[1] == "node-a" + assert processed_ids[2] == "node-b" + + +# ── TestMessageTrace ────────────────────────────────────────────────────────── + + +class TestMessageTrace: + def test_returns_early_when_no_message_data(self, trace_instance): + """message_trace returns early when message_data is None.""" + trace_info = _make_message_trace_info(message_data=None) + trace_instance.start_call = MagicMock() + trace_instance.message_trace(trace_info) + trace_instance.start_call.assert_not_called() + + def test_basic_message_trace(self, trace_instance, monkeypatch): + """message_trace creates message run and llm child run.""" + monkeypatch.setattr( + "core.ops.weave_trace.weave_trace.db.session.query", + lambda model: MagicMock(where=lambda: MagicMock(first=lambda: None)), + ) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_message_trace_info() + trace_instance.message_trace(trace_info) + + # message run + llm child run + assert trace_instance.start_call.call_count == 2 + assert trace_instance.finish_call.call_count == 2 + + def test_message_trace_with_file_data(self, trace_instance, monkeypatch): + """message_trace appends file URL to file_list.""" + file_data = MagicMock() + file_data.url = "path/to/file.png" + trace_instance.file_base_url = "http://files.test" + + mock_db = MagicMock() + mock_db.session.query.return_value.where.return_value.first.return_value = None + monkeypatch.setattr("core.ops.weave_trace.weave_trace.db", mock_db) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_message_trace_info( + message_file_data=file_data, + file_list=["existing.txt"], + ) + trace_instance.message_trace(trace_info) + + # The first start_call arg (the message run) should have file in outputs or inputs + message_run = trace_instance.start_call.call_args_list[0][0][0] + assert "http://files.test/path/to/file.png" in message_run.file_list + + def test_message_trace_with_end_user(self, trace_instance, monkeypatch): + """message_trace looks up end user and sets end_user_id attribute.""" + end_user = MagicMock() + end_user.session_id = "session-xyz" + + mock_db = MagicMock() + mock_db.session.query.return_value.where.return_value.first.return_value = end_user + monkeypatch.setattr("core.ops.weave_trace.weave_trace.db", mock_db) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + msg_data = MagicMock() + msg_data.id = "msg-1" + msg_data.from_account_id = "acc-1" + msg_data.from_end_user_id = "eu-1" + + trace_info = _make_message_trace_info(message_data=msg_data) + trace_instance.message_trace(trace_info) + + message_run = trace_instance.start_call.call_args_list[0][0][0] + assert message_run.attributes.get("end_user_id") == "session-xyz" + + def test_message_trace_no_end_user(self, trace_instance, monkeypatch): + """message_trace handles when from_end_user_id is None.""" + mock_db = MagicMock() + mock_db.session.query.return_value.where.return_value.first.return_value = None + monkeypatch.setattr("core.ops.weave_trace.weave_trace.db", mock_db) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + msg_data = MagicMock() + msg_data.id = "msg-1" + msg_data.from_account_id = "acc-1" + msg_data.from_end_user_id = None + + trace_info = _make_message_trace_info(message_data=msg_data) + trace_instance.message_trace(trace_info) + assert trace_instance.start_call.call_count == 2 + + def test_message_trace_trace_id_fallback_to_message_id(self, trace_instance, monkeypatch): + """trace_id falls back to message_id when trace_id is None.""" + mock_db = MagicMock() + mock_db.session.query.return_value.where.return_value.first.return_value = None + monkeypatch.setattr("core.ops.weave_trace.weave_trace.db", mock_db) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_message_trace_info(trace_id=None) + trace_instance.message_trace(trace_info) + + message_run = trace_instance.start_call.call_args_list[0][0][0] + assert message_run.id == "msg-1" + + def test_message_trace_file_list_none(self, trace_instance, monkeypatch): + """message_trace handles file_list=None gracefully.""" + mock_db = MagicMock() + mock_db.session.query.return_value.where.return_value.first.return_value = None + monkeypatch.setattr("core.ops.weave_trace.weave_trace.db", mock_db) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_message_trace_info(file_list=None, message_file_data=None) + trace_instance.message_trace(trace_info) + assert trace_instance.start_call.call_count == 2 + + +# ── TestModerationTrace ─────────────────────────────────────────────────────── + + +class TestModerationTrace: + def test_returns_early_when_no_message_data(self, trace_instance): + """moderation_trace returns early when message_data is None.""" + trace_info = _make_moderation_trace_info(message_data=None) + trace_instance.start_call = MagicMock() + trace_instance.moderation_trace(trace_info) + trace_instance.start_call.assert_not_called() + + def test_basic_moderation_trace(self, trace_instance): + """moderation_trace creates a run with correct outputs.""" + msg_data = MagicMock() + msg_data.created_at = _dt() + msg_data.updated_at = _dt() + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_moderation_trace_info( + message_data=msg_data, + start_time=_dt(), + end_time=_dt() + timedelta(seconds=1), + action="block", + flagged=True, + preset_response="blocked", + ) + trace_instance.moderation_trace(trace_info) + + trace_instance.start_call.assert_called_once() + trace_instance.finish_call.assert_called_once() + + run = trace_instance.start_call.call_args[0][0] + assert run.outputs["action"] == "block" + assert run.outputs["flagged"] is True + + def test_moderation_trace_with_no_times_uses_message_data_times(self, trace_instance): + """When start/end times are None, uses message_data created_at/updated_at.""" + msg_data = MagicMock() + msg_data.created_at = _dt() + msg_data.updated_at = _dt() + timedelta(seconds=1) + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_moderation_trace_info( + message_data=msg_data, + start_time=None, + end_time=None, + ) + trace_instance.moderation_trace(trace_info) + trace_instance.start_call.assert_called_once() + + def test_moderation_trace_trace_id_fallback(self, trace_instance): + """trace_id falls back to message_id when trace_id is None.""" + msg_data = MagicMock() + msg_data.created_at = _dt() + + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_moderation_trace_info( + message_data=msg_data, + trace_id=None, + ) + trace_instance.moderation_trace(trace_info) + + _, kwargs = trace_instance.start_call.call_args + assert kwargs.get("parent_run_id") == "msg-1" + + +# ── TestSuggestedQuestionTrace ──────────────────────────────────────────────── + + +class TestSuggestedQuestionTrace: + def test_returns_early_when_no_message_data(self, trace_instance): + """suggested_question_trace returns early when message_data is None.""" + trace_info = _make_suggested_question_trace_info(message_data=None) + trace_instance.start_call = MagicMock() + trace_instance.suggested_question_trace(trace_info) + trace_instance.start_call.assert_not_called() + + def test_basic_suggested_question_trace(self, trace_instance): + """suggested_question_trace creates a run parented to trace_id.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_suggested_question_trace_info(trace_id="t-1") + trace_instance.suggested_question_trace(trace_info) + + trace_instance.start_call.assert_called_once() + trace_instance.finish_call.assert_called_once() + + _, kwargs = trace_instance.start_call.call_args + assert kwargs.get("parent_run_id") == "t-1" + + def test_suggested_question_trace_trace_id_fallback(self, trace_instance): + """trace_id falls back to message_id when trace_id is None.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_suggested_question_trace_info(trace_id=None) + trace_instance.suggested_question_trace(trace_info) + + _, kwargs = trace_instance.start_call.call_args + assert kwargs.get("parent_run_id") == "msg-1" + + +# ── TestDatasetRetrievalTrace ───────────────────────────────────────────────── + + +class TestDatasetRetrievalTrace: + def test_returns_early_when_no_message_data(self, trace_instance): + """dataset_retrieval_trace returns early when message_data is None.""" + trace_info = _make_dataset_retrieval_trace_info(message_data=None) + trace_instance.start_call = MagicMock() + trace_instance.dataset_retrieval_trace(trace_info) + trace_instance.start_call.assert_not_called() + + def test_basic_dataset_retrieval_trace(self, trace_instance): + """dataset_retrieval_trace creates a run with documents as outputs.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_dataset_retrieval_trace_info( + documents=[{"id": "d1"}, {"id": "d2"}], + trace_id="t-1", + ) + trace_instance.dataset_retrieval_trace(trace_info) + + run = trace_instance.start_call.call_args[0][0] + # WeaveTraceModel validator injects usage_metadata/file_list into dict outputs + assert run.outputs.get("documents") == [{"id": "d1"}, {"id": "d2"}] + _, kwargs = trace_instance.start_call.call_args + assert kwargs.get("parent_run_id") == "t-1" + + def test_dataset_retrieval_trace_trace_id_fallback(self, trace_instance): + """trace_id falls back to message_id when trace_id is None.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_dataset_retrieval_trace_info(trace_id=None) + trace_instance.dataset_retrieval_trace(trace_info) + + _, kwargs = trace_instance.start_call.call_args + assert kwargs.get("parent_run_id") == "msg-1" + + +# ── TestToolTrace ───────────────────────────────────────────────────────────── + + +class TestToolTrace: + def test_basic_tool_trace(self, trace_instance): + """tool_trace creates a run with correct op as tool_name.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_tool_trace_info(trace_id="t-1") + trace_instance.tool_trace(trace_info) + + run = trace_instance.start_call.call_args[0][0] + assert run.op == "my_tool" + # WeaveTraceModel validator injects usage_metadata/file_list into dict inputs + assert run.inputs.get("x") == 1 + + def test_tool_trace_with_file_url(self, trace_instance): + """tool_trace adds file_url to file_list when provided.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_tool_trace_info(file_url="http://files/file.pdf") + trace_instance.tool_trace(trace_info) + + run = trace_instance.start_call.call_args[0][0] + assert "http://files/file.pdf" in run.file_list + + def test_tool_trace_without_file_url(self, trace_instance): + """tool_trace uses empty file_list when file_url is None.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_tool_trace_info(file_url=None) + trace_instance.tool_trace(trace_info) + + run = trace_instance.start_call.call_args[0][0] + assert run.file_list == [] + + def test_tool_trace_trace_id_from_message_id(self, trace_instance): + """trace_id uses message_id fallback.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_tool_trace_info(trace_id=None) + trace_instance.tool_trace(trace_info) + + _, kwargs = trace_instance.start_call.call_args + assert kwargs.get("parent_run_id") == "msg-1" + + def test_tool_trace_message_id_none_uses_conversation_id(self, trace_instance): + """When message_id is None, tries conversation_id attribute.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_tool_trace_info(trace_id=None, message_id=None) + trace_instance.tool_trace(trace_info) + + # No crash; parent_run_id is None since no fallback + _, kwargs = trace_instance.start_call.call_args + # parent_run_id should be None when no message_id and no trace_id + assert kwargs.get("parent_run_id") is None + + +# ── TestGenerateNameTrace ───────────────────────────────────────────────────── + + +class TestGenerateNameTrace: + def test_basic_generate_name_trace(self, trace_instance): + """generate_name_trace creates a run with correct op.""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_generate_name_trace_info() + trace_instance.generate_name_trace(trace_info) + + trace_instance.start_call.assert_called_once() + trace_instance.finish_call.assert_called_once() + + run = trace_instance.start_call.call_args[0][0] + assert run.op == str(TraceTaskName.GENERATE_NAME_TRACE) + + def test_generate_name_trace_no_parent(self, trace_instance): + """generate_name_trace has no parent run (no parent_run_id).""" + trace_instance.start_call = MagicMock() + trace_instance.finish_call = MagicMock() + + trace_info = _make_generate_name_trace_info() + trace_instance.generate_name_trace(trace_info) + + _, kwargs = trace_instance.start_call.call_args + # No parent_run_id passed to generate_name start_call + assert kwargs == {} or kwargs.get("parent_run_id") is None + + +# ── TestApiCheck ────────────────────────────────────────────────────────────── + + +class TestApiCheck: + def test_api_check_success_without_host(self, trace_instance, mock_wandb): + """api_check returns True on successful login without host.""" + trace_instance.host = None + mock_wandb.login.return_value = True + + result = trace_instance.api_check() + + assert result is True + mock_wandb.login.assert_called_with(key=trace_instance.weave_api_key, verify=True, relogin=True) + + def test_api_check_success_with_host(self, trace_instance, mock_wandb): + """api_check returns True on successful login with host.""" + trace_instance.host = "https://my.wandb.host" + mock_wandb.login.return_value = True + + result = trace_instance.api_check() + + assert result is True + mock_wandb.login.assert_called_with( + key=trace_instance.weave_api_key, verify=True, relogin=True, host="https://my.wandb.host" + ) + + def test_api_check_login_failure_raises(self, trace_instance, mock_wandb): + """api_check raises ValueError when login returns False.""" + trace_instance.host = None + mock_wandb.login.return_value = False + + with pytest.raises(ValueError, match="Weave API check failed"): + trace_instance.api_check() + + def test_api_check_exception_raises_value_error(self, trace_instance, mock_wandb): + """api_check raises ValueError when wandb.login raises exception.""" + trace_instance.host = None + mock_wandb.login.side_effect = Exception("network error") + + with pytest.raises(ValueError, match="Weave API check failed: network error"): + trace_instance.api_check() diff --git a/api/tests/unit_tests/core/plugin/impl/__init__.py b/api/tests/unit_tests/core/plugin/impl/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/plugin/impl/test_agent_client.py b/api/tests/unit_tests/core/plugin/impl/test_agent_client.py new file mode 100644 index 0000000000..1537ffacf5 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_agent_client.py @@ -0,0 +1,91 @@ +from types import SimpleNamespace + +from core.plugin.entities.request import PluginInvokeContext +from core.plugin.impl.agent import PluginAgentClient + + +def _agent_provider(name: str = "agent") -> SimpleNamespace: + return SimpleNamespace( + plugin_id="org/plugin", + declaration=SimpleNamespace( + identity=SimpleNamespace(name=name), + strategies=[SimpleNamespace(identity=SimpleNamespace(provider=""))], + ), + ) + + +class TestPluginAgentClient: + def test_fetch_agent_strategy_providers(self, mocker): + client = PluginAgentClient() + provider = _agent_provider("remote") + + def fake_request(method, path, type_, **kwargs): + transformer = kwargs["transformer"] + payload = { + "data": [ + { + "declaration": { + "identity": {"name": "remote"}, + "strategies": [{"identity": {"provider": "old"}}], + } + } + ] + } + transformed = transformer(payload) + assert transformed["data"][0]["declaration"]["strategies"][0]["identity"]["provider"] == "remote" + return [provider] + + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = client.fetch_agent_strategy_providers("tenant-1") + + assert request_mock.call_count == 1 + assert result[0].declaration.identity.name == "org/plugin/remote" + assert result[0].declaration.strategies[0].identity.provider == "org/plugin/remote" + + def test_fetch_agent_strategy_provider(self, mocker): + client = PluginAgentClient() + provider = _agent_provider("provider") + + def fake_request(method, path, type_, **kwargs): + transformer = kwargs["transformer"] + assert transformer({"data": None}) == {"data": None} + payload = {"data": {"declaration": {"strategies": [{"identity": {"provider": "old"}}]}}} + transformed = transformer(payload) + assert transformed["data"]["declaration"]["strategies"][0]["identity"]["provider"] == "provider" + return provider + + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = client.fetch_agent_strategy_provider("tenant-1", "org/plugin/provider") + + assert request_mock.call_count == 1 + assert result.declaration.identity.name == "org/plugin/provider" + assert result.declaration.strategies[0].identity.provider == "org/plugin/provider" + + def test_invoke_merges_chunks_and_passes_context(self, mocker): + client = PluginAgentClient() + stream_mock = mocker.patch.object( + client, "_request_with_plugin_daemon_response_stream", return_value=iter(["raw"]) + ) + merge_mock = mocker.patch("core.plugin.impl.agent.merge_blob_chunks", return_value=["merged"]) + context = PluginInvokeContext() + + result = client.invoke( + tenant_id="tenant-1", + user_id="user-1", + agent_provider="org/plugin/provider", + agent_strategy="router", + agent_params={"k": "v"}, + conversation_id="conv-1", + app_id="app-1", + message_id="msg-1", + context=context, + ) + + assert result == ["merged"] + assert merge_mock.call_count == 1 + payload = stream_mock.call_args.kwargs["data"] + assert payload["data"]["agent_strategy_provider"] == "provider" + assert payload["context"] == context.model_dump() + assert stream_mock.call_args.kwargs["headers"]["X-Plugin-ID"] == "org/plugin" diff --git a/api/tests/unit_tests/core/plugin/impl/test_asset_manager.py b/api/tests/unit_tests/core/plugin/impl/test_asset_manager.py new file mode 100644 index 0000000000..5f564062d5 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_asset_manager.py @@ -0,0 +1,45 @@ +from unittest.mock import MagicMock + +import pytest + +from core.plugin.impl.asset import PluginAssetManager + + +class TestPluginAssetManager: + def test_fetch_asset_success(self, mocker): + manager = PluginAssetManager() + response = MagicMock(status_code=200, content=b"asset-bytes") + request_mock = mocker.patch.object(manager, "_request", return_value=response) + + result = manager.fetch_asset("tenant-1", "asset-1") + + assert result == b"asset-bytes" + request_mock.assert_called_once_with(method="GET", path="plugin/tenant-1/asset/asset-1") + + def test_fetch_asset_not_found_raises(self, mocker): + manager = PluginAssetManager() + mocker.patch.object(manager, "_request", return_value=MagicMock(status_code=404, content=b"")) + + with pytest.raises(ValueError, match="can not found asset asset-1"): + manager.fetch_asset("tenant-1", "asset-1") + + def test_extract_asset_success(self, mocker): + manager = PluginAssetManager() + response = MagicMock(status_code=200, content=b"file-content") + request_mock = mocker.patch.object(manager, "_request", return_value=response) + + result = manager.extract_asset("tenant-1", "org/plugin:1", "README.md") + + assert result == b"file-content" + request_mock.assert_called_once_with( + method="GET", + path="plugin/tenant-1/extract-asset/", + params={"plugin_unique_identifier": "org/plugin:1", "file_path": "README.md"}, + ) + + def test_extract_asset_not_found_raises(self, mocker): + manager = PluginAssetManager() + mocker.patch.object(manager, "_request", return_value=MagicMock(status_code=404, content=b"")) + + with pytest.raises(ValueError, match="can not found asset org/plugin:1, 404"): + manager.extract_asset("tenant-1", "org/plugin:1", "README.md") diff --git a/api/tests/unit_tests/core/plugin/impl/test_base_client_impl.py b/api/tests/unit_tests/core/plugin/impl/test_base_client_impl.py new file mode 100644 index 0000000000..c216906d68 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_base_client_impl.py @@ -0,0 +1,137 @@ +import json + +import pytest + +from core.plugin.endpoint.exc import EndpointSetupFailedError +from core.plugin.entities.plugin_daemon import PluginDaemonInnerError +from core.plugin.impl.base import BasePluginClient +from core.trigger.errors import ( + EventIgnoreError, + TriggerInvokeError, + TriggerPluginInvokeError, + TriggerProviderCredentialValidationError, +) + + +class _ResponseStub: + def __init__(self, payload): + self._payload = payload + + def raise_for_status(self): + return None + + def json(self): + return self._payload + + +class _StreamContext: + def __init__(self, lines): + self._lines = lines + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def iter_lines(self): + return self._lines + + +class TestBasePluginClientImpl: + def test_inject_trace_headers(self, mocker): + client = BasePluginClient() + mocker.patch("core.plugin.impl.base.dify_config.ENABLE_OTEL", True) + trace_header = "00-abc-xyz-01" + mocker.patch("core.helper.trace_id_helper.generate_traceparent_header", return_value=trace_header) + + headers = {} + client._inject_trace_headers(headers) + + assert headers["traceparent"] == trace_header + + headers_with_existing = {"TraceParent": "exists"} + client._inject_trace_headers(headers_with_existing) + assert headers_with_existing["TraceParent"] == "exists" + + def test_stream_request_handles_data_lines_and_dict_payload(self, mocker): + client = BasePluginClient() + stream_mock = mocker.patch( + "core.plugin.impl.base.httpx.stream", + return_value=_StreamContext([b"", b"data: hello", "world"]), + ) + + result = list(client._stream_request("POST", "plugin/tenant/stream", data={"k": "v"})) + + assert result == ["hello", "world"] + assert stream_mock.call_args.kwargs["data"] == {"k": "v"} + + def test_request_with_plugin_daemon_response_handles_request_exception(self, mocker): + client = BasePluginClient() + mocker.patch.object(client, "_request", side_effect=RuntimeError("boom")) + + with pytest.raises(ValueError, match="Failed to request plugin daemon"): + client._request_with_plugin_daemon_response("GET", "plugin/tenant/path", bool) + + def test_request_with_plugin_daemon_response_applies_transformer(self, mocker): + client = BasePluginClient() + mocker.patch.object(client, "_request", return_value=_ResponseStub({"code": 0, "message": "", "data": True})) + + transformed = {} + + def transformer(payload): + transformed.update(payload) + return payload + + result = client._request_with_plugin_daemon_response("GET", "plugin/tenant/path", bool, transformer=transformer) + + assert result is True + assert transformed == {"code": 0, "message": "", "data": True} + + def test_request_with_plugin_daemon_response_stream_malformed_json_error(self, mocker): + client = BasePluginClient() + mocker.patch.object(client, "_stream_request", return_value=iter(['{"error":"bad-line"}'])) + + with pytest.raises(ValueError, match="bad-line"): + list(client._request_with_plugin_daemon_response_stream("GET", "p", bool)) + + def test_request_with_plugin_daemon_response_stream_plugin_daemon_inner_error(self, mocker): + client = BasePluginClient() + mocker.patch.object( + client, "_stream_request", return_value=iter(['{"code":-500,"message":"not-json","data":null}']) + ) + + with pytest.raises(PluginDaemonInnerError) as exc_info: + list(client._request_with_plugin_daemon_response_stream("GET", "p", bool)) + assert exc_info.value.message == "not-json" + + def test_request_with_plugin_daemon_response_stream_plugin_daemon_error(self, mocker): + client = BasePluginClient() + mocker.patch.object(client, "_stream_request", return_value=iter(['{"code":-1,"message":"err","data":null}'])) + + with pytest.raises(ValueError, match="plugin daemon: err, code: -1"): + list(client._request_with_plugin_daemon_response_stream("GET", "p", bool)) + + def test_request_with_plugin_daemon_response_stream_empty_data_error(self, mocker): + client = BasePluginClient() + mocker.patch.object(client, "_stream_request", return_value=iter(['{"code":0,"message":"","data":null}'])) + + with pytest.raises(ValueError, match="got empty data"): + list(client._request_with_plugin_daemon_response_stream("GET", "p", bool)) + + @pytest.mark.parametrize( + ("error_type", "expected"), + [ + (EndpointSetupFailedError.__name__, EndpointSetupFailedError), + (TriggerProviderCredentialValidationError.__name__, TriggerProviderCredentialValidationError), + (TriggerPluginInvokeError.__name__, TriggerPluginInvokeError), + (TriggerInvokeError.__name__, TriggerInvokeError), + (EventIgnoreError.__name__, EventIgnoreError), + ], + ) + def test_handle_plugin_daemon_error_trigger_branches(self, error_type, expected): + client = BasePluginClient() + message = json.dumps({"error_type": error_type, "message": "m"}) + + with pytest.raises(expected): + client._handle_plugin_daemon_error("PluginInvokeError", message) diff --git a/api/tests/unit_tests/core/plugin/impl/test_datasource_manager.py b/api/tests/unit_tests/core/plugin/impl/test_datasource_manager.py new file mode 100644 index 0000000000..4c5987d759 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_datasource_manager.py @@ -0,0 +1,234 @@ +from types import SimpleNamespace + +from core.datasource.entities.datasource_entities import ( + GetOnlineDocumentPageContentRequest, + OnlineDriveBrowseFilesRequest, + OnlineDriveDownloadFileRequest, +) +from core.plugin.impl.datasource import PluginDatasourceManager + + +def _datasource_provider(name: str = "provider") -> SimpleNamespace: + return SimpleNamespace( + plugin_id="org/plugin", + declaration=SimpleNamespace( + identity=SimpleNamespace(name=name), + datasources=[SimpleNamespace(identity=SimpleNamespace(provider=""))], + ), + ) + + +class TestPluginDatasourceManager: + def test_fetch_datasource_providers(self, mocker): + manager = PluginDatasourceManager() + provider = _datasource_provider("remote") + repack = mocker.patch("core.plugin.impl.datasource.ToolTransformService.repack_provider") + mocker.patch("core.plugin.impl.datasource.resolve_dify_schema_refs", return_value={"resolved": True}) + + def fake_request(method, path, type_, **kwargs): + transformer = kwargs["transformer"] + payload = { + "data": [ + { + "declaration": { + "identity": {"name": "remote"}, + "datasources": [{"identity": {"provider": "old"}, "output_schema": {"$ref": "#/doc"}}], + } + } + ] + } + transformed = transformer(payload) + assert transformed["data"][0]["declaration"]["datasources"][0]["output_schema"] == {"resolved": True} + return [provider] + + request_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = manager.fetch_datasource_providers("tenant-1") + + assert request_mock.call_count == 1 + assert len(result) == 2 + assert result[0].plugin_id == "langgenius/file" + assert result[1].declaration.identity.name == "org/plugin/remote" + assert result[1].declaration.datasources[0].identity.provider == "org/plugin/remote" + repack.assert_called_once_with(tenant_id="tenant-1", provider=provider) + + def test_fetch_installed_datasource_providers(self, mocker): + manager = PluginDatasourceManager() + provider = _datasource_provider("remote") + repack = mocker.patch("core.plugin.impl.datasource.ToolTransformService.repack_provider") + mocker.patch("core.plugin.impl.datasource.resolve_dify_schema_refs", return_value={"resolved": True}) + + def fake_request(method, path, type_, **kwargs): + transformer = kwargs["transformer"] + payload = { + "data": [ + { + "declaration": { + "identity": {"name": "remote"}, + "datasources": [{"identity": {"provider": "old"}, "output_schema": {"$ref": "#/doc"}}], + } + } + ] + } + transformer(payload) + return [provider] + + request_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = manager.fetch_installed_datasource_providers("tenant-1") + + assert request_mock.call_count == 1 + assert len(result) == 1 + assert result[0].declaration.identity.name == "org/plugin/remote" + assert result[0].declaration.datasources[0].identity.provider == "org/plugin/remote" + repack.assert_called_once_with(tenant_id="tenant-1", provider=provider) + + def test_fetch_datasource_provider_local_and_remote(self, mocker): + manager = PluginDatasourceManager() + + local = manager.fetch_datasource_provider("tenant-1", "langgenius/file/file") + assert local.plugin_id == "langgenius/file" + + remote = _datasource_provider("provider") + mocker.patch("core.plugin.impl.datasource.resolve_dify_schema_refs", return_value={"resolved": True}) + + def fake_request(method, path, type_, **kwargs): + transformer = kwargs["transformer"] + payload = { + "data": { + "declaration": { + "datasources": [{"identity": {"provider": "old"}, "output_schema": {"$ref": "#/x"}}] + } + } + } + transformed = transformer(payload) + assert transformed["data"]["declaration"]["datasources"][0]["output_schema"] == {"resolved": True} + return remote + + request_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = manager.fetch_datasource_provider("tenant-1", "org/plugin/provider") + + assert request_mock.call_count == 1 + assert result.declaration.identity.name == "org/plugin/provider" + assert result.declaration.datasources[0].identity.provider == "org/plugin/provider" + + def test_get_website_crawl_streaming(self, mocker): + manager = PluginDatasourceManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + stream_mock.return_value = iter(["crawl"]) + + assert list( + manager.get_website_crawl( + "tenant-1", + "user-1", + "org/plugin/provider", + "crawl", + {"k": "v"}, + {"url": "https://example.com"}, + "website", + ) + ) == ["crawl"] + + assert stream_mock.call_count == 1 + + def test_get_online_document_pages_streaming(self, mocker): + manager = PluginDatasourceManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + stream_mock.return_value = iter(["pages"]) + + assert list( + manager.get_online_document_pages( + "tenant-1", + "user-1", + "org/plugin/provider", + "docs", + {"k": "v"}, + {"workspace": "w1"}, + "online_document", + ) + ) == ["pages"] + + assert stream_mock.call_count == 1 + + def test_get_online_document_page_content_streaming(self, mocker): + manager = PluginDatasourceManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + stream_mock.return_value = iter(["content"]) + + assert list( + manager.get_online_document_page_content( + "tenant-1", + "user-1", + "org/plugin/provider", + "docs", + {"k": "v"}, + GetOnlineDocumentPageContentRequest(workspace_id="w", page_id="p", type="doc"), + "online_document", + ) + ) == ["content"] + + assert stream_mock.call_count == 1 + + def test_online_drive_browse_files_streaming(self, mocker): + manager = PluginDatasourceManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + stream_mock.return_value = iter(["browse"]) + + assert list( + manager.online_drive_browse_files( + "tenant-1", + "user-1", + "org/plugin/provider", + "drive", + {"k": "v"}, + OnlineDriveBrowseFilesRequest(prefix="/"), + "online_drive", + ) + ) == ["browse"] + + assert stream_mock.call_count == 1 + + def test_online_drive_download_file_streaming(self, mocker): + manager = PluginDatasourceManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + stream_mock.return_value = iter(["download"]) + + assert list( + manager.online_drive_download_file( + "tenant-1", + "user-1", + "org/plugin/provider", + "drive", + {"k": "v"}, + OnlineDriveDownloadFileRequest(id="file-1"), + "online_drive", + ) + ) == ["download"] + + assert stream_mock.call_count == 1 + + def test_validate_provider_credentials_returns_true_when_stream_yields_result(self, mocker): + manager = PluginDatasourceManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + stream_mock.return_value = iter([SimpleNamespace(result=True)]) + + assert manager.validate_provider_credentials("tenant-1", "user-1", "provider", "org/plugin", {"k": "v"}) is True + + def test_validate_provider_credentials_returns_false_when_stream_empty(self, mocker): + manager = PluginDatasourceManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + stream_mock.return_value = iter([]) + + assert ( + manager.validate_provider_credentials("tenant-1", "user-1", "provider", "org/plugin", {"k": "v"}) is False + ) + + def test_local_file_provider_template(self): + manager = PluginDatasourceManager() + + payload = manager._get_local_file_datasource_provider() + + assert payload["plugin_id"] == "langgenius/file" + assert payload["provider"] == "file" + assert payload["declaration"]["provider_type"] == "local_file" diff --git a/api/tests/unit_tests/core/plugin/impl/test_debugging_client.py b/api/tests/unit_tests/core/plugin/impl/test_debugging_client.py new file mode 100644 index 0000000000..c80785aee0 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_debugging_client.py @@ -0,0 +1,21 @@ +from types import SimpleNamespace + +from core.plugin.impl.debugging import PluginDebuggingClient + + +class TestPluginDebuggingClient: + def test_get_debugging_key(self, mocker): + client = PluginDebuggingClient() + request_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response", + return_value=SimpleNamespace(key="debug-key"), + ) + + result = client.get_debugging_key("tenant-1") + + assert result == "debug-key" + request_mock.assert_called_once() + args = request_mock.call_args.args + assert args[0] == "POST" + assert args[1] == "plugin/tenant-1/debugging/key" diff --git a/api/tests/unit_tests/core/plugin/impl/test_endpoint_client_impl.py b/api/tests/unit_tests/core/plugin/impl/test_endpoint_client_impl.py new file mode 100644 index 0000000000..4cf657a050 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_endpoint_client_impl.py @@ -0,0 +1,71 @@ +import pytest + +from core.plugin.impl.endpoint import PluginEndpointClient +from core.plugin.impl.exc import PluginDaemonInternalServerError + + +class TestPluginEndpointClientImpl: + def test_create_endpoint(self, mocker): + client = PluginEndpointClient() + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", return_value=True) + + result = client.create_endpoint("tenant-1", "user-1", "org/plugin:1", "endpoint-a", {"k": "v"}) + + assert result is True + assert request_mock.call_count == 1 + args = request_mock.call_args.args + kwargs = request_mock.call_args.kwargs + assert args[:3] == ("POST", "plugin/tenant-1/endpoint/setup", bool) + assert kwargs["data"]["plugin_unique_identifier"] == "org/plugin:1" + + def test_list_endpoints(self, mocker): + client = PluginEndpointClient() + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", return_value=["endpoint"]) + + result = client.list_endpoints("tenant-1", "user-1", 2, 20) + + assert result == ["endpoint"] + assert request_mock.call_args.args[1] == "plugin/tenant-1/endpoint/list" + assert request_mock.call_args.kwargs["params"] == {"page": 2, "page_size": 20} + + def test_list_endpoints_for_single_plugin(self, mocker): + client = PluginEndpointClient() + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", return_value=["endpoint"]) + + result = client.list_endpoints_for_single_plugin("tenant-1", "user-1", "org/plugin", 1, 10) + + assert result == ["endpoint"] + assert request_mock.call_args.args[1] == "plugin/tenant-1/endpoint/list/plugin" + assert request_mock.call_args.kwargs["params"] == {"plugin_id": "org/plugin", "page": 1, "page_size": 10} + + def test_update_endpoint(self, mocker): + client = PluginEndpointClient() + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", return_value=True) + + result = client.update_endpoint("tenant-1", "user-1", "endpoint-1", "renamed", {"x": 1}) + + assert result is True + assert request_mock.call_args.args[:3] == ("POST", "plugin/tenant-1/endpoint/update", bool) + + def test_enable_and_disable_endpoint(self, mocker): + client = PluginEndpointClient() + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", return_value=True) + + assert client.enable_endpoint("tenant-1", "user-1", "endpoint-1") is True + assert client.disable_endpoint("tenant-1", "user-1", "endpoint-1") is True + + calls = request_mock.call_args_list + assert calls[0].args[1] == "plugin/tenant-1/endpoint/enable" + assert calls[1].args[1] == "plugin/tenant-1/endpoint/disable" + + def test_delete_endpoint_idempotent_and_re_raise(self, mocker): + client = PluginEndpointClient() + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response") + + request_mock.side_effect = PluginDaemonInternalServerError("record not found") + assert client.delete_endpoint("tenant-1", "user-1", "endpoint-1") is True + + request_mock.side_effect = PluginDaemonInternalServerError("permission denied") + with pytest.raises(PluginDaemonInternalServerError) as exc_info: + client.delete_endpoint("tenant-1", "user-1", "endpoint-1") + assert "permission denied" in exc_info.value.description diff --git a/api/tests/unit_tests/core/plugin/impl/test_exc_impl.py b/api/tests/unit_tests/core/plugin/impl/test_exc_impl.py new file mode 100644 index 0000000000..8c6f1c6b7f --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_exc_impl.py @@ -0,0 +1,41 @@ +import json + +from core.plugin.impl import exc as exc_module +from core.plugin.impl.exc import PluginDaemonError, PluginInvokeError + + +class TestPluginImplExceptions: + def test_plugin_daemon_error_str_contains_request_id(self, mocker): + mocker.patch("core.plugin.impl.exc.get_request_id", return_value="req-123") + error = PluginDaemonError("bad") + + assert str(error) == "req_id: req-123 PluginDaemonError: bad" + + def test_plugin_invoke_error_with_json_payload(self): + err = PluginInvokeError(json.dumps({"error_type": "RateLimit", "message": "too many"})) + + assert err.get_error_type() == "RateLimit" + assert err.get_error_message() == "too many" + friendly = err.to_user_friendly_error("test-plugin") + assert "test-plugin" in friendly + assert "RateLimit" in friendly + assert "too many" in friendly + + def test_plugin_invoke_error_invalid_json_and_fallback(self, mocker): + err = PluginInvokeError("plain text") + + assert err._get_error_object() == {} + assert err.get_error_type() == "unknown" + assert err.get_error_message() == "unknown" + + mocker.patch.object(PluginInvokeError, "_get_error_object", side_effect=RuntimeError("boom")) + err2 = PluginInvokeError("plain text") + assert err2.get_error_message() == "plain text" + + def test_plugin_invoke_error_get_error_object_handles_adapter_exception(self, mocker): + adapter = mocker.patch.object(exc_module, "TypeAdapter") + adapter.return_value.validate_json.side_effect = RuntimeError("invalid") + + err = PluginInvokeError("not-json") + + assert err._get_error_object() == {} diff --git a/api/tests/unit_tests/core/plugin/impl/test_model_client.py b/api/tests/unit_tests/core/plugin/impl/test_model_client.py new file mode 100644 index 0000000000..bcbebbb38b --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_model_client.py @@ -0,0 +1,490 @@ +from __future__ import annotations + +import io +from types import SimpleNamespace + +import pytest + +from core.plugin.entities.plugin_daemon import PluginDaemonInnerError +from core.plugin.impl.model import PluginModelClient + + +class TestPluginModelClient: + def test_fetch_model_providers(self, mocker): + client = PluginModelClient() + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", return_value=["provider-a"]) + + result = client.fetch_model_providers("tenant-1") + + assert result == ["provider-a"] + assert request_mock.call_args.args[:2] == ( + "GET", + "plugin/tenant-1/management/models", + ) + assert request_mock.call_args.kwargs["params"] == {"page": 1, "page_size": 256} + + def test_get_model_schema(self, mocker): + client = PluginModelClient() + schema = SimpleNamespace(name="schema") + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(model_schema=schema)]), + ) + + result = client.get_model_schema( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model_type="llm", + model="gpt-test", + credentials={"api_key": "key"}, + ) + + assert result is schema + assert stream_mock.call_args.args[:2] == ("POST", "plugin/tenant-1/dispatch/model/schema") + + def test_get_model_schema_empty_stream_returns_none(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + result = client.get_model_schema("tenant-1", "user-1", "org/plugin:1", "provider-a", "llm", "gpt-test", {}) + + assert result is None + + def test_validate_provider_credentials(self, mocker): + client = PluginModelClient() + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(result=True, credentials={"api_key": "new"})]), + ) + credentials = {"api_key": "old"} + + result = client.validate_provider_credentials( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + credentials=credentials, + ) + + assert result is True + assert credentials["api_key"] == "new" + assert stream_mock.call_args.args[:2] == ( + "POST", + "plugin/tenant-1/dispatch/model/validate_provider_credentials", + ) + + def test_validate_provider_credentials_without_dict_update(self, mocker): + client = PluginModelClient() + mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(result=False, credentials="not-a-dict")]), + ) + credentials = {"api_key": "same"} + + result = client.validate_provider_credentials("tenant-1", "user-1", "org/plugin:1", "provider-a", credentials) + + assert result is False + assert credentials == {"api_key": "same"} + + def test_validate_provider_credentials_empty_returns_false(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + assert client.validate_provider_credentials("tenant-1", "user-1", "org/plugin:1", "provider-a", {}) is False + + def test_validate_model_credentials(self, mocker): + client = PluginModelClient() + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(result=True, credentials={"token": "rotated"})]), + ) + credentials = {"token": "old"} + + result = client.validate_model_credentials( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model_type="llm", + model="gpt-test", + credentials=credentials, + ) + + assert result is True + assert credentials["token"] == "rotated" + assert stream_mock.call_args.args[:2] == ( + "POST", + "plugin/tenant-1/dispatch/model/validate_model_credentials", + ) + + def test_validate_model_credentials_empty_returns_false(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + assert ( + client.validate_model_credentials("tenant-1", "user-1", "org/plugin:1", "provider-a", "llm", "gpt-test", {}) + is False + ) + + def test_invoke_llm(self, mocker): + client = PluginModelClient() + stream_mock = mocker.patch.object( + client, "_request_with_plugin_daemon_response_stream", return_value=iter(["chunk-1"]) + ) + + result = list( + client.invoke_llm( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="gpt-test", + credentials={"api_key": "key"}, + prompt_messages=[], + model_parameters={"temperature": 0.1}, + tools=[], + stop=["STOP"], + stream=False, + ) + ) + + assert result == ["chunk-1"] + call_kwargs = stream_mock.call_args.kwargs + assert call_kwargs["path"] == "plugin/tenant-1/dispatch/llm/invoke" + assert call_kwargs["data"]["data"]["stream"] is False + assert call_kwargs["data"]["data"]["model_parameters"] == {"temperature": 0.1} + + def test_invoke_llm_wraps_plugin_daemon_inner_error(self, mocker): + client = PluginModelClient() + + def _boom(): + raise PluginDaemonInnerError(code=-500, message="invoke failed") + yield # pragma: no cover + + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=_boom()) + + with pytest.raises(ValueError, match="invoke failed-500"): + list( + client.invoke_llm( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="gpt-test", + credentials={}, + prompt_messages=[], + ) + ) + + def test_get_llm_num_tokens(self, mocker): + client = PluginModelClient() + mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(num_tokens=42)]), + ) + + result = client.get_llm_num_tokens( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model_type="llm", + model="gpt-test", + credentials={}, + prompt_messages=[], + tools=[], + ) + + assert result == 42 + + def test_get_llm_num_tokens_empty_returns_zero(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + assert ( + client.get_llm_num_tokens("tenant-1", "user-1", "org/plugin:1", "provider-a", "llm", "gpt-test", {}, []) + == 0 + ) + + def test_invoke_text_embedding(self, mocker): + client = PluginModelClient() + embedding_result = SimpleNamespace(data=[[0.1, 0.2]]) + mocker.patch.object( + client, "_request_with_plugin_daemon_response_stream", return_value=iter([embedding_result]) + ) + + result = client.invoke_text_embedding( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="embedding-a", + credentials={}, + texts=["hello"], + input_type="search_document", + ) + + assert result is embedding_result + + def test_invoke_text_embedding_empty_raises(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Failed to invoke text embedding"): + client.invoke_text_embedding( + "tenant-1", "user-1", "org/plugin:1", "provider-a", "embedding-a", {}, ["hello"], "x" + ) + + def test_invoke_multimodal_embedding(self, mocker): + client = PluginModelClient() + embedding_result = SimpleNamespace(data=[[0.3, 0.4]]) + mocker.patch.object( + client, "_request_with_plugin_daemon_response_stream", return_value=iter([embedding_result]) + ) + + result = client.invoke_multimodal_embedding( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="embedding-a", + credentials={}, + documents=[{"type": "image", "value": "abc"}], + input_type="search_document", + ) + + assert result is embedding_result + + def test_invoke_multimodal_embedding_empty_raises(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Failed to invoke file embedding"): + client.invoke_multimodal_embedding( + "tenant-1", "user-1", "org/plugin:1", "provider-a", "embedding-a", {}, [{"type": "image"}], "x" + ) + + def test_get_text_embedding_num_tokens(self, mocker): + client = PluginModelClient() + mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(num_tokens=[1, 2, 3])]), + ) + + assert client.get_text_embedding_num_tokens( + "tenant-1", "user-1", "org/plugin:1", "provider-a", "embedding-a", {}, ["a"] + ) == [ + 1, + 2, + 3, + ] + + def test_get_text_embedding_num_tokens_empty_returns_list(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + assert ( + client.get_text_embedding_num_tokens( + "tenant-1", "user-1", "org/plugin:1", "provider-a", "embedding-a", {}, ["a"] + ) + == [] + ) + + def test_invoke_rerank(self, mocker): + client = PluginModelClient() + rerank_result = SimpleNamespace(scores=[0.9]) + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([rerank_result])) + + result = client.invoke_rerank( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="rerank-a", + credentials={}, + query="q", + docs=["doc-1"], + score_threshold=0.2, + top_n=5, + ) + + assert result is rerank_result + + def test_invoke_rerank_empty_raises(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Failed to invoke rerank"): + client.invoke_rerank("tenant-1", "user-1", "org/plugin:1", "provider-a", "rerank-a", {}, "q", ["doc-1"]) + + def test_invoke_multimodal_rerank(self, mocker): + client = PluginModelClient() + rerank_result = SimpleNamespace(scores=[0.8]) + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([rerank_result])) + + result = client.invoke_multimodal_rerank( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="rerank-a", + credentials={}, + query={"type": "text", "value": "q"}, + docs=[{"type": "image", "value": "doc"}], + score_threshold=0.1, + top_n=3, + ) + + assert result is rerank_result + + def test_invoke_multimodal_rerank_empty_raises(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Failed to invoke multimodal rerank"): + client.invoke_multimodal_rerank( + "tenant-1", + "user-1", + "org/plugin:1", + "provider-a", + "rerank-a", + {}, + {"type": "text"}, + [{"type": "image"}], + ) + + def test_invoke_tts(self, mocker): + client = PluginModelClient() + mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(result="68656c6c6f"), SimpleNamespace(result="21")]), + ) + + result = list( + client.invoke_tts( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="tts-a", + credentials={}, + content_text="hello", + voice="alloy", + ) + ) + + assert result == [b"hello", b"!"] + + def test_invoke_tts_wraps_plugin_daemon_inner_error(self, mocker): + client = PluginModelClient() + + def _boom(): + raise PluginDaemonInnerError(code=-400, message="tts error") + yield # pragma: no cover + + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=_boom()) + + with pytest.raises(ValueError, match="tts error-400"): + list(client.invoke_tts("tenant-1", "user-1", "org/plugin:1", "provider-a", "tts-a", {}, "hello", "alloy")) + + def test_get_tts_model_voices(self, mocker): + client = PluginModelClient() + mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter( + [ + SimpleNamespace( + voices=[ + SimpleNamespace(name="Alloy", value="alloy"), + SimpleNamespace(name="Echo", value="echo"), + ] + ) + ] + ), + ) + + result = client.get_tts_model_voices( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="tts-a", + credentials={}, + language="en", + ) + + assert result == [{"name": "Alloy", "value": "alloy"}, {"name": "Echo", "value": "echo"}] + + def test_get_tts_model_voices_empty_returns_list(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + assert client.get_tts_model_voices("tenant-1", "user-1", "org/plugin:1", "provider-a", "tts-a", {}) == [] + + def test_invoke_speech_to_text(self, mocker): + client = PluginModelClient() + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(result="transcribed text")]), + ) + + result = client.invoke_speech_to_text( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="stt-a", + credentials={}, + file=io.BytesIO(b"abc"), + ) + + assert result == "transcribed text" + assert stream_mock.call_args.kwargs["data"]["data"]["file"] == "616263" + + def test_invoke_speech_to_text_empty_raises(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Failed to invoke speech to text"): + client.invoke_speech_to_text( + "tenant-1", "user-1", "org/plugin:1", "provider-a", "stt-a", {}, io.BytesIO(b"abc") + ) + + def test_invoke_moderation(self, mocker): + client = PluginModelClient() + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(result=True)]), + ) + + result = client.invoke_moderation( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin:1", + provider="provider-a", + model="moderation-a", + credentials={}, + text="safe text", + ) + + assert result is True + assert stream_mock.call_args.kwargs["path"] == "plugin/tenant-1/dispatch/moderation/invoke" + + def test_invoke_moderation_empty_raises(self, mocker): + client = PluginModelClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Failed to invoke moderation"): + client.invoke_moderation("tenant-1", "user-1", "org/plugin:1", "provider-a", "moderation-a", {}, "unsafe") diff --git a/api/tests/unit_tests/core/plugin/impl/test_oauth_handler.py b/api/tests/unit_tests/core/plugin/impl/test_oauth_handler.py new file mode 100644 index 0000000000..6fb4c99432 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_oauth_handler.py @@ -0,0 +1,147 @@ +from io import BytesIO +from types import SimpleNamespace + +import pytest +from werkzeug import Request + +from core.plugin.impl.oauth import OAuthHandler + + +def _build_request(body: bytes = b"payload") -> Request: + environ = { + "REQUEST_METHOD": "POST", + "PATH_INFO": "/oauth/callback", + "QUERY_STRING": "code=123", + "SERVER_NAME": "localhost", + "SERVER_PORT": "80", + "wsgi.input": BytesIO(body), + "wsgi.url_scheme": "http", + "CONTENT_LENGTH": str(len(body)), + "HTTP_HOST": "localhost", + "SERVER_PROTOCOL": "HTTP/1.1", + "HTTP_X_TEST": "yes", + } + return Request(environ) + + +class TestOAuthHandler: + def test_get_authorization_url(self, mocker): + handler = OAuthHandler() + stream_mock = mocker.patch.object( + handler, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(authorization_url="https://auth.example.com")]), + ) + + response = handler.get_authorization_url( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin", + provider="provider", + redirect_uri="https://dify.example.com/callback", + system_credentials={"client_id": "id"}, + ) + + assert response.authorization_url == "https://auth.example.com" + assert stream_mock.call_count == 1 + + def test_get_authorization_url_no_response_raises(self, mocker): + handler = OAuthHandler() + mocker.patch.object(handler, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Error getting authorization URL"): + handler.get_authorization_url( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin", + provider="provider", + redirect_uri="https://dify.example.com/callback", + system_credentials={}, + ) + + def test_get_credentials(self, mocker): + handler = OAuthHandler() + captured_data = {} + + def fake_stream(*args, **kwargs): + captured_data.update(kwargs["data"]) + return iter([SimpleNamespace(credentials={"token": "abc"}, metadata={}, expires_at=1)]) + + stream_mock = mocker.patch.object( + handler, "_request_with_plugin_daemon_response_stream", side_effect=fake_stream + ) + + response = handler.get_credentials( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin", + provider="provider", + redirect_uri="https://dify.example.com/callback", + system_credentials={"client_id": "id"}, + request=_build_request(), + ) + + assert response.credentials == {"token": "abc"} + assert "raw_http_request" in captured_data["data"] + assert stream_mock.call_count == 1 + + def test_get_credentials_no_response_raises(self, mocker): + handler = OAuthHandler() + mocker.patch.object(handler, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Error getting credentials"): + handler.get_credentials( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin", + provider="provider", + redirect_uri="https://dify.example.com/callback", + system_credentials={}, + request=_build_request(), + ) + + def test_refresh_credentials(self, mocker): + handler = OAuthHandler() + stream_mock = mocker.patch.object( + handler, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(credentials={"token": "new"}, metadata={}, expires_at=1)]), + ) + + response = handler.refresh_credentials( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin", + provider="provider", + redirect_uri="https://dify.example.com/callback", + system_credentials={"client_id": "id"}, + credentials={"refresh_token": "r"}, + ) + + assert response.credentials == {"token": "new"} + assert stream_mock.call_count == 1 + + def test_refresh_credentials_no_response_raises(self, mocker): + handler = OAuthHandler() + mocker.patch.object(handler, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="Error refreshing credentials"): + handler.refresh_credentials( + tenant_id="tenant-1", + user_id="user-1", + plugin_id="org/plugin", + provider="provider", + redirect_uri="https://dify.example.com/callback", + system_credentials={}, + credentials={}, + ) + + def test_convert_request_to_raw_data(self): + handler = OAuthHandler() + request = _build_request(b"body-data") + + raw = handler._convert_request_to_raw_data(request) + + assert raw.startswith(b"POST /oauth/callback?code=123 HTTP/1.1\r\n") + assert b"X-Test: yes\r\n" in raw + assert raw.endswith(b"body-data") diff --git a/api/tests/unit_tests/core/plugin/impl/test_tool_manager.py b/api/tests/unit_tests/core/plugin/impl/test_tool_manager.py new file mode 100644 index 0000000000..80cf46f9bb --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_tool_manager.py @@ -0,0 +1,121 @@ +from types import SimpleNamespace + +from core.plugin.entities.plugin_daemon import CredentialType +from core.plugin.impl.tool import PluginToolManager + + +def _tool_provider(name: str = "provider") -> SimpleNamespace: + return SimpleNamespace( + plugin_id="org/plugin", + declaration=SimpleNamespace( + identity=SimpleNamespace(name=name), + tools=[SimpleNamespace(identity=SimpleNamespace(provider=""))], + ), + ) + + +class TestPluginToolManager: + def test_fetch_tool_providers(self, mocker): + manager = PluginToolManager() + provider = _tool_provider("remote") + mocker.patch("core.plugin.impl.tool.resolve_dify_schema_refs", return_value={"resolved": True}) + + def fake_request(method, path, type_, **kwargs): + transformer = kwargs["transformer"] + payload = { + "data": [ + { + "declaration": { + "identity": {"name": "remote"}, + "tools": [{"identity": {"provider": "old"}, "output_schema": {"$ref": "#/x"}}], + } + } + ] + } + transformed = transformer(payload) + assert transformed["data"][0]["declaration"]["tools"][0]["output_schema"] == {"resolved": True} + return [provider] + + request_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = manager.fetch_tool_providers("tenant-1") + + assert request_mock.call_count == 1 + assert result[0].declaration.identity.name == "org/plugin/remote" + assert result[0].declaration.tools[0].identity.provider == "org/plugin/remote" + + def test_fetch_tool_provider(self, mocker): + manager = PluginToolManager() + provider = _tool_provider("provider") + mocker.patch("core.plugin.impl.tool.resolve_dify_schema_refs", return_value={"resolved": True}) + + def fake_request(method, path, type_, **kwargs): + transformer = kwargs["transformer"] + payload = { + "data": { + "declaration": {"tools": [{"identity": {"provider": "old"}, "output_schema": {"$ref": "#/x"}}]} + } + } + transformed = transformer(payload) + assert transformed["data"]["declaration"]["tools"][0]["output_schema"] == {"resolved": True} + return provider + + request_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = manager.fetch_tool_provider("tenant-1", "org/plugin/provider") + + assert request_mock.call_count == 1 + assert result.declaration.identity.name == "org/plugin/provider" + assert result.declaration.tools[0].identity.provider == "org/plugin/provider" + + def test_invoke_merges_chunks(self, mocker): + manager = PluginToolManager() + stream_mock = mocker.patch.object( + manager, "_request_with_plugin_daemon_response_stream", return_value=iter(["chunk"]) + ) + merge_mock = mocker.patch("core.plugin.impl.tool.merge_blob_chunks", return_value=["merged"]) + + result = manager.invoke( + tenant_id="tenant-1", + user_id="user-1", + tool_provider="org/plugin/provider", + tool_name="search", + credentials={"api_key": "k"}, + credential_type=CredentialType.API_KEY, + tool_parameters={"q": "python"}, + conversation_id="conv-1", + app_id="app-1", + message_id="msg-1", + ) + + assert result == ["merged"] + assert merge_mock.call_count == 1 + assert stream_mock.call_args.kwargs["headers"]["X-Plugin-ID"] == "org/plugin" + + def test_validate_credentials_paths(self, mocker): + manager = PluginToolManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + + stream_mock.return_value = iter([SimpleNamespace(result=True)]) + assert manager.validate_provider_credentials("tenant-1", "user-1", "org/plugin/provider", {"k": "v"}) is True + + stream_mock.return_value = iter([]) + assert manager.validate_provider_credentials("tenant-1", "user-1", "org/plugin/provider", {"k": "v"}) is False + + stream_mock.return_value = iter([SimpleNamespace(result=True)]) + assert manager.validate_datasource_credentials("tenant-1", "user-1", "org/plugin/provider", {"k": "v"}) is True + + stream_mock.return_value = iter([]) + assert manager.validate_datasource_credentials("tenant-1", "user-1", "org/plugin/provider", {"k": "v"}) is False + + def test_get_runtime_parameters_paths(self, mocker): + manager = PluginToolManager() + stream_mock = mocker.patch.object(manager, "_request_with_plugin_daemon_response_stream") + + stream_mock.return_value = iter([SimpleNamespace(parameters=[{"name": "p"}])]) + params = manager.get_runtime_parameters("tenant-1", "user-1", "org/plugin/provider", {}, "search") + assert params == [{"name": "p"}] + + stream_mock.return_value = iter([]) + params = manager.get_runtime_parameters("tenant-1", "user-1", "org/plugin/provider", {}, "search") + assert params == [] diff --git a/api/tests/unit_tests/core/plugin/impl/test_trigger_client.py b/api/tests/unit_tests/core/plugin/impl/test_trigger_client.py new file mode 100644 index 0000000000..76da51c2c8 --- /dev/null +++ b/api/tests/unit_tests/core/plugin/impl/test_trigger_client.py @@ -0,0 +1,226 @@ +from io import BytesIO +from types import SimpleNamespace + +import pytest +from werkzeug import Request + +from core.plugin.entities.plugin_daemon import CredentialType +from core.plugin.impl.trigger import PluginTriggerClient +from core.trigger.entities.entities import Subscription +from models.provider_ids import TriggerProviderID + + +def _request() -> Request: + environ = { + "REQUEST_METHOD": "POST", + "PATH_INFO": "/events", + "QUERY_STRING": "", + "SERVER_NAME": "localhost", + "SERVER_PORT": "80", + "wsgi.input": BytesIO(b"payload"), + "wsgi.url_scheme": "http", + "CONTENT_LENGTH": "7", + "HTTP_HOST": "localhost", + } + return Request(environ) + + +def _subscription() -> Subscription: + return Subscription(expires_at=123, endpoint="https://example.com/hook", parameters={"a": 1}, properties={"p": 1}) + + +def _trigger_provider(name: str = "provider") -> SimpleNamespace: + return SimpleNamespace( + plugin_id="org/plugin", + declaration=SimpleNamespace( + identity=SimpleNamespace(name=name), + events=[SimpleNamespace(identity=SimpleNamespace(provider=""))], + ), + ) + + +def _subscription_call_kwargs(method_name: str) -> dict: + if method_name == "subscribe": + return { + "tenant_id": "tenant-1", + "user_id": "user-1", + "provider": "org/plugin/provider", + "credentials": {"token": "x"}, + "credential_type": CredentialType.API_KEY, + "endpoint": "https://example.com/hook", + "parameters": {"k": "v"}, + } + + return { + "tenant_id": "tenant-1", + "user_id": "user-1", + "provider": "org/plugin/provider", + "subscription": _subscription(), + "credentials": {"token": "x"}, + "credential_type": CredentialType.API_KEY, + } + + +class TestPluginTriggerClient: + def test_fetch_trigger_providers(self, mocker): + client = PluginTriggerClient() + provider = _trigger_provider("remote") + + def fake_request(*args, **kwargs): + transformer = kwargs["transformer"] + payload = { + "data": [ + { + "plugin_id": "org/plugin", + "provider": "remote", + "declaration": {"events": [{"identity": {"provider": "old"}}]}, + } + ] + } + transformed = transformer(payload) + assert transformed["data"][0]["declaration"]["events"][0]["identity"]["provider"] == "org/plugin/remote" + return [provider] + + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = client.fetch_trigger_providers("tenant-1") + + assert request_mock.call_count == 1 + assert result[0].declaration.identity.name == "org/plugin/remote" + assert result[0].declaration.events[0].identity.provider == "org/plugin/remote" + + def test_fetch_trigger_provider(self, mocker): + client = PluginTriggerClient() + provider = _trigger_provider("provider") + + def fake_request(*args, **kwargs): + transformer = kwargs["transformer"] + payload = {"data": {"declaration": {"events": [{"identity": {"provider": "old"}}]}}} + transformed = transformer(payload) + assert transformed["data"]["declaration"]["events"][0]["identity"]["provider"] == "org/plugin/provider" + return provider + + request_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response", side_effect=fake_request) + + result = client.fetch_trigger_provider("tenant-1", TriggerProviderID("org/plugin/provider")) + + assert request_mock.call_count == 1 + assert result.declaration.identity.name == "org/plugin/provider" + assert result.declaration.events[0].identity.provider == "org/plugin/provider" + + def test_invoke_trigger_event(self, mocker): + client = PluginTriggerClient() + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(variables={"ok": True}, cancelled=False)]), + ) + + result = client.invoke_trigger_event( + tenant_id="tenant-1", + user_id="user-1", + provider="org/plugin/provider", + event_name="created", + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + request=_request(), + parameters={"k": "v"}, + subscription=_subscription(), + payload={"payload": 1}, + ) + + assert result.variables == {"ok": True} + assert stream_mock.call_count == 1 + + def test_invoke_trigger_event_no_response_raises(self, mocker): + client = PluginTriggerClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + + with pytest.raises(ValueError, match="No response received from plugin daemon for invoke trigger"): + client.invoke_trigger_event( + tenant_id="tenant-1", + user_id="user-1", + provider="org/plugin/provider", + event_name="created", + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + request=_request(), + parameters={"k": "v"}, + subscription=_subscription(), + payload={"payload": 1}, + ) + + def test_validate_provider_credentials(self, mocker): + client = PluginTriggerClient() + stream_mock = mocker.patch.object(client, "_request_with_plugin_daemon_response_stream") + + stream_mock.return_value = iter([SimpleNamespace(result=True)]) + assert client.validate_provider_credentials("tenant-1", "user-1", "org/plugin/provider", {"k": "v"}) is True + + stream_mock.return_value = iter([]) + with pytest.raises( + ValueError, match="No response received from plugin daemon for validate provider credentials" + ): + client.validate_provider_credentials("tenant-1", "user-1", "org/plugin/provider", {"k": "v"}) + + def test_dispatch_event(self, mocker): + client = PluginTriggerClient() + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(user_id="u", events=["e"])]), + ) + + result = client.dispatch_event( + tenant_id="tenant-1", + provider="org/plugin/provider", + subscription={"id": "sub"}, + request=_request(), + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + assert result.user_id == "u" + assert stream_mock.call_count == 1 + + stream_mock.return_value = iter([]) + with pytest.raises(ValueError, match="No response received from plugin daemon for dispatch event"): + client.dispatch_event( + tenant_id="tenant-1", + provider="org/plugin/provider", + subscription={"id": "sub"}, + request=_request(), + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + @pytest.mark.parametrize("method_name", ["subscribe", "unsubscribe", "refresh"]) + def test_subscription_operations_success(self, mocker, method_name): + client = PluginTriggerClient() + stream_mock = mocker.patch.object( + client, + "_request_with_plugin_daemon_response_stream", + return_value=iter([SimpleNamespace(subscription={"id": "sub"})]), + ) + + method = getattr(client, method_name) + result = method(**_subscription_call_kwargs(method_name)) + + assert result.subscription == {"id": "sub"} + assert stream_mock.call_count == 1 + + @pytest.mark.parametrize( + ("method_name", "expected"), + [ + ("subscribe", "No response received from plugin daemon for subscribe"), + ("unsubscribe", "No response received from plugin daemon for unsubscribe"), + ("refresh", "No response received from plugin daemon for refresh"), + ], + ) + def test_subscription_operations_no_response(self, mocker, method_name, expected): + client = PluginTriggerClient() + mocker.patch.object(client, "_request_with_plugin_daemon_response_stream", return_value=iter([])) + method = getattr(client, method_name) + + with pytest.raises(ValueError, match=expected): + method(**_subscription_call_kwargs(method_name)) diff --git a/api/tests/unit_tests/core/plugin/test_backwards_invocation_app.py b/api/tests/unit_tests/core/plugin/test_backwards_invocation_app.py index a380149554..c2778f082b 100644 --- a/api/tests/unit_tests/core/plugin/test_backwards_invocation_app.py +++ b/api/tests/unit_tests/core/plugin/test_backwards_invocation_app.py @@ -1,72 +1,359 @@ +import json from types import SimpleNamespace from unittest.mock import MagicMock +import pytest +from pydantic import BaseModel + from core.app.layers.pause_state_persist_layer import PauseStateLayerConfig from core.plugin.backwards_invocation.app import PluginAppBackwardsInvocation +from core.plugin.backwards_invocation.base import BaseBackwardsInvocation from models.model import AppMode -def test_invoke_chat_app_advanced_chat_injects_pause_state_config(mocker): - workflow = MagicMock() - workflow.created_by = "owner-id" +class _Chunk(BaseModel): + value: int - app = MagicMock() - app.mode = AppMode.ADVANCED_CHAT - app.workflow = workflow - mocker.patch( - "core.plugin.backwards_invocation.app.db", - SimpleNamespace(engine=MagicMock()), +class TestBaseBackwardsInvocation: + def test_convert_to_event_stream_with_generator_and_error(self): + def _stream(): + yield _Chunk(value=1) + yield {"x": 2} + yield "ignored" + raise RuntimeError("boom") + + chunks = list(BaseBackwardsInvocation.convert_to_event_stream(_stream())) + + assert len(chunks) == 3 + first = json.loads(chunks[0].decode()) + second = json.loads(chunks[1].decode()) + error = json.loads(chunks[2].decode()) + assert first["data"]["value"] == 1 + assert second["data"]["x"] == 2 + assert error["error"] == "boom" + + def test_convert_to_event_stream_with_non_generator(self): + chunks = list(BaseBackwardsInvocation.convert_to_event_stream({"ok": True})) + payload = json.loads(chunks[0].decode()) + assert payload["data"] == {"ok": True} + assert payload["error"] == "" + + +class TestPluginAppBackwardsInvocation: + def test_fetch_app_info_workflow_path(self, mocker): + workflow = MagicMock() + workflow.features_dict = {"feature": "v"} + workflow.user_input_form.return_value = [{"name": "foo"}] + app = MagicMock(mode=AppMode.WORKFLOW, workflow=workflow) + mocker.patch.object(PluginAppBackwardsInvocation, "_get_app", return_value=app) + mapper = mocker.patch( + "core.plugin.backwards_invocation.app.get_parameters_from_feature_dict", + return_value={"mapped": True}, + ) + + result = PluginAppBackwardsInvocation.fetch_app_info("app-1", "tenant-1") + + assert result == {"data": {"mapped": True}} + mapper.assert_called_once_with(features_dict={"feature": "v"}, user_input_form=[{"name": "foo"}]) + + def test_fetch_app_info_model_config_path(self, mocker): + model_config = MagicMock() + model_config.to_dict.return_value = {"user_input_form": [{"name": "bar"}], "k": "v"} + app = MagicMock(mode=AppMode.COMPLETION, app_model_config=model_config) + mocker.patch.object(PluginAppBackwardsInvocation, "_get_app", return_value=app) + mocker.patch( + "core.plugin.backwards_invocation.app.get_parameters_from_feature_dict", + return_value={"mapped": True}, + ) + + result = PluginAppBackwardsInvocation.fetch_app_info("app-1", "tenant-1") + + assert result["data"] == {"mapped": True} + + @pytest.mark.parametrize( + ("mode", "route_method"), + [ + (AppMode.CHAT, "invoke_chat_app"), + (AppMode.ADVANCED_CHAT, "invoke_chat_app"), + (AppMode.AGENT_CHAT, "invoke_chat_app"), + (AppMode.WORKFLOW, "invoke_workflow_app"), + (AppMode.COMPLETION, "invoke_completion_app"), + ], ) - generator_spy = mocker.patch( - "core.plugin.backwards_invocation.app.AdvancedChatAppGenerator.generate", - return_value={"result": "ok"}, + def test_invoke_app_routes_by_mode(self, mocker, mode, route_method): + app = MagicMock(mode=mode) + user = MagicMock() + mocker.patch.object(PluginAppBackwardsInvocation, "_get_app", return_value=app) + mocker.patch.object(PluginAppBackwardsInvocation, "_get_user", return_value=user) + route = mocker.patch.object(PluginAppBackwardsInvocation, route_method, return_value={"routed": True}) + + result = PluginAppBackwardsInvocation.invoke_app( + app_id="app", + user_id="user", + tenant_id="tenant", + conversation_id=None, + query="hello", + stream=False, + inputs={"x": 1}, + files=[], + ) + + assert result == {"routed": True} + assert route.call_count == 1 + + def test_invoke_app_uses_end_user_when_user_id_missing(self, mocker): + app = MagicMock(mode=AppMode.WORKFLOW) + end_user = MagicMock() + mocker.patch.object(PluginAppBackwardsInvocation, "_get_app", return_value=app) + get_or_create = mocker.patch( + "core.plugin.backwards_invocation.app.EndUserService.get_or_create_end_user", + return_value=end_user, + ) + route = mocker.patch.object(PluginAppBackwardsInvocation, "invoke_workflow_app", return_value={"ok": True}) + + result = PluginAppBackwardsInvocation.invoke_app( + app_id="app", + user_id="", + tenant_id="tenant", + conversation_id="", + query=None, + stream=True, + inputs={}, + files=[], + ) + + assert result == {"ok": True} + get_or_create.assert_called_once_with(app) + assert route.call_args.args[1] is end_user + + def test_invoke_app_missing_query_for_chat_raises(self, mocker): + mocker.patch.object(PluginAppBackwardsInvocation, "_get_app", return_value=MagicMock(mode=AppMode.CHAT)) + mocker.patch.object(PluginAppBackwardsInvocation, "_get_user", return_value=MagicMock()) + + with pytest.raises(ValueError, match="missing query"): + PluginAppBackwardsInvocation.invoke_app( + app_id="app", + user_id="user", + tenant_id="tenant", + conversation_id=None, + query="", + stream=False, + inputs={}, + files=[], + ) + + def test_invoke_app_unexpected_mode_raises(self, mocker): + mocker.patch.object(PluginAppBackwardsInvocation, "_get_app", return_value=MagicMock(mode="other")) + mocker.patch.object(PluginAppBackwardsInvocation, "_get_user", return_value=MagicMock()) + + with pytest.raises(ValueError, match="unexpected app type"): + PluginAppBackwardsInvocation.invoke_app( + app_id="app", + user_id="user", + tenant_id="tenant", + conversation_id=None, + query="q", + stream=False, + inputs={}, + files=[], + ) + + @pytest.mark.parametrize( + ("mode", "generator_path"), + [ + (AppMode.AGENT_CHAT, "core.plugin.backwards_invocation.app.AgentChatAppGenerator.generate"), + (AppMode.CHAT, "core.plugin.backwards_invocation.app.ChatAppGenerator.generate"), + ], ) + def test_invoke_chat_app_agent_and_chat(self, mocker, mode, generator_path): + app = MagicMock(mode=mode, workflow=None) + spy = mocker.patch(generator_path, return_value={"result": "ok"}) - result = PluginAppBackwardsInvocation.invoke_chat_app( - app=app, - user=MagicMock(), - conversation_id="conv-1", - query="hello", - stream=False, - inputs={"k": "v"}, - files=[], - ) + result = PluginAppBackwardsInvocation.invoke_chat_app( + app=app, + user=MagicMock(), + conversation_id="conv-1", + query="hello", + stream=False, + inputs={"k": "v"}, + files=[], + ) - assert result == {"result": "ok"} - call_kwargs = generator_spy.call_args.kwargs - pause_state_config = call_kwargs.get("pause_state_config") - assert isinstance(pause_state_config, PauseStateLayerConfig) - assert pause_state_config.state_owner_user_id == "owner-id" + assert result == {"result": "ok"} + assert spy.call_count == 1 + def test_invoke_chat_app_advanced_chat_injects_pause_state_config(self, mocker): + workflow = MagicMock() + workflow.created_by = "owner-id" -def test_invoke_workflow_app_injects_pause_state_config(mocker): - workflow = MagicMock() - workflow.created_by = "owner-id" + app = MagicMock() + app.mode = AppMode.ADVANCED_CHAT + app.workflow = workflow - app = MagicMock() - app.mode = AppMode.WORKFLOW - app.workflow = workflow + mocker.patch( + "core.plugin.backwards_invocation.app.db", + SimpleNamespace(engine=MagicMock()), + ) + generator_spy = mocker.patch( + "core.plugin.backwards_invocation.app.AdvancedChatAppGenerator.generate", + return_value={"result": "ok"}, + ) - mocker.patch( - "core.plugin.backwards_invocation.app.db", - SimpleNamespace(engine=MagicMock()), - ) - generator_spy = mocker.patch( - "core.plugin.backwards_invocation.app.WorkflowAppGenerator.generate", - return_value={"result": "ok"}, - ) + result = PluginAppBackwardsInvocation.invoke_chat_app( + app=app, + user=MagicMock(), + conversation_id="conv-1", + query="hello", + stream=False, + inputs={"k": "v"}, + files=[], + ) - result = PluginAppBackwardsInvocation.invoke_workflow_app( - app=app, - user=MagicMock(), - stream=False, - inputs={"k": "v"}, - files=[], - ) + assert result == {"result": "ok"} + call_kwargs = generator_spy.call_args.kwargs + pause_state_config = call_kwargs.get("pause_state_config") + assert isinstance(pause_state_config, PauseStateLayerConfig) + assert pause_state_config.state_owner_user_id == "owner-id" - assert result == {"result": "ok"} - call_kwargs = generator_spy.call_args.kwargs - pause_state_config = call_kwargs.get("pause_state_config") - assert isinstance(pause_state_config, PauseStateLayerConfig) - assert pause_state_config.state_owner_user_id == "owner-id" + def test_invoke_chat_app_advanced_chat_without_workflow_raises(self): + app = MagicMock(mode=AppMode.ADVANCED_CHAT, workflow=None) + with pytest.raises(ValueError, match="unexpected app type"): + PluginAppBackwardsInvocation.invoke_chat_app( + app=app, + user=MagicMock(), + conversation_id="conv-1", + query="hello", + stream=False, + inputs={}, + files=[], + ) + + def test_invoke_chat_app_unexpected_mode_raises(self): + app = MagicMock(mode="invalid") + with pytest.raises(ValueError, match="unexpected app type"): + PluginAppBackwardsInvocation.invoke_chat_app( + app=app, + user=MagicMock(), + conversation_id="conv-1", + query="hello", + stream=False, + inputs={}, + files=[], + ) + + def test_invoke_workflow_app_injects_pause_state_config(self, mocker): + workflow = MagicMock() + workflow.created_by = "owner-id" + + app = MagicMock() + app.mode = AppMode.WORKFLOW + app.workflow = workflow + + mocker.patch( + "core.plugin.backwards_invocation.app.db", + SimpleNamespace(engine=MagicMock()), + ) + generator_spy = mocker.patch( + "core.plugin.backwards_invocation.app.WorkflowAppGenerator.generate", + return_value={"result": "ok"}, + ) + + result = PluginAppBackwardsInvocation.invoke_workflow_app( + app=app, + user=MagicMock(), + stream=False, + inputs={"k": "v"}, + files=[], + ) + + assert result == {"result": "ok"} + call_kwargs = generator_spy.call_args.kwargs + pause_state_config = call_kwargs.get("pause_state_config") + assert isinstance(pause_state_config, PauseStateLayerConfig) + assert pause_state_config.state_owner_user_id == "owner-id" + + def test_invoke_workflow_app_without_workflow_raises(self): + app = MagicMock(mode=AppMode.WORKFLOW, workflow=None) + with pytest.raises(ValueError, match="unexpected app type"): + PluginAppBackwardsInvocation.invoke_workflow_app( + app=app, + user=MagicMock(), + stream=False, + inputs={}, + files=[], + ) + + def test_invoke_completion_app(self, mocker): + spy = mocker.patch( + "core.plugin.backwards_invocation.app.CompletionAppGenerator.generate", return_value={"ok": 1} + ) + app = MagicMock(mode=AppMode.COMPLETION) + + result = PluginAppBackwardsInvocation.invoke_completion_app(app, MagicMock(), False, {"x": 1}, []) + + assert result == {"ok": 1} + assert spy.call_count == 1 + + def test_get_user_returns_end_user(self, mocker): + session = MagicMock() + session.scalar.side_effect = [MagicMock(id="end-user")] + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + mocker.patch("core.plugin.backwards_invocation.app.Session", return_value=session_ctx) + mocker.patch("core.plugin.backwards_invocation.app.db", SimpleNamespace(engine=MagicMock())) + + user = PluginAppBackwardsInvocation._get_user("uid") + assert user.id == "end-user" + + def test_get_user_falls_back_to_account_user(self, mocker): + session = MagicMock() + session.scalar.side_effect = [None, MagicMock(id="account-user")] + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + mocker.patch("core.plugin.backwards_invocation.app.Session", return_value=session_ctx) + mocker.patch("core.plugin.backwards_invocation.app.db", SimpleNamespace(engine=MagicMock())) + + user = PluginAppBackwardsInvocation._get_user("uid") + assert user.id == "account-user" + + def test_get_user_raises_when_user_not_found(self, mocker): + session = MagicMock() + session.scalar.side_effect = [None, None] + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = None + mocker.patch("core.plugin.backwards_invocation.app.Session", return_value=session_ctx) + mocker.patch("core.plugin.backwards_invocation.app.db", SimpleNamespace(engine=MagicMock())) + + with pytest.raises(ValueError, match="user not found"): + PluginAppBackwardsInvocation._get_user("uid") + + def test_get_app_returns_app(self, mocker): + query_chain = MagicMock() + query_chain.where.return_value = query_chain + app_obj = MagicMock(id="app") + query_chain.first.return_value = app_obj + db = SimpleNamespace(session=MagicMock(query=MagicMock(return_value=query_chain))) + mocker.patch("core.plugin.backwards_invocation.app.db", db) + + assert PluginAppBackwardsInvocation._get_app("app", "tenant") is app_obj + + def test_get_app_raises_when_missing(self, mocker): + query_chain = MagicMock() + query_chain.where.return_value = query_chain + query_chain.first.return_value = None + db = SimpleNamespace(session=MagicMock(query=MagicMock(return_value=query_chain))) + mocker.patch("core.plugin.backwards_invocation.app.db", db) + + with pytest.raises(ValueError, match="app not found"): + PluginAppBackwardsInvocation._get_app("app", "tenant") + + def test_get_app_raises_when_query_fails(self, mocker): + db = SimpleNamespace(session=MagicMock(query=MagicMock(side_effect=RuntimeError("db down")))) + mocker.patch("core.plugin.backwards_invocation.app.db", db) + + with pytest.raises(ValueError, match="app not found"): + PluginAppBackwardsInvocation._get_app("app", "tenant") diff --git a/api/tests/unit_tests/core/plugin/test_plugin_entities.py b/api/tests/unit_tests/core/plugin/test_plugin_entities.py new file mode 100644 index 0000000000..b0b64a601b --- /dev/null +++ b/api/tests/unit_tests/core/plugin/test_plugin_entities.py @@ -0,0 +1,347 @@ +import binascii +import datetime +from enum import StrEnum + +import pytest +from flask import Response +from pydantic import ValidationError + +from core.plugin.entities.endpoint import EndpointEntityWithInstance +from core.plugin.entities.marketplace import MarketplacePluginDeclaration, MarketplacePluginSnapshot +from core.plugin.entities.parameters import ( + PluginParameter, + PluginParameterOption, + PluginParameterType, + as_normal_type, + cast_parameter_value, + init_frontend_parameter, +) +from core.plugin.entities.plugin_daemon import CredentialType +from core.plugin.entities.request import ( + RequestInvokeLLM, + RequestInvokeSpeech2Text, + TriggerDispatchResponse, + TriggerInvokeEventResponse, +) +from core.plugin.utils.http_parser import serialize_response +from core.tools.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + SystemPromptMessage, + ToolPromptMessage, + UserPromptMessage, +) + + +class TestEndpointEntity: + def test_endpoint_entity_with_instance_renders_url(self, mocker): + mocker.patch("core.plugin.entities.endpoint.dify_config.ENDPOINT_URL_TEMPLATE", "https://dify.test/{hook_id}") + now = datetime.datetime.now(datetime.UTC) + + entity = EndpointEntityWithInstance.model_validate( + { + "id": "ep-1", + "created_at": now, + "updated_at": now, + "settings": {}, + "tenant_id": "tenant", + "plugin_id": "org/plugin", + "expired_at": now, + "name": "my-endpoint", + "enabled": True, + "hook_id": "hook-123", + } + ) + + assert entity.url == "https://dify.test/hook-123" + + def test_endpoint_entity_with_instance_keeps_existing_url(self): + now = datetime.datetime.now(datetime.UTC) + entity = EndpointEntityWithInstance.model_validate( + { + "id": "ep-1", + "created_at": now, + "updated_at": now, + "settings": {}, + "tenant_id": "tenant", + "plugin_id": "org/plugin", + "expired_at": now, + "name": "my-endpoint", + "enabled": True, + "hook_id": "hook-123", + "url": "https://preset.test/hook-123", + } + ) + assert entity.url == "https://preset.test/hook-123" + + +class TestMarketplaceEntities: + def test_marketplace_declaration_strips_empty_optional_fields(self): + declaration = MarketplacePluginDeclaration.model_validate( + { + "name": "plugin", + "org": "org", + "plugin_id": "org/plugin", + "icon": "icon.png", + "label": {"en_US": "Plugin"}, + "brief": {"en_US": "Brief"}, + "resource": {"memory": 256}, + "endpoint": {}, + "model": {}, + "tool": {}, + "latest_version": "1.0.0", + "latest_package_identifier": "org/plugin@1.0.0", + "status": "active", + "deprecated_reason": "", + "alternative_plugin_id": "", + } + ) + + assert declaration.endpoint is None + assert declaration.model is None + assert declaration.tool is None + + def test_marketplace_snapshot_computed_plugin_id(self): + snapshot = MarketplacePluginSnapshot( + org="langgenius", + name="search", + latest_version="1.0.0", + latest_package_identifier="langgenius/search@1.0.0", + latest_package_url="https://example.com/pkg", + ) + assert snapshot.plugin_id == "langgenius/search" + + +class TestPluginParameterEntities: + def _label(self) -> I18nObject: + return I18nObject(en_US="label") + + def test_parameter_option_value_casts_to_string(self): + option = PluginParameterOption(value=123, label=self._label()) + assert option.value == "123" + + def test_plugin_parameter_options_non_list_defaults_to_empty(self): + parameter = PluginParameter(name="p", label=self._label(), options="invalid") # type: ignore[arg-type] + assert parameter.options == [] + + @pytest.mark.parametrize( + ("parameter_type", "expected"), + [ + (PluginParameterType.SECRET_INPUT, "string"), + (PluginParameterType.SELECT, "string"), + (PluginParameterType.CHECKBOX, "string"), + (PluginParameterType.NUMBER, PluginParameterType.NUMBER.value), + ], + ) + def test_as_normal_type(self, parameter_type, expected): + assert as_normal_type(parameter_type) == expected + + @pytest.mark.parametrize( + ("value", "expected"), + [(None, ""), (1, "1"), ("abc", "abc")], + ) + def test_cast_parameter_value_string_like(self, value, expected): + assert cast_parameter_value(PluginParameterType.STRING, value) == expected + + @pytest.mark.parametrize( + ("value", "expected"), + [ + (None, False), + ("true", True), + ("yes", True), + ("1", True), + ("false", False), + ("0", False), + ("random", True), + (1, True), + (0, False), + ], + ) + def test_cast_parameter_value_boolean(self, value, expected): + assert cast_parameter_value(PluginParameterType.BOOLEAN, value) is expected + + @pytest.mark.parametrize( + ("value", "expected"), + [ + (1, 1), + (1.5, 1.5), + ("2", 2), + ("2.5", 2.5), + ], + ) + def test_cast_parameter_value_number(self, value, expected): + assert cast_parameter_value(PluginParameterType.NUMBER, value) == expected + + def test_cast_parameter_value_file_and_files(self): + assert cast_parameter_value(PluginParameterType.FILES, "f1") == ["f1"] + assert cast_parameter_value(PluginParameterType.SYSTEM_FILES, ["f1", "f2"]) == ["f1", "f2"] + assert cast_parameter_value(PluginParameterType.FILE, ["one"]) == "one" + assert cast_parameter_value(PluginParameterType.FILE, "one") == "one" + with pytest.raises(ValueError, match="only accepts one file"): + cast_parameter_value(PluginParameterType.FILE, ["a", "b"]) + + @pytest.mark.parametrize( + ("parameter_type", "value", "expected"), + [ + (PluginParameterType.MODEL_SELECTOR, {"m": "gpt"}, {"m": "gpt"}), + (PluginParameterType.APP_SELECTOR, {"app": "a"}, {"app": "a"}), + (PluginParameterType.TOOLS_SELECTOR, [], []), + (PluginParameterType.ANY, {"k": "v"}, {"k": "v"}), + ], + ) + def test_cast_parameter_value_selectors_valid(self, parameter_type, value, expected): + assert cast_parameter_value(parameter_type, value) == expected + + @pytest.mark.parametrize( + ("parameter_type", "value", "message"), + [ + (PluginParameterType.MODEL_SELECTOR, "bad", "selector must be a dictionary"), + (PluginParameterType.APP_SELECTOR, "bad", "selector must be a dictionary"), + (PluginParameterType.TOOLS_SELECTOR, "bad", "tools selector must be a list"), + (PluginParameterType.ANY, object(), "var selector must be"), + ], + ) + def test_cast_parameter_value_selectors_invalid(self, parameter_type, value, message): + with pytest.raises(ValueError, match=message): + cast_parameter_value(parameter_type, value) + + @pytest.mark.parametrize( + ("parameter_type", "value", "expected"), + [ + (PluginParameterType.ARRAY, [1, 2], [1, 2]), + (PluginParameterType.ARRAY, "[1, 2]", [1, 2]), + (PluginParameterType.OBJECT, {"k": "v"}, {"k": "v"}), + (PluginParameterType.OBJECT, '{"a":1}', {"a": 1}), + ], + ) + def test_cast_parameter_value_array_and_object_valid(self, parameter_type, value, expected): + assert cast_parameter_value(parameter_type, value) == expected + + @pytest.mark.parametrize( + ("parameter_type", "value", "expected"), + [ + (PluginParameterType.ARRAY, "bad-json", ["bad-json"]), + (PluginParameterType.OBJECT, "bad-json", {}), + ], + ) + def test_cast_parameter_value_array_and_object_invalid_json_fallback(self, parameter_type, value, expected): + assert cast_parameter_value(parameter_type, value) == expected + + def test_cast_parameter_value_default_branch_and_wrapped_exception(self): + class _Unknown(StrEnum): + CUSTOM = "custom" + + assert cast_parameter_value(_Unknown.CUSTOM, 12) == "12" + + class _BadString: + def __str__(self): + raise RuntimeError("boom") + + with pytest.raises( + ValueError, + match=r"The tool parameter value <.*_BadString object at .* is not in correct type of string\.", + ): + cast_parameter_value(PluginParameterType.STRING, _BadString()) + + def test_init_frontend_parameter(self): + rule = PluginParameter( + name="choice", + label=self._label(), + required=True, + default="a", + options=[PluginParameterOption(value="a", label=self._label())], + ) + + assert init_frontend_parameter(rule, PluginParameterType.SELECT, None) == "a" + assert init_frontend_parameter(rule, PluginParameterType.NUMBER, 0) == 0 + with pytest.raises(ValueError, match="not in options"): + init_frontend_parameter(rule, PluginParameterType.SELECT, "b") + + required_rule = PluginParameter(name="required", label=self._label(), required=True, default=None) + with pytest.raises(ValueError, match="not found in tool config"): + init_frontend_parameter(required_rule, PluginParameterType.STRING, None) + + +class TestPluginDaemonEntities: + def test_credential_type_helpers(self): + assert CredentialType.API_KEY.get_name() == "API KEY" + assert CredentialType.OAUTH2.get_name() == "AUTH" + assert CredentialType.UNAUTHORIZED.get_name() == "UNAUTHORIZED" + + class _FakeCredential: + value = "custom-type" + + assert CredentialType.get_name(_FakeCredential()) == "CUSTOM TYPE" + assert CredentialType.API_KEY.is_editable() is True + assert CredentialType.OAUTH2.is_editable() is False + assert CredentialType.API_KEY.is_validate_allowed() is True + assert CredentialType.UNAUTHORIZED.is_validate_allowed() is False + assert set(CredentialType.values()) == {"api-key", "oauth2", "unauthorized"} + + @pytest.mark.parametrize( + ("raw", "expected"), + [ + ("api-key", CredentialType.API_KEY), + ("api_key", CredentialType.API_KEY), + ("oauth2", CredentialType.OAUTH2), + ("oauth", CredentialType.OAUTH2), + ("unauthorized", CredentialType.UNAUTHORIZED), + ], + ) + def test_credential_type_of(self, raw, expected): + assert CredentialType.of(raw) == expected + + def test_credential_type_of_invalid(self): + with pytest.raises(ValueError, match="Invalid credential type"): + CredentialType.of("invalid") + + +class TestPluginRequestEntities: + def test_request_invoke_llm_converts_prompt_messages(self): + payload = RequestInvokeLLM( + provider="openai", + model="gpt-4", + mode="chat", + prompt_messages=[ + {"role": "user", "content": "u"}, + {"role": "assistant", "content": "a"}, + {"role": "system", "content": "s"}, + {"role": "tool", "content": "t", "tool_call_id": "call-1"}, + ], + ) + + assert isinstance(payload.prompt_messages[0], UserPromptMessage) + assert isinstance(payload.prompt_messages[1], AssistantPromptMessage) + assert isinstance(payload.prompt_messages[2], SystemPromptMessage) + assert isinstance(payload.prompt_messages[3], ToolPromptMessage) + + def test_request_invoke_llm_prompt_messages_must_be_list(self): + with pytest.raises(ValidationError): + RequestInvokeLLM(provider="openai", model="gpt-4", mode="chat", prompt_messages="invalid") # type: ignore[arg-type] + + def test_request_invoke_speech2text_hex_conversion_and_error(self): + payload = RequestInvokeSpeech2Text(provider="openai", model="m", file=binascii.hexlify(b"abc").decode()) + assert payload.file == b"abc" + with pytest.raises(ValidationError): + RequestInvokeSpeech2Text(provider="openai", model="m", file=b"abc") # type: ignore[arg-type] + + def test_trigger_invoke_event_response_variables_conversion(self): + converted = TriggerInvokeEventResponse(variables='{"a": 1}', cancelled=False) + assert converted.variables == {"a": 1} + passthrough = TriggerInvokeEventResponse(variables={"b": 2}, cancelled=True) + assert passthrough.variables == {"b": 2} + + def test_trigger_dispatch_response_convert_response(self): + response = Response("ok", status=202, headers={"X-Req": "1"}) + encoded = binascii.hexlify(serialize_response(response)).decode() + parsed = TriggerDispatchResponse(user_id="u", events=["e"], response=encoded) + assert parsed.response.status_code == 202 + assert parsed.response.get_data() == b"ok" + with pytest.raises(ValidationError): + TriggerDispatchResponse(user_id="u", events=["e"], response="not-hex") + + def test_trigger_dispatch_response_payload_default(self): + response = Response("ok", status=200) + encoded = binascii.hexlify(serialize_response(response)).decode() + parsed = TriggerDispatchResponse(user_id="u", events=["e"], response=encoded) + assert parsed.payload == {} diff --git a/api/tests/unit_tests/core/plugin/utils/test_chunk_merger.py b/api/tests/unit_tests/core/plugin/utils/test_chunk_merger.py index e0eace0f2d..c7e94aa4cf 100644 --- a/api/tests/unit_tests/core/plugin/utils/test_chunk_merger.py +++ b/api/tests/unit_tests/core/plugin/utils/test_chunk_merger.py @@ -4,7 +4,10 @@ import pytest from core.agent.entities import AgentInvokeMessage from core.plugin.utils.chunk_merger import FileChunk, merge_blob_chunks -from core.tools.entities.tool_entities import ToolInvokeMessage +from core.plugin.utils.converter import convert_parameters_to_plugin_format +from core.tools.entities.tool_entities import ToolInvokeMessage, ToolParameter, ToolSelector +from dify_graph.file.enums import FileTransferMethod, FileType +from dify_graph.file.models import File class TestChunkMerger: @@ -458,3 +461,89 @@ class TestChunkMerger: assert len(result) == 1 assert isinstance(result[0].message, ToolInvokeMessage.BlobMessage) assert result[0].message.blob == b"FirstSecondThird" + + +class TestConverter: + def test_convert_parameters_to_plugin_format_with_single_file_and_selector(self): + file_param = File( + tenant_id="tenant-1", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/file.png", + storage_key="", + ) + selector = ToolSelector( + provider_id="org/plugin/provider", + credential_id=None, + tool_name="search", + tool_description="search tool", + tool_configuration={"k": "v"}, + tool_parameters={ + "query": ToolSelector.Parameter( + name="query", + type=ToolParameter.ToolParameterType.STRING, + required=True, + description="query", + default="python", + options=[], + ) + }, + ) + params = {"file": file_param, "selector": selector, "plain": 123} + + converted = convert_parameters_to_plugin_format(params) + + assert converted["file"]["url"] == "https://example.com/file.png" + assert converted["selector"]["provider_id"] == "org/plugin/provider" + assert converted["plain"] == 123 + + def test_convert_parameters_to_plugin_format_with_lists_and_passthrough_values(self): + file_one = File( + tenant_id="tenant-1", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/a.txt", + storage_key="", + ) + file_two = File( + tenant_id="tenant-1", + type=FileType.DOCUMENT, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/b.txt", + storage_key="", + ) + selector_one = ToolSelector( + provider_id="org/plugin/provider", + credential_id="cred-1", + tool_name="t1", + tool_description="tool 1", + tool_configuration={}, + tool_parameters={}, + ) + selector_two = ToolSelector( + provider_id="org/plugin/provider", + credential_id="cred-2", + tool_name="t2", + tool_description="tool 2", + tool_configuration={}, + tool_parameters={}, + ) + + params = { + "files": [file_one, file_two], + "selectors": [selector_one, selector_two], + "empty_list": [], + "mixed_list": [file_one, "raw"], + "none_value": None, + } + + converted = convert_parameters_to_plugin_format(params) + + assert [item["url"] for item in converted["files"]] == [ + "https://example.com/a.txt", + "https://example.com/b.txt", + ] + assert [item["tool_name"] for item in converted["selectors"]] == ["t1", "t2"] + assert converted["empty_list"] == [] + assert converted["mixed_list"] == [file_one, "raw"] + assert converted["none_value"] is None diff --git a/api/tests/unit_tests/core/plugin/utils/test_http_parser.py b/api/tests/unit_tests/core/plugin/utils/test_http_parser.py index 1c2e0c96f8..71144695bc 100644 --- a/api/tests/unit_tests/core/plugin/utils/test_http_parser.py +++ b/api/tests/unit_tests/core/plugin/utils/test_http_parser.py @@ -381,6 +381,54 @@ class TestEdgeCases: assert response.status_code == 200 assert response.get_data() == binary_body + def test_deserialize_request_with_lf_only_newlines(self): + raw_data = b"POST /lf-only?x=1 HTTP/1.1\nHost: localhost\nX-Test: yes\n\npayload" + + request = deserialize_request(raw_data) + + assert request.method == "POST" + assert request.path == "/lf-only" + assert request.args.get("x") == "1" + assert request.headers.get("X-Test") == "yes" + assert request.get_data() == b"payload" + + def test_deserialize_request_without_header_separator_uses_full_input_as_headers(self): + raw_data = b"GET /no-separator HTTP/1.1\nHost: localhost\nInvalidHeader\n" + + request = deserialize_request(raw_data) + + assert request.method == "GET" + assert request.path == "/no-separator" + assert request.headers.get("Host") == "localhost" + assert request.headers.get("InvalidHeader") is None + + def test_deserialize_request_empty_payload_raises(self): + with pytest.raises(ValueError, match="Empty HTTP request"): + deserialize_request(b"") + + def test_deserialize_response_with_lf_only_newlines(self): + raw_data = b"HTTP/1.1 202 Accepted\nX-Test: yes\n\nbody" + + response = deserialize_response(raw_data) + + assert response.status_code == 202 + assert response.headers.get("X-Test") == "yes" + assert response.get_data() == b"body" + + def test_deserialize_response_without_header_separator_uses_full_input_as_headers(self): + raw_data = b"HTTP/1.1 204 No Content\nX-Test: yes\nInvalidHeader\n" + + response = deserialize_response(raw_data) + + assert response.status_code == 204 + assert response.headers.get("X-Test") == "yes" + assert response.headers.get("InvalidHeader") is None + assert response.get_data() == b"" + + def test_deserialize_response_empty_payload_raises(self): + with pytest.raises(ValueError, match="Empty HTTP response"): + deserialize_response(b"") + class TestFileUploads: def test_serialize_request_with_text_file_upload(self): diff --git a/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py b/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py index 3e184cbf21..3d08525aba 100644 --- a/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py +++ b/api/tests/unit_tests/core/prompt/test_advanced_prompt_transform.py @@ -1,3 +1,4 @@ +from typing import cast from unittest.mock import MagicMock, patch import pytest @@ -13,6 +14,8 @@ from dify_graph.model_runtime.entities.message_entities import ( AssistantPromptMessage, ImagePromptMessageContent, PromptMessageRole, + SystemPromptMessage, + TextPromptMessageContent, UserPromptMessage, ) from models.model import Conversation @@ -188,3 +191,328 @@ def get_chat_model_args(): context = "I am superman." return model_config_mock, memory_config, prompt_messages, inputs, context + + +def test_get_prompt_dispatches_completion_and_chat_and_invalid(): + transform = AdvancedPromptTransform() + model_config = MagicMock(spec=ModelConfigEntity) + completion_template = CompletionModelPromptTemplate(text="Hello {{name}}", edition_type="basic") + chat_template = [ChatModelMessage(text="Hello {{name}}", role=PromptMessageRole.USER, edition_type="basic")] + + transform._get_completion_model_prompt_messages = MagicMock(return_value=[UserPromptMessage(content="c")]) + transform._get_chat_model_prompt_messages = MagicMock(return_value=[UserPromptMessage(content="h")]) + + completion_result = transform.get_prompt( + prompt_template=completion_template, + inputs={"name": "john"}, + query="q", + files=[], + context=None, + memory_config=None, + memory=None, + model_config=model_config, + ) + assert completion_result[0].content == "c" + + chat_result = transform.get_prompt( + prompt_template=chat_template, + inputs={"name": "john"}, + query="q", + files=[], + context=None, + memory_config=None, + memory=None, + model_config=model_config, + ) + assert chat_result[0].content == "h" + + invalid_result = transform.get_prompt( + prompt_template=cast(list, ["not-chat-model-message"]), + inputs={"name": "john"}, + query="q", + files=[], + context=None, + memory_config=None, + memory=None, + model_config=model_config, + ) + assert invalid_result == [] + + +def test_completion_prompt_jinja2_with_files(): + model_config_mock = MagicMock(spec=ModelConfigEntity) + transform = AdvancedPromptTransform() + completion_template = CompletionModelPromptTemplate(text="Hi {{name}}", edition_type="jinja2") + + file = File( + id="file1", + tenant_id="tenant1", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/image.jpg", + storage_key="", + ) + + with ( + patch("core.prompt.advanced_prompt_transform.Jinja2Formatter.format", return_value="Hi John"), + patch("core.prompt.advanced_prompt_transform.file_manager.to_prompt_message_content") as to_content, + ): + to_content.return_value = ImagePromptMessageContent( + url="https://example.com/image.jpg", format="jpg", mime_type="image/jpg" + ) + messages = transform._get_completion_model_prompt_messages( + prompt_template=completion_template, + inputs={"name": "John"}, + query="", + files=[file], + context=None, + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + + assert len(messages) == 1 + assert isinstance(messages[0].content, list) + assert messages[0].content[0].data == "https://example.com/image.jpg" + assert isinstance(messages[0].content[1], TextPromptMessageContent) + assert messages[0].content[1].data == "Hi John" + + +def test_completion_prompt_basic_sets_query_variable(): + model_config_mock = MagicMock(spec=ModelConfigEntity) + transform = AdvancedPromptTransform() + template = CompletionModelPromptTemplate(text="Q={{#query#}}", edition_type="basic") + + messages = transform._get_completion_model_prompt_messages( + prompt_template=template, + inputs={}, + query="what?", + files=[], + context=None, + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + + assert messages[0].content == "Q=what?" + + +def test_chat_prompt_with_variable_template_and_context(): + transform = AdvancedPromptTransform(with_variable_tmpl=True) + model_config_mock = MagicMock(spec=ModelConfigEntity) + prompt_template = [ChatModelMessage(text="sys={{#node.name#}} ctx={{#context#}}", role=PromptMessageRole.SYSTEM)] + + messages = transform._get_chat_model_prompt_messages( + prompt_template=prompt_template, + inputs={"#node.name#": "john"}, + query=None, + files=[], + context="context-text", + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + + assert len(messages) == 1 + assert isinstance(messages[0], SystemPromptMessage) + assert messages[0].content == "sys=john ctx=context-text" + + +def test_chat_prompt_jinja2_branch_and_invalid_edition(): + transform = AdvancedPromptTransform() + model_config_mock = MagicMock(spec=ModelConfigEntity) + prompt_template = [ChatModelMessage(text="Hello {{name}}", role=PromptMessageRole.USER, edition_type="jinja2")] + + with patch("core.prompt.advanced_prompt_transform.Jinja2Formatter.format", return_value="Hello John"): + messages = transform._get_chat_model_prompt_messages( + prompt_template=prompt_template, + inputs={"name": "John"}, + query=None, + files=[], + context=None, + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + assert messages[0].content == "Hello John" + + bad_prompt_template = [ChatModelMessage.model_construct(text="bad", role=PromptMessageRole.USER, edition_type="x")] + with pytest.raises(ValueError, match="Invalid edition type"): + transform._get_chat_model_prompt_messages( + prompt_template=bad_prompt_template, + inputs={}, + query=None, + files=[], + context=None, + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + + +def test_chat_prompt_query_template_and_query_only_branch(): + transform = AdvancedPromptTransform() + model_config_mock = MagicMock(spec=ModelConfigEntity) + memory_config = MemoryConfig( + window=MemoryConfig.WindowConfig(enabled=False), + query_prompt_template="query={{#sys.query#}} ctx={{#context#}}", + ) + prompt_template = [ChatModelMessage(text="sys", role=PromptMessageRole.SYSTEM)] + + messages = transform._get_chat_model_prompt_messages( + prompt_template=prompt_template, + inputs={}, + query="what", + files=[], + context="ctx", + memory_config=memory_config, + memory=None, + model_config=model_config_mock, + ) + assert messages[-1].content == "query={{#sys.query#}} ctx=ctx" + + +def test_chat_prompt_memory_with_files_and_query(): + transform = AdvancedPromptTransform() + model_config_mock = MagicMock(spec=ModelConfigEntity) + memory_config = MemoryConfig(window=MemoryConfig.WindowConfig(enabled=False)) + memory = MagicMock(spec=TokenBufferMemory) + prompt_template = [ChatModelMessage(text="sys", role=PromptMessageRole.SYSTEM)] + file = File( + id="file1", + tenant_id="tenant1", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/image.jpg", + storage_key="", + ) + + transform._append_chat_histories = MagicMock( + side_effect=lambda memory, memory_config, prompt_messages, **kwargs: prompt_messages + ) + with patch("core.prompt.advanced_prompt_transform.file_manager.to_prompt_message_content") as to_content: + to_content.return_value = ImagePromptMessageContent( + url="https://example.com/image.jpg", format="jpg", mime_type="image/jpg" + ) + messages = transform._get_chat_model_prompt_messages( + prompt_template=prompt_template, + inputs={}, + query="q", + files=[file], + context=None, + memory_config=memory_config, + memory=memory, + model_config=model_config_mock, + ) + + assert isinstance(messages[-1].content, list) + assert messages[-1].content[1].data == "q" + + +def test_chat_prompt_files_without_query_updates_last_user_or_appends_new(): + transform = AdvancedPromptTransform() + model_config_mock = MagicMock(spec=ModelConfigEntity) + file = File( + id="file1", + tenant_id="tenant1", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/image.jpg", + storage_key="", + ) + + prompt_with_last_user = [ChatModelMessage(text="u", role=PromptMessageRole.USER)] + with patch("core.prompt.advanced_prompt_transform.file_manager.to_prompt_message_content") as to_content: + to_content.return_value = ImagePromptMessageContent( + url="https://example.com/image.jpg", format="jpg", mime_type="image/jpg" + ) + messages = transform._get_chat_model_prompt_messages( + prompt_template=prompt_with_last_user, + inputs={}, + query=None, + files=[file], + context=None, + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + assert isinstance(messages[-1].content, list) + assert messages[-1].content[1].data == "u" + + prompt_without_last_user = [ChatModelMessage(text="s", role=PromptMessageRole.SYSTEM)] + with patch("core.prompt.advanced_prompt_transform.file_manager.to_prompt_message_content") as to_content: + to_content.return_value = ImagePromptMessageContent( + url="https://example.com/image.jpg", format="jpg", mime_type="image/jpg" + ) + messages = transform._get_chat_model_prompt_messages( + prompt_template=prompt_without_last_user, + inputs={}, + query=None, + files=[file], + context=None, + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + assert isinstance(messages[-1], UserPromptMessage) + assert isinstance(messages[-1].content, list) + assert messages[-1].content[1].data == "" + + +def test_chat_prompt_files_with_query_branch(): + transform = AdvancedPromptTransform() + model_config_mock = MagicMock(spec=ModelConfigEntity) + file = File( + id="file1", + tenant_id="tenant1", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/image.jpg", + storage_key="", + ) + + with patch("core.prompt.advanced_prompt_transform.file_manager.to_prompt_message_content") as to_content: + to_content.return_value = ImagePromptMessageContent( + url="https://example.com/image.jpg", format="jpg", mime_type="image/jpg" + ) + messages = transform._get_chat_model_prompt_messages( + prompt_template=[], + inputs={}, + query="query-text", + files=[file], + context=None, + memory_config=None, + memory=None, + model_config=model_config_mock, + ) + + assert isinstance(messages[-1].content, list) + assert messages[-1].content[1].data == "query-text" + + +def test_set_context_query_histories_variable_helpers(): + transform = AdvancedPromptTransform() + parser_context = PromptTemplateParser(template="{{#context#}}") + parser_query = PromptTemplateParser(template="{{#query#}}") + parser_hist = PromptTemplateParser(template="{{#histories#}}") + model_config_mock = MagicMock(spec=ModelConfigEntity) + memory_config = MemoryConfig( + role_prefix=MemoryConfig.RolePrefix(user="Human", assistant="Assistant"), + window=MemoryConfig.WindowConfig(enabled=False), + ) + + assert transform._set_context_variable(None, parser_context, {})["#context#"] == "" + assert transform._set_query_variable("", parser_query, {})["#query#"] == "" + assert transform._set_query_variable("x", parser_query, {})["#query#"] == "x" + assert ( + transform._set_histories_variable( + memory=None, # type: ignore[arg-type] + memory_config=memory_config, + raw_prompt="{{#histories#}}", + role_prefix=memory_config.role_prefix, # type: ignore[arg-type] + parser=parser_hist, + prompt_inputs={}, + model_config=model_config_mock, + )["#histories#"] + == "" + ) diff --git a/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py index e3e500e310..1b114b369a 100644 --- a/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py +++ b/api/tests/unit_tests/core/prompt/test_extract_thread_messages.py @@ -2,12 +2,14 @@ from uuid import uuid4 from constants import UUID_NIL from core.prompt.utils.extract_thread_messages import extract_thread_messages +from core.prompt.utils.get_thread_messages_length import get_thread_messages_length class MockMessage: - def __init__(self, id, parent_message_id): + def __init__(self, id, parent_message_id, answer="answer"): self.id = id self.parent_message_id = parent_message_id + self.answer = answer def __getitem__(self, item): return getattr(self, item) @@ -89,3 +91,44 @@ def test_extract_thread_messages_mixed_with_legacy_messages(): result = extract_thread_messages(messages) assert len(result) == 4 assert [msg["id"] for msg in result] == [id5, id4, id2, id1] + + +def test_extract_thread_messages_breaks_when_parent_is_none(): + id1, id2 = str(uuid4()), str(uuid4()) + messages = [MockMessage(id2, None), MockMessage(id1, UUID_NIL)] + + result = extract_thread_messages(messages) + + assert len(result) == 1 + assert result[0].id == id2 + + +def test_get_thread_messages_length_excludes_newly_created_empty_answer(mocker): + id1, id2 = str(uuid4()), str(uuid4()) + messages = [ + MockMessage(id2, id1, answer=""), # newest generated message should be excluded + MockMessage(id1, UUID_NIL, answer="ok"), + ] + + mock_scalars = mocker.patch("core.prompt.utils.get_thread_messages_length.db.session.scalars") + mock_scalars.return_value.all.return_value = messages + + length = get_thread_messages_length("conversation-1") + + assert length == 1 + mock_scalars.assert_called_once() + + +def test_get_thread_messages_length_keeps_non_empty_latest_answer(mocker): + id1, id2 = str(uuid4()), str(uuid4()) + messages = [ + MockMessage(id2, id1, answer="latest-answer"), + MockMessage(id1, UUID_NIL, answer="older-answer"), + ] + + mock_scalars = mocker.patch("core.prompt.utils.get_thread_messages_length.db.session.scalars") + mock_scalars.return_value.all.return_value = messages + + length = get_thread_messages_length("conversation-2") + + assert length == 2 diff --git a/api/tests/unit_tests/core/prompt/test_prompt_message.py b/api/tests/unit_tests/core/prompt/test_prompt_message.py index 4136816562..9fc300348a 100644 --- a/api/tests/unit_tests/core/prompt/test_prompt_message.py +++ b/api/tests/unit_tests/core/prompt/test_prompt_message.py @@ -1,6 +1,11 @@ +from core.prompt.simple_prompt_transform import ModelMode +from core.prompt.utils.prompt_message_util import PromptMessageUtil from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + AudioPromptMessageContent, ImagePromptMessageContent, TextPromptMessageContent, + ToolPromptMessage, UserPromptMessage, ) @@ -25,3 +30,82 @@ def test_dump_prompt_message(): ) data = prompt.model_dump() assert data["content"][0].get("url") == example_url + + +def test_prompt_messages_to_prompt_for_saving_chat_mode(): + chat_messages = [ + UserPromptMessage( + content=[ + TextPromptMessageContent(data="hello "), + ImagePromptMessageContent( + url="https://example.com/image1.jpg", + format="jpg", + mime_type="image/jpeg", + detail=ImagePromptMessageContent.DETAIL.HIGH, + ), + AudioPromptMessageContent( + url="https://example.com/audio1.mp3", + format="mp3", + mime_type="audio/mpeg", + ), + TextPromptMessageContent(data="world"), + ] + ), + AssistantPromptMessage( + content="assistant-text", + tool_calls=[ + { + "id": "tool-1", + "type": "function", + "function": {"name": "search", "arguments": '{"q":"python"}'}, + } + ], + ), + ToolPromptMessage(content="tool-output", name="search", tool_call_id="tool-1"), + UserPromptMessage.model_construct(role="unknown", content="skip"), # type: ignore[arg-type] + ] + + prompts = PromptMessageUtil.prompt_messages_to_prompt_for_saving(ModelMode.CHAT, chat_messages) + + assert len(prompts) == 3 + assert prompts[0]["role"] == "user" + assert prompts[0]["text"] == "hello world" + assert prompts[0]["files"][0]["type"] == "image" + assert prompts[0]["files"][1]["type"] == "audio" + + assert prompts[1]["role"] == "assistant" + assert prompts[1]["text"] == "assistant-text" + assert prompts[1]["tool_calls"][0]["function"]["name"] == "search" + assert prompts[2]["role"] == "tool" + + +def test_prompt_messages_to_prompt_for_saving_completion_mode_with_and_without_files(): + completion_message_with_files = UserPromptMessage( + content=[ + TextPromptMessageContent(data="first "), + TextPromptMessageContent(data="second"), + ImagePromptMessageContent( + url="https://example.com/image2.jpg", + format="jpg", + mime_type="image/jpeg", + detail=ImagePromptMessageContent.DETAIL.LOW, + ), + ] + ) + prompts = PromptMessageUtil.prompt_messages_to_prompt_for_saving( + ModelMode.COMPLETION, [completion_message_with_files] + ) + assert prompts == [ + { + "role": "user", + "text": "first second", + "files": prompts[0]["files"], + } + ] + assert prompts[0]["files"][0]["type"] == "image" + + completion_message_text_only = UserPromptMessage(content="plain text") + prompts = PromptMessageUtil.prompt_messages_to_prompt_for_saving( + ModelMode.COMPLETION, [completion_message_text_only] + ) + assert prompts == [{"role": "user", "text": "plain text"}] diff --git a/api/tests/unit_tests/core/prompt/test_prompt_transform.py b/api/tests/unit_tests/core/prompt/test_prompt_transform.py index 7976120547..d379e3067a 100644 --- a/api/tests/unit_tests/core/prompt/test_prompt_transform.py +++ b/api/tests/unit_tests/core/prompt/test_prompt_transform.py @@ -1,4 +1,10 @@ -# from unittest.mock import MagicMock +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from core.prompt.prompt_transform import PromptTransform +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey # from core.app.app_config.entities import ModelConfigEntity # from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle @@ -9,44 +15,217 @@ # from core.prompt.prompt_transform import PromptTransform -# def test__calculate_rest_token(): -# model_schema_mock = MagicMock(spec=AIModelEntity) -# parameter_rule_mock = MagicMock(spec=ParameterRule) -# parameter_rule_mock.name = "max_tokens" -# model_schema_mock.parameter_rules = [parameter_rule_mock] -# model_schema_mock.model_properties = {ModelPropertyKey.CONTEXT_SIZE: 62} +class TestPromptTransform: + def test_resolve_model_runtime_requires_model_config_or_instance(self): + transform = PromptTransform() -# large_language_model_mock = MagicMock(spec=LargeLanguageModel) -# large_language_model_mock.get_num_tokens.return_value = 6 + with pytest.raises(ValueError, match="Either model_config or model_instance must be provided."): + transform._resolve_model_runtime() -# provider_mock = MagicMock(spec=ProviderEntity) -# provider_mock.provider = "openai" + def test_resolve_model_runtime_builds_model_instance_from_model_config(self): + transform = PromptTransform() + fake_model_schema = SimpleNamespace(model_properties={}, parameter_rules=[]) + fake_model_type_instance = MagicMock() + fake_model_type_instance.get_model_schema.return_value = fake_model_schema + fake_model_instance = SimpleNamespace( + model_type_instance=fake_model_type_instance, + model_name="resolved-model", + credentials=None, + parameters=None, + stop=None, + ) + model_config = SimpleNamespace( + provider_model_bundle=object(), + model="config-model", + credentials={"api_key": "secret"}, + parameters={"temperature": 0.1}, + stop=["END"], + model_schema=SimpleNamespace(model_properties={}, parameter_rules=[]), + ) -# provider_configuration_mock = MagicMock(spec=ProviderConfiguration) -# provider_configuration_mock.provider = provider_mock -# provider_configuration_mock.model_settings = None + with patch( + "core.prompt.prompt_transform.ModelInstance", return_value=fake_model_instance + ) as model_instance_cls: + model_instance, model_schema = transform._resolve_model_runtime(model_config=model_config) -# provider_model_bundle_mock = MagicMock(spec=ProviderModelBundle) -# provider_model_bundle_mock.model_type_instance = large_language_model_mock -# provider_model_bundle_mock.configuration = provider_configuration_mock + model_instance_cls.assert_called_once_with( + provider_model_bundle=model_config.provider_model_bundle, + model=model_config.model, + ) + fake_model_type_instance.get_model_schema.assert_called_once_with( + model="resolved-model", + credentials={"api_key": "secret"}, + ) + assert model_instance is fake_model_instance + assert model_instance.credentials == {"api_key": "secret"} + assert model_instance.parameters == {"temperature": 0.1} + assert model_instance.stop == ["END"] + assert model_schema is fake_model_schema -# model_config_mock = MagicMock(spec=ModelConfigEntity) -# model_config_mock.model = "gpt-4" -# model_config_mock.credentials = {} -# model_config_mock.parameters = {"max_tokens": 50} -# model_config_mock.model_schema = model_schema_mock -# model_config_mock.provider_model_bundle = provider_model_bundle_mock + def test_resolve_model_runtime_uses_model_config_schema_fallback(self): + transform = PromptTransform() + fallback_schema = SimpleNamespace(model_properties={}, parameter_rules=[]) + fake_model_type_instance = MagicMock() + fake_model_type_instance.get_model_schema.return_value = None + model_instance = SimpleNamespace( + model_type_instance=fake_model_type_instance, + model_name="resolved-model", + credentials={"api_key": "secret"}, + parameters={}, + ) + model_config = SimpleNamespace(model_schema=fallback_schema) -# prompt_transform = PromptTransform() + resolved_model_instance, resolved_schema = transform._resolve_model_runtime( + model_config=model_config, + model_instance=model_instance, + ) -# prompt_messages = [UserPromptMessage(content="Hello, how are you?")] -# rest_tokens = prompt_transform._calculate_rest_token(prompt_messages, model_config_mock) + assert resolved_model_instance is model_instance + assert resolved_schema is fallback_schema -# # Validate based on the mock configuration and expected logic -# expected_rest_tokens = ( -# model_schema_mock.model_properties[ModelPropertyKey.CONTEXT_SIZE] -# - model_config_mock.parameters["max_tokens"] -# - large_language_model_mock.get_num_tokens.return_value -# ) -# assert rest_tokens == expected_rest_tokens -# assert rest_tokens == 6 + def test_resolve_model_runtime_raises_when_schema_missing_without_model_config(self): + transform = PromptTransform() + fake_model_type_instance = MagicMock() + fake_model_type_instance.get_model_schema.return_value = None + model_instance = SimpleNamespace( + model_type_instance=fake_model_type_instance, + model_name="resolved-model", + credentials={"api_key": "secret"}, + parameters={}, + ) + + with pytest.raises(ValueError, match="Model schema not found for the provided model instance."): + transform._resolve_model_runtime(model_instance=model_instance) + + def test_calculate_rest_token_defaults_when_context_size_missing(self): + transform = PromptTransform() + fake_model_instance = SimpleNamespace(parameters={}, get_llm_num_tokens=lambda _: 0) + fake_model_schema = SimpleNamespace(model_properties={}, parameter_rules=[]) + transform._resolve_model_runtime = MagicMock(return_value=(fake_model_instance, fake_model_schema)) + model_config = SimpleNamespace( + model_schema=SimpleNamespace(model_properties={}, parameter_rules=[]), + provider_model_bundle=object(), + model="test-model", + parameters={}, + ) + + rest = transform._calculate_rest_token([], model_config=model_config) + + assert rest == 2000 + + def test_calculate_rest_token_uses_max_tokens_and_clamps_to_zero(self): + transform = PromptTransform() + + parameter_rule = SimpleNamespace(name="max_tokens", use_template=None) + fake_model_instance = SimpleNamespace(parameters={"max_tokens": 50}, get_llm_num_tokens=lambda _: 95) + fake_model_schema = SimpleNamespace( + model_properties={ModelPropertyKey.CONTEXT_SIZE: 100}, + parameter_rules=[parameter_rule], + ) + transform._resolve_model_runtime = MagicMock(return_value=(fake_model_instance, fake_model_schema)) + model_config = SimpleNamespace( + model_schema=SimpleNamespace( + model_properties={ModelPropertyKey.CONTEXT_SIZE: 100}, + parameter_rules=[parameter_rule], + ), + provider_model_bundle=object(), + model="test-model", + parameters={"max_tokens": 50}, + ) + + rest = transform._calculate_rest_token([SimpleNamespace()], model_config=model_config) + + assert rest == 0 + + def test_calculate_rest_token_supports_use_template_parameter(self): + transform = PromptTransform() + + parameter_rule = SimpleNamespace(name="generation_max", use_template="max_tokens") + fake_model_instance = SimpleNamespace(parameters={"max_tokens": 30}, get_llm_num_tokens=lambda _: 20) + fake_model_schema = SimpleNamespace( + model_properties={ModelPropertyKey.CONTEXT_SIZE: 200}, + parameter_rules=[parameter_rule], + ) + transform._resolve_model_runtime = MagicMock(return_value=(fake_model_instance, fake_model_schema)) + model_config = SimpleNamespace( + model_schema=SimpleNamespace( + model_properties={ModelPropertyKey.CONTEXT_SIZE: 200}, + parameter_rules=[parameter_rule], + ), + provider_model_bundle=object(), + model="test-model", + parameters={"max_tokens": 30}, + ) + + rest = transform._calculate_rest_token([SimpleNamespace()], model_config=model_config) + + assert rest == 150 + + def test_get_history_messages_from_memory_with_and_without_window(self): + transform = PromptTransform() + memory = MagicMock() + memory.get_history_prompt_text.return_value = "history" + + memory_config_with_window = SimpleNamespace(window=SimpleNamespace(enabled=True, size=3)) + result = transform._get_history_messages_from_memory( + memory=memory, + memory_config=memory_config_with_window, + max_token_limit=100, + human_prefix="Human", + ai_prefix="Assistant", + ) + + assert result == "history" + memory.get_history_prompt_text.assert_called_with( + max_token_limit=100, + human_prefix="Human", + ai_prefix="Assistant", + message_limit=3, + ) + + memory.reset_mock() + memory_config_no_window = SimpleNamespace(window=SimpleNamespace(enabled=False, size=2)) + transform._get_history_messages_from_memory( + memory=memory, + memory_config=memory_config_no_window, + max_token_limit=50, + ) + memory.get_history_prompt_text.assert_called_with(max_token_limit=50) + + def test_get_history_messages_list_from_memory_with_and_without_window(self): + transform = PromptTransform() + memory = MagicMock() + memory.get_history_prompt_messages.return_value = ["m1", "m2"] + + memory_config_window = SimpleNamespace(window=SimpleNamespace(enabled=True, size=2)) + result = transform._get_history_messages_list_from_memory(memory, memory_config_window, 120) + assert result == ["m1", "m2"] + memory.get_history_prompt_messages.assert_called_with(max_token_limit=120, message_limit=2) + + memory.reset_mock() + memory.get_history_prompt_messages.return_value = ["only"] + memory_config_no_window = SimpleNamespace(window=SimpleNamespace(enabled=True, size=0)) + result = transform._get_history_messages_list_from_memory(memory, memory_config_no_window, 10) + assert result == ["only"] + memory.get_history_prompt_messages.assert_called_with(max_token_limit=10, message_limit=None) + + def test_append_chat_histories_extends_prompt_messages(self, monkeypatch): + transform = PromptTransform() + memory = MagicMock() + memory_config = SimpleNamespace(window=SimpleNamespace(enabled=False, size=None)) + + monkeypatch.setattr(transform, "_calculate_rest_token", lambda prompt_messages, **kwargs: 99) + monkeypatch.setattr( + transform, + "_get_history_messages_list_from_memory", + lambda memory, memory_config, max_token_limit: ["h1", "h2"], + ) + + result = transform._append_chat_histories( + memory=memory, + memory_config=memory_config, + prompt_messages=["p1"], + model_config=SimpleNamespace(), + ) + + assert result == ["p1", "h1", "h2"] diff --git a/api/tests/unit_tests/core/prompt/test_simple_prompt_transform.py b/api/tests/unit_tests/core/prompt/test_simple_prompt_transform.py index 2ef66e8a96..e6d28224d7 100644 --- a/api/tests/unit_tests/core/prompt/test_simple_prompt_transform.py +++ b/api/tests/unit_tests/core/prompt/test_simple_prompt_transform.py @@ -1,9 +1,29 @@ -from unittest.mock import MagicMock +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest from core.app.entities.app_invoke_entities import ModelConfigWithCredentialsEntity from core.memory.token_buffer_memory import TokenBufferMemory +from core.prompt.prompt_templates.advanced_prompt_templates import ( + BAICHUAN_CHAT_APP_CHAT_PROMPT_CONFIG, + BAICHUAN_CHAT_APP_COMPLETION_PROMPT_CONFIG, + BAICHUAN_COMPLETION_APP_CHAT_PROMPT_CONFIG, + BAICHUAN_COMPLETION_APP_COMPLETION_PROMPT_CONFIG, + BAICHUAN_CONTEXT, + CHAT_APP_CHAT_PROMPT_CONFIG, + CHAT_APP_COMPLETION_PROMPT_CONFIG, + COMPLETION_APP_CHAT_PROMPT_CONFIG, + COMPLETION_APP_COMPLETION_PROMPT_CONFIG, + CONTEXT, +) from core.prompt.simple_prompt_transform import SimplePromptTransform -from dify_graph.model_runtime.entities.message_entities import AssistantPromptMessage, UserPromptMessage +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + ImagePromptMessageContent, + TextPromptMessageContent, + UserPromptMessage, +) from models.model import AppMode, Conversation @@ -244,3 +264,178 @@ def test__get_completion_model_prompt_messages(): assert len(prompt_messages) == 1 assert stops == prompt_rules.get("stops") assert prompt_messages[0].content == real_prompt + + +def test_get_prompt_dispatches_chat_and_completion(): + transform = SimplePromptTransform() + model_config_chat = MagicMock(spec=ModelConfigWithCredentialsEntity) + model_config_chat.mode = "chat" + model_config_completion = MagicMock(spec=ModelConfigWithCredentialsEntity) + model_config_completion.mode = "completion" + prompt_entity = SimpleNamespace(simple_prompt_template="hello") + + transform._get_chat_model_prompt_messages = MagicMock(return_value=(["chat-msg"], None)) + transform._get_completion_model_prompt_messages = MagicMock(return_value=(["completion-msg"], ["stop"])) + + chat_messages, chat_stops = transform.get_prompt( + app_mode=AppMode.CHAT, + prompt_template_entity=prompt_entity, + inputs={"n": 1}, + query="q", + files=[], + context=None, + memory=None, + model_config=model_config_chat, + ) + assert chat_messages == ["chat-msg"] + assert chat_stops is None + + completion_messages, completion_stops = transform.get_prompt( + app_mode=AppMode.CHAT, + prompt_template_entity=prompt_entity, + inputs={"n": 1}, + query="q", + files=[], + context=None, + memory=None, + model_config=model_config_completion, + ) + assert completion_messages == ["completion-msg"] + assert completion_stops == ["stop"] + + +def test_get_prompt_str_and_rules_type_validation_errors(): + transform = SimplePromptTransform() + model_config = MagicMock(spec=ModelConfigWithCredentialsEntity) + model_config.provider = "openai" + model_config.model = "gpt-4" + valid_prompt_template = SimplePromptTransform().get_prompt_template( + AppMode.CHAT, "openai", "gpt-4", "", False, False + )["prompt_template"] + + bad_custom_keys = { + "prompt_template": valid_prompt_template, + "custom_variable_keys": "not-list", + "special_variable_keys": [], + "prompt_rules": {}, + } + transform.get_prompt_template = MagicMock(return_value=bad_custom_keys) + with pytest.raises(TypeError, match="custom_variable_keys"): + transform._get_prompt_str_and_rules(AppMode.CHAT, model_config, "", {}, query=None, context=None) + + bad_special_keys = { + **bad_custom_keys, + "custom_variable_keys": [], + "special_variable_keys": "not-list", + } + transform.get_prompt_template = MagicMock(return_value=bad_special_keys) + with pytest.raises(TypeError, match="special_variable_keys"): + transform._get_prompt_str_and_rules(AppMode.CHAT, model_config, "", {}, query=None, context=None) + + bad_prompt_template = { + **bad_custom_keys, + "custom_variable_keys": [], + "special_variable_keys": [], + "prompt_template": 123, + } + transform.get_prompt_template = MagicMock(return_value=bad_prompt_template) + with pytest.raises(TypeError, match="PromptTemplateParser"): + transform._get_prompt_str_and_rules(AppMode.CHAT, model_config, "", {}, query=None, context=None) + + bad_prompt_rules = { + **bad_custom_keys, + "custom_variable_keys": [], + "special_variable_keys": [], + "prompt_template": valid_prompt_template, + "prompt_rules": "not-dict", + } + transform.get_prompt_template = MagicMock(return_value=bad_prompt_rules) + with pytest.raises(TypeError, match="prompt_rules"): + transform._get_prompt_str_and_rules(AppMode.CHAT, model_config, "", {}, query=None, context=None) + + +def test_chat_model_prompt_messages_uses_prompt_when_query_empty(): + transform = SimplePromptTransform() + model_config = MagicMock(spec=ModelConfigWithCredentialsEntity) + transform._get_prompt_str_and_rules = MagicMock(return_value=("prompt-text", {})) + transform._get_last_user_message = MagicMock(return_value=UserPromptMessage(content="prompt-text")) + + prompt_messages, _ = transform._get_chat_model_prompt_messages( + app_mode=AppMode.CHAT, + pre_prompt="", + inputs={}, + query="", + files=[], + context=None, + memory=None, + model_config=model_config, + ) + + assert prompt_messages[0].content == "prompt-text" + transform._get_last_user_message.assert_called_once_with("prompt-text", [], None, None) + + +def test_completion_model_prompt_messages_empty_stops_becomes_none(): + transform = SimplePromptTransform() + model_config = MagicMock(spec=ModelConfigWithCredentialsEntity) + transform._get_prompt_str_and_rules = MagicMock(return_value=("prompt", {"stops": []})) + + prompt_messages, stops = transform._get_completion_model_prompt_messages( + app_mode=AppMode.CHAT, + pre_prompt="", + inputs={}, + query="q", + files=[], + context=None, + memory=None, + model_config=model_config, + ) + + assert len(prompt_messages) == 1 + assert stops is None + + +def test_get_last_user_message_with_files_and_context_files(): + transform = SimplePromptTransform() + file = SimpleNamespace() + context_file = SimpleNamespace() + + with patch("core.prompt.simple_prompt_transform.file_manager.to_prompt_message_content") as to_content: + to_content.side_effect = [ + ImagePromptMessageContent(url="https://example.com/a.jpg", format="jpg", mime_type="image/jpg"), + ImagePromptMessageContent(url="https://example.com/b.jpg", format="jpg", mime_type="image/jpg"), + ] + message = transform._get_last_user_message( + prompt="hello", + files=[file], + context_files=[context_file], + image_detail_config=None, + ) + + assert isinstance(message.content, list) + assert message.content[0].data == "https://example.com/a.jpg" + assert message.content[1].data == "https://example.com/b.jpg" + assert isinstance(message.content[2], TextPromptMessageContent) + assert message.content[2].data == "hello" + + +def test_prompt_file_name_branches(): + transform = SimplePromptTransform() + + assert transform._prompt_file_name(AppMode.CHAT, "openai", "gpt-4") == "common_chat" + assert transform._prompt_file_name(AppMode.COMPLETION, "openai", "gpt-4") == "common_completion" + assert transform._prompt_file_name(AppMode.COMPLETION, "baichuan", "Baichuan2") == "baichuan_completion" + assert transform._prompt_file_name(AppMode.CHAT, "huggingface_hub", "baichuan-13b") == "baichuan_chat" + + +def test_advanced_prompt_templates_constants_are_importable(): + assert isinstance(CONTEXT, str) + assert isinstance(BAICHUAN_CONTEXT, str) + assert "completion_prompt_config" in CHAT_APP_COMPLETION_PROMPT_CONFIG + assert "chat_prompt_config" in CHAT_APP_CHAT_PROMPT_CONFIG + assert "chat_prompt_config" in COMPLETION_APP_CHAT_PROMPT_CONFIG + assert "completion_prompt_config" in COMPLETION_APP_COMPLETION_PROMPT_CONFIG + assert "completion_prompt_config" in BAICHUAN_CHAT_APP_COMPLETION_PROMPT_CONFIG + assert "chat_prompt_config" in BAICHUAN_CHAT_APP_CHAT_PROMPT_CONFIG + assert "chat_prompt_config" in BAICHUAN_COMPLETION_APP_CHAT_PROMPT_CONFIG + assert "completion_prompt_config" in BAICHUAN_COMPLETION_APP_COMPLETION_PROMPT_CONFIG diff --git a/api/tests/unit_tests/core/rag/docstore/test_dataset_docstore.py b/api/tests/unit_tests/core/rag/docstore/test_dataset_docstore.py new file mode 100644 index 0000000000..13285cdad0 --- /dev/null +++ b/api/tests/unit_tests/core/rag/docstore/test_dataset_docstore.py @@ -0,0 +1,813 @@ +""" +Unit tests for DatasetDocumentStore. + +Tests cover all public methods and error paths of the DatasetDocumentStore class +which provides document storage and retrieval functionality for datasets in the RAG system. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from core.rag.docstore.dataset_docstore import DatasetDocumentStore, DocumentSegment +from core.rag.models.document import AttachmentDocument, Document +from models.dataset import Dataset + + +class TestDatasetDocumentStoreInit: + """Tests for DatasetDocumentStore initialization.""" + + def test_init_with_all_parameters(self): + """Test initialization with dataset, user_id, and document_id.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + assert store._dataset == mock_dataset + assert store._user_id == "test-user-id" + assert store._document_id == "test-doc-id" + assert store.dataset_id == "test-dataset-id" + assert store.user_id == "test-user-id" + + def test_init_without_document_id(self): + """Test initialization without document_id.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + assert store._document_id is None + assert store.dataset_id == "test-dataset-id" + + +class TestDatasetDocumentStoreSerialization: + """Tests for to_dict and from_dict methods.""" + + def test_to_dict(self): + """Test serialization to dictionary.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.to_dict() + + assert result == {"dataset_id": "test-dataset-id"} + + def test_from_dict(self): + """Test deserialization from dictionary.""" + + config_dict = { + "dataset": MagicMock(spec=["id"]), + "user_id": "test-user", + "document_id": "test-doc", + } + config_dict["dataset"].id = "ds-123" + + store = DatasetDocumentStore.from_dict(config_dict) + + assert store._user_id == "test-user" + assert store._document_id == "test-doc" + + +class TestDatasetDocumentStoreDocs: + """Tests for the docs property.""" + + def test_docs_returns_document_dict(self): + """Test that docs property returns a dictionary of documents.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_segment = MagicMock(spec=DocumentSegment) + mock_segment.index_node_id = "node-1" + mock_segment.index_node_hash = "hash-1" + mock_segment.document_id = "doc-1" + mock_segment.dataset_id = "test-dataset-id" + mock_segment.content = "Test content" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.scalars.return_value.all.return_value = [mock_segment] + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.docs + + assert "node-1" in result + assert isinstance(result["node-1"], Document) + + def test_docs_empty_dataset(self): + """Test docs property with no segments.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.scalars.return_value.all.return_value = [] + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.docs + + assert result == {} + + +class TestDatasetDocumentStoreAddDocuments: + """Tests for add_documents method.""" + + def test_add_documents_new_document_with_embedding(self): + """Test adding new documents with embedding model.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + mock_dataset.indexing_technique = "high_quality" + mock_dataset.embedding_model_provider = "provider" + mock_dataset.embedding_model = "model" + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Test content" + mock_doc.metadata = { + "doc_id": "doc-1", + "doc_hash": "hash-1", + } + mock_doc.attachments = None + mock_doc.children = None + + mock_model_instance = MagicMock() + mock_model_instance.get_text_embedding_num_tokens.return_value = [10] + + with ( + patch("core.rag.docstore.dataset_docstore.db") as mock_db, + patch("core.rag.docstore.dataset_docstore.ModelManager") as mock_manager_class, + ): + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.query.return_value.where.return_value.scalar.return_value = None + + mock_manager = MagicMock() + mock_manager.get_model_instance.return_value = mock_model_instance + mock_manager_class.return_value = mock_manager + + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + with patch.object(DatasetDocumentStore, "add_multimodel_documents_binding"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_documents([mock_doc]) + + mock_db.session.add.assert_called() + mock_db.session.commit.assert_called() + + def test_add_documents_update_existing_document(self): + """Test updating existing document with allow_update=True.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + mock_dataset.indexing_technique = "economy" + mock_dataset.embedding_model_provider = None + mock_dataset.embedding_model = None + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Updated content" + mock_doc.metadata = { + "doc_id": "doc-1", + "doc_hash": "new-hash", + } + mock_doc.attachments = None + mock_doc.children = None + + mock_existing_segment = MagicMock() + mock_existing_segment.id = "seg-1" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.query.return_value.where.return_value.scalar.return_value = 5 + + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_existing_segment): + with patch.object(DatasetDocumentStore, "add_multimodel_documents_binding"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_documents([mock_doc]) + + mock_db.session.commit.assert_called() + + def test_add_documents_raises_when_not_allowed(self): + """Test that adding existing doc without allow_update raises ValueError.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + mock_dataset.indexing_technique = "economy" + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Test content" + mock_doc.metadata = { + "doc_id": "doc-1", + "doc_hash": "hash-1", + } + mock_doc.attachments = None + mock_doc.children = None + + mock_existing_segment = MagicMock() + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_existing_segment): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + with pytest.raises(ValueError, match="already exists"): + store.add_documents([mock_doc], allow_update=False) + + def test_add_documents_with_answer_metadata(self): + """Test adding document with answer in metadata.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + mock_dataset.indexing_technique = "economy" + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Test content" + mock_doc.metadata = { + "doc_id": "doc-1", + "doc_hash": "hash-1", + "answer": "Test answer", + } + mock_doc.attachments = None + mock_doc.children = None + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.query.return_value.where.return_value.scalar.return_value = None + + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + with patch.object(DatasetDocumentStore, "add_multimodel_documents_binding"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_documents([mock_doc]) + + mock_db.session.add.assert_called() + + def test_add_documents_with_invalid_document_type(self): + """Test that non-Document raises ValueError.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + with pytest.raises(ValueError, match="must be a Document"): + store.add_documents(["not a document"]) + + def test_add_documents_with_none_metadata(self): + """Test that document with None metadata raises ValueError.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Test content" + mock_doc.metadata = None + + with patch("core.rag.docstore.dataset_docstore.db"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + with pytest.raises(ValueError, match="metadata must be a dict"): + store.add_documents([mock_doc]) + + def test_add_documents_with_save_child(self): + """Test adding documents with save_child=True.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + mock_dataset.indexing_technique = "economy" + + mock_child = MagicMock(spec=Document) + mock_child.page_content = "Child content" + mock_child.metadata = { + "doc_id": "child-1", + "doc_hash": "child-hash", + } + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Test content" + mock_doc.metadata = { + "doc_id": "doc-1", + "doc_hash": "hash-1", + } + mock_doc.attachments = None + mock_doc.children = [mock_child] + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.query.return_value.where.return_value.scalar.return_value = None + + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + with patch.object(DatasetDocumentStore, "add_multimodel_documents_binding"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_documents([mock_doc], save_child=True) + + mock_db.session.add.assert_called() + + +class TestDatasetDocumentStoreExists: + """Tests for document_exists method.""" + + def test_document_exists_returns_true(self): + """Test document_exists returns True when segment exists.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_segment = MagicMock() + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_segment): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.document_exists("doc-1") + + assert result is True + + def test_document_exists_returns_false(self): + """Test document_exists returns False when segment doesn't exist.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.document_exists("doc-1") + + assert result is False + + +class TestDatasetDocumentStoreGetDocument: + """Tests for get_document method.""" + + def test_get_document_success(self): + """Test getting a document successfully.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_segment = MagicMock(spec=DocumentSegment) + mock_segment.index_node_id = "node-1" + mock_segment.index_node_hash = "hash-1" + mock_segment.document_id = "doc-1" + mock_segment.dataset_id = "test-dataset-id" + mock_segment.content = "Test content" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_segment): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.get_document("node-1", raise_error=False) + + assert isinstance(result, Document) + assert result.page_content == "Test content" + + def test_get_document_returns_none_when_not_found(self): + """Test get_document returns None when not found and raise_error=False.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.get_document("nonexistent", raise_error=False) + + assert result is None + + def test_get_document_raises_when_not_found(self): + """Test get_document raises ValueError when not found and raise_error=True.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + with pytest.raises(ValueError, match="not found"): + store.get_document("nonexistent", raise_error=True) + + +class TestDatasetDocumentStoreDeleteDocument: + """Tests for delete_document method.""" + + def test_delete_document_success(self): + """Test deleting a document successfully.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_segment = MagicMock() + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_segment): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + store.delete_document("doc-1") + + mock_db.session.delete.assert_called_with(mock_segment) + mock_db.session.commit.assert_called() + + def test_delete_document_returns_none_when_not_found(self): + """Test delete_document returns None when not found and raise_error=False.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.delete_document("nonexistent", raise_error=False) + + assert result is None + + def test_delete_document_raises_when_not_found(self): + """Test delete_document raises ValueError when not found and raise_error=True.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + with pytest.raises(ValueError, match="not found"): + store.delete_document("nonexistent", raise_error=True) + + +class TestDatasetDocumentStoreHashOperations: + """Tests for set_document_hash and get_document_hash methods.""" + + def test_set_document_hash_success(self): + """Test setting document hash successfully.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_segment = MagicMock() + mock_segment.index_node_hash = "old-hash" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_segment): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + store.set_document_hash("doc-1", "new-hash") + + assert mock_segment.index_node_hash == "new-hash" + mock_db.session.commit.assert_called() + + def test_set_document_hash_returns_none_when_not_found(self): + """Test set_document_hash returns None when segment not found.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.set_document_hash("nonexistent", "new-hash") + + assert result is None + + def test_get_document_hash_success(self): + """Test getting document hash successfully.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_segment = MagicMock() + mock_segment.index_node_hash = "test-hash" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_segment): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.get_document_hash("doc-1") + + assert result == "test-hash" + + def test_get_document_hash_returns_none_when_not_found(self): + """Test get_document_hash returns None when segment not found.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db"): + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=None): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.get_document_hash("nonexistent") + + assert result is None + + +class TestDatasetDocumentStoreSegment: + """Tests for get_document_segment method.""" + + def test_get_document_segment_returns_segment(self): + """Test getting a document segment.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + mock_segment = MagicMock(spec=DocumentSegment) + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.scalar.return_value = mock_segment + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.get_document_segment("doc-1") + + assert result == mock_segment + + def test_get_document_segment_returns_none(self): + """Test getting a non-existent document segment.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.scalar.return_value = None + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + ) + + result = store.get_document_segment("nonexistent") + + assert result is None + + +class TestDatasetDocumentStoreMultimodelBinding: + """Tests for add_multimodel_documents_binding method.""" + + def test_add_multimodel_documents_binding_with_attachments(self): + """Test adding multimodel document bindings.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + + mock_attachment = MagicMock(spec=AttachmentDocument) + mock_attachment.metadata = {"doc_id": "attachment-1"} + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_multimodel_documents_binding("seg-1", [mock_attachment]) + + mock_db.session.add.assert_called() + + def test_add_multimodel_documents_binding_without_attachments(self): + """Test adding bindings with None attachments.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_multimodel_documents_binding("seg-1", None) + + mock_db.session.add.assert_not_called() + + def test_add_multimodel_documents_binding_with_empty_list(self): + """Test adding bindings with empty list.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_multimodel_documents_binding("seg-1", []) + + mock_db.session.add.assert_not_called() + + +class TestDatasetDocumentStoreAddDocumentsUpdateChild: + """Tests for add_documents when updating existing documents with children.""" + + def test_add_documents_update_existing_with_children(self): + """Test updating existing document with save_child=True and children.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + mock_dataset.indexing_technique = "economy" + + mock_child = MagicMock(spec=Document) + mock_child.page_content = "Updated child content" + mock_child.metadata = { + "doc_id": "child-1", + "doc_hash": "new-child-hash", + } + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Updated content" + mock_doc.metadata = { + "doc_id": "doc-1", + "doc_hash": "new-hash", + } + mock_doc.attachments = None + mock_doc.children = [mock_child] + + mock_existing_segment = MagicMock() + mock_existing_segment.id = "seg-1" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.query.return_value.where.return_value.scalar.return_value = 5 + + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_existing_segment): + with patch.object(DatasetDocumentStore, "add_multimodel_documents_binding"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_documents([mock_doc], save_child=True) + + mock_db.session.query.return_value.where.return_value.delete.assert_called() + mock_db.session.commit.assert_called() + + +class TestDatasetDocumentStoreAddDocumentsUpdateAnswer: + """Tests for add_documents when updating existing documents with answer metadata.""" + + def test_add_documents_update_existing_with_answer(self): + """Test updating existing document with answer in metadata.""" + + mock_dataset = MagicMock(spec=Dataset) + mock_dataset.id = "test-dataset-id" + mock_dataset.tenant_id = "tenant-1" + mock_dataset.indexing_technique = "economy" + + mock_doc = MagicMock(spec=Document) + mock_doc.page_content = "Updated content" + mock_doc.metadata = { + "doc_id": "doc-1", + "doc_hash": "new-hash", + "answer": "Updated answer", + } + mock_doc.attachments = None + mock_doc.children = None + + mock_existing_segment = MagicMock() + mock_existing_segment.id = "seg-1" + + with patch("core.rag.docstore.dataset_docstore.db") as mock_db: + mock_session = MagicMock() + mock_db.session = mock_session + mock_db.session.query.return_value.where.return_value.scalar.return_value = 5 + + with patch.object(DatasetDocumentStore, "get_document_segment", return_value=mock_existing_segment): + with patch.object(DatasetDocumentStore, "add_multimodel_documents_binding"): + store = DatasetDocumentStore( + dataset=mock_dataset, + user_id="test-user-id", + document_id="test-doc-id", + ) + + store.add_documents([mock_doc]) + + mock_db.session.commit.assert_called() diff --git a/api/tests/unit_tests/core/rag/embedding/test_cached_embedding.py b/api/tests/unit_tests/core/rag/embedding/test_cached_embedding.py new file mode 100644 index 0000000000..a0db25174d --- /dev/null +++ b/api/tests/unit_tests/core/rag/embedding/test_cached_embedding.py @@ -0,0 +1,555 @@ +"""Unit tests for cached_embedding.py - CacheEmbedding class. + +This test file covers the methods not fully tested in test_embedding_service.py: +- embed_multimodal_documents +- embed_multimodal_query +- Error handling scenarios in embed_query (DEBUG mode) +""" + +import base64 +from decimal import Decimal +from unittest.mock import Mock, patch + +import numpy as np +import pytest +from sqlalchemy.exc import IntegrityError + +from core.rag.embedding.cached_embedding import CacheEmbedding +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey +from dify_graph.model_runtime.entities.text_embedding_entities import EmbeddingResult, EmbeddingUsage +from models.dataset import Embedding + + +class TestCacheEmbeddingMultimodalDocuments: + """Test suite for CacheEmbedding.embed_multimodal_documents method.""" + + @pytest.fixture + def mock_model_instance(self): + """Create a mock ModelInstance for testing.""" + model_instance = Mock() + model_instance.model = "vision-embedding-model" + model_instance.provider = "openai" + model_instance.credentials = {"api_key": "test-key"} + + model_type_instance = Mock() + model_instance.model_type_instance = model_type_instance + + model_schema = Mock() + model_schema.model_properties = {ModelPropertyKey.MAX_CHUNKS: 10} + model_type_instance.get_model_schema.return_value = model_schema + + return model_instance + + @pytest.fixture + def sample_multimodal_result(self): + """Create a sample multimodal EmbeddingResult.""" + embedding_vector = np.random.randn(1536) + normalized_vector = (embedding_vector / np.linalg.norm(embedding_vector)).tolist() + + usage = EmbeddingUsage( + tokens=10, + total_tokens=10, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.000001"), + currency="USD", + latency=0.5, + ) + + return EmbeddingResult( + model="vision-embedding-model", + embeddings=[normalized_vector], + usage=usage, + ) + + def test_embed_single_multimodal_document_cache_miss(self, mock_model_instance, sample_multimodal_result): + """Test embedding a single multimodal document when cache is empty.""" + cache_embedding = CacheEmbedding(mock_model_instance, user="test-user") + documents = [{"file_id": "file123", "content": "test content"}] + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + mock_session.query.return_value.filter_by.return_value.first.return_value = None + mock_model_instance.invoke_multimodal_embedding.return_value = sample_multimodal_result + + result = cache_embedding.embed_multimodal_documents(documents) + + assert len(result) == 1 + assert isinstance(result[0], list) + assert len(result[0]) == 1536 + + mock_model_instance.invoke_multimodal_embedding.assert_called_once() + mock_session.add.assert_called_once() + mock_session.commit.assert_called_once() + + def test_embed_multiple_multimodal_documents_cache_miss(self, mock_model_instance): + """Test embedding multiple multimodal documents when cache is empty.""" + cache_embedding = CacheEmbedding(mock_model_instance) + documents = [ + {"file_id": "file1", "content": "content 1"}, + {"file_id": "file2", "content": "content 2"}, + {"file_id": "file3", "content": "content 3"}, + ] + + embeddings = [] + for _ in range(3): + vector = np.random.randn(1536) + normalized = (vector / np.linalg.norm(vector)).tolist() + embeddings.append(normalized) + + usage = EmbeddingUsage( + tokens=30, + total_tokens=30, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.000003"), + currency="USD", + latency=0.8, + ) + + embedding_result = EmbeddingResult( + model="vision-embedding-model", + embeddings=embeddings, + usage=usage, + ) + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + mock_session.query.return_value.filter_by.return_value.first.return_value = None + mock_model_instance.invoke_multimodal_embedding.return_value = embedding_result + + result = cache_embedding.embed_multimodal_documents(documents) + + assert len(result) == 3 + assert all(len(emb) == 1536 for emb in result) + + def test_embed_multimodal_documents_cache_hit(self, mock_model_instance): + """Test embedding multimodal documents when embeddings are cached.""" + cache_embedding = CacheEmbedding(mock_model_instance) + documents = [{"file_id": "file123"}] + + cached_vector = np.random.randn(1536) + normalized_cached = (cached_vector / np.linalg.norm(cached_vector)).tolist() + + mock_cached_embedding = Mock(spec=Embedding) + mock_cached_embedding.get_embedding.return_value = normalized_cached + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + mock_session.query.return_value.filter_by.return_value.first.return_value = mock_cached_embedding + + result = cache_embedding.embed_multimodal_documents(documents) + + assert len(result) == 1 + assert result[0] == normalized_cached + mock_model_instance.invoke_multimodal_embedding.assert_not_called() + + def test_embed_multimodal_documents_partial_cache_hit(self, mock_model_instance): + """Test embedding multimodal documents with mixed cache hits and misses.""" + cache_embedding = CacheEmbedding(mock_model_instance) + documents = [ + {"file_id": "cached_file"}, + {"file_id": "new_file_1"}, + {"file_id": "new_file_2"}, + ] + + cached_vector = np.random.randn(1536) + normalized_cached = (cached_vector / np.linalg.norm(cached_vector)).tolist() + + mock_cached_embedding = Mock(spec=Embedding) + mock_cached_embedding.get_embedding.return_value = normalized_cached + + new_embeddings = [] + for _ in range(2): + vector = np.random.randn(1536) + normalized = (vector / np.linalg.norm(vector)).tolist() + new_embeddings.append(normalized) + + usage = EmbeddingUsage( + tokens=20, + total_tokens=20, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.000002"), + currency="USD", + latency=0.6, + ) + + embedding_result = EmbeddingResult( + model="vision-embedding-model", + embeddings=new_embeddings, + usage=usage, + ) + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + call_count = [0] + + def mock_filter_by(**kwargs): + call_count[0] += 1 + mock_query = Mock() + if call_count[0] == 1: + mock_query.first.return_value = mock_cached_embedding + else: + mock_query.first.return_value = None + return mock_query + + mock_session.query.return_value.filter_by = mock_filter_by + mock_model_instance.invoke_multimodal_embedding.return_value = embedding_result + + result = cache_embedding.embed_multimodal_documents(documents) + + assert len(result) == 3 + assert result[0] == normalized_cached + + def test_embed_multimodal_documents_nan_handling(self, mock_model_instance): + """Test handling of NaN values in multimodal embeddings.""" + cache_embedding = CacheEmbedding(mock_model_instance) + documents = [{"file_id": "valid"}, {"file_id": "nan"}] + + valid_vector = np.random.randn(1536).tolist() + nan_vector = [float("nan")] * 1536 + + usage = EmbeddingUsage( + tokens=20, + total_tokens=20, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.000002"), + currency="USD", + latency=0.5, + ) + + embedding_result = EmbeddingResult( + model="vision-embedding-model", + embeddings=[valid_vector, nan_vector], + usage=usage, + ) + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + mock_session.query.return_value.filter_by.return_value.first.return_value = None + mock_model_instance.invoke_multimodal_embedding.return_value = embedding_result + + with patch("core.rag.embedding.cached_embedding.logger") as mock_logger: + result = cache_embedding.embed_multimodal_documents(documents) + + assert len(result) == 2 + assert result[0] is not None + assert result[1] is None + + mock_logger.warning.assert_called_once() + + def test_embed_multimodal_documents_large_batch(self, mock_model_instance): + """Test embedding large batch of multimodal documents respecting MAX_CHUNKS.""" + cache_embedding = CacheEmbedding(mock_model_instance) + documents = [{"file_id": f"file{i}"} for i in range(25)] + + def create_batch_result(batch_size): + embeddings = [] + for _ in range(batch_size): + vector = np.random.randn(1536) + normalized = (vector / np.linalg.norm(vector)).tolist() + embeddings.append(normalized) + + usage = EmbeddingUsage( + tokens=batch_size * 10, + total_tokens=batch_size * 10, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal(str(batch_size * 0.000001)), + currency="USD", + latency=0.5, + ) + + return EmbeddingResult( + model="vision-embedding-model", + embeddings=embeddings, + usage=usage, + ) + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + mock_session.query.return_value.filter_by.return_value.first.return_value = None + + batch_results = [create_batch_result(10), create_batch_result(10), create_batch_result(5)] + mock_model_instance.invoke_multimodal_embedding.side_effect = batch_results + + result = cache_embedding.embed_multimodal_documents(documents) + + assert len(result) == 25 + assert mock_model_instance.invoke_multimodal_embedding.call_count == 3 + + def test_embed_multimodal_documents_api_error(self, mock_model_instance): + """Test handling of API errors during multimodal embedding.""" + cache_embedding = CacheEmbedding(mock_model_instance) + documents = [{"file_id": "file123"}] + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + mock_session.query.return_value.filter_by.return_value.first.return_value = None + mock_model_instance.invoke_multimodal_embedding.side_effect = Exception("API Error") + + with pytest.raises(Exception) as exc_info: + cache_embedding.embed_multimodal_documents(documents) + + assert "API Error" in str(exc_info.value) + mock_session.rollback.assert_called() + + def test_embed_multimodal_documents_integrity_error_during_transform( + self, mock_model_instance, sample_multimodal_result + ): + """Test handling of IntegrityError during embedding transformation.""" + cache_embedding = CacheEmbedding(mock_model_instance) + documents = [{"file_id": "file123"}] + + with patch("core.rag.embedding.cached_embedding.db.session") as mock_session: + mock_session.query.return_value.filter_by.return_value.first.return_value = None + mock_model_instance.invoke_multimodal_embedding.return_value = sample_multimodal_result + + mock_session.commit.side_effect = IntegrityError("Duplicate key", None, None) + + result = cache_embedding.embed_multimodal_documents(documents) + + assert len(result) == 1 + mock_session.rollback.assert_called() + + +class TestCacheEmbeddingMultimodalQuery: + """Test suite for CacheEmbedding.embed_multimodal_query method.""" + + @pytest.fixture + def mock_model_instance(self): + """Create a mock ModelInstance for testing.""" + model_instance = Mock() + model_instance.model = "vision-embedding-model" + model_instance.provider = "openai" + model_instance.credentials = {"api_key": "test-key"} + return model_instance + + def test_embed_multimodal_query_cache_miss(self, mock_model_instance): + """Test embedding multimodal query when Redis cache is empty.""" + cache_embedding = CacheEmbedding(mock_model_instance, user="test-user") + document = {"file_id": "file123"} + + vector = np.random.randn(1536) + normalized = (vector / np.linalg.norm(vector)).tolist() + + usage = EmbeddingUsage( + tokens=5, + total_tokens=5, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.0000005"), + currency="USD", + latency=0.3, + ) + + embedding_result = EmbeddingResult( + model="vision-embedding-model", + embeddings=[normalized], + usage=usage, + ) + + with patch("core.rag.embedding.cached_embedding.redis_client") as mock_redis: + mock_redis.get.return_value = None + mock_model_instance.invoke_multimodal_embedding.return_value = embedding_result + + result = cache_embedding.embed_multimodal_query(document) + + assert isinstance(result, list) + assert len(result) == 1536 + mock_redis.setex.assert_called_once() + + def test_embed_multimodal_query_cache_hit(self, mock_model_instance): + """Test embedding multimodal query when Redis cache has the value.""" + cache_embedding = CacheEmbedding(mock_model_instance) + document = {"file_id": "file123"} + + embedding_vector = np.random.randn(1536) + vector_bytes = embedding_vector.tobytes() + encoded_vector = base64.b64encode(vector_bytes).decode("utf-8") + + with patch("core.rag.embedding.cached_embedding.redis_client") as mock_redis: + mock_redis.get.return_value = encoded_vector.encode() + + result = cache_embedding.embed_multimodal_query(document) + + assert isinstance(result, list) + assert len(result) == 1536 + mock_redis.expire.assert_called_once() + mock_model_instance.invoke_multimodal_embedding.assert_not_called() + + def test_embed_multimodal_query_nan_handling(self, mock_model_instance): + """Test handling of NaN values in multimodal query embeddings.""" + cache_embedding = CacheEmbedding(mock_model_instance) + + nan_vector = [float("nan")] * 1536 + + usage = EmbeddingUsage( + tokens=5, + total_tokens=5, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.0000005"), + currency="USD", + latency=0.3, + ) + + embedding_result = EmbeddingResult( + model="vision-embedding-model", + embeddings=[nan_vector], + usage=usage, + ) + + document = {"file_id": "file123"} + + with patch("core.rag.embedding.cached_embedding.redis_client") as mock_redis: + mock_redis.get.return_value = None + mock_model_instance.invoke_multimodal_embedding.return_value = embedding_result + + with pytest.raises(ValueError) as exc_info: + cache_embedding.embed_multimodal_query(document) + + assert "Normalized embedding is nan" in str(exc_info.value) + + def test_embed_multimodal_query_api_error(self, mock_model_instance): + """Test handling of API errors during multimodal query embedding.""" + cache_embedding = CacheEmbedding(mock_model_instance) + document = {"file_id": "file123"} + + with patch("core.rag.embedding.cached_embedding.redis_client") as mock_redis: + mock_redis.get.return_value = None + mock_model_instance.invoke_multimodal_embedding.side_effect = Exception("API Error") + + with patch("core.rag.embedding.cached_embedding.dify_config") as mock_config: + mock_config.DEBUG = False + + with pytest.raises(Exception) as exc_info: + cache_embedding.embed_multimodal_query(document) + + assert "API Error" in str(exc_info.value) + + def test_embed_multimodal_query_redis_set_error(self, mock_model_instance): + """Test handling of Redis set errors during multimodal query embedding.""" + cache_embedding = CacheEmbedding(mock_model_instance) + document = {"file_id": "file123"} + + vector = np.random.randn(1536) + normalized = (vector / np.linalg.norm(vector)).tolist() + + usage = EmbeddingUsage( + tokens=5, + total_tokens=5, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.0000005"), + currency="USD", + latency=0.3, + ) + + embedding_result = EmbeddingResult( + model="vision-embedding-model", + embeddings=[normalized], + usage=usage, + ) + + with patch("core.rag.embedding.cached_embedding.redis_client") as mock_redis: + mock_redis.get.return_value = None + mock_model_instance.invoke_multimodal_embedding.return_value = embedding_result + mock_redis.setex.side_effect = RuntimeError("Redis Error") + + with patch("core.rag.embedding.cached_embedding.dify_config") as mock_config: + mock_config.DEBUG = True + + with pytest.raises(RuntimeError): + cache_embedding.embed_multimodal_query(document) + + +class TestCacheEmbeddingQueryErrors: + """Test suite for error handling in CacheEmbedding.embed_query method.""" + + @pytest.fixture + def mock_model_instance(self): + """Create a mock ModelInstance for testing.""" + model_instance = Mock() + model_instance.model = "text-embedding-ada-002" + model_instance.provider = "openai" + model_instance.credentials = {"api_key": "test-key"} + return model_instance + + def test_embed_query_api_error_debug_mode(self, mock_model_instance): + """Test handling of API errors in debug mode.""" + cache_embedding = CacheEmbedding(mock_model_instance) + query = "test query" + + with patch("core.rag.embedding.cached_embedding.redis_client") as mock_redis: + mock_redis.get.return_value = None + mock_model_instance.invoke_text_embedding.side_effect = RuntimeError("API Error") + + with patch("core.rag.embedding.cached_embedding.dify_config") as mock_config: + mock_config.DEBUG = True + + with patch("core.rag.embedding.cached_embedding.logger") as mock_logger: + with pytest.raises(RuntimeError) as exc_info: + cache_embedding.embed_query(query) + + assert "API Error" in str(exc_info.value) + mock_logger.exception.assert_called() + + def test_embed_query_redis_set_error_debug_mode(self, mock_model_instance): + """Test handling of Redis set errors in debug mode.""" + cache_embedding = CacheEmbedding(mock_model_instance) + query = "test query" + + vector = np.random.randn(1536) + normalized = (vector / np.linalg.norm(vector)).tolist() + + usage = EmbeddingUsage( + tokens=5, + total_tokens=5, + unit_price=Decimal("0.0001"), + price_unit=Decimal(1000), + total_price=Decimal("0.0000005"), + currency="USD", + latency=0.3, + ) + + embedding_result = EmbeddingResult( + model="text-embedding-ada-002", + embeddings=[normalized], + usage=usage, + ) + + with patch("core.rag.embedding.cached_embedding.redis_client") as mock_redis: + mock_redis.get.return_value = None + mock_model_instance.invoke_text_embedding.return_value = embedding_result + mock_redis.setex.side_effect = RuntimeError("Redis Error") + + with patch("core.rag.embedding.cached_embedding.dify_config") as mock_config: + mock_config.DEBUG = True + + with patch("core.rag.embedding.cached_embedding.logger") as mock_logger: + with pytest.raises(RuntimeError): + cache_embedding.embed_query(query) + + mock_logger.exception.assert_called() + + +class TestCacheEmbeddingInitialization: + """Test suite for CacheEmbedding initialization.""" + + def test_initialization_with_user(self): + """Test CacheEmbedding initialization with user parameter.""" + model_instance = Mock() + model_instance.model = "test-model" + model_instance.provider = "test-provider" + + cache_embedding = CacheEmbedding(model_instance, user="test-user") + + assert cache_embedding._model_instance == model_instance + assert cache_embedding._user == "test-user" + + def test_initialization_without_user(self): + """Test CacheEmbedding initialization without user parameter.""" + model_instance = Mock() + model_instance.model = "test-model" + model_instance.provider = "test-provider" + + cache_embedding = CacheEmbedding(model_instance) + + assert cache_embedding._model_instance == model_instance + assert cache_embedding._user is None diff --git a/api/tests/unit_tests/core/rag/embedding/test_embedding_base.py b/api/tests/unit_tests/core/rag/embedding/test_embedding_base.py new file mode 100644 index 0000000000..033933e886 --- /dev/null +++ b/api/tests/unit_tests/core/rag/embedding/test_embedding_base.py @@ -0,0 +1,220 @@ +"""Unit tests for embedding_base.py - the abstract Embeddings base class.""" + +import asyncio +import inspect +from typing import Any + +import pytest + +from core.rag.embedding.embedding_base import Embeddings + + +class ConcreteEmbeddings(Embeddings): + """Concrete implementation of Embeddings for testing.""" + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + return [[1.0] * 10 for _ in texts] + + def embed_multimodal_documents(self, multimodel_documents: list[dict[str, Any]]) -> list[list[float]]: + return [[1.0] * 10 for _ in multimodel_documents] + + def embed_query(self, text: str) -> list[float]: + return [1.0] * 10 + + def embed_multimodal_query(self, multimodel_document: dict[str, Any]) -> list[float]: + return [1.0] * 10 + + +class TestEmbeddingsBase: + """Test suite for the abstract Embeddings base class.""" + + def test_embeddings_is_abc(self): + """Test that Embeddings is an abstract base class.""" + assert hasattr(Embeddings, "__abstractmethods__") + assert len(Embeddings.__abstractmethods__) > 0 + + def test_embed_documents_is_abstract(self): + """Test that embed_documents is an abstract method.""" + assert "embed_documents" in Embeddings.__abstractmethods__ + + def test_embed_multimodal_documents_is_abstract(self): + """Test that embed_multimodal_documents is an abstract method.""" + assert "embed_multimodal_documents" in Embeddings.__abstractmethods__ + + def test_embed_query_is_abstract(self): + """Test that embed_query is an abstract method.""" + assert "embed_query" in Embeddings.__abstractmethods__ + + def test_embed_multimodal_query_is_abstract(self): + """Test that embed_multimodal_query is an abstract method.""" + assert "embed_multimodal_query" in Embeddings.__abstractmethods__ + + def test_embed_documents_raises_not_implemented(self): + """Test that embed_documents raises NotImplementedError in its body.""" + source = inspect.getsource(Embeddings.embed_documents) + assert "raise NotImplementedError" in source + + def test_embed_multimodal_documents_raises_not_implemented(self): + """Test that embed_multimodal_documents raises NotImplementedError in its body.""" + source = inspect.getsource(Embeddings.embed_multimodal_documents) + assert "raise NotImplementedError" in source + + def test_embed_query_raises_not_implemented(self): + """Test that embed_query raises NotImplementedError in its body.""" + source = inspect.getsource(Embeddings.embed_query) + assert "raise NotImplementedError" in source + + def test_embed_multimodal_query_raises_not_implemented(self): + """Test that embed_multimodal_query raises NotImplementedError in its body.""" + source = inspect.getsource(Embeddings.embed_multimodal_query) + assert "raise NotImplementedError" in source + + def test_aembed_documents_raises_not_implemented(self): + """Test that aembed_documents raises NotImplementedError in its body.""" + source = inspect.getsource(Embeddings.aembed_documents) + assert "raise NotImplementedError" in source + + def test_aembed_query_raises_not_implemented(self): + """Test that aembed_query raises NotImplementedError in its body.""" + source = inspect.getsource(Embeddings.aembed_query) + assert "raise NotImplementedError" in source + + def test_concrete_implementation_works(self): + """Test that a concrete implementation of Embeddings works correctly.""" + concrete = ConcreteEmbeddings() + result = concrete.embed_documents(["test1", "test2"]) + assert len(result) == 2 + assert all(len(emb) == 10 for emb in result) + + def test_concrete_implementation_embed_query(self): + """Test concrete implementation of embed_query.""" + concrete = ConcreteEmbeddings() + result = concrete.embed_query("test query") + assert len(result) == 10 + + def test_concrete_implementation_embed_multimodal_documents(self): + """Test concrete implementation of embed_multimodal_documents.""" + concrete = ConcreteEmbeddings() + docs: list[dict[str, Any]] = [{"file_id": "file1"}, {"file_id": "file2"}] + result = concrete.embed_multimodal_documents(docs) + assert len(result) == 2 + + def test_concrete_implementation_embed_multimodal_query(self): + """Test concrete implementation of embed_multimodal_query.""" + concrete = ConcreteEmbeddings() + result = concrete.embed_multimodal_query({"file_id": "test"}) + assert len(result) == 10 + + +class TestEmbeddingsNotImplemented: + """Test that abstract methods raise NotImplementedError when called.""" + + def test_embed_query_raises_not_implemented(self): + """Test that embed_query raises NotImplementedError.""" + + class PartialImpl: + pass + + PartialImpl.embed_query = lambda self, text: Embeddings.embed_query(self, text) + PartialImpl.embed_documents = lambda self, texts: Embeddings.embed_documents(self, texts) + PartialImpl.embed_multimodal_documents = lambda self, docs: Embeddings.embed_multimodal_documents(self, docs) + PartialImpl.embed_multimodal_query = lambda self, doc: Embeddings.embed_multimodal_query(self, doc) + PartialImpl.aembed_documents = lambda self, texts: Embeddings.aembed_documents(self, texts) + PartialImpl.aembed_query = lambda self, text: Embeddings.aembed_query(self, text) + + partial = PartialImpl() + with pytest.raises(NotImplementedError): + partial.embed_query("test") + + def test_embed_documents_raises_not_implemented(self): + """Test that embed_documents raises NotImplementedError.""" + + class PartialImpl: + pass + + PartialImpl.embed_query = lambda self, text: Embeddings.embed_query(self, text) + PartialImpl.embed_documents = lambda self, texts: Embeddings.embed_documents(self, texts) + PartialImpl.embed_multimodal_documents = lambda self, docs: Embeddings.embed_multimodal_documents(self, docs) + PartialImpl.embed_multimodal_query = lambda self, doc: Embeddings.embed_multimodal_query(self, doc) + PartialImpl.aembed_documents = lambda self, texts: Embeddings.aembed_documents(self, texts) + PartialImpl.aembed_query = lambda self, text: Embeddings.aembed_query(self, text) + + partial = PartialImpl() + with pytest.raises(NotImplementedError): + partial.embed_documents(["test"]) + + def test_embed_multimodal_documents_raises_not_implemented(self): + """Test that embed_multimodal_documents raises NotImplementedError.""" + + class PartialImpl: + pass + + PartialImpl.embed_query = lambda self, text: Embeddings.embed_query(self, text) + PartialImpl.embed_documents = lambda self, texts: Embeddings.embed_documents(self, texts) + PartialImpl.embed_multimodal_documents = lambda self, docs: Embeddings.embed_multimodal_documents(self, docs) + PartialImpl.embed_multimodal_query = lambda self, doc: Embeddings.embed_multimodal_query(self, doc) + PartialImpl.aembed_documents = lambda self, texts: Embeddings.aembed_documents(self, texts) + PartialImpl.aembed_query = lambda self, text: Embeddings.aembed_query(self, text) + + partial = PartialImpl() + with pytest.raises(NotImplementedError): + partial.embed_multimodal_documents([{"file_id": "test"}]) + + def test_embed_multimodal_query_raises_not_implemented(self): + """Test that embed_multimodal_query raises NotImplementedError.""" + + class PartialImpl: + pass + + PartialImpl.embed_query = lambda self, text: Embeddings.embed_query(self, text) + PartialImpl.embed_documents = lambda self, texts: Embeddings.embed_documents(self, texts) + PartialImpl.embed_multimodal_documents = lambda self, docs: Embeddings.embed_multimodal_documents(self, docs) + PartialImpl.embed_multimodal_query = lambda self, doc: Embeddings.embed_multimodal_query(self, doc) + PartialImpl.aembed_documents = lambda self, texts: Embeddings.aembed_documents(self, texts) + PartialImpl.aembed_query = lambda self, text: Embeddings.aembed_query(self, text) + + partial = PartialImpl() + with pytest.raises(NotImplementedError): + partial.embed_multimodal_query({"file_id": "test"}) + + def test_aembed_documents_raises_not_implemented(self): + """Test that aembed_documents raises NotImplementedError.""" + + class PartialImpl: + pass + + PartialImpl.embed_query = lambda self, text: Embeddings.embed_query(self, text) + PartialImpl.embed_documents = lambda self, texts: Embeddings.embed_documents(self, texts) + PartialImpl.embed_multimodal_documents = lambda self, docs: Embeddings.embed_multimodal_documents(self, docs) + PartialImpl.embed_multimodal_query = lambda self, doc: Embeddings.embed_multimodal_query(self, doc) + PartialImpl.aembed_documents = lambda self, texts: Embeddings.aembed_documents(self, texts) + PartialImpl.aembed_query = lambda self, text: Embeddings.aembed_query(self, text) + + partial = PartialImpl() + + async def run_test(): + with pytest.raises(NotImplementedError): + await partial.aembed_documents(["test"]) + + asyncio.run(run_test()) + + def test_aembed_query_raises_not_implemented(self): + """Test that aembed_query raises NotImplementedError.""" + + class PartialImpl: + pass + + PartialImpl.embed_query = lambda self, text: Embeddings.embed_query(self, text) + PartialImpl.embed_documents = lambda self, texts: Embeddings.embed_documents(self, texts) + PartialImpl.embed_multimodal_documents = lambda self, docs: Embeddings.embed_multimodal_documents(self, docs) + PartialImpl.embed_multimodal_query = lambda self, doc: Embeddings.embed_multimodal_query(self, doc) + PartialImpl.aembed_documents = lambda self, texts: Embeddings.aembed_documents(self, texts) + PartialImpl.aembed_query = lambda self, text: Embeddings.aembed_query(self, text) + + partial = PartialImpl() + + async def run_test(): + with pytest.raises(NotImplementedError): + await partial.aembed_query("test") + + asyncio.run(run_test()) diff --git a/api/tests/unit_tests/core/rag/extractor/blob/test_blob.py b/api/tests/unit_tests/core/rag/extractor/blob/test_blob.py new file mode 100644 index 0000000000..eb14622d7a --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/blob/test_blob.py @@ -0,0 +1,85 @@ +from io import BytesIO + +import pytest + +from core.rag.extractor.blob.blob import Blob + + +class TestBlob: + def test_requires_data_or_path(self): + with pytest.raises(ValueError, match="Either data or path must be provided"): + Blob() + + def test_source_property_and_repr_include_path(self, tmp_path): + file_path = tmp_path / "sample.txt" + file_path.write_text("hello", encoding="utf-8") + + blob = Blob.from_path(str(file_path)) + + assert blob.source == str(file_path) + assert str(file_path) in repr(blob) + + def test_as_string_from_bytes_and_str(self): + assert Blob.from_data(b"abc").as_string() == "abc" + assert Blob.from_data("plain-text").as_string() == "plain-text" + + def test_as_string_from_path(self, tmp_path): + file_path = tmp_path / "sample.txt" + file_path.write_text("from-file", encoding="utf-8") + + blob = Blob.from_path(str(file_path)) + + assert blob.as_string() == "from-file" + + def test_as_string_raises_for_invalid_state(self): + blob = Blob.model_construct(data=None, path=None, mimetype=None, encoding="utf-8") + + with pytest.raises(ValueError, match="Unable to get string for blob"): + blob.as_string() + + def test_as_bytes_from_bytes_str_and_path(self, tmp_path): + from_bytes = Blob.from_data(b"abc") + from_str = Blob.from_data("abc", encoding="utf-8") + + file_path = tmp_path / "sample.bin" + file_path.write_bytes(b"from-path") + from_path = Blob.from_path(str(file_path)) + + assert from_bytes.as_bytes() == b"abc" + assert from_str.as_bytes() == b"abc" + assert from_path.as_bytes() == b"from-path" + + def test_as_bytes_raises_for_invalid_state(self): + blob = Blob.model_construct(data=None, path=None, mimetype=None, encoding="utf-8") + + with pytest.raises(ValueError, match="Unable to get bytes for blob"): + blob.as_bytes() + + def test_as_bytes_io_for_bytes_and_path(self, tmp_path): + data_blob = Blob.from_data(b"bytes-io") + with data_blob.as_bytes_io() as stream: + assert isinstance(stream, BytesIO) + assert stream.read() == b"bytes-io" + + file_path = tmp_path / "stream.bin" + file_path.write_bytes(b"path-stream") + path_blob = Blob.from_path(str(file_path)) + with path_blob.as_bytes_io() as stream: + assert stream.read() == b"path-stream" + + def test_as_bytes_io_raises_for_unsupported_data_type(self): + blob = Blob.from_data("text-value") + + with pytest.raises(NotImplementedError, match="Unable to convert blob"): + with blob.as_bytes_io(): + pass + + def test_from_path_respects_guessing_and_explicit_mime(self, tmp_path): + file_path = tmp_path / "example.txt" + file_path.write_text("x", encoding="utf-8") + + guessed = Blob.from_path(str(file_path)) + explicit = Blob.from_path(str(file_path), mime_type="custom/type", guess_type=False) + + assert guessed.mimetype == "text/plain" + assert explicit.mimetype == "custom/type" diff --git a/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py b/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py index 4ee04ddebc..d3040395be 100644 --- a/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py +++ b/api/tests/unit_tests/core/rag/extractor/firecrawl/test_firecrawl.py @@ -1,61 +1,337 @@ -import os +"""Unit tests for Firecrawl app and extractor integration points.""" + +import json +from collections.abc import Mapping +from typing import Any from unittest.mock import MagicMock import pytest from pytest_mock import MockerFixture +import core.rag.extractor.firecrawl.firecrawl_app as firecrawl_module from core.rag.extractor.firecrawl.firecrawl_app import FirecrawlApp -from tests.unit_tests.core.rag.extractor.test_notion_extractor import _mock_response +from core.rag.extractor.firecrawl.firecrawl_web_extractor import FirecrawlWebExtractor -def test_firecrawl_web_extractor_crawl_mode(mocker: MockerFixture): - url = "https://firecrawl.dev" - api_key = os.getenv("FIRECRAWL_API_KEY") or "fc-" - base_url = "https://api.firecrawl.dev" - firecrawl_app = FirecrawlApp(api_key=api_key, base_url=base_url) - params = { - "includePaths": [], - "excludePaths": [], - "maxDepth": 1, - "limit": 1, - } - mocked_firecrawl = { - "id": "test", - } - mocker.patch("httpx.post", return_value=_mock_response(mocked_firecrawl)) - job_id = firecrawl_app.crawl_url(url, params) - - assert job_id is not None - assert isinstance(job_id, str) +def _response(status_code: int, json_data: Mapping[str, Any] | None = None, text: str = "") -> MagicMock: + response = MagicMock() + response.status_code = status_code + response.text = text + response.json.return_value = json_data if json_data is not None else {} + return response -def test_build_url_normalizes_slashes_for_crawl(mocker: MockerFixture): - api_key = "fc-" - base_urls = ["https://custom.firecrawl.dev", "https://custom.firecrawl.dev/"] - for base in base_urls: - app = FirecrawlApp(api_key=api_key, base_url=base) - mock_post = mocker.patch("httpx.post") - mock_resp = MagicMock() - mock_resp.status_code = 200 - mock_resp.json.return_value = {"id": "job123"} - mock_post.return_value = mock_resp - app.crawl_url("https://example.com", params=None) - called_url = mock_post.call_args[0][0] - assert called_url == "https://custom.firecrawl.dev/v2/crawl" +class TestFirecrawlApp: + def test_init_requires_api_key_for_default_base_url(self): + with pytest.raises(ValueError, match="No API key provided"): + FirecrawlApp(api_key=None, base_url="https://api.firecrawl.dev") + + def test_prepare_headers_and_build_url(self): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev/") + + assert app._prepare_headers() == { + "Content-Type": "application/json", + "Authorization": "Bearer fc-key", + } + assert app._build_url("/v2/crawl") == "https://custom.firecrawl.dev/v2/crawl" + + def test_scrape_url_success(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch( + "httpx.post", + return_value=_response( + 200, + { + "data": { + "metadata": { + "title": "t", + "description": "d", + "sourceURL": "https://example.com", + }, + "markdown": "body", + } + }, + ), + ) + + result = app.scrape_url("https://example.com", params={"onlyMainContent": False}) + + assert result == { + "title": "t", + "description": "d", + "source_url": "https://example.com", + "markdown": "body", + } + + def test_scrape_url_handles_known_error_status(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mock_handle = mocker.patch.object(app, "_handle_error", side_effect=Exception("boom")) + mocker.patch("httpx.post", return_value=_response(429, {"error": "limit"})) + + with pytest.raises(Exception, match="boom"): + app.scrape_url("https://example.com") + + mock_handle.assert_called_once() + + def test_scrape_url_unknown_status_raises(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.post", return_value=_response(404, text="Not Found")) + + with pytest.raises(Exception, match="Failed to scrape URL. Status code: 404"): + app.scrape_url("https://example.com") + + def test_crawl_url_success(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.post", return_value=_response(200, {"id": "job-1"})) + + assert app.crawl_url("https://example.com") == "job-1" + + def test_crawl_url_non_200_uses_error_handler(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mock_handle = mocker.patch.object(app, "_handle_error", side_effect=Exception("crawl failed")) + mocker.patch("httpx.post", return_value=_response(500, {"error": "server"})) + + with pytest.raises(Exception, match="crawl failed"): + app.crawl_url("https://example.com") + + mock_handle.assert_called_once() + + def test_map_success(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.post", return_value=_response(200, {"success": True, "links": ["a", "b"]})) + + assert app.map("https://example.com") == {"success": True, "links": ["a", "b"]} + + def test_map_known_error(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mock_handle = mocker.patch.object(app, "_handle_error") + mocker.patch("httpx.post", return_value=_response(409, {"error": "conflict"})) + + assert app.map("https://example.com") == {} + mock_handle.assert_called_once() + + def test_map_unknown_error_raises(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.post", return_value=_response(418, text="teapot")) + + with pytest.raises(Exception, match="Failed to start map job. Status code: 418"): + app.map("https://example.com") + + def test_check_crawl_status_completed_with_data(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + payload = { + "status": "completed", + "total": 2, + "completed": 2, + "data": [ + { + "metadata": {"title": "a", "description": "desc-a", "sourceURL": "https://a"}, + "markdown": "m-a", + }, + { + "metadata": {"title": "b", "description": "desc-b", "sourceURL": "https://b"}, + "markdown": "m-b", + }, + {"metadata": {"title": "skip"}}, + ], + } + mocker.patch("httpx.get", return_value=_response(200, payload)) + + save_calls: list[tuple[str, bytes]] = [] + delete_calls: list[str] = [] + + mock_storage = MagicMock() + mock_storage.exists.return_value = True + mock_storage.delete.side_effect = lambda key: delete_calls.append(key) + mock_storage.save.side_effect = lambda key, data: save_calls.append((key, data)) + mocker.patch.object(firecrawl_module, "storage", mock_storage) + + result = app.check_crawl_status("job-42") + + assert result["status"] == "completed" + assert result["total"] == 2 + assert result["current"] == 2 + assert len(result["data"]) == 2 + assert delete_calls == ["website_files/job-42.txt"] + assert len(save_calls) == 1 + assert save_calls[0][0] == "website_files/job-42.txt" + + def test_check_crawl_status_completed_with_zero_total_raises(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.get", return_value=_response(200, {"status": "completed", "total": 0, "data": []})) + + with pytest.raises(Exception, match="No page found"): + app.check_crawl_status("job-1") + + def test_check_crawl_status_non_completed(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + payload = {"status": "processing", "total": 5, "completed": 1, "data": []} + mocker.patch("httpx.get", return_value=_response(200, payload)) + + assert app.check_crawl_status("job-1") == { + "status": "processing", + "total": 5, + "current": 1, + "data": [], + } + + def test_check_crawl_status_non_200_uses_error_handler(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mock_handle = mocker.patch.object(app, "_handle_error") + mocker.patch("httpx.get", return_value=_response(500, {"error": "server"})) + + assert app.check_crawl_status("job-1") == {} + mock_handle.assert_called_once() + + def test_check_crawl_status_save_failure_raises(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + payload = { + "status": "completed", + "total": 1, + "completed": 1, + "data": [{"metadata": {"title": "a", "sourceURL": "https://a"}, "markdown": "m-a"}], + } + mocker.patch("httpx.get", return_value=_response(200, payload)) + + mock_storage = MagicMock() + mock_storage.exists.return_value = False + mock_storage.save.side_effect = RuntimeError("save failed") + mocker.patch.object(firecrawl_module, "storage", mock_storage) + + with pytest.raises(Exception, match="Error saving crawl data"): + app.check_crawl_status("job-err") + + def test_extract_common_fields_and_status_formatter(self): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + + fields = app._extract_common_fields( + {"metadata": {"title": "t", "description": "d", "sourceURL": "u"}, "markdown": "m"} + ) + assert fields == {"title": "t", "description": "d", "source_url": "u", "markdown": "m"} + + status = app._format_crawl_status_response("completed", {"total": 1, "completed": 1}, [fields]) + assert status == {"status": "completed", "total": 1, "current": 1, "data": [fields]} + + def test_post_and_get_request_retry_logic(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + sleep_mock = mocker.patch.object(firecrawl_module.time, "sleep") + + resp_502_a = _response(502) + resp_502_b = _response(502) + resp_200 = _response(200) + + mocker.patch("httpx.post", side_effect=[resp_502_a, resp_200]) + post_result = app._post_request("u", {"x": 1}, {"h": 1}, retries=3, backoff_factor=0.5) + assert post_result is resp_200 + + mocker.patch("httpx.get", side_effect=[resp_502_b, _response(200)]) + get_result = app._get_request("u", {"h": 1}, retries=3, backoff_factor=0.25) + assert get_result.status_code == 200 + + assert sleep_mock.call_count == 2 + + def test_post_and_get_request_return_last_502(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + sleep_mock = mocker.patch.object(firecrawl_module.time, "sleep") + + last_post = _response(502) + mocker.patch("httpx.post", side_effect=[_response(502), last_post]) + assert app._post_request("u", {}, {}, retries=2).status_code == 502 + + last_get = _response(502) + mocker.patch("httpx.get", side_effect=[_response(502), last_get]) + assert app._get_request("u", {}, retries=2).status_code == 502 + + assert sleep_mock.call_count == 4 + + def test_handle_error_with_json_and_plain_text(self): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + + json_error = _response(400, {"message": "bad request"}) + with pytest.raises(Exception, match="bad request"): + app._handle_error(json_error, "run task") + + non_json = MagicMock() + non_json.status_code = 400 + non_json.text = "plain error" + non_json.json.side_effect = json.JSONDecodeError("bad", "x", 0) + + with pytest.raises(Exception, match="plain error"): + app._handle_error(non_json, "run task") + + def test_search_success(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.post", return_value=_response(200, {"success": True, "data": [{"url": "x"}]})) + assert app.search("python")["success"] is True + + def test_search_warning_failure(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.post", return_value=_response(200, {"success": False, "warning": "bad search"})) + with pytest.raises(Exception, match="bad search"): + app.search("python") + + def test_search_known_http_error(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mock_handle = mocker.patch.object(app, "_handle_error") + mocker.patch("httpx.post", return_value=_response(408, {"error": "timeout"})) + assert app.search("python") == {} + mock_handle.assert_called_once() + + def test_search_unknown_http_error(self, mocker: MockerFixture): + app = FirecrawlApp(api_key="fc-key", base_url="https://custom.firecrawl.dev") + mocker.patch("httpx.post", return_value=_response(418, text="teapot")) + with pytest.raises(Exception, match="Failed to perform search. Status code: 418"): + app.search("python") -def test_error_handler_handles_non_json_error_bodies(mocker: MockerFixture): - api_key = "fc-" - app = FirecrawlApp(api_key=api_key, base_url="https://custom.firecrawl.dev/") - mock_post = mocker.patch("httpx.post") - mock_resp = MagicMock() - mock_resp.status_code = 404 - mock_resp.text = "Not Found" - mock_resp.json.side_effect = Exception("Not JSON") - mock_post.return_value = mock_resp +class TestFirecrawlWebExtractor: + def test_extract_crawl_mode_returns_document(self, mocker: MockerFixture): + mocker.patch( + "core.rag.extractor.firecrawl.firecrawl_web_extractor.WebsiteService.get_crawl_url_data", + return_value={ + "markdown": "crawl content", + "source_url": "https://example.com", + "description": "desc", + "title": "title", + }, + ) - with pytest.raises(Exception) as excinfo: - app.scrape_url("https://example.com") + extractor = FirecrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + docs = extractor.extract() - # Should not raise a JSONDecodeError; current behavior reports status code only - assert str(excinfo.value) == "Failed to scrape URL. Status code: 404" + assert len(docs) == 1 + assert docs[0].page_content == "crawl content" + assert docs[0].metadata["source_url"] == "https://example.com" + + def test_extract_crawl_mode_with_missing_data_returns_empty(self, mocker: MockerFixture): + mocker.patch( + "core.rag.extractor.firecrawl.firecrawl_web_extractor.WebsiteService.get_crawl_url_data", + return_value=None, + ) + + extractor = FirecrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + assert extractor.extract() == [] + + def test_extract_scrape_mode_returns_document(self, mocker: MockerFixture): + mock_scrape = mocker.patch( + "core.rag.extractor.firecrawl.firecrawl_web_extractor.WebsiteService.get_scrape_url_data", + return_value={ + "markdown": "scrape content", + "source_url": "https://example.com", + "description": "desc", + "title": "title", + }, + ) + + extractor = FirecrawlWebExtractor( + "https://example.com", "job-1", "tenant-1", mode="scrape", only_main_content=False + ) + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == "scrape content" + mock_scrape.assert_called_once_with("firecrawl", "https://example.com", "tenant-1", False) + + def test_extract_unknown_mode_returns_empty(self): + extractor = FirecrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="unknown") + assert extractor.extract() == [] diff --git a/api/tests/unit_tests/core/rag/extractor/test_csv_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_csv_extractor.py new file mode 100644 index 0000000000..e6a06f163e --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_csv_extractor.py @@ -0,0 +1,95 @@ +import csv +import io +from types import SimpleNamespace + +import pandas as pd +import pytest + +import core.rag.extractor.csv_extractor as csv_module +from core.rag.extractor.csv_extractor import CSVExtractor + + +class _ManagedStringIO(io.StringIO): + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + self.close() + return False + + +class TestCSVExtractor: + def test_extract_success_with_source_column(self, tmp_path): + file_path = tmp_path / "data.csv" + file_path.write_text("id,body\nsource-1,hello\n", encoding="utf-8") + + extractor = CSVExtractor(str(file_path), source_column="id") + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == "id: source-1;body: hello" + assert docs[0].metadata == {"source": "source-1", "row": 0} + + def test_extract_raises_when_source_column_missing(self, tmp_path): + file_path = tmp_path / "data.csv" + file_path.write_text("id,body\nsource-1,hello\n", encoding="utf-8") + + extractor = CSVExtractor(str(file_path), source_column="missing_col") + + with pytest.raises(ValueError, match="Source column 'missing_col' not found"): + extractor.extract() + + def test_extract_wraps_unicode_error_when_autodetect_disabled(self, monkeypatch): + extractor = CSVExtractor("dummy.csv", autodetect_encoding=False) + + def raise_decode(*args, **kwargs): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode error") + + monkeypatch.setattr("builtins.open", raise_decode) + + with pytest.raises(RuntimeError, match="Error loading dummy.csv"): + extractor.extract() + + def test_extract_autodetect_encoding_success(self, monkeypatch): + extractor = CSVExtractor("dummy.csv", autodetect_encoding=True) + attempted_encodings: list[str | None] = [] + + def fake_open(path, newline="", encoding=None): + attempted_encodings.append(encoding) + if encoding is None: + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode error") + if encoding == "bad": + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode error") + return _ManagedStringIO("id,body\nsource-1,hello\n") + + monkeypatch.setattr("builtins.open", fake_open) + monkeypatch.setattr( + csv_module, + "detect_file_encodings", + lambda _: [SimpleNamespace(encoding="bad"), SimpleNamespace(encoding="utf-8")], + ) + + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == "id: source-1;body: hello" + assert attempted_encodings == [None, "bad", "utf-8"] + + def test_extract_autodetect_encoding_all_attempts_fail_returns_empty(self, monkeypatch): + extractor = CSVExtractor("dummy.csv", autodetect_encoding=True) + + def always_raise(*args, **kwargs): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode error") + + monkeypatch.setattr("builtins.open", always_raise) + monkeypatch.setattr(csv_module, "detect_file_encodings", lambda _: [SimpleNamespace(encoding="bad")]) + + assert extractor.extract() == [] + + def test_read_from_file_re_raises_csv_error(self, monkeypatch): + extractor = CSVExtractor("dummy.csv") + + monkeypatch.setattr(pd, "read_csv", lambda *args, **kwargs: (_ for _ in ()).throw(csv.Error("bad csv"))) + + with pytest.raises(csv.Error, match="bad csv"): + extractor._read_from_file(io.StringIO("x")) diff --git a/api/tests/unit_tests/core/rag/extractor/test_excel_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_excel_extractor.py new file mode 100644 index 0000000000..d2bcc1e2c4 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_excel_extractor.py @@ -0,0 +1,117 @@ +from types import SimpleNamespace + +import pandas as pd +import pytest + +import core.rag.extractor.excel_extractor as excel_module +from core.rag.extractor.excel_extractor import ExcelExtractor + + +class _FakeCell: + def __init__(self, value, hyperlink=None): + self.value = value + self.hyperlink = hyperlink + + +class _FakeSheet: + def __init__(self, header_rows, data_rows): + self._header_rows = header_rows + self._data_rows = data_rows + + def iter_rows(self, min_row=1, max_row=None, max_col=None, values_only=False): + if values_only: + for row in self._header_rows: + yield tuple(row) + return + + for row in self._data_rows: + if max_col is not None: + yield tuple(row[:max_col]) + else: + yield tuple(row) + + +class _FakeWorkbook: + def __init__(self, sheets): + self._sheets = sheets + self.sheetnames = list(sheets.keys()) + self.closed = False + + def __getitem__(self, key): + return self._sheets[key] + + def close(self): + self.closed = True + + +class TestExcelExtractor: + def test_extract_xlsx_with_hyperlinks_and_sheet_skip(self, monkeypatch): + sheet_with_data = _FakeSheet( + header_rows=[("Name", "Link")], + data_rows=[ + (_FakeCell("Alice"), _FakeCell("Doc", hyperlink=SimpleNamespace(target="https://example.com/doc"))), + (_FakeCell(None), _FakeCell(123)), + (_FakeCell(None), _FakeCell(None)), + ], + ) + empty_sheet = _FakeSheet(header_rows=[(None, None)], data_rows=[]) + + workbook = _FakeWorkbook({"Data": sheet_with_data, "Empty": empty_sheet}) + monkeypatch.setattr(excel_module, "load_workbook", lambda *args, **kwargs: workbook) + + extractor = ExcelExtractor("/tmp/sample.xlsx") + docs = extractor.extract() + + assert workbook.closed is True + assert len(docs) == 2 + assert docs[0].page_content == '"Name":"Alice";"Link":"[Doc](https://example.com/doc)"' + assert docs[1].page_content == '"Name":"";"Link":"123"' + assert all(doc.metadata["source"] == "/tmp/sample.xlsx" for doc in docs) + + def test_extract_xls_path(self, monkeypatch): + class FakeExcelFile: + sheet_names = ["Sheet1"] + + def parse(self, sheet_name): + assert sheet_name == "Sheet1" + return pd.DataFrame([{"A": "x", "B": 1}, {"A": None, "B": None}]) + + monkeypatch.setattr(pd, "ExcelFile", lambda path, engine=None: FakeExcelFile()) + + extractor = ExcelExtractor("/tmp/sample.xls") + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == '"A":"x";"B":"1.0"' + assert docs[0].metadata == {"source": "/tmp/sample.xls"} + + def test_extract_unsupported_extension_raises(self): + extractor = ExcelExtractor("/tmp/sample.txt") + + with pytest.raises(ValueError, match="Unsupported file extension"): + extractor.extract() + + def test_find_header_and_columns_prefers_first_row_with_two_columns(self): + sheet = _FakeSheet( + header_rows=[(None, None, None), ("A", "B", None), ("X", None, None)], + data_rows=[], + ) + extractor = ExcelExtractor("dummy.xlsx") + + header_row_idx, column_map, max_col_idx = extractor._find_header_and_columns(sheet) + + assert header_row_idx == 2 + assert column_map == {0: "A", 1: "B"} + assert max_col_idx == 2 + + def test_find_header_and_columns_fallback_and_empty_case(self): + extractor = ExcelExtractor("dummy.xlsx") + + fallback_sheet = _FakeSheet(header_rows=[("Only", None), (None, "Second")], data_rows=[]) + row_idx, column_map, max_col_idx = extractor._find_header_and_columns(fallback_sheet) + assert row_idx == 1 + assert column_map == {0: "Only"} + assert max_col_idx == 1 + + empty_sheet = _FakeSheet(header_rows=[(None, None)], data_rows=[]) + assert extractor._find_header_and_columns(empty_sheet) == (0, {}, 0) diff --git a/api/tests/unit_tests/core/rag/extractor/test_extract_processor.py b/api/tests/unit_tests/core/rag/extractor/test_extract_processor.py new file mode 100644 index 0000000000..5beed88971 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_extract_processor.py @@ -0,0 +1,272 @@ +from pathlib import Path +from types import SimpleNamespace + +import pytest + +import core.rag.extractor.extract_processor as processor_module +from core.rag.extractor.entity.datasource_type import DatasourceType +from core.rag.extractor.extract_processor import ExtractProcessor +from core.rag.models.document import Document + + +class _ExtractorFactory: + def __init__(self) -> None: + self.calls = [] + + def make(self, name: str) -> type[object]: + calls = self.calls + + class DummyExtractor: + def __init__(self, *args, **kwargs): + calls.append((name, args, kwargs)) + + def extract(self): + return [Document(page_content=f"extracted-by-{name}")] + + return DummyExtractor + + +def _patch_all_extractors(monkeypatch) -> _ExtractorFactory: + factory = _ExtractorFactory() + + for cls_name in [ + "CSVExtractor", + "ExcelExtractor", + "FirecrawlWebExtractor", + "HtmlExtractor", + "JinaReaderWebExtractor", + "MarkdownExtractor", + "NotionExtractor", + "PdfExtractor", + "TextExtractor", + "UnstructuredEmailExtractor", + "UnstructuredEpubExtractor", + "UnstructuredMarkdownExtractor", + "UnstructuredMsgExtractor", + "UnstructuredPPTExtractor", + "UnstructuredPPTXExtractor", + "UnstructuredWordExtractor", + "UnstructuredXmlExtractor", + "WaterCrawlWebExtractor", + "WordExtractor", + ]: + monkeypatch.setattr(processor_module, cls_name, factory.make(cls_name)) + + return factory + + +class TestExtractProcessorLoaders: + def test_load_from_upload_file_return_docs_and_text(self, monkeypatch): + monkeypatch.setattr(processor_module, "ExtractSetting", lambda **kwargs: SimpleNamespace(**kwargs)) + + monkeypatch.setattr( + ExtractProcessor, + "extract", + lambda extract_setting, is_automatic=False, file_path=None: [ + Document(page_content="doc-1"), + Document(page_content="doc-2"), + ], + ) + + upload_file = SimpleNamespace(key="file.txt") + + docs = ExtractProcessor.load_from_upload_file(upload_file=upload_file, return_text=False) + text = ExtractProcessor.load_from_upload_file(upload_file=upload_file, return_text=True) + + assert len(docs) == 2 + assert text == "doc-1\ndoc-2" + + @pytest.mark.parametrize( + ("url", "headers", "expected_suffix"), + [ + ("https://example.com/file.txt", {"Content-Type": "text/plain"}, ".txt"), + ("https://example.com/no_suffix", {"Content-Type": "application/pdf"}, ".pdf"), + ( + "https://example.com/no_suffix", + {"Content-Disposition": 'attachment; filename="report.md"'}, + ".md", + ), + ( + "https://example.com/no_suffix", + {"Content-Disposition": 'attachment; filename="report"'}, + "", + ), + ], + ) + def test_load_from_url_builds_temp_file_with_correct_suffix(self, monkeypatch, url, headers, expected_suffix): + response = SimpleNamespace(headers=headers, content=b"body") + monkeypatch.setattr(processor_module.ssrf_proxy, "get", lambda *args, **kwargs: response) + monkeypatch.setattr(processor_module, "ExtractSetting", lambda **kwargs: SimpleNamespace(**kwargs)) + + captured = {} + + def fake_extract(extract_setting, is_automatic=False, file_path=None): + key = "file_path_docs" if "file_path_docs" not in captured else "file_path_text" + captured[key] = file_path + return [Document(page_content="u1"), Document(page_content="u2")] + + monkeypatch.setattr(ExtractProcessor, "extract", fake_extract) + + docs = ExtractProcessor.load_from_url(url, return_text=False) + assert captured["file_path_docs"].endswith(expected_suffix) + + text = ExtractProcessor.load_from_url(url, return_text=True) + assert captured["file_path_text"].endswith(expected_suffix) + + assert len(docs) == 2 + assert text == "u1\nu2" + + +class TestExtractProcessorFileRouting: + @pytest.fixture(autouse=True) + def _set_unstructured_config(self, monkeypatch): + monkeypatch.setattr(processor_module.dify_config, "UNSTRUCTURED_API_URL", "https://unstructured") + monkeypatch.setattr(processor_module.dify_config, "UNSTRUCTURED_API_KEY", "key") + + def _run_extract_for_extension(self, monkeypatch, extension: str, etl_type: str, is_automatic: bool = False): + factory = _patch_all_extractors(monkeypatch) + monkeypatch.setattr(processor_module.dify_config, "ETL_TYPE", etl_type) + + def fake_download(key: str, local_path: str): + Path(local_path).write_text("content", encoding="utf-8") + + monkeypatch.setattr(processor_module.storage, "download", fake_download) + monkeypatch.setattr(processor_module.tempfile, "_get_candidate_names", lambda: iter(["candidate-name"])) + + setting = SimpleNamespace( + datasource_type=DatasourceType.FILE, + upload_file=SimpleNamespace(key=f"uploaded{extension}", tenant_id="tenant-1", created_by="user-1"), + ) + + docs = ExtractProcessor.extract(setting, is_automatic=is_automatic) + + assert len(docs) == 1 + assert docs[0].page_content.startswith("extracted-by-") + return factory.calls[-1][0], factory.calls[-1][1], factory.calls[-1][2] + + @pytest.mark.parametrize( + ("extension", "expected_extractor", "is_automatic"), + [ + (".xlsx", "ExcelExtractor", False), + (".xls", "ExcelExtractor", False), + (".pdf", "PdfExtractor", False), + (".md", "UnstructuredMarkdownExtractor", True), + (".mdx", "MarkdownExtractor", False), + (".htm", "HtmlExtractor", False), + (".html", "HtmlExtractor", False), + (".docx", "WordExtractor", False), + (".doc", "UnstructuredWordExtractor", False), + (".csv", "CSVExtractor", False), + (".msg", "UnstructuredMsgExtractor", False), + (".eml", "UnstructuredEmailExtractor", False), + (".ppt", "UnstructuredPPTExtractor", False), + (".pptx", "UnstructuredPPTXExtractor", False), + (".xml", "UnstructuredXmlExtractor", False), + (".epub", "UnstructuredEpubExtractor", False), + (".txt", "TextExtractor", False), + ], + ) + def test_extract_routes_file_extensions_for_unstructured_mode( + self, monkeypatch, extension, expected_extractor, is_automatic + ): + extractor_name, args, kwargs = self._run_extract_for_extension( + monkeypatch, extension, etl_type="Unstructured", is_automatic=is_automatic + ) + + assert extractor_name == expected_extractor + assert args + + @pytest.mark.parametrize( + ("extension", "expected_extractor"), + [ + (".xlsx", "ExcelExtractor"), + (".pdf", "PdfExtractor"), + (".markdown", "MarkdownExtractor"), + (".html", "HtmlExtractor"), + (".docx", "WordExtractor"), + (".csv", "CSVExtractor"), + (".epub", "UnstructuredEpubExtractor"), + (".txt", "TextExtractor"), + ], + ) + def test_extract_routes_file_extensions_for_default_mode(self, monkeypatch, extension, expected_extractor): + extractor_name, _, _ = self._run_extract_for_extension(monkeypatch, extension, etl_type="SelfHosted") + + assert extractor_name == expected_extractor + + def test_extract_requires_upload_file_when_file_path_not_provided(self): + setting = SimpleNamespace(datasource_type=DatasourceType.FILE, upload_file=None) + + with pytest.raises(AssertionError, match="upload_file is required"): + ExtractProcessor.extract(setting) + + +class TestExtractProcessorDatasourceRouting: + def test_extract_routes_notion_datasource(self, monkeypatch): + factory = _patch_all_extractors(monkeypatch) + + notion_info = SimpleNamespace( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + document="doc", + tenant_id="tenant", + credential_id="cred", + ) + setting = SimpleNamespace(datasource_type=DatasourceType.NOTION, notion_info=notion_info) + + docs = ExtractProcessor.extract(setting) + + assert docs[0].page_content == "extracted-by-NotionExtractor" + assert factory.calls[-1][0] == "NotionExtractor" + + @pytest.mark.parametrize( + ("provider", "expected"), + [ + ("firecrawl", "FirecrawlWebExtractor"), + ("watercrawl", "WaterCrawlWebExtractor"), + ("jinareader", "JinaReaderWebExtractor"), + ], + ) + def test_extract_routes_website_datasource_providers(self, monkeypatch, provider: str, expected: str): + factory = _patch_all_extractors(monkeypatch) + + website_info = SimpleNamespace( + provider=provider, + url="https://example.com", + job_id="job", + tenant_id="tenant", + mode="crawl", + only_main_content=True, + ) + setting = SimpleNamespace(datasource_type=DatasourceType.WEBSITE, website_info=website_info) + + docs = ExtractProcessor.extract(setting) + assert docs[0].page_content == f"extracted-by-{expected}" + assert factory.calls[-1][0] == expected + + def test_extract_unsupported_website_provider(self): + bad_provider = SimpleNamespace( + provider="unknown", + url="https://example.com", + job_id="job", + tenant_id="tenant", + mode="crawl", + only_main_content=True, + ) + setting = SimpleNamespace(datasource_type=DatasourceType.WEBSITE, website_info=bad_provider) + + with pytest.raises(ValueError, match="Unsupported website provider"): + ExtractProcessor.extract(setting) + + def test_extract_unsupported_datasource_type(self): + with pytest.raises(ValueError, match="Unsupported datasource type"): + ExtractProcessor.extract(SimpleNamespace(datasource_type="unknown")) + + def test_extract_requires_notion_info(self): + with pytest.raises(AssertionError, match="notion_info is required"): + ExtractProcessor.extract(SimpleNamespace(datasource_type=DatasourceType.NOTION, notion_info=None)) + + def test_extract_requires_website_info(self): + with pytest.raises(AssertionError, match="website_info is required"): + ExtractProcessor.extract(SimpleNamespace(datasource_type=DatasourceType.WEBSITE, website_info=None)) diff --git a/api/tests/unit_tests/core/rag/extractor/test_extractor_base.py b/api/tests/unit_tests/core/rag/extractor/test_extractor_base.py new file mode 100644 index 0000000000..1d5f27181b --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_extractor_base.py @@ -0,0 +1,26 @@ +import pytest + +from core.rag.extractor.extractor_base import BaseExtractor + + +class _CallsBaseExtractor(BaseExtractor): + def extract(self): + return super().extract() + + +class _ConcreteExtractor(BaseExtractor): + def extract(self): + return ["ok"] + + +class TestBaseExtractor: + def test_extract_default_raises_not_implemented(self): + extractor = _CallsBaseExtractor() + + with pytest.raises(NotImplementedError): + extractor.extract() + + def test_concrete_extractor_can_override(self): + extractor = _ConcreteExtractor() + + assert extractor.extract() == ["ok"] diff --git a/api/tests/unit_tests/core/rag/extractor/test_helpers.py b/api/tests/unit_tests/core/rag/extractor/test_helpers.py index edf8735e57..74387f749d 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_helpers.py +++ b/api/tests/unit_tests/core/rag/extractor/test_helpers.py @@ -1,10 +1,55 @@ import tempfile +from types import SimpleNamespace -from core.rag.extractor.helpers import FileEncoding, detect_file_encodings +import pytest + +from core.rag.extractor import helpers +from core.rag.extractor.helpers import detect_file_encodings -def test_detect_file_encodings() -> None: - with tempfile.NamedTemporaryFile(mode="w+t", suffix=".txt") as temp: - temp.write("Shared data") - temp_path = temp.name - assert detect_file_encodings(temp_path) == [FileEncoding(encoding="utf_8", confidence=0.0, language="Unknown")] +class TestHelpers: + def test_detect_file_encodings(self) -> None: + with tempfile.NamedTemporaryFile(mode="w+t", suffix=".txt") as temp: + temp.write("Shared data") + temp.flush() + temp_path = temp.name + encodings = detect_file_encodings(temp_path) + + assert len(encodings) == 1 + assert encodings[0].encoding in {"utf_8", "ascii"} + assert encodings[0].confidence == 0.0 + # Assert the language field for full coverage + assert encodings[0].language is not None + + def test_detect_file_encodings_timeout(self, monkeypatch): + class FakeFuture: + def result(self, timeout=None): + raise helpers.concurrent.futures.TimeoutError() + + class FakeExecutor: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def submit(self, fn, file_path): + return FakeFuture() + + monkeypatch.setattr(helpers.concurrent.futures, "ThreadPoolExecutor", lambda: FakeExecutor()) + + with pytest.raises(TimeoutError, match="Timeout reached while detecting encoding"): + detect_file_encodings("file.txt", timeout=1) + + def test_detect_file_encodings_raises_when_encoding_not_detected(self, monkeypatch): + class FakeResult: + encoding = None + coherence = 0.0 + language = None + + monkeypatch.setattr( + helpers.charset_normalizer, "from_path", lambda _: SimpleNamespace(best=lambda: FakeResult()) + ) + + with pytest.raises(RuntimeError, match="Could not detect encoding"): + detect_file_encodings("file.txt") diff --git a/api/tests/unit_tests/core/rag/extractor/test_html_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_html_extractor.py new file mode 100644 index 0000000000..8bc65e5654 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_html_extractor.py @@ -0,0 +1,21 @@ +from core.rag.extractor.html_extractor import HtmlExtractor + + +class TestHtmlExtractor: + def test_extract_returns_text_content(self, tmp_path): + file_path = tmp_path / "sample.html" + file_path.write_text("

Title

Hello

", encoding="utf-8") + + extractor = HtmlExtractor(str(file_path)) + docs = extractor.extract() + + assert len(docs) == 1 + assert "".join(docs[0].page_content.split()) == "TitleHello" + + def test_load_as_text_strips_whitespace_and_handles_empty(self, tmp_path): + file_path = tmp_path / "sample.html" + file_path.write_text(" \n ", encoding="utf-8") + + extractor = HtmlExtractor(str(file_path)) + + assert extractor._load_as_text() == "" diff --git a/api/tests/unit_tests/core/rag/extractor/test_jina_reader_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_jina_reader_extractor.py new file mode 100644 index 0000000000..0b4c9bd809 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_jina_reader_extractor.py @@ -0,0 +1,47 @@ +from pytest_mock import MockerFixture + +from core.rag.extractor.jina_reader_extractor import JinaReaderWebExtractor + + +class TestJinaReaderWebExtractor: + def test_extract_crawl_mode_returns_document(self, mocker: MockerFixture): + mocker.patch( + "core.rag.extractor.jina_reader_extractor.WebsiteService.get_crawl_url_data", + return_value={ + "content": "markdown-content", + "url": "https://example.com", + "description": "desc", + "title": "title", + }, + ) + + extractor = JinaReaderWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == "markdown-content" + assert docs[0].metadata == { + "source_url": "https://example.com", + "description": "desc", + "title": "title", + } + + def test_extract_crawl_mode_with_missing_data_returns_empty(self, mocker: MockerFixture): + mocker.patch( + "core.rag.extractor.jina_reader_extractor.WebsiteService.get_crawl_url_data", + return_value=None, + ) + + extractor = JinaReaderWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + + assert extractor.extract() == [] + + def test_extract_non_crawl_mode_returns_empty(self, mocker: MockerFixture): + mock_get_crawl = mocker.patch( + "core.rag.extractor.jina_reader_extractor.WebsiteService.get_crawl_url_data", + return_value={"content": "unused"}, + ) + extractor = JinaReaderWebExtractor("https://example.com", "job-1", "tenant-1", mode="scrape") + + assert extractor.extract() == [] + mock_get_crawl.assert_not_called() diff --git a/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py index d4cf534c56..7e78c86c7d 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py @@ -1,8 +1,15 @@ +from pathlib import Path +from types import SimpleNamespace + +import pytest + +import core.rag.extractor.markdown_extractor as markdown_module from core.rag.extractor.markdown_extractor import MarkdownExtractor -def test_markdown_to_tups(): - markdown = """ +class TestMarkdownExtractor: + def test_markdown_to_tups(self): + markdown = """ this is some text without header # title 1 @@ -11,12 +18,113 @@ this is balabala text ## title 2 this is more specific text. """ - extractor = MarkdownExtractor(file_path="dummy_path") - updated_output = extractor.markdown_to_tups(markdown) - assert len(updated_output) == 3 - key, header_value = updated_output[0] - assert key == None - assert header_value.strip() == "this is some text without header" - title_1, value = updated_output[1] - assert title_1.strip() == "title 1" - assert value.strip() == "this is balabala text" + extractor = MarkdownExtractor(file_path="dummy_path") + updated_output = extractor.markdown_to_tups(markdown) + + assert len(updated_output) == 3 + key, header_value = updated_output[0] + assert key is None + assert header_value.strip() == "this is some text without header" + + title_1, value = updated_output[1] + assert title_1.strip() == "title 1" + assert value.strip() == "this is balabala text" + + def test_markdown_to_tups_keeps_code_block_headers_literal(self): + markdown = """# Header +before +```python +# this is not a heading +print('x') +``` +after +""" + extractor = MarkdownExtractor(file_path="dummy_path") + + tups = extractor.markdown_to_tups(markdown) + + assert len(tups) == 2 + assert tups[1][0] == "Header" + assert "# this is not a heading" in tups[1][1] + + def test_remove_images_and_hyperlinks(self): + extractor = MarkdownExtractor(file_path="dummy_path") + + with_images = "before ![[image.png]] after" + with_links = "[OpenAI](https://openai.com)" + + assert extractor.remove_images(with_images) == "before after" + assert extractor.remove_hyperlinks(with_links) == "OpenAI" + + def test_parse_tups_reads_file_and_applies_options(self, tmp_path): + markdown_file = tmp_path / "doc.md" + markdown_file.write_text("# Header\nText with [link](https://example.com) and ![[img.png]]", encoding="utf-8") + + extractor = MarkdownExtractor( + file_path=str(markdown_file), + remove_hyperlinks=True, + remove_images=True, + autodetect_encoding=False, + ) + + tups = extractor.parse_tups(str(markdown_file)) + + assert len(tups) == 2 + assert tups[1][0] == "Header" + assert "[link]" not in tups[1][1] + assert "img.png" not in tups[1][1] + + def test_parse_tups_autodetects_encoding_after_decode_error(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path", autodetect_encoding=True) + + calls: list[str | None] = [] + + def fake_read_text(self, encoding=None): + calls.append(encoding) + if encoding is None: + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "fail") + if encoding == "bad-encoding": + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "fail") + return "# H\ncontent" + + monkeypatch.setattr(Path, "read_text", fake_read_text, raising=True) + monkeypatch.setattr( + markdown_module, + "detect_file_encodings", + lambda _: [SimpleNamespace(encoding="bad-encoding"), SimpleNamespace(encoding="utf-8")], + ) + + tups = extractor.parse_tups("dummy_path") + + assert len(tups) == 2 + assert calls == [None, "bad-encoding", "utf-8"] + + def test_parse_tups_decode_error_with_autodetect_disabled_raises(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path", autodetect_encoding=False) + + def raise_decode(self, encoding=None): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "fail") + + monkeypatch.setattr(Path, "read_text", raise_decode, raising=True) + + with pytest.raises(RuntimeError, match="Error loading dummy_path"): + extractor.parse_tups("dummy_path") + + def test_parse_tups_other_exceptions_are_wrapped(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path") + + def raise_other(self, encoding=None): + raise OSError("disk error") + + monkeypatch.setattr(Path, "read_text", raise_other, raising=True) + + with pytest.raises(RuntimeError, match="Error loading dummy_path"): + extractor.parse_tups("dummy_path") + + def test_extract_builds_documents_for_header_and_non_header(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path") + monkeypatch.setattr(extractor, "parse_tups", lambda _: [(None, "plain"), ("Header", "value")]) + + docs = extractor.extract() + + assert [doc.page_content for doc in docs] == ["plain", "\n\nHeader\nvalue"] diff --git a/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py index 58bec7d19e..6daee11f8f 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py @@ -1,93 +1,499 @@ +from types import SimpleNamespace from unittest import mock +import httpx +import pytest from pytest_mock import MockerFixture from core.rag.extractor import notion_extractor -user_id = "user1" -database_id = "database1" -page_id = "page1" - -extractor = notion_extractor.NotionExtractor( - notion_workspace_id="x", notion_obj_id="x", notion_page_type="page", tenant_id="x", notion_access_token="x" -) - - -def _generate_page(page_title: str): - return { - "object": "page", - "id": page_id, - "properties": { - "Page": { - "type": "title", - "title": [{"type": "text", "text": {"content": page_title}, "plain_text": page_title}], - } - }, - } - - -def _generate_block(block_id: str, block_type: str, block_text: str): - return { - "object": "block", - "id": block_id, - "parent": {"type": "page_id", "page_id": page_id}, - "type": block_type, - "has_children": False, - block_type: { - "rich_text": [ - { - "type": "text", - "text": {"content": block_text}, - "plain_text": block_text, - } - ] - }, - } - - -def _mock_response(data): +def _mock_response(data, status_code: int = 200, text: str = ""): response = mock.Mock() - response.status_code = 200 + response.status_code = status_code + response.text = text response.json.return_value = data return response -def _remove_multiple_new_lines(text): - while "\n\n" in text: - text = text.replace("\n\n", "\n") - return text.strip() +class TestNotionExtractorInitAndPublicMethods: + def test_init_with_explicit_token(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + assert extractor._notion_access_token == "token" + + def test_init_falls_back_to_env_token_when_credential_lookup_fails(self, monkeypatch): + monkeypatch.setattr( + notion_extractor.NotionExtractor, + "_get_access_token", + classmethod(lambda cls, tenant_id, credential_id: (_ for _ in ()).throw(Exception("credential error"))), + ) + monkeypatch.setattr(notion_extractor.dify_config, "NOTION_INTEGRATION_TOKEN", "env-token", raising=False) + + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + credential_id="cred", + ) + + assert extractor._notion_access_token == "env-token" + + def test_init_raises_if_no_credential_and_no_env_token(self, monkeypatch): + monkeypatch.setattr( + notion_extractor.NotionExtractor, + "_get_access_token", + classmethod(lambda cls, tenant_id, credential_id: (_ for _ in ()).throw(Exception("credential error"))), + ) + monkeypatch.setattr(notion_extractor.dify_config, "NOTION_INTEGRATION_TOKEN", None, raising=False) + + with pytest.raises(ValueError, match="Must specify `integration_token`"): + notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + credential_id="cred", + ) + + def test_extract_updates_last_edited_and_loads_documents(self, monkeypatch): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + update_mock = mock.Mock() + load_mock = mock.Mock(return_value=[SimpleNamespace(page_content="doc")]) + monkeypatch.setattr(extractor, "update_last_edited_time", update_mock) + monkeypatch.setattr(extractor, "_load_data_as_documents", load_mock) + + docs = extractor.extract() + + update_mock.assert_called_once_with(None) + load_mock.assert_called_once_with("obj", "page") + assert len(docs) == 1 + + def test_load_data_as_documents_page_database_and_invalid(self, monkeypatch): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + monkeypatch.setattr(extractor, "_get_notion_block_data", lambda _: ["line1", "line2"]) + page_docs = extractor._load_data_as_documents("page-id", "page") + assert page_docs[0].page_content == "line1\nline2" + + monkeypatch.setattr(extractor, "_get_notion_database_data", lambda _: [SimpleNamespace(page_content="db")]) + db_docs = extractor._load_data_as_documents("db-id", "database") + assert db_docs[0].page_content == "db" + + with pytest.raises(ValueError, match="notion page type not supported"): + extractor._load_data_as_documents("obj", "unsupported") -def test_notion_page(mocker: MockerFixture): - texts = ["Head 1", "1.1", "paragraph 1", "1.1.1"] - mocked_notion_page = { - "object": "list", - "results": [ - _generate_block("b1", "heading_1", texts[0]), - _generate_block("b2", "heading_2", texts[1]), - _generate_block("b3", "paragraph", texts[2]), - _generate_block("b4", "heading_3", texts[3]), - ], - "next_cursor": None, - } - mocker.patch("httpx.request", return_value=_mock_response(mocked_notion_page)) +class TestNotionDatabase: + def test_get_notion_database_data_parses_property_types_and_pagination(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) - page_docs = extractor._load_data_as_documents(page_id, "page") - assert len(page_docs) == 1 - content = _remove_multiple_new_lines(page_docs[0].page_content) - assert content == "# Head 1\n## 1.1\nparagraph 1\n### 1.1.1" + first_page = { + "results": [ + { + "properties": { + "tags": { + "type": "multi_select", + "multi_select": [{"name": "A"}, {"name": "B"}], + }, + "title_prop": {"type": "title", "title": [{"plain_text": "Title"}]}, + "empty_title": {"type": "title", "title": []}, + "rich": {"type": "rich_text", "rich_text": [{"plain_text": "RichText"}]}, + "empty_rich": {"type": "rich_text", "rich_text": []}, + "select_prop": {"type": "select", "select": {"name": "Selected"}}, + "empty_select": {"type": "select", "select": None}, + "status_prop": {"type": "status", "status": {"name": "Open"}}, + "empty_status": {"type": "status", "status": None}, + "number_prop": {"type": "number", "number": 10}, + "dict_prop": {"type": "date", "date": {"start": "2024-01-01", "end": None}}, + }, + "url": "https://notion.so/page-1", + } + ], + "has_more": True, + "next_cursor": "cursor-2", + } + second_page = {"results": [], "has_more": False, "next_cursor": None} + + mock_post = mocker.patch("httpx.post", side_effect=[_mock_response(first_page), _mock_response(second_page)]) + + docs = extractor._get_notion_database_data("db-1", query_dict={"filter": {"x": 1}}) + + assert len(docs) == 1 + content = docs[0].page_content + assert "tags:['A', 'B']" in content + assert "title_prop:Title" in content + assert "rich:RichText" in content + assert "number_prop:10" in content + assert "dict_prop:start:2024-01-01" in content + assert "Row Page URL:https://notion.so/page-1" in content + assert mock_post.call_count == 2 + + def test_get_notion_database_data_handles_missing_results_and_empty_content(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) + + mocker.patch("httpx.post", return_value=_mock_response({"results": None})) + assert extractor._get_notion_database_data("db-1") == [] + + def test_get_notion_database_data_requires_access_token(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) + extractor._notion_access_token = None + + with pytest.raises(AssertionError, match="Notion access token is required"): + extractor._get_notion_database_data("db-1") -def test_notion_database(mocker: MockerFixture): - page_title_list = ["page1", "page2", "page3"] - mocked_notion_database = { - "object": "list", - "results": [_generate_page(i) for i in page_title_list], - "next_cursor": None, - } - mocker.patch("httpx.post", return_value=_mock_response(mocked_notion_database)) - database_docs = extractor._load_data_as_documents(database_id, "database") - assert len(database_docs) == 1 - content = _remove_multiple_new_lines(database_docs[0].page_content) - assert content == "\n".join([f"Page:{i}" for i in page_title_list]) +class TestNotionBlocks: + def test_get_notion_block_data_success_with_table_headings_children_and_pagination(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + first_response = { + "results": [ + {"type": "table", "id": "tbl-1", "has_children": False, "table": {}}, + { + "type": "heading_1", + "id": "h1", + "has_children": False, + "heading_1": {"rich_text": [{"text": {"content": "Heading"}}]}, + }, + { + "type": "paragraph", + "id": "p1", + "has_children": True, + "paragraph": {"rich_text": [{"text": {"content": "Paragraph"}}]}, + }, + { + "type": "child_page", + "id": "cp1", + "has_children": True, + "child_page": {"rich_text": []}, + }, + ], + "next_cursor": "cursor-2", + } + second_response = { + "results": [ + { + "type": "heading_2", + "id": "h2", + "has_children": False, + "heading_2": {"rich_text": [{"text": {"content": "SubHeading"}}]}, + } + ], + "next_cursor": None, + } + + mocker.patch("httpx.request", side_effect=[_mock_response(first_response), _mock_response(second_response)]) + mocker.patch.object(extractor, "_read_table_rows", return_value="TABLE") + mocker.patch.object(extractor, "_read_block", return_value="CHILD") + + lines = extractor._get_notion_block_data("page-1") + + assert lines[0] == "TABLE\n\n" + assert "# Heading" in lines[1] + assert "Paragraph\nCHILD\n\n" in lines[2] + assert "## SubHeading" in lines[-1] + + def test_get_notion_block_data_handles_http_error_and_invalid_payload(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + mocker.patch("httpx.request", side_effect=httpx.HTTPError("network")) + with pytest.raises(ValueError, match="Error fetching Notion block data"): + extractor._get_notion_block_data("page-1") + + mocker.patch("httpx.request", return_value=_mock_response({"bad": "payload"}, status_code=200)) + with pytest.raises(ValueError, match="Error fetching Notion block data"): + extractor._get_notion_block_data("page-1") + + mocker.patch("httpx.request", return_value=_mock_response({"results": []}, status_code=500, text="boom")) + with pytest.raises(ValueError, match="Error fetching Notion block data: boom"): + extractor._get_notion_block_data("page-1") + + def test_read_block_supports_heading_table_and_recursion(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + root_payload = { + "results": [ + { + "type": "heading_2", + "id": "h2", + "has_children": False, + "heading_2": {"rich_text": [{"text": {"content": "Root"}}]}, + }, + { + "type": "paragraph", + "id": "child-block", + "has_children": True, + "paragraph": {"rich_text": [{"text": {"content": "Parent"}}]}, + }, + {"type": "table", "id": "tbl-1", "has_children": False, "table": {}}, + ], + "next_cursor": None, + } + child_payload = { + "results": [ + { + "type": "paragraph", + "id": "leaf", + "has_children": False, + "paragraph": {"rich_text": [{"text": {"content": "Child"}}]}, + } + ], + "next_cursor": None, + } + + mocker.patch("httpx.request", side_effect=[_mock_response(root_payload), _mock_response(child_payload)]) + mocker.patch.object(extractor, "_read_table_rows", return_value="TABLE-MD") + + content = extractor._read_block("root") + + assert "## Root" in content + assert "Parent" in content + assert "Child" in content + assert "TABLE-MD" in content + + def test_read_block_breaks_on_missing_results(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + mocker.patch("httpx.request", return_value=_mock_response({"results": None, "next_cursor": None})) + + assert extractor._read_block("root") == "" + + def test_read_table_rows_formats_markdown_with_pagination(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + page_one = { + "results": [ + { + "table_row": { + "cells": [ + [{"text": {"content": "H1"}}], + [{"text": {"content": "H2"}}], + ] + } + }, + { + "table_row": { + "cells": [ + [{"text": {"content": "R1C1"}}], + [{"text": {"content": "R1C2"}}], + ] + } + }, + ], + "next_cursor": "next", + } + page_two = { + "results": [ + { + "table_row": { + "cells": [ + [{"text": {"content": "H1"}}], + [], + ] + } + }, + { + "table_row": { + "cells": [ + [{"text": {"content": "R2C1"}}], + [{"text": {"content": "R2C2"}}], + ] + } + }, + ], + "next_cursor": None, + } + + mocker.patch("httpx.request", side_effect=[_mock_response(page_one), _mock_response(page_two)]) + + markdown = extractor._read_table_rows("tbl-1") + + assert "| H1 | H2 |" in markdown + assert "| R1C1 | R1C2 |" in markdown + assert "| H1 | |" in markdown + assert "| R2C1 | R2C2 |" in markdown + + +class TestNotionMetadataAndCredentialMethods: + def test_update_last_edited_time_no_document_model(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + assert extractor.update_last_edited_time(None) is None + + def test_update_last_edited_time_updates_document_and_commits(self, monkeypatch): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + class FakeDocumentModel: + data_source_info = "data_source_info" + + update_calls = [] + + class FakeQuery: + def filter_by(self, **kwargs): + return self + + def update(self, payload): + update_calls.append(payload) + + class FakeSession: + committed = False + + def query(self, model): + assert model is FakeDocumentModel + return FakeQuery() + + def commit(self): + self.committed = True + + fake_db = SimpleNamespace(session=FakeSession()) + monkeypatch.setattr(notion_extractor, "DocumentModel", FakeDocumentModel) + monkeypatch.setattr(notion_extractor, "db", fake_db) + monkeypatch.setattr(extractor, "get_notion_last_edited_time", lambda: "2026-01-01T00:00:00.000Z") + + doc_model = SimpleNamespace(id="doc-1", data_source_info_dict={"source": "notion"}) + extractor.update_last_edited_time(doc_model) + + assert update_calls + assert fake_db.session.committed is True + + def test_get_notion_last_edited_time_uses_page_and_database_urls(self, mocker: MockerFixture): + extractor_page = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="page-id", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + request_mock = mocker.patch( + "httpx.request", return_value=_mock_response({"last_edited_time": "2025-05-01T00:00:00.000Z"}) + ) + + assert extractor_page.get_notion_last_edited_time() == "2025-05-01T00:00:00.000Z" + assert "pages/page-id" in request_mock.call_args[0][1] + + extractor_db = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="db-id", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) + request_mock = mocker.patch( + "httpx.request", return_value=_mock_response({"last_edited_time": "2025-06-01T00:00:00.000Z"}) + ) + + assert extractor_db.get_notion_last_edited_time() == "2025-06-01T00:00:00.000Z" + assert "databases/db-id" in request_mock.call_args[0][1] + + def test_get_notion_last_edited_time_requires_access_token(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + extractor._notion_access_token = None + + with pytest.raises(AssertionError, match="Notion access token is required"): + extractor.get_notion_last_edited_time() + + def test_get_access_token_success_and_errors(self, monkeypatch): + with pytest.raises(Exception, match="No credential id found"): + notion_extractor.NotionExtractor._get_access_token("tenant", None) + + class FakeProviderServiceMissing: + def get_datasource_credentials(self, **kwargs): + return None + + monkeypatch.setattr(notion_extractor, "DatasourceProviderService", FakeProviderServiceMissing) + with pytest.raises(Exception, match="No notion credential found"): + notion_extractor.NotionExtractor._get_access_token("tenant", "cred") + + class FakeProviderServiceFound: + def get_datasource_credentials(self, **kwargs): + return {"integration_secret": "token-from-credential"} + + monkeypatch.setattr(notion_extractor, "DatasourceProviderService", FakeProviderServiceFound) + + assert notion_extractor.NotionExtractor._get_access_token("tenant", "cred") == "token-from-credential" diff --git a/api/tests/unit_tests/core/rag/extractor/test_text_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_text_extractor.py new file mode 100644 index 0000000000..fb3c6e52c6 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_text_extractor.py @@ -0,0 +1,79 @@ +from pathlib import Path +from types import SimpleNamespace + +import pytest + +import core.rag.extractor.text_extractor as text_module +from core.rag.extractor.text_extractor import TextExtractor + + +class TestTextExtractor: + def test_extract_success(self, tmp_path): + file_path = tmp_path / "data.txt" + file_path.write_text("hello world", encoding="utf-8") + + extractor = TextExtractor(str(file_path)) + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == "hello world" + assert docs[0].metadata == {"source": str(file_path)} + + def test_extract_autodetect_success_after_decode_error(self, monkeypatch): + extractor = TextExtractor("dummy.txt", autodetect_encoding=True) + + calls = [] + + def fake_read_text(self, encoding=None): + calls.append(encoding) + if encoding is None: + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + if encoding == "bad": + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + return "decoded text" + + monkeypatch.setattr(Path, "read_text", fake_read_text, raising=True) + monkeypatch.setattr( + text_module, + "detect_file_encodings", + lambda _: [SimpleNamespace(encoding="bad"), SimpleNamespace(encoding="utf-8")], + ) + + docs = extractor.extract() + + assert docs[0].page_content == "decoded text" + assert calls == [None, "bad", "utf-8"] + + def test_extract_autodetect_all_fail_raises_runtime_error(self, monkeypatch): + extractor = TextExtractor("dummy.txt", autodetect_encoding=True) + + def always_decode_error(self, encoding=None): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + + monkeypatch.setattr(Path, "read_text", always_decode_error, raising=True) + monkeypatch.setattr(text_module, "detect_file_encodings", lambda _: [SimpleNamespace(encoding="latin-1")]) + + with pytest.raises(RuntimeError, match="all detected encodings failed"): + extractor.extract() + + def test_extract_decode_error_without_autodetect_raises_runtime_error(self, monkeypatch): + extractor = TextExtractor("dummy.txt", autodetect_encoding=False) + + def always_decode_error(self, encoding=None): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + + monkeypatch.setattr(Path, "read_text", always_decode_error, raising=True) + + with pytest.raises(RuntimeError, match="specified encoding failed"): + extractor.extract() + + def test_extract_wraps_non_decode_exceptions(self, monkeypatch): + extractor = TextExtractor("dummy.txt") + + def raise_other(self, encoding=None): + raise OSError("io error") + + monkeypatch.setattr(Path, "read_text", raise_other, raising=True) + + with pytest.raises(RuntimeError, match="Error loading dummy.txt"): + extractor.extract() diff --git a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py index 0792ada194..64eb89590a 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py @@ -3,9 +3,12 @@ import io import os import tempfile +from collections import UserDict from pathlib import Path from types import SimpleNamespace +from unittest.mock import MagicMock +import pytest from docx import Document from docx.oxml import OxmlElement from docx.oxml.ns import qn @@ -136,7 +139,7 @@ def test_extract_images_from_docx(monkeypatch): monkeypatch.setattr(we, "UploadFile", FakeUploadFile) # Patch external image fetcher - def fake_get(url: str): + def fake_get(url: str, **kwargs): assert url == "https://example.com/image.png" return SimpleNamespace(status_code=200, headers={"Content-Type": "image/png"}, content=external_bytes) @@ -203,10 +206,8 @@ def test_extract_images_from_docx_uses_internal_files_url(): finally: # Restore original values - if original_files_url is not None: - dify_config.FILES_URL = original_files_url - if original_internal_files_url is not None: - dify_config.INTERNAL_FILES_URL = original_internal_files_url + dify_config.FILES_URL = original_files_url + dify_config.INTERNAL_FILES_URL = original_internal_files_url def test_extract_hyperlinks(monkeypatch): @@ -314,3 +315,405 @@ def test_extract_legacy_hyperlinks(monkeypatch): finally: if os.path.exists(tmp_path): os.remove(tmp_path) + + +def test_init_rejects_invalid_url_status(monkeypatch): + class FakeResponse: + status_code = 404 + content = b"" + closed = False + + def close(self): + self.closed = True + + fake_response = FakeResponse() + monkeypatch.setattr(we, "ssrf_proxy", SimpleNamespace(get=lambda url, **kwargs: fake_response)) + + with pytest.raises(ValueError, match="returned status code 404"): + WordExtractor("https://example.com/missing.docx", "tenant", "user") + + assert fake_response.closed is True + + +def test_init_expands_home_path_and_invalid_local_path(monkeypatch, tmp_path): + target_file = tmp_path / "expanded.docx" + target_file.write_bytes(b"docx") + + monkeypatch.setattr(we.os.path, "expanduser", lambda p: str(target_file)) + monkeypatch.setattr( + we.os.path, + "isfile", + lambda p: p == str(target_file), + ) + + extractor = WordExtractor("~/expanded.docx", "tenant", "user") + assert extractor.file_path == str(target_file) + + monkeypatch.setattr(we.os.path, "isfile", lambda p: False) + with pytest.raises(ValueError, match="is not a valid file or url"): + WordExtractor("not-a-file", "tenant", "user") + + +def test_del_closes_temp_file(): + extractor = object.__new__(WordExtractor) + extractor.temp_file = MagicMock() + + WordExtractor.__del__(extractor) + + extractor.temp_file.close.assert_called_once() + + +def test_extract_images_handles_invalid_external_cases(monkeypatch): + class FakeTargetRef: + def __contains__(self, item): + return item == "image" + + def split(self, sep): + return [None] + + rel_invalid_url = SimpleNamespace(is_external=True, target_ref="image-no-url") + rel_request_error = SimpleNamespace(is_external=True, target_ref="https://example.com/image-error") + rel_unknown_mime = SimpleNamespace(is_external=True, target_ref="https://example.com/image-unknown") + rel_internal_none_ext = SimpleNamespace(is_external=False, target_ref=FakeTargetRef(), target_part=object()) + + doc = SimpleNamespace( + part=SimpleNamespace( + rels={ + "r1": rel_invalid_url, + "r2": rel_request_error, + "r3": rel_unknown_mime, + "r4": rel_internal_none_ext, + } + ) + ) + + def fake_get(url, **kwargs): + if "image-error" in url: + raise RuntimeError("network") + return SimpleNamespace(status_code=200, headers={"Content-Type": "application/unknown"}, content=b"x") + + monkeypatch.setattr(we, "ssrf_proxy", SimpleNamespace(get=fake_get)) + db_stub = SimpleNamespace(session=SimpleNamespace(add=lambda obj: None, commit=MagicMock())) + monkeypatch.setattr(we, "db", db_stub) + monkeypatch.setattr(we, "storage", SimpleNamespace(save=lambda key, data: None)) + monkeypatch.setattr(we.dify_config, "FILES_URL", "http://files.local", raising=False) + + extractor = object.__new__(WordExtractor) + extractor.tenant_id = "tenant" + extractor.user_id = "user" + + result = extractor._extract_images_from_docx(doc) + + assert result == {} + db_stub.session.commit.assert_called_once() + + +def test_table_to_markdown_and_parse_helpers(monkeypatch): + extractor = object.__new__(WordExtractor) + + table = SimpleNamespace( + rows=[ + SimpleNamespace(cells=[1, 2]), + SimpleNamespace(cells=[3, 4]), + ] + ) + parse_row_mock = MagicMock(side_effect=[["H1", "H2"], ["A", "B"]]) + monkeypatch.setattr(extractor, "_parse_row", parse_row_mock) + + markdown = extractor._table_to_markdown(table, {}) + assert markdown == "| H1 | H2 |\n| --- | --- |\n| A | B |" + + class FakeBlip: + def __init__(self, image_id): + self.image_id = image_id + + def get(self, key): + return self.image_id + + class FakeRunChild: + def __init__(self, blips, text=""): + self._blips = blips + self.text = text + self.tag = qn("w:r") + + def xpath(self, pattern): + if pattern == ".//a:blip": + return self._blips + return [] + + class FakeRun: + def __init__(self, element, paragraph): + # Mirror the subset used by _parse_cell_paragraph + self.element = element + self.text = getattr(element, "text", "") + + # Patch we.Run so our lightweight child objects work with the extractor + monkeypatch.setattr(we, "Run", FakeRun) + + image_part = object() + paragraph = SimpleNamespace( + _element=[ + FakeRunChild([FakeBlip(None), FakeBlip("ext"), FakeBlip("int")], text=""), + FakeRunChild([], text="plain"), + ], + part=SimpleNamespace( + rels={ + "ext": SimpleNamespace(is_external=True), + "int": SimpleNamespace(is_external=False, target_part=image_part), + } + ), + ) + + image_map = {"ext": "EXT-IMG", image_part: "INT-IMG"} + assert extractor._parse_cell_paragraph(paragraph, image_map) == "EXT-IMGINT-IMGplain" + + cell = SimpleNamespace(paragraphs=[paragraph, paragraph]) + assert extractor._parse_cell(cell, image_map) == "EXT-IMGINT-IMGplain" + + +def test_parse_docx_covers_drawing_shapes_hyperlink_error_and_table_branch(monkeypatch): + extractor = object.__new__(WordExtractor) + + ext_image_id = "ext-image" + int_embed_id = "int-embed" + shape_ext_id = "shape-ext" + shape_int_id = "shape-int" + + internal_part = object() + shape_internal_part = object() + + class Rels(UserDict): + def get(self, key, default=None): + if key == "link-bad": + raise RuntimeError("cannot resolve relation") + return super().get(key, default) + + rels = Rels( + { + ext_image_id: SimpleNamespace(is_external=True, target_ref="https://img/ext.png"), + int_embed_id: SimpleNamespace(is_external=False, target_part=internal_part), + shape_ext_id: SimpleNamespace(is_external=True, target_ref="https://img/shape.png"), + shape_int_id: SimpleNamespace(is_external=False, target_part=shape_internal_part), + "link-ok": SimpleNamespace(is_external=True, target_ref="https://example.com"), + } + ) + + image_map = { + ext_image_id: "[EXT]", + internal_part: "[INT]", + shape_ext_id: "[SHAPE_EXT]", + shape_internal_part: "[SHAPE_INT]", + } + + class FakeBlip: + def __init__(self, embed_id): + self.embed_id = embed_id + + def get(self, key): + return self.embed_id + + class FakeDrawing: + def __init__(self, embed_ids): + self.embed_ids = embed_ids + + def findall(self, pattern): + return [FakeBlip(embed_id) for embed_id in self.embed_ids] + + class FakeNode: + def __init__(self, text=None, attrs=None): + self.text = text + self._attrs = attrs or {} + + def get(self, key): + return self._attrs.get(key) + + class FakeShape: + def __init__(self, bin_id=None, img_id=None): + self.bin_id = bin_id + self.img_id = img_id + + def find(self, pattern): + if "binData" in pattern and self.bin_id: + return FakeNode( + text="shape", + attrs={"{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id": self.bin_id}, + ) + if "imagedata" in pattern and self.img_id: + return FakeNode(attrs={"id": self.img_id}) + return None + + class FakeChild: + def __init__( + self, + tag, + text="", + fld_chars=None, + instr_texts=None, + drawings=None, + shapes=None, + attrs=None, + hyperlink_runs=None, + ): + self.tag = tag + self.text = text + self._fld_chars = fld_chars or [] + self._instr_texts = instr_texts or [] + self._drawings = drawings or [] + self._shapes = shapes or [] + self._attrs = attrs or {} + self._hyperlink_runs = hyperlink_runs or [] + + def findall(self, pattern): + if pattern == qn("w:fldChar"): + return self._fld_chars + if pattern == qn("w:instrText"): + return self._instr_texts + if pattern == qn("w:r"): + return self._hyperlink_runs + if pattern.endswith("}drawing"): + return self._drawings + if pattern.endswith("}pict"): + return self._shapes + return [] + + def get(self, key): + return self._attrs.get(key) + + class FakeRun: + def __init__(self, element, paragraph): + self.element = element + self.text = getattr(element, "text", "") + + paragraph_main = SimpleNamespace( + _element=[ + FakeChild( + qn("w:r"), + text="run-text", + drawings=[FakeDrawing([ext_image_id, int_embed_id])], + shapes=[FakeShape(bin_id=shape_ext_id, img_id=shape_int_id)], + ), + FakeChild( + qn("w:r"), + text="", + drawings=[], + shapes=[FakeShape(bin_id=shape_ext_id)], + ), + FakeChild( + qn("w:hyperlink"), + attrs={qn("r:id"): "link-ok"}, + hyperlink_runs=[FakeChild(qn("w:r"), text="LinkText")], + ), + FakeChild( + qn("w:hyperlink"), + attrs={qn("r:id"): "link-bad"}, + hyperlink_runs=[FakeChild(qn("w:r"), text="BrokenLink")], + ), + ] + ) + paragraph_empty = SimpleNamespace(_element=[FakeChild(qn("w:r"), text=" ")]) + + fake_doc = SimpleNamespace( + part=SimpleNamespace(rels=rels, related_parts={int_embed_id: internal_part}), + paragraphs=[paragraph_main, paragraph_empty], + tables=[SimpleNamespace(rows=[])], + element=SimpleNamespace( + body=[SimpleNamespace(tag="w:p"), SimpleNamespace(tag="w:p"), SimpleNamespace(tag="w:tbl")] + ), + ) + + monkeypatch.setattr(we, "DocxDocument", lambda _: fake_doc) + monkeypatch.setattr(we, "Run", FakeRun) + monkeypatch.setattr(extractor, "_extract_images_from_docx", lambda doc: image_map) + monkeypatch.setattr(extractor, "_table_to_markdown", lambda table, image_map: "TABLE-MARKDOWN") + logger_exception = MagicMock() + monkeypatch.setattr(we.logger, "exception", logger_exception) + + content = extractor.parse_docx("dummy.docx") + + assert "[EXT]" in content + assert "[INT]" in content + assert "[SHAPE_EXT]" in content + assert "[LinkText](https://example.com)" in content + assert "BrokenLink" in content + assert "TABLE-MARKDOWN" in content + logger_exception.assert_called_once() + + +def test_parse_cell_paragraph_hyperlink_in_table_cell_http(): + doc = Document() + table = doc.add_table(rows=1, cols=1) + cell = table.cell(0, 0) + p = cell.paragraphs[0] + + # Build modern hyperlink inside table cell + r_id = "rIdHttp1" + hyperlink = OxmlElement("w:hyperlink") + hyperlink.set(qn("r:id"), r_id) + + run_elem = OxmlElement("w:r") + t = OxmlElement("w:t") + t.text = "Dify" + run_elem.append(t) + hyperlink.append(run_elem) + p._p.append(hyperlink) + + # Relationship for external http link + doc.part.rels.add_relationship( + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink", + "https://dify.ai", + r_id, + is_external=True, + ) + + with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp: + doc.save(tmp.name) + tmp_path = tmp.name + + try: + reopened = Document(tmp_path) + para = reopened.tables[0].cell(0, 0).paragraphs[0] + extractor = object.__new__(WordExtractor) + out = extractor._parse_cell_paragraph(para, {}) + assert out == "[Dify](https://dify.ai)" + finally: + if os.path.exists(tmp_path): + os.remove(tmp_path) + + +def test_parse_cell_paragraph_hyperlink_in_table_cell_mailto(): + doc = Document() + table = doc.add_table(rows=1, cols=1) + cell = table.cell(0, 0) + p = cell.paragraphs[0] + + r_id = "rIdMail1" + hyperlink = OxmlElement("w:hyperlink") + hyperlink.set(qn("r:id"), r_id) + + run_elem = OxmlElement("w:r") + t = OxmlElement("w:t") + t.text = "john@test.com" + run_elem.append(t) + hyperlink.append(run_elem) + p._p.append(hyperlink) + + doc.part.rels.add_relationship( + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink", + "mailto:john@test.com", + r_id, + is_external=True, + ) + + with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp: + doc.save(tmp.name) + tmp_path = tmp.name + + try: + reopened = Document(tmp_path) + para = reopened.tables[0].cell(0, 0).paragraphs[0] + extractor = object.__new__(WordExtractor) + out = extractor._parse_cell_paragraph(para, {}) + assert out == "[john@test.com](mailto:john@test.com)" + finally: + if os.path.exists(tmp_path): + os.remove(tmp_path) diff --git a/api/tests/unit_tests/core/rag/extractor/unstructured/test_unstructured_extractors.py b/api/tests/unit_tests/core/rag/extractor/unstructured/test_unstructured_extractors.py new file mode 100644 index 0000000000..26ce333e11 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/unstructured/test_unstructured_extractors.py @@ -0,0 +1,300 @@ +"""Unit tests for unstructured extractors and their local/API partitioning paths.""" + +import base64 +import sys +import types +from types import SimpleNamespace + +import pytest + +import core.rag.extractor.unstructured.unstructured_epub_extractor as epub_module +from core.rag.extractor.unstructured.unstructured_doc_extractor import UnstructuredWordExtractor +from core.rag.extractor.unstructured.unstructured_eml_extractor import UnstructuredEmailExtractor +from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor +from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor +from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor +from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor +from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor +from core.rag.extractor.unstructured.unstructured_xml_extractor import UnstructuredXmlExtractor + + +def _register_module(monkeypatch: pytest.MonkeyPatch, name: str, **attrs: object) -> types.ModuleType: + module = types.ModuleType(name) + for k, v in attrs.items(): + setattr(module, k, v) + monkeypatch.setitem(sys.modules, name, module) + return module + + +def _register_unstructured_packages(monkeypatch: pytest.MonkeyPatch) -> None: + _register_module(monkeypatch, "unstructured", __path__=[]) + _register_module(monkeypatch, "unstructured.partition", __path__=[]) + _register_module(monkeypatch, "unstructured.chunking", __path__=[]) + _register_module(monkeypatch, "unstructured.file_utils", __path__=[]) + + +def _install_chunk_by_title(monkeypatch: pytest.MonkeyPatch, chunks: list[SimpleNamespace]) -> None: + _register_unstructured_packages(monkeypatch) + + def chunk_by_title( + elements: list[SimpleNamespace], max_characters: int, combine_text_under_n_chars: int + ) -> list[SimpleNamespace]: + return chunks + + _register_module(monkeypatch, "unstructured.chunking.title", chunk_by_title=chunk_by_title) + + +class TestUnstructuredMarkdownMsgXml: + def test_markdown_extractor_without_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text=" chunk-1 "), SimpleNamespace(text=" chunk-2 ")]) + _register_module( + monkeypatch, "unstructured.partition.md", partition_md=lambda filename: [SimpleNamespace(text="x")] + ) + + docs = UnstructuredMarkdownExtractor("/tmp/file.md").extract() + + assert [doc.page_content for doc in docs] == ["chunk-1", "chunk-2"] + + def test_markdown_extractor_with_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text=" via-api ")]) + calls = {} + + def partition_via_api(filename, api_url, api_key): + calls.update({"filename": filename, "api_url": api_url, "api_key": api_key}) + return [SimpleNamespace(text="ignored")] + + _register_module(monkeypatch, "unstructured.partition.api", partition_via_api=partition_via_api) + + docs = UnstructuredMarkdownExtractor("/tmp/file.md", api_url="https://u", api_key="k").extract() + + assert docs[0].page_content == "via-api" + assert calls == {"filename": "/tmp/file.md", "api_url": "https://u", "api_key": "k"} + + def test_msg_extractor_local(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="msg-doc")]) + _register_module( + monkeypatch, "unstructured.partition.msg", partition_msg=lambda filename: [SimpleNamespace(text="x")] + ) + + assert UnstructuredMsgExtractor("/tmp/file.msg").extract()[0].page_content == "msg-doc" + + def test_msg_extractor_with_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="msg-doc")]) + calls = {} + + def partition_via_api(filename, api_url, api_key): + calls.update({"filename": filename, "api_url": api_url, "api_key": api_key}) + return [SimpleNamespace(text="x")] + + _register_module(monkeypatch, "unstructured.partition.api", partition_via_api=partition_via_api) + + assert ( + UnstructuredMsgExtractor("/tmp/file.msg", api_url="https://u", api_key="k").extract()[0].page_content + == "msg-doc" + ) + assert calls["filename"] == "/tmp/file.msg" + + def test_xml_extractor_local_and_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="xml-doc")]) + + xml_calls = {} + + def partition_xml(filename, xml_keep_tags): + xml_calls.update({"filename": filename, "xml_keep_tags": xml_keep_tags}) + return [SimpleNamespace(text="x")] + + _register_module(monkeypatch, "unstructured.partition.xml", partition_xml=partition_xml) + + assert UnstructuredXmlExtractor("/tmp/file.xml").extract()[0].page_content == "xml-doc" + assert xml_calls == {"filename": "/tmp/file.xml", "xml_keep_tags": True} + + api_calls = {} + + def partition_via_api(filename, api_url, api_key): + api_calls.update({"filename": filename, "api_url": api_url, "api_key": api_key}) + return [SimpleNamespace(text="x")] + + _register_module(monkeypatch, "unstructured.partition.api", partition_via_api=partition_via_api) + + assert ( + UnstructuredXmlExtractor("/tmp/file.xml", api_url="https://u", api_key="k").extract()[0].page_content + == "xml-doc" + ) + assert api_calls["filename"] == "/tmp/file.xml" + + +class TestUnstructuredEmailAndEpub: + def test_email_extractor_local_decodes_html_and_suppresses_decode_errors(self, monkeypatch): + _register_unstructured_packages(monkeypatch) + captured = {} + + def chunk_by_title( + elements: list[SimpleNamespace], max_characters: int, combine_text_under_n_chars: int + ) -> list[SimpleNamespace]: + captured["elements"] = list(elements) + return [SimpleNamespace(text=" chunked-email ")] + + _register_module(monkeypatch, "unstructured.chunking.title", chunk_by_title=chunk_by_title) + + html = "

Hello Email

" + encoded_html = base64.b64encode(html.encode("utf-8")).decode("utf-8") + bad_base64 = "not-base64" + + elements = [SimpleNamespace(text=encoded_html), SimpleNamespace(text=bad_base64)] + _register_module(monkeypatch, "unstructured.partition.email", partition_email=lambda filename: elements) + + docs = UnstructuredEmailExtractor("/tmp/file.eml").extract() + + assert docs[0].page_content == "chunked-email" + chunk_elements = captured["elements"] + assert "Hello Email" in chunk_elements[0].text + assert chunk_elements[1].text == bad_base64 + + def test_email_extractor_with_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="api-email")]) + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [SimpleNamespace(text="abc")], + ) + + docs = UnstructuredEmailExtractor("/tmp/file.eml", api_url="https://u", api_key="k").extract() + + assert docs[0].page_content == "api-email" + + def test_epub_extractor_local_and_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="epub-doc")]) + + calls = {"download": 0, "partition": 0} + + def fake_download_pandoc(): + calls["download"] += 1 + + def partition_epub(filename, xml_keep_tags): + calls["partition"] += 1 + assert xml_keep_tags is True + return [SimpleNamespace(text="x")] + + monkeypatch.setattr(epub_module.pypandoc, "download_pandoc", fake_download_pandoc) + _register_module(monkeypatch, "unstructured.partition.epub", partition_epub=partition_epub) + + docs = UnstructuredEpubExtractor("/tmp/file.epub").extract() + + assert docs[0].page_content == "epub-doc" + assert calls == {"download": 1, "partition": 1} + + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [SimpleNamespace(text="x")], + ) + + docs = UnstructuredEpubExtractor("/tmp/file.epub", api_url="https://u", api_key="k").extract() + assert docs[0].page_content == "epub-doc" + + +class TestUnstructuredPPTAndPPTX: + def test_ppt_extractor_requires_api_url(self): + with pytest.raises(NotImplementedError, match="Unstructured API Url is not configured"): + UnstructuredPPTExtractor("/tmp/file.ppt").extract() + + def test_ppt_extractor_groups_text_by_page(self, monkeypatch): + _register_unstructured_packages(monkeypatch) + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [ + SimpleNamespace(text="A", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="B", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="skip", metadata=SimpleNamespace(page_number=None)), + SimpleNamespace(text="C", metadata=SimpleNamespace(page_number=2)), + ], + ) + + docs = UnstructuredPPTExtractor("/tmp/file.ppt", api_url="https://u", api_key="k").extract() + + assert [doc.page_content for doc in docs] == ["A\nB", "C"] + + def test_pptx_extractor_local_and_api(self, monkeypatch): + _register_unstructured_packages(monkeypatch) + _register_module( + monkeypatch, + "unstructured.partition.pptx", + partition_pptx=lambda filename: [ + SimpleNamespace(text="P1", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="P2", metadata=SimpleNamespace(page_number=2)), + SimpleNamespace(text="Skip", metadata=SimpleNamespace(page_number=None)), + ], + ) + + docs = UnstructuredPPTXExtractor("/tmp/file.pptx").extract() + assert [doc.page_content for doc in docs] == ["P1", "P2"] + + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [ + SimpleNamespace(text="X", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="Y", metadata=SimpleNamespace(page_number=1)), + ], + ) + + docs = UnstructuredPPTXExtractor("/tmp/file.pptx", api_url="https://u", api_key="k").extract() + assert [doc.page_content for doc in docs] == ["X\nY"] + + +class TestUnstructuredWord: + def _install_doc_modules(self, monkeypatch, version: str, filetype_value): + _register_unstructured_packages(monkeypatch) + + class FileType: + DOC = "doc" + + _register_module(monkeypatch, "unstructured.__version__", __version__=version) + _register_module( + monkeypatch, + "unstructured.file_utils.filetype", + FileType=FileType, + detect_filetype=lambda filename: filetype_value, + ) + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [SimpleNamespace(text="api-doc")], + ) + _register_module( + monkeypatch, + "unstructured.partition.docx", + partition_docx=lambda filename: [SimpleNamespace(text="docx-doc")], + ) + _register_module( + monkeypatch, + "unstructured.chunking.title", + chunk_by_title=lambda elements, max_characters, combine_text_under_n_chars: [ + SimpleNamespace(text="chunk-1"), + SimpleNamespace(text="chunk-2"), + ], + ) + + def test_word_extractor_rejects_doc_on_old_unstructured_version(self, monkeypatch): + self._install_doc_modules(monkeypatch, version="0.4.10", filetype_value="doc") + + with pytest.raises(ValueError, match="Partitioning .doc files is only supported"): + UnstructuredWordExtractor("/tmp/file.doc", "https://u", "k").extract() + + def test_word_extractor_doc_and_docx_paths(self, monkeypatch): + self._install_doc_modules(monkeypatch, version="0.4.11", filetype_value="doc") + + docs = UnstructuredWordExtractor("/tmp/file.doc", "https://u", "k").extract() + assert [doc.page_content for doc in docs] == ["chunk-1", "chunk-2"] + + self._install_doc_modules(monkeypatch, version="0.5.0", filetype_value="not-doc") + docs = UnstructuredWordExtractor("/tmp/file.docx", "https://u", "k").extract() + assert [doc.page_content for doc in docs] == ["chunk-1", "chunk-2"] + + def test_word_extractor_magic_import_error_fallback_to_extension(self, monkeypatch): + self._install_doc_modules(monkeypatch, version="0.4.10", filetype_value="not-used") + monkeypatch.setitem(sys.modules, "magic", None) + + with pytest.raises(ValueError, match="Partitioning .doc files is only supported"): + UnstructuredWordExtractor("/tmp/file.doc", "https://u", "k").extract() diff --git a/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py new file mode 100644 index 0000000000..d758be218a --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py @@ -0,0 +1,434 @@ +"""Unit tests for WaterCrawl client, provider, and extractor behavior.""" + +import json +from typing import Any +from unittest.mock import MagicMock + +import pytest + +import core.rag.extractor.watercrawl.client as client_module +from core.rag.extractor.watercrawl.client import BaseAPIClient, WaterCrawlAPIClient +from core.rag.extractor.watercrawl.exceptions import ( + WaterCrawlAuthenticationError, + WaterCrawlBadRequestError, + WaterCrawlPermissionError, +) +from core.rag.extractor.watercrawl.extractor import WaterCrawlWebExtractor +from core.rag.extractor.watercrawl.provider import WaterCrawlProvider + + +def _response( + status_code: int, + json_data: dict[str, Any] | None = None, + content_type: str = "application/json", + content: bytes = b"", + text: str = "", +) -> MagicMock: + response = MagicMock() + response.status_code = status_code + response.headers = {"Content-Type": content_type} + response.content = content + response.text = text + response.json.return_value = json_data if json_data is not None else {} + response.raise_for_status.return_value = None + response.close.return_value = None + return response + + +class TestWaterCrawlExceptions: + def test_bad_request_error_properties_and_string(self): + response = _response(400, {"message": "bad request", "errors": {"url": ["invalid"]}}) + + err = WaterCrawlBadRequestError(response) + parsed_errors = json.loads(err.flat_errors) + + assert err.status_code == 400 + assert err.message == "bad request" + assert "url" in parsed_errors + assert any("invalid" in str(item) for item in parsed_errors["url"]) + assert "WaterCrawlBadRequestError" in str(err) + + def test_permission_and_authentication_error_strings(self): + response = _response(403, {"message": "quota exceeded", "errors": {}}) + + permission = WaterCrawlPermissionError(response) + authentication = WaterCrawlAuthenticationError(response) + + assert "exceeding your WaterCrawl API limits" in str(permission) + assert "API key is invalid or expired" in str(authentication) + + +class TestBaseAPIClient: + def test_init_session_builds_expected_headers(self, monkeypatch): + captured = {} + + def fake_client(**kwargs): + captured.update(kwargs) + return "session" + + monkeypatch.setattr(client_module.httpx, "Client", fake_client) + + client = BaseAPIClient(api_key="k", base_url="https://watercrawl.dev") + + assert client.session == "session" + assert captured["headers"]["X-API-Key"] == "k" + assert captured["headers"]["User-Agent"] == "WaterCrawl-Plugin" + + def test_request_stream_and_non_stream_paths(self, monkeypatch): + class FakeSession: + def __init__(self): + self.request_calls = [] + self.build_calls = [] + self.send_calls = [] + + def request(self, method, url, params=None, json=None, **kwargs): + self.request_calls.append((method, url, params, json, kwargs)) + return "non-stream-response" + + def build_request(self, method, url, params=None, json=None): + req = (method, url, params, json) + self.build_calls.append(req) + return req + + def send(self, request, stream=False, **kwargs): + self.send_calls.append((request, stream, kwargs)) + return "stream-response" + + fake_session = FakeSession() + monkeypatch.setattr(BaseAPIClient, "init_session", lambda self: fake_session) + + client = BaseAPIClient(api_key="k", base_url="https://watercrawl.dev") + + assert client._request("GET", "/v1/items", query_params={"a": 1}) == "non-stream-response" + assert fake_session.request_calls[0][1] == "https://watercrawl.dev/v1/items" + + assert client._request("GET", "/v1/items", stream=True) == "stream-response" + assert fake_session.build_calls + assert fake_session.send_calls[0][1] is True + + def test_http_method_helpers_delegate_to_request(self, monkeypatch): + monkeypatch.setattr(BaseAPIClient, "init_session", lambda self: MagicMock()) + client = BaseAPIClient(api_key="k", base_url="https://watercrawl.dev") + + calls = [] + + def fake_request(method, endpoint, query_params=None, data=None, **kwargs): + calls.append((method, endpoint, query_params, data)) + return "ok" + + monkeypatch.setattr(client, "_request", fake_request) + + assert client._get("/a") == "ok" + assert client._post("/b", data={"x": 1}) == "ok" + assert client._put("/c", data={"x": 2}) == "ok" + assert client._delete("/d") == "ok" + assert client._patch("/e", data={"x": 3}) == "ok" + assert [c[0] for c in calls] == ["GET", "POST", "PUT", "DELETE", "PATCH"] + + +class TestWaterCrawlAPIClient: + def test_process_eventstream_and_download(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + response = MagicMock() + response.iter_lines.return_value = [ + b"event: keep-alive", + b'data: {"type":"result","data":{"result":"http://x"}}', + b'data: {"type":"log","data":{"msg":"ok"}}', + ] + + monkeypatch.setattr(client, "download_result", lambda data: {"result": {"markdown": "body"}, "url": "u"}) + + events = list(client.process_eventstream(response, download=True)) + + assert events[0]["data"]["result"]["markdown"] == "body" + assert events[1]["type"] == "log" + response.close.assert_called_once() + + @pytest.mark.parametrize( + ("status", "expected_exception"), + [ + (401, WaterCrawlAuthenticationError), + (403, WaterCrawlPermissionError), + (422, WaterCrawlBadRequestError), + ], + ) + def test_process_response_error_statuses(self, status: int, expected_exception: type[Exception]): + client = WaterCrawlAPIClient(api_key="k") + + with pytest.raises(expected_exception): + client.process_response(_response(status, {"message": "bad", "errors": {"url": ["x"]}})) + + def test_process_response_204_returns_none(self): + client = WaterCrawlAPIClient(api_key="k") + assert client.process_response(_response(204, None)) is None + + def test_process_response_json_payloads(self): + client = WaterCrawlAPIClient(api_key="k") + assert client.process_response(_response(200, {"ok": True})) == {"ok": True} + assert client.process_response(_response(200, None)) == {} + + def test_process_response_octet_stream_returns_bytes(self): + client = WaterCrawlAPIClient(api_key="k") + assert ( + client.process_response(_response(200, content_type="application/octet-stream", content=b"bin")) == b"bin" + ) + + def test_process_response_event_stream_returns_generator(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + generator = (item for item in [{"type": "result", "data": {}}]) + monkeypatch.setattr(client, "process_eventstream", lambda response, download=False: generator) + assert client.process_response(_response(200, content_type="text/event-stream")) is generator + + def test_process_response_unknown_content_type_raises(self): + client = WaterCrawlAPIClient(api_key="k") + with pytest.raises(Exception, match="Unknown response type"): + client.process_response(_response(200, content_type="text/plain", text="x")) + + def test_process_response_uses_raise_for_status(self): + client = WaterCrawlAPIClient(api_key="k") + response = _response(500, {"message": "server"}) + response.raise_for_status.side_effect = RuntimeError("http error") + + with pytest.raises(RuntimeError, match="http error"): + client.process_response(response) + + def test_endpoint_wrappers(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + monkeypatch.setattr(client, "process_response", lambda resp: "processed") + monkeypatch.setattr(client, "_get", lambda *args, **kwargs: "get-resp") + monkeypatch.setattr(client, "_post", lambda *args, **kwargs: "post-resp") + monkeypatch.setattr(client, "_delete", lambda *args, **kwargs: "delete-resp") + + assert client.get_crawl_requests_list() == "processed" + assert client.get_crawl_request("id") == "processed" + assert client.create_crawl_request(url="https://x") == "processed" + assert client.stop_crawl_request("id") == "processed" + assert client.download_crawl_request("id") == "processed" + assert client.get_crawl_request_results("id") == "processed" + + def test_monitor_crawl_request_generator_and_validation(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + monkeypatch.setattr(client, "process_response", lambda _: (x for x in [{"type": "result", "data": 1}])) + monkeypatch.setattr(client, "_get", lambda *args, **kwargs: "stream-resp") + + events = list(client.monitor_crawl_request("job-1", prefetched=True)) + assert events == [{"type": "result", "data": 1}] + + monkeypatch.setattr(client, "process_response", lambda _: [{"type": "result"}]) + with pytest.raises(ValueError, match="Generator expected"): + list(client.monitor_crawl_request("job-1")) + + def test_scrape_url_sync_and_async(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + monkeypatch.setattr(client, "create_crawl_request", lambda **kwargs: {"uuid": "job-1"}) + + async_result = client.scrape_url("https://example.com", sync=False) + assert async_result == {"uuid": "job-1"} + + monkeypatch.setattr( + client, + "monitor_crawl_request", + lambda item_id, prefetched: iter( + [{"type": "log", "data": {}}, {"type": "result", "data": {"url": "https://example.com"}}] + ), + ) + sync_result = client.scrape_url("https://example.com", sync=True) + assert sync_result == {"url": "https://example.com"} + + def test_download_result_fetches_json_and_closes(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + response = _response(200, {"markdown": "body"}) + monkeypatch.setattr(client_module.httpx, "get", lambda *args, **kwargs: response) + + result = client.download_result({"result": "https://example.com/result.json"}) + + assert result["result"] == {"markdown": "body"} + response.close.assert_called_once() + + +class TestWaterCrawlProvider: + def test_crawl_url_builds_options_and_min_wait_time(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + captured_kwargs = {} + + def create_crawl_request_spy(**kwargs): + captured_kwargs.update(kwargs) + return {"uuid": "job-1"} + + monkeypatch.setattr(provider.client, "create_crawl_request", create_crawl_request_spy) + + result = provider.crawl_url( + "https://example.com", + { + "crawl_sub_pages": True, + "limit": 5, + "max_depth": 2, + "includes": "a,b", + "excludes": "x,y", + "exclude_tags": "nav,footer", + "include_tags": "main", + "wait_time": 100, + "only_main_content": False, + }, + ) + + assert result == {"status": "active", "job_id": "job-1"} + assert captured_kwargs["url"] == "https://example.com" + assert captured_kwargs["spider_options"] == { + "max_depth": 2, + "page_limit": 5, + "allowed_domains": [], + "exclude_paths": ["x", "y"], + "include_paths": ["a", "b"], + } + assert captured_kwargs["page_options"]["exclude_tags"] == ["nav", "footer"] + assert captured_kwargs["page_options"]["include_tags"] == ["main"] + assert captured_kwargs["page_options"]["only_main_content"] is False + assert captured_kwargs["page_options"]["wait_time"] == 1000 + + def test_get_crawl_status_active_and_completed(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + + monkeypatch.setattr( + provider.client, + "get_crawl_request", + lambda job_id: { + "status": "running", + "uuid": job_id, + "options": {"spider_options": {"page_limit": 3}}, + "number_of_documents": 1, + "duration": "00:00:01.500000", + }, + ) + + active = provider.get_crawl_status("job-1") + assert active["status"] == "active" + assert active["data"] == [] + assert active["time_consuming"] == pytest.approx(1.5) + + monkeypatch.setattr( + provider.client, + "get_crawl_request", + lambda job_id: { + "status": "completed", + "uuid": job_id, + "options": {"spider_options": {"page_limit": 2}}, + "number_of_documents": 2, + "duration": "00:00:02.000000", + }, + ) + monkeypatch.setattr(provider, "_get_results", lambda crawl_request_id, query_params=None: iter([{"url": "u"}])) + + completed = provider.get_crawl_status("job-2") + assert completed["status"] == "completed" + assert completed["data"] == [{"url": "u"}] + + def test_get_crawl_url_data_and_scrape(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + + monkeypatch.setattr(provider, "scrape_url", lambda url: {"source_url": url}) + assert provider.get_crawl_url_data("", "https://example.com") == {"source_url": "https://example.com"} + + monkeypatch.setattr(provider, "_get_results", lambda job_id, query_params=None: iter([{"source_url": "u1"}])) + assert provider.get_crawl_url_data("job", "u1") == {"source_url": "u1"} + + monkeypatch.setattr(provider, "_get_results", lambda job_id, query_params=None: iter([])) + assert provider.get_crawl_url_data("job", "u1") is None + + def test_structure_data_validation_and_get_results_pagination(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + + with pytest.raises(ValueError, match="Invalid result object"): + provider._structure_data({"result": "not-a-dict"}) + + structured = provider._structure_data( + { + "url": "https://example.com", + "result": { + "metadata": {"title": "Title", "description": "Desc"}, + "markdown": "Body", + }, + } + ) + assert structured["title"] == "Title" + assert structured["markdown"] == "Body" + + responses = [ + { + "results": [ + { + "url": "https://a", + "result": {"metadata": {"title": "A", "description": "DA"}, "markdown": "MA"}, + } + ], + "next": "next-page", + }, + {"results": [], "next": None}, + ] + + monkeypatch.setattr( + provider.client, + "get_crawl_request_results", + lambda crawl_request_id, page, page_size, query_params: responses.pop(0), + ) + + results = list(provider._get_results("job-1")) + assert len(results) == 1 + assert results[0]["source_url"] == "https://a" + + def test_scrape_url_uses_client_and_structure(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + monkeypatch.setattr( + provider.client, "scrape_url", lambda **kwargs: {"result": {"metadata": {}, "markdown": "m"}, "url": "u"} + ) + + result = provider.scrape_url("u") + + assert result["source_url"] == "u" + + +class TestWaterCrawlWebExtractor: + def test_extract_crawl_and_scrape_modes(self, monkeypatch): + monkeypatch.setattr( + "core.rag.extractor.watercrawl.extractor.WebsiteService.get_crawl_url_data", + lambda job_id, provider, url, tenant_id: { + "markdown": "crawl", + "source_url": url, + "description": "d", + "title": "t", + }, + ) + monkeypatch.setattr( + "core.rag.extractor.watercrawl.extractor.WebsiteService.get_scrape_url_data", + lambda provider, url, tenant_id, only_main_content: { + "markdown": "scrape", + "source_url": url, + "description": "d", + "title": "t", + }, + ) + + crawl_extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + scrape_extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="scrape") + + assert crawl_extractor.extract()[0].page_content == "crawl" + assert scrape_extractor.extract()[0].page_content == "scrape" + + def test_extract_crawl_returns_empty_when_service_returns_none(self, monkeypatch): + monkeypatch.setattr( + "core.rag.extractor.watercrawl.extractor.WebsiteService.get_crawl_url_data", + lambda job_id, provider, url, tenant_id: None, + ) + + extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + + assert extractor.extract() == [] + + def test_extract_unknown_mode_returns_empty(self): + extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="other") + + assert extractor.extract() == [] diff --git a/api/tests/unit_tests/core/rag/indexing/processor/conftest.py b/api/tests/unit_tests/core/rag/indexing/processor/conftest.py new file mode 100644 index 0000000000..2a3860e107 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/conftest.py @@ -0,0 +1,33 @@ +from contextlib import AbstractContextManager, nullcontext +from typing import Any + +import pytest + + +class _FakeFlaskApp: + def app_context(self) -> AbstractContextManager[None]: + return nullcontext() + + +class _FakeExecutor: + def __init__(self, future: Any) -> None: + self._future = future + + def __enter__(self) -> "_FakeExecutor": + return self + + def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> bool: + return False + + def submit(self, func: object, preview: object) -> Any: + return self._future + + +@pytest.fixture +def fake_flask_app() -> _FakeFlaskApp: + return _FakeFlaskApp() + + +@pytest.fixture +def fake_executor_cls() -> type[_FakeExecutor]: + return _FakeExecutor diff --git a/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py b/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py new file mode 100644 index 0000000000..2451db70b6 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py @@ -0,0 +1,629 @@ +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor +from core.rag.models.document import AttachmentDocument, Document +from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage +from dify_graph.model_runtime.entities.message_entities import AssistantPromptMessage, ImagePromptMessageContent +from dify_graph.model_runtime.entities.model_entities import ModelFeature + + +class TestParagraphIndexProcessor: + @pytest.fixture + def processor(self) -> ParagraphIndexProcessor: + return ParagraphIndexProcessor() + + @pytest.fixture + def dataset(self) -> Mock: + dataset = Mock() + dataset.id = "dataset-1" + dataset.tenant_id = "tenant-1" + dataset.indexing_technique = "high_quality" + dataset.is_multimodal = True + return dataset + + @pytest.fixture + def dataset_document(self) -> Mock: + document = Mock() + document.id = "doc-1" + document.created_by = "user-1" + return document + + @pytest.fixture + def process_rule(self) -> dict: + return { + "mode": "custom", + "rules": {"segmentation": {"max_tokens": 256, "chunk_overlap": 10, "separator": "\n"}}, + } + + def _rules(self) -> SimpleNamespace: + segmentation = SimpleNamespace(max_tokens=256, chunk_overlap=10, separator="\n") + return SimpleNamespace(segmentation=segmentation) + + def _llm_result(self, content: str = "summary") -> LLMResult: + return LLMResult( + model="llm-model", + message=AssistantPromptMessage(content=content), + usage=LLMUsage.empty_usage(), + ) + + def test_extract_forwards_automatic_flag(self, processor: ParagraphIndexProcessor) -> None: + extract_setting = Mock() + expected_docs = [Document(page_content="chunk", metadata={})] + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.ExtractProcessor.extract" + ) as mock_extract: + mock_extract.return_value = expected_docs + docs = processor.extract(extract_setting, process_rule_mode="hierarchical") + + assert docs == expected_docs + mock_extract.assert_called_once_with(extract_setting=extract_setting, is_automatic=True) + + def test_transform_validates_process_rule(self, processor: ParagraphIndexProcessor) -> None: + with pytest.raises(ValueError, match="No process rule found"): + processor.transform([Document(page_content="text", metadata={})], process_rule=None) + + with pytest.raises(ValueError, match="No rules found in process rule"): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "custom"}) + + def test_transform_validates_segmentation(self, processor: ParagraphIndexProcessor, process_rule: dict) -> None: + rules_without_segmentation = SimpleNamespace(segmentation=None) + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.Rule.model_validate", + return_value=rules_without_segmentation, + ): + with pytest.raises(ValueError, match="No segmentation found in rules"): + processor.transform( + [Document(page_content="text", metadata={})], + process_rule={"mode": "custom", "rules": {"enabled": True}}, + ) + + def test_transform_builds_split_documents(self, processor: ParagraphIndexProcessor, process_rule: dict) -> None: + source_document = Document(page_content="source", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}) + splitter = Mock() + splitter.split_documents.return_value = [ + Document(page_content=".first", metadata={}), + Document(page_content=" ", metadata={}), + ] + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.Rule.model_validate", + return_value=self._rules(), + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.CleanProcessor.clean", + return_value=".first", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.remove_leading_symbols", + side_effect=lambda text: text.lstrip("."), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ), + ): + documents = processor.transform([source_document], process_rule=process_rule) + + assert len(documents) == 1 + assert documents[0].page_content == "first" + assert documents[0].attachments is not None + assert documents[0].metadata["doc_hash"] == "hash" + + def test_transform_automatic_mode_uses_default_rules(self, processor: ParagraphIndexProcessor) -> None: + splitter = Mock() + splitter.split_documents.return_value = [Document(page_content="text", metadata={})] + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.Rule.model_validate", + return_value=self._rules(), + ) as mock_validate, + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.CleanProcessor.clean", + side_effect=lambda text, _: text, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.remove_leading_symbols", + side_effect=lambda text: text, + ), + patch.object(processor, "_get_content_files", return_value=[]), + ): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "automatic"}) + + assert mock_validate.call_count == 1 + + def test_load_creates_vector_and_multimodal_when_high_quality( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + docs = [Document(page_content="chunk", metadata={})] + multimodal_docs = [AttachmentDocument(page_content="image", metadata={})] + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls, + ): + processor.load(dataset, docs, multimodal_documents=multimodal_docs) + vector = mock_vector_cls.return_value + vector.create.assert_called_once_with(docs) + vector.create_multimodal.assert_called_once_with(multimodal_docs) + mock_keyword_cls.assert_not_called() + + def test_load_uses_keyword_add_texts_with_keywords_when_economy( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + dataset.indexing_technique = "economy" + docs = [Document(page_content="chunk", metadata={})] + + with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls: + processor.load(dataset, docs, keywords_list=["k1", "k2"]) + + mock_keyword_cls.return_value.add_texts.assert_called_once_with(docs, keywords_list=["k1", "k2"]) + + def test_load_uses_keyword_add_texts_without_keywords_when_economy( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + dataset.indexing_technique = "economy" + docs = [Document(page_content="chunk", metadata={})] + + with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls: + processor.load(dataset, docs) + + mock_keyword_cls.return_value.add_texts.assert_called_once_with(docs) + + def test_clean_deletes_summaries_and_vector(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None: + segment_query = Mock() + segment_query.filter.return_value.all.return_value = [SimpleNamespace(id="seg-1")] + session = Mock() + session.query.return_value = segment_query + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector") as mock_vector_cls, + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, ["node-1"], delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, ["seg-1"]) + vector.delete_by_ids.assert_called_once_with(["node-1"]) + + def test_clean_economy_deletes_summaries_and_keywords( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + dataset.indexing_technique = "economy" + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls, + ): + processor.clean(dataset, None, delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, None) + mock_keyword_cls.return_value.delete.assert_called_once() + + def test_clean_deletes_keywords_by_ids(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None: + dataset.indexing_technique = "economy" + with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls: + processor.clean(dataset, ["node-2"], with_keywords=True) + + mock_keyword_cls.return_value.delete_by_ids.assert_called_once_with(["node-2"]) + + def test_retrieve_filters_by_threshold(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None: + accepted = SimpleNamespace(page_content="keep", metadata={"source": "a"}, score=0.9) + rejected = SimpleNamespace(page_content="drop", metadata={"source": "b"}, score=0.1) + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.RetrievalService.retrieve" + ) as mock_retrieve: + mock_retrieve.return_value = [accepted, rejected] + docs = processor.retrieve("semantic_search", "query", dataset, 5, 0.5, {}) + + assert len(docs) == 1 + assert docs[0].metadata["score"] == 0.9 + + def test_index_list_chunks_high_quality( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="img", metadata={})] + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.DatasetDocumentStore" + ) as mock_store_cls, + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector") as mock_vector_cls, + ): + processor.index(dataset, dataset_document, ["chunk-1", "chunk-2"]) + + mock_store_cls.return_value.add_documents.assert_called_once() + mock_vector_cls.return_value.create.assert_called_once() + mock_vector_cls.return_value.create_multimodal.assert_called_once() + + def test_index_list_chunks_economy( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + dataset.indexing_technique = "economy" + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object(processor, "_get_content_files", return_value=[]), + patch("core.rag.index_processor.processor.paragraph_index_processor.DatasetDocumentStore"), + patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls, + ): + processor.index(dataset, dataset_document, ["chunk-3"]) + + mock_keyword_cls.return_value.add_texts.assert_called_once() + + def test_index_multimodal_structure_handles_files_and_account_lookup( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + chunk_with_files = SimpleNamespace( + content="content-1", + files=[SimpleNamespace(id="file-1", filename="image.png")], + ) + chunk_without_files = SimpleNamespace(content="content-2", files=None) + structure = SimpleNamespace(general_chunks=[chunk_with_files, chunk_without_files]) + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.MultimodalGeneralStructureChunk.model_validate", + return_value=structure, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.AccountService.load_user", + return_value=SimpleNamespace(id="user-1"), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="img", metadata={})] + ) as mock_files, + patch("core.rag.index_processor.processor.paragraph_index_processor.DatasetDocumentStore"), + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector"), + ): + processor.index(dataset, dataset_document, {"general_chunks": []}) + + assert mock_files.call_count == 1 + + def test_index_multimodal_structure_requires_valid_account( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + structure = SimpleNamespace(general_chunks=[SimpleNamespace(content="content", files=None)]) + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.MultimodalGeneralStructureChunk.model_validate", + return_value=structure, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.AccountService.load_user", + return_value=None, + ), + ): + with pytest.raises(ValueError, match="Invalid account"): + processor.index(dataset, dataset_document, {"general_chunks": []}) + + def test_format_preview_validates_chunk_shape(self, processor: ParagraphIndexProcessor) -> None: + preview = processor.format_preview(["chunk-1", "chunk-2"]) + assert preview["chunk_structure"] == "text_model" + assert preview["total_segments"] == 2 + + with pytest.raises(ValueError, match="Chunks is not a list"): + processor.format_preview({"not": "a-list"}) + + def test_generate_summary_preview_success_and_failure(self, processor: ParagraphIndexProcessor) -> None: + preview_items = [PreviewDetail(content="chunk-1"), PreviewDetail(content="chunk-2")] + + with patch.object(processor, "generate_summary", return_value=("summary", LLMUsage.empty_usage())): + result = processor.generate_summary_preview( + "tenant-1", preview_items, {"enable": True}, doc_language="English" + ) + assert all(item.summary == "summary" for item in result) + + with patch.object(processor, "generate_summary", side_effect=RuntimeError("summary failed")): + with pytest.raises(ValueError, match="Failed to generate summaries"): + processor.generate_summary_preview("tenant-1", [PreviewDetail(content="chunk-1")], {"enable": True}) + + def test_generate_summary_preview_fallback_without_flask_context(self, processor: ParagraphIndexProcessor) -> None: + preview_items = [PreviewDetail(content="chunk-1")] + fake_current_app = SimpleNamespace(_get_current_object=Mock(side_effect=RuntimeError("no app"))) + + with ( + patch("flask.current_app", fake_current_app), + patch.object(processor, "generate_summary", return_value=("summary", LLMUsage.empty_usage())), + ): + result = processor.generate_summary_preview("tenant-1", preview_items, {"enable": True}) + + assert result[0].summary == "summary" + + def test_generate_summary_preview_timeout( + self, processor: ParagraphIndexProcessor, fake_executor_cls: type + ) -> None: + preview_items = [PreviewDetail(content="chunk-1")] + future = Mock() + executor = fake_executor_cls(future) + + with ( + patch("concurrent.futures.ThreadPoolExecutor", return_value=executor), + patch("concurrent.futures.wait", side_effect=[(set(), {future}), (set(), set())]), + ): + with pytest.raises(ValueError, match="timeout"): + processor.generate_summary_preview("tenant-1", preview_items, {"enable": True}) + + future.cancel.assert_called_once() + + def test_generate_summary_validates_input(self) -> None: + with pytest.raises(ValueError, match="must be enabled"): + ParagraphIndexProcessor.generate_summary("tenant-1", "text", {"enable": False}) + + with pytest.raises(ValueError, match="model_name and model_provider_name"): + ParagraphIndexProcessor.generate_summary("tenant-1", "text", {"enable": True}) + + def test_generate_summary_text_only_flow(self) -> None: + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.model_type_instance.get_model_schema.return_value = SimpleNamespace(features=[]) + model_instance.invoke_llm.return_value = self._llm_result("text summary") + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.ProviderManager") as mock_pm_cls, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ModelInstance", + return_value=model_instance, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.deduct_llm_quota", + side_effect=RuntimeError("quota"), + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + mock_pm_cls.return_value.get_provider_model_bundle.return_value = Mock() + summary, usage = ParagraphIndexProcessor.generate_summary( + "tenant-1", + "text content", + {"enable": True, "model_name": "model-a", "model_provider_name": "provider-a"}, + document_language="English", + ) + + assert summary == "text summary" + assert isinstance(usage, LLMUsage) + mock_logger.warning.assert_called_with("Failed to deduct quota for summary generation: %s", "quota") + + def test_generate_summary_handles_vision_and_image_conversion(self) -> None: + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.model_type_instance.get_model_schema.return_value = SimpleNamespace( + features=[ModelFeature.VISION] + ) + model_instance.invoke_llm.return_value = self._llm_result("vision summary") + image_file = SimpleNamespace() + image_content = ImagePromptMessageContent(format="url", mime_type="image/png", url="http://example.com/a.png") + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.ProviderManager") as mock_pm_cls, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ModelInstance", + return_value=model_instance, + ), + patch.object( + ParagraphIndexProcessor, "_extract_images_from_segment_attachments", return_value=[image_file] + ), + patch.object(ParagraphIndexProcessor, "_extract_images_from_text", return_value=[]) as mock_extract_text, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.file_manager.to_prompt_message_content", + return_value=image_content, + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.deduct_llm_quota"), + ): + mock_pm_cls.return_value.get_provider_model_bundle.return_value = Mock() + summary, _ = ParagraphIndexProcessor.generate_summary( + "tenant-1", + "text content", + {"enable": True, "model_name": "model-a", "model_provider_name": "provider-a"}, + segment_id="seg-1", + ) + + assert summary == "vision summary" + mock_extract_text.assert_not_called() + + def test_generate_summary_fallbacks_for_prompt_and_result_types(self) -> None: + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.model_type_instance.get_model_schema.return_value = SimpleNamespace( + features=[ModelFeature.VISION] + ) + model_instance.invoke_llm.return_value = object() + image_file = SimpleNamespace() + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.ProviderManager") as mock_pm_cls, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ModelInstance", + return_value=model_instance, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.DEFAULT_GENERATOR_SUMMARY_PROMPT", + "Prompt {missing}", + ), + patch.object(ParagraphIndexProcessor, "_extract_images_from_segment_attachments", return_value=[]), + patch.object(ParagraphIndexProcessor, "_extract_images_from_text", return_value=[image_file]), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.file_manager.to_prompt_message_content", + side_effect=RuntimeError("bad image"), + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + mock_pm_cls.return_value.get_provider_model_bundle.return_value = Mock() + with pytest.raises(ValueError, match="Expected LLMResult"): + ParagraphIndexProcessor.generate_summary( + "tenant-1", + "text content", + {"enable": True, "model_name": "model-a", "model_provider_name": "provider-a"}, + ) + + mock_logger.warning.assert_called_with( + "Failed to convert image file to prompt message content: %s", "bad image" + ) + + def test_extract_images_from_text_handles_patterns_and_build_errors(self) -> None: + text = ( + "![img](/files/11111111-1111-1111-1111-111111111111/image-preview) " + "![img2](/files/22222222-2222-2222-2222-222222222222/file-preview) " + "![tool](/files/tools/33333333-3333-3333-3333-333333333333.png)" + ) + image_upload = SimpleNamespace( + id="11111111-1111-1111-1111-111111111111", + tenant_id="tenant-1", + name="image.png", + mime_type="image/png", + extension="png", + source_url="", + size=1, + key="key", + ) + non_image_upload = SimpleNamespace( + id="22222222-2222-2222-2222-222222222222", + tenant_id="tenant-1", + name="file.txt", + mime_type="text/plain", + extension="txt", + source_url="", + size=1, + key="key", + ) + query = Mock() + query.where.return_value.all.return_value = [image_upload, non_image_upload] + session = Mock() + session.query.return_value = query + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.build_from_mapping", + return_value=SimpleNamespace(id="file-1"), + ) as mock_builder, + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + files = ParagraphIndexProcessor._extract_images_from_text("tenant-1", text) + + assert len(files) == 1 + assert mock_builder.call_count == 1 + mock_logger.warning.assert_not_called() + + def test_extract_images_from_text_returns_empty_when_no_matches(self) -> None: + assert ParagraphIndexProcessor._extract_images_from_text("tenant-1", "no images here") == [] + + def test_extract_images_from_text_logs_when_build_fails(self) -> None: + text = "![img](/files/11111111-1111-1111-1111-111111111111/image-preview)" + image_upload = SimpleNamespace( + id="11111111-1111-1111-1111-111111111111", + tenant_id="tenant-1", + name="image.png", + mime_type="image/png", + extension="png", + source_url="", + size=1, + key="key", + ) + query = Mock() + query.where.return_value.all.return_value = [image_upload] + session = Mock() + session.query.return_value = query + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.build_from_mapping", + side_effect=RuntimeError("build failed"), + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + files = ParagraphIndexProcessor._extract_images_from_text("tenant-1", text) + + assert files == [] + mock_logger.warning.assert_called_once() + + def test_extract_images_from_segment_attachments(self) -> None: + image_upload = SimpleNamespace( + id="file-1", + name="image", + extension="png", + mime_type="image/png", + source_url="", + size=1, + key="k1", + ) + bad_upload = SimpleNamespace( + id="file-2", + name="broken", + extension=None, + mime_type="image/png", + source_url="", + size=1, + key="k2", + ) + non_image_upload = SimpleNamespace( + id="file-3", + name="text", + extension="txt", + mime_type="text/plain", + source_url="", + size=1, + key="k3", + ) + execute_result = Mock() + execute_result.all.return_value = [(None, image_upload), (None, bad_upload), (None, non_image_upload)] + session = Mock() + session.execute.return_value = execute_result + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + files = ParagraphIndexProcessor._extract_images_from_segment_attachments("tenant-1", "seg-1") + + assert len(files) == 1 + mock_logger.warning.assert_called_once() + + def test_extract_images_from_segment_attachments_empty(self) -> None: + execute_result = Mock() + execute_result.all.return_value = [] + session = Mock() + session.execute.return_value = execute_result + + with patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session): + empty_files = ParagraphIndexProcessor._extract_images_from_segment_attachments("tenant-1", "seg-1") + + assert empty_files == [] diff --git a/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py b/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py new file mode 100644 index 0000000000..abe40f05d1 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py @@ -0,0 +1,523 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor +from core.rag.models.document import AttachmentDocument, ChildDocument, Document +from services.entities.knowledge_entities.knowledge_entities import ParentMode + + +class TestParentChildIndexProcessor: + @pytest.fixture + def processor(self) -> ParentChildIndexProcessor: + return ParentChildIndexProcessor() + + @pytest.fixture + def dataset(self) -> Mock: + dataset = Mock() + dataset.id = "dataset-1" + dataset.tenant_id = "tenant-1" + dataset.indexing_technique = "high_quality" + dataset.is_multimodal = True + return dataset + + @pytest.fixture + def dataset_document(self) -> Mock: + document = Mock() + document.id = "doc-1" + document.created_by = "user-1" + document.dataset_process_rule_id = None + return document + + def _segmentation(self) -> SimpleNamespace: + return SimpleNamespace(max_tokens=200, chunk_overlap=10, separator="\n") + + def _paragraph_rules(self) -> SimpleNamespace: + return SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + segmentation=self._segmentation(), + subchunk_segmentation=self._segmentation(), + ) + + def _full_doc_rules(self) -> SimpleNamespace: + return SimpleNamespace( + parent_mode=ParentMode.FULL_DOC, segmentation=None, subchunk_segmentation=self._segmentation() + ) + + def test_extract_forwards_automatic_flag(self, processor: ParentChildIndexProcessor) -> None: + extract_setting = Mock() + expected = [Document(page_content="chunk", metadata={})] + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.ExtractProcessor.extract" + ) as mock_extract: + mock_extract.return_value = expected + documents = processor.extract(extract_setting, process_rule_mode="hierarchical") + + assert documents == expected + mock_extract.assert_called_once_with(extract_setting=extract_setting, is_automatic=True) + + def test_transform_validates_process_rule(self, processor: ParentChildIndexProcessor) -> None: + with pytest.raises(ValueError, match="No process rule found"): + processor.transform([Document(page_content="text", metadata={})], process_rule=None) + + with pytest.raises(ValueError, match="No rules found in process rule"): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "custom"}) + + def test_transform_paragraph_requires_segmentation(self, processor: ParentChildIndexProcessor) -> None: + rules = SimpleNamespace(parent_mode=ParentMode.PARAGRAPH, segmentation=None) + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", return_value=rules + ): + with pytest.raises(ValueError, match="No segmentation found in rules"): + processor.transform( + [Document(page_content="text", metadata={})], + process_rule={"mode": "custom", "rules": {"enabled": True}}, + ) + + def test_transform_paragraph_builds_parent_and_child_docs(self, processor: ParentChildIndexProcessor) -> None: + splitter = Mock() + splitter.split_documents.return_value = [ + Document(page_content=".parent", metadata={}), + Document(page_content=" ", metadata={}), + ] + parent_document = Document(page_content="source", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}) + child_docs = [ChildDocument(page_content="child-1", metadata={"dataset_id": "dataset-1"})] + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", + return_value=self._paragraph_rules(), + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.CleanProcessor.clean", + return_value=".parent", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ), + patch.object(processor, "_split_child_nodes", return_value=child_docs), + ): + result = processor.transform( + [parent_document], + process_rule={"mode": "custom", "rules": {"enabled": True}}, + preview=False, + ) + + assert len(result) == 1 + assert result[0].page_content == "parent" + assert result[0].children == child_docs + assert result[0].attachments is not None + + def test_transform_preview_returns_after_ten_parent_chunks(self, processor: ParentChildIndexProcessor) -> None: + splitter = Mock() + splitter.split_documents.return_value = [Document(page_content=f"chunk-{i}", metadata={}) for i in range(10)] + documents = [ + Document(page_content="doc-1", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}), + Document(page_content="doc-2", metadata={"dataset_id": "dataset-1", "document_id": "doc-2"}), + ] + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", + return_value=self._paragraph_rules(), + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.CleanProcessor.clean", + side_effect=lambda text, _: text, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object(processor, "_get_content_files", return_value=[]), + patch.object(processor, "_split_child_nodes", return_value=[]), + ): + result = processor.transform( + documents, + process_rule={"mode": "custom", "rules": {"enabled": True}}, + preview=True, + ) + + assert len(result) == 10 + + def test_transform_full_doc_mode_trims_children_for_preview(self, processor: ParentChildIndexProcessor) -> None: + docs = [ + Document(page_content="first", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}), + Document(page_content="second", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}), + ] + child_docs = [ChildDocument(page_content=f"child-{i}", metadata={}) for i in range(5)] + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", + return_value=self._full_doc_rules(), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ), + patch.object(processor, "_split_child_nodes", return_value=child_docs), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.dify_config.CHILD_CHUNKS_PREVIEW_NUMBER", + 2, + ), + ): + result = processor.transform( + docs, + process_rule={"mode": "hierarchical", "rules": {"enabled": True}}, + preview=True, + ) + + assert len(result) == 1 + assert len(result[0].children or []) == 2 + assert result[0].attachments is not None + + def test_load_creates_vectors_for_child_docs(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + parent_doc = Document( + page_content="parent", + metadata={}, + children=[ + ChildDocument(page_content="child-1", metadata={}), + ChildDocument(page_content="child-2", metadata={}), + ], + ) + multimodal_docs = [AttachmentDocument(page_content="image", metadata={})] + + with patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls: + vector = mock_vector_cls.return_value + processor.load(dataset, [parent_doc], multimodal_documents=multimodal_docs) + + assert vector.create.call_count == 1 + formatted_docs = vector.create.call_args[0][0] + assert len(formatted_docs) == 2 + assert all(isinstance(doc, Document) for doc in formatted_docs) + vector.create_multimodal.assert_called_once_with(multimodal_docs) + + def test_clean_with_precomputed_child_ids(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + delete_query = Mock() + where_query = Mock() + where_query.delete.return_value = 2 + session = Mock() + session.query.return_value.where.return_value = where_query + + with ( + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + vector = mock_vector_cls.return_value + processor.clean( + dataset, + ["node-1"], + delete_child_chunks=True, + precomputed_child_node_ids=["child-1", "child-2"], + ) + + vector.delete_by_ids.assert_called_once_with(["child-1", "child-2"]) + where_query.delete.assert_called_once_with(synchronize_session=False) + session.commit.assert_called_once() + + def test_clean_queries_child_ids_when_not_precomputed( + self, processor: ParentChildIndexProcessor, dataset: Mock + ) -> None: + child_query = Mock() + child_query.join.return_value.where.return_value.all.return_value = [("child-1",), (None,), ("child-2",)] + session = Mock() + session.query.return_value = child_query + + with ( + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, ["node-1"], delete_child_chunks=False) + + vector.delete_by_ids.assert_called_once_with(["child-1", "child-2"]) + + def test_clean_dataset_wide_cleanup(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + where_query = Mock() + where_query.delete.return_value = 3 + session = Mock() + session.query.return_value.where.return_value = where_query + + with ( + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, None, delete_child_chunks=True) + + vector.delete.assert_called_once() + where_query.delete.assert_called_once_with(synchronize_session=False) + session.commit.assert_called_once() + + def test_clean_deletes_summaries_when_requested(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + segment_query = Mock() + segment_query.filter.return_value.all.return_value = [SimpleNamespace(id="seg-1")] + session = Mock() + session.query.return_value = segment_query + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.session_factory.create_session", + return_value=session_ctx, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector"), + ): + processor.clean(dataset, ["node-1"], delete_summaries=True, precomputed_child_node_ids=[]) + + mock_summary.assert_called_once_with(dataset, ["seg-1"]) + + def test_clean_deletes_all_summaries_when_node_ids_missing( + self, processor: ParentChildIndexProcessor, dataset: Mock + ) -> None: + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector"), + ): + processor.clean(dataset, None, delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, None) + + def test_retrieve_filters_by_score_threshold(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + ok_result = SimpleNamespace(page_content="keep", metadata={"m": 1}, score=0.8) + low_result = SimpleNamespace(page_content="drop", metadata={"m": 2}, score=0.2) + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.RetrievalService.retrieve" + ) as mock_retrieve: + mock_retrieve.return_value = [ok_result, low_result] + docs = processor.retrieve("semantic_search", "query", dataset, 3, 0.5, {}) + + assert len(docs) == 1 + assert docs[0].page_content == "keep" + assert docs[0].metadata["score"] == 0.8 + + def test_split_child_nodes_requires_subchunk_segmentation(self, processor: ParentChildIndexProcessor) -> None: + rules = SimpleNamespace(subchunk_segmentation=None) + + with pytest.raises(ValueError, match="No subchunk segmentation found"): + processor._split_child_nodes(Document(page_content="parent", metadata={}), rules, "custom", None) + + def test_split_child_nodes_generates_child_documents(self, processor: ParentChildIndexProcessor) -> None: + rules = SimpleNamespace(subchunk_segmentation=self._segmentation()) + splitter = Mock() + splitter.split_documents.return_value = [ + Document(page_content=".child-1", metadata={}), + Document(page_content=" ", metadata={}), + ] + + with ( + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + ): + child_docs = processor._split_child_nodes( + Document(page_content="parent", metadata={}), rules, "custom", None + ) + + assert len(child_docs) == 1 + assert child_docs[0].page_content == "child-1" + assert child_docs[0].metadata["doc_hash"] == "hash" + + def test_index_creates_process_rule_segments_and_vectors( + self, processor: ParentChildIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[ + SimpleNamespace( + parent_content="parent text", + child_contents=["child-1", "child-2"], + files=[SimpleNamespace(id="file-1", filename="image.png")], + ) + ], + ) + dataset_rule = SimpleNamespace(id="rule-1") + session = Mock() + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.DatasetProcessRule", + return_value=dataset_rule, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + side_effect=lambda text: f"hash-{text}", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.DatasetDocumentStore" + ) as mock_store_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + processor.index(dataset, dataset_document, {"parent_child_chunks": []}) + + assert dataset_document.dataset_process_rule_id == "rule-1" + session.add.assert_called_once_with(dataset_rule) + session.flush.assert_called_once() + session.commit.assert_called_once() + mock_store_cls.return_value.add_documents.assert_called_once() + assert mock_vector_cls.return_value.create.call_count == 1 + mock_vector_cls.return_value.create_multimodal.assert_called_once() + + def test_index_uses_content_files_when_files_missing( + self, processor: ParentChildIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[SimpleNamespace(parent_content="parent", child_contents=["child"], files=None)], + ) + dataset_rule = SimpleNamespace(id="rule-1") + session = Mock() + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.DatasetProcessRule", + return_value=dataset_rule, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.AccountService.load_user", + return_value=SimpleNamespace(id="user-1"), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ) as mock_files, + patch("core.rag.index_processor.processor.parent_child_index_processor.DatasetDocumentStore"), + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector"), + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + processor.index(dataset, dataset_document, {"parent_child_chunks": []}) + + mock_files.assert_called_once() + + def test_index_raises_when_account_missing( + self, processor: ParentChildIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[SimpleNamespace(parent_content="parent", child_contents=["child"], files=None)], + ) + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.AccountService.load_user", + return_value=None, + ), + ): + with pytest.raises(ValueError, match="Invalid account"): + processor.index(dataset, dataset_document, {"parent_child_chunks": []}) + + def test_format_preview_returns_parent_child_structure(self, processor: ParentChildIndexProcessor) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[SimpleNamespace(parent_content="parent", child_contents=["child-1", "child-2"])], + ) + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ): + preview = processor.format_preview({"parent_child_chunks": []}) + + assert preview["chunk_structure"] == "hierarchical_model" + assert preview["parent_mode"] == ParentMode.PARAGRAPH + assert preview["total_segments"] == 1 + + def test_generate_summary_preview_sets_summaries(self, processor: ParentChildIndexProcessor) -> None: + preview_texts = [PreviewDetail(content="chunk-1"), PreviewDetail(content="chunk-2")] + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.ParagraphIndexProcessor.generate_summary", + return_value=("summary", None), + ): + result = processor.generate_summary_preview( + "tenant-1", preview_texts, {"enable": True}, doc_language="English" + ) + + assert all(item.summary == "summary" for item in result) + + def test_generate_summary_preview_raises_when_worker_fails(self, processor: ParentChildIndexProcessor) -> None: + preview_texts = [PreviewDetail(content="chunk-1")] + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.ParagraphIndexProcessor.generate_summary", + side_effect=RuntimeError("summary failed"), + ): + with pytest.raises(ValueError, match="Failed to generate summaries"): + processor.generate_summary_preview("tenant-1", preview_texts, {"enable": True}) + + def test_generate_summary_preview_falls_back_without_flask_context( + self, processor: ParentChildIndexProcessor + ) -> None: + preview_texts = [PreviewDetail(content="chunk-1")] + fake_current_app = SimpleNamespace(_get_current_object=Mock(side_effect=RuntimeError("no app"))) + + with ( + patch("flask.current_app", fake_current_app), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ParagraphIndexProcessor.generate_summary", + return_value=("summary", None), + ), + ): + result = processor.generate_summary_preview("tenant-1", preview_texts, {"enable": True}) + + assert result[0].summary == "summary" + + def test_generate_summary_preview_handles_timeout( + self, processor: ParentChildIndexProcessor, fake_executor_cls: type + ) -> None: + preview_texts = [PreviewDetail(content="chunk-1")] + future = Mock() + executor = fake_executor_cls(future) + + with ( + patch("concurrent.futures.ThreadPoolExecutor", return_value=executor), + patch("concurrent.futures.wait", side_effect=[(set(), {future}), (set(), set())]), + ): + with pytest.raises(ValueError, match="timeout"): + processor.generate_summary_preview("tenant-1", preview_texts, {"enable": True}) + + future.cancel.assert_called_once() diff --git a/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py b/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py new file mode 100644 index 0000000000..8596647ef3 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py @@ -0,0 +1,382 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import pandas as pd +import pytest +from werkzeug.datastructures import FileStorage + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.processor.qa_index_processor import QAIndexProcessor +from core.rag.models.document import AttachmentDocument, Document + + +class _ImmediateThread: + def __init__(self, target, args=(), kwargs=None): + self._target = target + self._args = args + self._kwargs = kwargs or {} + + def start(self) -> None: + self._target(*self._args, **self._kwargs) + + def join(self) -> None: + return None + + +class TestQAIndexProcessor: + @pytest.fixture + def processor(self) -> QAIndexProcessor: + return QAIndexProcessor() + + @pytest.fixture + def dataset(self) -> Mock: + dataset = Mock() + dataset.id = "dataset-1" + dataset.tenant_id = "tenant-1" + dataset.indexing_technique = "high_quality" + dataset.is_multimodal = True + return dataset + + @pytest.fixture + def dataset_document(self) -> Mock: + document = Mock() + document.id = "doc-1" + document.created_by = "user-1" + return document + + @pytest.fixture + def process_rule(self) -> dict: + return { + "mode": "custom", + "rules": {"segmentation": {"max_tokens": 256, "chunk_overlap": 10, "separator": "\n"}}, + } + + def _rules(self) -> SimpleNamespace: + segmentation = SimpleNamespace(max_tokens=256, chunk_overlap=10, separator="\n") + return SimpleNamespace(segmentation=segmentation) + + def test_extract_forwards_automatic_flag(self, processor: QAIndexProcessor) -> None: + extract_setting = Mock() + expected_docs = [Document(page_content="chunk", metadata={})] + + with patch("core.rag.index_processor.processor.qa_index_processor.ExtractProcessor.extract") as mock_extract: + mock_extract.return_value = expected_docs + + docs = processor.extract(extract_setting, process_rule_mode="automatic") + + assert docs == expected_docs + mock_extract.assert_called_once_with(extract_setting=extract_setting, is_automatic=True) + + def test_transform_rejects_none_process_rule(self, processor: QAIndexProcessor) -> None: + with pytest.raises(ValueError, match="No process rule found"): + processor.transform([Document(page_content="text", metadata={})], process_rule=None) + + def test_transform_rejects_missing_rules_key(self, processor: QAIndexProcessor) -> None: + with pytest.raises(ValueError, match="No rules found in process rule"): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "custom"}) + + def test_transform_preview_calls_formatter_once( + self, processor: QAIndexProcessor, process_rule: dict, fake_flask_app + ) -> None: + document = Document(page_content="raw text", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}) + split_node = Document(page_content=".question", metadata={}) + splitter = Mock() + splitter.split_documents.return_value = [split_node] + + def _append_document(flask_app, tenant_id, document_node, all_qa_documents, document_language): + all_qa_documents.append(Document(page_content="Q1", metadata={"answer": "A1"})) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.Rule.model_validate", return_value=self._rules() + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.qa_index_processor.CleanProcessor.clean", return_value="clean text" + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.remove_leading_symbols", + side_effect=lambda text: text.lstrip("."), + ), + patch.object(processor, "_format_qa_document", side_effect=_append_document) as mock_format, + patch("core.rag.index_processor.processor.qa_index_processor.current_app") as mock_current_app, + ): + mock_current_app._get_current_object.return_value = fake_flask_app + result = processor.transform( + [document], + process_rule=process_rule, + preview=True, + tenant_id="tenant-1", + doc_language="English", + ) + + assert len(result) == 1 + assert result[0].metadata["answer"] == "A1" + mock_format.assert_called_once() + + def test_transform_non_preview_uses_thread_batches( + self, processor: QAIndexProcessor, process_rule: dict, fake_flask_app + ) -> None: + documents = [ + Document(page_content="doc-1", metadata={"document_id": "doc-1", "dataset_id": "dataset-1"}), + Document(page_content="doc-2", metadata={"document_id": "doc-2", "dataset_id": "dataset-1"}), + ] + split_node = Document(page_content="question", metadata={}) + splitter = Mock() + splitter.split_documents.return_value = [split_node] + + def _append_document(flask_app, tenant_id, document_node, all_qa_documents, document_language): + all_qa_documents.append(Document(page_content=f"Q-{document_node.page_content}", metadata={"answer": "A"})) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.Rule.model_validate", return_value=self._rules() + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.qa_index_processor.CleanProcessor.clean", + side_effect=lambda text, _: text, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.remove_leading_symbols", + side_effect=lambda text: text, + ), + patch.object(processor, "_format_qa_document", side_effect=_append_document) as mock_format, + patch("core.rag.index_processor.processor.qa_index_processor.current_app") as mock_current_app, + patch( + "core.rag.index_processor.processor.qa_index_processor.threading.Thread", side_effect=_ImmediateThread + ), + ): + mock_current_app._get_current_object.return_value = fake_flask_app + result = processor.transform(documents, process_rule=process_rule, preview=False, tenant_id="tenant-1") + + assert len(result) == 2 + assert mock_format.call_count == 2 + + def test_format_by_template_validates_file_type(self, processor: QAIndexProcessor) -> None: + not_csv_file = Mock(spec=FileStorage) + not_csv_file.filename = "qa.txt" + + with pytest.raises(ValueError, match="Only CSV files"): + processor.format_by_template(not_csv_file) + + def test_format_by_template_parses_csv_rows(self, processor: QAIndexProcessor) -> None: + csv_file = Mock(spec=FileStorage) + csv_file.filename = "qa.csv" + dataframe = pd.DataFrame([["Q1", "A1"], ["Q2", "A2"]]) + + with patch("core.rag.index_processor.processor.qa_index_processor.pd.read_csv", return_value=dataframe): + docs = processor.format_by_template(csv_file) + + assert [doc.page_content for doc in docs] == ["Q1", "Q2"] + assert [doc.metadata["answer"] for doc in docs] == ["A1", "A2"] + + def test_format_by_template_raises_on_empty_csv(self, processor: QAIndexProcessor) -> None: + csv_file = Mock(spec=FileStorage) + csv_file.filename = "qa.csv" + + with patch("core.rag.index_processor.processor.qa_index_processor.pd.read_csv", return_value=pd.DataFrame()): + with pytest.raises(ValueError, match="empty"): + processor.format_by_template(csv_file) + + def test_format_by_template_raises_on_invalid_csv(self, processor: QAIndexProcessor) -> None: + csv_file = Mock(spec=FileStorage) + csv_file.filename = "qa.csv" + + with patch( + "core.rag.index_processor.processor.qa_index_processor.pd.read_csv", side_effect=Exception("bad csv") + ): + with pytest.raises(ValueError, match="bad csv"): + processor.format_by_template(csv_file) + + def test_load_creates_vectors_for_high_quality_dataset(self, processor: QAIndexProcessor, dataset: Mock) -> None: + docs = [Document(page_content="Q1", metadata={"answer": "A1"})] + multimodal_docs = [AttachmentDocument(page_content="image", metadata={})] + + with patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls: + vector = mock_vector_cls.return_value + processor.load(dataset, docs, multimodal_documents=multimodal_docs) + + vector.create.assert_called_once_with(docs) + vector.create_multimodal.assert_called_once_with(multimodal_docs) + + def test_load_skips_vector_for_non_high_quality(self, processor: QAIndexProcessor, dataset: Mock) -> None: + dataset.indexing_technique = "economy" + docs = [Document(page_content="Q1", metadata={"answer": "A1"})] + + with patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls: + processor.load(dataset, docs) + + mock_vector_cls.assert_not_called() + + def test_clean_handles_summary_deletion_and_vector_cleanup( + self, processor: QAIndexProcessor, dataset: Mock + ) -> None: + mock_segment = SimpleNamespace(id="seg-1") + mock_query = Mock() + mock_query.filter.return_value.all.return_value = [mock_segment] + mock_session = Mock() + mock_session.query.return_value = mock_query + session_context = MagicMock() + session_context.__enter__.return_value = mock_session + session_context.__exit__.return_value = False + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.session_factory.create_session", + return_value=session_context, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls, + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, ["node-1"], delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, ["seg-1"]) + vector.delete_by_ids.assert_called_once_with(["node-1"]) + + def test_clean_handles_dataset_wide_cleanup(self, processor: QAIndexProcessor, dataset: Mock) -> None: + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls, + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, None, delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, None) + vector.delete.assert_called_once() + + def test_retrieve_filters_by_score_threshold(self, processor: QAIndexProcessor, dataset: Mock) -> None: + result_ok = SimpleNamespace(page_content="accepted", metadata={"source": "a"}, score=0.9) + result_low = SimpleNamespace(page_content="rejected", metadata={"source": "b"}, score=0.1) + + with patch("core.rag.index_processor.processor.qa_index_processor.RetrievalService.retrieve") as mock_retrieve: + mock_retrieve.return_value = [result_ok, result_low] + docs = processor.retrieve("semantic_search", "query", dataset, 5, 0.5, {}) + + assert len(docs) == 1 + assert docs[0].page_content == "accepted" + assert docs[0].metadata["score"] == 0.9 + + def test_index_adds_documents_and_vectors_for_high_quality( + self, processor: QAIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + qa_chunks = SimpleNamespace( + qa_chunks=[ + SimpleNamespace(question="Q1", answer="A1"), + SimpleNamespace(question="Q2", answer="A2"), + ] + ) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.QAStructureChunk.model_validate", + return_value=qa_chunks, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch("core.rag.index_processor.processor.qa_index_processor.DatasetDocumentStore") as mock_store_cls, + patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls, + ): + processor.index(dataset, dataset_document, {"qa_chunks": []}) + + mock_store_cls.return_value.add_documents.assert_called_once() + mock_vector_cls.return_value.create.assert_called_once() + + def test_index_requires_high_quality( + self, processor: QAIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + dataset.indexing_technique = "economy" + qa_chunks = SimpleNamespace(qa_chunks=[SimpleNamespace(question="Q1", answer="A1")]) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.QAStructureChunk.model_validate", + return_value=qa_chunks, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch("core.rag.index_processor.processor.qa_index_processor.DatasetDocumentStore"), + ): + with pytest.raises(ValueError, match="must be high quality"): + processor.index(dataset, dataset_document, {"qa_chunks": []}) + + def test_format_preview_returns_qa_preview(self, processor: QAIndexProcessor) -> None: + qa_chunks = SimpleNamespace(qa_chunks=[SimpleNamespace(question="Q1", answer="A1")]) + + with patch( + "core.rag.index_processor.processor.qa_index_processor.QAStructureChunk.model_validate", + return_value=qa_chunks, + ): + preview = processor.format_preview({"qa_chunks": []}) + + assert preview["chunk_structure"] == "qa_model" + assert preview["total_segments"] == 1 + assert preview["qa_preview"] == [{"question": "Q1", "answer": "A1"}] + + def test_generate_summary_preview_returns_input(self, processor: QAIndexProcessor) -> None: + preview_items = [PreviewDetail(content="Q1")] + assert processor.generate_summary_preview("tenant-1", preview_items, {}) is preview_items + + def test_format_qa_document_ignores_blank_text(self, processor: QAIndexProcessor, fake_flask_app) -> None: + all_qa_documents: list[Document] = [] + blank_document = Document(page_content=" ", metadata={}) + + processor._format_qa_document(fake_flask_app, "tenant-1", blank_document, all_qa_documents, "English") + + assert all_qa_documents == [] + + def test_format_qa_document_builds_question_answer_documents( + self, processor: QAIndexProcessor, fake_flask_app + ) -> None: + all_qa_documents: list[Document] = [] + source_document = Document(page_content="source text", metadata={"origin": "doc-1"}) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.LLMGenerator.generate_qa_document", + return_value="Q1: What is this?\nA1: A test.\nQ2: Why?\nA2: Coverage.", + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + ): + processor._format_qa_document(fake_flask_app, "tenant-1", source_document, all_qa_documents, "English") + + assert len(all_qa_documents) == 2 + assert all_qa_documents[0].page_content == "What is this?" + assert all_qa_documents[0].metadata["answer"] == "A test." + assert all_qa_documents[1].metadata["answer"] == "Coverage." + + def test_format_qa_document_logs_errors(self, processor: QAIndexProcessor, fake_flask_app) -> None: + all_qa_documents: list[Document] = [] + source_document = Document(page_content="source text", metadata={"origin": "doc-1"}) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.LLMGenerator.generate_qa_document", + side_effect=RuntimeError("llm failure"), + ), + patch("core.rag.index_processor.processor.qa_index_processor.logger") as mock_logger, + ): + processor._format_qa_document(fake_flask_app, "tenant-1", source_document, all_qa_documents, "English") + + assert all_qa_documents == [] + mock_logger.exception.assert_called_once_with("Failed to format qa document") + + def test_format_split_text_extracts_question_answer_pairs(self, processor: QAIndexProcessor) -> None: + parsed = processor._format_split_text("Q1: First?\nA1: One.\nQ2: Second?\nA2: Two.\n") + + assert parsed == [{"question": "First?", "answer": "One."}, {"question": "Second?", "answer": "Two."}] diff --git a/api/tests/unit_tests/core/rag/indexing/test_index_processor_base.py b/api/tests/unit_tests/core/rag/indexing/test_index_processor_base.py new file mode 100644 index 0000000000..b31bb6eea7 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/test_index_processor_base.py @@ -0,0 +1,291 @@ +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import httpx +import pytest + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.constant.doc_type import DocType +from core.rag.index_processor.index_processor_base import BaseIndexProcessor +from core.rag.models.document import AttachmentDocument, Document + + +class _ForwardingBaseIndexProcessor(BaseIndexProcessor): + def extract(self, extract_setting, **kwargs): + return super().extract(extract_setting, **kwargs) + + def transform(self, documents, current_user=None, **kwargs): + return super().transform(documents, current_user=current_user, **kwargs) + + def generate_summary_preview(self, tenant_id, preview_texts, summary_index_setting, doc_language=None): + return super().generate_summary_preview( + tenant_id=tenant_id, + preview_texts=preview_texts, + summary_index_setting=summary_index_setting, + doc_language=doc_language, + ) + + def load(self, dataset, documents, multimodal_documents=None, with_keywords=True, **kwargs): + return super().load( + dataset=dataset, + documents=documents, + multimodal_documents=multimodal_documents, + with_keywords=with_keywords, + **kwargs, + ) + + def clean(self, dataset, node_ids, with_keywords=True, **kwargs): + return super().clean(dataset=dataset, node_ids=node_ids, with_keywords=with_keywords, **kwargs) + + def index(self, dataset, document, chunks): + return super().index(dataset=dataset, document=document, chunks=chunks) + + def format_preview(self, chunks): + return super().format_preview(chunks) + + def retrieve(self, retrieval_method, query, dataset, top_k, score_threshold, reranking_model): + return super().retrieve( + retrieval_method=retrieval_method, + query=query, + dataset=dataset, + top_k=top_k, + score_threshold=score_threshold, + reranking_model=reranking_model, + ) + + +class TestBaseIndexProcessor: + @pytest.fixture + def processor(self) -> _ForwardingBaseIndexProcessor: + return _ForwardingBaseIndexProcessor() + + def test_abstract_methods_raise_not_implemented(self, processor: _ForwardingBaseIndexProcessor) -> None: + with pytest.raises(NotImplementedError): + processor.extract(Mock()) + with pytest.raises(NotImplementedError): + processor.transform([]) + with pytest.raises(NotImplementedError): + processor.generate_summary_preview("tenant", [PreviewDetail(content="c")], {}) + with pytest.raises(NotImplementedError): + processor.load(Mock(), []) + with pytest.raises(NotImplementedError): + processor.clean(Mock(), None) + with pytest.raises(NotImplementedError): + processor.index(Mock(), Mock(), {}) + with pytest.raises(NotImplementedError): + processor.format_preview([]) + with pytest.raises(NotImplementedError): + processor.retrieve("semantic_search", "q", Mock(), 3, 0.5, {}) + + def test_get_splitter_validates_custom_length(self, processor: _ForwardingBaseIndexProcessor) -> None: + with patch( + "core.rag.index_processor.index_processor_base.dify_config.INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH", 1000 + ): + with pytest.raises(ValueError, match="between 50 and 1000"): + processor._get_splitter("custom", 49, 0, "", None) + with pytest.raises(ValueError, match="between 50 and 1000"): + processor._get_splitter("custom", 1001, 0, "", None) + + def test_get_splitter_custom_mode_uses_fixed_splitter(self, processor: _ForwardingBaseIndexProcessor) -> None: + fixed_splitter = Mock() + with patch( + "core.rag.index_processor.index_processor_base.FixedRecursiveCharacterTextSplitter.from_encoder", + return_value=fixed_splitter, + ) as mock_fixed: + splitter = processor._get_splitter("hierarchical", 120, 10, "\\n\\n", None) + + assert splitter is fixed_splitter + assert mock_fixed.call_args.kwargs["fixed_separator"] == "\n\n" + assert mock_fixed.call_args.kwargs["chunk_size"] == 120 + + def test_get_splitter_automatic_mode_uses_enhance_splitter(self, processor: _ForwardingBaseIndexProcessor) -> None: + auto_splitter = Mock() + with patch( + "core.rag.index_processor.index_processor_base.EnhanceRecursiveCharacterTextSplitter.from_encoder", + return_value=auto_splitter, + ) as mock_enhance: + splitter = processor._get_splitter("automatic", 0, 0, "", None) + + assert splitter is auto_splitter + assert "chunk_size" in mock_enhance.call_args.kwargs + + def test_extract_markdown_images(self, processor: _ForwardingBaseIndexProcessor) -> None: + markdown = "text ![a](https://a/img.png) and ![b](/files/123/file-preview)" + images = processor._extract_markdown_images(markdown) + assert images == ["https://a/img.png", "/files/123/file-preview"] + + def test_get_content_files_without_images_returns_empty(self, processor: _ForwardingBaseIndexProcessor) -> None: + document = Document(page_content="no image markdown", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + assert processor._get_content_files(document) == [] + + def test_get_content_files_handles_all_sources_and_duplicates( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + document = Document(page_content="ignored", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + images = [ + "/files/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/image-preview", + "/files/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/image-preview", + "/files/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/file-preview", + "/files/tools/cccccccc-cccc-cccc-cccc-cccccccccccc.png", + "https://example.com/remote.png?x=1", + ] + upload_a = SimpleNamespace(id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", name="a.png") + upload_b = SimpleNamespace(id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", name="b.png") + upload_tool = SimpleNamespace(id="tool-upload-id", name="tool.png") + upload_remote = SimpleNamespace(id="remote-upload-id", name="remote.png") + db_query = Mock() + db_query.where.return_value.all.return_value = [upload_a, upload_b, upload_tool, upload_remote] + db_session = Mock() + db_session.query.return_value = db_query + + with ( + patch.object(processor, "_extract_markdown_images", return_value=images), + patch.object(processor, "_download_tool_file", return_value="tool-upload-id") as mock_tool_download, + patch.object(processor, "_download_image", return_value="remote-upload-id") as mock_image_download, + patch("core.rag.index_processor.index_processor_base.db.session", db_session), + ): + files = processor._get_content_files(document, current_user=Mock()) + + assert len(files) == 5 + assert all(isinstance(file, AttachmentDocument) for file in files) + assert files[0].metadata["doc_type"] == DocType.IMAGE + assert files[0].metadata["document_id"] == "doc-1" + assert files[0].metadata["dataset_id"] == "ds-1" + assert files[0].metadata["doc_id"] == "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + assert files[1].metadata["doc_id"] == "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + mock_tool_download.assert_called_once() + mock_image_download.assert_called_once() + + def test_get_content_files_skips_tool_and_remote_download_without_user( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + document = Document(page_content="ignored", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + images = ["/files/tools/cccccccc-cccc-cccc-cccc-cccccccccccc.png", "https://example.com/remote.png"] + + with patch.object(processor, "_extract_markdown_images", return_value=images): + files = processor._get_content_files(document, current_user=None) + + assert files == [] + + def test_get_content_files_ignores_missing_upload_records(self, processor: _ForwardingBaseIndexProcessor) -> None: + document = Document(page_content="ignored", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + images = ["/files/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/image-preview"] + db_query = Mock() + db_query.where.return_value.all.return_value = [] + db_session = Mock() + db_session.query.return_value = db_query + + with ( + patch.object(processor, "_extract_markdown_images", return_value=images), + patch("core.rag.index_processor.index_processor_base.db.session", db_session), + ): + files = processor._get_content_files(document) + + assert files == [] + + def test_download_image_success_with_filename_from_content_disposition( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + response = Mock() + response.headers = { + "Content-Length": "4", + "content-disposition": "attachment; filename=test-image.png", + "content-type": "image/png", + } + response.raise_for_status.return_value = None + response.iter_bytes.return_value = [b"data"] + upload_result = SimpleNamespace(id="upload-id") + + mock_db = Mock() + mock_db.engine = Mock() + + with ( + patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=response), + patch("core.rag.index_processor.index_processor_base.db", mock_db), + patch("services.file_service.FileService") as mock_file_service, + ): + mock_file_service.return_value.upload_file.return_value = upload_result + upload_id = processor._download_image("https://example.com/test.png", current_user=Mock()) + + assert upload_id == "upload-id" + mock_file_service.return_value.upload_file.assert_called_once() + + def test_download_image_validates_size_and_empty_content(self, processor: _ForwardingBaseIndexProcessor) -> None: + too_large = Mock() + too_large.headers = {"Content-Length": str(3 * 1024 * 1024), "content-type": "image/png"} + too_large.raise_for_status.return_value = None + + with patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=too_large): + assert processor._download_image("https://example.com/too-large.png", current_user=Mock()) is None + + empty = Mock() + empty.headers = {"Content-Length": "0", "content-type": "image/png"} + empty.raise_for_status.return_value = None + empty.iter_bytes.return_value = [] + + with patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=empty): + assert processor._download_image("https://example.com/empty.png", current_user=Mock()) is None + + def test_download_image_limits_stream_size(self, processor: _ForwardingBaseIndexProcessor) -> None: + response = Mock() + response.headers = {"content-type": "image/png"} + response.raise_for_status.return_value = None + response.iter_bytes.return_value = [b"a" * (3 * 1024 * 1024)] + + with patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=response): + assert processor._download_image("https://example.com/big-stream.png", current_user=Mock()) is None + + def test_download_image_handles_timeout_request_and_unexpected_errors( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + request = httpx.Request("GET", "https://example.com/image.png") + + with patch( + "core.rag.index_processor.index_processor_base.ssrf_proxy.get", + side_effect=httpx.TimeoutException("timeout"), + ): + assert processor._download_image("https://example.com/image.png", current_user=Mock()) is None + + with patch( + "core.rag.index_processor.index_processor_base.ssrf_proxy.get", + side_effect=httpx.RequestError("bad request", request=request), + ): + assert processor._download_image("https://example.com/image.png", current_user=Mock()) is None + + with patch( + "core.rag.index_processor.index_processor_base.ssrf_proxy.get", + side_effect=RuntimeError("unexpected"), + ): + assert processor._download_image("https://example.com/image.png", current_user=Mock()) is None + + def test_download_tool_file_returns_none_when_not_found(self, processor: _ForwardingBaseIndexProcessor) -> None: + db_query = Mock() + db_query.where.return_value.first.return_value = None + db_session = Mock() + db_session.query.return_value = db_query + + with patch("core.rag.index_processor.index_processor_base.db.session", db_session): + assert processor._download_tool_file("tool-id", current_user=Mock()) is None + + def test_download_tool_file_uploads_file_when_found(self, processor: _ForwardingBaseIndexProcessor) -> None: + tool_file = SimpleNamespace(file_key="k1", name="tool.png", mimetype="image/png") + db_query = Mock() + db_query.where.return_value.first.return_value = tool_file + db_session = Mock() + db_session.query.return_value = db_query + mock_db = Mock() + mock_db.session = db_session + mock_db.engine = Mock() + upload_result = SimpleNamespace(id="upload-id") + + with ( + patch("core.rag.index_processor.index_processor_base.db", mock_db), + patch("core.rag.index_processor.index_processor_base.storage.load_once", return_value=b"blob") as mock_load, + patch("services.file_service.FileService") as mock_file_service, + ): + mock_file_service.return_value.upload_file.return_value = upload_result + result = processor._download_tool_file("tool-id", current_user=Mock()) + + assert result == "upload-id" + mock_load.assert_called_once_with("k1") + mock_file_service.return_value.upload_file.assert_called_once() diff --git a/api/tests/unit_tests/core/rag/indexing/test_index_processor_factory.py b/api/tests/unit_tests/core/rag/indexing/test_index_processor_factory.py new file mode 100644 index 0000000000..0fc666dbbf --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/test_index_processor_factory.py @@ -0,0 +1,42 @@ +import pytest + +from core.rag.index_processor.constant.index_type import IndexStructureType +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory +from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor +from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor +from core.rag.index_processor.processor.qa_index_processor import QAIndexProcessor + + +class TestIndexProcessorFactory: + def test_requires_index_type(self) -> None: + factory = IndexProcessorFactory(index_type=None) + + with pytest.raises(ValueError, match="Index type must be specified"): + factory.init_index_processor() + + def test_builds_paragraph_processor(self) -> None: + factory = IndexProcessorFactory(index_type=IndexStructureType.PARAGRAPH_INDEX) + + processor = factory.init_index_processor() + + assert isinstance(processor, ParagraphIndexProcessor) + + def test_builds_qa_processor(self) -> None: + factory = IndexProcessorFactory(index_type=IndexStructureType.QA_INDEX) + + processor = factory.init_index_processor() + + assert isinstance(processor, QAIndexProcessor) + + def test_builds_parent_child_processor(self) -> None: + factory = IndexProcessorFactory(index_type=IndexStructureType.PARENT_CHILD_INDEX) + + processor = factory.init_index_processor() + + assert isinstance(processor, ParentChildIndexProcessor) + + def test_rejects_unsupported_index_type(self) -> None: + factory = IndexProcessorFactory(index_type="unsupported") + + with pytest.raises(ValueError, match="is not supported"): + factory.init_index_processor() diff --git a/api/tests/unit_tests/core/rag/rerank/test_reranker.py b/api/tests/unit_tests/core/rag/rerank/test_reranker.py index 0e53482c51..b150d677f1 100644 --- a/api/tests/unit_tests/core/rag/rerank/test_reranker.py +++ b/api/tests/unit_tests/core/rag/rerank/test_reranker.py @@ -12,13 +12,18 @@ All tests use mocking to avoid external dependencies and ensure fast, reliable e Tests follow the Arrange-Act-Assert pattern for clarity. """ +from operator import itemgetter +from types import SimpleNamespace from unittest.mock import MagicMock, Mock, patch import pytest from core.model_manager import ModelInstance +from core.rag.index_processor.constant.doc_type import DocType +from core.rag.index_processor.constant.query_type import QueryType from core.rag.models.document import Document from core.rag.rerank.entity.weight import KeywordSetting, VectorSetting, Weights +from core.rag.rerank.rerank_base import BaseRerankRunner from core.rag.rerank.rerank_factory import RerankRunnerFactory from core.rag.rerank.rerank_model import RerankModelRunner from core.rag.rerank.rerank_type import RerankMode @@ -26,7 +31,7 @@ from core.rag.rerank.weight_rerank import WeightRerankRunner from dify_graph.model_runtime.entities.rerank_entities import RerankDocument, RerankResult -def create_mock_model_instance(): +def create_mock_model_instance() -> ModelInstance: """Create a properly configured mock ModelInstance for reranking tests.""" mock_instance = Mock(spec=ModelInstance) # Setup provider_model_bundle chain for check_model_support_vision @@ -59,14 +64,7 @@ class TestRerankModelRunner: @pytest.fixture def mock_model_instance(self): """Create a mock ModelInstance for reranking.""" - mock_instance = Mock(spec=ModelInstance) - # Setup provider_model_bundle chain for check_model_support_vision - mock_instance.provider_model_bundle = Mock() - mock_instance.provider_model_bundle.configuration = Mock() - mock_instance.provider_model_bundle.configuration.tenant_id = "test-tenant-id" - mock_instance.provider = "test-provider" - mock_instance.model_name = "test-model" - return mock_instance + return create_mock_model_instance() @pytest.fixture def rerank_runner(self, mock_model_instance): @@ -382,6 +380,206 @@ class TestRerankModelRunner: assert call_kwargs["user"] == "user123" +class _ForwardingBaseRerankRunner(BaseRerankRunner): + def run( + self, + query: str, + documents: list[Document], + score_threshold: float | None = None, + top_n: int | None = None, + user: str | None = None, + query_type: QueryType = QueryType.TEXT_QUERY, + ) -> list[Document]: + return super().run( + query=query, + documents=documents, + score_threshold=score_threshold, + top_n=top_n, + user=user, + query_type=query_type, + ) + + +class TestBaseRerankRunner: + def test_run_raises_not_implemented(self): + runner = _ForwardingBaseRerankRunner() + + with pytest.raises(NotImplementedError): + runner.run(query="python", documents=[]) + + +class TestRerankModelRunnerMultimodal: + @pytest.fixture + def mock_model_instance(self): + return create_mock_model_instance() + + @pytest.fixture + def rerank_runner(self, mock_model_instance): + return RerankModelRunner(rerank_model_instance=mock_model_instance) + + def test_run_returns_original_documents_for_non_text_query_without_vision_support( + self, rerank_runner, mock_model_instance + ): + documents = [ + Document(page_content="doc", metadata={"doc_id": "doc1"}, provider="dify"), + ] + + with patch("core.rag.rerank.rerank_model.ModelManager") as mock_mm: + mock_mm.return_value.check_model_support_vision.return_value = False + result = rerank_runner.run(query="image-file-id", documents=documents, query_type=QueryType.IMAGE_QUERY) + + assert result == documents + mock_model_instance.invoke_rerank.assert_not_called() + + def test_run_uses_multimodal_path_when_vision_support_is_enabled(self, rerank_runner): + documents = [ + Document(page_content="doc", metadata={"doc_id": "doc1", "source": "wiki"}, provider="dify"), + ] + rerank_result = RerankResult( + model="rerank-model", + docs=[RerankDocument(index=0, text="doc", score=0.88)], + ) + + with ( + patch("core.rag.rerank.rerank_model.ModelManager") as mock_mm, + patch.object( + rerank_runner, + "fetch_multimodal_rerank", + return_value=(rerank_result, documents), + ) as mock_multimodal, + ): + mock_mm.return_value.check_model_support_vision.return_value = True + result = rerank_runner.run(query="python", documents=documents, query_type=QueryType.TEXT_QUERY) + + mock_multimodal.assert_called_once() + assert len(result) == 1 + assert result[0].metadata["score"] == 0.88 + + def test_fetch_multimodal_rerank_builds_docs_and_calls_text_rerank(self, rerank_runner): + image_doc = Document( + page_content="image-content", + metadata={"doc_id": "img-1", "doc_type": DocType.IMAGE}, + provider="dify", + ) + text_doc = Document( + page_content="text-content", + metadata={"doc_id": "txt-1", "doc_type": DocType.TEXT}, + provider="dify", + ) + external_doc = Document( + page_content="external-content", + metadata={}, + provider="external", + ) + query = Mock() + query.where.return_value.first.return_value = SimpleNamespace(key="image-key") + rerank_result = RerankResult(model="rerank-model", docs=[]) + + with ( + patch("core.rag.rerank.rerank_model.db.session.query", return_value=query), + patch("core.rag.rerank.rerank_model.storage.load_once", return_value=b"image-bytes") as mock_load_once, + patch.object( + rerank_runner, + "fetch_text_rerank", + return_value=(rerank_result, [image_doc, text_doc, external_doc]), + ) as mock_text_rerank, + ): + result, unique_documents = rerank_runner.fetch_multimodal_rerank( + query="python", + documents=[image_doc, text_doc, external_doc, external_doc], + query_type=QueryType.TEXT_QUERY, + ) + + assert result == rerank_result + assert len(unique_documents) == 3 + mock_load_once.assert_called_once_with("image-key") + text_rerank_call_args = mock_text_rerank.call_args.args + assert len(text_rerank_call_args[1]) == 3 + + def test_fetch_multimodal_rerank_skips_missing_image_upload(self, rerank_runner): + image_doc = Document( + page_content="image-content", + metadata={"doc_id": "img-missing", "doc_type": DocType.IMAGE}, + provider="dify", + ) + query = Mock() + query.where.return_value.first.return_value = None + rerank_result = RerankResult(model="rerank-model", docs=[]) + + with ( + patch("core.rag.rerank.rerank_model.db.session.query", return_value=query), + patch.object( + rerank_runner, + "fetch_text_rerank", + return_value=(rerank_result, [image_doc]), + ) as mock_text_rerank, + ): + result, unique_documents = rerank_runner.fetch_multimodal_rerank( + query="python", + documents=[image_doc], + query_type=QueryType.TEXT_QUERY, + ) + + assert result == rerank_result + assert unique_documents == [image_doc] + docs_arg = mock_text_rerank.call_args.args[1] + assert len(docs_arg) == 1 + + def test_fetch_multimodal_rerank_image_query_invokes_multimodal_model(self, rerank_runner, mock_model_instance): + text_doc = Document( + page_content="text-content", + metadata={"doc_id": "txt-1", "doc_type": DocType.TEXT}, + provider="dify", + ) + query_chain = Mock() + query_chain.where.return_value.first.return_value = SimpleNamespace(key="query-image-key") + rerank_result = RerankResult( + model="rerank-model", + docs=[RerankDocument(index=0, text="text-content", score=0.77)], + ) + mock_model_instance.invoke_multimodal_rerank.return_value = rerank_result + + with ( + patch("core.rag.rerank.rerank_model.db.session.query", return_value=query_chain), + patch("core.rag.rerank.rerank_model.storage.load_once", return_value=b"query-image-bytes"), + ): + result, unique_documents = rerank_runner.fetch_multimodal_rerank( + query="query-upload-id", + documents=[text_doc], + score_threshold=0.2, + top_n=2, + user="user-1", + query_type=QueryType.IMAGE_QUERY, + ) + + assert result == rerank_result + assert unique_documents == [text_doc] + invoke_kwargs = mock_model_instance.invoke_multimodal_rerank.call_args.kwargs + assert invoke_kwargs["query"]["content_type"] == DocType.IMAGE + assert invoke_kwargs["docs"][0]["content"] == "text-content" + assert invoke_kwargs["user"] == "user-1" + + def test_fetch_multimodal_rerank_raises_when_query_image_not_found(self, rerank_runner): + query_chain = Mock() + query_chain.where.return_value.first.return_value = None + + with patch("core.rag.rerank.rerank_model.db.session.query", return_value=query_chain): + with pytest.raises(ValueError, match="Upload file not found for query"): + rerank_runner.fetch_multimodal_rerank( + query="missing-upload-id", + documents=[], + query_type=QueryType.IMAGE_QUERY, + ) + + def test_fetch_multimodal_rerank_rejects_unsupported_query_type(self, rerank_runner): + with pytest.raises(ValueError, match="is not supported"): + rerank_runner.fetch_multimodal_rerank( + query="python", + documents=[], + query_type="unsupported_query_type", + ) + + class TestWeightRerankRunner: """Unit tests for WeightRerankRunner. @@ -512,34 +710,39 @@ class TestWeightRerankRunner: - TF-IDF scores are calculated correctly - Cosine similarity is computed for keyword vectors """ - # Arrange: Create runner runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - - # Mock keyword extraction with specific keywords + keyword_map = { + "python programming": ["python", "programming"], + "Python is a programming language": ["python", "programming", "language"], + "JavaScript for web development": ["javascript", "web"], + "Java object-oriented programming": ["java", "programming"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.side_effect = [ - ["python", "programming"], # query - ["python", "programming", "language"], # doc1 - ["javascript", "web"], # doc2 - ["java", "programming"], # doc3 - ] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance mock_cache_instance = MagicMock() mock_cache_instance.embed_query.return_value = [0.1, 0.2, 0.3, 0.4] mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("python programming", sample_documents_with_vectors) + vector_scores = runner._calculate_cosine( + "tenant123", "python programming", sample_documents_with_vectors, weights_config.vector_setting + ) + expected_scores = { + doc.metadata["doc_id"]: (0.6 * vector_score + 0.4 * query_score) + for doc, query_score, vector_score in zip(sample_documents_with_vectors, query_scores, vector_scores) + } + result = runner.run(query="python programming", documents=sample_documents_with_vectors) - # Assert: Keywords are extracted and scores are calculated - assert len(result) == 3 - # Document 1 should have highest keyword score (matches both query terms) - # Document 3 should have medium score (matches one term) - # Document 2 should have lowest score (matches no terms) + expected_order = [doc_id for doc_id, _ in sorted(expected_scores.items(), key=itemgetter(1), reverse=True)] + assert [doc.metadata["doc_id"] for doc in result] == expected_order + for doc in result: + doc_id = doc.metadata["doc_id"] + assert doc.metadata["score"] == pytest.approx(expected_scores[doc_id], rel=1e-6) def test_vector_score_calculation( self, @@ -556,30 +759,42 @@ class TestWeightRerankRunner: - Cosine similarity is calculated with document vectors - Vector scores are properly normalized """ - # Arrange: Create runner runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - # Mock keyword extraction + keyword_map = { + "test query": ["test"], + "Python is a programming language": ["python"], + "JavaScript for web development": ["javascript"], + "Java object-oriented programming": ["java"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.return_value = ["test"] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding model mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance - # Mock cache embedding with specific query vector mock_cache_instance = MagicMock() query_vector = [0.2, 0.3, 0.4, 0.5] mock_cache_instance.embed_query.return_value = query_vector mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("test query", sample_documents_with_vectors) + vector_scores = runner._calculate_cosine( + "tenant123", "test query", sample_documents_with_vectors, weights_config.vector_setting + ) + expected_scores = { + doc.metadata["doc_id"]: (0.6 * vector_score + 0.4 * query_score) + for doc, query_score, vector_score in zip(sample_documents_with_vectors, query_scores, vector_scores) + } + result = runner.run(query="test query", documents=sample_documents_with_vectors) - # Assert: Vector scores are calculated - assert len(result) == 3 - # Verify cosine similarity was computed (doc2 vector is closest to query vector) + expected_order = [doc_id for doc_id, _ in sorted(expected_scores.items(), key=itemgetter(1), reverse=True)] + assert [doc.metadata["doc_id"] for doc in result] == expected_order + for doc in result: + doc_id = doc.metadata["doc_id"] + assert doc.metadata["score"] == pytest.approx(expected_scores[doc_id], rel=1e-6) def test_score_threshold_filtering_weighted( self, @@ -742,28 +957,40 @@ class TestWeightRerankRunner: - Keyword weight (0.4) is applied to keyword scores - Combined score is the sum of weighted components """ - # Arrange: Create runner with known weights runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - # Mock keyword extraction + keyword_map = { + "test": ["test"], + "Python is a programming language": ["python", "language"], + "JavaScript for web development": ["javascript", "web"], + "Java object-oriented programming": ["java", "programming"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.return_value = ["test"] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance mock_cache_instance = MagicMock() mock_cache_instance.embed_query.return_value = [0.1, 0.2, 0.3, 0.4] mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("test", sample_documents_with_vectors) + vector_scores = runner._calculate_cosine( + "tenant123", "test", sample_documents_with_vectors, weights_config.vector_setting + ) + expected_scores = { + doc.metadata["doc_id"]: (0.6 * vector_score + 0.4 * query_score) + for doc, query_score, vector_score in zip(sample_documents_with_vectors, query_scores, vector_scores) + } + result = runner.run(query="test", documents=sample_documents_with_vectors) - # Assert: Scores are combined with weights - # Score = 0.6 * vector_score + 0.4 * keyword_score - assert len(result) == 3 - assert all("score" in doc.metadata for doc in result) + expected_order = [doc_id for doc_id, _ in sorted(expected_scores.items(), key=itemgetter(1), reverse=True)] + assert [doc.metadata["doc_id"] for doc in result] == expected_order + for doc in result: + doc_id = doc.metadata["doc_id"] + assert doc.metadata["score"] == pytest.approx(expected_scores[doc_id], rel=1e-6) def test_existing_vector_score_in_metadata( self, @@ -778,7 +1005,6 @@ class TestWeightRerankRunner: - If document already has a score in metadata, it's used - Cosine similarity calculation is skipped for such documents """ - # Arrange: Documents with pre-existing scores documents = [ Document( page_content="Content with existing score", @@ -790,24 +1016,29 @@ class TestWeightRerankRunner: runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - # Mock keyword extraction + keyword_map = { + "test": ["test"], + "Content with existing score": ["test"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.return_value = ["test"] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance mock_cache_instance = MagicMock() mock_cache_instance.embed_query.return_value = [0.1, 0.2] mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("test", documents) + vector_scores = runner._calculate_cosine("tenant123", "test", documents, weights_config.vector_setting) + expected_score = 0.6 * vector_scores[0] + 0.4 * query_scores[0] + result = runner.run(query="test", documents=documents) - # Assert: Existing score is used in calculation assert len(result) == 1 - # The final score should incorporate the existing score (0.95) with vector weight (0.6) + assert result[0].metadata["doc_id"] == "doc1" + assert result[0].metadata["score"] == pytest.approx(expected_score, rel=1e-6) class TestRerankRunnerFactory: diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py index ca08cb0591..d61f01c616 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py @@ -1,80 +1,41 @@ -""" -Unit tests for dataset retrieval functionality. - -This module provides comprehensive test coverage for the RetrievalService class, -which is responsible for retrieving relevant documents from datasets using various -search strategies. - -Core Retrieval Mechanisms Tested: -================================== -1. **Vector Search (Semantic Search)** - - Uses embedding vectors to find semantically similar documents - - Supports score thresholds and top-k limiting - - Can filter by document IDs and metadata - -2. **Keyword Search** - - Traditional text-based search using keyword matching - - Handles special characters and query escaping - - Supports document filtering - -3. **Full-Text Search** - - BM25-based full-text search for text matching - - Used in hybrid search scenarios - -4. **Hybrid Search** - - Combines vector and full-text search results - - Implements deduplication to avoid duplicate chunks - - Uses DataPostProcessor for score merging with configurable weights - -5. **Score Merging Algorithms** - - Deduplication based on doc_id - - Retains higher-scoring duplicates - - Supports weighted score combination - -6. **Metadata Filtering** - - Filters documents based on metadata conditions - - Supports document ID filtering - -Test Architecture: -================== -- **Fixtures**: Provide reusable mock objects (datasets, documents, Flask app) -- **Mocking Strategy**: Mock at the method level (embedding_search, keyword_search, etc.) - rather than at the class level to properly simulate the ThreadPoolExecutor behavior -- **Pattern**: All tests follow Arrange-Act-Assert (AAA) pattern -- **Isolation**: Each test is independent and doesn't rely on external state - -Running Tests: -============== - # Run all tests in this module - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py -v - - # Run a specific test class - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py::TestRetrievalService -v - - # Run a specific test - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py::\ -TestRetrievalService::test_vector_search_basic -v - -Notes: -====== -- The RetrievalService uses ThreadPoolExecutor for concurrent search operations -- Tests mock the individual search methods to avoid threading complexity -- All mocked search methods modify the all_documents list in-place -- Score thresholds and top-k limits are enforced by the search methods -""" - +import threading +from contextlib import contextmanager, nullcontext +from types import SimpleNamespace from unittest.mock import MagicMock, Mock, patch from uuid import uuid4 import pytest +from flask import Flask, current_app +from sqlalchemy import column +from core.app.app_config.entities import ( + Condition as AppCondition, +) +from core.app.app_config.entities import ( + DatasetEntity, + DatasetRetrieveConfigEntity, +) +from core.app.app_config.entities import ( + MetadataFilteringCondition as AppMetadataFilteringCondition, +) +from core.app.app_config.entities import ( + ModelConfig as AppModelConfig, +) +from core.app.app_config.entities import ModelConfig as WorkflowModelConfig +from core.app.entities.app_invoke_entities import InvokeFrom, ModelConfigWithCredentialsEntity +from core.entities.agent_entities import PlanningStrategy +from core.entities.model_entities import ModelStatus from core.rag.datasource.retrieval_service import RetrievalService +from core.rag.index_processor.constant.doc_type import DocType +from core.rag.index_processor.constant.index_type import IndexStructureType from core.rag.models.document import Document +from core.rag.rerank.rerank_type import RerankMode from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.rag.retrieval.retrieval_methods import RetrievalMethod +from core.workflow.nodes.knowledge_retrieval import exc +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.model_runtime.entities.model_entities import ModelFeature from models.dataset import Dataset # ==================== Helper Functions ==================== @@ -2013,3 +1974,3091 @@ class TestDocumentModel: assert doc1 == doc2 assert doc1 != doc3 + + +# ==================== Helper Functions ==================== + + +def create_mock_dataset_methods( + dataset_id: str | None = None, + tenant_id: str | None = None, + provider: str = "dify", + indexing_technique: str = "high_quality", + available_document_count: int = 10, +) -> Mock: + """ + Create a mock Dataset object for testing. + + Args: + dataset_id: Unique identifier for the dataset + tenant_id: Tenant ID for the dataset + provider: Provider type ("dify" or "external") + indexing_technique: Indexing technique ("high_quality" or "economy") + available_document_count: Number of available documents + + Returns: + Mock: A properly configured Dataset mock + """ + dataset = Mock(spec=Dataset) + dataset.id = dataset_id or str(uuid4()) + dataset.tenant_id = tenant_id or str(uuid4()) + dataset.name = "test_dataset" + dataset.provider = provider + dataset.indexing_technique = indexing_technique + dataset.available_document_count = available_document_count + dataset.embedding_model = "text-embedding-ada-002" + dataset.embedding_model_provider = "openai" + dataset.retrieval_model = { + "search_method": "semantic_search", + "reranking_enable": False, + "top_k": 4, + "score_threshold_enabled": False, + } + return dataset + + +def create_mock_document_methods( + content: str, + doc_id: str, + score: float = 0.8, + provider: str = "dify", + additional_metadata: dict | None = None, +) -> Document: + """ + Create a mock Document object for testing. + + Args: + content: The text content of the document + doc_id: Unique identifier for the document chunk + score: Relevance score (0.0 to 1.0) + provider: Document provider ("dify" or "external") + additional_metadata: Optional extra metadata fields + + Returns: + Document: A properly structured Document object + """ + metadata = { + "doc_id": doc_id, + "document_id": str(uuid4()), + "dataset_id": str(uuid4()), + "score": score, + } + + if additional_metadata: + metadata.update(additional_metadata) + + return Document( + page_content=content, + metadata=metadata, + provider=provider, + ) + + +# ==================== Test _check_knowledge_rate_limit ==================== + + +class TestCheckKnowledgeRateLimit: + """ + Test suite for _check_knowledge_rate_limit method. + + The _check_knowledge_rate_limit method validates whether a tenant has + exceeded their knowledge retrieval rate limit. This is important for: + - Preventing abuse of the knowledge retrieval system + - Enforcing subscription plan limits + - Tracking usage for billing purposes + + Test Cases: + ============ + 1. Rate limit disabled - no exception raised + 2. Rate limit enabled but not exceeded - no exception raised + 3. Rate limit enabled and exceeded - RateLimitExceededError raised + 4. Redis operations are performed correctly + 5. RateLimitLog is created when limit is exceeded + """ + + @patch("core.rag.retrieval.dataset_retrieval.FeatureService") + @patch("core.rag.retrieval.dataset_retrieval.redis_client") + def test_rate_limit_disabled_no_exception(self, mock_redis, mock_feature_service): + """ + Test that when rate limit is disabled, no exception is raised. + + This test verifies the behavior when the tenant's subscription + does not have rate limiting enabled. + + Verifies: + - FeatureService.get_knowledge_rate_limit is called + - No Redis operations are performed + - No exception is raised + - Retrieval proceeds normally + """ + # Arrange + tenant_id = str(uuid4()) + dataset_retrieval = DatasetRetrieval() + + # Mock rate limit disabled + mock_limit = Mock() + mock_limit.enabled = False + mock_feature_service.get_knowledge_rate_limit.return_value = mock_limit + + # Act & Assert - should not raise any exception + dataset_retrieval._check_knowledge_rate_limit(tenant_id) + + # Verify FeatureService was called + mock_feature_service.get_knowledge_rate_limit.assert_called_once_with(tenant_id) + + # Verify no Redis operations were performed + assert not mock_redis.zadd.called + assert not mock_redis.zremrangebyscore.called + assert not mock_redis.zcard.called + + @patch("core.rag.retrieval.dataset_retrieval.session_factory") + @patch("core.rag.retrieval.dataset_retrieval.FeatureService") + @patch("core.rag.retrieval.dataset_retrieval.redis_client") + @patch("core.rag.retrieval.dataset_retrieval.time") + def test_rate_limit_enabled_not_exceeded(self, mock_time, mock_redis, mock_feature_service, mock_session_factory): + """ + Test that when rate limit is enabled but not exceeded, no exception is raised. + + This test simulates a tenant making requests within their rate limit. + The Redis sorted set stores timestamps of recent requests, and old + requests (older than 60 seconds) are removed. + + Verifies: + - Redis zadd is called to track the request + - Redis zremrangebyscore removes old entries + - Redis zcard returns count within limit + - No exception is raised + """ + # Arrange + tenant_id = str(uuid4()) + dataset_retrieval = DatasetRetrieval() + + # Mock rate limit enabled with limit of 100 requests per minute + mock_limit = Mock() + mock_limit.enabled = True + mock_limit.limit = 100 + mock_limit.subscription_plan = "professional" + mock_feature_service.get_knowledge_rate_limit.return_value = mock_limit + + # Mock time + current_time = 1234567890000 # Current time in milliseconds + mock_time.time.return_value = current_time / 1000 # Return seconds + mock_time.time.__mul__ = lambda self, x: int(self * x) # Multiply to get milliseconds + + # Mock Redis operations + # zcard returns 50 (within limit of 100) + mock_redis.zcard.return_value = 50 + + # Mock session_factory.create_session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session_factory.create_session.return_value.__exit__.return_value = None + + # Act & Assert - should not raise any exception + dataset_retrieval._check_knowledge_rate_limit(tenant_id) + + # Verify Redis operations + expected_key = f"rate_limit_{tenant_id}" + mock_redis.zadd.assert_called_once_with(expected_key, {current_time: current_time}) + mock_redis.zremrangebyscore.assert_called_once_with(expected_key, 0, current_time - 60000) + mock_redis.zcard.assert_called_once_with(expected_key) + + @patch("core.rag.retrieval.dataset_retrieval.session_factory") + @patch("core.rag.retrieval.dataset_retrieval.FeatureService") + @patch("core.rag.retrieval.dataset_retrieval.redis_client") + @patch("core.rag.retrieval.dataset_retrieval.time") + def test_rate_limit_enabled_exceeded_raises_exception( + self, mock_time, mock_redis, mock_feature_service, mock_session_factory + ): + """ + Test that when rate limit is enabled and exceeded, RateLimitExceededError is raised. + + This test simulates a tenant exceeding their rate limit. When the count + of recent requests exceeds the limit, an exception should be raised and + a RateLimitLog should be created. + + Verifies: + - Redis zcard returns count exceeding limit + - RateLimitExceededError is raised with correct message + - RateLimitLog is created in database + - Session operations are performed correctly + """ + # Arrange + tenant_id = str(uuid4()) + dataset_retrieval = DatasetRetrieval() + + # Mock rate limit enabled with limit of 100 requests per minute + mock_limit = Mock() + mock_limit.enabled = True + mock_limit.limit = 100 + mock_limit.subscription_plan = "professional" + mock_feature_service.get_knowledge_rate_limit.return_value = mock_limit + + # Mock time + current_time = 1234567890000 + mock_time.time.return_value = current_time / 1000 + + # Mock Redis operations - return count exceeding limit + mock_redis.zcard.return_value = 150 # Exceeds limit of 100 + + # Mock session_factory.create_session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session_factory.create_session.return_value.__exit__.return_value = None + + # Act & Assert + with pytest.raises(exc.RateLimitExceededError) as exc_info: + dataset_retrieval._check_knowledge_rate_limit(tenant_id) + + # Verify exception message + assert "knowledge base request rate limit" in str(exc_info.value) + + # Verify RateLimitLog was created + mock_session.add.assert_called_once() + added_log = mock_session.add.call_args[0][0] + assert added_log.tenant_id == tenant_id + assert added_log.subscription_plan == "professional" + assert added_log.operation == "knowledge" + + +# ==================== Test _get_available_datasets ==================== + + +class TestGetAvailableDatasets: + """ + Test suite for _get_available_datasets method. + + The _get_available_datasets method retrieves datasets that are available + for retrieval. A dataset is considered available if: + - It belongs to the specified tenant + - It's in the list of requested dataset_ids + - It has at least one completed, enabled, non-archived document OR + - It's an external provider dataset + + Note: Due to SQLAlchemy subquery complexity, full testing is done in + integration tests. Unit tests here verify basic behavior. + """ + + def test_method_exists_and_has_correct_signature(self): + """ + Test that the method exists and has the correct signature. + + Verifies: + - Method exists on DatasetRetrieval class + - Accepts tenant_id and dataset_ids parameters + """ + # Arrange + dataset_retrieval = DatasetRetrieval() + + # Assert - method exists + assert hasattr(dataset_retrieval, "_get_available_datasets") + # Assert - method is callable + assert callable(dataset_retrieval._get_available_datasets) + + +# ==================== Test knowledge_retrieval ==================== + + +class TestDatasetRetrievalKnowledgeRetrieval: + """ + Test suite for knowledge_retrieval method. + + The knowledge_retrieval method is the main entry point for retrieving + knowledge from datasets. It orchestrates the entire retrieval process: + 1. Checks rate limits + 2. Gets available datasets + 3. Applies metadata filtering if enabled + 4. Performs retrieval (single or multiple mode) + 5. Formats and returns results + + Test Cases: + ============ + 1. Single mode retrieval + 2. Multiple mode retrieval + 3. Metadata filtering disabled + 4. Metadata filtering automatic + 5. Metadata filtering manual + 6. External documents handling + 7. Dify documents handling + 8. Empty results handling + 9. Rate limit exceeded + 10. No available datasets + """ + + def test_knowledge_retrieval_single_mode_basic(self): + """ + Test knowledge_retrieval in single retrieval mode - basic check. + + Note: Full single mode testing requires complex model mocking and + is better suited for integration tests. This test verifies the + method accepts single mode requests. + + Verifies: + - Method can accept single mode request + - Request parameters are correctly structured + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="single", + model_provider="openai", + model_name="gpt-4", + model_mode="chat", + completion_params={"temperature": 0.7}, + ) + + # Assert - request is properly structured + assert request.retrieval_mode == "single" + assert request.model_provider == "openai" + assert request.model_name == "gpt-4" + assert request.model_mode == "chat" + + @patch("core.rag.retrieval.dataset_retrieval.DataPostProcessor") + @patch("core.rag.retrieval.dataset_retrieval.session_factory") + def test_knowledge_retrieval_multiple_mode(self, mock_session_factory, mock_data_processor): + """ + Test knowledge_retrieval in multiple retrieval mode. + + In multiple mode, retrieval is performed across all datasets and + results are combined and reranked. + + Verifies: + - Rate limit is checked + - Available datasets are retrieved + - Multiple retrieval is performed + - Results are combined and reranked + - Results are formatted correctly + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id1 = str(uuid4()) + dataset_id2 = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id1, dataset_id2], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + score_threshold=0.7, + reranking_enable=True, + reranking_mode="reranking_model", + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v2"}, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock _check_knowledge_rate_limit + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + # Mock _get_available_datasets + mock_dataset1 = create_mock_dataset_methods(dataset_id=dataset_id1, tenant_id=tenant_id) + mock_dataset2 = create_mock_dataset_methods(dataset_id=dataset_id2, tenant_id=tenant_id) + with patch.object( + dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset1, mock_dataset2] + ): + # Mock get_metadata_filter_condition + with patch.object(dataset_retrieval, "get_metadata_filter_condition", return_value=(None, None)): + # Mock multiple_retrieve to return documents + doc1 = create_mock_document_methods("Python is great", "doc1", score=0.9) + doc2 = create_mock_document_methods("Python is awesome", "doc2", score=0.8) + with patch.object( + dataset_retrieval, "multiple_retrieve", return_value=[doc1, doc2] + ) as mock_multiple_retrieve: + # Mock format_retrieval_documents + mock_record = Mock() + mock_record.segment = Mock() + mock_record.segment.dataset_id = dataset_id1 + mock_record.segment.document_id = str(uuid4()) + mock_record.segment.index_node_hash = "hash123" + mock_record.segment.hit_count = 5 + mock_record.segment.word_count = 100 + mock_record.segment.position = 1 + mock_record.segment.get_sign_content.return_value = "Python is great" + mock_record.segment.answer = None + mock_record.score = 0.9 + mock_record.child_chunks = [] + mock_record.summary = None + mock_record.files = None + + mock_retrieval_service = Mock() + mock_retrieval_service.format_retrieval_documents.return_value = [mock_record] + + with patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService", + return_value=mock_retrieval_service, + ): + # Mock database queries + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session_factory.create_session.return_value.__exit__.return_value = None + + mock_dataset_from_db = Mock() + mock_dataset_from_db.id = dataset_id1 + mock_dataset_from_db.name = "test_dataset" + + mock_document = Mock() + mock_document.id = str(uuid4()) + mock_document.name = "test_doc" + mock_document.data_source_type = "upload_file" + mock_document.doc_metadata = {} + + mock_session.query.return_value.filter.return_value.all.return_value = [ + mock_dataset_from_db + ] + mock_session.query.return_value.filter.return_value.all.__iter__ = lambda self: iter( + [mock_dataset_from_db, mock_document] + ) + + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert isinstance(result, list) + mock_multiple_retrieve.assert_called_once() + + def test_knowledge_retrieval_metadata_filtering_disabled(self): + """ + Test knowledge_retrieval with metadata filtering disabled. + + When metadata filtering is disabled, get_metadata_filter_condition is + NOT called (the method checks metadata_filtering_mode != "disabled"). + + Verifies: + - get_metadata_filter_condition is NOT called when mode is "disabled" + - Retrieval proceeds without metadata filters + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + metadata_filtering_mode="disabled", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + mock_dataset = create_mock_dataset_methods(dataset_id=dataset_id, tenant_id=tenant_id) + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset]): + # Mock get_metadata_filter_condition - should NOT be called when disabled + with patch.object( + dataset_retrieval, + "get_metadata_filter_condition", + return_value=(None, None), + ) as mock_get_metadata: + with patch.object(dataset_retrieval, "multiple_retrieve", return_value=[]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert isinstance(result, list) + # get_metadata_filter_condition should NOT be called when mode is "disabled" + mock_get_metadata.assert_not_called() + + def test_knowledge_retrieval_with_external_documents(self): + """ + Test knowledge_retrieval with external documents. + + External documents come from external knowledge bases and should + be formatted differently than Dify documents. + + Verifies: + - External documents are handled correctly + - Provider is set to "external" + - Metadata includes external-specific fields + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + mock_dataset = create_mock_dataset_methods(dataset_id=dataset_id, tenant_id=tenant_id, provider="external") + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset]): + with patch.object(dataset_retrieval, "get_metadata_filter_condition", return_value=(None, None)): + # Create external document + external_doc = create_mock_document_methods( + "External knowledge", + "doc1", + score=0.9, + provider="external", + additional_metadata={ + "dataset_id": dataset_id, + "dataset_name": "external_kb", + "document_id": "ext_doc1", + "title": "External Document", + }, + ) + with patch.object(dataset_retrieval, "multiple_retrieve", return_value=[external_doc]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert isinstance(result, list) + if result: + assert result[0].metadata.data_source_type == "external" + + def test_knowledge_retrieval_empty_results(self): + """ + Test knowledge_retrieval when no documents are found. + + Verifies: + - Empty list is returned + - No errors are raised + - All dependencies are still called + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + mock_dataset = create_mock_dataset_methods(dataset_id=dataset_id, tenant_id=tenant_id) + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset]): + with patch.object(dataset_retrieval, "get_metadata_filter_condition", return_value=(None, None)): + # Mock multiple_retrieve to return empty list + with patch.object(dataset_retrieval, "multiple_retrieve", return_value=[]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert result == [] + + def test_knowledge_retrieval_rate_limit_exceeded(self): + """ + Test knowledge_retrieval when rate limit is exceeded. + + Verifies: + - RateLimitExceededError is raised + - No further processing occurs + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock _check_knowledge_rate_limit to raise exception + with patch.object( + dataset_retrieval, + "_check_knowledge_rate_limit", + side_effect=exc.RateLimitExceededError("Rate limit exceeded"), + ): + # Act & Assert + with pytest.raises(exc.RateLimitExceededError): + dataset_retrieval.knowledge_retrieval(request) + + def test_knowledge_retrieval_no_available_datasets(self): + """ + Test knowledge_retrieval when no datasets are available. + + Verifies: + - Empty list is returned + - No retrieval is attempted + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + # Mock _get_available_datasets to return empty list + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert result == [] + + def test_knowledge_retrieval_handles_multiple_documents_with_different_scores(self): + """ + Test that knowledge_retrieval processes multiple documents with different scores. + + Note: Full sorting and position testing requires complex SQLAlchemy mocking + which is better suited for integration tests. This test verifies documents + with different scores can be created and have their metadata. + + Verifies: + - Documents can be created with different scores + - Score metadata is properly set + """ + # Create documents with different scores + doc1 = create_mock_document_methods("Low score", "doc1", score=0.6) + doc2 = create_mock_document_methods("High score", "doc2", score=0.95) + doc3 = create_mock_document_methods("Medium score", "doc3", score=0.8) + + # Assert - each document has the correct score + assert doc1.metadata["score"] == 0.6 + assert doc2.metadata["score"] == 0.95 + assert doc3.metadata["score"] == 0.8 + + # Assert - documents are correctly sorted (not the retrieval result, just the list) + unsorted = [doc1, doc2, doc3] + sorted_docs = sorted(unsorted, key=lambda d: d.metadata["score"], reverse=True) + assert [d.metadata["score"] for d in sorted_docs] == [0.95, 0.8, 0.6] + + +class TestProcessMetadataFilterFunc: + """ + Comprehensive test suite for process_metadata_filter_func method. + + This test class validates all metadata filtering conditions supported by + the DatasetRetrieval class, including string operations, numeric comparisons, + null checks, and list operations. + + Method Signature: + ================== + def process_metadata_filter_func( + self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list + ) -> list: + + The method builds SQLAlchemy filter expressions by: + 1. Validating value is not None (except for empty/not empty conditions) + 2. Using DatasetDocument.doc_metadata JSON field operations + 3. Adding appropriate SQLAlchemy expressions to the filters list + 4. Returning the updated filters list + + Mocking Strategy: + ================== + - Mock DatasetDocument.doc_metadata to avoid database dependencies + - Verify filter expressions are created correctly + - Test with various data types (str, int, float, list) + """ + + @pytest.fixture + def retrieval(self): + """ + Create a DatasetRetrieval instance for testing. + + Returns: + DatasetRetrieval: Instance to test process_metadata_filter_func + """ + return DatasetRetrieval() + + @pytest.fixture + def mock_doc_metadata(self): + """ + Mock the DatasetDocument.doc_metadata JSON field. + + The method uses DatasetDocument.doc_metadata[metadata_name] to access + JSON fields. We mock this to avoid database dependencies. + + Returns: + Mock: Mocked doc_metadata attribute + """ + mock_metadata_field = MagicMock() + + # Create mock for string access + mock_string_access = MagicMock() + mock_string_access.like = MagicMock() + mock_string_access.notlike = MagicMock() + mock_string_access.__eq__ = MagicMock(return_value=MagicMock()) + mock_string_access.__ne__ = MagicMock(return_value=MagicMock()) + mock_string_access.in_ = MagicMock(return_value=MagicMock()) + + # Create mock for float access (for numeric comparisons) + mock_float_access = MagicMock() + mock_float_access.__eq__ = MagicMock(return_value=MagicMock()) + mock_float_access.__ne__ = MagicMock(return_value=MagicMock()) + mock_float_access.__lt__ = MagicMock(return_value=MagicMock()) + mock_float_access.__gt__ = MagicMock(return_value=MagicMock()) + mock_float_access.__le__ = MagicMock(return_value=MagicMock()) + mock_float_access.__ge__ = MagicMock(return_value=MagicMock()) + + # Create mock for null checks + mock_null_access = MagicMock() + mock_null_access.is_ = MagicMock(return_value=MagicMock()) + mock_null_access.isnot = MagicMock(return_value=MagicMock()) + + # Setup __getitem__ to return appropriate mock based on usage + def getitem_side_effect(name): + if name in ["author", "title", "category"]: + return mock_string_access + elif name in ["year", "price", "rating"]: + return mock_float_access + else: + return mock_string_access + + mock_metadata_field.__getitem__ = MagicMock(side_effect=getitem_side_effect) + mock_metadata_field.as_string.return_value = mock_string_access + mock_metadata_field.as_float.return_value = mock_float_access + mock_metadata_field[metadata_name:str].is_ = mock_null_access.is_ + mock_metadata_field[metadata_name:str].isnot = mock_null_access.isnot + + return mock_metadata_field + + # ==================== String Condition Tests ==================== + + def test_contains_condition_string_value(self, retrieval): + """ + Test 'contains' condition with string value. + + Verifies: + - Filters list is populated with LIKE expression + - Pattern matching uses %value% syntax + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = "John" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_not_contains_condition(self, retrieval): + """ + Test 'not contains' condition. + + Verifies: + - Filters list is populated with NOT LIKE expression + - Pattern matching uses %value% syntax with negation + """ + filters = [] + sequence = 0 + condition = "not contains" + metadata_name = "title" + value = "banned" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_start_with_condition(self, retrieval): + """ + Test 'start with' condition. + + Verifies: + - Filters list is populated with LIKE expression + - Pattern matching uses value% syntax + """ + filters = [] + sequence = 0 + condition = "start with" + metadata_name = "category" + value = "tech" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_end_with_condition(self, retrieval): + """ + Test 'end with' condition. + + Verifies: + - Filters list is populated with LIKE expression + - Pattern matching uses %value syntax + """ + filters = [] + sequence = 0 + condition = "end with" + metadata_name = "filename" + value = ".pdf" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Equality Condition Tests ==================== + + def test_is_condition_with_string_value(self, retrieval): + """ + Test 'is' (=) condition with string value. + + Verifies: + - Filters list is populated with equality expression + - String comparison is used + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "author" + value = "Jane Doe" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_equals_condition_with_string_value(self, retrieval): + """ + Test '=' condition with string value. + + Verifies: + - Same behavior as 'is' condition + - String comparison is used + """ + filters = [] + sequence = 0 + condition = "=" + metadata_name = "category" + value = "technology" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_condition_with_int_value(self, retrieval): + """ + Test 'is' condition with integer value. + + Verifies: + - Numeric comparison is used + - as_float() is called on the metadata field + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "year" + value = 2023 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_condition_with_float_value(self, retrieval): + """ + Test 'is' condition with float value. + + Verifies: + - Numeric comparison is used + - as_float() is called on the metadata field + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "price" + value = 19.99 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_not_condition_with_string_value(self, retrieval): + """ + Test 'is not' (≠) condition with string value. + + Verifies: + - Filters list is populated with inequality expression + - String comparison is used + """ + filters = [] + sequence = 0 + condition = "is not" + metadata_name = "author" + value = "Unknown" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_not_equals_condition(self, retrieval): + """ + Test '≠' condition with string value. + + Verifies: + - Same behavior as 'is not' condition + - Inequality expression is used + """ + filters = [] + sequence = 0 + condition = "≠" + metadata_name = "category" + value = "archived" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_not_condition_with_numeric_value(self, retrieval): + """ + Test 'is not' condition with numeric value. + + Verifies: + - Numeric inequality comparison is used + - as_float() is called on the metadata field + """ + filters = [] + sequence = 0 + condition = "is not" + metadata_name = "year" + value = 2000 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Null Condition Tests ==================== + + def test_empty_condition(self, retrieval): + """ + Test 'empty' condition (null check). + + Verifies: + - Filters list is populated with IS NULL expression + - Value can be None for this condition + """ + filters = [] + sequence = 0 + condition = "empty" + metadata_name = "author" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_not_empty_condition(self, retrieval): + """ + Test 'not empty' condition (not null check). + + Verifies: + - Filters list is populated with IS NOT NULL expression + - Value can be None for this condition + """ + filters = [] + sequence = 0 + condition = "not empty" + metadata_name = "description" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Numeric Comparison Tests ==================== + + def test_before_condition(self, retrieval): + """ + Test 'before' (<) condition. + + Verifies: + - Filters list is populated with less than expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "before" + metadata_name = "year" + value = 2020 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_less_than_condition(self, retrieval): + """ + Test '<' condition. + + Verifies: + - Same behavior as 'before' condition + - Less than expression is used + """ + filters = [] + sequence = 0 + condition = "<" + metadata_name = "price" + value = 100.0 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_after_condition(self, retrieval): + """ + Test 'after' (>) condition. + + Verifies: + - Filters list is populated with greater than expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "after" + metadata_name = "year" + value = 2020 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_greater_than_condition(self, retrieval): + """ + Test '>' condition. + + Verifies: + - Same behavior as 'after' condition + - Greater than expression is used + """ + filters = [] + sequence = 0 + condition = ">" + metadata_name = "rating" + value = 4.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_less_than_or_equal_condition_unicode(self, retrieval): + """ + Test '≤' condition. + + Verifies: + - Filters list is populated with less than or equal expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "≤" + metadata_name = "price" + value = 50.0 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_less_than_or_equal_condition_ascii(self, retrieval): + """ + Test '<=' condition. + + Verifies: + - Same behavior as '≤' condition + - Less than or equal expression is used + """ + filters = [] + sequence = 0 + condition = "<=" + metadata_name = "year" + value = 2023 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_greater_than_or_equal_condition_unicode(self, retrieval): + """ + Test '≥' condition. + + Verifies: + - Filters list is populated with greater than or equal expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "≥" + metadata_name = "rating" + value = 3.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_greater_than_or_equal_condition_ascii(self, retrieval): + """ + Test '>=' condition. + + Verifies: + - Same behavior as '≥' condition + - Greater than or equal expression is used + """ + filters = [] + sequence = 0 + condition = ">=" + metadata_name = "year" + value = 2000 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== List/In Condition Tests ==================== + + def test_in_condition_with_comma_separated_string(self, retrieval): + """ + Test 'in' condition with comma-separated string value. + + Verifies: + - String is split into list + - Whitespace is trimmed from each value + - IN expression is created + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = "tech, science, AI " + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_list_value(self, retrieval): + """ + Test 'in' condition with list value. + + Verifies: + - List is processed correctly + - None values are filtered out + - IN expression is created with valid values + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "tags" + value = ["python", "javascript", None, "golang"] + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_tuple_value(self, retrieval): + """ + Test 'in' condition with tuple value. + + Verifies: + - Tuple is processed like a list + - IN expression is created + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = ("tech", "science", "ai") + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_empty_string(self, retrieval): + """ + Test 'in' condition with empty string value. + + Verifies: + - Empty string results in literal(False) filter + - No valid values to match + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = "" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + # Verify it's a literal(False) expression + # This is a bit tricky to test without access to the actual expression + + def test_in_condition_with_only_whitespace(self, retrieval): + """ + Test 'in' condition with whitespace-only string value. + + Verifies: + - Whitespace-only string results in literal(False) filter + - All values are stripped and filtered out + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = " , , " + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_single_string(self, retrieval): + """ + Test 'in' condition with single non-comma string. + + Verifies: + - Single string is treated as single-item list + - IN expression is created with one value + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = "technology" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Edge Case Tests ==================== + + def test_none_value_with_non_empty_condition(self, retrieval): + """ + Test None value with conditions that require value. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for None values (except empty/not empty) + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 # No filter added + + def test_none_value_with_equals_condition(self, retrieval): + """ + Test None value with 'is' (=) condition. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for None values + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "author" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 + + def test_none_value_with_numeric_condition(self, retrieval): + """ + Test None value with numeric comparison condition. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for None values + """ + filters = [] + sequence = 0 + condition = ">" + metadata_name = "year" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 + + def test_existing_filters_preserved(self, retrieval): + """ + Test that existing filters are preserved. + + Verifies: + - Existing filters in the list are not removed + - New filters are appended to the list + """ + existing_filter = MagicMock() + filters = [existing_filter] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = "test" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 2 + assert filters[0] == existing_filter + + def test_multiple_filters_accumulated(self, retrieval): + """ + Test multiple calls to accumulate filters. + + Verifies: + - Each call adds a new filter to the list + - All filters are preserved across calls + """ + filters = [] + + # First filter + retrieval.process_metadata_filter_func(0, "contains", "author", "John", filters) + assert len(filters) == 1 + + # Second filter + retrieval.process_metadata_filter_func(1, ">", "year", 2020, filters) + assert len(filters) == 2 + + # Third filter + retrieval.process_metadata_filter_func(2, "is", "category", "tech", filters) + assert len(filters) == 3 + + def test_unknown_condition(self, retrieval): + """ + Test unknown/unsupported condition. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for unknown conditions + """ + filters = [] + sequence = 0 + condition = "unknown_condition" + metadata_name = "author" + value = "test" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 + + def test_empty_string_value_with_contains(self, retrieval): + """ + Test empty string value with 'contains' condition. + + Verifies: + - Filter is added even with empty string + - LIKE expression is created + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = "" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_special_characters_in_value(self, retrieval): + """ + Test special characters in value string. + + Verifies: + - Special characters are handled in value + - LIKE expression is created correctly + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "title" + value = "C++ & Python's features" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_zero_value_with_numeric_condition(self, retrieval): + """ + Test zero value with numeric comparison condition. + + Verifies: + - Zero is treated as valid value + - Numeric comparison is performed + """ + filters = [] + sequence = 0 + condition = ">" + metadata_name = "price" + value = 0 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_negative_value_with_numeric_condition(self, retrieval): + """ + Test negative value with numeric comparison condition. + + Verifies: + - Negative numbers are handled correctly + - Numeric comparison is performed + """ + filters = [] + sequence = 0 + condition = "<" + metadata_name = "temperature" + value = -10.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_float_value_with_integer_comparison(self, retrieval): + """ + Test float value with numeric comparison condition. + + Verifies: + - Float values work correctly + - Numeric comparison is performed + """ + filters = [] + sequence = 0 + condition = ">=" + metadata_name = "rating" + value = 4.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + +class TestKnowledgeRetrievalRegression: + @pytest.fixture + def mock_dataset(self) -> Dataset: + dataset = Mock(spec=Dataset) + dataset.id = str(uuid4()) + dataset.tenant_id = str(uuid4()) + dataset.name = "test_dataset" + dataset.indexing_technique = "high_quality" + dataset.provider = "dify" + return dataset + + def test_multiple_retrieve_reranking_with_app_context(self, mock_dataset): + """ + Repro test for current bug: + reranking runs after `with flask_app.app_context():` exits. + `_multiple_retrieve_thread` catches exceptions and stores them into `thread_exceptions`, + so we must assert from that list (not from an outer try/except). + """ + dataset_retrieval = DatasetRetrieval() + flask_app = Flask(__name__) + tenant_id = str(uuid4()) + + # second dataset to ensure dataset_count > 1 reranking branch + secondary_dataset = Mock(spec=Dataset) + secondary_dataset.id = str(uuid4()) + secondary_dataset.provider = "dify" + secondary_dataset.indexing_technique = "high_quality" + + # retriever returns 1 doc into internal list (all_documents_item) + document = Document( + page_content="Context aware doc", + metadata={ + "doc_id": "doc1", + "score": 0.95, + "document_id": str(uuid4()), + "dataset_id": mock_dataset.id, + }, + provider="dify", + ) + + def fake_retriever( + flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids + ): + all_documents.append(document) + + called = {"init": 0, "invoke": 0} + + class ContextRequiredPostProcessor: + def __init__(self, *args, **kwargs): + called["init"] += 1 + # will raise RuntimeError if no Flask app context exists + _ = current_app.name + + def invoke(self, *args, **kwargs): + called["invoke"] += 1 + _ = current_app.name + return kwargs.get("documents") or args[1] + + # output list from _multiple_retrieve_thread + all_documents: list[Document] = [] + + # IMPORTANT: _multiple_retrieve_thread swallows exceptions and appends them here + thread_exceptions: list[Exception] = [] + + def target(): + with patch.object(dataset_retrieval, "_retriever", side_effect=fake_retriever): + with patch( + "core.rag.retrieval.dataset_retrieval.DataPostProcessor", + ContextRequiredPostProcessor, + ): + dataset_retrieval._multiple_retrieve_thread( + flask_app=flask_app, + available_datasets=[mock_dataset, secondary_dataset], + metadata_condition=None, + metadata_filter_document_ids=None, + all_documents=all_documents, + tenant_id=tenant_id, + reranking_enable=True, + reranking_mode="reranking_model", + reranking_model={ + "reranking_provider_name": "cohere", + "reranking_model_name": "rerank-v2", + }, + weights=None, + top_k=3, + score_threshold=0.0, + query="test query", + attachment_id=None, + dataset_count=2, # force reranking branch + thread_exceptions=thread_exceptions, # ✅ key + ) + + t = threading.Thread(target=target) + t.start() + t.join() + + # Ensure reranking branch was actually executed + assert called["init"] >= 1, "DataPostProcessor was never constructed; reranking branch may not have run." + + # Current buggy code should record an exception (not raise it) + assert not thread_exceptions, thread_exceptions + + +class _FakeFlaskApp: + def app_context(self): + return nullcontext() + + +class _ImmediateThread: + def __init__(self, target=None, kwargs=None): + self._target = target + self._kwargs = kwargs or {} + self._alive = False + + def start(self) -> None: + self._alive = True + if self._target: + self._target(**self._kwargs) + self._alive = False + + def join(self, timeout=None) -> None: + return None + + def is_alive(self) -> bool: + return self._alive + + +class TestDatasetRetrievalAdditionalHelpers: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_llm_usage_and_record_usage(self, retrieval: DatasetRetrieval) -> None: + empty_usage = retrieval.llm_usage + assert empty_usage.total_tokens == 0 + + retrieval._record_usage(None) + assert retrieval.llm_usage.total_tokens == 0 + + usage_1 = LLMUsage.from_metadata({"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5}) + usage_2 = LLMUsage.from_metadata({"prompt_tokens": 4, "completion_tokens": 1, "total_tokens": 5}) + retrieval._record_usage(usage_1) + retrieval._record_usage(usage_2) + assert retrieval.llm_usage.total_tokens == 10 + + def test_replace_metadata_filter_value(self, retrieval: DatasetRetrieval) -> None: + assert retrieval._replace_metadata_filter_value("plain", {}) == "plain" + replaced = retrieval._replace_metadata_filter_value( + "hello {{name}}\n\t{{missing}}", + {"name": "world"}, + ) + assert replaced == "hello world {{missing}}" + + def test_process_metadata_filter_in_with_scalar_fallback(self) -> None: + filters: list = [] + result = DatasetRetrieval.process_metadata_filter_func( + sequence=0, + condition="in", + metadata_name="category", + value=123, + filters=filters, + ) + assert result is filters + assert len(filters) == 1 + + def test_calculate_vector_score(self, retrieval: DatasetRetrieval) -> None: + doc_high = Document(page_content="a", metadata={"score": 0.9}, provider="dify") + doc_low = Document(page_content="b", metadata={"score": 0.2}, provider="dify") + doc_no_meta = Document(page_content="c", metadata={}, provider="dify") + + filtered = retrieval.calculate_vector_score([doc_low, doc_high, doc_no_meta], top_k=1, score_threshold=0.5) + assert len(filtered) == 1 + assert filtered[0].metadata["score"] == 0.9 + + assert retrieval.calculate_vector_score([doc_low], top_k=2, score_threshold=1.0) == [] + + def test_calculate_keyword_score(self, retrieval: DatasetRetrieval) -> None: + documents = [ + Document(page_content="python language", metadata={"doc_id": "1"}, provider="dify"), + Document(page_content="java language", metadata={"doc_id": "2"}, provider="dify"), + ] + keyword_handler = Mock() + keyword_handler.extract_keywords.side_effect = [ + ["python", "language"], + ["python", "language"], + ["java", "language"], + ] + + with patch("core.rag.retrieval.dataset_retrieval.JiebaKeywordTableHandler", return_value=keyword_handler): + ranked = retrieval.calculate_keyword_score("python language", documents, top_k=1) + + assert len(ranked) == 1 + assert "keywords" in ranked[0].metadata + assert ranked[0].metadata["doc_id"] == "1" + + def test_send_trace_task(self, retrieval: DatasetRetrieval) -> None: + trace_manager = Mock() + retrieval.application_generate_entity = SimpleNamespace(trace_manager=trace_manager) + docs = [Document(page_content="d", metadata={}, provider="dify")] + + retrieval._send_trace_task("m1", docs, {"cost": 1}) + trace_manager.add_trace_task.assert_called_once() + + retrieval.application_generate_entity = None + trace_manager.reset_mock() + retrieval._send_trace_task("m1", docs, {"cost": 1}) + trace_manager.add_trace_task.assert_not_called() + + def test_on_query(self, retrieval: DatasetRetrieval) -> None: + with patch("core.rag.retrieval.dataset_retrieval.db.session") as mock_session: + retrieval._on_query( + query=None, + attachment_ids=None, + dataset_ids=["d1"], + app_id="a1", + user_from="account", + user_id="u1", + ) + mock_session.add_all.assert_not_called() + + retrieval._on_query( + query="python", + attachment_ids=["f1"], + dataset_ids=["d1", "d2"], + app_id="a1", + user_from="account", + user_id="u1", + ) + mock_session.add_all.assert_called() + mock_session.commit.assert_called() + + def test_handle_invoke_result(self, retrieval: DatasetRetrieval) -> None: + usage = LLMUsage.empty_usage() + chunk_1 = SimpleNamespace( + model="m1", + prompt_messages=[Mock()], + delta=SimpleNamespace(message=SimpleNamespace(content="hello "), usage=usage), + ) + chunk_2 = SimpleNamespace( + model="m1", + prompt_messages=[Mock()], + delta=SimpleNamespace( + message=SimpleNamespace(content=[SimpleNamespace(data="world")]), + usage=None, + ), + ) + text, returned_usage = retrieval._handle_invoke_result(iter([chunk_1, chunk_2])) + assert text == "hello world" + assert returned_usage == usage + + text_empty, usage_empty = retrieval._handle_invoke_result(iter([])) + assert text_empty == "" + assert usage_empty == LLMUsage.empty_usage() + + def test_get_prompt_template(self, retrieval: DatasetRetrieval) -> None: + model_config_chat = ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="chat", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=["x"], + ) + model_config_completion = ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="completion", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=[], + ) + + with patch("core.rag.retrieval.dataset_retrieval.AdvancedPromptTransform") as mock_prompt_transform: + mock_prompt_transform.return_value.get_prompt.return_value = ["prompt"] + prompt_messages, stop = retrieval._get_prompt_template( + model_config=model_config_chat, + mode="chat", + metadata_fields=["author"], + query="python", + ) + assert prompt_messages == ["prompt"] + assert stop == ["x"] + + with patch( + "core.rag.retrieval.dataset_retrieval.METADATA_FILTER_COMPLETION_PROMPT", + "{input_text} {metadata_fields}", + ): + prompt_messages_completion, stop_completion = retrieval._get_prompt_template( + model_config=model_config_completion, + mode="completion", + metadata_fields=["author"], + query="python", + ) + assert prompt_messages_completion == ["prompt"] + assert stop_completion == [] + + with pytest.raises(ValueError): + retrieval._get_prompt_template( + model_config=model_config_chat, + mode="unknown-mode", + metadata_fields=[], + query="python", + ) + + def test_fetch_model_config_validation_and_success(self, retrieval: DatasetRetrieval) -> None: + with pytest.raises(ValueError, match="single_retrieval_config is required"): + retrieval._fetch_model_config("tenant-1", None) # type: ignore[arg-type] + + model_cfg = AppModelConfig(provider="openai", name="gpt", mode="chat", completion_params={"stop": ["END"]}) + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.provider_model_bundle = Mock() + model_instance.model_type_instance = Mock() + model_instance.model_type_instance.get_model_schema.return_value = Mock() + + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_manager, + patch("core.rag.retrieval.dataset_retrieval.ModelConfigWithCredentialsEntity") as mock_cfg_entity, + ): + mock_manager.return_value.get_model_instance.return_value = model_instance + mock_cfg_entity.return_value = SimpleNamespace( + provider="openai", + model="gpt", + stop=["END"], + parameters={"temperature": 0.1}, + ) + + model_instance.provider_model_bundle.configuration.get_provider_model.return_value = None + with pytest.raises(ValueError, match="not exist"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model = SimpleNamespace(status=ModelStatus.NO_CONFIGURE) + model_instance.provider_model_bundle.configuration.get_provider_model.return_value = provider_model + with pytest.raises(ValueError, match="credentials is not initialized"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model.status = ModelStatus.NO_PERMISSION + with pytest.raises(ValueError, match="currently not support"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model.status = ModelStatus.QUOTA_EXCEEDED + with pytest.raises(ValueError, match="quota exceeded"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model.status = ModelStatus.ACTIVE + bad_mode_cfg = AppModelConfig(provider="openai", name="gpt", mode="chat") + bad_mode_cfg.mode = None # type: ignore[assignment] + with pytest.raises(ValueError, match="LLM mode is required"): + retrieval._fetch_model_config("tenant-1", bad_mode_cfg) + + model_instance.model_type_instance.get_model_schema.return_value = None + with pytest.raises(ValueError, match="not exist"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + model_instance.model_type_instance.get_model_schema.return_value = Mock() + model_cfg_success = AppModelConfig( + provider="openai", + name="gpt", + mode="chat", + completion_params={"temperature": 0.1, "stop": ["END"]}, + ) + _, config = retrieval._fetch_model_config("tenant-1", model_cfg_success) + assert config.provider == "openai" + assert config.model == "gpt" + assert config.stop == ["END"] + assert "stop" not in config.parameters + + def test_automatic_metadata_filter_func(self, retrieval: DatasetRetrieval) -> None: + metadata_field = SimpleNamespace(name="author") + model_instance = Mock() + model_instance.invoke_llm.return_value = iter([Mock()]) + model_config = ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="chat", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=[], + ) + usage = LLMUsage.from_metadata({"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}) + session_scalars = Mock() + session_scalars.all.return_value = [metadata_field] + + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalars", return_value=session_scalars), + patch.object(retrieval, "_fetch_model_config", return_value=(model_instance, model_config)), + patch.object(retrieval, "_get_prompt_template", return_value=(["prompt"], [])), + patch.object(retrieval, "_handle_invoke_result", return_value=('{"metadata_map":[]}', usage)), + patch("core.rag.retrieval.dataset_retrieval.parse_and_check_json_markdown") as mock_parse, + patch.object(retrieval, "_record_usage") as mock_record_usage, + ): + mock_parse.return_value = { + "metadata_map": [ + { + "metadata_field_name": "author", + "metadata_field_value": "Alice", + "comparison_operator": "contains", + }, + { + "metadata_field_name": "ignored", + "metadata_field_value": "value", + "comparison_operator": "contains", + }, + ] + } + result = retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + ) + + assert result == [{"metadata_name": "author", "value": "Alice", "condition": "contains"}] + mock_record_usage.assert_called_once_with(usage) + + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalars", return_value=session_scalars), + patch.object(retrieval, "_fetch_model_config", side_effect=RuntimeError("boom")), + ): + with pytest.raises(RuntimeError, match="boom"): + retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + ) + + def test_get_metadata_filter_condition(self, retrieval: DatasetRetrieval) -> None: + db_query = Mock() + db_query.where.return_value = db_query + db_query.all.return_value = [SimpleNamespace(dataset_id="d1", id="doc-1")] + + with patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query): + mapping, condition = retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="disabled", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=None, + inputs={}, + ) + assert mapping is None + assert condition is None + + automatic_filters = [{"condition": "contains", "metadata_name": "author", "value": "Alice"}] + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query), + patch.object(retrieval, "_automatic_metadata_filter_func", return_value=automatic_filters), + ): + mapping, condition = retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="automatic", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=AppMetadataFilteringCondition(logical_operator="or", conditions=[]), + inputs={}, + ) + assert mapping == {"d1": ["doc-1"]} + assert condition is not None + assert condition.logical_operator == "or" + + manual_conditions = AppMetadataFilteringCondition( + logical_operator="and", + conditions=[AppCondition(name="author", comparison_operator="contains", value="{{name}}")], + ) + with patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query): + mapping, condition = retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="manual", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=manual_conditions, + inputs={"name": "Alice"}, + ) + assert mapping == {"d1": ["doc-1"]} + assert condition is not None + assert condition.conditions[0].value == "Alice" + + with patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query): + with pytest.raises(ValueError, match="Invalid metadata filtering mode"): + retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="unsupported", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=None, + inputs={}, + ) + + def test_get_available_datasets(self, retrieval: DatasetRetrieval) -> None: + session = Mock() + subquery_query = Mock() + subquery_query.where.return_value = subquery_query + subquery_query.group_by.return_value = subquery_query + subquery_query.having.return_value = subquery_query + subquery_query.subquery.return_value = SimpleNamespace( + c=SimpleNamespace( + dataset_id=column("dataset_id"), available_document_count=column("available_document_count") + ) + ) + + dataset_query = Mock() + dataset_query.outerjoin.return_value = dataset_query + dataset_query.where.return_value = dataset_query + dataset_query.all.return_value = [SimpleNamespace(id="d1"), None, SimpleNamespace(id="d2")] + session.query.side_effect = [subquery_query, dataset_query] + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with patch("core.rag.retrieval.dataset_retrieval.session_factory.create_session", return_value=session_ctx): + available = retrieval._get_available_datasets("tenant-1", ["d1", "d2"]) + + assert [dataset.id for dataset in available] == ["d1", "d2"] + + def test_check_knowledge_rate_limit(self, retrieval: DatasetRetrieval) -> None: + with ( + patch("core.rag.retrieval.dataset_retrieval.FeatureService.get_knowledge_rate_limit") as mock_limit, + patch("core.rag.retrieval.dataset_retrieval.redis_client") as mock_redis, + patch("core.rag.retrieval.dataset_retrieval.time.time", return_value=100.0), + ): + mock_limit.return_value = SimpleNamespace(enabled=True, limit=2, subscription_plan="pro") + mock_redis.zcard.return_value = 1 + retrieval._check_knowledge_rate_limit("tenant-1") + mock_redis.zadd.assert_called_once() + + session = Mock() + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with ( + patch("core.rag.retrieval.dataset_retrieval.FeatureService.get_knowledge_rate_limit") as mock_limit, + patch("core.rag.retrieval.dataset_retrieval.redis_client") as mock_redis, + patch("core.rag.retrieval.dataset_retrieval.time.time", return_value=100.0), + patch("core.rag.retrieval.dataset_retrieval.session_factory.create_session", return_value=session_ctx), + ): + mock_limit.return_value = SimpleNamespace(enabled=True, limit=1, subscription_plan="pro") + mock_redis.zcard.return_value = 2 + with pytest.raises(exc.RateLimitExceededError): + retrieval._check_knowledge_rate_limit("tenant-1") + session.add.assert_called_once() + + with patch("core.rag.retrieval.dataset_retrieval.FeatureService.get_knowledge_rate_limit") as mock_limit: + mock_limit.return_value = SimpleNamespace(enabled=False) + retrieval._check_knowledge_rate_limit("tenant-1") + + +def _doc( + provider: str = "dify", + content: str = "content", + score: float = 0.9, + dataset_id: str = "dataset-1", + document_id: str = "document-1", + doc_id: str = "node-1", + extra: dict | None = None, +) -> Document: + metadata = { + "score": score, + "dataset_id": dataset_id, + "document_id": document_id, + "doc_id": doc_id, + } + if extra: + metadata.update(extra) + return Document(page_content=content, metadata=metadata, provider=provider) + + +class _ImmediateThread: + def __init__(self, target=None, kwargs=None): + self._target = target + self._kwargs = kwargs or {} + self._alive = False + + def start(self) -> None: + self._alive = True + if self._target: + self._target(**self._kwargs) + self._alive = False + + def join(self, timeout=None) -> None: + return None + + def is_alive(self) -> bool: + return self._alive + + +class _JoinDrivenThread: + def __init__(self, target=None, kwargs=None): + self._target = target + self._kwargs = kwargs or {} + self._started = False + self._alive = False + + def start(self) -> None: + self._started = True + self._alive = True + + def join(self, timeout=None) -> None: + if self._started and self._alive and self._target: + self._target(**self._kwargs) + self._alive = False + + def is_alive(self) -> bool: + return self._alive + + +@contextmanager +def _timer(): + yield {"cost": 1} + + +class TestKnowledgeRetrievalCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_returns_empty_when_query_missing(self, retrieval: DatasetRetrieval) -> None: + request = KnowledgeRetrievalRequest( + tenant_id="tenant-1", + user_id="user-1", + app_id="app-1", + user_from="workflow", + dataset_ids=["d1"], + query=None, + retrieval_mode="multiple", + ) + with ( + patch.object(retrieval, "_check_knowledge_rate_limit"), + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + ): + assert retrieval.knowledge_retrieval(request) == [] + + def test_raises_when_metadata_model_config_missing(self, retrieval: DatasetRetrieval) -> None: + request = KnowledgeRetrievalRequest( + tenant_id="tenant-1", + user_id="user-1", + app_id="app-1", + user_from="workflow", + dataset_ids=["d1"], + query="query", + retrieval_mode="multiple", + metadata_filtering_mode="automatic", + metadata_model_config=None, + ) + with ( + patch.object(retrieval, "_check_knowledge_rate_limit"), + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + ): + with pytest.raises(ValueError, match="metadata_model_config is required"): + retrieval.knowledge_retrieval(request) + + @pytest.mark.parametrize( + ("status", "error_cls"), + [ + (ModelStatus.NO_CONFIGURE, "ModelCredentialsNotInitializedError"), + (ModelStatus.NO_PERMISSION, "ModelNotSupportedError"), + (ModelStatus.QUOTA_EXCEEDED, "ModelQuotaExceededError"), + ], + ) + def test_single_mode_raises_for_model_status( + self, + retrieval: DatasetRetrieval, + status: ModelStatus, + error_cls: str, + ) -> None: + request = KnowledgeRetrievalRequest( + tenant_id="tenant-1", + user_id="user-1", + app_id="app-1", + user_from="workflow", + dataset_ids=["dataset-1"], + query="python", + retrieval_mode="single", + model_provider="openai", + model_name="gpt-4", + ) + provider_model_bundle = Mock() + provider_model_bundle.configuration.get_provider_model.return_value = SimpleNamespace(status=status) + model_type_instance = Mock() + model_type_instance.get_model_schema.return_value = Mock() + model_instance = SimpleNamespace( + provider_model_bundle=provider_model_bundle, + model_type_instance=model_type_instance, + credentials={}, + ) + with ( + patch.object(retrieval, "_check_knowledge_rate_limit"), + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="dataset-1")]), + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager, + ): + mock_model_manager.return_value.get_model_instance.return_value = model_instance + with pytest.raises(Exception) as exc_info: + retrieval.knowledge_retrieval(request) + assert error_cls in type(exc_info.value).__name__ + + +class TestRetrieveCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def _build_model_config(self, features: list[ModelFeature] | None = None): + model_type_instance = Mock() + model_type_instance.get_model_schema.return_value = SimpleNamespace(features=features or []) + provider_bundle = SimpleNamespace(model_type_instance=model_type_instance) + return ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt-4", + model_schema=Mock(), + mode="chat", + provider_model_bundle=provider_bundle, + credentials={}, + parameters={}, + stop=[], + ) + + def test_returns_none_when_dataset_ids_empty(self, retrieval: DatasetRetrieval) -> None: + config = DatasetEntity( + dataset_ids=[], + retrieve_config=DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + ), + ) + result = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=self._build_model_config(), + config=config, + query="python", + invoke_from=InvokeFrom.WEB_APP, + show_retrieve_source=False, + hit_callback=Mock(), + message_id="m1", + ) + assert result == (None, []) + + def test_returns_none_when_model_schema_missing(self, retrieval: DatasetRetrieval) -> None: + config = DatasetEntity( + dataset_ids=["d1"], + retrieve_config=DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + ), + ) + model_config = self._build_model_config() + model_config.provider_model_bundle.model_type_instance.get_model_schema.return_value = None + with patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager: + mock_model_manager.return_value.get_model_instance.return_value = Mock() + result = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=model_config, + config=config, + query="python", + invoke_from=InvokeFrom.WEB_APP, + show_retrieve_source=False, + hit_callback=Mock(), + message_id="m1", + ) + assert result == (None, []) + + def test_single_strategy_with_external_documents(self, retrieval: DatasetRetrieval) -> None: + retrieve_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE, + metadata_filtering_mode="disabled", + ) + config = DatasetEntity(dataset_ids=["d1"], retrieve_config=retrieve_config) + model_config = self._build_model_config() + external_doc = _doc( + provider="external", + content="external content", + dataset_id="ext-ds", + document_id="ext-doc", + doc_id="ext-node", + extra={"title": "External", "dataset_name": "External DS"}, + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager, + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + patch.object(retrieval, "get_metadata_filter_condition", return_value=(None, None)), + patch.object(retrieval, "single_retrieve", return_value=[external_doc]), + ): + mock_model_manager.return_value.get_model_instance.return_value = Mock() + context, files = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=model_config, + config=config, + query="python", + invoke_from=InvokeFrom.WEB_APP, + show_retrieve_source=False, + hit_callback=Mock(), + message_id="m1", + ) + assert context == "external content" + assert files == [] + + def test_multiple_strategy_with_vision_and_source_details(self, retrieval: DatasetRetrieval) -> None: + retrieve_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + top_k=4, + score_threshold=0.1, + rerank_mode="reranking_model", + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v3"}, + reranking_enabled=True, + metadata_filtering_mode="disabled", + ) + config = DatasetEntity(dataset_ids=["d1"], retrieve_config=retrieve_config) + model_config = self._build_model_config(features=[ModelFeature.TOOL_CALL]) + external_doc = _doc( + provider="external", + content="external body", + score=0.8, + dataset_id="ext-ds", + document_id="ext-doc", + doc_id="ext-node", + extra={"title": "External Title", "dataset_name": "External DS"}, + ) + dify_doc = _doc( + provider="dify", + content="dify body", + score=0.9, + dataset_id="d1", + document_id="doc-1", + doc_id="node-1", + ) + record = SimpleNamespace( + segment=SimpleNamespace( + id="segment-1", + dataset_id="d1", + document_id="doc-1", + tenant_id="tenant-1", + hit_count=3, + word_count=11, + position=1, + index_node_hash="hash-1", + content="segment content", + answer="segment answer", + get_sign_content=lambda: "segment content", + ), + score=0.9, + summary="short summary", + files=None, + ) + dataset_item = SimpleNamespace(id="d1", name="Dataset One") + document_item = SimpleNamespace( + id="doc-1", + name="Document One", + data_source_type="upload_file", + doc_metadata={"lang": "en"}, + ) + upload_file = SimpleNamespace( + id="file-1", + name="image", + extension="png", + mime_type="image/png", + source_url="https://example.com/img.png", + size=123, + key="k1", + ) + execute_attachments = SimpleNamespace(all=lambda: [(SimpleNamespace(), upload_file)]) + execute_docs = SimpleNamespace(scalars=lambda: SimpleNamespace(all=lambda: [document_item])) + execute_datasets = SimpleNamespace(scalars=lambda: SimpleNamespace(all=lambda: [dataset_item])) + hit_callback = Mock() + + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager, + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + patch.object(retrieval, "get_metadata_filter_condition", return_value=(None, None)), + patch.object(retrieval, "multiple_retrieve", return_value=[external_doc, dify_doc]), + patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService.format_retrieval_documents", + return_value=[record], + ), + patch("core.rag.retrieval.dataset_retrieval.sign_upload_file", return_value="https://signed"), + patch("core.rag.retrieval.dataset_retrieval.db.session.execute") as mock_execute, + ): + mock_model_manager.return_value.get_model_instance.return_value = Mock() + mock_execute.side_effect = [execute_attachments, execute_docs, execute_datasets] + context, files = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=model_config, + config=config, + query="python", + invoke_from=InvokeFrom.DEBUGGER, + show_retrieve_source=True, + hit_callback=hit_callback, + message_id="m1", + vision_enabled=True, + ) + + assert "short summary" in (context or "") + assert "question:segment content answer:segment answer" in (context or "") + assert len(files or []) == 1 + hit_callback.return_retriever_resource_info.assert_called_once() + + +class TestSingleAndMultipleRetrieveCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_single_retrieve_external_path(self, retrieval: DatasetRetrieval) -> None: + dataset = SimpleNamespace( + id="ds-1", + name="External DS", + description=None, + provider="external", + tenant_id="tenant-1", + retrieval_model={"top_k": 2}, + indexing_technique="high_quality", + ) + app = Flask(__name__) + usage = LLMUsage.from_metadata({"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}) + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.ReactMultiDatasetRouter") as mock_router_cls, + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset), + patch( + "core.rag.retrieval.dataset_retrieval.ExternalDatasetService.fetch_external_knowledge_retrieval" + ) as mock_external, + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_on_retrieval_end") as mock_end, + patch.object(retrieval, "_on_query"), + ): + mock_router_cls.return_value.invoke.return_value = ("ds-1", usage) + mock_external.return_value = [ + {"content": "ext result", "metadata": {"k": "v"}, "score": 0.9, "title": "Ext Doc"} + ] + result = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + message_id="m1", + ) + + assert len(result) == 1 + assert result[0].provider == "external" + mock_end.assert_called_once() + assert retrieval.llm_usage.total_tokens == 2 + + def test_single_retrieve_dify_path_and_filters(self, retrieval: DatasetRetrieval) -> None: + dataset = SimpleNamespace( + id="ds-1", + name="Internal DS", + description="dataset desc", + provider="dify", + tenant_id="tenant-1", + indexing_technique="high_quality", + retrieval_model={ + "search_method": "semantic_search", + "reranking_enable": True, + "reranking_model": {"reranking_provider_name": "cohere", "reranking_model_name": "rerank"}, + "reranking_mode": "reranking_model", + "weights": {"vector_setting": {}}, + "top_k": 3, + "score_threshold_enabled": True, + "score_threshold": 0.2, + }, + ) + app = Flask(__name__) + usage = LLMUsage.from_metadata({"prompt_tokens": 1, "completion_tokens": 0, "total_tokens": 1}) + result_doc = _doc(provider="dify", score=0.7, dataset_id="ds-1", document_id="doc-1", doc_id="node-1") + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.FunctionCallMultiDatasetRouter") as mock_router_cls, + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset), + patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService.retrieve", return_value=[result_doc] + ) as mock_retrieve, + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_on_retrieval_end"), + patch.object(retrieval, "_on_query"), + ): + mock_router_cls.return_value.invoke.return_value = ("ds-1", usage) + results = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.ROUTER, + metadata_filter_document_ids={"ds-1": ["doc-1"]}, + metadata_condition=SimpleNamespace(), + ) + + assert results == [result_doc] + assert mock_retrieve.call_args.kwargs["document_ids_filter"] == ["doc-1"] + assert retrieval.llm_usage.total_tokens == 1 + + def test_single_retrieve_returns_empty_when_no_dataset_selected(self, retrieval: DatasetRetrieval) -> None: + with patch("core.rag.retrieval.dataset_retrieval.ReactMultiDatasetRouter") as mock_router_cls: + mock_router_cls.return_value.invoke.return_value = (None, LLMUsage.empty_usage()) + results = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[ + SimpleNamespace(id="ds-1", name="DS", description=None), + ], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + ) + assert results == [] + + def test_single_retrieve_respects_metadata_filter_shortcuts(self, retrieval: DatasetRetrieval) -> None: + dataset = SimpleNamespace( + id="ds-1", + name="Internal DS", + description="desc", + provider="dify", + tenant_id="tenant-1", + indexing_technique="high_quality", + retrieval_model={"top_k": 2, "search_method": "semantic_search", "reranking_enable": False}, + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.ReactMultiDatasetRouter") as mock_router_cls, + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset), + ): + mock_router_cls.return_value.invoke.return_value = ("ds-1", LLMUsage.empty_usage()) + no_filter = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + metadata_filter_document_ids=None, + metadata_condition=SimpleNamespace(), + ) + missing_doc_ids = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + metadata_filter_document_ids={"other-ds": ["x"]}, + metadata_condition=None, + ) + assert no_filter == [] + assert missing_doc_ids == [] + + def test_multiple_retrieve_validation_paths(self, retrieval: DatasetRetrieval) -> None: + assert ( + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=[], + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode="reranking_model", + ) + == [] + ) + + mixed = [ + SimpleNamespace(id="d1", indexing_technique="high_quality"), + SimpleNamespace(id="d2", indexing_technique="economy"), + ] + with pytest.raises(ValueError, match="different indexing technique"): + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=mixed, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode="weighted_score", + reranking_enable=False, + ) + + high_quality_mismatch = [ + SimpleNamespace( + id="d1", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ), + SimpleNamespace( + id="d2", + indexing_technique="high_quality", + embedding_model="model-b", + embedding_model_provider="provider-b", + ), + ] + with pytest.raises(ValueError, match="different embedding model"): + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=high_quality_mismatch, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode=RerankMode.WEIGHTED_SCORE, + reranking_enable=True, + ) + + def test_multiple_retrieve_threads_and_dedup(self, retrieval: DatasetRetrieval) -> None: + datasets = [ + SimpleNamespace( + id="d1", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ), + SimpleNamespace( + id="d2", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ), + ] + doc_a = _doc(provider="dify", score=0.8, dataset_id="d1", document_id="doc-1", doc_id="dup") + doc_b = _doc(provider="dify", score=0.7, dataset_id="d2", document_id="doc-2", doc_id="dup") + doc_external = _doc( + provider="external", + score=0.9, + dataset_id="ext-ds", + document_id="ext-doc", + doc_id="ext-node", + extra={"dataset_name": "Ext", "title": "Ext"}, + ) + app = Flask(__name__) + weights = {"vector_setting": {}} + + def fake_multiple_thread(**kwargs): + if kwargs["query"]: + kwargs["all_documents"].extend([doc_a, doc_b]) + if kwargs["attachment_id"]: + kwargs["all_documents"].append(doc_external) + + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.measure_time", _timer), + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_multiple_retrieve_thread", side_effect=fake_multiple_thread), + patch.object(retrieval, "_on_query") as mock_on_query, + patch.object(retrieval, "_on_retrieval_end") as mock_end, + ): + result = retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=datasets, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode=RerankMode.WEIGHTED_SCORE, + reranking_enable=True, + weights=weights, + attachment_ids=["att-1"], + message_id="m1", + ) + + assert len(result) == 2 + assert any(doc.provider == "external" for doc in result) + assert weights["vector_setting"]["embedding_provider_name"] == "provider-a" + assert weights["vector_setting"]["embedding_model_name"] == "model-a" + mock_on_query.assert_called_once() + mock_end.assert_called_once() + + def test_multiple_retrieve_propagates_thread_exception(self, retrieval: DatasetRetrieval) -> None: + datasets = [ + SimpleNamespace( + id="d1", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ) + ] + app = Flask(__name__) + + def failing_thread(**kwargs): + kwargs["thread_exceptions"].append(RuntimeError("thread boom")) + + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.measure_time", _timer), + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_multiple_retrieve_thread", side_effect=failing_thread), + ): + with pytest.raises(RuntimeError, match="thread boom"): + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=datasets, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode="reranking_model", + ) + + +class TestInternalHooksCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_on_retrieval_end_without_dify_documents(self, retrieval: DatasetRetrieval) -> None: + app = Flask(__name__) + with patch.object(retrieval, "_send_trace_task") as mock_trace: + retrieval._on_retrieval_end( + flask_app=app, + documents=[_doc(provider="external")], + message_id="m1", + timer={"cost": 1}, + ) + mock_trace.assert_called_once() + + def test_on_retrieval_end_dify_without_document_ids(self, retrieval: DatasetRetrieval) -> None: + app = Flask(__name__) + doc = Document(page_content="x", metadata={"doc_id": "n1"}, provider="dify") + with ( + patch("core.rag.retrieval.dataset_retrieval.db", SimpleNamespace(engine=Mock())), + patch.object(retrieval, "_send_trace_task") as mock_trace, + ): + retrieval._on_retrieval_end(flask_app=app, documents=[doc], message_id="m1", timer={"cost": 1}) + mock_trace.assert_called_once() + + def test_on_retrieval_end_updates_segments_for_text_and_image(self, retrieval: DatasetRetrieval) -> None: + app = Flask(__name__) + docs = [ + _doc(provider="dify", document_id="doc-a", doc_id="idx-a", extra={"doc_type": "text"}), + _doc(provider="dify", document_id="doc-b", doc_id="att-b", extra={"doc_type": DocType.IMAGE}), + _doc(provider="dify", document_id="doc-c", doc_id="idx-c", extra={"doc_type": "text"}), + _doc(provider="dify", document_id="doc-d", doc_id="att-d", extra={"doc_type": DocType.IMAGE}), + ] + dataset_docs = [ + SimpleNamespace(id="doc-a", doc_form=IndexStructureType.PARENT_CHILD_INDEX), + SimpleNamespace(id="doc-b", doc_form=IndexStructureType.PARENT_CHILD_INDEX), + SimpleNamespace(id="doc-c", doc_form="qa_model"), + SimpleNamespace(id="doc-d", doc_form="qa_model"), + ] + child_chunks = [SimpleNamespace(index_node_id="idx-a", segment_id="seg-a")] + segments = [SimpleNamespace(index_node_id="idx-c", id="seg-c")] + bindings = [SimpleNamespace(segment_id="seg-b"), SimpleNamespace(segment_id="seg-d")] + + def _scalars(items): + result = Mock() + result.all.return_value = items + return result + + session = Mock() + session.scalars.side_effect = [ + _scalars(dataset_docs), + _scalars(child_chunks), + _scalars(segments), + _scalars(bindings), + ] + query = Mock() + query.where.return_value = query + session.query.return_value = query + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with ( + patch("core.rag.retrieval.dataset_retrieval.db", SimpleNamespace(engine=Mock())), + patch("core.rag.retrieval.dataset_retrieval.Session", return_value=session_ctx), + patch.object(retrieval, "_send_trace_task") as mock_trace, + ): + retrieval._on_retrieval_end(flask_app=app, documents=docs, message_id="m1", timer={"cost": 1}) + + query.update.assert_called_once() + session.commit.assert_called_once() + mock_trace.assert_called_once() + + def test_retriever_variants(self, retrieval: DatasetRetrieval) -> None: + flask_app = SimpleNamespace(app_context=lambda: nullcontext()) + all_documents: list[Document] = [] + + with patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=None): + assert ( + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="d1", + query="python", + top_k=1, + all_documents=all_documents, + ) + == [] + ) + + external_dataset = SimpleNamespace( + id="ext-ds", + name="External", + provider="external", + tenant_id="tenant-1", + retrieval_model={"top_k": 2}, + indexing_technique="high_quality", + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=external_dataset), + patch( + "core.rag.retrieval.dataset_retrieval.ExternalDatasetService.fetch_external_knowledge_retrieval" + ) as mock_external, + ): + mock_external.return_value = [{"content": "e", "metadata": {}, "score": 0.8, "title": "Ext"}] + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="ext-ds", + query="python", + top_k=1, + all_documents=all_documents, + ) + + economy_dataset = SimpleNamespace( + id="eco-ds", + provider="dify", + retrieval_model={"top_k": 1}, + indexing_technique="economy", + ) + high_dataset = SimpleNamespace( + id="hq-ds", + provider="dify", + retrieval_model={ + "search_method": "semantic_search", + "top_k": 4, + "score_threshold": 0.3, + "score_threshold_enabled": True, + "reranking_enable": True, + "reranking_model": {"reranking_provider_name": "x", "reranking_model_name": "y"}, + "reranking_mode": "reranking_model", + "weights": {"vector_setting": {}}, + }, + indexing_technique="high_quality", + ) + with ( + patch( + "core.rag.retrieval.dataset_retrieval.db.session.scalar", side_effect=[economy_dataset, high_dataset] + ), + patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService.retrieve", return_value=[_doc(provider="dify")] + ) as mock_retrieve, + ): + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="eco-ds", + query="python", + top_k=2, + all_documents=all_documents, + ) + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="hq-ds", + query="python", + top_k=2, + all_documents=all_documents, + attachment_ids=["att-1"], + ) + assert mock_retrieve.call_count == 2 + assert len(all_documents) >= 3 + + def test_to_dataset_retriever_tool_paths(self, retrieval: DatasetRetrieval) -> None: + dataset_skip_zero = SimpleNamespace(id="d1", provider="dify", available_document_count=0) + dataset_ok_single = SimpleNamespace( + id="d2", + provider="dify", + available_document_count=2, + retrieval_model={"top_k": 2, "score_threshold_enabled": True, "score_threshold": 0.1}, + ) + single_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE, + metadata_filtering_mode="disabled", + ) + with ( + patch( + "core.rag.retrieval.dataset_retrieval.db.session.scalar", + side_effect=[None, dataset_skip_zero, dataset_ok_single], + ), + patch( + "core.tools.utils.dataset_retriever.dataset_retriever_tool.DatasetRetrieverTool.from_dataset", + return_value="single-tool", + ) as mock_single_tool, + ): + single_tools = retrieval.to_dataset_retriever_tool( + tenant_id="tenant-1", + dataset_ids=["missing", "d1", "d2"], + retrieve_config=single_config, + return_resource=True, + invoke_from=InvokeFrom.WEB_APP, + hit_callback=Mock(), + user_id="user-1", + inputs={"k": "v"}, + ) + + assert single_tools == ["single-tool"] + mock_single_tool.assert_called_once() + + multiple_config_missing = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + metadata_filtering_mode="disabled", + reranking_model=None, + ) + with patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset_ok_single): + with pytest.raises(ValueError, match="Reranking model is required"): + retrieval.to_dataset_retriever_tool( + tenant_id="tenant-1", + dataset_ids=["d2"], + retrieve_config=multiple_config_missing, + return_resource=True, + invoke_from=InvokeFrom.WEB_APP, + hit_callback=Mock(), + user_id="user-1", + inputs={}, + ) + + multiple_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + metadata_filtering_mode="disabled", + top_k=3, + score_threshold=0.2, + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v3"}, + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset_ok_single), + patch( + "core.tools.utils.dataset_retriever.dataset_multi_retriever_tool.DatasetMultiRetrieverTool.from_dataset", + return_value="multi-tool", + ) as mock_multi_tool, + ): + multi_tools = retrieval.to_dataset_retriever_tool( + tenant_id="tenant-1", + dataset_ids=["d2"], + retrieve_config=multiple_config, + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="user-1", + inputs={}, + ) + assert multi_tools == ["multi-tool"] + mock_multi_tool.assert_called_once() + + def test_additional_small_branches(self, retrieval: DatasetRetrieval) -> None: + keyword_handler = Mock() + keyword_handler.extract_keywords.side_effect = [[], []] + doc = Document(page_content="doc", metadata={"doc_id": "1"}, provider="dify") + with patch("core.rag.retrieval.dataset_retrieval.JiebaKeywordTableHandler", return_value=keyword_handler): + ranked = retrieval.calculate_keyword_score("query", [doc], top_k=1) + assert len(ranked) == 1 + assert ranked[0].metadata.get("score") == 0.0 + + with patch("core.rag.retrieval.dataset_retrieval.db.session.scalars") as mock_scalars: + mock_scalars.return_value.all.return_value = [] + with pytest.raises(ValueError): + retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="user-1", + metadata_model_config=None, # type: ignore[arg-type] + ) + + session_scalars = Mock() + session_scalars.all.return_value = [SimpleNamespace(name="author")] + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalars", return_value=session_scalars), + patch.object(retrieval, "_fetch_model_config", return_value=(Mock(), Mock())), + patch.object(retrieval, "_get_prompt_template", return_value=(["prompt"], [])), + patch.object(retrieval, "_record_usage"), + ): + model_instance = Mock() + model_instance.invoke_llm.side_effect = RuntimeError("nope") + with patch.object(retrieval, "_fetch_model_config", return_value=(model_instance, Mock())): + assert ( + retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="user-1", + metadata_model_config=WorkflowModelConfig(provider="openai", name="gpt", mode="chat"), + ) + is None + ) + + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelMode", return_value=object()), + patch("core.rag.retrieval.dataset_retrieval.AdvancedPromptTransform"), + ): + with pytest.raises(ValueError, match="not support"): + retrieval._get_prompt_template( + model_config=ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="chat", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=[], + ), + mode="chat", + metadata_fields=[], + query="q", + ) diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py deleted file mode 100644 index 07d6e51e4b..0000000000 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py +++ /dev/null @@ -1,873 +0,0 @@ -""" -Unit tests for DatasetRetrieval.process_metadata_filter_func. - -This module provides comprehensive test coverage for the process_metadata_filter_func -method in the DatasetRetrieval class, which is responsible for building SQLAlchemy -filter expressions based on metadata filtering conditions. - -Conditions Tested: -================== -1. **String Conditions**: contains, not contains, start with, end with -2. **Equality Conditions**: is / =, is not / ≠ -3. **Null Conditions**: empty, not empty -4. **Numeric Comparisons**: before / <, after / >, ≤ / <=, ≥ / >= -5. **List Conditions**: in -6. **Edge Cases**: None values, different data types (str, int, float) - -Test Architecture: -================== -- Direct instantiation of DatasetRetrieval -- Mocking of DatasetDocument model attributes -- Verification of SQLAlchemy filter expressions -- Follows Arrange-Act-Assert (AAA) pattern - -Running Tests: -============== - # Run all tests in this module - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py -v - - # Run a specific test - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py::\ -TestProcessMetadataFilterFunc::test_contains_condition -v -""" - -from unittest.mock import MagicMock - -import pytest - -from core.rag.retrieval.dataset_retrieval import DatasetRetrieval - - -class TestProcessMetadataFilterFunc: - """ - Comprehensive test suite for process_metadata_filter_func method. - - This test class validates all metadata filtering conditions supported by - the DatasetRetrieval class, including string operations, numeric comparisons, - null checks, and list operations. - - Method Signature: - ================== - def process_metadata_filter_func( - self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list - ) -> list: - - The method builds SQLAlchemy filter expressions by: - 1. Validating value is not None (except for empty/not empty conditions) - 2. Using DatasetDocument.doc_metadata JSON field operations - 3. Adding appropriate SQLAlchemy expressions to the filters list - 4. Returning the updated filters list - - Mocking Strategy: - ================== - - Mock DatasetDocument.doc_metadata to avoid database dependencies - - Verify filter expressions are created correctly - - Test with various data types (str, int, float, list) - """ - - @pytest.fixture - def retrieval(self): - """ - Create a DatasetRetrieval instance for testing. - - Returns: - DatasetRetrieval: Instance to test process_metadata_filter_func - """ - return DatasetRetrieval() - - @pytest.fixture - def mock_doc_metadata(self): - """ - Mock the DatasetDocument.doc_metadata JSON field. - - The method uses DatasetDocument.doc_metadata[metadata_name] to access - JSON fields. We mock this to avoid database dependencies. - - Returns: - Mock: Mocked doc_metadata attribute - """ - mock_metadata_field = MagicMock() - - # Create mock for string access - mock_string_access = MagicMock() - mock_string_access.like = MagicMock() - mock_string_access.notlike = MagicMock() - mock_string_access.__eq__ = MagicMock(return_value=MagicMock()) - mock_string_access.__ne__ = MagicMock(return_value=MagicMock()) - mock_string_access.in_ = MagicMock(return_value=MagicMock()) - - # Create mock for float access (for numeric comparisons) - mock_float_access = MagicMock() - mock_float_access.__eq__ = MagicMock(return_value=MagicMock()) - mock_float_access.__ne__ = MagicMock(return_value=MagicMock()) - mock_float_access.__lt__ = MagicMock(return_value=MagicMock()) - mock_float_access.__gt__ = MagicMock(return_value=MagicMock()) - mock_float_access.__le__ = MagicMock(return_value=MagicMock()) - mock_float_access.__ge__ = MagicMock(return_value=MagicMock()) - - # Create mock for null checks - mock_null_access = MagicMock() - mock_null_access.is_ = MagicMock(return_value=MagicMock()) - mock_null_access.isnot = MagicMock(return_value=MagicMock()) - - # Setup __getitem__ to return appropriate mock based on usage - def getitem_side_effect(name): - if name in ["author", "title", "category"]: - return mock_string_access - elif name in ["year", "price", "rating"]: - return mock_float_access - else: - return mock_string_access - - mock_metadata_field.__getitem__ = MagicMock(side_effect=getitem_side_effect) - mock_metadata_field.as_string.return_value = mock_string_access - mock_metadata_field.as_float.return_value = mock_float_access - mock_metadata_field[metadata_name:str].is_ = mock_null_access.is_ - mock_metadata_field[metadata_name:str].isnot = mock_null_access.isnot - - return mock_metadata_field - - # ==================== String Condition Tests ==================== - - def test_contains_condition_string_value(self, retrieval): - """ - Test 'contains' condition with string value. - - Verifies: - - Filters list is populated with LIKE expression - - Pattern matching uses %value% syntax - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = "John" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_not_contains_condition(self, retrieval): - """ - Test 'not contains' condition. - - Verifies: - - Filters list is populated with NOT LIKE expression - - Pattern matching uses %value% syntax with negation - """ - filters = [] - sequence = 0 - condition = "not contains" - metadata_name = "title" - value = "banned" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_start_with_condition(self, retrieval): - """ - Test 'start with' condition. - - Verifies: - - Filters list is populated with LIKE expression - - Pattern matching uses value% syntax - """ - filters = [] - sequence = 0 - condition = "start with" - metadata_name = "category" - value = "tech" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_end_with_condition(self, retrieval): - """ - Test 'end with' condition. - - Verifies: - - Filters list is populated with LIKE expression - - Pattern matching uses %value syntax - """ - filters = [] - sequence = 0 - condition = "end with" - metadata_name = "filename" - value = ".pdf" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Equality Condition Tests ==================== - - def test_is_condition_with_string_value(self, retrieval): - """ - Test 'is' (=) condition with string value. - - Verifies: - - Filters list is populated with equality expression - - String comparison is used - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "author" - value = "Jane Doe" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_equals_condition_with_string_value(self, retrieval): - """ - Test '=' condition with string value. - - Verifies: - - Same behavior as 'is' condition - - String comparison is used - """ - filters = [] - sequence = 0 - condition = "=" - metadata_name = "category" - value = "technology" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_condition_with_int_value(self, retrieval): - """ - Test 'is' condition with integer value. - - Verifies: - - Numeric comparison is used - - as_float() is called on the metadata field - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "year" - value = 2023 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_condition_with_float_value(self, retrieval): - """ - Test 'is' condition with float value. - - Verifies: - - Numeric comparison is used - - as_float() is called on the metadata field - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "price" - value = 19.99 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_not_condition_with_string_value(self, retrieval): - """ - Test 'is not' (≠) condition with string value. - - Verifies: - - Filters list is populated with inequality expression - - String comparison is used - """ - filters = [] - sequence = 0 - condition = "is not" - metadata_name = "author" - value = "Unknown" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_not_equals_condition(self, retrieval): - """ - Test '≠' condition with string value. - - Verifies: - - Same behavior as 'is not' condition - - Inequality expression is used - """ - filters = [] - sequence = 0 - condition = "≠" - metadata_name = "category" - value = "archived" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_not_condition_with_numeric_value(self, retrieval): - """ - Test 'is not' condition with numeric value. - - Verifies: - - Numeric inequality comparison is used - - as_float() is called on the metadata field - """ - filters = [] - sequence = 0 - condition = "is not" - metadata_name = "year" - value = 2000 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Null Condition Tests ==================== - - def test_empty_condition(self, retrieval): - """ - Test 'empty' condition (null check). - - Verifies: - - Filters list is populated with IS NULL expression - - Value can be None for this condition - """ - filters = [] - sequence = 0 - condition = "empty" - metadata_name = "author" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_not_empty_condition(self, retrieval): - """ - Test 'not empty' condition (not null check). - - Verifies: - - Filters list is populated with IS NOT NULL expression - - Value can be None for this condition - """ - filters = [] - sequence = 0 - condition = "not empty" - metadata_name = "description" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Numeric Comparison Tests ==================== - - def test_before_condition(self, retrieval): - """ - Test 'before' (<) condition. - - Verifies: - - Filters list is populated with less than expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "before" - metadata_name = "year" - value = 2020 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_less_than_condition(self, retrieval): - """ - Test '<' condition. - - Verifies: - - Same behavior as 'before' condition - - Less than expression is used - """ - filters = [] - sequence = 0 - condition = "<" - metadata_name = "price" - value = 100.0 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_after_condition(self, retrieval): - """ - Test 'after' (>) condition. - - Verifies: - - Filters list is populated with greater than expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "after" - metadata_name = "year" - value = 2020 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_greater_than_condition(self, retrieval): - """ - Test '>' condition. - - Verifies: - - Same behavior as 'after' condition - - Greater than expression is used - """ - filters = [] - sequence = 0 - condition = ">" - metadata_name = "rating" - value = 4.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_less_than_or_equal_condition_unicode(self, retrieval): - """ - Test '≤' condition. - - Verifies: - - Filters list is populated with less than or equal expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "≤" - metadata_name = "price" - value = 50.0 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_less_than_or_equal_condition_ascii(self, retrieval): - """ - Test '<=' condition. - - Verifies: - - Same behavior as '≤' condition - - Less than or equal expression is used - """ - filters = [] - sequence = 0 - condition = "<=" - metadata_name = "year" - value = 2023 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_greater_than_or_equal_condition_unicode(self, retrieval): - """ - Test '≥' condition. - - Verifies: - - Filters list is populated with greater than or equal expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "≥" - metadata_name = "rating" - value = 3.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_greater_than_or_equal_condition_ascii(self, retrieval): - """ - Test '>=' condition. - - Verifies: - - Same behavior as '≥' condition - - Greater than or equal expression is used - """ - filters = [] - sequence = 0 - condition = ">=" - metadata_name = "year" - value = 2000 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== List/In Condition Tests ==================== - - def test_in_condition_with_comma_separated_string(self, retrieval): - """ - Test 'in' condition with comma-separated string value. - - Verifies: - - String is split into list - - Whitespace is trimmed from each value - - IN expression is created - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = "tech, science, AI " - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_list_value(self, retrieval): - """ - Test 'in' condition with list value. - - Verifies: - - List is processed correctly - - None values are filtered out - - IN expression is created with valid values - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "tags" - value = ["python", "javascript", None, "golang"] - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_tuple_value(self, retrieval): - """ - Test 'in' condition with tuple value. - - Verifies: - - Tuple is processed like a list - - IN expression is created - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = ("tech", "science", "ai") - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_empty_string(self, retrieval): - """ - Test 'in' condition with empty string value. - - Verifies: - - Empty string results in literal(False) filter - - No valid values to match - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = "" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - # Verify it's a literal(False) expression - # This is a bit tricky to test without access to the actual expression - - def test_in_condition_with_only_whitespace(self, retrieval): - """ - Test 'in' condition with whitespace-only string value. - - Verifies: - - Whitespace-only string results in literal(False) filter - - All values are stripped and filtered out - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = " , , " - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_single_string(self, retrieval): - """ - Test 'in' condition with single non-comma string. - - Verifies: - - Single string is treated as single-item list - - IN expression is created with one value - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = "technology" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Edge Case Tests ==================== - - def test_none_value_with_non_empty_condition(self, retrieval): - """ - Test None value with conditions that require value. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for None values (except empty/not empty) - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 # No filter added - - def test_none_value_with_equals_condition(self, retrieval): - """ - Test None value with 'is' (=) condition. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for None values - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "author" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 - - def test_none_value_with_numeric_condition(self, retrieval): - """ - Test None value with numeric comparison condition. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for None values - """ - filters = [] - sequence = 0 - condition = ">" - metadata_name = "year" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 - - def test_existing_filters_preserved(self, retrieval): - """ - Test that existing filters are preserved. - - Verifies: - - Existing filters in the list are not removed - - New filters are appended to the list - """ - existing_filter = MagicMock() - filters = [existing_filter] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = "test" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 2 - assert filters[0] == existing_filter - - def test_multiple_filters_accumulated(self, retrieval): - """ - Test multiple calls to accumulate filters. - - Verifies: - - Each call adds a new filter to the list - - All filters are preserved across calls - """ - filters = [] - - # First filter - retrieval.process_metadata_filter_func(0, "contains", "author", "John", filters) - assert len(filters) == 1 - - # Second filter - retrieval.process_metadata_filter_func(1, ">", "year", 2020, filters) - assert len(filters) == 2 - - # Third filter - retrieval.process_metadata_filter_func(2, "is", "category", "tech", filters) - assert len(filters) == 3 - - def test_unknown_condition(self, retrieval): - """ - Test unknown/unsupported condition. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for unknown conditions - """ - filters = [] - sequence = 0 - condition = "unknown_condition" - metadata_name = "author" - value = "test" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 - - def test_empty_string_value_with_contains(self, retrieval): - """ - Test empty string value with 'contains' condition. - - Verifies: - - Filter is added even with empty string - - LIKE expression is created - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = "" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_special_characters_in_value(self, retrieval): - """ - Test special characters in value string. - - Verifies: - - Special characters are handled in value - - LIKE expression is created correctly - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "title" - value = "C++ & Python's features" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_zero_value_with_numeric_condition(self, retrieval): - """ - Test zero value with numeric comparison condition. - - Verifies: - - Zero is treated as valid value - - Numeric comparison is performed - """ - filters = [] - sequence = 0 - condition = ">" - metadata_name = "price" - value = 0 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_negative_value_with_numeric_condition(self, retrieval): - """ - Test negative value with numeric comparison condition. - - Verifies: - - Negative numbers are handled correctly - - Numeric comparison is performed - """ - filters = [] - sequence = 0 - condition = "<" - metadata_name = "temperature" - value = -10.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_float_value_with_integer_comparison(self, retrieval): - """ - Test float value with numeric comparison condition. - - Verifies: - - Float values work correctly - - Numeric comparison is performed - """ - filters = [] - sequence = 0 - condition = ">=" - metadata_name = "rating" - value = 4.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py index 682a451117..48782515d0 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py @@ -5,8 +5,8 @@ import pytest from core.rag.models.document import Document from core.rag.retrieval.dataset_retrieval import DatasetRetrieval -from dify_graph.nodes.knowledge_retrieval import exc -from dify_graph.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest +from core.workflow.nodes.knowledge_retrieval import exc +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest from models.dataset import Dataset # ==================== Helper Functions ==================== diff --git a/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py deleted file mode 100644 index 5f461d53ae..0000000000 --- a/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py +++ /dev/null @@ -1,113 +0,0 @@ -import threading -from unittest.mock import Mock, patch -from uuid import uuid4 - -import pytest -from flask import Flask, current_app - -from core.rag.models.document import Document -from core.rag.retrieval.dataset_retrieval import DatasetRetrieval -from models.dataset import Dataset - - -class TestRetrievalService: - @pytest.fixture - def mock_dataset(self) -> Dataset: - dataset = Mock(spec=Dataset) - dataset.id = str(uuid4()) - dataset.tenant_id = str(uuid4()) - dataset.name = "test_dataset" - dataset.indexing_technique = "high_quality" - dataset.provider = "dify" - return dataset - - def test_multiple_retrieve_reranking_with_app_context(self, mock_dataset): - """ - Repro test for current bug: - reranking runs after `with flask_app.app_context():` exits. - `_multiple_retrieve_thread` catches exceptions and stores them into `thread_exceptions`, - so we must assert from that list (not from an outer try/except). - """ - dataset_retrieval = DatasetRetrieval() - flask_app = Flask(__name__) - tenant_id = str(uuid4()) - - # second dataset to ensure dataset_count > 1 reranking branch - secondary_dataset = Mock(spec=Dataset) - secondary_dataset.id = str(uuid4()) - secondary_dataset.provider = "dify" - secondary_dataset.indexing_technique = "high_quality" - - # retriever returns 1 doc into internal list (all_documents_item) - document = Document( - page_content="Context aware doc", - metadata={ - "doc_id": "doc1", - "score": 0.95, - "document_id": str(uuid4()), - "dataset_id": mock_dataset.id, - }, - provider="dify", - ) - - def fake_retriever( - flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids - ): - all_documents.append(document) - - called = {"init": 0, "invoke": 0} - - class ContextRequiredPostProcessor: - def __init__(self, *args, **kwargs): - called["init"] += 1 - # will raise RuntimeError if no Flask app context exists - _ = current_app.name - - def invoke(self, *args, **kwargs): - called["invoke"] += 1 - _ = current_app.name - return kwargs.get("documents") or args[1] - - # output list from _multiple_retrieve_thread - all_documents: list[Document] = [] - - # IMPORTANT: _multiple_retrieve_thread swallows exceptions and appends them here - thread_exceptions: list[Exception] = [] - - def target(): - with patch.object(dataset_retrieval, "_retriever", side_effect=fake_retriever): - with patch( - "core.rag.retrieval.dataset_retrieval.DataPostProcessor", - ContextRequiredPostProcessor, - ): - dataset_retrieval._multiple_retrieve_thread( - flask_app=flask_app, - available_datasets=[mock_dataset, secondary_dataset], - metadata_condition=None, - metadata_filter_document_ids=None, - all_documents=all_documents, - tenant_id=tenant_id, - reranking_enable=True, - reranking_mode="reranking_model", - reranking_model={ - "reranking_provider_name": "cohere", - "reranking_model_name": "rerank-v2", - }, - weights=None, - top_k=3, - score_threshold=0.0, - query="test query", - attachment_id=None, - dataset_count=2, # force reranking branch - thread_exceptions=thread_exceptions, # ✅ key - ) - - t = threading.Thread(target=target) - t.start() - t.join() - - # Ensure reranking branch was actually executed - assert called["init"] >= 1, "DataPostProcessor was never constructed; reranking branch may not have run." - - # Current buggy code should record an exception (not raise it) - assert not thread_exceptions, thread_exceptions diff --git a/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_function_call_router.py b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_function_call_router.py new file mode 100644 index 0000000000..cfa9094e12 --- /dev/null +++ b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_function_call_router.py @@ -0,0 +1,100 @@ +from unittest.mock import Mock + +from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter +from dify_graph.model_runtime.entities.llm_entities import LLMUsage + + +class TestFunctionCallMultiDatasetRouter: + def test_invoke_returns_none_when_no_tools(self) -> None: + router = FunctionCallMultiDatasetRouter() + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[], + model_config=Mock(), + model_instance=Mock(), + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_single_tool_directly(self) -> None: + router = FunctionCallMultiDatasetRouter() + tool = Mock() + tool.name = "dataset-1" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool], + model_config=Mock(), + model_instance=Mock(), + ) + + assert dataset_id == "dataset-1" + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_tool_from_model_response(self) -> None: + router = FunctionCallMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + usage = LLMUsage.empty_usage() + response = Mock() + response.usage = usage + response.message.tool_calls = [Mock(function=Mock())] + response.message.tool_calls[0].function.name = "dataset-2" + model_instance = Mock() + model_instance.invoke_llm.return_value = response + + dataset_id, returned_usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=model_instance, + ) + + assert dataset_id == "dataset-2" + assert returned_usage == usage + model_instance.invoke_llm.assert_called_once() + + def test_invoke_returns_none_when_no_tool_calls(self) -> None: + router = FunctionCallMultiDatasetRouter() + response = Mock() + response.usage = LLMUsage.empty_usage() + response.message.tool_calls = [] + model_instance = Mock() + model_instance.invoke_llm.return_value = response + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=model_instance, + ) + + assert dataset_id is None + assert usage == response.usage + + def test_invoke_returns_empty_usage_when_model_raises(self) -> None: + router = FunctionCallMultiDatasetRouter() + model_instance = Mock() + model_instance.invoke_llm.side_effect = RuntimeError("boom") + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=model_instance, + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() diff --git a/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_react_route.py b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_react_route.py new file mode 100644 index 0000000000..e429563739 --- /dev/null +++ b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_react_route.py @@ -0,0 +1,252 @@ +from types import SimpleNamespace +from unittest.mock import Mock, patch + +from core.rag.retrieval.output_parser.react_output import ReactAction, ReactFinish +from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole + + +class TestReactMultiDatasetRouter: + def test_invoke_returns_none_when_no_tools(self) -> None: + router = ReactMultiDatasetRouter() + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_single_tool_directly(self) -> None: + router = ReactMultiDatasetRouter() + tool = Mock() + tool.name = "dataset-1" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + assert dataset_id == "dataset-1" + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_tool_from_react_invoke(self) -> None: + router = ReactMultiDatasetRouter() + usage = LLMUsage.empty_usage() + tool_1 = Mock(name="dataset-1") + tool_1.name = "dataset-1" + tool_2 = Mock(name="dataset-2") + tool_2.name = "dataset-2" + + with patch.object(router, "_react_invoke", return_value=("dataset-2", usage)) as mock_react: + dataset_id, returned_usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + mock_react.assert_called_once() + assert dataset_id == "dataset-2" + assert returned_usage == usage + + def test_invoke_handles_react_invoke_errors(self) -> None: + router = ReactMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + + with patch.object(router, "_react_invoke", side_effect=RuntimeError("boom")): + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() + + def test_react_invoke_returns_action_tool(self) -> None: + router = ReactMultiDatasetRouter() + model_config = Mock() + model_config.mode = "chat" + model_config.parameters = {"temperature": 0.1} + usage = LLMUsage.empty_usage() + tools = [Mock(name="dataset-1"), Mock(name="dataset-2")] + tools[0].name = "dataset-1" + tools[0].description = "desc" + tools[1].name = "dataset-2" + tools[1].description = "desc" + + with ( + patch.object(router, "create_chat_prompt", return_value=[Mock()]) as mock_chat_prompt, + patch( + "core.rag.retrieval.router.multi_dataset_react_route.AdvancedPromptTransform" + ) as mock_prompt_transform, + patch.object(router, "_invoke_llm", return_value=('{"action":"dataset-2","action_input":{}}', usage)), + patch("core.rag.retrieval.router.multi_dataset_react_route.StructuredChatOutputParser") as mock_parser_cls, + ): + mock_prompt_transform.return_value.get_prompt.return_value = [Mock()] + mock_parser_cls.return_value.parse.return_value = ReactAction("dataset-2", {}, "log") + + dataset_id, returned_usage = router._react_invoke( + query="python", + model_config=model_config, + model_instance=Mock(), + tools=tools, + user_id="u1", + tenant_id="t1", + ) + + mock_chat_prompt.assert_called_once() + assert dataset_id == "dataset-2" + assert returned_usage == usage + + def test_react_invoke_returns_none_for_finish(self) -> None: + router = ReactMultiDatasetRouter() + model_config = Mock() + model_config.mode = "completion" + model_config.parameters = {"temperature": 0.1} + usage = LLMUsage.empty_usage() + tool = Mock() + tool.name = "dataset-1" + tool.description = "desc" + + with ( + patch.object(router, "create_completion_prompt", return_value=Mock()) as mock_completion_prompt, + patch( + "core.rag.retrieval.router.multi_dataset_react_route.AdvancedPromptTransform" + ) as mock_prompt_transform, + patch.object( + router, "_invoke_llm", return_value=('{"action":"Final Answer","action_input":"done"}', usage) + ), + patch("core.rag.retrieval.router.multi_dataset_react_route.StructuredChatOutputParser") as mock_parser_cls, + ): + mock_prompt_transform.return_value.get_prompt.return_value = [Mock()] + mock_parser_cls.return_value.parse.return_value = ReactFinish({"output": "done"}, "log") + + dataset_id, returned_usage = router._react_invoke( + query="python", + model_config=model_config, + model_instance=Mock(), + tools=[tool], + user_id="u1", + tenant_id="t1", + ) + + mock_completion_prompt.assert_called_once() + assert dataset_id is None + assert returned_usage == usage + + def test_invoke_llm_and_handle_result(self) -> None: + router = ReactMultiDatasetRouter() + usage = LLMUsage.empty_usage() + delta = SimpleNamespace(message=SimpleNamespace(content="part"), usage=usage) + chunk = SimpleNamespace(model="m1", prompt_messages=[Mock()], delta=delta) + model_instance = Mock() + model_instance.invoke_llm.return_value = iter([chunk]) + + with patch("core.rag.retrieval.router.multi_dataset_react_route.deduct_llm_quota") as mock_deduct: + text, returned_usage = router._invoke_llm( + completion_param={"temperature": 0.1}, + model_instance=model_instance, + prompt_messages=[Mock()], + stop=["Observation:"], + user_id="u1", + tenant_id="t1", + ) + + assert text == "part" + assert returned_usage == usage + mock_deduct.assert_called_once() + + def test_handle_invoke_result_with_empty_usage(self) -> None: + router = ReactMultiDatasetRouter() + delta = SimpleNamespace(message=SimpleNamespace(content="part"), usage=None) + chunk = SimpleNamespace(model="m1", prompt_messages=[Mock()], delta=delta) + + text, usage = router._handle_invoke_result(iter([chunk])) + + assert text == "part" + assert usage == LLMUsage.empty_usage() + + def test_create_chat_prompt(self) -> None: + router = ReactMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_1.description = "d1" + tool_2 = Mock() + tool_2.name = "dataset-2" + tool_2.description = "d2" + + chat_prompt = router.create_chat_prompt(query="python", tools=[tool_1, tool_2]) + assert len(chat_prompt) == 2 + assert chat_prompt[0].role == PromptMessageRole.SYSTEM + assert chat_prompt[1].role == PromptMessageRole.USER + assert "dataset-1" in chat_prompt[0].text + assert "dataset-2" in chat_prompt[0].text + + def test_create_completion_prompt(self) -> None: + router = ReactMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_1.description = "d1" + tool_2 = Mock() + tool_2.name = "dataset-2" + tool_2.description = "d2" + + completion_prompt = router.create_completion_prompt(tools=[tool_1, tool_2]) + assert "dataset-1: d1" in completion_prompt.text + assert "dataset-2: d2" in completion_prompt.text + + def test_react_invoke_uses_completion_branch_for_non_chat_mode(self) -> None: + router = ReactMultiDatasetRouter() + model_config = Mock() + model_config.mode = "unknown-mode" + model_config.parameters = {} + tool = Mock() + tool.name = "dataset-1" + tool.description = "desc" + + with ( + patch.object(router, "create_completion_prompt", return_value=Mock()) as mock_completion_prompt, + patch( + "core.rag.retrieval.router.multi_dataset_react_route.AdvancedPromptTransform" + ) as mock_prompt_transform, + patch.object( + router, + "_invoke_llm", + return_value=('{"action":"Final Answer","action_input":"done"}', LLMUsage.empty_usage()), + ), + patch("core.rag.retrieval.router.multi_dataset_react_route.StructuredChatOutputParser") as mock_parser_cls, + ): + mock_prompt_transform.return_value.get_prompt.return_value = [Mock()] + mock_parser_cls.return_value.parse.return_value = ReactFinish({"output": "done"}, "log") + dataset_id, usage = router._react_invoke( + query="python", + model_config=model_config, + model_instance=Mock(), + tools=[tool], + user_id="u1", + tenant_id="t1", + ) + + mock_completion_prompt.assert_called_once() + assert dataset_id is None + assert usage == LLMUsage.empty_usage() diff --git a/api/tests/unit_tests/core/rag/retrieval/test_structured_chat_output_parser.py b/api/tests/unit_tests/core/rag/retrieval/test_structured_chat_output_parser.py new file mode 100644 index 0000000000..c8fa0ea62f --- /dev/null +++ b/api/tests/unit_tests/core/rag/retrieval/test_structured_chat_output_parser.py @@ -0,0 +1,69 @@ +import pytest + +from core.rag.retrieval.output_parser.react_output import ReactAction, ReactFinish +from core.rag.retrieval.output_parser.structured_chat import StructuredChatOutputParser + + +class TestStructuredChatOutputParser: + def test_parse_action_without_action_input(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n{"action":"some_action"}\n```' + result = parser.parse(text) + + assert isinstance(result, ReactAction) + assert result.tool == "some_action" + assert result.tool_input == {} + + def test_parse_json_without_action_key(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n{"not_action":"search"}\n```' + with pytest.raises(ValueError, match="Could not parse LLM output"): + parser.parse(text) + + def test_parse_returns_action_for_tool_call(self) -> None: + parser = StructuredChatOutputParser() + text = ( + 'Thought: call tool\nAction:\n```json\n{"action":"search_dataset","action_input":{"query":"python"}}\n```' + ) + + result = parser.parse(text) + + assert isinstance(result, ReactAction) + assert result.tool == "search_dataset" + assert result.tool_input == {"query": "python"} + assert result.log == text + + def test_parse_returns_finish_for_final_answer(self) -> None: + parser = StructuredChatOutputParser() + text = 'Thought: done\nAction:\n```json\n{"action":"Final Answer","action_input":"final text"}\n```' + + result = parser.parse(text) + + assert isinstance(result, ReactFinish) + assert result.return_values == {"output": "final text"} + assert result.log == text + + def test_parse_returns_finish_for_json_array_payload(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n[{"action":"search","action_input":"hello"}]\n```' + result = parser.parse(text) + + assert isinstance(result, ReactFinish) + assert result.return_values == {"output": text} + assert result.log == text + + def test_parse_returns_finish_for_plain_text(self) -> None: + parser = StructuredChatOutputParser() + text = "No structured action block" + + result = parser.parse(text) + + assert isinstance(result, ReactFinish) + assert result.return_values == {"output": text} + + def test_parse_raises_value_error_for_invalid_json(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n{"action":"search","action_input": }\n```' + + with pytest.raises(ValueError, match="Could not parse LLM output"): + parser.parse(text) diff --git a/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py b/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py index 943a9e5712..976de10d89 100644 --- a/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py +++ b/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py @@ -125,7 +125,11 @@ Run with coverage: - Tests are organized by functionality in classes for better organization """ +import asyncio import string +import sys +import types +from inspect import currentframe from unittest.mock import Mock, patch import pytest @@ -604,6 +608,51 @@ class TestRecursiveCharacterTextSplitter: assert "def hello_world" in combined or "hello_world" in combined +class TestTextSplitterBasePaths: + """Target uncovered base TextSplitter paths.""" + + def test_from_huggingface_tokenizer_success_path(self): + """Cover from_huggingface_tokenizer success branch with mocked transformers.""" + + class _FakePreTrainedTokenizerBase: + pass + + class _FakeTokenizer(_FakePreTrainedTokenizerBase): + def encode(self, text: str): + return [ord(c) for c in text] + + fake_transformers = types.SimpleNamespace(PreTrainedTokenizerBase=_FakePreTrainedTokenizerBase) + with patch.dict(sys.modules, {"transformers": fake_transformers}): + splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( + tokenizer=_FakeTokenizer(), + chunk_size=5, + chunk_overlap=1, + ) + + chunks = splitter.split_text("abcdef") + assert chunks + + def test_from_huggingface_tokenizer_import_error(self): + """Cover from_huggingface_tokenizer import-error branch.""" + with patch.dict(sys.modules, {"transformers": None}): + with pytest.raises(ValueError, match="Could not import transformers"): + RecursiveCharacterTextSplitter.from_huggingface_tokenizer(tokenizer=object(), chunk_size=5) + + def test_atransform_documents_raises_not_implemented(self): + """Cover atransform_documents NotImplemented branch.""" + splitter = RecursiveCharacterTextSplitter(chunk_size=20, chunk_overlap=5) + with pytest.raises(NotImplementedError): + asyncio.run(splitter.atransform_documents([Document(page_content="x", metadata={})])) + + def test_merge_splits_logs_warning_for_oversized_total(self): + """Cover logger.warning path in _merge_splits.""" + splitter = RecursiveCharacterTextSplitter(chunk_size=5, chunk_overlap=1) + with patch("core.rag.splitter.text_splitter.logger.warning") as mock_warning: + merged = splitter._merge_splits(["abcdefghij", "b"], "", [10, 1]) + assert merged + mock_warning.assert_called_once() + + # ============================================================================ # Test TokenTextSplitter # ============================================================================ @@ -662,6 +711,44 @@ class TestTokenTextSplitter: except ImportError: pytest.skip("tiktoken not installed") + def test_initialization_and_split_with_mocked_tiktoken_encoding(self): + """Cover TokenTextSplitter __init__ else-path and split_text logic.""" + + class _FakeEncoding: + def encode(self, text: str, allowed_special=None, disallowed_special=None): + return [ord(c) for c in text] + + def decode(self, token_ids: list[int]) -> str: + return "".join(chr(i) for i in token_ids) + + fake_tiktoken = types.SimpleNamespace(get_encoding=lambda name: _FakeEncoding()) + with patch.dict(sys.modules, {"tiktoken": fake_tiktoken}): + splitter = TokenTextSplitter(encoding_name="gpt2", chunk_size=4, chunk_overlap=1) + result = splitter.split_text("abcdefgh") + + assert result + assert all(isinstance(chunk, str) for chunk in result) + + def test_initialization_with_model_name_uses_encoding_for_model(self): + """Cover TokenTextSplitter model_name init branch.""" + + class _FakeEncoding: + def encode(self, text: str, allowed_special=None, disallowed_special=None): + return [ord(c) for c in text] + + def decode(self, token_ids: list[int]) -> str: + return "".join(chr(i) for i in token_ids) + + fake_encoding = _FakeEncoding() + fake_tiktoken = types.SimpleNamespace( + encoding_for_model=lambda model_name: fake_encoding, + get_encoding=lambda name: _FakeEncoding(), + ) + with patch.dict(sys.modules, {"tiktoken": fake_tiktoken}): + splitter = TokenTextSplitter(model_name="gpt-4", chunk_size=5, chunk_overlap=1) + + assert splitter._tokenizer is fake_encoding + # ============================================================================ # Test EnhanceRecursiveCharacterTextSplitter @@ -731,6 +818,50 @@ class TestEnhanceRecursiveCharacterTextSplitter: assert len(result) > 0 assert all(isinstance(chunk, str) for chunk in result) + def test_from_encoder_internal_token_encoder_paths(self): + """ + Test internal _token_encoder branches by capturing local closure from frame. + + This validates: + - empty texts path + - embedding model path + - GPT2Tokenizer fallback path + - _character_encoder empty-path branch + """ + + class _SpySplitter(EnhanceRecursiveCharacterTextSplitter): + captured_token_encoder = None + captured_character_encoder = None + + def __init__(self, **kwargs): + frame = currentframe() + if frame and frame.f_back: + _SpySplitter.captured_token_encoder = frame.f_back.f_locals.get("_token_encoder") + _SpySplitter.captured_character_encoder = frame.f_back.f_locals.get("_character_encoder") + super().__init__(**kwargs) + + mock_model = Mock() + mock_model.get_text_embedding_num_tokens.return_value = [3, 5] + + _SpySplitter.from_encoder(embedding_model_instance=mock_model, chunk_size=10, chunk_overlap=1) + token_encoder = _SpySplitter.captured_token_encoder + character_encoder = _SpySplitter.captured_character_encoder + + assert token_encoder is not None + assert character_encoder is not None + assert token_encoder([]) == [] + assert token_encoder(["abc", "defgh"]) == [3, 5] + assert character_encoder([]) == [] + + with patch( + "core.rag.splitter.fixed_text_splitter.GPT2Tokenizer.get_num_tokens", + side_effect=lambda text: len(text) + 1, + ): + _SpySplitter.from_encoder(embedding_model_instance=None, chunk_size=10, chunk_overlap=1) + token_encoder_without_model = _SpySplitter.captured_token_encoder + assert token_encoder_without_model is not None + assert token_encoder_without_model(["ab", "cdef"]) == [3, 5] + # ============================================================================ # Test FixedRecursiveCharacterTextSplitter @@ -908,6 +1039,56 @@ class TestFixedRecursiveCharacterTextSplitter: chunks = splitter.split_text(data) assert chunks == ["chunk 1\n\nsubchunk 1.\nsubchunk 2.", "chunk 2\n\nsubchunk 1\nsubchunk 2."] + def test_recursive_split_keep_separator_and_recursive_fallback(self): + """Cover keep-separator split branch and recursive _split_text fallback.""" + text = "short." + ("x" * 60) + splitter = FixedRecursiveCharacterTextSplitter( + fixed_separator="", + separators=[".", " ", ""], + chunk_size=10, + chunk_overlap=2, + keep_separator=True, + ) + + chunks = splitter.recursive_split_text(text) + + assert chunks + assert any("short." in chunk for chunk in chunks) + assert any(len(chunk) <= 12 for chunk in chunks) + + def test_recursive_split_newline_separator_filtering(self): + """Cover newline-specific empty filtering branch.""" + text = "line1\n\nline2\n\nline3" + splitter = FixedRecursiveCharacterTextSplitter( + fixed_separator="", + separators=["\n", ""], + chunk_size=50, + chunk_overlap=5, + ) + + chunks = splitter.recursive_split_text(text) + + assert chunks + assert all(chunk != "" for chunk in chunks) + assert "line1" in "".join(chunks) + assert "line2" in "".join(chunks) + assert "line3" in "".join(chunks) + + def test_recursive_split_without_new_separator_appends_long_chunk(self): + """Cover branch where no further separators exist and long split is appended directly.""" + text = "aa\n" + ("b" * 40) + splitter = FixedRecursiveCharacterTextSplitter( + fixed_separator="", + separators=["\n"], + chunk_size=10, + chunk_overlap=2, + ) + + chunks = splitter.recursive_split_text(text) + + assert "aa" in chunks + assert any(len(chunk) >= 40 for chunk in chunks) + # ============================================================================ # Test Metadata Preservation diff --git a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py index b613573927..2a83a4e802 100644 --- a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py +++ b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py @@ -15,7 +15,7 @@ from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.repositories.workflow_node_execution_repository import OrderConfig from libs.datetime_utils import naive_utc_now from models import Account, EndUser @@ -61,7 +61,7 @@ def sample_workflow_node_execution(): workflow_execution_id=str(uuid4()), index=1, node_id="test_node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -259,7 +259,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=1, node_id="node1", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Node 1", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -272,7 +272,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=2, node_id="node2", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Node 2", inputs={"input2": "value2"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -310,7 +310,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=2, node_id="node2", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Node 2", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -323,7 +323,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=1, node_id="node1", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Node 1", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, diff --git a/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_execution_repository.py b/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_execution_repository.py new file mode 100644 index 0000000000..c66e50437a --- /dev/null +++ b/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_execution_repository.py @@ -0,0 +1,84 @@ +from datetime import datetime +from unittest.mock import MagicMock +from uuid import uuid4 + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker + +from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository +from dify_graph.entities.workflow_execution import WorkflowExecution, WorkflowType +from models import Account, WorkflowRun +from models.enums import WorkflowRunTriggeredFrom + + +def _build_repository_with_mocked_session(session: MagicMock) -> SQLAlchemyWorkflowExecutionRepository: + engine = create_engine("sqlite:///:memory:") + real_session_factory = sessionmaker(bind=engine, expire_on_commit=False) + + user = MagicMock(spec=Account) + user.id = str(uuid4()) + user.current_tenant_id = str(uuid4()) + + repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=real_session_factory, + user=user, + app_id="app-id", + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + ) + + session_context = MagicMock() + session_context.__enter__.return_value = session + session_context.__exit__.return_value = False + repository._session_factory = MagicMock(return_value=session_context) + return repository + + +def _build_execution(*, execution_id: str, started_at: datetime) -> WorkflowExecution: + return WorkflowExecution.new( + id_=execution_id, + workflow_id="workflow-id", + workflow_type=WorkflowType.WORKFLOW, + workflow_version="1.0.0", + graph={"nodes": [], "edges": []}, + inputs={"query": "hello"}, + started_at=started_at, + ) + + +def test_save_uses_execution_started_at_when_record_does_not_exist(): + session = MagicMock() + session.get.return_value = None + repository = _build_repository_with_mocked_session(session) + + started_at = datetime(2026, 1, 1, 12, 0, 0) + execution = _build_execution(execution_id=str(uuid4()), started_at=started_at) + + repository.save(execution) + + saved_model = session.merge.call_args.args[0] + assert saved_model.created_at == started_at + session.commit.assert_called_once() + + +def test_save_preserves_existing_created_at_when_record_already_exists(): + session = MagicMock() + repository = _build_repository_with_mocked_session(session) + + execution_id = str(uuid4()) + existing_created_at = datetime(2026, 1, 1, 12, 0, 0) + existing_run = WorkflowRun() + existing_run.id = execution_id + existing_run.tenant_id = repository._tenant_id + existing_run.created_at = existing_created_at + session.get.return_value = existing_run + + execution = _build_execution( + execution_id=execution_id, + started_at=datetime(2026, 1, 1, 12, 30, 0), + ) + + repository.save(execution) + + saved_model = session.merge.call_args.args[0] + assert saved_model.created_at == existing_created_at + session.commit.assert_called_once() diff --git a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py index bae5bae06d..456c3dde12 100644 --- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py +++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py @@ -14,7 +14,7 @@ from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from libs.datetime_utils import naive_utc_now from models import Account, WorkflowNodeExecutionTriggeredFrom @@ -70,7 +70,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, @@ -108,7 +108,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.SUCCEEDED, @@ -153,7 +153,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, @@ -195,7 +195,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, diff --git a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py index c880b8d41b..eeab81a178 100644 --- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py +++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py @@ -13,6 +13,7 @@ from unittest.mock import MagicMock from sqlalchemy import Engine +from configs import dify_config from core.repositories.sqlalchemy_workflow_node_execution_repository import ( SQLAlchemyWorkflowNodeExecutionRepository, ) @@ -20,7 +21,7 @@ from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from models import Account, WorkflowNodeExecutionTriggeredFrom from models.enums import ExecutionOffLoadType from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload @@ -41,7 +42,7 @@ class TruncationTestCase: def create_test_cases() -> list[TruncationTestCase]: """Create test cases for different truncation scenarios.""" # Create large data that will definitely exceed the threshold (10KB) - large_data = {"data": "x" * (TRUNCATION_SIZE_THRESHOLD + 1000)} + large_data = {"data": "x" * (dify_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE + 1000)} small_data = {"data": "small"} return [ @@ -101,7 +102,7 @@ def create_workflow_node_execution( workflow_execution_id="test-workflow-execution-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", inputs=inputs, outputs=outputs, @@ -145,7 +146,7 @@ class TestSQLAlchemyWorkflowNodeExecutionRepositoryTruncation: db_model.index = 1 db_model.predecessor_node_id = None db_model.node_id = "node-id" - db_model.node_type = NodeType.LLM + db_model.node_type = BuiltinNodeTypes.LLM db_model.title = "Test Node" db_model.inputs = json.dumps({"value": "inputs"}) db_model.process_data = json.dumps({"value": "process_data"}) diff --git a/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py b/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py index 14b42adbbe..2b508ca654 100644 --- a/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py +++ b/api/tests/unit_tests/core/test_trigger_debug_event_selectors.py @@ -6,7 +6,7 @@ import pytest import pytz from core.trigger.debug import event_selectors -from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig +from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig class _DummyRedis: diff --git a/api/tests/unit_tests/core/tools/__init__.py b/api/tests/unit_tests/core/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/tools/test_builtin_tool_base.py b/api/tests/unit_tests/core/tools/test_builtin_tool_base.py new file mode 100644 index 0000000000..f123f60a34 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_builtin_tool_base.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from collections.abc import Generator +from types import SimpleNamespace +from typing import Any +from unittest.mock import patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.builtin_tool.tool import BuiltinTool +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage, ToolProviderType +from dify_graph.model_runtime.entities.message_entities import UserPromptMessage + + +class _BuiltinDummyTool(BuiltinTool): + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + +def _build_tool() -> _BuiltinDummyTool: + entity = ToolEntity( + identity=ToolIdentity(author="author", name="tool-a", label=I18nObject(en_US="tool-a"), provider="provider-a"), + parameters=[], + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + return _BuiltinDummyTool(provider="provider-a", entity=entity, runtime=runtime) + + +def test_builtin_tool_fork_and_provider_type(): + tool = _build_tool() + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, _BuiltinDummyTool) + assert forked.runtime.tenant_id == "tenant-2" + assert tool.tool_provider_type() == ToolProviderType.BUILT_IN + + +def test_invoke_model_calls_model_invocation_utils_invoke(): + tool = _build_tool() + with patch("core.tools.builtin_tool.tool.ModelInvocationUtils.invoke", return_value="result") as mock_invoke: + assert ( + tool.invoke_model( + user_id="u1", + prompt_messages=[UserPromptMessage(content="hello")], + stop=[], + ) + == "result" + ) + mock_invoke.assert_called_once() + + +def test_get_max_tokens_returns_value(): + tool = _build_tool() + with patch("core.tools.builtin_tool.tool.ModelInvocationUtils.get_max_llm_context_tokens", return_value=4096): + assert tool.get_max_tokens() == 4096 + + +def test_get_prompt_tokens_returns_value(): + tool = _build_tool() + with patch("core.tools.builtin_tool.tool.ModelInvocationUtils.calculate_tokens", return_value=7): + assert tool.get_prompt_tokens([UserPromptMessage(content="hello")]) == 7 + + +def test_runtime_none_raises(): + tool = _build_tool() + tool.runtime = None + with pytest.raises(ValueError, match="runtime is required"): + tool.get_max_tokens() + with pytest.raises(ValueError, match="runtime is required"): + tool.get_prompt_tokens([UserPromptMessage(content="hello")]) + + +def test_builtin_tool_summary_short_and_long_content_paths(): + tool = _build_tool() + + with patch.object(_BuiltinDummyTool, "get_max_tokens", return_value=100): + with patch.object(_BuiltinDummyTool, "get_prompt_tokens", return_value=10): + assert tool.summary(user_id="u1", content="short") == "short" + + with patch.object(_BuiltinDummyTool, "get_max_tokens", return_value=10): + with patch.object( + _BuiltinDummyTool, + "get_prompt_tokens", + side_effect=lambda prompt_messages: len(prompt_messages[-1].content), + ): + with patch.object( + _BuiltinDummyTool, + "invoke_model", + return_value=SimpleNamespace(message=SimpleNamespace(content="S")), + ): + result = tool.summary(user_id="u1", content="x" * 30 + "\n" + "y" * 5) + + assert result + assert "S" in result diff --git a/api/tests/unit_tests/core/tools/test_builtin_tool_provider.py b/api/tests/unit_tests/core/tools/test_builtin_tool_provider.py new file mode 100644 index 0000000000..ad6d5906ae --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_builtin_tool_provider.py @@ -0,0 +1,216 @@ +from __future__ import annotations + +from collections.abc import Generator +from typing import Any +from unittest.mock import patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.plugin.entities.plugin_daemon import CredentialType +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.builtin_tool.provider import BuiltinToolProviderController +from core.tools.builtin_tool.tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage, ToolProviderEntity, ToolProviderType +from core.tools.errors import ToolProviderNotFoundError + + +class _FakeBuiltinTool(BuiltinTool): + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + +class _ConcreteBuiltinProvider(BuiltinToolProviderController): + last_validation: tuple[str, dict[str, Any]] | None = None + + def _validate_credentials(self, user_id: str, credentials: dict[str, Any]): + self.last_validation = (user_id, credentials) + + +def _provider_yaml() -> dict[str, Any]: + return { + "identity": { + "author": "Dify", + "name": "fake_provider", + "label": {"en_US": "Fake Provider"}, + "description": {"en_US": "Fake description"}, + "icon": "icon.svg", + "tags": ["utilities"], + }, + "credentials_for_provider": { + "api_key": { + "type": "secret-input", + "required": True, + } + }, + "oauth_schema": { + "client_schema": [ + { + "name": "client_id", + "type": "text-input", + } + ], + "credentials_schema": [ + { + "name": "access_token", + "type": "secret-input", + } + ], + }, + } + + +def _tool_yaml() -> dict[str, Any]: + return { + "identity": { + "author": "Dify", + "name": "tool_a", + "label": {"en_US": "Tool A"}, + }, + "parameters": [], + } + + +def test_builtin_tool_provider_init_load_tools_and_basic_accessors(monkeypatch): + yaml_payloads = [_provider_yaml(), _tool_yaml()] + + def _load_yaml(*args, **kwargs): + return yaml_payloads.pop(0) + + monkeypatch.setattr("core.tools.builtin_tool.provider.load_yaml_file_cached", _load_yaml) + monkeypatch.setattr( + "core.tools.builtin_tool.provider.listdir", + lambda *args, **kwargs: ["tool_a.yaml", "__init__.py", "readme.md"], + ) + monkeypatch.setattr( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + lambda *args, **kwargs: _FakeBuiltinTool, + ) + provider = _ConcreteBuiltinProvider() + + assert provider.get_credentials_schema() + assert provider.get_tools() + assert provider.get_tool("tool_a") is not None + assert provider.get_tool("missing") is None + assert provider.provider_type == ToolProviderType.BUILT_IN + assert provider.tool_labels == ["utilities"] + assert provider.need_credentials is True + + oauth_schema = provider.get_credentials_schema_by_type(CredentialType.OAUTH2) + assert len(oauth_schema) == 1 + api_schema = provider.get_credentials_schema_by_type(CredentialType.API_KEY) + assert len(api_schema) == 1 + assert provider.get_oauth_client_schema()[0].name == "client_id" + assert set(provider.get_supported_credential_types()) == {CredentialType.API_KEY, CredentialType.OAUTH2} + + +def test_builtin_tool_provider_invalid_credential_type_raises(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + with pytest.raises(ValueError, match="Invalid credential type: invalid"): + provider.get_credentials_schema_by_type("invalid") + + +def test_builtin_tool_provider_validate_credentials_delegates(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + provider.validate_credentials("user-1", {"api_key": "secret"}) + assert provider.last_validation == ("user-1", {"api_key": "secret"}) + + +def test_builtin_tool_provider_unauthorized_schema_is_empty(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + assert provider.get_credentials_schema_by_type(CredentialType.UNAUTHORIZED) == [] + + +def test_builtin_tool_provider_init_raises_when_provider_yaml_missing(): + with patch("core.tools.builtin_tool.provider.load_yaml_file_cached", side_effect=RuntimeError("boom")): + with pytest.raises(ToolProviderNotFoundError, match="can not load provider yaml"): + _ConcreteBuiltinProvider() + + +def test_builtin_tool_provider_handles_empty_credentials_and_oauth(): + provider = object.__new__(_ConcreteBuiltinProvider) + provider.tools = [] + provider.entity = ToolProviderEntity.model_validate( + { + "identity": { + "author": "Dify", + "name": "fake_provider", + "label": {"en_US": "Fake Provider"}, + "description": {"en_US": "Fake description"}, + "icon": "icon.svg", + "tags": None, + }, + "credentials_schema": [], + "oauth_schema": None, + }, + ) + + assert provider.get_oauth_client_schema() == [] + assert provider.get_supported_credential_types() == [] + assert provider.need_credentials is False + assert provider._get_tool_labels() == [] + + +def test_builtin_tool_provider_forked_tool_runtime_is_initialized(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + tool = provider.get_tool("tool_a") + assert tool is not None + assert isinstance(tool.runtime, ToolRuntime) + assert tool.runtime.tenant_id == "" + tool.runtime.invoke_from = InvokeFrom.DEBUGGER + assert tool.runtime.invoke_from == InvokeFrom.DEBUGGER diff --git a/api/tests/unit_tests/core/tools/test_builtin_tools_extra.py b/api/tests/unit_tests/core/tools/test_builtin_tools_extra.py new file mode 100644 index 0000000000..62cfb6ce5b --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_builtin_tools_extra.py @@ -0,0 +1,310 @@ +from __future__ import annotations + +import math +from types import SimpleNamespace + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.builtin_tool.providers._positions import BuiltinToolProviderSort +from core.tools.builtin_tool.providers.audio.audio import AudioToolProvider +from core.tools.builtin_tool.providers.audio.tools.asr import ASRTool +from core.tools.builtin_tool.providers.audio.tools.tts import TTSTool +from core.tools.builtin_tool.providers.code.code import CodeToolProvider +from core.tools.builtin_tool.providers.code.tools.simple_code import SimpleCode +from core.tools.builtin_tool.providers.time.time import WikiPediaProvider +from core.tools.builtin_tool.providers.time.tools.current_time import CurrentTimeTool +from core.tools.builtin_tool.providers.time.tools.localtime_to_timestamp import LocaltimeToTimestampTool +from core.tools.builtin_tool.providers.time.tools.timestamp_to_localtime import TimestampToLocaltimeTool +from core.tools.builtin_tool.providers.time.tools.timezone_conversion import TimezoneConversionTool +from core.tools.builtin_tool.providers.time.tools.weekday import WeekdayTool +from core.tools.builtin_tool.providers.webscraper.tools.webscraper import WebscraperTool +from core.tools.builtin_tool.providers.webscraper.webscraper import WebscraperProvider +from core.tools.builtin_tool.tool import BuiltinTool +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage +from core.tools.errors import ToolInvokeError +from dify_graph.file.enums import FileType +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey + + +def _build_builtin_tool(tool_cls: type[BuiltinTool]) -> BuiltinTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[], + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + return tool_cls(provider="provider-a", entity=entity, runtime=runtime) + + +def _raise_runtime_error(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("boom") + + +def test_current_time_tool(): + current_tool = _build_builtin_tool(CurrentTimeTool) + utc_text = list(current_tool.invoke(user_id="u", tool_parameters={"timezone": "UTC"}))[0].message.text + assert utc_text + + invalid_tz = list(current_tool.invoke(user_id="u", tool_parameters={"timezone": "Invalid/TZ"}))[0].message.text + assert "Invalid timezone" in invalid_tz + + +def test_localtime_to_timestamp_tool(): + localtime_tool = _build_builtin_tool(LocaltimeToTimestampTool) + ts_message = list( + localtime_tool.invoke(user_id="u", tool_parameters={"localtime": "2024-01-01 10:00:00", "timezone": "UTC"}) + )[0].message.text + ts_value = float(ts_message.strip()) + assert math.isfinite(ts_value) + assert ts_value >= 0 + with pytest.raises(ToolInvokeError): + LocaltimeToTimestampTool.localtime_to_timestamp("bad", "%Y-%m-%d %H:%M:%S", "UTC") + + +def test_timestamp_to_localtime_tool(): + to_local_tool = _build_builtin_tool(TimestampToLocaltimeTool) + local_text = list(to_local_tool.invoke(user_id="u", tool_parameters={"timestamp": 1704067200, "timezone": "UTC"}))[ + 0 + ].message.text + assert "2024" in local_text + with pytest.raises(ToolInvokeError): + TimestampToLocaltimeTool.timestamp_to_localtime("bad", "UTC") # type: ignore[arg-type] + + +def test_timezone_conversion_tool(): + timezone_tool = _build_builtin_tool(TimezoneConversionTool) + converted = list( + timezone_tool.invoke( + user_id="u", + tool_parameters={ + "current_time": "2024-01-01 08:00:00", + "current_timezone": "UTC", + "target_timezone": "Asia/Tokyo", + }, + ) + )[0].message.text + assert converted.startswith("2024-01-01") + with pytest.raises(ToolInvokeError): + TimezoneConversionTool.timezone_convert("bad", "UTC", "Asia/Tokyo") + + +def test_weekday_tool(): + weekday_tool = _build_builtin_tool(WeekdayTool) + valid = list(weekday_tool.invoke(user_id="u", tool_parameters={"year": 2024, "month": 1, "day": 1}))[0].message.text + assert "January 1, 2024" in valid + invalid = list(weekday_tool.invoke(user_id="u", tool_parameters={"year": 2024, "month": 2, "day": 31}))[ + 0 + ].message.text + assert "Invalid date" in invalid + with pytest.raises(ValueError, match="Month is required"): + list(weekday_tool.invoke(user_id="u", tool_parameters={"year": 2024, "day": 1})) + + +def test_simple_code_valid_execution(monkeypatch): + simple_code = _build_builtin_tool(SimpleCode) + + monkeypatch.setattr( + "core.tools.builtin_tool.providers.code.tools.simple_code.CodeExecutor.execute_code", + lambda *a: "ok", + ) + result = list( + simple_code.invoke( + user_id="u", + tool_parameters={"language": "python3", "code": "print(1)"}, + ) + )[0].message.text + assert result == "ok" + + +def test_simple_code_invalid_language(): + simple_code = _build_builtin_tool(SimpleCode) + + with pytest.raises(ValueError, match="Only python3 and javascript"): + list(simple_code.invoke(user_id="u", tool_parameters={"language": "go", "code": "fmt.Println(1)"})) + + +def test_simple_code_execution_error(monkeypatch): + simple_code = _build_builtin_tool(SimpleCode) + + monkeypatch.setattr( + "core.tools.builtin_tool.providers.code.tools.simple_code.CodeExecutor.execute_code", + _raise_runtime_error, + ) + with pytest.raises(ToolInvokeError, match="boom"): + list(simple_code.invoke(user_id="u", tool_parameters={"language": "python3", "code": "print(1)"})) + + +def test_webscraper_empty_url(): + webscraper = _build_builtin_tool(WebscraperTool) + empty = list(webscraper.invoke(user_id="u", tool_parameters={"url": ""}))[0].message.text + assert empty == "Please input url" + + +def test_webscraper_fetch(monkeypatch): + webscraper = _build_builtin_tool(WebscraperTool) + monkeypatch.setattr("core.tools.builtin_tool.providers.webscraper.tools.webscraper.get_url", lambda *a, **k: "page") + full = list(webscraper.invoke(user_id="u", tool_parameters={"url": "https://example.com"}))[0].message.text + assert full == "page" + + +def test_webscraper_summary(monkeypatch): + webscraper = _build_builtin_tool(WebscraperTool) + monkeypatch.setattr("core.tools.builtin_tool.providers.webscraper.tools.webscraper.get_url", lambda *a, **k: "page") + monkeypatch.setattr(webscraper, "summary", lambda user_id, content: "summary") + summarized = list( + webscraper.invoke( + user_id="u", + tool_parameters={"url": "https://example.com", "generate_summary": True}, + ) + )[0].message.text + assert summarized == "summary" + + +def test_webscraper_fetch_error(monkeypatch): + webscraper = _build_builtin_tool(WebscraperTool) + monkeypatch.setattr( + "core.tools.builtin_tool.providers.webscraper.tools.webscraper.get_url", + _raise_runtime_error, + ) + with pytest.raises(ToolInvokeError, match="boom"): + list(webscraper.invoke(user_id="u", tool_parameters={"url": "https://example.com"})) + + +def test_asr_invalid_file(): + asr = _build_builtin_tool(ASRTool) + file_obj = SimpleNamespace(type=FileType.DOCUMENT) + invalid_file = list(asr.invoke(user_id="u", tool_parameters={"audio_file": file_obj}))[0].message.text + assert "not a valid audio file" in invalid_file + + +def test_asr_valid_file_invocation(monkeypatch): + asr = _build_builtin_tool(ASRTool) + model_instance = type("M", (), {"invoke_speech2text": lambda self, file, user: "transcript"})() + model_manager = type("Mgr", (), {"get_model_instance": lambda *a, **k: model_instance})() + monkeypatch.setattr("core.tools.builtin_tool.providers.audio.tools.asr.download", lambda file: b"audio-bytes") + monkeypatch.setattr("core.tools.builtin_tool.providers.audio.tools.asr.ModelManager", lambda: model_manager) + audio_file = SimpleNamespace(type=FileType.AUDIO) + ok = list(asr.invoke(user_id="u", tool_parameters={"audio_file": audio_file, "model": "p#m"}))[0].message.text + assert ok == "transcript" + + +def test_asr_available_models_and_runtime_parameters(monkeypatch): + asr = _build_builtin_tool(ASRTool) + provider_model = type("PM", (), {"provider": "p", "models": [type("Model", (), {"model": "m"})()]})() + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.asr.ModelProviderService.get_models_by_model_type", + lambda *a, **k: [provider_model], + ) + assert asr.get_available_models() == [("p", "m")] + assert asr.get_runtime_parameters()[0].name == "model" + + +def test_tts_invoke_returns_messages(monkeypatch): + tts = _build_builtin_tool(TTSTool) + voices_model_instance = type( + "TTSM", + (), + { + "get_tts_voices": lambda self: [{"value": "voice-1"}], + "invoke_tts": lambda self, **kwargs: [b"a", b"b"], + }, + )() + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.tts.ModelManager", + lambda: type("M", (), {"get_model_instance": lambda *a, **k: voices_model_instance})(), + ) + messages = list(tts.invoke(user_id="u", tool_parameters={"model": "p#m", "text": "hello"})) + assert [m.type for m in messages] == [ToolInvokeMessage.MessageType.TEXT, ToolInvokeMessage.MessageType.BLOB] + + +def test_tts_get_available_models_requires_runtime(): + tts = _build_builtin_tool(TTSTool) + tts.runtime = None + with pytest.raises(ValueError, match="Runtime is required"): + tts.get_available_models() + + +def test_tts_tool_raises_when_runtime_missing(): + tts = _build_builtin_tool(TTSTool) + tts.runtime = None + with pytest.raises(ValueError, match="Runtime is required"): + list(tts.invoke(user_id="u", tool_parameters={"model": "p#m", "text": "hello"})) + + +@pytest.mark.parametrize( + "voices", + [[{"value": None}], []], +) +def test_tts_tool_raises_when_voice_unavailable(monkeypatch, voices): + tts = _build_builtin_tool(TTSTool) + tts.runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + model_without_voice = type( + "TTSModelNoVoice", + (), + { + "get_tts_voices": lambda self: voices, + "invoke_tts": lambda self, **kwargs: [b"x"], + }, + )() + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.tts.ModelManager", + lambda: type("Manager", (), {"get_model_instance": lambda *args, **kwargs: model_without_voice})(), + ) + with pytest.raises(ValueError, match="no voice available"): + list(tts.invoke(user_id="u", tool_parameters={"model": "p#m", "text": "hello"})) + + +def test_tts_tool_get_available_models_and_runtime_parameters(monkeypatch): + tts = _build_builtin_tool(TTSTool) + + model_1 = SimpleNamespace( + model="model-a", + model_properties={ModelPropertyKey.VOICES: [{"mode": "v1", "name": "Voice 1"}]}, + ) + model_2 = SimpleNamespace(model="model-b", model_properties={}) + provider_models = [SimpleNamespace(provider="provider-a", models=[model_1, model_2])] + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.tts.ModelProviderService.get_models_by_model_type", + lambda *args, **kwargs: provider_models, + ) + + available_models = tts.get_available_models() + assert available_models == [ + ("provider-a", "model-a", [{"mode": "v1", "name": "Voice 1"}]), + ("provider-a", "model-b", []), + ] + + runtime_parameters = tts.get_runtime_parameters() + assert runtime_parameters[0].name == "model" + assert runtime_parameters[0].required is True + assert runtime_parameters[0].options[0].value == "provider-a#model-a" + assert runtime_parameters[1].name == "voice#provider-a#model-a" + + +def test_provider_classes_and_builtin_sort(monkeypatch): + # Use object.__new__ to avoid YAML-loading __init__; only pass-through validation is exercised. + # Ensure pass-through _validate_credentials methods are executed. + AudioToolProvider._validate_credentials(object.__new__(AudioToolProvider), "u", {}) + CodeToolProvider._validate_credentials(object.__new__(CodeToolProvider), "u", {}) + WikiPediaProvider._validate_credentials(object.__new__(WikiPediaProvider), "u", {}) + WebscraperProvider._validate_credentials(object.__new__(WebscraperProvider), "u", {}) + + providers = [SimpleNamespace(name="b"), SimpleNamespace(name="a")] + monkeypatch.setattr(BuiltinToolProviderSort, "_position", {}) + monkeypatch.setattr( + "core.tools.builtin_tool.providers._positions.get_tool_position_map", + lambda _: {"a": 0, "b": 1}, + ) + monkeypatch.setattr( + "core.tools.builtin_tool.providers._positions.sort_by_position_map", + lambda position, values, name_func: sorted(values, key=lambda x: name_func(x)), + ) + sorted_providers = BuiltinToolProviderSort.sort(providers) + assert [p.name for p in sorted_providers] == ["a", "b"] diff --git a/api/tests/unit_tests/core/tools/test_custom_tool.py b/api/tests/unit_tests/core/tools/test_custom_tool.py new file mode 100644 index 0000000000..79b8eaaa87 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_custom_tool.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import httpx +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.custom_tool.tool import ApiTool, ParsedResponse +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_bundle import ApiToolBundle +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage +from core.tools.errors import ToolInvokeError, ToolParameterValidationError, ToolProviderCredentialValidationError + + +def _build_tool(*, openapi: dict | None = None) -> ApiTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[], + ) + bundle = ApiToolBundle( + server_url="https://api.example.com/items/{id}", + method="GET", + summary="summary", + operation_id="op-id", + parameters=[], + author="author", + openapi=openapi or {"parameters": []}, + ) + runtime = ToolRuntime( + tenant_id="tenant-1", + invoke_from=InvokeFrom.DEBUGGER, + credentials={"auth_type": "api_key_header", "api_key_value": "k"}, + ) + return ApiTool(entity=entity, api_bundle=bundle, runtime=runtime, provider_id="provider-id") + + +def test_parsed_response_to_string(): + assert ParsedResponse({"a": 1}, True).to_string() == '{"a": 1}' + assert ParsedResponse("ok", False).to_string() == "ok" + + +def test_api_tool_fork_runtime_and_validate_credentials(monkeypatch): + tool = _build_tool() + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, ApiTool) + assert forked.runtime.tenant_id == "tenant-2" + + tool.api_bundle = None # type: ignore[assignment] + with pytest.raises(ValueError, match="api_bundle is required"): + tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + + tool = _build_tool() + assert tool.validate_credentials(credentials={}, parameters={}, format_only=True) == "" + monkeypatch.setattr(tool, "assembling_request", lambda parameters: {"Authorization": "Bearer x"}) + monkeypatch.setattr( + tool, + "do_http_request", + lambda url, method, headers, parameters: httpx.Response(200, json={"ok": True}), + ) + result = tool.validate_credentials(credentials={}, parameters={"a": 1}, format_only=False) + assert result == '{"ok": true}' + + +def test_assembling_request_auth_header_assembly(): + tool = _build_tool() + + headers = tool.assembling_request(parameters={}) + assert headers["Authorization"] == "k" + + tool.runtime.credentials = { + "auth_type": "api_key_header", + "api_key_header_prefix": "bearer", + "api_key_value": "abc", + } + headers = tool.assembling_request(parameters={}) + assert headers["Authorization"] == "Bearer abc" + + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_header_prefix": "basic", "api_key_value": "abc"} + headers = tool.assembling_request(parameters={}) + assert headers["Authorization"] == "Basic abc" + + tool.runtime.credentials = {"auth_type": "api_key_query", "api_key_value": "abc"} + assert tool.assembling_request(parameters={}) == {} + + +def test_assembling_request_runtime_auth_errors(): + tool = _build_tool() + + tool.runtime = None + with pytest.raises(ToolProviderCredentialValidationError, match="runtime not initialized"): + tool.assembling_request(parameters={}) + + tool.runtime = ToolRuntime(tenant_id="tenant", credentials={}) + with pytest.raises(ToolProviderCredentialValidationError, match="Missing auth_type"): + tool.assembling_request(parameters={}) + + tool.runtime.credentials = {"auth_type": "api_key_header"} + with pytest.raises(ToolProviderCredentialValidationError, match="Missing api_key_value"): + tool.assembling_request(parameters={}) + + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_value": 123} + with pytest.raises(ToolProviderCredentialValidationError, match="must be a string"): + tool.assembling_request(parameters={}) + + +def test_assembling_request_parameter_validation_and_defaults(): + tool = _build_tool() + + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_value": "x"} + tool.api_bundle.parameters = [ + SimpleNamespace(required=True, name="required_param", default=None), + ] + with pytest.raises(ToolParameterValidationError, match="Missing required parameter required_param"): + tool.assembling_request(parameters={}) + + tool.api_bundle.parameters = [ + SimpleNamespace(required=True, name="required_param", default="d"), + ] + params = {} + tool.assembling_request(parameters=params) + assert params["required_param"] == "d" + + +def test_validate_and_parse_response_branches(): + tool = _build_tool() + + with pytest.raises(ToolInvokeError, match="status code 500"): + tool.validate_and_parse_response(httpx.Response(500, text="boom")) + + empty = tool.validate_and_parse_response(httpx.Response(200, content=b"")) + assert empty.is_json is False + assert "Empty response from the tool" in str(empty.content) + + json_resp = tool.validate_and_parse_response( + httpx.Response(200, json={"a": 1}, headers={"content-type": "application/json"}) + ) + assert json_resp.is_json is True + assert json_resp.content == {"a": 1} + + non_json_type = tool.validate_and_parse_response( + httpx.Response(200, text='{"a": 1}', headers={"content-type": "text/plain"}) + ) + assert non_json_type.is_json is False + assert non_json_type.content == '{"a": 1}' + + plain_resp = tool.validate_and_parse_response(httpx.Response(200, text="plain")) + assert plain_resp.is_json is False + assert plain_resp.content == "plain" + + with pytest.raises(ValueError, match="Invalid response type"): + tool.validate_and_parse_response("invalid") # type: ignore[arg-type] + + +def test_get_parameter_value_and_type_conversion_helpers(): + tool = _build_tool() + + assert tool.get_parameter_value({"name": "x"}, {"x": 1}) == 1 + assert tool.get_parameter_value({"name": "x", "required": False, "schema": {"default": "d"}}, {}) == "d" + with pytest.raises(ToolParameterValidationError, match="Missing required parameter x"): + tool.get_parameter_value({"name": "x", "required": True}, {}) + + assert tool._convert_body_property_any_of({}, "12", [{"type": "integer"}]) == 12 + assert tool._convert_body_property_any_of({}, "1.5", [{"type": "number"}]) == 1.5 + assert tool._convert_body_property_any_of({}, "true", [{"type": "boolean"}]) is True + assert tool._convert_body_property_any_of({}, "", [{"type": "null"}]) is None + assert tool._convert_body_property_any_of({}, "x", [{"anyOf": [{"type": "string"}]}]) == "x" + + assert tool._convert_body_property_type({"type": "integer"}, "1") == 1 + assert tool._convert_body_property_type({"type": "number"}, "1.2") == 1.2 + assert tool._convert_body_property_type({"type": "string"}, 1) == "1" + assert tool._convert_body_property_type({"type": "boolean"}, 1) is True + assert tool._convert_body_property_type({"type": "null"}, None) is None + assert tool._convert_body_property_type({"type": "object"}, '{"a":1}') == {"a": 1} + assert tool._convert_body_property_type({"type": "array"}, "[1,2]") == [1, 2] + assert tool._convert_body_property_type({"type": "invalid"}, "v") == "v" + assert tool._convert_body_property_type({"anyOf": [{"type": "integer"}]}, "2") == 2 + + +def test_do_http_request_builds_arguments_and_handles_invalid_method(monkeypatch): + openapi = { + "parameters": [ + {"name": "id", "in": "path", "required": True, "schema": {"type": "string"}}, + {"name": "q", "in": "query", "required": False, "schema": {"default": ""}}, + {"name": "X-Extra", "in": "header", "required": False, "schema": {"default": "x"}}, + {"name": "sid", "in": "cookie", "required": False, "schema": {"default": "cookie1"}}, + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "required": ["count"], + "properties": { + "count": {"type": "integer"}, + "name": {"type": "string", "default": "n"}, + }, + } + } + } + }, + } + tool = _build_tool(openapi=openapi) + tool.runtime.credentials = {"auth_type": "api_key_query", "api_key_query_param": "key", "api_key_value": "v"} + headers = {} + captured = {} + + def _fake_get(url, **kwargs): + captured["url"] = url + captured["kwargs"] = kwargs + return httpx.Response(200, text="ok") + + monkeypatch.setattr("core.tools.custom_tool.tool.ssrf_proxy.get", _fake_get) + response = tool.do_http_request( + "https://api.example.com/items/{id}", + "GET", + headers=headers, + parameters={"id": "123", "count": "2", "q": "search"}, + ) + + assert isinstance(response, httpx.Response) + assert captured["url"].endswith("/items/123") + assert captured["kwargs"]["params"]["q"] == "search" + assert captured["kwargs"]["params"]["key"] == "v" + assert captured["kwargs"]["headers"]["Content-Type"] == "application/json" + + invalid_method_tool = _build_tool(openapi={"parameters": []}) + with pytest.raises(ValueError, match="Invalid http method"): + invalid_method_tool.do_http_request("https://api.example.com", "TRACE", headers={}, parameters={}) + + +def test_do_http_request_handles_file_upload_and_invoke_paths(monkeypatch): + openapi = { + "parameters": [], + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "type": "object", + "properties": {"file": {"format": "binary"}}, + } + } + } + }, + } + tool = _build_tool(openapi=openapi) + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_value": "k"} + fake_file = SimpleNamespace(filename="a.txt", mime_type="text/plain") + captured = {} + + def _fake_post(url, **kwargs): + captured["headers"] = kwargs["headers"] + captured["files"] = kwargs["files"] + return httpx.Response(200, text="ok") + + monkeypatch.setattr("core.tools.custom_tool.tool.download", lambda _: b"file-bytes") + monkeypatch.setattr("core.tools.custom_tool.tool.ssrf_proxy.post", _fake_post) + response = tool.do_http_request( + "https://api.example.com/upload", + "POST", + headers={}, + parameters={"file": fake_file}, + ) + assert isinstance(response, httpx.Response) + assert "Content-Type" not in captured["headers"] + assert captured["files"][0][0] == "file" + + # _invoke JSON path + monkeypatch.setattr(tool, "assembling_request", lambda parameters: {}) + monkeypatch.setattr(tool, "do_http_request", lambda *args, **kwargs: httpx.Response(200, text='{"a":1}')) + monkeypatch.setattr(tool, "validate_and_parse_response", lambda _: ParsedResponse({"a": 1}, True)) + messages = list(tool.invoke(user_id="u1", tool_parameters={})) + assert [m.type for m in messages] == [ToolInvokeMessage.MessageType.JSON, ToolInvokeMessage.MessageType.TEXT] + + # _invoke text path + monkeypatch.setattr(tool, "validate_and_parse_response", lambda _: ParsedResponse("plain", False)) + messages = list(tool.invoke(user_id="u1", tool_parameters={})) + assert len(messages) == 1 + assert messages[0].message.text == "plain" diff --git a/api/tests/unit_tests/core/tools/test_custom_tool_provider.py b/api/tests/unit_tests/core/tools/test_custom_tool_provider.py new file mode 100644 index 0000000000..93ae217e24 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_custom_tool_provider.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.tools.custom_tool.provider import ApiToolProviderController +from core.tools.custom_tool.tool import ApiTool +from core.tools.entities.tool_bundle import ApiToolBundle +from core.tools.entities.tool_entities import ApiProviderAuthType, ToolProviderType + + +def _db_provider() -> SimpleNamespace: + bundle = ApiToolBundle( + server_url="https://api.example.com/items", + method="GET", + summary="List items", + operation_id="list_items", + parameters=[], + author="author", + openapi={"parameters": []}, + ) + return SimpleNamespace( + id="provider-id", + tenant_id="tenant-1", + name="provider-a", + description="desc", + icon="icon.svg", + user=SimpleNamespace(name="Alice"), + tools=[bundle], + ) + + +def test_api_tool_provider_from_db_and_parse_tool_bundle(): + controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.API_KEY_HEADER) + assert controller.provider_type == ToolProviderType.API + assert any(c.name == "api_key_value" for c in controller.entity.credentials_schema) + + tool = controller._parse_tool_bundle(_db_provider().tools[0]) + assert isinstance(tool, ApiTool) + assert tool.entity.identity.provider == "provider-id" + + +def test_api_tool_provider_from_db_query_auth_and_none_auth(): + query_controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.API_KEY_QUERY) + assert any(c.name == "api_key_query_param" for c in query_controller.entity.credentials_schema) + + none_controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.NONE) + assert [c.name for c in none_controller.entity.credentials_schema] == ["auth_type"] + + +def test_api_tool_provider_load_get_tools_and_get_tool(): + controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.NONE) + loaded = controller.load_bundled_tools(_db_provider().tools) + assert len(loaded) == 1 + + assert isinstance(controller.get_tool("list_items"), ApiTool) + + with pytest.raises(ValueError, match="not found"): + controller.get_tool("missing") + + # Return cached tools without querying database. + cached = controller.get_tools("tenant-1") + assert len(cached) == 1 + + # Force DB fetch branch. + controller.tools = [] + provider_with_tools = _db_provider() + with patch("core.tools.custom_tool.provider.db") as mock_db: + scalars_result = Mock() + scalars_result.all.return_value = [provider_with_tools] + mock_db.session.scalars.return_value = scalars_result + tools = controller.get_tools("tenant-1") + assert len(tools) == 1 diff --git a/api/tests/unit_tests/core/tools/test_dataset_retriever_tool.py b/api/tests/unit_tests/core/tools/test_dataset_retriever_tool.py new file mode 100644 index 0000000000..23c0be9487 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_dataset_retriever_tool.py @@ -0,0 +1,145 @@ +"""Unit tests for DatasetRetrieverTool behavior and retrieval wiring.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock, patch + +from core.app.app_config.entities import DatasetRetrieveConfigEntity +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.utils.dataset_retriever_tool import DatasetRetrieverTool + + +def _retrieve_config() -> DatasetRetrieveConfigEntity: + return DatasetRetrieveConfigEntity(retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE) + + +def test_get_dataset_tools_returns_empty_for_empty_dataset_ids() -> None: + # Arrange + retrieve_config = _retrieve_config() + + # Act + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=[], + retrieve_config=retrieve_config, + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={}, + ) + + # Assert + assert tools == [] + + +def test_get_dataset_tools_returns_empty_for_missing_retrieve_config() -> None: + # Arrange + dataset_ids = ["d1"] + + # Act + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=dataset_ids, + retrieve_config=None, # type: ignore[arg-type] + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={}, + ) + + # Assert + assert tools == [] + + +def test_get_dataset_tools_builds_tool_and_restores_strategy() -> None: + # Arrange + retrieve_config = _retrieve_config() + retrieval_tool = SimpleNamespace(name="dataset_tool", description="desc", run=lambda query: f"result:{query}") + feature = Mock() + feature.to_dataset_retriever_tool.return_value = [retrieval_tool] + + # Act + with patch("core.tools.utils.dataset_retriever_tool.DatasetRetrieval", return_value=feature): + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=["d1"], + retrieve_config=retrieve_config, + return_resource=True, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={"x": 1}, + ) + + # Assert + assert len(tools) == 1 + assert tools[0].entity.identity.name == "dataset_tool" + assert retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE + + +def _build_dataset_tool() -> tuple[DatasetRetrieverTool, SimpleNamespace]: + retrieval_tool = SimpleNamespace(name="dataset_tool", description="desc", run=lambda query: f"result:{query}") + feature = Mock() + feature.to_dataset_retriever_tool.return_value = [retrieval_tool] + with patch("core.tools.utils.dataset_retriever_tool.DatasetRetrieval", return_value=feature): + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=["d1"], + retrieve_config=_retrieve_config(), + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={}, + ) + return tools[0], retrieval_tool + + +def test_runtime_parameters_shape() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + params = tool.get_runtime_parameters() + + # Assert + assert len(params) == 1 + assert params[0].name == "query" + + +def test_empty_query_behavior() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + empty_query = list(tool.invoke(user_id="u", tool_parameters={})) + + # Assert + assert len(empty_query) == 1 + assert empty_query[0].message.text == "please input query" + + +def test_query_invocation_result() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + result = list(tool.invoke(user_id="u", tool_parameters={"query": "hello"})) + + # Assert + assert len(result) == 1 + assert result[0].message.text == "result:hello" + + +def test_validate_credentials() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + result = tool.validate_credentials(credentials={}, parameters={}, format_only=False) + + # Assert + assert result is None diff --git a/api/tests/unit_tests/core/tools/test_mcp_tool.py b/api/tests/unit_tests/core/tools/test_mcp_tool.py new file mode 100644 index 0000000000..eaf054de59 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_mcp_tool.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +import base64 +from unittest.mock import patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.mcp.types import ( + BlobResourceContents, + CallToolResult, + EmbeddedResource, + ImageContent, + TextContent, + TextResourceContents, +) +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage, ToolProviderType +from core.tools.errors import ToolInvokeError +from core.tools.mcp_tool.tool import MCPTool + + +def _build_mcp_tool(*, with_output_schema: bool = True) -> MCPTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="remote-tool", + label=I18nObject(en_US="remote-tool"), + provider="provider-id", + ), + parameters=[], + output_schema={"type": "object"} if with_output_schema else {}, + ) + return MCPTool( + entity=entity, + runtime=ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER), + tenant_id="tenant-1", + icon="icon.svg", + server_url="https://mcp.example.com", + provider_id="provider-id", + headers={"x-auth": "token"}, + ) + + +def test_mcp_tool_provider_type_and_fork_runtime(): + tool = _build_mcp_tool() + assert tool.tool_provider_type() == ToolProviderType.MCP + + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, MCPTool) + assert forked.runtime.tenant_id == "tenant-2" + assert forked.provider_id == "provider-id" + + +def test_mcp_tool_text_and_json_processing_helpers(): + tool = _build_mcp_tool() + + json_messages = list(tool._process_text_content(TextContent(type="text", text='{"a": 1}'))) + assert json_messages[0].type == ToolInvokeMessage.MessageType.JSON + + plain_messages = list(tool._process_text_content(TextContent(type="text", text="not-json"))) + assert plain_messages[0].type == ToolInvokeMessage.MessageType.TEXT + assert plain_messages[0].message.text == "not-json" + + list_messages = list(tool._process_json_content([{"k": 1}, {"k": 2}])) + assert [m.type for m in list_messages] == [ToolInvokeMessage.MessageType.JSON, ToolInvokeMessage.MessageType.JSON] + + mixed_list_messages = list(tool._process_json_list([{"k": 1}, 2])) + assert len(mixed_list_messages) == 1 + assert mixed_list_messages[0].type == ToolInvokeMessage.MessageType.TEXT + + primitive_messages = list(tool._process_json_content(123)) + assert primitive_messages[0].message.text == "123" + + +def test_mcp_tool_usage_extraction_helpers(): + usage = MCPTool._extract_usage_dict({"usage": {"total_tokens": 9}}) + assert usage == {"total_tokens": 9} + + usage = MCPTool._extract_usage_dict({"metadata": {"usage": {"prompt_tokens": 3, "completion_tokens": 2}}}) + assert usage == {"prompt_tokens": 3, "completion_tokens": 2} + + usage = MCPTool._extract_usage_dict({"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}) + assert usage == {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3} + + usage = MCPTool._extract_usage_dict({"nested": [{"deep": {"usage": {"total_tokens": 7}}}]}) + assert usage == {"total_tokens": 7} + + result_with_usage = CallToolResult(content=[], _meta={"usage": {"prompt_tokens": 1, "completion_tokens": 2}}) + derived = MCPTool._derive_usage_from_result(result_with_usage) + assert derived.prompt_tokens == 1 + assert derived.completion_tokens == 2 + + result_without_usage = CallToolResult(content=[], _meta=None) + derived = MCPTool._derive_usage_from_result(result_without_usage) + assert derived.total_tokens == 0 + + +def test_mcp_tool_invoke_handles_content_types_and_structured_output(): + tool = _build_mcp_tool() + img_data = base64.b64encode(b"img").decode() + blob_data = base64.b64encode(b"blob").decode() + result = CallToolResult( + content=[ + TextContent(type="text", text='{"a": 1}'), + ImageContent(type="image", data=img_data, mimeType="image/png"), + EmbeddedResource( + type="resource", + resource=TextResourceContents(uri="file:///tmp/a.txt", text="embedded-text"), + ), + EmbeddedResource( + type="resource", + resource=BlobResourceContents( + uri="file:///tmp/b.bin", + blob=blob_data, + mimeType="application/octet-stream", + ), + ), + ], + structuredContent={"x": 1}, + _meta={"usage": {"prompt_tokens": 2, "completion_tokens": 3}}, + ) + + with patch.object(MCPTool, "invoke_remote_mcp_tool", return_value=result): + messages = list(tool.invoke(user_id="user-1", tool_parameters={"a": 1})) + + types = [m.type for m in messages] + assert ToolInvokeMessage.MessageType.JSON in types + assert ToolInvokeMessage.MessageType.BLOB in types + assert ToolInvokeMessage.MessageType.TEXT in types + assert ToolInvokeMessage.MessageType.VARIABLE in types + assert tool.latest_usage.total_tokens == 5 + + +def test_mcp_tool_invoke_raises_for_unsupported_embedded_resource(): + tool = _build_mcp_tool() + # Use model_construct to bypass pydantic validation and force unsupported resource path. + bad_resource = EmbeddedResource.model_construct(type="resource", resource=object()) + result = CallToolResult(content=[bad_resource], _meta=None) + + with patch.object(MCPTool, "invoke_remote_mcp_tool", return_value=result): + with pytest.raises(ToolInvokeError, match="Unsupported embedded resource type"): + list(tool.invoke(user_id="user-1", tool_parameters={})) + + +def test_mcp_tool_handle_none_parameter_filters_empty_values(): + tool = _build_mcp_tool() + cleaned = tool._handle_none_parameter({"a": 1, "b": None, "c": "", "d": " ", "e": "ok"}) + assert cleaned == {"a": 1, "e": "ok"} diff --git a/api/tests/unit_tests/core/tools/test_mcp_tool_provider.py b/api/tests/unit_tests/core/tools/test_mcp_tool_provider.py new file mode 100644 index 0000000000..1060d19ab1 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_mcp_tool_provider.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from datetime import datetime +from unittest.mock import Mock, patch + +import pytest + +from core.entities.mcp_provider import MCPProviderEntity +from core.tools.entities.tool_entities import ToolProviderType +from core.tools.mcp_tool.provider import MCPToolProviderController +from core.tools.mcp_tool.tool import MCPTool + + +def _build_mcp_entity(*, icon: str = "icon.svg") -> MCPProviderEntity: + now = datetime.now() + return MCPProviderEntity( + id="db-id", + provider_id="provider-id", + name="MCP Provider", + tenant_id="tenant-1", + user_id="user-1", + server_url="https://mcp.example.com", + headers={}, + timeout=30, + sse_read_timeout=300, + authed=False, + credentials={}, + tools=[ + { + "name": "remote-tool", + "description": "remote tool", + "inputSchema": {}, + "outputSchema": {"type": "object"}, + } + ], + icon=icon, + created_at=now, + updated_at=now, + ) + + +def test_mcp_tool_provider_controller_from_entity_and_get_tools(): + entity = _build_mcp_entity() + with patch("core.tools.mcp_tool.provider.ToolTransformService.convert_mcp_schema_to_parameter", return_value=[]): + controller = MCPToolProviderController.from_entity(entity) + + assert controller.provider_type == ToolProviderType.MCP + tool = controller.get_tool("remote-tool") + assert isinstance(tool, MCPTool) + assert tool.tenant_id == "tenant-1" + + tools = controller.get_tools() + assert len(tools) == 1 + assert isinstance(tools[0], MCPTool) + + with pytest.raises(ValueError, match="not found"): + controller.get_tool("missing") + + +def test_mcp_tool_provider_controller_from_entity_requires_icon(): + entity = _build_mcp_entity(icon="") + with patch("core.tools.mcp_tool.provider.ToolTransformService.convert_mcp_schema_to_parameter", return_value=[]): + with pytest.raises(ValueError, match="icon is required"): + MCPToolProviderController.from_entity(entity) + + +def test_mcp_tool_provider_controller_from_db_delegates_to_entity(): + entity = _build_mcp_entity() + db_provider = Mock() + db_provider.to_entity.return_value = entity + with patch("core.tools.mcp_tool.provider.ToolTransformService.convert_mcp_schema_to_parameter", return_value=[]): + controller = MCPToolProviderController.from_db(db_provider) + assert isinstance(controller, MCPToolProviderController) diff --git a/api/tests/unit_tests/core/tools/test_plugin_tool.py b/api/tests/unit_tests/core/tools/test_plugin_tool.py new file mode 100644 index 0000000000..4378432a0f --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_plugin_tool.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from unittest.mock import Mock, patch + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolParameter +from core.tools.plugin_tool.tool import PluginTool + + +def _build_plugin_tool(*, has_runtime_parameters: bool) -> PluginTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[ + ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ], + has_runtime_parameters=has_runtime_parameters, + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER, credentials={"api_key": "x"}) + return PluginTool( + entity=entity, + runtime=runtime, + tenant_id="tenant-1", + icon="icon.svg", + plugin_unique_identifier="plugin-uid", + ) + + +def test_plugin_tool_invoke_and_fork_runtime(): + tool = _build_plugin_tool(has_runtime_parameters=False) + manager = Mock() + manager.invoke.return_value = iter([tool.create_text_message("ok")]) + + with patch("core.tools.plugin_tool.tool.PluginToolManager", return_value=manager): + with patch( + "core.tools.plugin_tool.tool.convert_parameters_to_plugin_format", + return_value={"converted": 1}, + ): + messages = list(tool.invoke(user_id="user-1", tool_parameters={"raw": 1})) + + assert [m.message.text for m in messages] == ["ok"] + manager.invoke.assert_called_once() + assert manager.invoke.call_args.kwargs["tool_parameters"] == {"converted": 1} + + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, PluginTool) + assert forked.runtime.tenant_id == "tenant-2" + assert forked.plugin_unique_identifier == "plugin-uid" + + +def test_plugin_tool_get_runtime_parameters_branches(): + tool = _build_plugin_tool(has_runtime_parameters=False) + assert tool.get_runtime_parameters() == tool.entity.parameters + + tool = _build_plugin_tool(has_runtime_parameters=True) + cached = [ + ToolParameter.get_simple_instance( + name="k", + llm_description="k", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ] + tool.runtime_parameters = cached + assert tool.get_runtime_parameters() == cached + + tool.runtime_parameters = None + manager = Mock() + returned = [ + ToolParameter.get_simple_instance( + name="dyn", + llm_description="dyn", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ] + manager.get_runtime_parameters.return_value = returned + with patch("core.tools.plugin_tool.tool.PluginToolManager", return_value=manager): + assert tool.get_runtime_parameters(conversation_id="c1", app_id="a1", message_id="m1") == returned + assert tool.runtime_parameters == returned diff --git a/api/tests/unit_tests/core/tools/test_plugin_tool_provider.py b/api/tests/unit_tests/core/tools/test_plugin_tool_provider.py new file mode 100644 index 0000000000..5ef03cc6ca --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_plugin_tool_provider.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from unittest.mock import Mock, patch + +import pytest + +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolProviderEntityWithPlugin, + ToolProviderIdentity, + ToolProviderType, +) +from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.plugin_tool.provider import PluginToolProviderController +from core.tools.plugin_tool.tool import PluginTool + + +def _build_controller() -> PluginToolProviderController: + tool_entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[], + ) + entity = ToolProviderEntityWithPlugin( + identity=ToolProviderIdentity( + author="author", + name="provider-a", + description=I18nObject(en_US="desc"), + icon="icon.svg", + label=I18nObject(en_US="Provider"), + ), + credentials_schema=[], + plugin_id="plugin-id", + tools=[tool_entity], + ) + return PluginToolProviderController( + entity=entity, + plugin_id="plugin-id", + plugin_unique_identifier="plugin-uid", + tenant_id="tenant-1", + ) + + +def test_plugin_tool_provider_controller_basic_behaviors(): + controller = _build_controller() + assert controller.provider_type == ToolProviderType.PLUGIN + + tool = controller.get_tool("tool-a") + assert isinstance(tool, PluginTool) + assert tool.runtime.tenant_id == "tenant-1" + + tools = controller.get_tools() + assert len(tools) == 1 + assert isinstance(tools[0], PluginTool) + + with pytest.raises(ValueError, match="not found"): + controller.get_tool("missing") + + +def test_validate_credentials_success(): + controller = _build_controller() + manager = Mock() + manager.validate_provider_credentials.return_value = True + + with patch("core.tools.plugin_tool.provider.PluginToolManager", return_value=manager): + controller._validate_credentials(user_id="u1", credentials={"api_key": "x"}) + + manager.validate_provider_credentials.assert_called_once_with( + tenant_id="tenant-1", + user_id="u1", + provider="provider-a", + credentials={"api_key": "x"}, + ) + + +def test_validate_credentials_failure(): + controller = _build_controller() + manager = Mock() + manager.validate_provider_credentials.return_value = False + + with patch("core.tools.plugin_tool.provider.PluginToolManager", return_value=manager): + with pytest.raises(ToolProviderCredentialValidationError, match="Invalid credentials"): + controller._validate_credentials(user_id="u1", credentials={"api_key": "x"}) diff --git a/api/tests/unit_tests/core/tools/test_signature.py b/api/tests/unit_tests/core/tools/test_signature.py new file mode 100644 index 0000000000..a5242a78c5 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_signature.py @@ -0,0 +1,119 @@ +"""Unit tests for core.tools.signature covering signing and verification invariants.""" + +from __future__ import annotations + +from urllib.parse import parse_qs, urlparse + +import pytest + +from core.tools.signature import sign_tool_file, sign_upload_file, verify_tool_file_signature + + +def test_sign_tool_file_and_verify_roundtrip(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x01" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 120) + + url = sign_tool_file("tool-file-id", ".png", for_external=False) + parsed = urlparse(url) + query = parse_qs(parsed.query) + timestamp = query["timestamp"][0] + nonce = query["nonce"][0] + sign = query["sign"][0] + + assert parsed.scheme == "https" + assert parsed.netloc == "internal.example.com" + assert parsed.path == "/files/tools/tool-file-id.png" + assert verify_tool_file_signature("tool-file-id", timestamp, nonce, sign) is True + + +def test_sign_tool_file_for_external_uses_files_url(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x04" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 120) + + url = sign_tool_file("tool-file-id", ".png", for_external=True) + parsed = urlparse(url) + + assert parsed.scheme == "https" + assert parsed.netloc == "files.example.com" + assert parsed.path == "/files/tools/tool-file-id.png" + + +def test_verify_tool_file_signature_rejects_invalid_sign(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x02" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 10) + + url = sign_tool_file("tool-file-id", ".txt") + parsed = urlparse(url) + query = parse_qs(parsed.query) + timestamp = query["timestamp"][0] + nonce = query["nonce"][0] + sign = query["sign"][0] + + assert verify_tool_file_signature("tool-file-id", timestamp, nonce, "bad-signature") is False + + +def test_verify_tool_file_signature_rejects_expired_signature(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x02" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 10) + + url = sign_tool_file("tool-file-id", ".txt") + parsed = urlparse(url) + query = parse_qs(parsed.query) + timestamp = query["timestamp"][0] + nonce = query["nonce"][0] + sign = query["sign"][0] + + monkeypatch.setattr("core.tools.signature.time.time", lambda: int(timestamp) + 99) + assert verify_tool_file_signature("tool-file-id", timestamp, nonce, sign) is False + + +def test_sign_upload_file_prefers_internal_url(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x03" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + + url = sign_upload_file("upload-id", ".png") + parsed = urlparse(url) + query = parse_qs(parsed.query) + + assert parsed.netloc == "internal.example.com" + assert parsed.path == "/files/upload-id/image-preview" + assert query["timestamp"][0] + assert query["nonce"][0] + assert query["sign"][0] + + +def test_sign_upload_file_uses_files_url_fallback(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x05" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "") + + url = sign_upload_file("upload-id", ".png") + parsed = urlparse(url) + query = parse_qs(parsed.query) + + assert parsed.netloc == "files.example.com" + assert parsed.path == "/files/upload-id/image-preview" + assert query["timestamp"][0] + assert query["nonce"][0] + assert query["sign"][0] diff --git a/api/tests/unit_tests/core/tools/test_tool_engine.py b/api/tests/unit_tests/core/tools/test_tool_engine.py new file mode 100644 index 0000000000..40c107667c --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_engine.py @@ -0,0 +1,280 @@ +from __future__ import annotations + +from collections.abc import Generator +from types import SimpleNamespace +from typing import Any +from unittest.mock import Mock, patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool import Tool +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolInvokeMessageBinary, + ToolInvokeMeta, + ToolParameter, + ToolProviderType, +) +from core.tools.errors import ( + ToolEngineInvokeError, + ToolInvokeError, + ToolParameterValidationError, +) +from core.tools.tool_engine import ToolEngine + + +class _DummyTool(Tool): + result: Any + raise_error: Exception | None + + def __init__(self, entity: ToolEntity, runtime: ToolRuntime): + super().__init__(entity=entity, runtime=runtime) + self.result = [self.create_text_message("ok")] + self.raise_error = None + + def tool_provider_type(self) -> ToolProviderType: + return ToolProviderType.BUILT_IN + + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + if self.raise_error: + raise self.raise_error + if isinstance(self.result, list | Generator): + yield from self.result + else: + yield self.result + + +def _build_tool(with_llm_parameter: bool = False) -> _DummyTool: + parameters = [] + if with_llm_parameter: + parameters = [ + ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ] + entity = ToolEntity( + identity=ToolIdentity(author="author", name="tool-a", label=I18nObject(en_US="tool-a"), provider="provider-a"), + parameters=parameters, + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER, runtime_parameters={"rt": 1}) + return _DummyTool(entity=entity, runtime=runtime) + + +def test_convert_tool_response_to_str_and_extract_binary_messages(): + tool = _build_tool() + messages = [ + tool.create_text_message("hello"), + tool.create_link_message("https://example.com"), + ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.IMAGE, + message=ToolInvokeMessage.TextMessage(text="https://example.com/a.png"), + meta={"mime_type": "image/png"}, + ), + tool.create_json_message({"a": 1}), + tool.create_json_message({"a": 1}, suppress_output=True), + ] + text = ToolEngine._convert_tool_response_to_str(messages) + assert "hello" in text + assert "result link: https://example.com." in text + assert '"a": 1' in text + + blob_message = ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.BLOB, + message=ToolInvokeMessage.TextMessage(text="https://example.com/blob.bin"), + meta={"mime_type": "application/octet-stream"}, + ) + link_message = ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.LINK, + message=ToolInvokeMessage.TextMessage(text="https://example.com/file.pdf"), + meta={"mime_type": "application/pdf"}, + ) + binaries = list(ToolEngine._extract_tool_response_binary_and_text([messages[2], blob_message, link_message])) + assert [b.mimetype for b in binaries] == ["image/png", "application/octet-stream", "application/pdf"] + + with pytest.raises(ValueError, match="missing meta data"): + list( + ToolEngine._extract_tool_response_binary_and_text( + [ + ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.IMAGE, + message=ToolInvokeMessage.TextMessage(text="x"), + ) + ] + ) + ) + + +def test_create_message_files_and_invoke_generator(): + binaries = [ + ToolInvokeMessageBinary(mimetype="image/png", url="https://example.com/abc.png"), + ToolInvokeMessageBinary(mimetype="audio/wav", url="https://example.com/def.wav"), + ] + created = [] + + def _message_file_factory(**kwargs): + obj = SimpleNamespace(id=f"mf-{len(created) + 1}", **kwargs) + created.append(obj) + return obj + + with patch("core.tools.tool_engine.MessageFile", side_effect=_message_file_factory): + with patch("core.tools.tool_engine.db") as mock_db: + ids = ToolEngine._create_message_files( + tool_messages=binaries, + agent_message=SimpleNamespace(id="msg-1"), + invoke_from=InvokeFrom.DEBUGGER, + user_id="user-1", + ) + + assert ids == ["mf-1", "mf-2"] + assert mock_db.session.add.call_count == 2 + mock_db.session.close.assert_called_once() + + tool = _build_tool() + invoked = list(ToolEngine._invoke(tool, {"a": 1}, user_id="u")) + assert invoked[0].type == ToolInvokeMessage.MessageType.TEXT + assert isinstance(invoked[-1], ToolInvokeMeta) + assert invoked[-1].error is None + + +def test_generic_invoke_success_and_error_paths(): + tool = _build_tool() + callback = Mock() + callback.on_tool_execution.side_effect = lambda **kwargs: kwargs["tool_outputs"] + response = list( + ToolEngine.generic_invoke( + tool=tool, + tool_parameters={"x": 1}, + user_id="u1", + workflow_tool_callback=callback, + workflow_call_depth=0, + conversation_id="c1", + app_id="a1", + message_id="m1", + ) + ) + assert response[0].message.text == "ok" + callback.on_tool_start.assert_called_once() + callback.on_tool_execution.assert_called_once() + + tool.raise_error = RuntimeError("boom") + error_callback = Mock() + error_callback.on_tool_execution.side_effect = lambda **kwargs: list(kwargs["tool_outputs"]) + with pytest.raises(RuntimeError, match="boom"): + list( + ToolEngine.generic_invoke( + tool=tool, + tool_parameters={"x": 1}, + user_id="u1", + workflow_tool_callback=error_callback, + workflow_call_depth=0, + ) + ) + error_callback.on_tool_error.assert_called_once() + + +def test_agent_invoke_success(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + meta = ToolInvokeMeta.empty() + + with patch.object(ToolEngine, "_invoke", return_value=iter([tool.create_text_message("ok"), meta])): + with patch( + "core.tools.tool_engine.ToolFileMessageTransformer.transform_tool_invoke_messages", + side_effect=lambda messages, **kwargs: messages, + ): + with patch.object(ToolEngine, "_extract_tool_response_binary_and_text", return_value=iter([])): + with patch.object(ToolEngine, "_create_message_files", return_value=[]): + result_text, message_files, result_meta = ToolEngine.agent_invoke( + tool=tool, + tool_parameters="hello", + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert result_text == "ok" + assert message_files == [] + assert result_meta.error is None + callback.on_tool_start.assert_called_once() + callback.on_tool_end.assert_called_once() + + +def test_agent_invoke_param_validation_error(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + + with patch.object(ToolEngine, "_invoke", side_effect=ToolParameterValidationError("bad-param")): + error_text, files, error_meta = ToolEngine.agent_invoke( + tool=tool, + tool_parameters={"a": 1}, + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert "tool parameters validation error" in error_text + assert files == [] + assert error_meta.error + + +def test_agent_invoke_engine_meta_error(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + engine_error = ToolEngineInvokeError(ToolInvokeMeta.error_instance("meta failure")) + + with patch.object(ToolEngine, "_invoke", side_effect=engine_error): + error_text, files, error_meta = ToolEngine.agent_invoke( + tool=tool, + tool_parameters={"a": 1}, + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert "meta failure" in error_text + assert files == [] + assert error_meta.error == "meta failure" + + +def test_agent_invoke_tool_invoke_error(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + + with patch.object(ToolEngine, "_invoke", side_effect=ToolInvokeError("invoke boom")): + error_text, files, _ = ToolEngine.agent_invoke( + tool=tool, + tool_parameters={"a": 1}, + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert "tool invoke error" in error_text + assert files == [] diff --git a/api/tests/unit_tests/core/tools/test_tool_file_manager.py b/api/tests/unit_tests/core/tools/test_tool_file_manager.py new file mode 100644 index 0000000000..cca8254dd6 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_file_manager.py @@ -0,0 +1,249 @@ +"""Unit tests for `ToolFileManager` behavior. + +Covers signing/verification, file persistence flows, and retrieval APIs with +mocked storage/session boundaries (httpx, SimpleNamespace, Mock/patch) to +avoid real IO. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import httpx +import pytest + +from core.tools.tool_file_manager import ToolFileManager + + +def _setup_tool_file_signing(monkeypatch: pytest.MonkeyPatch) -> dict[str, str]: + monkeypatch.setattr("core.tools.tool_file_manager.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.tool_file_manager.os.urandom", lambda _: b"\x01" * 16) + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.SECRET_KEY", "secret") + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.FILES_ACCESS_TIMEOUT", 100) + + url = ToolFileManager.sign_file("tf-1", ".png") + return dict(part.split("=", 1) for part in url.split("?", 1)[1].split("&")) + + +def _patch_session_factory(session: Mock): + session_cm = MagicMock() + session_cm.__enter__.return_value = session + session_cm.__exit__.return_value = False + return patch("core.tools.tool_file_manager.session_factory.create_session", return_value=session_cm) + + +def test_tool_file_manager_sign_verify_valid(monkeypatch: pytest.MonkeyPatch) -> None: + query = _setup_tool_file_signing(monkeypatch) + url = ToolFileManager.sign_file("tf-1", ".png") + assert "/files/tools/tf-1.png" in url + + assert ToolFileManager.verify_file("tf-1", query["timestamp"], query["nonce"], query["sign"]) is True + + +def test_tool_file_manager_sign_verify_bad_signature(monkeypatch: pytest.MonkeyPatch) -> None: + query = _setup_tool_file_signing(monkeypatch) + + assert ToolFileManager.verify_file("tf-1", query["timestamp"], query["nonce"], "bad") is False + + +def test_tool_file_manager_sign_verify_expired_timestamp(monkeypatch: pytest.MonkeyPatch) -> None: + query = _setup_tool_file_signing(monkeypatch) + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.FILES_ACCESS_TIMEOUT", 0) + monkeypatch.setattr("core.tools.tool_file_manager.time.time", lambda: 1700000100) + + assert ToolFileManager.verify_file("tf-1", query["timestamp"], query["nonce"], query["sign"]) is False + + +def test_create_file_by_raw_stores_file_and_persists_record() -> None: + manager = ToolFileManager() + session = Mock() + session.refresh.side_effect = lambda model: setattr(model, "id", "tf-1") + + def tool_file_factory(**kwargs): + return SimpleNamespace(**kwargs) + + with ( + patch("core.tools.tool_file_manager.storage") as storage, + patch("core.tools.tool_file_manager.ToolFile", side_effect=tool_file_factory), + patch("core.tools.tool_file_manager.guess_extension", return_value=".txt"), + patch("core.tools.tool_file_manager.uuid4", return_value=SimpleNamespace(hex="abc")), + _patch_session_factory(session), + ): + file_model = manager.create_file_by_raw( + user_id="u1", + tenant_id="t1", + conversation_id="c1", + file_binary=b"hello", + mimetype="text/plain", + filename="readme", + ) + + assert file_model.name.endswith(".txt") + storage.save.assert_called_once() + session.add.assert_called_once() + session.commit.assert_called_once() + session.refresh.assert_called_once_with(file_model) + + +def test_create_file_by_url_downloads_and_persists_record() -> None: + manager = ToolFileManager() + response = Mock() + response.content = b"binary" + response.headers = {"Content-Type": "application/octet-stream"} + response.raise_for_status.return_value = None + session = Mock() + + def tool_file_factory(**kwargs): + return SimpleNamespace(**kwargs) + + session.refresh.side_effect = lambda model: setattr(model, "id", "tf-2") + with ( + patch("core.tools.tool_file_manager.storage") as storage, + patch("core.tools.tool_file_manager.ToolFile", side_effect=tool_file_factory), + patch("core.tools.tool_file_manager.uuid4", return_value=SimpleNamespace(hex="def")), + _patch_session_factory(session), + patch("core.tools.tool_file_manager.ssrf_proxy.get", return_value=response), + ): + file_model = manager.create_file_by_url("u1", "t1", "https://example.com/f.bin", "c1") + + assert file_model.file_key.startswith("tools/t1/") + storage.save.assert_called_once() + session.add.assert_called_once_with(file_model) + session.commit.assert_called_once() + session.refresh.assert_called_once_with(file_model) + + +def test_create_file_by_url_raises_on_timeout() -> None: + manager = ToolFileManager() + + with patch("core.tools.tool_file_manager.ssrf_proxy.get", side_effect=httpx.TimeoutException("timeout")): + with pytest.raises(ValueError, match="timeout when downloading file"): + manager.create_file_by_url("u1", "t1", "https://example.com/f.bin", "c1") + + +def test_get_file_binary_returns_none_when_not_found() -> None: + # Arrange + manager = ToolFileManager() + session = Mock() + session.query.return_value.where.return_value.first.return_value = None + + # Act + with _patch_session_factory(session): + result = manager.get_file_binary("missing") + + # Assert + assert result is None + + +def test_get_file_binary_returns_bytes_when_found() -> None: + # Arrange + manager = ToolFileManager() + tool_file = SimpleNamespace(file_key="k1", mimetype="text/plain") + session = Mock() + session.query.return_value.where.return_value.first.return_value = tool_file + + # Act + with patch("core.tools.tool_file_manager.storage") as storage: + storage.load_once.return_value = b"hello" + with _patch_session_factory(session): + result = manager.get_file_binary("id1") + + # Assert + assert result == (b"hello", "text/plain") + + +def test_get_file_binary_by_message_file_id_when_messagefile_missing() -> None: + # Arrange + manager = ToolFileManager() + session = Mock() + first_query = Mock() + second_query = Mock() + first_query.where.return_value.first.return_value = None + second_query.where.return_value.first.return_value = None + session.query.side_effect = [first_query, second_query] + + # Act + with _patch_session_factory(session): + result = manager.get_file_binary_by_message_file_id("mf-1") + + # Assert + assert result is None + + +def test_get_file_binary_by_message_file_id_when_url_is_none() -> None: + # Arrange + manager = ToolFileManager() + message_file = SimpleNamespace(url=None) + session = Mock() + first_query = Mock() + second_query = Mock() + first_query.where.return_value.first.return_value = message_file + second_query.where.return_value.first.return_value = None + session.query.side_effect = [first_query, second_query] + + # Act + with _patch_session_factory(session): + result = manager.get_file_binary_by_message_file_id("mf-1") + + # Assert + assert result is None + + +def test_get_file_binary_by_message_file_id_returns_bytes_when_found() -> None: + # Arrange + manager = ToolFileManager() + message_file = SimpleNamespace(url="https://x/files/tools/tool123.png") + tool_file = SimpleNamespace(file_key="k2", mimetype="image/png") + session = Mock() + first_query = Mock() + second_query = Mock() + first_query.where.return_value.first.return_value = message_file + second_query.where.return_value.first.return_value = tool_file + session.query.side_effect = [first_query, second_query] + + # Act + with patch("core.tools.tool_file_manager.storage") as storage: + storage.load_once.return_value = b"img" + with _patch_session_factory(session): + result = manager.get_file_binary_by_message_file_id("mf-1") + + # Assert + assert result == (b"img", "image/png") + + +def test_get_file_generator_returns_none_when_toolfile_missing() -> None: + # Arrange + manager = ToolFileManager() + session = Mock() + session.query.return_value.where.return_value.first.return_value = None + + # Act + with _patch_session_factory(session): + stream, tool_file = manager.get_file_generator_by_tool_file_id("tool123") + + # Assert + assert stream is None + assert tool_file is None + + +def test_get_file_generator_returns_stream_when_found() -> None: + # Arrange + manager = ToolFileManager() + tool_file = SimpleNamespace(file_key="k2", mimetype="image/png") + session = Mock() + session.query.return_value.where.return_value.first.return_value = tool_file + + # Act + with patch("core.tools.tool_file_manager.storage") as storage: + stream = iter([b"a", b"b"]) + storage.load_stream.return_value = stream + with ( + _patch_session_factory(session), + patch("core.tools.tool_file_manager.ToolFilePydanticModel.model_validate", return_value="validated-file"), + ): + result_stream, result_file = manager.get_file_generator_by_tool_file_id("tool123") + assert list(result_stream) == [b"a", b"b"] + assert result_file == "validated-file" diff --git a/api/tests/unit_tests/core/tools/test_tool_label_manager.py b/api/tests/unit_tests/core/tools/test_tool_label_manager.py new file mode 100644 index 0000000000..857f4aa178 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_label_manager.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any +from unittest.mock import PropertyMock, patch + +import pytest + +from core.tools.builtin_tool.provider import BuiltinToolProviderController +from core.tools.custom_tool.provider import ApiToolProviderController +from core.tools.tool_label_manager import ToolLabelManager +from core.tools.workflow_as_tool.provider import WorkflowToolProviderController + + +class _ConcreteBuiltinToolProviderController(BuiltinToolProviderController): + def _validate_credentials(self, user_id: str, credentials: dict[str, Any]): + return None + + +def _api_controller(provider_id: str = "api-1") -> ApiToolProviderController: + controller = object.__new__(ApiToolProviderController) + controller.provider_id = provider_id + return controller + + +def _workflow_controller(provider_id: str = "wf-1") -> WorkflowToolProviderController: + controller = object.__new__(WorkflowToolProviderController) + controller.provider_id = provider_id + return controller + + +def test_tool_label_manager_filter_tool_labels(): + filtered = ToolLabelManager.filter_tool_labels(["search", "search", "invalid", "news"]) + assert set(filtered) == {"search", "news"} + assert len(filtered) == 2 + + +def test_tool_label_manager_update_tool_labels_db(): + controller = _api_controller("api-1") + with patch("core.tools.tool_label_manager.db") as mock_db: + delete_query = mock_db.session.query.return_value.where.return_value + delete_query.delete.return_value = None + ToolLabelManager.update_tool_labels(controller, ["search", "search", "invalid"]) + + delete_query.delete.assert_called_once() + # only one valid unique label should be inserted. + assert mock_db.session.add.call_count == 1 + mock_db.session.commit.assert_called_once() + + +def test_tool_label_manager_update_tool_labels_unsupported(): + with pytest.raises(ValueError, match="Unsupported tool type"): + ToolLabelManager.update_tool_labels(object(), ["search"]) # type: ignore[arg-type] + + +def test_tool_label_manager_get_tool_labels_for_builtin_and_db(): + with patch.object( + _ConcreteBuiltinToolProviderController, + "tool_labels", + new_callable=PropertyMock, + return_value=["search", "news"], + ): + builtin = object.__new__(_ConcreteBuiltinToolProviderController) + assert ToolLabelManager.get_tool_labels(builtin) == ["search", "news"] + + api = _api_controller("api-1") + with patch("core.tools.tool_label_manager.db") as mock_db: + mock_db.session.scalars.return_value.all.return_value = ["search", "news"] + labels = ToolLabelManager.get_tool_labels(api) + assert labels == ["search", "news"] + + with pytest.raises(ValueError, match="Unsupported tool type"): + ToolLabelManager.get_tool_labels(object()) # type: ignore[arg-type] + + +def test_tool_label_manager_get_tools_labels_batch(): + assert ToolLabelManager.get_tools_labels([]) == {} + + api = _api_controller("api-1") + wf = _workflow_controller("wf-1") + records = [ + SimpleNamespace(tool_id="api-1", label_name="search"), + SimpleNamespace(tool_id="api-1", label_name="news"), + SimpleNamespace(tool_id="wf-1", label_name="utilities"), + ] + with patch("core.tools.tool_label_manager.db") as mock_db: + mock_db.session.scalars.return_value.all.return_value = records + labels = ToolLabelManager.get_tools_labels([api, wf]) + assert labels == {"api-1": ["search", "news"], "wf-1": ["utilities"]} + + with pytest.raises(ValueError, match="Unsupported tool type"): + ToolLabelManager.get_tools_labels([api, object()]) # type: ignore[list-item] diff --git a/api/tests/unit_tests/core/tools/test_tool_manager.py b/api/tests/unit_tests/core/tools/test_tool_manager.py new file mode 100644 index 0000000000..0f73e22654 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_manager.py @@ -0,0 +1,899 @@ +from __future__ import annotations + +"""Unit tests for ToolManager behavior with mocked providers and collaborators.""" + +import json +import threading +from types import SimpleNamespace +from typing import Any +from unittest.mock import Mock, patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.plugin.entities.plugin_daemon import CredentialType +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.tool_entities import ( + ApiProviderAuthType, + ToolParameter, + ToolProviderType, +) +from core.tools.errors import ToolProviderNotFoundError +from core.tools.plugin_tool.provider import PluginToolProviderController +from core.tools.tool_manager import ToolManager + + +class _SimpleContextVar: + def __init__(self): + self._is_set = False + self._value: Any = None + + def get(self): + if not self._is_set: + raise LookupError + return self._value + + def set(self, value: Any): + self._value = value + self._is_set = True + + +def _cm(session: Any): + context = Mock() + context.__enter__ = Mock(return_value=session) + context.__exit__ = Mock(return_value=False) + return context + + +def _setup_list_providers_from_api_mocks( + monkeypatch, + *, + session: Mock, + hardcoded_controller: SimpleNamespace, + plugin_controller: PluginToolProviderController, + api_controller: SimpleNamespace, + workflow_controller: SimpleNamespace, +): + mock_db = Mock() + mock_db.engine = object() + monkeypatch.setattr("core.tools.tool_manager.db", mock_db) + monkeypatch.setattr("core.tools.tool_manager.Session", lambda *args, **kwargs: _cm(session)) + monkeypatch.setattr( + ToolManager, + "list_builtin_providers", + Mock(return_value=[hardcoded_controller, plugin_controller]), + ) + monkeypatch.setattr( + ToolManager, + "list_default_builtin_providers", + Mock(return_value=[SimpleNamespace(provider="hardcoded")]), + ) + monkeypatch.setattr("core.tools.tool_manager.is_filtered", lambda *args, **kwargs: False) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.builtin_provider_to_user_provider", + lambda **kwargs: SimpleNamespace(name=kwargs["provider_controller"].entity.identity.name), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.api_provider_to_controller", + Mock(side_effect=[api_controller, RuntimeError("invalid")]), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.api_provider_to_user_provider", + Mock(return_value=SimpleNamespace(name="api-provider")), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.workflow_provider_to_controller", + Mock(side_effect=[workflow_controller, RuntimeError("deleted app")]), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.workflow_provider_to_user_provider", + Mock(return_value=SimpleNamespace(name="workflow-provider")), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolLabelManager.get_tools_labels", + Mock(side_effect=[{"api-1": ["search"]}, {"wf-1": ["utility"]}]), + ) + mock_mcp_service = Mock() + mock_mcp_service.list_providers.return_value = [SimpleNamespace(name="mcp-provider")] + monkeypatch.setattr("core.tools.tool_manager.MCPToolManageService", Mock(return_value=mock_mcp_service)) + monkeypatch.setattr("core.tools.tool_manager.BuiltinToolProviderSort.sort", lambda providers: providers) + + +@pytest.fixture(autouse=True) +def _reset_tool_manager_state(): + old_hardcoded = ToolManager._hardcoded_providers.copy() + old_loaded = ToolManager._builtin_providers_loaded + old_labels = ToolManager._builtin_tools_labels.copy() + try: + yield + finally: + ToolManager._hardcoded_providers = old_hardcoded + ToolManager._builtin_providers_loaded = old_loaded + ToolManager._builtin_tools_labels = old_labels + + +def test_get_hardcoded_provider_loads_cache_when_empty(): + provider = Mock() + ToolManager._hardcoded_providers = {} + + def _load(): + ToolManager._hardcoded_providers["weather"] = provider + + with patch.object(ToolManager, "load_hardcoded_providers_cache", side_effect=_load) as mock_load: + assert ToolManager.get_hardcoded_provider("weather") is provider + + mock_load.assert_called_once() + + +def test_get_builtin_provider_returns_plugin_for_missing_hardcoded(): + hardcoded = Mock() + plugin_provider = Mock() + ToolManager._hardcoded_providers = {"time": hardcoded} + + with patch.object(ToolManager, "get_plugin_provider", return_value=plugin_provider): + assert ToolManager.get_builtin_provider("time", "tenant-1") is hardcoded + assert ToolManager.get_builtin_provider("plugin/time", "tenant-1") is plugin_provider + + +def test_get_plugin_provider_uses_context_cache(): + provider_context = _SimpleContextVar() + lock_context = _SimpleContextVar() + lock_context.set(threading.Lock()) + provider_entity = SimpleNamespace(declaration=Mock(), plugin_id="pid", plugin_unique_identifier="uid") + + with patch("core.tools.tool_manager.contexts.plugin_tool_providers", provider_context): + with patch("core.tools.tool_manager.contexts.plugin_tool_providers_lock", lock_context): + with patch("core.tools.tool_manager.PluginToolManager") as mock_manager_cls: + mock_manager_cls.return_value.fetch_tool_provider.return_value = provider_entity + controller = SimpleNamespace(name="controller") + with patch("core.tools.tool_manager.PluginToolProviderController", return_value=controller): + built = ToolManager.get_plugin_provider("provider-a", "tenant-1") + cached = ToolManager.get_plugin_provider("provider-a", "tenant-1") + + assert built is controller + assert cached is controller + mock_manager_cls.return_value.fetch_tool_provider.assert_called_once() + + +def test_get_plugin_provider_raises_when_provider_missing(): + provider_context = _SimpleContextVar() + lock_context = _SimpleContextVar() + lock_context.set(threading.Lock()) + + with patch("core.tools.tool_manager.contexts.plugin_tool_providers", provider_context): + with patch("core.tools.tool_manager.contexts.plugin_tool_providers_lock", lock_context): + with patch("core.tools.tool_manager.PluginToolManager") as mock_manager_cls: + mock_manager_cls.return_value.fetch_tool_provider.return_value = None + with pytest.raises(ToolProviderNotFoundError, match="plugin provider provider-a not found"): + ToolManager.get_plugin_provider("provider-a", "tenant-1") + + +def test_get_tool_runtime_builtin_without_credentials(): + tool = Mock() + tool.fork_tool_runtime.return_value = "runtime-tool" + controller = SimpleNamespace(get_tool=Mock(return_value=tool), need_credentials=False) + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + result = ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="current_time", + tenant_id="tenant-1", + ) + + assert result == "runtime-tool" + runtime = tool.fork_tool_runtime.call_args.kwargs["runtime"] + assert runtime.tenant_id == "tenant-1" + assert runtime.credentials == {} + + +def test_get_tool_runtime_builtin_missing_tool_raises(): + controller = SimpleNamespace(get_tool=Mock(return_value=None), need_credentials=False) + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + with pytest.raises(ToolProviderNotFoundError, match="builtin tool missing not found"): + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="missing", + tenant_id="tenant-1", + ) + + +def test_get_tool_runtime_builtin_with_credentials_decrypts_and_forks(): + tool = Mock() + tool.fork_tool_runtime.return_value = "runtime-tool" + controller = SimpleNamespace( + get_tool=Mock(return_value=tool), + need_credentials=True, + get_credentials_schema_by_type=Mock(return_value=[]), + ) + builtin_provider = SimpleNamespace( + id="cred-1", + credential_type=CredentialType.API_KEY.value, + credentials={"encrypted": "value"}, + expires_at=-1, + user_id="user-1", + ) + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + with patch("core.helper.credential_utils.check_credential_policy_compliance"): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.order_by.return_value.first.return_value = ( + builtin_provider + ) + encrypter = Mock() + encrypter.decrypt.return_value = {"api_key": "secret"} + cache = Mock() + with patch("core.tools.tool_manager.create_provider_encrypter", return_value=(encrypter, cache)): + result = ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="weekday", + tenant_id="tenant-1", + ) + + assert result == "runtime-tool" + runtime = tool.fork_tool_runtime.call_args.kwargs["runtime"] + assert runtime.credentials == {"api_key": "secret"} + assert runtime.credential_type == CredentialType.API_KEY + + +@patch("core.tools.tool_manager.create_provider_encrypter") +@patch("core.plugin.impl.oauth.OAuthHandler") +@patch( + "services.tools.builtin_tools_manage_service.BuiltinToolManageService.get_oauth_client", + return_value={"client_id": "id"}, +) +@patch("core.tools.tool_manager.db") +@patch("core.tools.tool_manager.time.time", return_value=1000) +@patch("core.helper.credential_utils.check_credential_policy_compliance") +def test_get_tool_runtime_builtin_refreshes_expired_oauth_credentials( + mock_check, + mock_time, + mock_db, + mock_get_oauth_client, + mock_oauth_handler_cls, + mock_create_provider_encrypter, +): + tool = Mock() + tool.fork_tool_runtime.return_value = "runtime-tool" + controller = SimpleNamespace( + get_tool=Mock(return_value=tool), + need_credentials=True, + get_credentials_schema_by_type=Mock(return_value=[]), + ) + builtin_provider = SimpleNamespace( + id="cred-1", + credential_type=CredentialType.OAUTH2.value, + credentials={"encrypted": "value"}, + encrypted_credentials=None, + expires_at=1, + user_id="user-1", + ) + refreshed = SimpleNamespace(credentials={"token": "new"}, expires_at=123456) + + mock_db.session.query.return_value.where.return_value.order_by.return_value.first.return_value = builtin_provider + encrypter = Mock() + encrypter.decrypt.return_value = {"token": "old"} + encrypter.encrypt.return_value = {"token": "encrypted"} + cache = Mock() + mock_create_provider_encrypter.return_value = (encrypter, cache) + mock_oauth_handler_cls.return_value.refresh_credentials.return_value = refreshed + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + result = ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="weekday", + tenant_id="tenant-1", + ) + + assert result == "runtime-tool" + assert builtin_provider.expires_at == refreshed.expires_at + assert builtin_provider.encrypted_credentials == json.dumps({"token": "encrypted"}) + mock_db.session.commit.assert_called_once() + cache.delete.assert_called_once() + + +def test_get_tool_runtime_builtin_plugin_provider_deleted_raises(): + plugin_controller = object.__new__(PluginToolProviderController) + plugin_controller.entity = SimpleNamespace(credentials_schema=[{"name": "k"}], oauth_schema=None) + plugin_controller.get_tool = Mock(return_value=Mock()) + plugin_controller.get_credentials_schema_by_type = Mock(return_value=[]) + + with patch.object(ToolManager, "get_builtin_provider", return_value=plugin_controller): + with patch("core.tools.tool_manager.is_valid_uuid", return_value=True): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.scalar.return_value = None + with pytest.raises(ToolProviderNotFoundError, match="provider has been deleted"): + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="weekday", + tenant_id="tenant-1", + credential_id="uuid-id", + ) + + +def test_get_tool_runtime_api_path(): + api_tool = Mock() + api_tool.fork_tool_runtime.return_value = "api-runtime" + api_provider = Mock() + api_provider.get_tool.return_value = api_tool + + with patch.object(ToolManager, "get_api_provider_controller", return_value=(api_provider, {"c": "enc"})): + encrypter = Mock() + encrypter.decrypt.return_value = {"c": "dec"} + with patch("core.tools.tool_manager.create_tool_provider_encrypter", return_value=(encrypter, Mock())): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tenant_id="tenant-1", + ) + == "api-runtime" + ) + + +def test_get_tool_runtime_workflow_path(): + workflow_provider = SimpleNamespace(tenant_id="tenant-1") + workflow_tool = Mock() + workflow_tool.fork_tool_runtime.return_value = "wf-runtime" + workflow_controller = Mock() + workflow_controller.get_tools.return_value = [workflow_tool] + session = Mock() + session.begin.return_value = _cm(None) + session.scalar.return_value = workflow_provider + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch( + "core.tools.tool_manager.ToolTransformService.workflow_provider_to_controller", + return_value=workflow_controller, + ): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.WORKFLOW, + provider_id="wf-1", + tool_name="wf", + tenant_id="tenant-1", + ) + == "wf-runtime" + ) + + +def test_get_tool_runtime_plugin_path(): + with patch.object( + ToolManager, + "get_plugin_provider", + return_value=SimpleNamespace(get_tool=lambda _: "plugin-tool"), + ): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.PLUGIN, + provider_id="plugin-1", + tool_name="p", + tenant_id="tenant-1", + ) + == "plugin-tool" + ) + + +def test_get_tool_runtime_mcp_path(): + with patch.object( + ToolManager, + "get_mcp_provider_controller", + return_value=SimpleNamespace(get_tool=lambda _: "mcp-tool"), + ): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.MCP, + provider_id="mcp-1", + tool_name="m", + tenant_id="tenant-1", + ) + == "mcp-tool" + ) + + +def test_get_tool_runtime_app_not_implemented(): + with pytest.raises(NotImplementedError, match="app provider not implemented"): + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.APP, + provider_id="app", + tool_name="x", + tenant_id="tenant-1", + ) + + +def test_get_agent_runtime_apply_runtime_parameters(): + parameter = ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + parameter.form = ToolParameter.ToolParameterForm.FORM + + tool_runtime = SimpleNamespace(runtime=ToolRuntime(tenant_id="tenant-1", runtime_parameters={})) + tool_runtime.get_merged_runtime_parameters = Mock(return_value=[parameter]) + + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_runtime): + with patch.object(ToolManager, "_convert_tool_parameters_type", return_value={"query": "hello"}): + manager = Mock() + manager.decrypt_tool_parameters.return_value = {"query": "decrypted"} + with patch("core.tools.tool_manager.ToolParameterConfigurationManager", return_value=manager): + agent_tool = SimpleNamespace( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tool_parameters={"query": "hello"}, + credential_id=None, + ) + result = ToolManager.get_agent_tool_runtime( + tenant_id="tenant-1", + app_id="app-1", + agent_tool=agent_tool, + invoke_from=InvokeFrom.DEBUGGER, + variable_pool=None, + ) + + assert result is tool_runtime + assert tool_runtime.runtime.runtime_parameters["query"] == "decrypted" + + +def test_get_workflow_runtime_apply_runtime_parameters(): + parameter = ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + parameter.form = ToolParameter.ToolParameterForm.FORM + + workflow_tool = SimpleNamespace( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tool_configurations={"query": "hello"}, + credential_id=None, + ) + tool_runtime2 = SimpleNamespace(runtime=ToolRuntime(tenant_id="tenant-1", runtime_parameters={})) + tool_runtime2.get_merged_runtime_parameters = Mock(return_value=[parameter]) + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_runtime2): + with patch.object(ToolManager, "_convert_tool_parameters_type", return_value={"query": "workflow"}): + manager = Mock() + manager.decrypt_tool_parameters.return_value = {"query": "workflow-dec"} + with patch("core.tools.tool_manager.ToolParameterConfigurationManager", return_value=manager): + workflow_result = ToolManager.get_workflow_tool_runtime( + tenant_id="tenant-1", + app_id="app-1", + node_id="node-1", + workflow_tool=workflow_tool, + invoke_from=InvokeFrom.DEBUGGER, + variable_pool=None, + ) + + assert workflow_result is tool_runtime2 + assert tool_runtime2.runtime.runtime_parameters["query"] == "workflow-dec" + + +def test_get_agent_runtime_raises_when_runtime_missing(): + tool_runtime = SimpleNamespace(runtime=None, get_merged_runtime_parameters=lambda: []) + agent_tool = SimpleNamespace( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tool_parameters={}, + credential_id=None, + ) + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_runtime): + with patch.object(ToolManager, "_convert_tool_parameters_type", return_value={}): + with patch("core.tools.tool_manager.ToolParameterConfigurationManager", return_value=Mock()): + with pytest.raises(ValueError, match="runtime not found"): + ToolManager.get_agent_tool_runtime( + tenant_id="tenant-1", + app_id="app-1", + agent_tool=agent_tool, + ) + + +def test_get_tool_runtime_from_plugin_only_uses_form_parameters(): + form_param = ToolParameter.get_simple_instance( + name="q", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + form_param.form = ToolParameter.ToolParameterForm.FORM + llm_param = ToolParameter.get_simple_instance( + name="llm", + llm_description="llm", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + llm_param.form = ToolParameter.ToolParameterForm.LLM + + tool_entity = SimpleNamespace(runtime=ToolRuntime(tenant_id="tenant-1", runtime_parameters={})) + tool_entity.get_merged_runtime_parameters = Mock(return_value=[form_param, llm_param]) + + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_entity): + result = ToolManager.get_tool_runtime_from_plugin( + tool_type=ToolProviderType.API, + tenant_id="tenant-1", + provider="api-1", + tool_name="search", + tool_parameters={"q": "hello", "llm": "ignore"}, + ) + + assert result is tool_entity + assert tool_entity.runtime.runtime_parameters == {"q": "hello"} + + +def test_hardcoded_provider_icon_success(): + provider = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(icon="icon.svg"))) + with patch.object(ToolManager, "get_hardcoded_provider", return_value=provider): + with patch("core.tools.tool_manager.path.exists", return_value=True): + with patch("core.tools.tool_manager.mimetypes.guess_type", return_value=("image/svg+xml", None)): + icon_path, mime = ToolManager.get_hardcoded_provider_icon("time") + assert icon_path.endswith("icon.svg") + assert mime == "image/svg+xml" + + +def test_hardcoded_provider_icon_missing_raises(): + provider = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(icon="icon.svg"))) + with patch.object(ToolManager, "get_hardcoded_provider", return_value=provider): + with patch("core.tools.tool_manager.path.exists", return_value=False): + with pytest.raises(ToolProviderNotFoundError, match="icon not found"): + ToolManager.get_hardcoded_provider_icon("time") + + +def test_list_hardcoded_providers_cache_hit(): + ToolManager._hardcoded_providers = {"p": Mock()} + ToolManager._builtin_providers_loaded = True + assert list(ToolManager.list_hardcoded_providers()) == list(ToolManager._hardcoded_providers.values()) + + +def test_clear_hardcoded_providers_cache_resets(): + ToolManager._hardcoded_providers = {"p": Mock()} + ToolManager._builtin_providers_loaded = True + ToolManager.clear_hardcoded_providers_cache() + assert ToolManager._hardcoded_providers == {} + assert ToolManager._builtin_providers_loaded is False + + +def test_list_hardcoded_providers_internal_loader(): + good_provider = SimpleNamespace( + entity=SimpleNamespace(identity=SimpleNamespace(name="good")), + get_tools=lambda: [SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="tool-a", label="A")))], + ) + provider_class = Mock(return_value=good_provider) + + with patch("core.tools.tool_manager.listdir", return_value=["good", "bad", "__skip"]): + with patch("core.tools.tool_manager.path.isdir", side_effect=lambda p: "good" in p or "bad" in p): + with patch( + "core.tools.tool_manager.load_single_subclass_from_source", + side_effect=[provider_class, RuntimeError("boom")], + ): + ToolManager._hardcoded_providers = {} + ToolManager._builtin_tools_labels = {} + providers = list(ToolManager._list_hardcoded_providers()) + + assert providers == [good_provider] + assert ToolManager._hardcoded_providers["good"] is good_provider + assert ToolManager._builtin_tools_labels["tool-a"] == "A" + assert ToolManager._builtin_providers_loaded is True + + +def test_get_tool_label_loads_cache_and_handles_missing(): + ToolManager._builtin_tools_labels = {} + + def _load(): + ToolManager._builtin_tools_labels["tool-a"] = "Label A" + + with patch.object(ToolManager, "load_hardcoded_providers_cache", side_effect=_load): + assert ToolManager.get_tool_label("tool-a") == "Label A" + assert ToolManager.get_tool_label("missing") is None + + +def test_list_default_builtin_providers_for_postgres_and_mysql(): + provider_records = [SimpleNamespace(id="id-1"), SimpleNamespace(id="id-2")] + + for scheme in ("postgresql", "mysql"): + session = Mock() + session.execute.return_value.all.return_value = [SimpleNamespace(id="id-1"), SimpleNamespace(id="id-2")] + session.query.return_value.where.return_value.all.return_value = provider_records + + with patch("core.tools.tool_manager.dify_config", SimpleNamespace(SQLALCHEMY_DATABASE_URI_SCHEME=scheme)): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + providers = ToolManager.list_default_builtin_providers("tenant-1") + + assert providers == provider_records + + +def test_list_providers_from_api_covers_builtin_api_workflow_and_mcp(monkeypatch): + hardcoded_controller = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="hardcoded"))) + plugin_controller = object.__new__(PluginToolProviderController) + plugin_controller.entity = SimpleNamespace(identity=SimpleNamespace(name="plugin-provider")) + + api_db_provider_good = SimpleNamespace(id="api-1") + api_db_provider_bad = SimpleNamespace(id="api-2") + api_controller = SimpleNamespace(provider_id="api-1") + + workflow_db_provider_good = SimpleNamespace(id="wf-1") + workflow_db_provider_bad = SimpleNamespace(id="wf-2") + workflow_controller = SimpleNamespace(provider_id="wf-1") + + session = Mock() + session.scalars.side_effect = [ + SimpleNamespace(all=lambda: [api_db_provider_good, api_db_provider_bad]), + SimpleNamespace(all=lambda: [workflow_db_provider_good, workflow_db_provider_bad]), + ] + + _setup_list_providers_from_api_mocks( + monkeypatch, + session=session, + hardcoded_controller=hardcoded_controller, + plugin_controller=plugin_controller, + api_controller=api_controller, + workflow_controller=workflow_controller, + ) + providers = ToolManager.list_providers_from_api(user_id="user-1", tenant_id="tenant-1", typ="") + + names = {provider.name for provider in providers} + assert {"hardcoded", "plugin-provider", "api-provider", "workflow-provider", "mcp-provider"} <= names + + +def test_get_api_provider_controller_returns_controller_and_credentials(): + provider = SimpleNamespace( + id="api-1", + tenant_id="tenant-1", + name="api-provider", + description="desc", + credentials={"auth_type": "api_key_query"}, + credentials_str='{"auth_type": "api_key_query", "api_key_value": "secret"}', + schema_type="openapi", + schema="schema", + tools=[], + icon='{"background": "#000", "content": "A"}', + privacy_policy="privacy", + custom_disclaimer="disclaimer", + ) + db_query = Mock() + db_query.where.return_value.first.return_value = provider + controller = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value = db_query + with patch( + "core.tools.tool_manager.ApiToolProviderController.from_db", return_value=controller + ) as mock_from_db: + built_controller, credentials = ToolManager.get_api_provider_controller("tenant-1", "api-1") + + assert built_controller is controller + assert credentials == provider.credentials + mock_from_db.assert_called_with(provider, ApiProviderAuthType.API_KEY_QUERY) + controller.load_bundled_tools.assert_called_once_with(provider.tools) + + +def test_user_get_api_provider_masks_credentials_and_adds_labels(): + provider = SimpleNamespace( + id="api-1", + tenant_id="tenant-1", + name="api-provider", + description="desc", + credentials={"auth_type": "api_key_query"}, + credentials_str='{"auth_type": "api_key_query", "api_key_value": "secret"}', + schema_type="openapi", + schema="schema", + tools=[], + icon='{"background": "#000", "content": "A"}', + privacy_policy="privacy", + custom_disclaimer="disclaimer", + ) + db_query = Mock() + db_query.where.return_value.first.return_value = provider + controller = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value = db_query + with patch("core.tools.tool_manager.ApiToolProviderController.from_db", return_value=controller): + encrypter = Mock() + encrypter.decrypt.return_value = {"api_key_value": "secret"} + encrypter.mask_plugin_credentials.return_value = {"api_key_value": "***"} + with patch("core.tools.tool_manager.create_tool_provider_encrypter", return_value=(encrypter, Mock())): + with patch("core.tools.tool_manager.ToolLabelManager.get_tool_labels", return_value=["search"]): + user_payload = ToolManager.user_get_api_provider("api-provider", "tenant-1") + + assert user_payload["credentials"]["api_key_value"] == "***" + assert user_payload["labels"] == ["search"] + + +def test_get_api_provider_controller_not_found_raises(): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.first.return_value = None + with pytest.raises(ToolProviderNotFoundError, match="api provider missing not found"): + ToolManager.get_api_provider_controller("tenant-1", "missing") + + +def test_get_mcp_provider_controller_returns_controller(): + provider_entity = SimpleNamespace(provider_icon={"background": "#111", "content": "M"}) + controller = Mock() + session = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch("core.tools.tool_manager.MCPToolManageService") as mock_service_cls: + mock_service = mock_service_cls.return_value + mock_service.get_provider.return_value = provider_entity + with patch("core.tools.tool_manager.MCPToolProviderController.from_db", return_value=controller): + built = ToolManager.get_mcp_provider_controller("tenant-1", "mcp-1") + assert built is controller + + +def test_generate_mcp_tool_icon_url_returns_provider_icon(): + provider_entity = SimpleNamespace(provider_icon={"background": "#111", "content": "M"}) + session = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch("core.tools.tool_manager.MCPToolManageService") as mock_service_cls: + mock_service = mock_service_cls.return_value + mock_service.get_provider_entity.return_value = provider_entity + assert ToolManager.generate_mcp_tool_icon_url("tenant-1", "mcp-1") == provider_entity.provider_icon + + +def test_get_mcp_provider_controller_missing_raises(): + session = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch("core.tools.tool_manager.MCPToolManageService") as mock_service_cls: + mock_service_cls.return_value.get_provider.side_effect = ValueError("missing") + with pytest.raises(ToolProviderNotFoundError, match="mcp provider mcp-1 not found"): + ToolManager.get_mcp_provider_controller("tenant-1", "mcp-1") + + +def test_generate_tool_icon_urls_for_builtin_and_plugin(): + with patch("core.tools.tool_manager.dify_config.CONSOLE_API_URL", "https://console.example.com"): + builtin_url = ToolManager.generate_builtin_tool_icon_url("time") + plugin_url = ToolManager.generate_plugin_tool_icon_url("tenant-1", "icon.svg") + + assert builtin_url.endswith("/tool-provider/builtin/time/icon") + assert "/plugin/icon" in plugin_url + + +def test_generate_tool_icon_urls_for_workflow_and_api(): + workflow_provider = SimpleNamespace(icon='{"background": "#222", "content": "W"}') + api_provider = SimpleNamespace(icon='{"background": "#333", "content": "A"}') + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.first.side_effect = [workflow_provider, api_provider] + assert ToolManager.generate_workflow_tool_icon_url("tenant-1", "wf-1") == {"background": "#222", "content": "W"} + assert ToolManager.generate_api_tool_icon_url("tenant-1", "api-1") == {"background": "#333", "content": "A"} + + +def test_generate_tool_icon_urls_missing_workflow_and_api_use_default(): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.first.return_value = None + assert ToolManager.generate_workflow_tool_icon_url("tenant-1", "missing")["background"] == "#252525" + assert ToolManager.generate_api_tool_icon_url("tenant-1", "missing")["background"] == "#252525" + + +def test_get_tool_icon_for_builtin_provider_variants(): + plugin_provider = object.__new__(PluginToolProviderController) + plugin_provider.entity = SimpleNamespace(identity=SimpleNamespace(icon="plugin.svg")) + + with patch.object(ToolManager, "get_builtin_provider", return_value=plugin_provider): + with patch.object(ToolManager, "generate_plugin_tool_icon_url", return_value="plugin-icon"): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.BUILT_IN, "plugin-provider") == "plugin-icon" + + with patch.object(ToolManager, "get_builtin_provider", return_value=SimpleNamespace()): + with patch.object(ToolManager, "generate_builtin_tool_icon_url", return_value="builtin-icon"): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.BUILT_IN, "time") == "builtin-icon" + + +def test_get_tool_icon_for_api_workflow_and_mcp(): + with patch.object(ToolManager, "generate_api_tool_icon_url", return_value={"background": "#000"}): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.API, "api-1") == {"background": "#000"} + + with patch.object(ToolManager, "generate_workflow_tool_icon_url", return_value={"background": "#111"}): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.WORKFLOW, "wf-1") == {"background": "#111"} + + with patch.object(ToolManager, "generate_mcp_tool_icon_url", return_value={"background": "#222"}): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.MCP, "mcp-1") == {"background": "#222"} + + +def test_get_tool_icon_plugin_error_returns_default(): + plugin_provider = object.__new__(PluginToolProviderController) + plugin_provider.entity = SimpleNamespace(identity=SimpleNamespace(icon="plugin.svg")) + + with patch.object(ToolManager, "get_plugin_provider", return_value=plugin_provider): + with patch.object(ToolManager, "generate_plugin_tool_icon_url", side_effect=RuntimeError("fail")): + icon = ToolManager.get_tool_icon("tenant-1", ToolProviderType.PLUGIN, "plugin-provider") + assert icon["background"] == "#252525" + + +def test_get_tool_icon_invalid_provider_type_raises(): + with pytest.raises(ValueError, match="provider type"): + ToolManager.get_tool_icon("tenant-1", "invalid", "x") # type: ignore[arg-type] + + +def test_convert_tool_parameters_type_agent_and_workflow_branches(): + file_param = ToolParameter.get_simple_instance( + name="file", + llm_description="file", + typ=ToolParameter.ToolParameterType.FILE, + required=True, + ) + file_param.form = ToolParameter.ToolParameterForm.FORM + + with pytest.raises(ValueError, match="file type parameter file not supported in agent"): + ToolManager._convert_tool_parameters_type( + parameters=[file_param], + variable_pool=None, + tool_configurations={"file": "x"}, + typ="agent", + ) + + text_param = ToolParameter.get_simple_instance( + name="text", + llm_description="text", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + text_param.form = ToolParameter.ToolParameterForm.FORM + plain = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=None, + tool_configurations={"text": "hello"}, + typ="workflow", + ) + assert plain == {"text": "hello"} + + variable_pool = Mock() + variable_pool.get.return_value = SimpleNamespace(value="from-variable") + variable_pool.convert_template.return_value = SimpleNamespace(text="from-template") + + mixed = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=variable_pool, + tool_configurations={"text": {"type": "mixed", "value": "Hello {{name}}"}}, + typ="workflow", + ) + assert mixed == {"text": "from-template"} + + variable = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=variable_pool, + tool_configurations={"text": {"type": "variable", "value": ["sys", "query"]}}, + typ="workflow", + ) + assert variable == {"text": "from-variable"} + + +def test_convert_tool_parameters_type_constant_branch(): + text_param = ToolParameter.get_simple_instance( + name="text", + llm_description="text", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + text_param.form = ToolParameter.ToolParameterForm.FORM + variable_pool = Mock() + + constant = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=variable_pool, + tool_configurations={"text": {"type": "constant", "value": "fixed"}}, + typ="workflow", + ) + + assert constant == {"text": "fixed"} diff --git a/api/tests/unit_tests/core/tools/test_tool_provider_controller.py b/api/tests/unit_tests/core/tools/test_tool_provider_controller.py new file mode 100644 index 0000000000..30b8494c92 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_provider_controller.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +from collections.abc import Generator +from typing import Any + +import pytest + +from core.entities.provider_entities import ProviderConfig +from core.tools.__base.tool import Tool +from core.tools.__base.tool_provider import ToolProviderController +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolProviderEntity, + ToolProviderIdentity, + ToolProviderType, +) +from core.tools.errors import ToolProviderCredentialValidationError + + +class _DummyTool(Tool): + def tool_provider_type(self) -> ToolProviderType: + return ToolProviderType.BUILT_IN + + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + +class _DummyController(ToolProviderController): + def get_tool(self, tool_name: str) -> Tool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name=tool_name, + label=I18nObject(en_US=tool_name), + provider="provider", + ), + parameters=[], + ) + return _DummyTool(entity=entity, runtime=ToolRuntime(tenant_id="tenant")) + + +def _provider_identity() -> ToolProviderIdentity: + return ToolProviderIdentity( + author="author", + name="provider", + description=I18nObject(en_US="desc"), + icon="icon.svg", + label=I18nObject(en_US="Provider"), + ) + + +def test_tool_provider_controller_get_credentials_schema_returns_deep_copy(): + entity = ToolProviderEntity( + identity=_provider_identity(), + credentials_schema=[ProviderConfig(type=ProviderConfig.Type.TEXT_INPUT, name="api_key", required=False)], + ) + controller = _DummyController(entity=entity) + + schema = controller.get_credentials_schema() + schema[0].name = "changed" + + assert controller.entity.credentials_schema[0].name == "api_key" + + +def test_tool_provider_controller_default_provider_type(): + entity = ToolProviderEntity(identity=_provider_identity(), credentials_schema=[]) + controller = _DummyController(entity=entity) + + assert controller.provider_type == ToolProviderType.BUILT_IN + + +def test_validate_credentials_format_covers_required_default_and_type_rules(): + select_options = [ProviderConfig.Option(value="opt-a", label=I18nObject(en_US="A"))] + entity = ToolProviderEntity( + identity=_provider_identity(), + credentials_schema=[ + ProviderConfig(type=ProviderConfig.Type.TEXT_INPUT, name="required_text", required=True), + ProviderConfig(type=ProviderConfig.Type.SECRET_INPUT, name="secret", required=False), + ProviderConfig(type=ProviderConfig.Type.SELECT, name="choice", required=False, options=select_options), + ProviderConfig(type=ProviderConfig.Type.TEXT_INPUT, name="with_default", required=False, default="x"), + ], + ) + controller = _DummyController(entity=entity) + + credentials = {"required_text": "value", "secret": None, "choice": "opt-a"} + controller.validate_credentials_format(credentials) + assert credentials["with_default"] == "x" + + with pytest.raises(ToolProviderCredentialValidationError, match="not found"): + controller.validate_credentials_format({"required_text": "value", "unknown": "v"}) + + with pytest.raises(ToolProviderCredentialValidationError, match="is required"): + controller.validate_credentials_format({"secret": "s"}) + + with pytest.raises(ToolProviderCredentialValidationError, match="should be string"): + controller.validate_credentials_format({"required_text": 123}) # type: ignore[arg-type] + + with pytest.raises(ToolProviderCredentialValidationError, match="should be one of"): + controller.validate_credentials_format({"required_text": "value", "choice": "opt-b"}) diff --git a/api/tests/unit_tests/core/tools/utils/test_configuration.py b/api/tests/unit_tests/core/tools/utils/test_configuration.py new file mode 100644 index 0000000000..5ceaa08893 --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_configuration.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +from collections.abc import Generator +from typing import Any +from unittest.mock import patch + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.helper.tool_parameter_cache import ToolParameterCache +from core.tools.__base.tool import Tool +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolParameter, + ToolProviderType, +) +from core.tools.utils.configuration import ToolParameterConfigurationManager + + +class _DummyTool(Tool): + runtime_overrides: list[ToolParameter] + + def __init__(self, entity: ToolEntity, runtime: ToolRuntime, runtime_overrides: list[ToolParameter]): + super().__init__(entity=entity, runtime=runtime) + self.runtime_overrides = runtime_overrides + + def tool_provider_type(self) -> ToolProviderType: + return ToolProviderType.BUILT_IN + + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + def get_runtime_parameters( + self, + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> list[ToolParameter]: + return self.runtime_overrides + + +def _param( + name: str, + *, + typ: ToolParameter.ToolParameterType, + form: ToolParameter.ToolParameterForm, + required: bool = False, +) -> ToolParameter: + return ToolParameter( + name=name, + label=I18nObject(en_US=name), + placeholder=I18nObject(en_US=""), + human_description=I18nObject(en_US=""), + type=typ, + form=form, + required=required, + default=None, + ) + + +def _build_manager() -> ToolParameterConfigurationManager: + base_params = [ + _param("secret", typ=ToolParameter.ToolParameterType.SECRET_INPUT, form=ToolParameter.ToolParameterForm.FORM), + _param("plain", typ=ToolParameter.ToolParameterType.STRING, form=ToolParameter.ToolParameterForm.FORM), + ] + runtime_overrides = [ + _param("secret", typ=ToolParameter.ToolParameterType.SECRET_INPUT, form=ToolParameter.ToolParameterForm.FORM), + _param("runtime_only", typ=ToolParameter.ToolParameterType.STRING, form=ToolParameter.ToolParameterForm.FORM), + ] + entity = ToolEntity( + identity=ToolIdentity(author="a", name="tool-a", label=I18nObject(en_US="tool-a"), provider="provider-a"), + parameters=base_params, + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + tool = _DummyTool(entity=entity, runtime=runtime, runtime_overrides=runtime_overrides) + return ToolParameterConfigurationManager( + tenant_id="tenant-1", + tool_runtime=tool, + provider_name="provider-a", + provider_type=ToolProviderType.BUILT_IN, + identity_id="ID.1", + ) + + +def test_merge_and_mask_parameters(): + manager = _build_manager() + + masked = manager.mask_tool_parameters({"secret": "abcdefghi", "plain": "x", "runtime_only": "y"}) + assert masked["secret"] == "ab*****hi" + assert masked["plain"] == "x" + assert masked["runtime_only"] == "y" + + +def test_encrypt_tool_parameters(): + manager = _build_manager() + + with patch("core.tools.utils.configuration.encrypter.encrypt_token", return_value="enc"): + encrypted = manager.encrypt_tool_parameters({"secret": "raw", "plain": "x"}) + + assert encrypted["secret"] == "enc" + assert encrypted["plain"] == "x" + + +def test_decrypt_tool_parameters_cache_hit_and_miss(): + manager = _build_manager() + + with ( + patch.object(ToolParameterCache, "get", return_value={"secret": "cached"}), + patch.object(ToolParameterCache, "set") as mock_set, + ): + assert manager.decrypt_tool_parameters({"secret": "enc"}) == {"secret": "cached"} + mock_set.assert_not_called() + + with ( + patch.object(ToolParameterCache, "get", return_value=None), + patch.object(ToolParameterCache, "set") as mock_set, + patch("core.tools.utils.configuration.encrypter.decrypt_token", return_value="dec"), + ): + decrypted = manager.decrypt_tool_parameters({"secret": "enc", "plain": "x"}) + assert decrypted["secret"] == "dec" + mock_set.assert_called_once() + + +def test_delete_tool_parameters_cache(): + manager = _build_manager() + + with patch.object(ToolParameterCache, "delete") as mock_delete: + manager.delete_tool_parameters_cache() + + mock_delete.assert_called_once() + + +def test_configuration_manager_decrypt_suppresses_errors(): + manager = _build_manager() + with ( + patch.object(ToolParameterCache, "get", return_value=None), + patch("core.tools.utils.configuration.encrypter.decrypt_token", side_effect=RuntimeError("boom")), + ): + decrypted = manager.decrypt_tool_parameters({"secret": "enc"}) + # decryption failure is suppressed, original value is retained. + assert decrypted["secret"] == "enc" diff --git a/api/tests/unit_tests/core/tools/utils/test_encryption.py b/api/tests/unit_tests/core/tools/utils/test_encryption.py index 94be0bb573..ce77473dbd 100644 --- a/api/tests/unit_tests/core/tools/utils/test_encryption.py +++ b/api/tests/unit_tests/core/tools/utils/test_encryption.py @@ -1,10 +1,13 @@ import copy -from unittest.mock import patch +from types import SimpleNamespace +from typing import Any +from unittest.mock import Mock, patch import pytest from core.entities.provider_entities import BasicProviderConfig from core.helper.provider_encryption import ProviderConfigEncrypter +from core.tools.utils.encryption import create_tool_provider_encrypter # --------------------------- @@ -13,13 +16,13 @@ from core.helper.provider_encryption import ProviderConfigEncrypter class NoopCache: """Simple cache stub: always returns None, does nothing for set/delete.""" - def get(self): + def get(self) -> Any | None: return None - def set(self, config): + def set(self, config: Any) -> None: pass - def delete(self): + def delete(self) -> None: pass @@ -179,3 +182,35 @@ def test_decrypt_swallow_exception_and_keep_original(encrypter_obj): out = encrypter_obj.decrypt({"password": "ENC_ERR"}) assert out["password"] == "ENC_ERR" + + +def test_create_tool_provider_encrypter_builds_cache_and_encrypter(): + basic_config = BasicProviderConfig(name="key", type=BasicProviderConfig.Type.TEXT_INPUT) + credential_schema_item = SimpleNamespace(to_basic_provider_config=lambda: basic_config) + controller = SimpleNamespace( + provider_type=SimpleNamespace(value="builtin"), + entity=SimpleNamespace(identity=SimpleNamespace(name="provider-a")), + get_credentials_schema=lambda: [credential_schema_item], + ) + + cache_instance = Mock() + encrypter_instance = Mock() + + with patch( + "core.tools.utils.encryption.SingletonProviderCredentialsCache", return_value=cache_instance + ) as cache_cls: + with patch("core.tools.utils.encryption.ProviderConfigEncrypter", return_value=encrypter_instance) as enc_cls: + encrypter, cache = create_tool_provider_encrypter("tenant-1", controller) + + assert encrypter is encrypter_instance + assert cache is cache_instance + cache_cls.assert_called_once_with( + tenant_id="tenant-1", + provider_type="builtin", + provider_identity="provider-a", + ) + enc_cls.assert_called_once_with( + tenant_id="tenant-1", + config=[basic_config], + provider_config_cache=cache_instance, + ) diff --git a/api/tests/unit_tests/core/tools/utils/test_misc_utils_extra.py b/api/tests/unit_tests/core/tools/utils/test_misc_utils_extra.py new file mode 100644 index 0000000000..4ce73272bf --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_misc_utils_extra.py @@ -0,0 +1,478 @@ +from __future__ import annotations + +import uuid +from contextlib import nullcontext +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest +from yaml import YAMLError + +from core.app.app_config.entities import DatasetRetrieveConfigEntity +from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler +from core.rag.models.document import Document as RagDocument +from core.tools.utils.dataset_retriever import dataset_multi_retriever_tool as multi_retriever_module +from core.tools.utils.dataset_retriever import dataset_retriever_tool as single_retriever_module +from core.tools.utils.dataset_retriever.dataset_multi_retriever_tool import DatasetMultiRetrieverTool +from core.tools.utils.dataset_retriever.dataset_retriever_tool import DatasetRetrieverTool as SingleDatasetRetrieverTool +from core.tools.utils.text_processing_utils import remove_leading_symbols +from core.tools.utils.uuid_utils import is_valid_uuid +from core.tools.utils.yaml_utils import _load_yaml_file, load_yaml_file_cached + + +def _retrieve_config() -> DatasetRetrieveConfigEntity: + return DatasetRetrieveConfigEntity(retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE) + + +class _FakeFlaskApp: + def app_context(self): + return nullcontext() + + +class _ImmediateThread: + def __init__(self, target=None, kwargs=None, **_kwargs): + self._target = target + self._kwargs = kwargs or {} + + def start(self): + if self._target is not None: + self._target(**self._kwargs) + + def join(self): + return None + + +class _TestHitCallback(DatasetIndexToolCallbackHandler): + def __init__(self): + self.queries: list[tuple[str, str]] = [] + self.documents: list[RagDocument] | None = None + self.resources = None + + def on_query(self, query: str, dataset_id: str): + self.queries.append((query, dataset_id)) + + def on_tool_end(self, documents: list[RagDocument]): + self.documents = documents + + def return_retriever_resource_info(self, resource): + self.resources = list(resource) + + +def test_remove_leading_symbols_preserves_markdown_link_and_strips_punctuation(): + markdown = "[Example](https://example.com) content" + assert remove_leading_symbols(markdown) == markdown + + assert remove_leading_symbols("...Hello world") == "Hello world" + + +def test_is_valid_uuid_handles_valid_invalid_and_empty_values(): + assert is_valid_uuid(str(uuid.uuid4())) is True + assert is_valid_uuid("not-a-uuid") is False + assert is_valid_uuid("") is False + assert is_valid_uuid(None) is False + + +def test_load_yaml_file_valid(tmp_path): + valid_file = tmp_path / "valid.yaml" + valid_file.write_text("a: 1\nb: two\n", encoding="utf-8") + + loaded = _load_yaml_file(file_path=str(valid_file)) + + assert loaded == {"a": 1, "b": "two"} + + +def test_load_yaml_file_missing(tmp_path): + with pytest.raises(FileNotFoundError): + _load_yaml_file(file_path=str(tmp_path / "missing.yaml")) + + +def test_load_yaml_file_invalid(tmp_path): + invalid_file = tmp_path / "invalid.yaml" + invalid_file.write_text("a: [1, 2\n", encoding="utf-8") + + with pytest.raises(YAMLError): + _load_yaml_file(file_path=str(invalid_file)) + + +def test_load_yaml_file_cached_hits(tmp_path): + valid_file = tmp_path / "valid.yaml" + valid_file.write_text("a: 1\nb: two\n", encoding="utf-8") + + load_yaml_file_cached.cache_clear() + assert load_yaml_file_cached(str(valid_file)) == {"a": 1, "b": "two"} + + assert load_yaml_file_cached(str(valid_file)) == {"a": 1, "b": "two"} + assert load_yaml_file_cached.cache_info().hits == 1 + + +def test_single_dataset_retriever_from_dataset_builds_name_and_description(): + dataset = SimpleNamespace(id="dataset-1", tenant_id="tenant-1", name="Knowledge", description=None) + + tool = SingleDatasetRetrieverTool.from_dataset( + dataset=dataset, + retrieve_config=_retrieve_config(), + return_resource=False, + retriever_from="prod", + inputs={}, + ) + + assert tool.name == "dataset_dataset_1" + assert tool.description == "useful for when you want to answer queries about the Knowledge" + + +def test_single_dataset_retriever_external_run_returns_content_and_resources(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + name="Knowledge Base", + provider="external", + indexing_technique="high_quality", + retrieval_model={}, + ) + callback = _TestHitCallback() + dataset_retrieval = Mock() + dataset_retrieval.get_metadata_filter_condition.return_value = ( + {"dataset-1": ["doc-a"]}, + {"logical_operator": "and"}, + ) + db_session = Mock() + db_session.scalar.return_value = dataset + external_documents = [ + {"content": "first", "metadata": {"document_id": "doc-a"}, "score": 0.9, "title": "Doc A"}, + {"content": "second", "metadata": {"document_id": "doc-b"}, "score": 0.8, "title": "Doc B"}, + ] + + tool = SingleDatasetRetrieverTool( + tenant_id="tenant-1", + dataset_id="dataset-1", + retrieve_config=_retrieve_config(), + return_resource=True, + retriever_from="dev", + hit_callbacks=[callback], + inputs={"x": 1}, + ) + + with patch.object(single_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(single_retriever_module, "DatasetRetrieval", return_value=dataset_retrieval): + with patch.object( + single_retriever_module.ExternalDatasetService, + "fetch_external_knowledge_retrieval", + return_value=external_documents, + ) as fetch_mock: + result = tool.run(query="hello") + + assert result == "first\nsecond" + assert callback.queries == [("hello", "dataset-1")] + assert callback.resources is not None + resource_info = callback.resources + assert [item.position for item in resource_info] == [1, 2] + assert resource_info[0].dataset_id == "dataset-1" + fetch_mock.assert_called_once() + + +def test_single_dataset_retriever_returns_empty_when_metadata_filter_finds_no_documents(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + name="Knowledge Base", + provider="internal", + indexing_technique="high_quality", + retrieval_model=None, + ) + dataset_retrieval = Mock() + dataset_retrieval.get_metadata_filter_condition.return_value = ({"dataset-1": []}, {"logical_operator": "and"}) + db_session = Mock() + db_session.scalar.return_value = dataset + + tool = SingleDatasetRetrieverTool( + tenant_id="tenant-1", + dataset_id="dataset-1", + retrieve_config=_retrieve_config(), + return_resource=False, + retriever_from="prod", + hit_callbacks=[_TestHitCallback()], + inputs={}, + ) + + with patch.object(single_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(single_retriever_module, "DatasetRetrieval", return_value=dataset_retrieval): + with patch.object(single_retriever_module.RetrievalService, "retrieve") as retrieve_mock: + result = tool.run(query="hello") + + assert result == "" + retrieve_mock.assert_not_called() + + +def test_single_dataset_retriever_non_economy_run_sorts_context_and_resources(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + name="Knowledge Base", + provider="internal", + indexing_technique="high_quality", + retrieval_model={ + "search_method": "semantic_search", + "score_threshold_enabled": True, + "score_threshold": 0.2, + "reranking_enable": True, + "reranking_model": {"reranking_provider_name": "provider", "reranking_model_name": "model"}, + "reranking_mode": "reranking_model", + "weights": {"vector_setting": {"vector_weight": 0.6}}, + }, + ) + callback = _TestHitCallback() + dataset_retrieval = Mock() + dataset_retrieval.get_metadata_filter_condition.return_value = (None, None) + low_segment = SimpleNamespace( + id="seg-low", + dataset_id="dataset-1", + document_id="doc-low", + content="raw low", + answer="low answer", + hit_count=1, + word_count=10, + position=3, + index_node_hash="hash-low", + get_sign_content=lambda: "signed low", + ) + high_segment = SimpleNamespace( + id="seg-high", + dataset_id="dataset-1", + document_id="doc-high", + content="raw high", + answer=None, + hit_count=9, + word_count=25, + position=1, + index_node_hash="hash-high", + get_sign_content=lambda: "signed high", + ) + records = [ + SimpleNamespace(segment=low_segment, score=0.2, summary="summary low"), + SimpleNamespace(segment=high_segment, score=0.9, summary=None), + ] + documents = [ + RagDocument(page_content="first", metadata={"doc_id": "node-low", "score": 0.2}), + RagDocument(page_content="second", metadata={"doc_id": "node-high", "score": 0.9}), + ] + lookup_doc_low = SimpleNamespace( + id="doc-low", name="Document Low", data_source_type="upload_file", doc_metadata={"lang": "en"} + ) + lookup_doc_high = SimpleNamespace( + id="doc-high", name="Document High", data_source_type="notion", doc_metadata={"lang": "fr"} + ) + db_session = Mock() + db_session.scalar.side_effect = [dataset, lookup_doc_low, lookup_doc_high] + db_session.query.return_value.filter_by.return_value.first.return_value = dataset + + tool = SingleDatasetRetrieverTool( + tenant_id="tenant-1", + dataset_id="dataset-1", + retrieve_config=_retrieve_config(), + return_resource=True, + retriever_from="dev", + hit_callbacks=[callback], + inputs={}, + top_k=2, + ) + + with patch.object(single_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(single_retriever_module, "DatasetRetrieval", return_value=dataset_retrieval): + with patch.object(single_retriever_module.RetrievalService, "retrieve", return_value=documents): + with patch.object( + single_retriever_module.RetrievalService, + "format_retrieval_documents", + return_value=records, + ): + result = tool.run(query="hello") + + assert result == "signed high\nsummary low\nquestion:signed low answer:low answer" + assert callback.documents == documents + assert callback.resources is not None + resource_info = callback.resources + assert [item.position for item in resource_info] == [1, 2] + assert resource_info[0].segment_id == "seg-high" + assert resource_info[0].hit_count == 9 + assert resource_info[1].summary == "summary low" + assert resource_info[1].content == "question:raw low \nanswer:low answer" + + +def test_multi_dataset_retriever_from_dataset_sets_tool_name(): + tool = DatasetMultiRetrieverTool.from_dataset( + dataset_ids=["dataset-1"], + tenant_id="tenant-1", + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=False, + retriever_from="prod", + ) + + assert tool.name == "dataset_tenant_1" + + +def test_multi_dataset_retriever_retriever_returns_early_when_dataset_is_missing(): + callback = _TestHitCallback() + all_documents: list[RagDocument] = [] + db_session = Mock() + db_session.scalar.return_value = None + tool = DatasetMultiRetrieverTool( + tenant_id="tenant-1", + dataset_ids=["dataset-1"], + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=False, + retriever_from="prod", + ) + + with patch.object(multi_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(multi_retriever_module.RetrievalService, "retrieve") as retrieve_mock: + result = tool._retriever( + flask_app=_FakeFlaskApp(), + dataset_id="dataset-1", + query="hello", + all_documents=all_documents, + hit_callbacks=[callback], + ) + + assert result == [] + assert all_documents == [] + assert callback.queries == [] + retrieve_mock.assert_not_called() + + +def test_multi_dataset_retriever_retriever_non_economy_uses_retrieval_model(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + indexing_technique="high_quality", + retrieval_model={ + "search_method": "semantic_search", + "top_k": 6, + "score_threshold_enabled": True, + "score_threshold": 0.4, + "reranking_enable": False, + "reranking_mode": None, + "weights": {"balanced": True}, + }, + ) + callback = _TestHitCallback() + documents = [RagDocument(page_content="retrieved", metadata={"doc_id": "node-1", "score": 0.4})] + all_documents: list[RagDocument] = [] + db_session = Mock() + db_session.scalar.return_value = dataset + tool = DatasetMultiRetrieverTool( + tenant_id="tenant-1", + dataset_ids=["dataset-1"], + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=False, + retriever_from="prod", + top_k=2, + ) + + with patch.object(multi_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(multi_retriever_module.RetrievalService, "retrieve", return_value=documents) as retrieve_mock: + tool._retriever( + flask_app=_FakeFlaskApp(), + dataset_id="dataset-1", + query="hello", + all_documents=all_documents, + hit_callbacks=[callback], + ) + + assert all_documents == documents + assert callback.queries == [("hello", "dataset-1")] + retrieve_mock.assert_called_once_with( + retrieval_method="semantic_search", + dataset_id="dataset-1", + query="hello", + top_k=6, + score_threshold=0.4, + reranking_model=None, + reranking_mode="reranking_model", + weights={"balanced": True}, + ) + + +def test_multi_dataset_retriever_run_orders_segments_and_returns_resources(): + callback = _TestHitCallback() + tool = DatasetMultiRetrieverTool( + tenant_id="tenant-1", + dataset_ids=["dataset-1", "dataset-2"], + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=True, + retriever_from="dev", + hit_callbacks=[callback], + top_k=2, + score_threshold=0.1, + ) + first_doc = RagDocument(page_content="first", metadata={"doc_id": "node-2", "score": 0.4}) + second_doc = RagDocument(page_content="second", metadata={"doc_id": "node-1", "score": 0.9}) + + def fake_retriever(**kwargs): + if kwargs["dataset_id"] == "dataset-1": + kwargs["all_documents"].append(first_doc) + else: + kwargs["all_documents"].append(second_doc) + + segment_for_node_2 = SimpleNamespace( + id="seg-2", + dataset_id="dataset-1", + document_id="doc-2", + index_node_id="node-2", + content="raw two", + answer="answer two", + hit_count=2, + word_count=20, + position=2, + index_node_hash="hash-2", + get_sign_content=lambda: "signed two", + ) + segment_for_node_1 = SimpleNamespace( + id="seg-1", + dataset_id="dataset-2", + document_id="doc-1", + index_node_id="node-1", + content="raw one", + answer=None, + hit_count=7, + word_count=30, + position=1, + index_node_hash="hash-1", + get_sign_content=lambda: "signed one", + ) + db_session = Mock() + db_session.scalars.return_value.all.return_value = [segment_for_node_2, segment_for_node_1] + db_session.query.return_value.filter_by.return_value.first.side_effect = [ + SimpleNamespace(id="dataset-2", name="Dataset Two"), + SimpleNamespace(id="dataset-1", name="Dataset One"), + ] + db_session.scalar.side_effect = [ + SimpleNamespace(id="doc-1", name="Doc One", data_source_type="upload_file", doc_metadata={"p": 1}), + SimpleNamespace(id="doc-2", name="Doc Two", data_source_type="notion", doc_metadata={"p": 2}), + ] + model_manager = Mock() + model_manager.get_model_instance.return_value = Mock() + rerank_runner = Mock() + rerank_runner.run.return_value = [second_doc, first_doc] + fake_current_app = SimpleNamespace(_get_current_object=lambda: _FakeFlaskApp()) + + with patch.object(tool, "_retriever", side_effect=fake_retriever) as retriever_mock: + with patch.object(multi_retriever_module, "current_app", fake_current_app): + with patch.object(multi_retriever_module.threading, "Thread", _ImmediateThread): + with patch.object(multi_retriever_module, "ModelManager", return_value=model_manager): + with patch.object(multi_retriever_module, "RerankModelRunner", return_value=rerank_runner): + with patch.object(multi_retriever_module, "db", SimpleNamespace(session=db_session)): + result = tool.run(query="hello") + + assert result == "signed one\nquestion:signed two answer:answer two" + assert retriever_mock.call_count == 2 + assert callback.documents == [second_doc, first_doc] + assert callback.resources is not None + resource_info = callback.resources + assert [item.position for item in resource_info] == [1, 2] + assert resource_info[0].score == 0.9 + assert resource_info[0].content == "raw one" + assert resource_info[1].score == 0.4 + assert resource_info[1].content == "question:raw two \nanswer:answer two" diff --git a/api/tests/unit_tests/core/tools/utils/test_model_invocation_utils.py b/api/tests/unit_tests/core/tools/utils/test_model_invocation_utils.py new file mode 100644 index 0000000000..2acae889b2 --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_model_invocation_utils.py @@ -0,0 +1,158 @@ +"""Unit tests for ModelInvocationUtils. + +Covers success and error branches for ModelInvocationUtils, including +InvokeModelError and invoke error mappings for InvokeAuthorizationError, +InvokeBadRequestError, InvokeConnectionError, InvokeRateLimitError, and +InvokeServerUnavailableError. Assumes mocked model instances and managers. +""" + +from __future__ import annotations + +from decimal import Decimal +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.tools.utils.model_invocation_utils import InvokeModelError, ModelInvocationUtils +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey +from dify_graph.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +def _mock_model_instance(*, schema: dict | None = None) -> SimpleNamespace: + model_type_instance = Mock() + model_type_instance.get_model_schema.return_value = ( + SimpleNamespace(model_properties=schema or {}) if schema is not None else None + ) + return SimpleNamespace( + provider="provider", + model="model-a", + model_name="model-a", + credentials={"api_key": "x"}, + model_type_instance=model_type_instance, + get_llm_num_tokens=lambda prompt_messages: 5, + invoke_llm=Mock(), + ) + + +@pytest.mark.parametrize( + ("model_instance", "expected", "error_match"), + [ + (None, None, "Model not found"), + (_mock_model_instance(schema=None), None, "No model schema found"), + (_mock_model_instance(schema={}), 2048, None), + (_mock_model_instance(schema={ModelPropertyKey.CONTEXT_SIZE: 8192}), 8192, None), + ], + ids=[ + "missing-model", + "missing-schema", + "default-context-size", + "schema-context-size", + ], +) +def test_get_max_llm_context_tokens_branches(model_instance, expected, error_match): + manager = Mock() + manager.get_default_model_instance.return_value = model_instance + + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + if error_match: + with pytest.raises(InvokeModelError, match=error_match): + ModelInvocationUtils.get_max_llm_context_tokens("tenant") + else: + assert ModelInvocationUtils.get_max_llm_context_tokens("tenant") == expected + + +def test_calculate_tokens_handles_missing_model(): + manager = Mock() + manager.get_default_model_instance.return_value = None + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + with pytest.raises(InvokeModelError, match="Model not found"): + ModelInvocationUtils.calculate_tokens("tenant", []) + + +def test_invoke_success_and_error_mappings(): + model_instance = _mock_model_instance(schema={ModelPropertyKey.CONTEXT_SIZE: 2048}) + model_instance.invoke_llm.return_value = SimpleNamespace( + message=SimpleNamespace(content="ok"), + usage=SimpleNamespace( + completion_tokens=7, + completion_unit_price=Decimal("0.1"), + completion_price_unit=Decimal(1), + latency=0.3, + total_price=Decimal("0.7"), + currency="USD", + ), + ) + manager = Mock() + manager.get_default_model_instance.return_value = model_instance + + class _ToolModelInvoke: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + db_mock = SimpleNamespace(session=Mock()) + + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + with patch("core.tools.utils.model_invocation_utils.ToolModelInvoke", _ToolModelInvoke): + with patch("core.tools.utils.model_invocation_utils.db", db_mock): + response = ModelInvocationUtils.invoke( + user_id="u1", + tenant_id="tenant", + tool_type="builtin", + tool_name="tool-a", + prompt_messages=[], + ) + + assert response.message.content == "ok" + assert db_mock.session.add.call_count == 1 + assert db_mock.session.commit.call_count == 2 + + +@pytest.mark.parametrize( + ("exc", "expected"), + [ + (InvokeRateLimitError("rate"), "Invoke rate limit error"), + (InvokeBadRequestError("bad"), "Invoke bad request error"), + (InvokeConnectionError("conn"), "Invoke connection error"), + (InvokeAuthorizationError("auth"), "Invoke authorization error"), + (InvokeServerUnavailableError("down"), "Invoke server unavailable error"), + (RuntimeError("oops"), "Invoke error"), + ], + ids=[ + "rate-limit", + "bad-request", + "connection", + "authorization", + "server-unavailable", + "generic-error", + ], +) +def test_invoke_error_mappings(exc, expected): + model_instance = _mock_model_instance(schema={ModelPropertyKey.CONTEXT_SIZE: 2048}) + model_instance.invoke_llm.side_effect = exc + manager = Mock() + manager.get_default_model_instance.return_value = model_instance + + class _ToolModelInvoke: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + db_mock = SimpleNamespace(session=Mock()) + + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + with patch("core.tools.utils.model_invocation_utils.ToolModelInvoke", _ToolModelInvoke): + with patch("core.tools.utils.model_invocation_utils.db", db_mock): + with pytest.raises(InvokeModelError, match=expected): + ModelInvocationUtils.invoke( + user_id="u1", + tenant_id="tenant", + tool_type="builtin", + tool_name="tool-a", + prompt_messages=[], + ) diff --git a/api/tests/unit_tests/core/tools/utils/test_parser.py b/api/tests/unit_tests/core/tools/utils/test_parser.py index f39158aa59..40f91b12a0 100644 --- a/api/tests/unit_tests/core/tools/utils/test_parser.py +++ b/api/tests/unit_tests/core/tools/utils/test_parser.py @@ -1,6 +1,12 @@ +from json.decoder import JSONDecodeError +from unittest.mock import Mock, patch + import pytest from flask import Flask +from yaml import YAMLError +from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter +from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError from core.tools.utils.parser import ApiBasedToolSchemaParser @@ -189,3 +195,225 @@ def test_parse_openapi_to_tool_bundle_default_value_type_casting(app): available_param = params_by_name["available"] assert available_param.type == "boolean" assert available_param.default is True + + +def test_sanitize_default_value_and_type_detection(): + assert ApiBasedToolSchemaParser._sanitize_default_value([]) is None + assert ApiBasedToolSchemaParser._sanitize_default_value({}) is None + assert ApiBasedToolSchemaParser._sanitize_default_value("ok") == "ok" + + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"format": "binary"}) == ToolParameter.ToolParameterType.FILE + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "integer"}) == ToolParameter.ToolParameterType.NUMBER + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"schema": {"type": "boolean"}}) + == ToolParameter.ToolParameterType.BOOLEAN + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "array", "items": {"format": "binary"}}) + == ToolParameter.ToolParameterType.FILES + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "array", "items": {"type": "string"}}) + == ToolParameter.ToolParameterType.ARRAY + ) + assert ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "object"}) is None + + +def test_parse_openapi_to_tool_bundle_server_env_and_refs(app): + openapi = { + "openapi": "3.0.0", + "info": {"title": "API", "version": "1.0.0", "description": "API description"}, + "servers": [ + {"url": "https://dev.example.com", "env": "dev"}, + {"url": "https://prod.example.com", "env": "prod"}, + ], + "paths": { + "/items": { + "post": { + "description": "Create item", + "parameters": [ + {"$ref": "#/components/parameters/token"}, + {"name": "token", "schema": {"type": "string"}}, + ], + "requestBody": { + "content": {"application/json": {"schema": {"$ref": "#/components/schemas/ItemRequest"}}} + }, + } + } + }, + "components": { + "parameters": { + "token": {"name": "token", "required": True, "schema": {"type": "string"}}, + }, + "schemas": { + "ItemRequest": { + "type": "object", + "required": ["age"], + "properties": {"age": {"type": "integer", "description": "Age", "default": 18}}, + } + }, + }, + } + + extra_info: dict = {} + warning: dict = {} + with app.test_request_context(headers={"X-Request-Env": "prod"}): + bundles = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning) + + assert len(bundles) == 1 + assert bundles[0].server_url == "https://prod.example.com/items" + assert warning["duplicated_parameter"].startswith("Parameter token") + assert extra_info["description"] == "API description" + + +def test_parse_openapi_to_tool_bundle_no_server_raises(app): + openapi = {"info": {"title": "x"}, "servers": [], "paths": {}} + with app.test_request_context(): + with pytest.raises(ToolProviderNotFoundError, match="No server found"): + ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi) + + +def test_parse_openapi_yaml_to_tool_bundle_invalid_yaml(app): + with app.test_request_context(): + with pytest.raises(ToolApiSchemaError, match="Invalid openapi yaml"): + ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle("null") + + +def test_parse_swagger_to_openapi_branches(): + with pytest.raises(ToolApiSchemaError, match="No server found"): + ApiBasedToolSchemaParser.parse_swagger_to_openapi({"info": {}, "paths": {}}) + + with pytest.raises(ToolApiSchemaError, match="No paths found"): + ApiBasedToolSchemaParser.parse_swagger_to_openapi({"servers": [{"url": "https://x"}], "paths": {}}) + + with pytest.raises(ToolApiSchemaError, match="No operationId found"): + ApiBasedToolSchemaParser.parse_swagger_to_openapi( + { + "servers": [{"url": "https://x"}], + "paths": {"/a": {"get": {"summary": "x", "responses": {}}}}, + } + ) + + warning: dict = {"seed": True} + converted = ApiBasedToolSchemaParser.parse_swagger_to_openapi( + { + "servers": [{"url": "https://x"}], + "paths": {"/a": {"get": {"operationId": "getA", "responses": {}}}}, + "definitions": {"A": {"type": "object"}}, + }, + warning=warning, + ) + assert converted["openapi"] == "3.0.0" + assert converted["components"]["schemas"]["A"]["type"] == "object" + assert warning["missing_summary"].startswith("No summary or description found") + + +def test_parse_openai_plugin_json_branches(app): + with app.test_request_context(): + with pytest.raises(ToolProviderNotFoundError, match="Invalid openai plugin json"): + ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle("{bad") + + with pytest.raises(ToolNotSupportedError, match="Only openapi is supported"): + ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle( + '{"api": {"url": "https://x", "type": "graphql"}}' + ) + + +def test_parse_openai_plugin_json_http_branches(app): + with app.test_request_context(): + response = type("Resp", (), {"status_code": 500, "text": "", "close": Mock()})() + with patch("core.tools.utils.parser.httpx.get", return_value=response): + with pytest.raises(ToolProviderNotFoundError, match="cannot get openapi yaml"): + ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle( + '{"api": {"url": "https://x", "type": "openapi"}}' + ) + response.close.assert_called_once() + + success_response = type("Resp", (), {"status_code": 200, "text": "openapi: 3.0.0", "close": Mock()})() + with patch("core.tools.utils.parser.httpx.get", return_value=success_response): + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle", + return_value=["bundle"], + ) as mock_parse: + bundles = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle( + '{"api": {"url": "https://x", "type": "openapi"}}' + ) + assert bundles == ["bundle"] + mock_parse.assert_called_once() + success_response.close.assert_called_once() + + +def test_auto_parse_json_yaml_failure(): + with patch("core.tools.utils.parser.json_loads", side_effect=JSONDecodeError("bad", "x", 0)): + with patch("core.tools.utils.parser.safe_load", side_effect=YAMLError("bad yaml")): + with pytest.raises(ToolApiSchemaError, match="Invalid api schema, schema is neither json nor yaml"): + ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(":::") + + +def test_auto_parse_openapi_success(): + openapi_content = '{"openapi": "3.0.0", "servers": [{"url": "https://x"}], "info": {"title": "x"}, "paths": {}}' + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle", + return_value=["openapi-bundle"], + ): + bundles, schema_type = ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(openapi_content) + + assert bundles == ["openapi-bundle"] + assert schema_type == ApiProviderSchemaType.OPENAPI + + +def test_auto_parse_openapi_then_swagger(): + openapi_content = '{"openapi": "3.0.0", "servers": [{"url": "https://x"}], "info": {"title": "x"}, "paths": {}}' + loaded_content = { + "openapi": "3.0.0", + "servers": [{"url": "https://x"}], + "info": {"title": "x"}, + "paths": {}, + } + converted_swagger = { + "openapi": "3.0.0", + "servers": [{"url": "https://x"}], + "info": {"title": "x"}, + "paths": {}, + } + + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle", + side_effect=[ToolApiSchemaError("openapi error"), ["swagger-bundle"]], + ) as mock_parse_openapi: + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_swagger_to_openapi", + return_value=converted_swagger, + ) as mock_parse_swagger: + bundles, schema_type = ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(openapi_content) + + assert bundles == ["swagger-bundle"] + assert schema_type == ApiProviderSchemaType.SWAGGER + mock_parse_swagger.assert_called_once_with(loaded_content, extra_info={}, warning={}) + assert mock_parse_openapi.call_count == 2 + mock_parse_openapi.assert_any_call(loaded_content, extra_info={}, warning={}) + mock_parse_openapi.assert_any_call(converted_swagger, extra_info={}, warning={}) + + +def test_auto_parse_openapi_swagger_then_plugin(): + openapi_content = '{"openapi": "3.0.0", "servers": [{"url": "https://x"}], "info": {"title": "x"}, "paths": {}}' + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle", + side_effect=ToolApiSchemaError("openapi error"), + ): + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_swagger_to_openapi", + side_effect=ToolApiSchemaError("swagger error"), + ): + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle", + return_value=["plugin-bundle"], + ): + bundles, schema_type = ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(openapi_content) + + assert bundles == ["plugin-bundle"] + assert schema_type == ApiProviderSchemaType.OPENAI_PLUGIN diff --git a/api/tests/unit_tests/core/tools/utils/test_system_oauth_encryption.py b/api/tests/unit_tests/core/tools/utils/test_system_oauth_encryption.py new file mode 100644 index 0000000000..5691f33e65 --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_system_oauth_encryption.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import pytest + +from core.tools.utils import system_oauth_encryption as oauth_encryption +from core.tools.utils.system_oauth_encryption import OAuthEncryptionError, SystemOAuthEncrypter + + +def test_system_oauth_encrypter_roundtrip(): + encrypter = SystemOAuthEncrypter(secret_key="test-secret") + payload = {"client_id": "cid", "client_secret": "csecret", "grant_type": "authorization_code"} + + encrypted = encrypter.encrypt_oauth_params(payload) + decrypted = encrypter.decrypt_oauth_params(encrypted) + + assert encrypted + assert dict(decrypted) == payload + + +def test_system_oauth_encrypter_decrypt_validates_input(): + encrypter = SystemOAuthEncrypter(secret_key="test-secret") + + with pytest.raises(ValueError, match="must be a string"): + encrypter.decrypt_oauth_params(123) # type: ignore[arg-type] + + with pytest.raises(ValueError, match="cannot be empty"): + encrypter.decrypt_oauth_params("") + + +def test_system_oauth_encrypter_raises_oauth_error_for_invalid_ciphertext(): + encrypter = SystemOAuthEncrypter(secret_key="test-secret") + + with pytest.raises(OAuthEncryptionError, match="Decryption failed"): + encrypter.decrypt_oauth_params("not-base64") + + +def test_system_oauth_helpers_use_global_cached_instance(monkeypatch): + monkeypatch.setattr(oauth_encryption, "_oauth_encrypter", None) + monkeypatch.setattr("core.tools.utils.system_oauth_encryption.dify_config.SECRET_KEY", "global-secret") + + first = oauth_encryption.get_system_oauth_encrypter() + second = oauth_encryption.get_system_oauth_encrypter() + assert first is second + + encrypted = oauth_encryption.encrypt_system_oauth_params({"k": "v"}) + assert oauth_encryption.decrypt_system_oauth_params(encrypted) == {"k": "v"} + + +def test_create_system_oauth_encrypter_factory(): + encrypter = oauth_encryption.create_system_oauth_encrypter(secret_key="factory-secret") + assert isinstance(encrypter, SystemOAuthEncrypter) diff --git a/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py b/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py index c46e31d90f..dd79b79718 100644 --- a/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py +++ b/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py @@ -1,7 +1,9 @@ import pytest +from core.tools.entities.tool_entities import ToolParameter, WorkflowToolParameterConfiguration from core.tools.errors import WorkflowToolHumanInputNotSupportedError from core.tools.utils.workflow_configuration_sync import WorkflowToolConfigurationUtils +from dify_graph.variables.input_entities import VariableEntity, VariableEntityType def test_ensure_no_human_input_nodes_passes_for_non_human_input(): @@ -31,3 +33,91 @@ def test_ensure_no_human_input_nodes_raises_for_human_input(): WorkflowToolConfigurationUtils.ensure_no_human_input_nodes(graph) assert exc_info.value.error_code == "workflow_tool_human_input_not_supported" + + +def test_get_workflow_graph_variables_and_outputs(): + graph = { + "nodes": [ + { + "id": "start", + "data": { + "type": "start", + "variables": [ + { + "variable": "query", + "label": "Query", + "type": "text-input", + "required": True, + } + ], + }, + }, + { + "id": "end-1", + "data": { + "type": "end", + "outputs": [ + {"variable": "answer", "value_type": "string", "value_selector": ["n1", "answer"]}, + {"variable": "score", "value_type": "number", "value_selector": ["n1", "score"]}, + ], + }, + }, + { + "id": "end-2", + "data": { + "type": "end", + "outputs": [ + {"variable": "answer", "value_type": "object", "value_selector": ["n2", "answer"]}, + ], + }, + }, + ] + } + + variables = WorkflowToolConfigurationUtils.get_workflow_graph_variables(graph) + assert len(variables) == 1 + assert variables[0].variable == "query" + assert variables[0].type == VariableEntityType.TEXT_INPUT + + outputs = WorkflowToolConfigurationUtils.get_workflow_graph_output(graph) + assert [output.variable for output in outputs] == ["answer", "score"] + assert outputs[0].value_type == "object" + assert outputs[1].value_type == "number" + + no_start = WorkflowToolConfigurationUtils.get_workflow_graph_variables({"nodes": []}) + assert no_start == [] + + +def test_check_is_synced_validation(): + variables = [ + VariableEntity( + variable="query", + label="Query", + type=VariableEntityType.TEXT_INPUT, + required=True, + ) + ] + configs = [ + WorkflowToolParameterConfiguration( + name="query", + description="desc", + form=ToolParameter.ToolParameterForm.FORM, + ) + ] + + WorkflowToolConfigurationUtils.check_is_synced(variables=variables, tool_configurations=configs) + + with pytest.raises(ValueError, match="parameter configuration mismatch"): + WorkflowToolConfigurationUtils.check_is_synced(variables=variables, tool_configurations=[]) + + with pytest.raises(ValueError, match="parameter configuration mismatch"): + WorkflowToolConfigurationUtils.check_is_synced( + variables=variables, + tool_configurations=[ + WorkflowToolParameterConfiguration( + name="other", + description="desc", + form=ToolParameter.ToolParameterForm.FORM, + ) + ], + ) diff --git a/api/tests/unit_tests/core/tools/workflow_as_tool/test_provider.py b/api/tests/unit_tests/core/tools/workflow_as_tool/test_provider.py new file mode 100644 index 0000000000..dd140cbb27 --- /dev/null +++ b/api/tests/unit_tests/core/tools/workflow_as_tool/test_provider.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolParameter, + ToolProviderEntity, + ToolProviderIdentity, + ToolProviderType, +) +from core.tools.workflow_as_tool.provider import WorkflowToolProviderController +from dify_graph.variables.input_entities import VariableEntity, VariableEntityType + + +def _controller() -> WorkflowToolProviderController: + entity = ToolProviderEntity( + identity=ToolProviderIdentity( + author="author", + name="wf-provider", + description=I18nObject(en_US="desc"), + icon="icon.svg", + label=I18nObject(en_US="WF"), + ), + credentials_schema=[], + ) + return WorkflowToolProviderController(entity=entity, provider_id="provider-1") + + +def _mock_session_with_begin() -> Mock: + session = Mock() + begin_cm = Mock() + begin_cm.__enter__ = Mock(return_value=None) + begin_cm.__exit__ = Mock(return_value=False) + session.begin.return_value = begin_cm + return session + + +def test_get_db_provider_tool_builds_entity(): + controller = _controller() + session = Mock() + workflow = SimpleNamespace(graph_dict={"nodes": []}, features_dict={}) + session.query.return_value.where.return_value.first.return_value = workflow + app = SimpleNamespace(id="app-1") + db_provider = SimpleNamespace( + id="provider-1", + app_id="app-1", + version="1", + label="WF Provider", + description="desc", + icon="icon.svg", + name="workflow_tool", + tenant_id="tenant-1", + user_id="user-1", + parameter_configurations=[ + SimpleNamespace(name="country", description="Country", form=ToolParameter.ToolParameterForm.FORM), + SimpleNamespace(name="files", description="files", form=ToolParameter.ToolParameterForm.FORM), + ], + ) + user = SimpleNamespace(name="Alice") + variables = [ + VariableEntity( + variable="country", + label="Country", + description="Country", + type=VariableEntityType.SELECT, + required=True, + options=["US", "IN"], + ) + ] + outputs = [ + SimpleNamespace(variable="json", value_type="string"), + SimpleNamespace(variable="answer", value_type="string"), + ] + + with ( + patch( + "core.tools.workflow_as_tool.provider.WorkflowAppConfigManager.convert_features", + return_value=SimpleNamespace(file_upload=True), + ), + patch( + "core.tools.workflow_as_tool.provider.WorkflowToolConfigurationUtils.get_workflow_graph_variables", + return_value=variables, + ), + patch( + "core.tools.workflow_as_tool.provider.WorkflowToolConfigurationUtils.get_workflow_graph_output", + return_value=outputs, + ), + ): + tool = controller._get_db_provider_tool(db_provider, app, session=session, user=user) + + assert tool.entity.identity.name == "workflow_tool" + # "json" output is reserved for ToolInvokeMessage.VariableMessage and filtered out. + assert tool.entity.output_schema["properties"] == {"answer": {"type": "string", "description": ""}} + assert "json" not in tool.entity.output_schema["properties"] + assert tool.entity.parameters[0].type == ToolParameter.ToolParameterType.SELECT + assert tool.entity.parameters[1].type == ToolParameter.ToolParameterType.SYSTEM_FILES + assert controller.provider_type == ToolProviderType.WORKFLOW + + +def test_get_tool_returns_hit_or_none(): + controller = _controller() + tool = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="workflow_tool"))) + controller.tools = [tool] + + assert controller.get_tool("workflow_tool") is tool + assert controller.get_tool("missing") is None + + +def test_get_tools_returns_cached(): + controller = _controller() + cached_tools = [SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="wf-cached")))] + controller.tools = cached_tools # type: ignore[assignment] + + assert controller.get_tools("tenant-1") == cached_tools + + +def test_from_db_builds_controller(): + controller = _controller() + + app = SimpleNamespace(id="app-1") + user = SimpleNamespace(name="Alice") + db_provider = SimpleNamespace( + id="provider-1", + app_id="app-1", + version="1", + user_id="user-1", + label="WF Provider", + description="desc", + icon="icon.svg", + name="workflow_tool", + tenant_id="tenant-1", + parameter_configurations=[], + ) + session = _mock_session_with_begin() + session.query.return_value.where.return_value.first.return_value = db_provider + session.get.side_effect = [app, user] + fake_cm = MagicMock() + fake_cm.__enter__.return_value = session + fake_cm.__exit__.return_value = False + fake_session_factory = Mock() + fake_session_factory.create_session.return_value = fake_cm + + with patch("core.tools.workflow_as_tool.provider.session_factory", fake_session_factory): + with patch.object( + WorkflowToolProviderController, + "_get_db_provider_tool", + return_value=SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="wf"))), + ): + built = WorkflowToolProviderController.from_db(db_provider) + assert isinstance(built, WorkflowToolProviderController) + assert built.tools + + +def test_get_tools_returns_empty_when_provider_missing(): + controller = _controller() + controller.tools = None # type: ignore[assignment] + + with patch("core.tools.workflow_as_tool.provider.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.workflow_as_tool.provider.Session") as session_cls: + session = _mock_session_with_begin() + session.query.return_value.where.return_value.first.return_value = None + session_cls.return_value.__enter__.return_value = session + + assert controller.get_tools("tenant-1") == [] + + +def test_get_tools_raises_when_app_missing(): + controller = _controller() + controller.tools = None # type: ignore[assignment] + db_provider = SimpleNamespace( + id="provider-1", + app_id="app-1", + version="1", + user_id="user-1", + label="WF Provider", + description="desc", + icon="icon.svg", + name="workflow_tool", + tenant_id="tenant-1", + parameter_configurations=[], + ) + + with patch("core.tools.workflow_as_tool.provider.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.workflow_as_tool.provider.Session") as session_cls: + session = _mock_session_with_begin() + session.query.return_value.where.return_value.first.return_value = db_provider + session.get.return_value = None + session_cls.return_value.__enter__.return_value = session + with pytest.raises(ValueError, match="app not found"): + controller.get_tools("tenant-1") diff --git a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py index 36fdb0218c..cc00f79698 100644 --- a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py +++ b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py @@ -1,20 +1,85 @@ +"""Unit tests for workflow-as-tool behavior. + +StubSession/StubScalars emulate SQLAlchemy session/scalars with minimal methods +(`scalar`, `scalars`, `expunge`, `commit`, `refresh`, context manager) to keep +database access mocked and predictable in tests. +""" + +import json from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock, Mock, patch import pytest from core.app.entities.app_invoke_entities import InvokeFrom from core.tools.__base.tool_runtime import ToolRuntime from core.tools.entities.common_entities import I18nObject -from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolParameter, + ToolProviderType, +) from core.tools.errors import ToolInvokeError from core.tools.workflow_as_tool.tool import WorkflowTool +from dify_graph.file import FILE_MODEL_IDENTITY -def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_field(monkeypatch: pytest.MonkeyPatch): - """Ensure that WorkflowTool will throw a `ToolInvokeError` exception when - `WorkflowAppGenerator.generate` returns a result with `error` key inside - the `data` element. - """ +class StubScalars: + """Minimal stub for SQLAlchemy scalar results.""" + + _value: Any + + def __init__(self, value: Any) -> None: + self._value = value + + def first(self) -> Any: + return self._value + + +class StubSession: + """Minimal stub for session_factory-created sessions.""" + + scalar_results: list[Any] + scalars_results: list[Any] + expunge_calls: list[object] + + def __init__(self, *, scalar_results: list[Any] | None = None, scalars_results: list[Any] | None = None) -> None: + self.scalar_results = list(scalar_results or []) + self.scalars_results = list(scalars_results or []) + self.expunge_calls: list[object] = [] + + def scalar(self, _stmt: Any) -> Any: + return self.scalar_results.pop(0) + + def scalars(self, _stmt: Any) -> StubScalars: + return StubScalars(self.scalars_results.pop(0)) + + def expunge(self, value: Any) -> None: + self.expunge_calls.append(value) + + def begin(self) -> "StubSession": + return self + + def commit(self) -> None: + pass + + def refresh(self, _value: Any) -> None: + pass + + def close(self) -> None: + pass + + def __enter__(self) -> "StubSession": + return self + + def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool: + return False + + +def _build_tool() -> WorkflowTool: entity = ToolEntity( identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), parameters=[], @@ -22,9 +87,9 @@ def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_fiel has_runtime_parameters=False, ) runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", + return WorkflowTool( + workflow_app_id="app-1", + workflow_as_tool_id="wf-tool-1", version="1", workflow_entities={}, workflow_call_depth=1, @@ -32,13 +97,19 @@ def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_fiel runtime=runtime, ) + +def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_field(monkeypatch: pytest.MonkeyPatch): + """Ensure that WorkflowTool will throw a `ToolInvokeError` exception when + `WorkflowAppGenerator.generate` returns a result with `error` key inside + the `data` element. + """ + tool = _build_tool() + # needs to patch those methods to avoid database access. monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) # Mock user resolution to avoid database access - from unittest.mock import Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -56,28 +127,12 @@ def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_fiel def test_workflow_tool_does_not_use_pause_state_config(monkeypatch: pytest.MonkeyPatch): - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + """Ensure pause_state_config is passed as None.""" + tool = _build_tool() monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) - from unittest.mock import MagicMock, Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -94,22 +149,7 @@ def test_workflow_tool_does_not_use_pause_state_config(monkeypatch: pytest.Monke def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch: pytest.MonkeyPatch): """Test that WorkflowTool should generate variable messages when there are outputs""" - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() # Mock workflow outputs mock_outputs = {"result": "success", "count": 42, "data": {"key": "value"}} @@ -119,8 +159,6 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) # Mock user resolution to avoid database access - from unittest.mock import Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -134,10 +172,6 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch # Execute tool invocation messages = list(tool.invoke("test_user", {})) - # Verify generated messages - # Should contain: 3 variable messages + 1 text message + 1 JSON message = 5 messages - assert len(messages) == 5 - # Verify variable messages variable_messages = [msg for msg in messages if msg.type == ToolInvokeMessage.MessageType.VARIABLE] assert len(variable_messages) == 3 @@ -151,7 +185,7 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch # Verify text message text_messages = [msg for msg in messages if msg.type == ToolInvokeMessage.MessageType.TEXT] assert len(text_messages) == 1 - assert '{"result": "success", "count": 42, "data": {"key": "value"}}' in text_messages[0].message.text + assert json.loads(text_messages[0].message.text) == mock_outputs # Verify JSON message json_messages = [msg for msg in messages if msg.type == ToolInvokeMessage.MessageType.JSON] @@ -161,30 +195,13 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch def test_workflow_tool_should_handle_empty_outputs(monkeypatch: pytest.MonkeyPatch): """Test that WorkflowTool should handle empty outputs correctly""" - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() # needs to patch those methods to avoid database access. monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) # Mock user resolution to avoid database access - from unittest.mock import Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -217,61 +234,32 @@ def test_workflow_tool_should_handle_empty_outputs(monkeypatch: pytest.MonkeyPat assert json_messages[0].message.json_object == {} -def test_create_variable_message(): - """Test the functionality of creating variable messages""" - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) - - # Test different types of variable values - test_cases = [ +@pytest.mark.parametrize( + ("var_name", "var_value"), + [ ("string_var", "test string"), ("int_var", 42), ("float_var", 3.14), ("bool_var", True), ("list_var", [1, 2, 3]), ("dict_var", {"key": "value"}), - ] + ], +) +def test_create_variable_message(var_name, var_value): + """Create variable messages for multiple value types.""" + tool = _build_tool() - for var_name, var_value in test_cases: - message = tool.create_variable_message(var_name, var_value) + message = tool.create_variable_message(var_name, var_value) - assert message.type == ToolInvokeMessage.MessageType.VARIABLE - assert message.message.variable_name == var_name - assert message.message.variable_value == var_value - assert message.message.stream is False + assert message.type == ToolInvokeMessage.MessageType.VARIABLE + assert message.message.variable_name == var_name + assert message.message.variable_value == var_value + assert message.message.stream is False def test_create_file_message_should_include_file_marker(): - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + """Ensure file message includes marker and meta payload.""" + tool = _build_tool() file_obj = object() message = tool.create_file_message(file_obj) # type: ignore[arg-type] @@ -284,103 +272,247 @@ def test_create_file_message_should_include_file_marker(): def test_resolve_user_from_database_falls_back_to_end_user(monkeypatch: pytest.MonkeyPatch): """Ensure worker context can resolve EndUser when Account is missing.""" - class StubSession: - def __init__(self, results: list): - self.results = results - - def scalar(self, _stmt): - return self.results.pop(0) - - # SQLAlchemy Session APIs used by code under test - def expunge(self, *_args, **_kwargs): - pass - - def close(self): - pass - - # support `with session_factory.create_session() as session:` - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - self.close() - tenant = SimpleNamespace(id="tenant_id") end_user = SimpleNamespace(id="end_user_id", tenant_id="tenant_id") # Monkeypatch session factory to return our stub session + stub_session = StubSession(scalar_results=[tenant, None, end_user]) monkeypatch.setattr( "core.tools.workflow_as_tool.tool.session_factory.create_session", - lambda: StubSession([tenant, None, end_user]), + lambda: stub_session, ) - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="tenant_id", invoke_from=InvokeFrom.SERVICE_API) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() + tool.runtime.invoke_from = InvokeFrom.SERVICE_API + tool.runtime.tenant_id = "tenant_id" resolved_user = tool._resolve_user_from_database(user_id=end_user.id) assert resolved_user is end_user + assert stub_session.expunge_calls == [end_user] def test_resolve_user_from_database_returns_none_when_no_tenant(monkeypatch: pytest.MonkeyPatch): """Return None if tenant cannot be found in worker context.""" - class StubSession: - def __init__(self, results: list): - self.results = results - - def scalar(self, _stmt): - return self.results.pop(0) - - def expunge(self, *_args, **_kwargs): - pass - - def close(self): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - self.close() - # Monkeypatch session factory to return our stub session with no tenant monkeypatch.setattr( "core.tools.workflow_as_tool.tool.session_factory.create_session", - lambda: StubSession([None]), + lambda: StubSession(scalar_results=[None]), ) - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="missing_tenant", invoke_from=InvokeFrom.SERVICE_API) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() + tool.runtime.invoke_from = InvokeFrom.SERVICE_API + tool.runtime.tenant_id = "missing_tenant" resolved_user = tool._resolve_user_from_database(user_id="any") assert resolved_user is None + + +def test_workflow_tool_provider_type_and_fork_runtime(): + """Verify provider type and forked runtime behavior.""" + tool = _build_tool() + assert tool.tool_provider_type() == ToolProviderType.WORKFLOW + assert tool.latest_usage.total_tokens == 0 + + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2", invoke_from=InvokeFrom.DEBUGGER)) + assert isinstance(forked, WorkflowTool) + assert forked.workflow_app_id == tool.workflow_app_id + assert forked.runtime.tenant_id == "tenant-2" + + +def test_derive_usage_from_top_level_usage_key(): + """Derive usage from top-level usage dict.""" + usage = WorkflowTool._derive_usage_from_result({"usage": {"total_tokens": 12, "total_price": "0.2"}}) + assert usage.total_tokens == 12 + + +def test_derive_usage_from_metadata_usage(): + """Derive usage from metadata usage dict.""" + metadata_usage = WorkflowTool._derive_usage_from_result({"metadata": {"usage": {"total_tokens": 7}}}) + assert metadata_usage.total_tokens == 7 + + +def test_derive_usage_from_totals(): + """Derive usage from top-level totals fields.""" + totals_usage = WorkflowTool._derive_usage_from_result( + {"total_tokens": "9", "total_price": "1.3", "currency": "USD"} + ) + assert totals_usage.total_tokens == 9 + assert str(totals_usage.total_price) == "1.3" + + +def test_derive_usage_from_empty(): + """Default usage values when result is empty.""" + empty_usage = WorkflowTool._derive_usage_from_result({}) + assert empty_usage.total_tokens == 0 + + +def test_extract_usage_from_nested(): + """Extract nested usage dict from result payloads.""" + nested = WorkflowTool._extract_usage_dict({"nested": [{"data": {"usage": {"total_tokens": 3}}}]}) + assert nested == {"total_tokens": 3} + + +def test_invoke_raises_when_user_not_found(monkeypatch: pytest.MonkeyPatch): + """Raise ToolInvokeError when user resolution fails.""" + tool = _build_tool() + monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) + monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) + monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: None) + + with pytest.raises(ToolInvokeError, match="User not found"): + list(tool.invoke("missing", {})) + + +def test_resolve_user_from_database_returns_account(monkeypatch: pytest.MonkeyPatch): + """Resolve Account and set tenant in worker context.""" + tenant = SimpleNamespace(id="tenant_id") + account = SimpleNamespace(id="account_id", current_tenant=None) + session = StubSession(scalar_results=[tenant, account]) + + monkeypatch.setattr("core.tools.workflow_as_tool.tool.session_factory.create_session", lambda: session) + tool = _build_tool() + tool.runtime.tenant_id = "tenant_id" + + resolved = tool._resolve_user_from_database(user_id="account_id") + assert resolved is account + assert account.current_tenant is tenant + assert session.expunge_calls == [account] + + +def test_get_workflow_and_get_app_db_branches(monkeypatch: pytest.MonkeyPatch): + """Cover workflow/app retrieval branches and error cases.""" + tool = _build_tool() + latest_workflow = SimpleNamespace(id="wf-latest") + specific_workflow = SimpleNamespace(id="wf-v1") + app = SimpleNamespace(id="app-1") + sessions = iter( + [ + StubSession(scalar_results=[], scalars_results=[latest_workflow]), + StubSession(scalar_results=[specific_workflow], scalars_results=[]), + StubSession(scalar_results=[app], scalars_results=[]), + ] + ) + monkeypatch.setattr( + "core.tools.workflow_as_tool.tool.session_factory.create_session", + lambda: next(sessions), + ) + + assert tool._get_workflow("app-1", "") is latest_workflow + assert tool._get_workflow("app-1", "1") is specific_workflow + assert tool._get_app("app-1") is app + + monkeypatch.setattr( + "core.tools.workflow_as_tool.tool.session_factory.create_session", + lambda: StubSession(scalar_results=[None, None], scalars_results=[None]), + ) + with pytest.raises(ValueError, match="workflow not found"): + tool._get_workflow("app-1", "1") + with pytest.raises(ValueError, match="app not found"): + tool._get_app("app-1") + + +def _setup_transform_args_tool(monkeypatch: pytest.MonkeyPatch) -> WorkflowTool: + """Build a WorkflowTool and stub merged runtime parameters for files/query.""" + tool = _build_tool() + files_param = ToolParameter.get_simple_instance( + name="files", + llm_description="files", + typ=ToolParameter.ToolParameterType.SYSTEM_FILES, + required=False, + ) + files_param.form = ToolParameter.ToolParameterForm.FORM + text_param = ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + text_param.form = ToolParameter.ToolParameterForm.FORM + + monkeypatch.setattr(tool, "get_merged_runtime_parameters", lambda: [files_param, text_param]) + return tool + + +def test_transform_args_valid_files(monkeypatch: pytest.MonkeyPatch): + """Transform args into parameters and files payloads.""" + tool = _setup_transform_args_tool(monkeypatch) + + params, files = tool._transform_args( + { + "query": "hello", + "files": [ + { + "tenant_id": "tenant-1", + "type": "image", + "transfer_method": "tool_file", + "related_id": "tool-1", + "extension": ".png", + }, + { + "tenant_id": "tenant-1", + "type": "document", + "transfer_method": "local_file", + "related_id": "upload-1", + }, + { + "tenant_id": "tenant-1", + "type": "document", + "transfer_method": "remote_url", + "remote_url": "https://example.com/a.pdf", + }, + ], + } + ) + assert params == {"query": "hello"} + assert any(file_item.get("tool_file_id") == "tool-1" for file_item in files) + assert any(file_item.get("upload_file_id") == "upload-1" for file_item in files) + assert any(file_item.get("url") == "https://example.com/a.pdf" for file_item in files) + + +def test_transform_args_invalid_files(monkeypatch: pytest.MonkeyPatch): + """Ignore invalid file entries while keeping params.""" + tool = _setup_transform_args_tool(monkeypatch) + invalid_params, invalid_files = tool._transform_args({"query": "hello", "files": [{"invalid": True}]}) + assert invalid_params == {"query": "hello"} + assert invalid_files == [] + + +def test_extract_files(): + """Extract file outputs into result and file list.""" + tool = _build_tool() + built_files = [ + SimpleNamespace(id="file-1"), + SimpleNamespace(id="file-2"), + ] + with patch("core.tools.workflow_as_tool.tool.build_from_mapping", side_effect=built_files): + outputs = { + "attachments": [ + { + "dify_model_identity": FILE_MODEL_IDENTITY, + "transfer_method": "tool_file", + "related_id": "r1", + } + ], + "single_file": { + "dify_model_identity": FILE_MODEL_IDENTITY, + "transfer_method": "local_file", + "related_id": "r2", + }, + "text": "ok", + } + result, extracted_files = tool._extract_files(outputs) + + assert result["text"] == "ok" + assert len(extracted_files) == 2 + + +def test_update_file_mapping(): + """Map tool/local file transfer methods into output shape.""" + tool = _build_tool() + tool_file = tool._update_file_mapping({"transfer_method": "tool_file", "related_id": "tool-1"}) + assert tool_file["tool_file_id"] == "tool-1" + local_file = tool._update_file_mapping({"transfer_method": "local_file", "related_id": "upload-1"}) + assert local_file["upload_file_id"] == "upload-1" diff --git a/api/tests/unit_tests/core/trigger/__init__.py b/api/tests/unit_tests/core/trigger/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/trigger/conftest.py b/api/tests/unit_tests/core/trigger/conftest.py new file mode 100644 index 0000000000..d9da80a8b7 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/conftest.py @@ -0,0 +1,93 @@ +"""Shared factory helpers for core.trigger test suite.""" + +from __future__ import annotations + +from typing import Any + +from core.entities.provider_entities import ProviderConfig +from core.tools.entities.common_entities import I18nObject +from core.trigger.entities.entities import ( + EventEntity, + EventIdentity, + EventParameter, + OAuthSchema, + Subscription, + SubscriptionConstructor, + TriggerProviderEntity, + TriggerProviderIdentity, +) +from core.trigger.provider import PluginTriggerProviderController +from models.provider_ids import TriggerProviderID + +# Valid format for TriggerProviderID: org/plugin/provider +VALID_PROVIDER_ID = "testorg/testplugin/testprovider" + + +def i18n(text: str = "test") -> I18nObject: + return I18nObject(en_US=text, zh_Hans=text) + + +def make_event(name: str = "test_event", parameters: list[EventParameter] | None = None) -> EventEntity: + return EventEntity( + identity=EventIdentity(author="a", name=name, label=i18n(name)), + description=i18n(name), + parameters=parameters or [], + ) + + +def make_provider_entity( + name: str = "test_provider", + events: list[EventEntity] | None = None, + constructor: SubscriptionConstructor | None = None, + subscription_schema: list[ProviderConfig] | None = None, + icon: str | None = "icon.png", + icon_dark: str | None = None, +) -> TriggerProviderEntity: + return TriggerProviderEntity( + identity=TriggerProviderIdentity( + author="a", + name=name, + label=i18n(name), + description=i18n(name), + icon=icon, + icon_dark=icon_dark, + ), + events=events if events is not None else [make_event()], + subscription_constructor=constructor, + subscription_schema=subscription_schema or [], + ) + + +def make_controller( + entity: TriggerProviderEntity | None = None, + tenant_id: str = "tenant-1", + provider_id: str = VALID_PROVIDER_ID, +) -> PluginTriggerProviderController: + return PluginTriggerProviderController( + entity=entity or make_provider_entity(), + plugin_id="plugin-1", + plugin_unique_identifier="uid-1", + provider_id=TriggerProviderID(provider_id), + tenant_id=tenant_id, + ) + + +def make_subscription(**overrides: Any) -> Subscription: + defaults = {"expires_at": 9999999999, "endpoint": "https://hook.test", "properties": {"k": "v"}, "parameters": {}} + defaults.update(overrides) + return Subscription(**defaults) + + +def make_provider_config( + name: str = "api_key", required: bool = True, config_type: str = "secret-input" +) -> ProviderConfig: + return ProviderConfig(name=name, label=i18n(name), type=config_type, required=required) + + +def make_constructor( + credentials_schema: list[ProviderConfig] | None = None, + oauth_schema: OAuthSchema | None = None, +) -> SubscriptionConstructor: + return SubscriptionConstructor( + parameters=[], credentials_schema=credentials_schema or [], oauth_schema=oauth_schema + ) diff --git a/api/tests/unit_tests/core/trigger/debug/__init__.py b/api/tests/unit_tests/core/trigger/debug/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/trigger/debug/test_debug_event_bus.py b/api/tests/unit_tests/core/trigger/debug/test_debug_event_bus.py new file mode 100644 index 0000000000..d557c20f5e --- /dev/null +++ b/api/tests/unit_tests/core/trigger/debug/test_debug_event_bus.py @@ -0,0 +1,93 @@ +""" +Tests for core.trigger.debug.event_bus.TriggerDebugEventBus. + +Covers: Lua-script dispatch/poll with Redis error resilience. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from redis import RedisError + +from core.trigger.debug.event_bus import TriggerDebugEventBus +from core.trigger.debug.events import PluginTriggerDebugEvent + + +class TestDispatch: + @patch("core.trigger.debug.event_bus.redis_client") + def test_returns_dispatch_count(self, mock_redis): + mock_redis.eval.return_value = 3 + event = MagicMock() + event.model_dump_json.return_value = '{"test": true}' + + result = TriggerDebugEventBus.dispatch("tenant-1", event, "pool:key") + + assert result == 3 + mock_redis.eval.assert_called_once() + + @patch("core.trigger.debug.event_bus.redis_client") + def test_redis_error_returns_zero(self, mock_redis): + mock_redis.eval.side_effect = RedisError("connection lost") + event = MagicMock() + event.model_dump_json.return_value = "{}" + + result = TriggerDebugEventBus.dispatch("tenant-1", event, "pool:key") + + assert result == 0 + + +class TestPoll: + @patch("core.trigger.debug.event_bus.redis_client") + def test_returns_deserialized_event(self, mock_redis): + event_json = PluginTriggerDebugEvent( + timestamp=100, + name="push", + user_id="u1", + request_id="r1", + subscription_id="s1", + provider_id="p1", + ).model_dump_json() + mock_redis.eval.return_value = event_json + + result = TriggerDebugEventBus.poll( + event_type=PluginTriggerDebugEvent, + pool_key="pool:key", + tenant_id="t1", + user_id="u1", + app_id="a1", + node_id="n1", + ) + + assert result is not None + assert result.name == "push" + + @patch("core.trigger.debug.event_bus.redis_client") + def test_returns_none_when_no_event(self, mock_redis): + mock_redis.eval.return_value = None + + result = TriggerDebugEventBus.poll( + event_type=PluginTriggerDebugEvent, + pool_key="pool:key", + tenant_id="t1", + user_id="u1", + app_id="a1", + node_id="n1", + ) + + assert result is None + + @patch("core.trigger.debug.event_bus.redis_client") + def test_redis_error_returns_none(self, mock_redis): + mock_redis.eval.side_effect = RedisError("timeout") + + result = TriggerDebugEventBus.poll( + event_type=PluginTriggerDebugEvent, + pool_key="pool:key", + tenant_id="t1", + user_id="u1", + app_id="a1", + node_id="n1", + ) + + assert result is None diff --git a/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py b/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py new file mode 100644 index 0000000000..bcb1d745e3 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py @@ -0,0 +1,281 @@ +""" +Tests for core.trigger.debug.event_selectors. + +Covers: Plugin/Webhook/Schedule pollers, create_event_poller factory, +and select_trigger_debug_events orchestrator. +""" + +from __future__ import annotations + +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.request import TriggerInvokeEventResponse +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) +from core.trigger.debug.event_selectors import ( + PluginTriggerDebugEventPoller, + ScheduleTriggerDebugEventPoller, + WebhookTriggerDebugEventPoller, + create_event_poller, + select_trigger_debug_events, +) +from core.trigger.debug.events import PluginTriggerDebugEvent, WebhookDebugEvent +from dify_graph.enums import BuiltinNodeTypes, NodeType +from tests.unit_tests.core.trigger.conftest import VALID_PROVIDER_ID + + +def _make_poller_args(node_config: dict | None = None) -> dict: + return { + "tenant_id": "t1", + "user_id": "u1", + "app_id": "a1", + "node_config": node_config or {"data": {}}, + "node_id": "n1", + } + + +def _plugin_node_config(provider_id: str = VALID_PROVIDER_ID) -> dict: + """Valid node config for TriggerEventNodeData.model_validate.""" + return { + "data": { + "title": "test", + "plugin_id": "org/testplugin", + "provider_id": provider_id, + "event_name": "push", + "subscription_id": "s1", + "plugin_unique_identifier": "uid-1", + } + } + + +class TestPluginTriggerDebugEventPoller: + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_workflow_args_on_success(self, mock_bus): + event = PluginTriggerDebugEvent( + timestamp=100, + name="push", + user_id="u1", + request_id="r1", + subscription_id="s1", + provider_id="p1", + ) + mock_bus.poll.return_value = event + + with patch("services.trigger.trigger_service.TriggerService") as mock_trigger_svc: + mock_trigger_svc.invoke_trigger_event.return_value = TriggerInvokeEventResponse( + variables={"repo": "dify"}, + cancelled=False, + ) + + poller = PluginTriggerDebugEventPoller(**_make_poller_args(_plugin_node_config())) + result = poller.poll() + + assert result is not None + assert result.workflow_args["inputs"] == {"repo": "dify"} + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_none_when_no_event(self, mock_bus): + mock_bus.poll.return_value = None + + poller = PluginTriggerDebugEventPoller(**_make_poller_args(_plugin_node_config())) + + assert poller.poll() is None + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_none_when_invoke_cancelled(self, mock_bus): + event = PluginTriggerDebugEvent( + timestamp=100, + name="push", + user_id="u1", + request_id="r1", + subscription_id="s1", + provider_id="p1", + ) + mock_bus.poll.return_value = event + + with patch("services.trigger.trigger_service.TriggerService") as mock_trigger_svc: + mock_trigger_svc.invoke_trigger_event.return_value = TriggerInvokeEventResponse( + variables={}, + cancelled=True, + ) + + poller = PluginTriggerDebugEventPoller(**_make_poller_args(_plugin_node_config())) + + assert poller.poll() is None + + +class TestWebhookTriggerDebugEventPoller: + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_uses_inputs_directly_when_present(self, mock_bus): + event = WebhookDebugEvent( + timestamp=100, + request_id="r1", + node_id="n1", + payload={"inputs": {"key": "val"}, "webhook_data": {}}, + ) + mock_bus.poll.return_value = event + + poller = WebhookTriggerDebugEventPoller(**_make_poller_args()) + result = poller.poll() + + assert result is not None + assert result.workflow_args["inputs"] == {"key": "val"} + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_falls_back_to_webhook_data(self, mock_bus): + event = WebhookDebugEvent( + timestamp=100, + request_id="r1", + node_id="n1", + payload={"webhook_data": {"body": "raw"}}, + ) + mock_bus.poll.return_value = event + + with patch("services.trigger.webhook_service.WebhookService") as mock_webhook_svc: + mock_webhook_svc.build_workflow_inputs.return_value = {"parsed": "data"} + + poller = WebhookTriggerDebugEventPoller(**_make_poller_args()) + result = poller.poll() + + assert result is not None + assert result.workflow_args["inputs"] == {"parsed": "data"} + mock_webhook_svc.build_workflow_inputs.assert_called_once_with({"body": "raw"}) + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_none_when_no_event(self, mock_bus): + mock_bus.poll.return_value = None + poller = WebhookTriggerDebugEventPoller(**_make_poller_args()) + + assert poller.poll() is None + + +class TestScheduleTriggerDebugEventPoller: + def _make_schedule_poller(self, mock_redis, mock_schedule_svc, next_run_at: datetime): + """Set up mocks and create a schedule poller.""" + mock_redis.get.return_value = None + mock_schedule_config = MagicMock() + mock_schedule_config.cron_expression = "0 * * * *" + mock_schedule_config.timezone = "UTC" + mock_schedule_svc.to_schedule_config.return_value = mock_schedule_config + return ScheduleTriggerDebugEventPoller(**_make_poller_args()) + + @patch("core.trigger.debug.event_selectors.redis_client") + @patch("core.trigger.debug.event_selectors.naive_utc_now") + @patch("core.trigger.debug.event_selectors.calculate_next_run_at") + @patch("core.trigger.debug.event_selectors.ensure_naive_utc") + def test_returns_none_when_not_yet_due(self, mock_ensure, mock_calc, mock_now, mock_redis): + now = datetime(2025, 1, 1, 12, 0, 0) + next_run = datetime(2025, 1, 1, 13, 0, 0) # future + mock_now.return_value = now + mock_calc.return_value = next_run + mock_ensure.return_value = next_run + mock_redis.get.return_value = None + + with patch("services.trigger.schedule_service.ScheduleService") as mock_schedule_svc: + mock_schedule_config = MagicMock() + mock_schedule_config.cron_expression = "0 * * * *" + mock_schedule_config.timezone = "UTC" + mock_schedule_svc.to_schedule_config.return_value = mock_schedule_config + + poller = ScheduleTriggerDebugEventPoller(**_make_poller_args()) + + assert poller.poll() is None + + @patch("core.trigger.debug.event_selectors.redis_client") + @patch("core.trigger.debug.event_selectors.naive_utc_now") + @patch("core.trigger.debug.event_selectors.calculate_next_run_at") + @patch("core.trigger.debug.event_selectors.ensure_naive_utc") + def test_fires_event_when_due(self, mock_ensure, mock_calc, mock_now, mock_redis): + now = datetime(2025, 1, 1, 14, 0, 0) + next_run = datetime(2025, 1, 1, 12, 0, 0) # past + mock_now.return_value = now + mock_calc.return_value = next_run + mock_ensure.return_value = next_run + mock_redis.get.return_value = None + + with patch("services.trigger.schedule_service.ScheduleService") as mock_schedule_svc: + mock_schedule_config = MagicMock() + mock_schedule_config.cron_expression = "0 * * * *" + mock_schedule_config.timezone = "UTC" + mock_schedule_svc.to_schedule_config.return_value = mock_schedule_config + + poller = ScheduleTriggerDebugEventPoller(**_make_poller_args()) + result = poller.poll() + + assert result is not None + mock_redis.delete.assert_called_once() + + +class TestCreateEventPoller: + def _workflow_with_node(self, node_type: NodeType): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = node_type + return wf + + def test_creates_plugin_poller(self): + wf = self._workflow_with_node(TRIGGER_PLUGIN_NODE_TYPE) + poller = create_event_poller(wf, "t1", "u1", "a1", "n1") + assert isinstance(poller, PluginTriggerDebugEventPoller) + + def test_creates_webhook_poller(self): + wf = self._workflow_with_node(TRIGGER_WEBHOOK_NODE_TYPE) + poller = create_event_poller(wf, "t1", "u1", "a1", "n1") + assert isinstance(poller, WebhookTriggerDebugEventPoller) + + def test_creates_schedule_poller(self): + wf = self._workflow_with_node(TRIGGER_SCHEDULE_NODE_TYPE) + poller = create_event_poller(wf, "t1", "u1", "a1", "n1") + assert isinstance(poller, ScheduleTriggerDebugEventPoller) + + def test_raises_for_unknown_type(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = BuiltinNodeTypes.START + + with pytest.raises(ValueError): + create_event_poller(wf, "t1", "u1", "a1", "n1") + + def test_raises_when_node_config_missing(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = None + + with pytest.raises(ValueError): + create_event_poller(wf, "t1", "u1", "a1", "n1") + + +class TestSelectTriggerDebugEvents: + def test_returns_first_non_none_event(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = TRIGGER_WEBHOOK_NODE_TYPE + app_model = MagicMock() + app_model.tenant_id = "t1" + app_model.id = "a1" + + with patch.object(WebhookTriggerDebugEventPoller, "poll") as mock_poll: + expected = MagicMock() + mock_poll.return_value = expected + + result = select_trigger_debug_events(wf, app_model, "u1", ["n1", "n2"]) + + assert result is expected + + def test_returns_none_when_no_events(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = TRIGGER_WEBHOOK_NODE_TYPE + app_model = MagicMock() + app_model.tenant_id = "t1" + app_model.id = "a1" + + with patch.object(WebhookTriggerDebugEventPoller, "poll", return_value=None): + result = select_trigger_debug_events(wf, app_model, "u1", ["n1"]) + + assert result is None diff --git a/api/tests/unit_tests/core/trigger/test_provider.py b/api/tests/unit_tests/core/trigger/test_provider.py new file mode 100644 index 0000000000..3c2f297e90 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/test_provider.py @@ -0,0 +1,332 @@ +""" +Tests for core.trigger.provider.PluginTriggerProviderController. + +Covers: to_api_entity creation-method logic, credential validation pipeline, +schema resolution by type, event lookup, dispatch/invoke/subscribe delegation. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin_daemon import CredentialType +from core.trigger.entities.entities import ( + EventParameter, + EventParameterType, + OAuthSchema, + TriggerCreationMethod, +) +from core.trigger.errors import TriggerProviderCredentialValidationError +from tests.unit_tests.core.trigger.conftest import ( + i18n, + make_constructor, + make_controller, + make_event, + make_provider_config, + make_provider_entity, + make_subscription, +) + +ICON_URL = "https://cdn/icon.png" + + +class TestToApiEntity: + @patch("core.trigger.provider.PluginService") + def test_includes_icons_when_present(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + ctrl = make_controller(entity=make_provider_entity(icon="icon.png", icon_dark="dark.png")) + + api = ctrl.to_api_entity() + + assert api.icon == ICON_URL + assert api.icon_dark == ICON_URL + + @patch("core.trigger.provider.PluginService") + def test_icons_none_when_absent(self, mock_plugin_svc): + ctrl = make_controller(entity=make_provider_entity(icon=None, icon_dark=None)) + + api = ctrl.to_api_entity() + + assert api.icon is None + assert api.icon_dark is None + mock_plugin_svc.get_plugin_icon_url.assert_not_called() + + @patch("core.trigger.provider.PluginService") + def test_manual_only_without_schemas(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + + api = ctrl.to_api_entity() + + assert api.supported_creation_methods == [TriggerCreationMethod.MANUAL] + + @patch("core.trigger.provider.PluginService") + def test_adds_oauth_when_oauth_schema_present(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + oauth = OAuthSchema(client_schema=[], credentials_schema=[]) + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(oauth_schema=oauth))) + + api = ctrl.to_api_entity() + + assert TriggerCreationMethod.OAUTH in api.supported_creation_methods + assert TriggerCreationMethod.MANUAL in api.supported_creation_methods + + @patch("core.trigger.provider.PluginService") + def test_adds_apikey_when_credentials_schema_present(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[make_provider_config()])) + ) + + api = ctrl.to_api_entity() + + assert TriggerCreationMethod.APIKEY in api.supported_creation_methods + + +class TestGetEvent: + def test_returns_matching_event(self): + evt = make_event("push") + ctrl = make_controller(entity=make_provider_entity(events=[evt, make_event("pr")])) + + assert ctrl.get_event("push") is evt + + def test_returns_none_for_unknown(self): + ctrl = make_controller(entity=make_provider_entity(events=[make_event("push")])) + + assert ctrl.get_event("nonexistent") is None + + +class TestGetSubscriptionDefaultProperties: + def test_returns_defaults_skipping_none(self): + config1 = make_provider_config("key1") + config1.default = "val1" + config2 = make_provider_config("key2") + config2.default = None + ctrl = make_controller(entity=make_provider_entity(subscription_schema=[config1, config2])) + + props = ctrl.get_subscription_default_properties() + + assert props == {"key1": "val1"} + + +class TestValidateCredentials: + def test_raises_when_no_constructor(self): + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + + with pytest.raises(ValueError, match="Subscription constructor not found"): + ctrl.validate_credentials("u1", {"key": "val"}) + + def test_raises_for_missing_required_field(self): + required_cfg = make_provider_config("api_key", required=True) + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[required_cfg])) + ) + + with pytest.raises(TriggerProviderCredentialValidationError, match="Missing required"): + ctrl.validate_credentials("u1", {}) + + @patch("core.trigger.provider.PluginTriggerClient") + def test_passes_with_valid_credentials(self, mock_client): + required_cfg = make_provider_config("api_key", required=True) + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[required_cfg])) + ) + mock_client.return_value.validate_provider_credentials.return_value = True + + ctrl.validate_credentials("u1", {"api_key": "secret123"}) # should not raise + + @patch("core.trigger.provider.PluginTriggerClient") + def test_raises_when_plugin_rejects(self, mock_client): + required_cfg = make_provider_config("api_key", required=True) + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[required_cfg])) + ) + mock_client.return_value.validate_provider_credentials.return_value = None + + with pytest.raises(TriggerProviderCredentialValidationError, match="Invalid credentials"): + ctrl.validate_credentials("u1", {"api_key": "bad"}) + + +class TestGetSupportedCredentialTypes: + def test_empty_when_no_constructor(self): + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + assert ctrl.get_supported_credential_types() == [] + + def test_oauth_only(self): + oauth = OAuthSchema(client_schema=[], credentials_schema=[]) + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(oauth_schema=oauth))) + + types = ctrl.get_supported_credential_types() + + assert CredentialType.OAUTH2 in types + assert CredentialType.API_KEY not in types + + def test_apikey_only(self): + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[make_provider_config()])) + ) + + types = ctrl.get_supported_credential_types() + + assert CredentialType.API_KEY in types + assert CredentialType.OAUTH2 not in types + + def test_both(self): + oauth = OAuthSchema(client_schema=[], credentials_schema=[make_provider_config("oauth_secret")]) + ctrl = make_controller( + entity=make_provider_entity( + constructor=make_constructor(credentials_schema=[make_provider_config()], oauth_schema=oauth) + ) + ) + + types = ctrl.get_supported_credential_types() + + assert CredentialType.OAUTH2 in types + assert CredentialType.API_KEY in types + + +class TestGetCredentialsSchema: + def test_returns_empty_when_no_constructor(self): + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + assert ctrl.get_credentials_schema(CredentialType.API_KEY) == [] + + def test_returns_apikey_credentials(self): + cfg = make_provider_config("token") + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(credentials_schema=[cfg]))) + + result = ctrl.get_credentials_schema(CredentialType.API_KEY) + + assert len(result) == 1 + assert result[0].name == "token" + + def test_returns_oauth_credentials(self): + oauth_cred = make_provider_config("oauth_token") + oauth = OAuthSchema(client_schema=[], credentials_schema=[oauth_cred]) + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(oauth_schema=oauth))) + + result = ctrl.get_credentials_schema(CredentialType.OAUTH2) + + assert len(result) == 1 + assert result[0].name == "oauth_token" + + def test_unauthorized_returns_empty(self): + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[make_provider_config()])) + ) + assert ctrl.get_credentials_schema(CredentialType.UNAUTHORIZED) == [] + + def test_invalid_type_raises(self): + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor())) + with pytest.raises(ValueError, match="Invalid credential type"): + ctrl.get_credentials_schema("bogus_type") + + +class TestGetEventParameters: + def test_returns_params_for_known_event(self): + param = EventParameter(name="branch", label=i18n("branch"), type=EventParameterType.STRING) + evt = make_event("push", parameters=[param]) + ctrl = make_controller(entity=make_provider_entity(events=[evt])) + + result = ctrl.get_event_parameters("push") + + assert "branch" in result + assert result["branch"].name == "branch" + + def test_returns_empty_for_unknown_event(self): + ctrl = make_controller(entity=make_provider_entity(events=[make_event("push")])) + + assert ctrl.get_event_parameters("nonexistent") == {} + + +class TestDispatch: + @patch("core.trigger.provider.PluginTriggerClient") + def test_delegates_to_client(self, mock_client): + ctrl = make_controller() + expected = MagicMock() + mock_client.return_value.dispatch_event.return_value = expected + + result = ctrl.dispatch( + request=MagicMock(), + subscription=make_subscription(), + credentials={"k": "v"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected + mock_client.return_value.dispatch_event.assert_called_once() + + +class TestInvokeTriggerEvent: + @patch("core.trigger.provider.PluginTriggerClient") + def test_delegates_to_client(self, mock_client): + ctrl = make_controller() + expected = MagicMock() + mock_client.return_value.invoke_trigger_event.return_value = expected + + result = ctrl.invoke_trigger_event( + user_id="u1", + event_name="push", + parameters={}, + credentials={}, + credential_type=CredentialType.API_KEY, + subscription=make_subscription(), + request=MagicMock(), + payload={}, + ) + + assert result is expected + + +class TestSubscribeTrigger: + @patch("core.trigger.provider.PluginTriggerClient") + def test_returns_validated_subscription(self, mock_client): + ctrl = make_controller() + mock_client.return_value.subscribe.return_value.subscription = { + "expires_at": 123, + "endpoint": "https://e", + "properties": {}, + } + + result = ctrl.subscribe_trigger( + user_id="u1", + endpoint="https://e", + parameters={}, + credentials={}, + credential_type=CredentialType.API_KEY, + ) + + assert result.endpoint == "https://e" + + +class TestUnsubscribeTrigger: + @patch("core.trigger.provider.PluginTriggerClient") + def test_returns_validated_result(self, mock_client): + ctrl = make_controller() + mock_client.return_value.unsubscribe.return_value.subscription = {"success": True, "message": "ok"} + + result = ctrl.unsubscribe_trigger( + user_id="u1", + subscription=make_subscription(), + credentials={}, + credential_type=CredentialType.API_KEY, + ) + + assert result.success is True + + +class TestRefreshTrigger: + @patch("core.trigger.provider.PluginTriggerClient") + def test_uses_system_user_id(self, mock_client): + ctrl = make_controller() + mock_client.return_value.refresh.return_value.subscription = { + "expires_at": 456, + "endpoint": "https://e", + "properties": {}, + } + + ctrl.refresh_trigger(subscription=make_subscription(), credentials={}, credential_type=CredentialType.API_KEY) + + call_kwargs = mock_client.return_value.refresh.call_args[1] + assert call_kwargs["user_id"] == "system" diff --git a/api/tests/unit_tests/core/trigger/test_trigger_manager.py b/api/tests/unit_tests/core/trigger/test_trigger_manager.py new file mode 100644 index 0000000000..612be25ec9 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/test_trigger_manager.py @@ -0,0 +1,307 @@ +""" +Tests for core.trigger.trigger_manager.TriggerManager. + +Covers: icon URL construction, provider listing with error resilience, +double-check lock caching, error translation, EventIgnoreError -> cancelled, +and delegation to provider controller. +""" + +from __future__ import annotations + +from threading import Lock +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin_daemon import CredentialType +from core.plugin.entities.request import TriggerInvokeEventResponse +from core.plugin.impl.exc import PluginDaemonError, PluginNotFoundError +from core.trigger.errors import EventIgnoreError +from core.trigger.trigger_manager import TriggerManager +from models.provider_ids import TriggerProviderID +from tests.unit_tests.core.trigger.conftest import ( + VALID_PROVIDER_ID, + make_controller, + make_provider_entity, + make_subscription, +) + +PID = TriggerProviderID(VALID_PROVIDER_ID) +PID_STR = str(PID) + + +class TestGetTriggerPluginIcon: + @patch("core.trigger.trigger_manager.dify_config") + @patch("core.trigger.trigger_manager.PluginTriggerClient") + def test_builds_correct_url(self, mock_client, mock_config): + mock_config.CONSOLE_API_URL = "https://console.example.com" + provider = MagicMock() + provider.declaration.identity.icon = "my-icon.svg" + mock_client.return_value.fetch_trigger_provider.return_value = provider + + url = TriggerManager.get_trigger_plugin_icon("tenant-1", VALID_PROVIDER_ID) + + assert "tenant_id=tenant-1" in url + assert "filename=my-icon.svg" in url + assert url.startswith("https://console.example.com/console/api/workspaces/current/plugin/icon") + + +class TestListPluginTriggerProviders: + @patch("core.trigger.trigger_manager.PluginTriggerClient") + def test_wraps_entities_into_controllers(self, mock_client): + entity = MagicMock() + entity.declaration = make_provider_entity("p1") + entity.plugin_id = "plugin-1" + entity.plugin_unique_identifier = "uid-1" + entity.provider = VALID_PROVIDER_ID + mock_client.return_value.fetch_trigger_providers.return_value = [entity] + + controllers = TriggerManager.list_plugin_trigger_providers("tenant-1") + + assert len(controllers) == 1 + assert controllers[0].plugin_id == "plugin-1" + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + def test_skips_failing_providers(self, mock_client): + good = MagicMock() + good.declaration = make_provider_entity("good") + good.plugin_id = "good-plugin" + good.plugin_unique_identifier = "uid-good" + good.provider = VALID_PROVIDER_ID + + bad = MagicMock() + bad.declaration = make_provider_entity("bad") + bad.plugin_id = "bad-plugin" + bad.plugin_unique_identifier = "uid-bad" + bad.provider = "bad/format" # 2-part: fails TriggerProviderID validation + + mock_client.return_value.fetch_trigger_providers.return_value = [bad, good] + + controllers = TriggerManager.list_plugin_trigger_providers("tenant-1") + + assert len(controllers) == 1 + assert controllers[0].plugin_id == "good-plugin" + + +class TestGetTriggerProvider: + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_initializes_context_on_first_call(self, mock_ctx, mock_client): + # get() called 3 times: (1) try block, (2) after set, (3) under lock + mock_ctx.plugin_trigger_providers.get.side_effect = [LookupError, {}, {}] + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + provider = MagicMock() + provider.declaration = make_provider_entity() + provider.plugin_id = "p1" + provider.plugin_unique_identifier = "uid-1" + mock_client.return_value.fetch_trigger_provider.return_value = provider + + result = TriggerManager.get_trigger_provider("t1", PID) + + mock_ctx.plugin_trigger_providers.set.assert_called_once_with({}) + mock_ctx.plugin_trigger_providers_lock.set.assert_called_once() + assert result is not None + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_returns_cached_without_fetch(self, mock_ctx, mock_client): + cached = make_controller() + mock_ctx.plugin_trigger_providers.get.return_value = {PID_STR: cached} + + result = TriggerManager.get_trigger_provider("t1", PID) + + assert result is cached + mock_client.return_value.fetch_trigger_provider.assert_not_called() + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_double_check_lock_uses_cached_from_other_thread(self, mock_ctx, mock_client): + cached = make_controller() + mock_ctx.plugin_trigger_providers.get.side_effect = [ + {}, # first check misses + {PID_STR: cached}, # under-lock check hits + ] + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + + result = TriggerManager.get_trigger_provider("t1", PID) + + assert result is cached + mock_client.return_value.fetch_trigger_provider.assert_not_called() + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_fetches_and_caches_on_miss(self, mock_ctx, mock_client): + cache: dict = {} + mock_ctx.plugin_trigger_providers.get.return_value = cache + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + provider = MagicMock() + provider.declaration = make_provider_entity() + provider.plugin_id = "p1" + provider.plugin_unique_identifier = "uid-1" + mock_client.return_value.fetch_trigger_provider.return_value = provider + + result = TriggerManager.get_trigger_provider("t1", PID) + + assert result is not None + assert PID_STR in cache + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_none_fetch_raises_value_error(self, mock_ctx, mock_client): + mock_ctx.plugin_trigger_providers.get.return_value = {} + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + mock_client.return_value.fetch_trigger_provider.return_value = None + + with pytest.raises(ValueError): + TriggerManager.get_trigger_provider("t1", TriggerProviderID("org/plug/missing")) + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_plugin_not_found_becomes_value_error(self, mock_ctx, mock_client): + mock_ctx.plugin_trigger_providers.get.return_value = {} + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + mock_client.return_value.fetch_trigger_provider.side_effect = PluginNotFoundError("gone") + + with pytest.raises(ValueError): + TriggerManager.get_trigger_provider("t1", TriggerProviderID("org/plug/miss")) + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_plugin_daemon_error_propagates(self, mock_ctx, mock_client): + mock_ctx.plugin_trigger_providers.get.return_value = {} + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + mock_client.return_value.fetch_trigger_provider.side_effect = PluginDaemonError("test error") + + with pytest.raises(PluginDaemonError): + TriggerManager.get_trigger_provider("t1", TriggerProviderID("org/plug/miss")) + + +class TestListAllTriggerProviders: + @patch.object(TriggerManager, "list_plugin_trigger_providers") + def test_delegates_to_list_plugin(self, mock_list): + expected = [make_controller()] + mock_list.return_value = expected + + assert TriggerManager.list_all_trigger_providers("t1") is expected + mock_list.assert_called_once_with("t1") + + +class TestListTriggersByProvider: + @patch.object(TriggerManager, "get_trigger_provider") + def test_returns_provider_events(self, mock_get): + ctrl = make_controller() + mock_get.return_value = ctrl + + result = TriggerManager.list_triggers_by_provider("t1", PID) + + assert result == ctrl.get_events() + + +class TestInvokeTriggerEvent: + def _args(self): + return { + "tenant_id": "t1", + "user_id": "u1", + "provider_id": PID, + "event_name": "on_push", + "parameters": {"branch": "main"}, + "credentials": {"token": "abc"}, + "credential_type": CredentialType.API_KEY, + "subscription": make_subscription(), + "request": MagicMock(), + "payload": {"action": "push"}, + } + + @patch.object(TriggerManager, "get_trigger_provider") + def test_returns_invoke_response(self, mock_get): + ctrl = MagicMock() + expected = TriggerInvokeEventResponse(variables={"v": "1"}, cancelled=False) + ctrl.invoke_trigger_event.return_value = expected + mock_get.return_value = ctrl + + result = TriggerManager.invoke_trigger_event(**self._args()) + + assert result is expected + assert result.cancelled is False + + @patch.object(TriggerManager, "get_trigger_provider") + def test_event_ignore_returns_cancelled(self, mock_get): + ctrl = MagicMock() + ctrl.invoke_trigger_event.side_effect = EventIgnoreError("skip") + mock_get.return_value = ctrl + + result = TriggerManager.invoke_trigger_event(**self._args()) + + assert result.cancelled is True + assert result.variables == {} + + @patch.object(TriggerManager, "get_trigger_provider") + def test_other_errors_propagate(self, mock_get): + ctrl = MagicMock() + ctrl.invoke_trigger_event.side_effect = RuntimeError("boom") + mock_get.return_value = ctrl + + with pytest.raises(RuntimeError, match="boom"): + TriggerManager.invoke_trigger_event(**self._args()) + + +class TestSubscribeTrigger: + @patch.object(TriggerManager, "get_trigger_provider") + def test_delegates_with_correct_args(self, mock_get): + ctrl = MagicMock() + expected = make_subscription() + ctrl.subscribe_trigger.return_value = expected + mock_get.return_value = ctrl + + result = TriggerManager.subscribe_trigger( + tenant_id="t1", + user_id="u1", + provider_id=PID, + endpoint="https://hook.test", + parameters={"f": "all"}, + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected + ctrl.subscribe_trigger.assert_called_once() + + +class TestUnsubscribeTrigger: + @patch.object(TriggerManager, "get_trigger_provider") + def test_delegates_with_correct_args(self, mock_get): + ctrl = MagicMock() + expected = MagicMock() + ctrl.unsubscribe_trigger.return_value = expected + mock_get.return_value = ctrl + sub = make_subscription() + + result = TriggerManager.unsubscribe_trigger( + tenant_id="t1", + user_id="u1", + provider_id=PID, + subscription=sub, + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected + + +class TestRefreshTrigger: + @patch.object(TriggerManager, "get_trigger_provider") + def test_delegates_with_correct_args(self, mock_get): + ctrl = MagicMock() + expected = make_subscription() + ctrl.refresh_trigger.return_value = expected + mock_get.return_value = ctrl + + result = TriggerManager.refresh_trigger( + tenant_id="t1", + provider_id=PID, + subscription=make_subscription(), + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected diff --git a/api/tests/unit_tests/core/trigger/utils/__init__.py b/api/tests/unit_tests/core/trigger/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/trigger/utils/test_utils_encryption.py b/api/tests/unit_tests/core/trigger/utils/test_utils_encryption.py new file mode 100644 index 0000000000..8804526e2e --- /dev/null +++ b/api/tests/unit_tests/core/trigger/utils/test_utils_encryption.py @@ -0,0 +1,62 @@ +"""Tests for core.trigger.utils.encryption — masking logic and cache key generation.""" + +from __future__ import annotations + +from core.entities.provider_entities import ProviderConfig +from core.tools.entities.common_entities import I18nObject +from core.trigger.utils.encryption import ( + TriggerProviderCredentialsCache, + TriggerProviderOAuthClientParamsCache, + TriggerProviderPropertiesCache, + masked_credentials, +) + + +def _make_schema(name: str, field_type: str = "secret-input") -> ProviderConfig: + return ProviderConfig( + name=name, + label=I18nObject(en_US=name, zh_Hans=name), + type=field_type, + ) + + +class TestMaskedCredentials: + def test_short_secret_fully_masked(self): + schema = [_make_schema("key", "secret-input")] + result = masked_credentials(schema, {"key": "ab"}) + assert result["key"] == "**" + + def test_long_secret_partially_masked(self): + schema = [_make_schema("key", "secret-input")] + result = masked_credentials(schema, {"key": "abcdef"}) + assert result["key"].startswith("ab") + assert result["key"].endswith("ef") + assert "**" in result["key"] + + def test_non_secret_field_unchanged(self): + schema = [_make_schema("host", "text-input")] + result = masked_credentials(schema, {"host": "example.com"}) + assert result["host"] == "example.com" + + def test_unknown_key_passes_through(self): + result = masked_credentials([], {"unknown": "value"}) + assert result["unknown"] == "value" + + +class TestCacheKeyGeneration: + def test_credentials_cache_key_contains_ids(self): + cache = TriggerProviderCredentialsCache(tenant_id="t1", provider_id="p1", credential_id="c1") + assert "t1" in cache.cache_key + assert "p1" in cache.cache_key + assert "c1" in cache.cache_key + + def test_oauth_client_cache_key_contains_ids(self): + cache = TriggerProviderOAuthClientParamsCache(tenant_id="t1", provider_id="p1") + assert "t1" in cache.cache_key + assert "p1" in cache.cache_key + + def test_properties_cache_key_contains_ids(self): + cache = TriggerProviderPropertiesCache(tenant_id="t1", provider_id="p1", subscription_id="s1") + assert "t1" in cache.cache_key + assert "p1" in cache.cache_key + assert "s1" in cache.cache_key diff --git a/api/tests/unit_tests/core/trigger/utils/test_utils_endpoint.py b/api/tests/unit_tests/core/trigger/utils/test_utils_endpoint.py new file mode 100644 index 0000000000..e5879aea0a --- /dev/null +++ b/api/tests/unit_tests/core/trigger/utils/test_utils_endpoint.py @@ -0,0 +1,31 @@ +"""Tests for core.trigger.utils.endpoint — URL generation.""" + +from __future__ import annotations + +from unittest.mock import patch + +from yarl import URL + +from core.trigger.utils import endpoint + + +class TestGeneratePluginTriggerEndpointUrl: + def test_builds_correct_url(self): + with patch.object(endpoint, "base_url", URL("https://api.example.com")): + url = endpoint.generate_plugin_trigger_endpoint_url("endpoint-123") + + assert url == "https://api.example.com/triggers/plugin/endpoint-123" + + +class TestGenerateWebhookTriggerEndpoint: + def test_non_debug_url(self): + with patch.object(endpoint, "base_url", URL("https://api.example.com")): + url = endpoint.generate_webhook_trigger_endpoint("sub-456", debug=False) + + assert url == "https://api.example.com/triggers/webhook/sub-456" + + def test_debug_url(self): + with patch.object(endpoint, "base_url", URL("https://api.example.com")): + url = endpoint.generate_webhook_trigger_endpoint("sub-456", debug=True) + + assert url == "https://api.example.com/triggers/webhook-debug/sub-456" diff --git a/api/tests/unit_tests/core/trigger/utils/test_utils_locks.py b/api/tests/unit_tests/core/trigger/utils/test_utils_locks.py new file mode 100644 index 0000000000..4fa202b164 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/utils/test_utils_locks.py @@ -0,0 +1,23 @@ +"""Tests for core.trigger.utils.locks — Redis lock key builders.""" + +from __future__ import annotations + +from core.trigger.utils.locks import build_trigger_refresh_lock_key, build_trigger_refresh_lock_keys + + +class TestBuildTriggerRefreshLockKey: + def test_correct_format(self): + key = build_trigger_refresh_lock_key("tenant-1", "sub-1") + + assert key == "trigger_provider_refresh_lock:tenant-1_sub-1" + + +class TestBuildTriggerRefreshLockKeys: + def test_maps_over_pairs(self): + pairs = [("t1", "s1"), ("t2", "s2")] + + keys = build_trigger_refresh_lock_keys(pairs) + + assert len(keys) == 2 + assert keys[0] == "trigger_provider_refresh_lock:t1_s1" + assert keys[1] == "trigger_provider_refresh_lock:t2_s2" diff --git a/api/tests/unit_tests/core/variables/test_segment.py b/api/tests/unit_tests/core/variables/test_segment.py index d47d4d6130..91259c9a45 100644 --- a/api/tests/unit_tests/core/variables/test_segment.py +++ b/api/tests/unit_tests/core/variables/test_segment.py @@ -1,11 +1,14 @@ import dataclasses +import orjson +import pytest from pydantic import BaseModel from core.helper import encrypter from dify_graph.file import File, FileTransferMethod, FileType from dify_graph.runtime import VariablePool from dify_graph.system_variable import SystemVariable +from dify_graph.variables.segment_group import SegmentGroup from dify_graph.variables.segments import ( ArrayAnySegment, ArrayFileSegment, @@ -23,6 +26,11 @@ from dify_graph.variables.segments import ( get_segment_discriminator, ) from dify_graph.variables.types import SegmentType +from dify_graph.variables.utils import ( + dumps_with_segments, + segment_orjson_default, + to_selector, +) from dify_graph.variables.variables import ( ArrayAnyVariable, ArrayFileVariable, @@ -379,3 +387,125 @@ class TestSegmentDumpAndLoad: assert get_segment_discriminator("not_a_dict") is None assert get_segment_discriminator(42) is None assert get_segment_discriminator(object) is None + + +class TestSegmentAdditionalProperties: + def test_base_segment_text_log_markdown_size_and_to_object(self): + """Ensure StringSegment exposes text, log, markdown, size and to_object.""" + segment = StringSegment(value="hello") + + assert segment.text == "hello" + assert segment.log == "hello" + assert segment.markdown == "hello" + assert segment.size > 0 + assert segment.to_object() == "hello" + + def test_none_segment_empty_outputs(self): + """Ensure NoneSegment renders empty text, log and markdown.""" + segment = NoneSegment() + + assert segment.text == "" + assert segment.log == "" + assert segment.markdown == "" + + def test_object_segment_json_outputs(self): + """Ensure ObjectSegment renders JSON output for text, log and markdown.""" + segment = ObjectSegment(value={"key": "值", "n": 1}) + + assert segment.text == '{"key": "值", "n": 1}' + assert segment.log == '{\n "key": "值",\n "n": 1\n}' + assert segment.markdown == '{\n "key": "值",\n "n": 1\n}' + + def test_array_segment_text_and_markdown(self): + """Ensure ArrayAnySegment handles empty/non-empty text and markdown rendering.""" + empty_segment = ArrayAnySegment(value=[]) + non_empty_segment = ArrayAnySegment(value=[1, "two"]) + + assert empty_segment.text == "" + assert non_empty_segment.text == "[1, 'two']" + assert non_empty_segment.markdown == "- 1\n- two" + + def test_file_segment_properties(self): + """Ensure FileSegment markdown, text and log fields match expected behavior.""" + file = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="doc.txt") + segment = FileSegment(value=file) + + assert segment.markdown == "[doc.txt](https://example.com/file.txt)" + assert segment.log == "" + assert segment.text == "" + + def test_array_string_segment_text_branches(self): + """Ensure ArrayStringSegment text handling for empty and non-empty values.""" + empty_segment = ArrayStringSegment(value=[]) + non_empty_segment = ArrayStringSegment(value=["hello", "世界"]) + + assert empty_segment.text == "" + assert non_empty_segment.text == '["hello", "世界"]' + + def test_array_file_segment_markdown_and_empty_text_log(self): + """Ensure ArrayFileSegment markdown renders links and text/log stay empty.""" + file1 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="a.txt") + file2 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="b.txt") + segment = ArrayFileSegment(value=[file1, file2]) + + assert segment.markdown == "[a.txt](https://example.com/file.txt)\n[b.txt](https://example.com/file.txt)" + assert segment.log == "" + assert segment.text == "" + + +class TestSegmentGroupAdditional: + def test_segment_group_markdown_and_to_object(self): + group = SegmentGroup(value=[StringSegment(value="A"), NoneSegment(), StringSegment(value="B")]) + + assert group.markdown == "AB" + assert group.to_object() == ["A", None, "B"] + + +class TestSegmentUtils: + def test_to_selector_without_paths(self): + assert to_selector("node-1", "output") == ["node-1", "output"] + + def test_to_selector_with_paths(self): + assert to_selector("node-1", "output", ("a", "b")) == ["node-1", "output", "a", "b"] + + def test_array_file_segment_serialization(self): + file1 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="a.txt") + file2 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="b.txt") + + result = segment_orjson_default(ArrayFileSegment(value=[file1, file2])) + + assert len(result) == 2 + assert result[0]["filename"] == "a.txt" + assert result[1]["filename"] == "b.txt" + + def test_file_segment_serialization(self): + file = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="single.txt") + + result = segment_orjson_default(FileSegment(value=file)) + + assert result["filename"] == "single.txt" + assert result["remote_url"] == "https://example.com/file.txt" + + def test_segment_group_and_segment_serialization(self): + group = SegmentGroup(value=[StringSegment(value="a"), StringSegment(value="b")]) + + assert segment_orjson_default(group) == ["a", "b"] + assert segment_orjson_default(StringSegment(value="value")) == "value" + + def test_segment_orjson_default_unsupported_type(self): + with pytest.raises(TypeError, match="not JSON serializable"): + segment_orjson_default(object()) + + def test_dumps_with_segments(self): + data = { + "segment": StringSegment(value="hello"), + "group": SegmentGroup(value=[StringSegment(value="x"), StringSegment(value="y")]), + 1: "numeric-key", + } + + dumped = dumps_with_segments(data) + loaded = orjson.loads(dumped) + + assert loaded["segment"] == "hello" + assert loaded["group"] == ["x", "y"] + assert loaded["1"] == "numeric-key" diff --git a/api/tests/unit_tests/core/variables/test_segment_type.py b/api/tests/unit_tests/core/variables/test_segment_type.py index 8704e3a8e9..9c7755709c 100644 --- a/api/tests/unit_tests/core/variables/test_segment_type.py +++ b/api/tests/unit_tests/core/variables/test_segment_type.py @@ -1,5 +1,7 @@ import pytest +from dify_graph.variables.segment_group import SegmentGroup +from dify_graph.variables.segments import StringSegment from dify_graph.variables.types import ArrayValidation, SegmentType @@ -70,22 +72,36 @@ class TestSegmentTypeIsValidArrayValidation: """ def test_array_validation_all_success(self): + # Arrange value = ["hello", "world", "foo"] - assert SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Assert + assert is_valid def test_array_validation_all_fail(self): + # Arrange value = ["hello", 123, "world"] - # Should return False, since 123 is not a string - assert not SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Assert + assert not is_valid def test_array_validation_first(self): + # Arrange value = ["hello", 123, None] - assert SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.FIRST) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.FIRST) + # Assert + assert is_valid def test_array_validation_none(self): + # Arrange value = [1, 2, 3] - # validation is None, skip - assert SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.NONE) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.NONE) + # Assert + assert is_valid class TestSegmentTypeGetZeroValue: @@ -164,3 +180,62 @@ class TestSegmentTypeGetZeroValue: for seg_type in unsupported_types: with pytest.raises(ValueError, match="unsupported variable type"): SegmentType.get_zero_value(seg_type) + + +class TestSegmentTypeInferSegmentType: + @pytest.mark.parametrize( + ("value", "expected"), + [ + ([], SegmentType.ARRAY_NUMBER), + ([1, 2, 3], SegmentType.ARRAY_NUMBER), + ([1, 2.5], SegmentType.ARRAY_NUMBER), + (["a", "b"], SegmentType.ARRAY_STRING), + ([{"k": "v"}], SegmentType.ARRAY_OBJECT), + ([None], SegmentType.ARRAY_ANY), + ([True, False], SegmentType.ARRAY_BOOLEAN), + ([[1], [2]], SegmentType.ARRAY_ANY), + ([1, "a"], SegmentType.ARRAY_ANY), + (None, SegmentType.NONE), + (True, SegmentType.BOOLEAN), + (1, SegmentType.INTEGER), + (1.2, SegmentType.FLOAT), + ("abc", SegmentType.STRING), + ({"k": "v"}, SegmentType.OBJECT), + ], + ) + def test_infer_segment_type_supported_values(self, value, expected): + assert SegmentType.infer_segment_type(value) == expected + + +class TestSegmentTypeAdditionalMethods: + def test_cast_value_for_bool_number_and_array_number(self): + assert SegmentType.cast_value(True, SegmentType.INTEGER) == 1 + assert SegmentType.cast_value(False, SegmentType.NUMBER) == 0 + assert SegmentType.cast_value([True, False], SegmentType.ARRAY_NUMBER) == [1, 0] + + mixed = [True, 1] + assert SegmentType.cast_value(mixed, SegmentType.ARRAY_NUMBER) is mixed + assert SegmentType.cast_value("x", SegmentType.STRING) == "x" + + def test_exposed_type_and_element_type(self): + assert SegmentType.INTEGER.exposed_type() == SegmentType.NUMBER + assert SegmentType.FLOAT.exposed_type() == SegmentType.NUMBER + assert SegmentType.STRING.exposed_type() == SegmentType.STRING + + assert SegmentType.ARRAY_STRING.element_type() == SegmentType.STRING + assert SegmentType.ARRAY_ANY.element_type() is None + + with pytest.raises(ValueError, match="element_type is only supported by array type"): + SegmentType.STRING.element_type() + + def test_group_validation_for_segment_group_and_list(self): + valid_group = SegmentGroup(value=[StringSegment(value="a")]) + assert SegmentType.GROUP.is_valid(valid_group) is True + assert SegmentType.GROUP.is_valid([StringSegment(value="b")]) is True + assert SegmentType.GROUP.is_valid(["not-segment"]) is False + + def test_unreachable_assertion_branch(self, monkeypatch): + monkeypatch.setattr(SegmentType, "is_array_type", lambda self: False) + + with pytest.raises(AssertionError, match="unreachable"): + SegmentType.ARRAY_STRING.is_valid(["a"]) diff --git a/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py b/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py index 0df4927697..22792eb5b3 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py +++ b/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py @@ -4,8 +4,10 @@ from unittest.mock import MagicMock, patch import pytest +from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.runtime import GraphRuntimeState, ReadOnlyGraphRuntimeStateWrapper, VariablePool +from dify_graph.variables.variables import StringVariable class StubCoordinator: @@ -278,3 +280,17 @@ class TestGraphRuntimeState: assert restored_execution.started is True assert new_stub.state == "configured" + + def test_snapshot_restore_preserves_updated_conversation_variable(self): + variable_pool = VariablePool( + conversation_variables=[StringVariable(name="session_name", value="before")], + ) + variable_pool.add((CONVERSATION_VARIABLE_NODE_ID, "session_name"), "after") + + state = GraphRuntimeState(variable_pool=variable_pool, start_at=time()) + snapshot = state.dumps() + restored = GraphRuntimeState.from_snapshot(snapshot) + + restored_value = restored.variable_pool.get((CONVERSATION_VARIABLE_NODE_ID, "session_name")) + assert restored_value is not None + assert restored_value.value == "after" diff --git a/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py b/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py index 4035c1a871..216e64db8d 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py +++ b/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py @@ -9,7 +9,7 @@ from typing import Any import pytest from dify_graph.entities.workflow_node_execution import WorkflowNodeExecution -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes class TestWorkflowNodeExecutionProcessDataTruncation: @@ -25,7 +25,7 @@ class TestWorkflowNodeExecutionProcessDataTruncation: workflow_id="test-workflow-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", process_data=process_data, created_at=datetime.now(), @@ -212,7 +212,7 @@ class TestWorkflowNodeExecutionProcessDataScenarios: workflow_id="test-workflow-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", process_data=scenario.original_data, created_at=datetime.now(), diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph.py b/api/tests/unit_tests/core/workflow/graph/test_graph.py index c46b9e51fd..24bd9ccbed 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph.py @@ -2,7 +2,7 @@ from unittest.mock import Mock -from dify_graph.enums import NodeExecutionType, NodeState, NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState from dify_graph.graph.edge import Edge from dify_graph.graph.graph import Graph from dify_graph.nodes.base.node import Node @@ -14,7 +14,7 @@ def create_mock_node(node_id: str, execution_type: NodeExecutionType, state: Nod node.id = node_id node.execution_type = execution_type node.state = state - node.node_type = NodeType.START + node.node_type = BuiltinNodeTypes.START return node diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py b/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py index bd4a0f32e2..64c2eee776 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py @@ -2,12 +2,12 @@ from unittest.mock import MagicMock import pytest -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.graph import Graph from dify_graph.nodes.base.node import Node -def _make_node(node_id: str, node_type: NodeType = NodeType.START) -> Node: +def _make_node(node_id: str, node_type: NodeType = BuiltinNodeTypes.START) -> Node: node = MagicMock(spec=Node) node.id = node_id node.node_type = node_type @@ -17,9 +17,9 @@ def _make_node(node_id: str, node_type: NodeType = NodeType.START) -> Node: def test_graph_builder_creates_linear_graph(): builder = Graph.new() - root = _make_node("root", NodeType.START) - mid = _make_node("mid", NodeType.LLM) - end = _make_node("end", NodeType.END) + root = _make_node("root", BuiltinNodeTypes.START) + mid = _make_node("mid", BuiltinNodeTypes.LLM) + end = _make_node("end", BuiltinNodeTypes.END) graph = builder.add_root(root).add_node(mid).add_node(end).build() diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py b/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py index b93f18c5bd..75de07bd8b 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py @@ -7,7 +7,7 @@ import pytest from core.workflow.node_factory import DifyNodeFactory from dify_graph.graph import Graph from dify_graph.graph.validation import GraphValidationError -from dify_graph.nodes import NodeType +from dify_graph.nodes import BuiltinNodeTypes from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from tests.workflow_test_utils import build_test_graph_init_params @@ -92,7 +92,7 @@ def test_iteration_root_requires_skip_validation(): ) assert graph.root_node.id == node_id - assert graph.root_node.node_type == NodeType.ITERATION + assert graph.root_node.node_type == BuiltinNodeTypes.ITERATION def test_loop_root_requires_skip_validation(): @@ -115,4 +115,4 @@ def test_loop_root_requires_skip_validation(): ) assert graph.root_node.id == node_id - assert graph.root_node.node_type == NodeType.LOOP + assert graph.root_node.node_type == BuiltinNodeTypes.LOOP diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py index b98d56147e..e94ad74eb0 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py @@ -7,10 +7,10 @@ from dataclasses import dataclass import pytest from dify_graph.entities import GraphInitParams -from dify_graph.enums import ErrorStrategy, NodeExecutionType, NodeType +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, NodeExecutionType, NodeType from dify_graph.graph import Graph from dify_graph.graph.validation import GraphValidationError -from dify_graph.nodes.base.entities import BaseNodeData from dify_graph.nodes.base.node import Node from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable @@ -18,12 +18,12 @@ from tests.workflow_test_utils import build_test_graph_init_params class _TestNodeData(BaseNodeData): - type: NodeType | str | None = None + type: NodeType | None = None execution_type: NodeExecutionType | str | None = None class _TestNode(Node[_TestNodeData]): - node_type = NodeType.ANSWER + node_type = BuiltinNodeTypes.ANSWER execution_type = NodeExecutionType.EXECUTABLE @classmethod @@ -46,13 +46,8 @@ class _TestNode(Node[_TestNodeData]): ) node_type_value = self.data.get("type") - if isinstance(node_type_value, NodeType): + if isinstance(node_type_value, str): self.node_type = node_type_value - elif isinstance(node_type_value, str): - try: - self.node_type = NodeType(node_type_value) - except ValueError: - pass def _run(self): raise NotImplementedError @@ -112,14 +107,17 @@ def test_graph_initialization_runs_default_validators( ): node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, - {"id": "answer", "data": {"type": NodeType.ANSWER, "title": "Answer"}}, + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, + {"id": "answer", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "Answer"}}, ] graph_config["edges"] = [ {"source": "start", "target": "answer", "sourceHandle": "success"}, ] - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert graph.root_node.id == "start" assert "answer" in graph.nodes @@ -130,14 +128,17 @@ def test_graph_validation_fails_for_unknown_edge_targets( ) -> None: node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, ] graph_config["edges"] = [ {"source": "start", "target": "missing", "sourceHandle": "success"}, ] with pytest.raises(GraphValidationError) as exc: - Graph.init(graph_config=graph_config, node_factory=node_factory) + Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert any(issue.code == "MISSING_NODE" for issue in exc.value.issues) @@ -147,11 +148,14 @@ def test_graph_promotes_fail_branch_nodes_to_branch_execution_type( ) -> None: node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, { "id": "branch", "data": { - "type": NodeType.IF_ELSE, + "type": BuiltinNodeTypes.IF_ELSE, "title": "Branch", "error_strategy": ErrorStrategy.FAIL_BRANCH, }, @@ -161,25 +165,55 @@ def test_graph_promotes_fail_branch_nodes_to_branch_execution_type( {"source": "start", "target": "branch", "sourceHandle": "success"}, ] - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert graph.nodes["branch"].execution_type == NodeExecutionType.BRANCH -def test_graph_validation_blocks_start_and_trigger_coexistence( +def test_graph_init_ignores_custom_note_nodes_before_node_data_validation( graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]], ) -> None: node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, { - "id": "trigger", - "data": {"type": NodeType.TRIGGER_WEBHOOK, "title": "Webhook", "execution_type": NodeExecutionType.ROOT}, + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, + {"id": "answer", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "Answer"}}, + { + "id": "note", + "type": "custom-note", + "data": { + "type": "", + "title": "", + "desc": "", + "text": "{}", + "theme": "blue", + }, + }, + ] + graph_config["edges"] = [ + {"source": "start", "target": "answer", "sourceHandle": "success"}, + ] + + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") + + assert graph.root_node.id == "start" + assert "answer" in graph.nodes + assert "note" not in graph.nodes + + +def test_graph_init_fails_for_unknown_root_node_id( + graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]], +) -> None: + node_factory, graph_config = graph_init_dependencies + graph_config["nodes"] = [ + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, }, ] graph_config["edges"] = [] - with pytest.raises(GraphValidationError) as exc_info: - Graph.init(graph_config=graph_config, node_factory=node_factory) - - assert any(issue.code == "TRIGGER_START_NODE_CONFLICT" for issue in exc_info.value.issues) + with pytest.raises(ValueError, match="Root node id missing not found in the graph"): + Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="missing") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py index 011c80df96..5b56024ee4 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py @@ -2,7 +2,8 @@ from __future__ import annotations -from dify_graph.enums import NodeExecutionType, NodeState, NodeType, WorkflowNodeExecutionStatus +from dify_graph.entities.base_node_data import RetryConfig +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState, WorkflowNodeExecutionStatus from dify_graph.graph import Graph from dify_graph.graph_engine.domain.graph_execution import GraphExecution from dify_graph.graph_engine.event_management.event_handlers import EventHandler @@ -12,7 +13,6 @@ from dify_graph.graph_engine.ready_queue.in_memory import InMemoryReadyQueue from dify_graph.graph_engine.response_coordinator.coordinator import ResponseStreamCoordinator from dify_graph.graph_events import NodeRunRetryEvent, NodeRunStartedEvent from dify_graph.node_events import NodeRunResult -from dify_graph.nodes.base.entities import RetryConfig from dify_graph.runtime import GraphRuntimeState, VariablePool from libs.datetime_utils import naive_utc_now @@ -80,7 +80,7 @@ def test_retry_does_not_emit_additional_start_event() -> None: handler, event_manager, graph_execution = _build_event_handler(node_id) execution_id = "exec-1" - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE start_time = naive_utc_now() start_event = NodeRunStartedEvent( diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py index 3d8de0a00d..9e7b3654b7 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py @@ -10,7 +10,7 @@ from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter from opentelemetry.trace import set_tracer_provider -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes @pytest.fixture @@ -44,7 +44,7 @@ def mock_start_node(): node.id = "test-start-node-id" node.title = "Start Node" node.execution_id = "test-start-execution-id" - node.node_type = NodeType.START + node.node_type = BuiltinNodeTypes.START return node @@ -55,7 +55,7 @@ def mock_llm_node(): node.id = "test-llm-node-id" node.title = "LLM Node" node.execution_id = "test-llm-execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM return node @@ -69,7 +69,7 @@ def mock_tool_node(): node.id = "test-tool-node-id" node.title = "Test Tool Node" node.execution_id = "test-tool-execution-id" - node.node_type = NodeType.TOOL + node.node_type = BuiltinNodeTypes.TOOL tool_data = ToolNodeData( title="Test Tool Node", @@ -108,7 +108,7 @@ def mock_retrieval_node(): node.id = "test-retrieval-node-id" node.title = "Retrieval Node" node.execution_id = "test-retrieval-execution-id" - node.node_type = NodeType.KNOWLEDGE_RETRIEVAL + node.node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL return node @@ -130,7 +130,7 @@ def mock_result_event(): return NodeRunSucceededEvent( id="test-execution-id", node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=datetime.now(), node_run_result=node_run_result, ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py index 819fd67f9d..2a36f712fd 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock, patch from core.app.workflow.layers.llm_quota import LLMQuotaLayer from core.errors.error import QuotaExceededError -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph_engine.entities.commands import CommandType from dify_graph.graph_events.node import NodeRunSucceededEvent from dify_graph.model_runtime.entities.llm_entities import LLMUsage @@ -15,7 +15,7 @@ def _build_succeeded_event() -> NodeRunSucceededEvent: return NodeRunSucceededEvent( id="execution-id", node_id="llm-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=datetime.now(), node_run_result=NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, @@ -30,7 +30,7 @@ def test_deduct_quota_called_for_successful_llm_node() -> None: node = MagicMock() node.id = "llm-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.tenant_id = "tenant-id" node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() @@ -51,7 +51,7 @@ def test_deduct_quota_called_for_question_classifier_node() -> None: node = MagicMock() node.id = "question-classifier-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.QUESTION_CLASSIFIER + node.node_type = BuiltinNodeTypes.QUESTION_CLASSIFIER node.tenant_id = "tenant-id" node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() @@ -72,7 +72,7 @@ def test_non_llm_node_is_ignored() -> None: node = MagicMock() node.id = "start-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.START + node.node_type = BuiltinNodeTypes.START node.tenant_id = "tenant-id" node.require_dify_context.return_value.tenant_id = "tenant-id" node._model_instance = object() @@ -89,7 +89,7 @@ def test_quota_error_is_handled_in_layer() -> None: node = MagicMock() node.id = "llm-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.tenant_id = "tenant-id" node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() @@ -111,7 +111,7 @@ def test_quota_deduction_exceeded_aborts_workflow_immediately() -> None: node = MagicMock() node.id = "llm-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.tenant_id = "tenant-id" node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() @@ -140,7 +140,7 @@ def test_quota_precheck_failure_aborts_workflow_immediately() -> None: node = MagicMock() node.id = "llm-node-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.model_instance = object() node.graph_runtime_state = MagicMock() node.graph_runtime_state.stop_event = stop_event @@ -166,7 +166,7 @@ def test_quota_precheck_passes_without_abort() -> None: node = MagicMock() node.id = "llm-node-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.model_instance = object() node.graph_runtime_state = MagicMock() node.graph_runtime_state.stop_event = stop_event diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py index b4a7cec494..478a2b592e 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py @@ -16,7 +16,7 @@ import pytest from opentelemetry.trace import StatusCode from core.app.workflow.layers.observability import ObservabilityLayer -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes class TestObservabilityLayerInitialization: @@ -29,7 +29,7 @@ class TestObservabilityLayerInitialization: layer = ObservabilityLayer() assert not layer._is_disabled assert layer._tracer is not None - assert NodeType.TOOL in layer._parsers + assert BuiltinNodeTypes.TOOL in layer._parsers assert layer._default_parser is not None @patch("core.app.workflow.layers.observability.dify_config.ENABLE_OTEL", False) @@ -39,7 +39,7 @@ class TestObservabilityLayerInitialization: layer = ObservabilityLayer() assert not layer._is_disabled assert layer._tracer is not None - assert NodeType.TOOL in layer._parsers + assert BuiltinNodeTypes.TOOL in layer._parsers assert layer._default_parser is not None @@ -117,7 +117,7 @@ class TestObservabilityLayerParserIntegration: attrs = spans[0].attributes assert attrs["node.id"] == mock_start_node.id assert attrs["node.execution_id"] == mock_start_node.execution_id - assert attrs["node.type"] == mock_start_node.node_type.value + assert attrs["node.type"] == mock_start_node.node_type @patch("core.app.workflow.layers.observability.dify_config.ENABLE_OTEL", True) @pytest.mark.usefixtures("mock_is_instrument_flag_enabled_false") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py index 50d14ff48f..548c10ce8d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py @@ -6,7 +6,7 @@ import queue from unittest import mock from dify_graph.entities.pause_reason import SchedulingPause -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph_engine.event_management.event_handlers import EventHandler from dify_graph.graph_engine.orchestration.dispatcher import Dispatcher from dify_graph.graph_engine.orchestration.execution_coordinator import ExecutionCoordinator @@ -26,7 +26,7 @@ def test_dispatcher_should_consume_remains_events_after_pause(): GraphNodeEventBase( id="test", node_id="test", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, ) ) event_handler = mock.Mock(spec=EventHandler) @@ -107,7 +107,7 @@ def _make_started_event() -> NodeRunStartedEvent: return NodeRunStartedEvent( id="start-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="Test Node", start_at=naive_utc_now(), ) @@ -117,7 +117,7 @@ def _make_succeeded_event() -> NodeRunSucceededEvent: return NodeRunSucceededEvent( id="success-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="Test Node", start_at=naive_utc_now(), node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), @@ -151,20 +151,20 @@ def test_dispatcher_drain_event_queue(): NodeRunStartedEvent( id="start-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="Code", start_at=naive_utc_now(), ), NodeRunPauseRequestedEvent( id="pause-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, reason=SchedulingPause(message="test pause"), ), NodeRunSucceededEvent( id="success-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, start_at=naive_utc_now(), node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), ), diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py b/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py index f886ae1c2b..fc0d22f739 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py @@ -7,7 +7,7 @@ for workflows containing nodes that require third-party services. import pytest -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig @@ -227,23 +227,23 @@ def test_mock_factory_node_type_detection(): ) # Test that third-party service nodes are identified for mocking - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) - assert factory.should_mock_node(NodeType.TOOL) - assert factory.should_mock_node(NodeType.KNOWLEDGE_RETRIEVAL) - assert factory.should_mock_node(NodeType.HTTP_REQUEST) - assert factory.should_mock_node(NodeType.PARAMETER_EXTRACTOR) - assert factory.should_mock_node(NodeType.DOCUMENT_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.TOOL) + assert factory.should_mock_node(BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL) + assert factory.should_mock_node(BuiltinNodeTypes.HTTP_REQUEST) + assert factory.should_mock_node(BuiltinNodeTypes.PARAMETER_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.DOCUMENT_EXTRACTOR) # Test that CODE and TEMPLATE_TRANSFORM are mocked (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Test that non-service nodes are not mocked - assert not factory.should_mock_node(NodeType.START) - assert not factory.should_mock_node(NodeType.END) - assert not factory.should_mock_node(NodeType.IF_ELSE) - assert not factory.should_mock_node(NodeType.VARIABLE_AGGREGATOR) + assert not factory.should_mock_node(BuiltinNodeTypes.START) + assert not factory.should_mock_node(BuiltinNodeTypes.END) + assert not factory.should_mock_node(BuiltinNodeTypes.IF_ELSE) + assert not factory.should_mock_node(BuiltinNodeTypes.VARIABLE_AGGREGATOR) def test_custom_mock_handler(): @@ -341,15 +341,15 @@ def test_register_custom_mock_node(): ) # TEMPLATE_TRANSFORM is mocked by default (requires SSRF proxy) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Unregister mock - factory.unregister_mock_node_type(NodeType.TEMPLATE_TRANSFORM) - assert not factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.unregister_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM) + assert not factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Re-register custom mock - factory.register_mock_node_type(NodeType.TEMPLATE_TRANSFORM, MockTemplateTransformNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.register_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM, MockTemplateTransformNode) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) def test_default_config_by_node_type(): @@ -358,7 +358,7 @@ def test_default_config_by_node_type(): # Set default config for all LLM nodes mock_config.set_default_config( - NodeType.LLM, + BuiltinNodeTypes.LLM, { "default_response": "Default LLM response for all nodes", "temperature": 0.7, @@ -367,23 +367,23 @@ def test_default_config_by_node_type(): # Set default config for all HTTP nodes mock_config.set_default_config( - NodeType.HTTP_REQUEST, + BuiltinNodeTypes.HTTP_REQUEST, { "default_status": 200, "default_timeout": 30, }, ) - llm_config = mock_config.get_default_config(NodeType.LLM) + llm_config = mock_config.get_default_config(BuiltinNodeTypes.LLM) assert llm_config["default_response"] == "Default LLM response for all nodes" assert llm_config["temperature"] == 0.7 - http_config = mock_config.get_default_config(NodeType.HTTP_REQUEST) + http_config = mock_config.get_default_config(BuiltinNodeTypes.HTTP_REQUEST) assert http_config["default_status"] == 200 assert http_config["default_timeout"] == 30 # Non-configured node type should return empty dict - tool_config = mock_config.get_default_config(NodeType.TOOL) + tool_config = mock_config.get_default_config(BuiltinNodeTypes.TOOL) assert tool_config == {} diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py index cde99196c8..76bf179f33 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py @@ -6,7 +6,7 @@ This test validates that: - When blocking != 1: NodeRunStreamChunkEvent present (direct LLM to End output) """ -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.graph_engine import GraphEngine, GraphEngineConfig from dify_graph.graph_engine.command_channels import InMemoryChannel from dify_graph.graph_events import ( @@ -74,7 +74,11 @@ def test_streaming_output_with_blocking_equals_one(): # Find indices of first LLM success event and first stream chunk event llm2_start_index = next( - (i for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM), + ( + i + for i, e in enumerate(events) + if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM + ), -1, ) first_chunk_index = next( @@ -96,16 +100,16 @@ def test_streaming_output_with_blocking_equals_one(): # Check all Template's NodeRunStreamChunkEvent should has same id with Template's NodeRunStartedEvent start_events = [ - e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.TEMPLATE_TRANSFORM + e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM ] - template_chunk_events = [e for e in stream_chunk_events if e.node_type == NodeType.TEMPLATE_TRANSFORM] + template_chunk_events = [e for e in stream_chunk_events if e.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM] assert len(template_chunk_events) == 1, f"Expected 1 template chunk event, but got {len(template_chunk_events)}" assert all(e.id in [se.id for se in start_events] for e in template_chunk_events), ( "Expected all Template chunk events to have same id with Template's NodeRunStartedEvent" ) # Check that NodeRunStreamChunkEvent contains '\n' is from the End node - end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.END] + end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.END] assert len(end_events) == 1, f"Expected 1 end event, but got {len(end_events)}" newline_chunk_events = [e for e in stream_chunk_events if e.chunk == "\n"] assert len(newline_chunk_events) == 1, f"Expected 1 newline chunk event, but got {len(newline_chunk_events)}" @@ -168,7 +172,11 @@ def test_streaming_output_with_blocking_not_equals_one(): # Find indices of first LLM success event and first stream chunk event llm2_start_index = next( - (i for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM), + ( + i + for i, e in enumerate(events) + if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM + ), -1, ) first_chunk_index = next( @@ -194,15 +202,15 @@ def test_streaming_output_with_blocking_not_equals_one(): assert all(e.id == start_event.id for e in query_chunk_events), "Expected all query chunk events to have same id" # Check all LLM's NodeRunStreamChunkEvent should be from LLM nodes - start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.LLM] - llm_chunk_events = [e for e in stream_chunk_events if e.node_type == NodeType.LLM] + start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.LLM] + llm_chunk_events = [e for e in stream_chunk_events if e.node_type == BuiltinNodeTypes.LLM] llm_node_ids = {se.node_id for se in start_events} assert all(e.node_id in llm_node_ids for e in llm_chunk_events), ( "Expected all LLM chunk events to be from LLM nodes" ) # Check that NodeRunStreamChunkEvent contains '\n' is from the End node - end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.END] + end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.END] assert len(end_events) == 1, f"Expected 1 end event, but got {len(end_events)}" newline_chunk_events = [e for e in stream_chunk_events if e.chunk == "\n"] assert len(newline_chunk_events) == 1, f"Expected 1 newline chunk event, but got {len(newline_chunk_events)}" diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py index b88c15ea2a..778dad5952 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py @@ -1,7 +1,7 @@ import queue from datetime import datetime -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph_engine.orchestration.dispatcher import Dispatcher from dify_graph.graph_events import NodeRunSucceededEvent from dify_graph.node_events import NodeRunResult @@ -51,7 +51,7 @@ def test_dispatcher_drains_events_when_paused() -> None: event = NodeRunSucceededEvent( id="exec-1", node_id="node-1", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, start_at=datetime.utcnow(), node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py index b9ae680f52..4e13177d2b 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py @@ -10,6 +10,7 @@ import time from hypothesis import HealthCheck, given, settings from hypothesis import strategies as st +from dify_graph.entities.base_node_data import DefaultValue, DefaultValueType from dify_graph.enums import ErrorStrategy from dify_graph.graph_engine import GraphEngine, GraphEngineConfig from dify_graph.graph_engine.command_channels import InMemoryChannel @@ -18,7 +19,6 @@ from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, ) -from dify_graph.nodes.base.entities import DefaultValue, DefaultValueType # Import the test framework from the new module from .test_mock_config import MockConfigBuilder diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py index 805e7dbbce..255784b77d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py @@ -6,7 +6,7 @@ import json from collections import deque from unittest.mock import MagicMock -from dify_graph.enums import NodeExecutionType, NodeState, NodeType +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState from dify_graph.graph_engine.domain import GraphExecution from dify_graph.graph_engine.response_coordinator import ResponseStreamCoordinator from dify_graph.graph_engine.response_coordinator.path import Path @@ -101,7 +101,9 @@ def test_response_stream_coordinator_serialization_round_trip(monkeypatch) -> No class DummyNode: def __init__(self, node_id: str, template: Template, execution_type: NodeExecutionType) -> None: self.id = node_id - self.node_type = NodeType.ANSWER if execution_type == NodeExecutionType.RESPONSE else NodeType.LLM + self.node_type = ( + BuiltinNodeTypes.ANSWER if execution_type == NodeExecutionType.RESPONSE else BuiltinNodeTypes.LLM + ) self.execution_type = execution_type self.state = NodeState.UNKNOWN self.title = node_id @@ -160,7 +162,7 @@ def test_response_stream_coordinator_serialization_round_trip(monkeypatch) -> No event = NodeRunStreamChunkEvent( id="exec-1", node_id="response-1", - node_type=NodeType.ANSWER, + node_type=BuiltinNodeTypes.ANSWER, selector=["node-source", "text"], chunk="chunk-1", is_final=False, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py index 6041c6ff30..8a4649693d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py @@ -11,8 +11,6 @@ from collections.abc import Callable from dataclasses import dataclass, field from typing import Any -from dify_graph.enums import NodeType - @dataclass class NodeMockConfig: diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py index 9f33a81985..93010eea54 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py @@ -5,11 +5,11 @@ This module provides a MockNodeFactory that automatically detects and mocks node requiring external services (LLM, Agent, Tool, Knowledge Retrieval, HTTP Request). """ -from collections.abc import Mapping from typing import TYPE_CHECKING, Any from core.workflow.node_factory import DifyNodeFactory -from dify_graph.enums import NodeType +from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.node import Node from .test_mock_nodes import ( @@ -61,63 +61,51 @@ class MockNodeFactory(DifyNodeFactory): # Map of node types that should be mocked self._mock_node_types = { - NodeType.LLM: MockLLMNode, - NodeType.AGENT: MockAgentNode, - NodeType.TOOL: MockToolNode, - NodeType.KNOWLEDGE_RETRIEVAL: MockKnowledgeRetrievalNode, - NodeType.HTTP_REQUEST: MockHttpRequestNode, - NodeType.QUESTION_CLASSIFIER: MockQuestionClassifierNode, - NodeType.PARAMETER_EXTRACTOR: MockParameterExtractorNode, - NodeType.DOCUMENT_EXTRACTOR: MockDocumentExtractorNode, - NodeType.ITERATION: MockIterationNode, - NodeType.LOOP: MockLoopNode, - NodeType.TEMPLATE_TRANSFORM: MockTemplateTransformNode, - NodeType.CODE: MockCodeNode, + BuiltinNodeTypes.LLM: MockLLMNode, + BuiltinNodeTypes.AGENT: MockAgentNode, + BuiltinNodeTypes.TOOL: MockToolNode, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: MockKnowledgeRetrievalNode, + BuiltinNodeTypes.HTTP_REQUEST: MockHttpRequestNode, + BuiltinNodeTypes.QUESTION_CLASSIFIER: MockQuestionClassifierNode, + BuiltinNodeTypes.PARAMETER_EXTRACTOR: MockParameterExtractorNode, + BuiltinNodeTypes.DOCUMENT_EXTRACTOR: MockDocumentExtractorNode, + BuiltinNodeTypes.ITERATION: MockIterationNode, + BuiltinNodeTypes.LOOP: MockLoopNode, + BuiltinNodeTypes.TEMPLATE_TRANSFORM: MockTemplateTransformNode, + BuiltinNodeTypes.CODE: MockCodeNode, } - def create_node(self, node_config: Mapping[str, Any]) -> Node: + def create_node(self, node_config: dict[str, Any] | NodeConfigDict) -> Node: """ Create a node instance, using mock implementations for third-party service nodes. :param node_config: Node configuration dictionary :return: Node instance (real or mocked) """ - # Get node type from config - node_data = node_config.get("data", {}) - node_type_str = node_data.get("type") - - if not node_type_str: - # Fall back to parent implementation for nodes without type - return super().create_node(node_config) - - try: - node_type = NodeType(node_type_str) - except ValueError: - # Unknown node type, use parent implementation - return super().create_node(node_config) + typed_node_config = NodeConfigDictAdapter.validate_python(node_config) + node_data = typed_node_config["data"] + node_type = node_data.type # Check if this node type should be mocked if node_type in self._mock_node_types: - node_id = node_config.get("id") - if not node_id: - raise ValueError("Node config missing id") + node_id = typed_node_config["id"] # Create mock node instance mock_class = self._mock_node_types[node_type] - if node_type == NodeType.CODE: + if node_type == BuiltinNodeTypes.CODE: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, code_executor=self._code_executor, code_limits=self._code_limits, ) - elif node_type == NodeType.HTTP_REQUEST: + elif node_type == BuiltinNodeTypes.HTTP_REQUEST: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, @@ -126,10 +114,14 @@ class MockNodeFactory(DifyNodeFactory): tool_file_manager_factory=self._http_request_tool_file_manager_factory, file_manager=self._http_request_file_manager, ) - elif node_type in {NodeType.LLM, NodeType.QUESTION_CLASSIFIER, NodeType.PARAMETER_EXTRACTOR}: + elif node_type in { + BuiltinNodeTypes.LLM, + BuiltinNodeTypes.QUESTION_CLASSIFIER, + BuiltinNodeTypes.PARAMETER_EXTRACTOR, + }: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, @@ -139,7 +131,7 @@ class MockNodeFactory(DifyNodeFactory): else: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, @@ -148,7 +140,7 @@ class MockNodeFactory(DifyNodeFactory): return mock_instance # For non-mocked node types, use parent implementation - return super().create_node(node_config) + return super().create_node(typed_node_config) def should_mock_node(self, node_type: NodeType) -> bool: """ diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py index eb449e6d75..3e4247f33f 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py @@ -2,16 +2,8 @@ Simple test to verify MockNodeFactory works with iteration nodes. """ -import sys -from pathlib import Path - from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY - -# Add api directory to path -api_dir = Path(__file__).parent.parent.parent.parent.parent.parent -sys.path.insert(0, str(api_dir)) - -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfigBuilder from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory @@ -50,20 +42,20 @@ def test_mock_factory_registers_iteration_node(): ) # Check that iteration node is registered - assert NodeType.ITERATION in factory._mock_node_types + assert BuiltinNodeTypes.ITERATION in factory._mock_node_types print("✓ Iteration node is registered in MockNodeFactory") # Check that loop node is registered - assert NodeType.LOOP in factory._mock_node_types + assert BuiltinNodeTypes.LOOP in factory._mock_node_types print("✓ Loop node is registered in MockNodeFactory") # Check the class types from tests.unit_tests.core.workflow.graph_engine.test_mock_nodes import MockIterationNode, MockLoopNode - assert factory._mock_node_types[NodeType.ITERATION] == MockIterationNode + assert factory._mock_node_types[BuiltinNodeTypes.ITERATION] == MockIterationNode print("✓ Iteration node maps to MockIterationNode class") - assert factory._mock_node_types[NodeType.LOOP] == MockLoopNode + assert factory._mock_node_types[BuiltinNodeTypes.LOOP] == MockLoopNode print("✓ Loop node maps to MockLoopNode class") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py index 3f458e9de9..e117f81ff9 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py @@ -11,17 +11,18 @@ from typing import TYPE_CHECKING, Any, Optional from unittest.mock import MagicMock from core.model_manager import ModelInstance +from core.workflow.nodes.agent import AgentNode +from core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent -from dify_graph.nodes.agent import AgentNode from dify_graph.nodes.code import CodeNode from dify_graph.nodes.document_extractor import DocumentExtractorNode from dify_graph.nodes.http_request import HttpRequestNode -from dify_graph.nodes.knowledge_retrieval import KnowledgeRetrievalNode from dify_graph.nodes.llm import LLMNode from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory from dify_graph.nodes.parameter_extractor import ParameterExtractorNode +from dify_graph.nodes.protocols import HttpClientProtocol, ToolFileManagerProtocol from dify_graph.nodes.question_classifier import QuestionClassifierNode from dify_graph.nodes.template_transform import TemplateTransformNode from dify_graph.nodes.template_transform.template_renderer import ( @@ -65,11 +66,27 @@ class MockNodeMixin: kwargs.setdefault("credentials_provider", MagicMock(spec=CredentialsProvider)) kwargs.setdefault("model_factory", MagicMock(spec=ModelFactory)) kwargs.setdefault("model_instance", MagicMock(spec=ModelInstance)) + # LLM-like nodes now require an http_client; provide a mock by default for tests. + kwargs.setdefault("http_client", MagicMock(spec=HttpClientProtocol)) # Ensure TemplateTransformNode receives a renderer now required by constructor if isinstance(self, TemplateTransformNode): kwargs.setdefault("template_renderer", _TestJinja2Renderer()) + # Provide default tool_file_manager_factory for ToolNode subclasses + from dify_graph.nodes.tool import ToolNode as _ToolNode # local import to avoid cycles + + if isinstance(self, _ToolNode): + kwargs.setdefault("tool_file_manager_factory", MagicMock(spec=ToolFileManagerProtocol)) + + if isinstance(self, AgentNode): + presentation_provider = MagicMock() + presentation_provider.get_icon.return_value = None + kwargs.setdefault("strategy_resolver", MagicMock()) + kwargs.setdefault("presentation_provider", presentation_provider) + kwargs.setdefault("runtime_support", MagicMock()) + kwargs.setdefault("message_transformer", MagicMock()) + super().__init__( id=id, config=config, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py index 1550dca402..a8398e8f79 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py @@ -7,7 +7,7 @@ to ensure they work correctly with the TableTestRunner. from configs import dify_config from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.nodes.code.limits import CodeNodeLimits from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory @@ -550,12 +550,12 @@ class TestMockNodeFactory: ) # Verify that CODE and TEMPLATE_TRANSFORM ARE mocked by default (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Verify that other third-party service nodes ARE also mocked by default - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) def test_factory_creates_mock_template_transform_node(self): """Test that MockNodeFactory creates MockTemplateTransformNode for template-transform type.""" @@ -610,7 +610,7 @@ class TestMockNodeFactory: # Verify the correct mock type was created assert isinstance(node, MockTemplateTransformNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) def test_factory_creates_mock_code_node(self): """Test that MockNodeFactory creates MockCodeNode for code type.""" @@ -667,4 +667,4 @@ class TestMockNodeFactory: # Verify the correct mock type was created assert isinstance(node, MockCodeNode) - assert factory.should_mock_node(NodeType.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py index 84d1444585..5b35b3310a 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py @@ -3,15 +3,9 @@ Simple test to validate the auto-mock system without external dependencies. """ import sys -from pathlib import Path from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY - -# Add api directory to path -api_dir = Path(__file__).parent.parent.parent.parent.parent.parent -sys.path.insert(0, str(api_dir)) - -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory @@ -70,8 +64,8 @@ def test_mock_config_operations(): assert error_config.error == "Test error" # Test default configs by node type - config.set_default_config(NodeType.LLM, {"temperature": 0.7}) - llm_config = config.get_default_config(NodeType.LLM) + config.set_default_config(BuiltinNodeTypes.LLM, {"temperature": 0.7}) + llm_config = config.get_default_config(BuiltinNodeTypes.LLM) assert llm_config == {"temperature": 0.7} print("✓ MockConfig operations test passed") @@ -136,23 +130,23 @@ def test_mock_factory_detection(): ) # Test that third-party service nodes are identified for mocking - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) - assert factory.should_mock_node(NodeType.TOOL) - assert factory.should_mock_node(NodeType.KNOWLEDGE_RETRIEVAL) - assert factory.should_mock_node(NodeType.HTTP_REQUEST) - assert factory.should_mock_node(NodeType.PARAMETER_EXTRACTOR) - assert factory.should_mock_node(NodeType.DOCUMENT_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.TOOL) + assert factory.should_mock_node(BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL) + assert factory.should_mock_node(BuiltinNodeTypes.HTTP_REQUEST) + assert factory.should_mock_node(BuiltinNodeTypes.PARAMETER_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.DOCUMENT_EXTRACTOR) # Test that CODE and TEMPLATE_TRANSFORM are mocked (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Test that non-service nodes are not mocked - assert not factory.should_mock_node(NodeType.START) - assert not factory.should_mock_node(NodeType.END) - assert not factory.should_mock_node(NodeType.IF_ELSE) - assert not factory.should_mock_node(NodeType.VARIABLE_AGGREGATOR) + assert not factory.should_mock_node(BuiltinNodeTypes.START) + assert not factory.should_mock_node(BuiltinNodeTypes.END) + assert not factory.should_mock_node(BuiltinNodeTypes.IF_ELSE) + assert not factory.should_mock_node(BuiltinNodeTypes.VARIABLE_AGGREGATOR) print("✓ MockNodeFactory detection test passed") @@ -192,18 +186,18 @@ def test_mock_factory_registration(): ) # TEMPLATE_TRANSFORM is mocked by default (requires SSRF proxy) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Unregister mock - factory.unregister_mock_node_type(NodeType.TEMPLATE_TRANSFORM) - assert not factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.unregister_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM) + assert not factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Register custom mock (using a dummy class for testing) class DummyMockNode: pass - factory.register_mock_node_type(NodeType.TEMPLATE_TRANSFORM, DummyMockNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.register_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM, DummyMockNode) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) print("✓ MockNodeFactory registration test passed") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py index 0ac9d6618d..b954a4faac 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py @@ -14,8 +14,8 @@ from uuid import uuid4 from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.model_manager import ModelInstance -from core.workflow.node_factory import DifyNodeFactory -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.graph import Graph from dify_graph.graph_engine import GraphEngine, GraphEngineConfig from dify_graph.graph_engine.command_channels import InMemoryChannel @@ -118,7 +118,11 @@ def test_parallel_streaming_workflow(): with patch.object( DifyNodeFactory, "_build_model_instance_for_llm_node", return_value=MagicMock(spec=ModelInstance), autospec=True ): - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init( + graph_config=graph_config, + node_factory=node_factory, + root_node_id=get_default_root_node_id(graph_config), + ) # Create the graph engine engine = GraphEngine( @@ -164,7 +168,9 @@ def test_parallel_streaming_workflow(): stream_chunk_events = [e for e in events if isinstance(e, NodeRunStreamChunkEvent)] # Get Answer node start event - answer_start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.ANSWER] + answer_start_events = [ + e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.ANSWER + ] assert len(answer_start_events) == 1, f"Expected 1 Answer node start event, got {len(answer_start_events)}" answer_start_event = answer_start_events[0] @@ -211,7 +217,9 @@ def test_parallel_streaming_workflow(): # Get LLM completion events llm_completed_events = [ - (i, e) for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM + (i, e) + for i, e in enumerate(events) + if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM ] # Check LLM completion order - in the current implementation, LLMs run sequentially @@ -263,7 +271,7 @@ def test_parallel_streaming_workflow(): # According to Answer node configuration: '{{#1754339725656.text#}}{{#1754339718571.text#}}' # This means LLM 2 output should come first, then LLM 1 output answer_complete_events = [ - e for e in events if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.ANSWER + e for e in events if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.ANSWER ] assert len(answer_complete_events) == 1, f"Expected 1 Answer completion event, got {len(answer_complete_events)}" diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py b/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py new file mode 100644 index 0000000000..198e133454 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py @@ -0,0 +1,71 @@ +"""Unit tests for response session creation.""" + +from __future__ import annotations + +import pytest + +import dify_graph.graph_engine.response_coordinator.session as response_session_module +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState, NodeType +from dify_graph.graph_engine.response_coordinator import RESPONSE_SESSION_NODE_TYPES +from dify_graph.graph_engine.response_coordinator.session import ResponseSession +from dify_graph.nodes.base.template import Template, TextSegment + + +class DummyResponseNode: + """Minimal response-capable node for session tests.""" + + def __init__(self, *, node_id: str, node_type: NodeType, template: Template) -> None: + self.id = node_id + self.node_type = node_type + self.execution_type = NodeExecutionType.RESPONSE + self.state = NodeState.UNKNOWN + self._template = template + + def get_streaming_template(self) -> Template: + return self._template + + +class DummyNodeWithoutStreamingTemplate: + """Minimal node that violates the response-session contract.""" + + def __init__(self, *, node_id: str, node_type: NodeType) -> None: + self.id = node_id + self.node_type = node_type + self.execution_type = NodeExecutionType.RESPONSE + self.state = NodeState.UNKNOWN + + +def test_response_session_from_node_rejects_node_types_outside_allowlist() -> None: + """Unsupported node types are rejected even if they expose a template.""" + node = DummyResponseNode( + node_id="llm-node", + node_type=BuiltinNodeTypes.LLM, + template=Template(segments=[TextSegment(text="hello")]), + ) + + with pytest.raises(TypeError, match="RESPONSE_SESSION_NODE_TYPES"): + ResponseSession.from_node(node) + + +def test_response_session_from_node_supports_downstream_allowlist_extension(monkeypatch) -> None: + """Downstream applications can extend the supported node-type list.""" + node = DummyResponseNode( + node_id="llm-node", + node_type=BuiltinNodeTypes.LLM, + template=Template(segments=[TextSegment(text="hello")]), + ) + extended_node_types = [*RESPONSE_SESSION_NODE_TYPES, BuiltinNodeTypes.LLM] + monkeypatch.setattr(response_session_module, "RESPONSE_SESSION_NODE_TYPES", extended_node_types) + + session = ResponseSession.from_node(node) + + assert session.node_id == "llm-node" + assert session.template.segments == [TextSegment(text="hello")] + + +def test_response_session_from_node_requires_streaming_template_method() -> None: + """Allowed node types still need to implement the streaming-template contract.""" + node = DummyNodeWithoutStreamingTemplate(node_id="answer-node", node_type=BuiltinNodeTypes.ANSWER) + + with pytest.raises(TypeError, match="get_streaming_template"): + ResponseSession.from_node(node) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py b/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py index 767a8f60ce..ab8fb346b8 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py @@ -21,7 +21,7 @@ from typing import Any, cast from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.tools.utils.yaml_utils import _load_yaml_file -from core.workflow.node_factory import DifyNodeFactory +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams from dify_graph.graph import Graph from dify_graph.graph_engine import GraphEngine, GraphEngineConfig @@ -257,7 +257,11 @@ class WorkflowRunner: else: node_factory = DifyNodeFactory(graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init( + graph_config=graph_config, + node_factory=node_factory, + root_node_id=get_default_root_node_id(graph_config), + ) return graph, graph_runtime_state diff --git a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py index f0d80af1ed..fd563d1be2 100644 --- a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py +++ b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py @@ -64,7 +64,7 @@ def test_execute_answer(): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "answer", diff --git a/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py b/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py index bf814d0c97..81d3f5be9c 100644 --- a/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py @@ -1,14 +1,12 @@ import pytest -from dify_graph.enums import NodeType -from dify_graph.nodes.base.entities import BaseNodeData +from core.workflow.node_factory import get_node_type_classes_mapping +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.node import Node -# Ensures that all node classes are imported. -from dify_graph.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING - -# Ensure `NODE_TYPE_CLASSES_MAPPING` is used and not automatically removed. -_ = NODE_TYPE_CLASSES_MAPPING +# Ensures that all production node classes are imported and registered. +_ = get_node_type_classes_mapping() class _TestNodeData(BaseNodeData): @@ -43,7 +41,7 @@ def test_ensure_subclasses_of_base_node_has_node_type_and_version_method_defined node_type = cls.node_type node_version = cls.version() - assert isinstance(cls.node_type, NodeType) + assert isinstance(cls.node_type, str) assert isinstance(node_version, str) node_type_and_version = (node_type, node_version) assert node_type_and_version not in type_version_set, ( @@ -56,7 +54,7 @@ def test_extract_node_data_type_from_generic_extracts_type(): """When a class inherits from Node[T], it should extract T.""" class _ConcreteNode(Node[_TestNodeData]): - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE @staticmethod def version() -> str: @@ -108,7 +106,7 @@ def test_init_subclass_rejects_explicit_node_data_type_without_generic(): class _ExplicitNode(Node): _node_data_type = _TestNodeData - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE @staticmethod def version() -> str: @@ -119,10 +117,27 @@ def test_init_subclass_sets_node_data_type_from_generic(): """Verify that __init_subclass__ sets _node_data_type from the generic parameter.""" class _AutoNode(Node[_TestNodeData]): - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE @staticmethod def version() -> str: return "1" assert _AutoNode._node_data_type is _TestNodeData + + +def test_validate_node_data_uses_declared_node_data_type(): + """Public validation should hydrate the subclass-declared node data model.""" + + class _AutoNode(Node[_TestNodeData]): + node_type = BuiltinNodeTypes.CODE + + @staticmethod + def version() -> str: + return "1" + + base_node_data = BaseNodeData.model_validate({"type": BuiltinNodeTypes.CODE, "title": "Test"}) + + validated = _AutoNode.validate_node_data(base_node_data) + + assert isinstance(validated, _TestNodeData) diff --git a/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py b/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py index f8d799e446..972a945ca0 100644 --- a/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py +++ b/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py @@ -1,8 +1,9 @@ import types from collections.abc import Mapping -from dify_graph.enums import NodeType -from dify_graph.nodes.base.entities import BaseNodeData +from core.workflow.node_factory import get_node_type_classes_mapping +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType from dify_graph.nodes.base.node import Node # Import concrete nodes we will assert on (numeric version path) @@ -16,11 +17,11 @@ from dify_graph.nodes.variable_assigner.v2.node import ( def test_variable_assigner_latest_prefers_highest_numeric_version(): # Act - mapping: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping() + mapping: Mapping[NodeType, Mapping[str, type[Node]]] = get_node_type_classes_mapping() # Assert basic presence - assert NodeType.VARIABLE_ASSIGNER in mapping - va_versions = mapping[NodeType.VARIABLE_ASSIGNER] + assert BuiltinNodeTypes.VARIABLE_ASSIGNER in mapping + va_versions = mapping[BuiltinNodeTypes.VARIABLE_ASSIGNER] # Both concrete versions must be present assert va_versions.get("1") is VariableAssignerV1 @@ -34,7 +35,7 @@ def test_latest_prefers_highest_numeric_version(): # Arrange: define two ephemeral subclasses with numeric versions under a NodeType # that has no concrete implementations in production to avoid interference. class _Version1(Node[BaseNodeData]): # type: ignore[misc] - node_type = NodeType.LEGACY_VARIABLE_AGGREGATOR + node_type = BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR def init_node_data(self, data): pass @@ -73,11 +74,11 @@ def test_latest_prefers_highest_numeric_version(): return "version2" # Act: build a fresh mapping (it should now see our ephemeral subclasses) - mapping: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping() + mapping: Mapping[NodeType, Mapping[str, type[Node]]] = get_node_type_classes_mapping() # Assert: both numeric versions exist for this NodeType; 'latest' points to the higher numeric version - assert NodeType.LEGACY_VARIABLE_AGGREGATOR in mapping - legacy_versions = mapping[NodeType.LEGACY_VARIABLE_AGGREGATOR] + assert BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR in mapping + legacy_versions = mapping[BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR] assert legacy_versions.get("1") is _Version1 assert legacy_versions.get("2") is _Version2 diff --git a/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py index 95cb653635..784e08edd2 100644 --- a/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py @@ -272,7 +272,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="node_1", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert result == {} @@ -292,7 +292,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="node_1", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert "node_1.input_text" in result @@ -315,7 +315,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="code_node", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert len(result) == 3 @@ -338,7 +338,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="node_x", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert result["node_x.deep_var"] == ["node", "obj", "nested", "value"] @@ -437,7 +437,7 @@ class TestCodeNodeInitialization: "outputs": {"x": {"type": "number"}}, } - node._node_data = node._hydrate_node_data(data) + node._node_data = CodeNode._node_data_type.model_validate(data, from_attributes=True) assert node._node_data.title == "Test Node" assert node._node_data.code_language == CodeLanguage.PYTHON3 @@ -453,7 +453,7 @@ class TestCodeNodeInitialization: "outputs": {"x": {"type": "number"}}, } - node._node_data = node._hydrate_node_data(data) + node._node_data = CodeNode._node_data_type.model_validate(data, from_attributes=True) assert node._node_data.code_language == CodeLanguage.JAVASCRIPT diff --git a/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py b/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py index db096b1aed..859115ceb3 100644 --- a/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py @@ -1,7 +1,7 @@ +from core.workflow.nodes.datasource.datasource_node import DatasourceNode from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent -from dify_graph.nodes.datasource.datasource_node import DatasourceNode class _VarSeg: @@ -74,6 +74,8 @@ def test_datasource_node_delegates_to_manager_stream(mocker): def get_upload_file_by_id(cls, **_): raise AssertionError("not called") + mocker.patch("core.workflow.nodes.datasource.datasource_node.DatasourceManager", new=_Mgr) + node = DatasourceNode( id="n", config={ @@ -90,7 +92,6 @@ def test_datasource_node_delegates_to_manager_stream(mocker): }, graph_init_params=gp, graph_runtime_state=gs, - datasource_manager=_Mgr, ) evts = list(node._run()) diff --git a/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py b/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py index 1fea19e795..b0ed47158d 100644 --- a/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py +++ b/api/tests/unit_tests/core/workflow/nodes/human_input/test_human_input_form_filled_event.py @@ -3,7 +3,7 @@ from types import SimpleNamespace from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.graph_events import ( NodeRunHumanInputFormFilledEvent, NodeRunHumanInputFormTimeoutEvent, @@ -47,7 +47,7 @@ def _build_node(form_content: str = "Please enter your name:\n\n{{#$output.name# config = { "id": "node-1", - "type": NodeType.HUMAN_INPUT.value, + "type": BuiltinNodeTypes.HUMAN_INPUT, "data": { "title": "Human Input", "form_content": form_content, @@ -111,7 +111,7 @@ def _build_timeout_node() -> HumanInputNode: config = { "id": "node-1", - "type": NodeType.HUMAN_INPUT.value, + "type": BuiltinNodeTypes.HUMAN_INPUT, "data": { "title": "Human Input", "form_content": "Please enter your name:\n\n{{#$output.name#}}", diff --git a/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py index b95a7ad8ae..fdf5f4d1f8 100644 --- a/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/iteration/iteration_node_spec.py @@ -1,4 +1,5 @@ -from dify_graph.enums import NodeType +from dify_graph.entities.graph_config import NodeConfigDictAdapter +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.iteration.entities import ErrorHandleMode, IterationNodeData from dify_graph.nodes.iteration.exc import ( InvalidIteratorValueError, @@ -90,7 +91,7 @@ class TestIterationNodeClassAttributes: def test_node_type(self): """Test IterationNode node_type attribute.""" - assert IterationNode.node_type == NodeType.ITERATION + assert IterationNode.node_type == BuiltinNodeTypes.ITERATION def test_version(self): """Test IterationNode version method.""" @@ -388,3 +389,50 @@ class TestIterationNodeErrorStrategies: result = node._get_default_value_dict() assert isinstance(result, dict) + + +def test_extract_variable_selector_to_variable_mapping_validates_child_node_configs(monkeypatch) -> None: + seen_configs: list[object] = [] + original_validate_python = NodeConfigDictAdapter.validate_python + + def record_validate_python(value: object): + seen_configs.append(value) + return original_validate_python(value) + + monkeypatch.setattr(NodeConfigDictAdapter, "validate_python", record_validate_python) + + child_node_config = { + "id": "answer-node", + "data": { + "type": "answer", + "title": "Answer", + "answer": "", + "iteration_id": "iteration-node", + }, + } + + IterationNode._extract_variable_selector_to_variable_mapping( + graph_config={ + "nodes": [ + { + "id": "iteration-node", + "data": { + "type": "iteration", + "title": "Iteration", + "iterator_selector": ["start", "items"], + "output_selector": ["iteration", "result"], + }, + }, + child_node_config, + ], + "edges": [], + }, + node_id="iteration-node", + node_data=IterationNodeData( + title="Iteration", + iterator_selector=["start", "items"], + output_selector=["iteration", "result"], + ), + ) + + assert seen_configs == [child_node_config] diff --git a/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py b/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py index 8116fc8b3c..33f7ace5ab 100644 --- a/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/knowledge_index/test_knowledge_index_node.py @@ -5,12 +5,16 @@ from unittest.mock import Mock import pytest from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom +from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData +from core.workflow.nodes.knowledge_index.exc import KnowledgeIndexNodeError +from core.workflow.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode +from core.workflow.nodes.knowledge_index.protocols import ( + IndexProcessorProtocol, + Preview, + PreviewItem, + SummaryIndexServiceProtocol, +) from dify_graph.enums import SystemVariableKey, WorkflowNodeExecutionStatus -from dify_graph.nodes.knowledge_index.entities import KnowledgeIndexNodeData -from dify_graph.nodes.knowledge_index.exc import KnowledgeIndexNodeError -from dify_graph.nodes.knowledge_index.knowledge_index_node import KnowledgeIndexNode -from dify_graph.repositories.index_processor_protocol import IndexProcessorProtocol, Preview, PreviewItem -from dify_graph.repositories.summary_index_service_protocol import SummaryIndexServiceProtocol from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from dify_graph.variables.segments import StringSegment @@ -45,16 +49,24 @@ def mock_graph_runtime_state(): @pytest.fixture -def mock_index_processor(): +def mock_index_processor(mocker): """Create mock IndexProcessorProtocol.""" mock_processor = Mock(spec=IndexProcessorProtocol) + mocker.patch( + "core.workflow.nodes.knowledge_index.knowledge_index_node.IndexProcessor", + return_value=mock_processor, + ) return mock_processor @pytest.fixture -def mock_summary_index_service(): +def mock_summary_index_service(mocker): """Create mock SummaryIndexServiceProtocol.""" mock_service = Mock(spec=SummaryIndexServiceProtocol) + mocker.patch( + "core.workflow.nodes.knowledge_index.knowledge_index_node.SummaryIndex", + return_value=mock_service, + ) return mock_service @@ -107,8 +119,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Assert @@ -137,8 +147,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act & Assert @@ -172,8 +180,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act & Assert @@ -210,8 +216,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -269,8 +273,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -334,8 +336,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -387,8 +387,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -446,8 +444,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -506,8 +502,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -546,8 +540,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -595,8 +587,6 @@ class TestKnowledgeIndexNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act @@ -637,8 +627,6 @@ class TestInvokeKnowledgeIndex: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - index_processor=mock_index_processor, - summary_index_service=mock_summary_index_service, ) # Act diff --git a/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py b/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py index e194d66ee3..99997db6b2 100644 --- a/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/knowledge_retrieval/test_knowledge_retrieval_node.py @@ -5,17 +5,19 @@ from unittest.mock import Mock import pytest from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom -from dify_graph.enums import WorkflowNodeExecutionStatus -from dify_graph.model_runtime.entities.llm_entities import LLMUsage -from dify_graph.nodes.knowledge_retrieval.entities import ( +from core.workflow.nodes.knowledge_retrieval.entities import ( + Condition, KnowledgeRetrievalNodeData, + MetadataFilteringCondition, MultipleRetrievalConfig, RerankingModelConfig, SingleRetrievalConfig, ) -from dify_graph.nodes.knowledge_retrieval.exc import RateLimitExceededError -from dify_graph.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode -from dify_graph.repositories.rag_retrieval_protocol import RAGRetrievalProtocol, Source +from core.workflow.nodes.knowledge_retrieval.exc import RateLimitExceededError +from core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode +from core.workflow.nodes.knowledge_retrieval.retrieval import RAGRetrievalProtocol, Source +from dify_graph.enums import WorkflowNodeExecutionStatus +from dify_graph.model_runtime.entities.llm_entities import LLMUsage from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable from dify_graph.variables import StringSegment @@ -50,11 +52,15 @@ def mock_graph_runtime_state(): @pytest.fixture -def mock_rag_retrieval(): +def mock_rag_retrieval(mocker): """Create mock RAGRetrievalProtocol.""" mock_retrieval = Mock(spec=RAGRetrievalProtocol) mock_retrieval.knowledge_retrieval.return_value = [] mock_retrieval.llm_usage = LLMUsage.empty_usage() + mocker.patch( + "core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node.DatasetRetrieval", + return_value=mock_retrieval, + ) return mock_retrieval @@ -104,13 +110,11 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Assert assert node.id == node_id assert node._rag_retrieval == mock_rag_retrieval - assert node._llm_file_saver is not None def test_run_with_no_query_or_attachment( self, @@ -135,7 +139,6 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -195,7 +198,6 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -205,6 +207,7 @@ class TestKnowledgeRetrievalNode: assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED assert "result" in result.outputs assert mock_rag_retrieval.knowledge_retrieval.called + mock_source.model_dump.assert_called_once_with(by_alias=True) def test_run_with_query_variable_multiple_mode( self, @@ -239,7 +242,6 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -276,7 +278,6 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -312,7 +313,6 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -354,7 +354,6 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -394,7 +393,6 @@ class TestKnowledgeRetrievalNode: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -408,14 +406,14 @@ class TestKnowledgeRetrievalNode: """Test _extract_variable_selector_to_variable_mapping class method.""" # Arrange node_id = "knowledge_node_1" - node_data = { - "type": "knowledge-retrieval", - "title": "Knowledge Retrieval", - "dataset_ids": [str(uuid.uuid4())], - "retrieval_mode": "multiple", - "query_variable_selector": ["start", "query"], - "query_attachment_selector": ["start", "attachments"], - } + node_data = KnowledgeRetrievalNodeData( + type="knowledge-retrieval", + title="Knowledge Retrieval", + dataset_ids=[str(uuid.uuid4())], + retrieval_mode="multiple", + query_variable_selector=["start", "query"], + query_attachment_selector=["start", "attachments"], + ) graph_config = {} # Act @@ -476,7 +474,6 @@ class TestFetchDatasetRetriever: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -514,7 +511,6 @@ class TestFetchDatasetRetriever: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -570,7 +566,6 @@ class TestFetchDatasetRetriever: config=config, graph_init_params=mock_graph_init_params, graph_runtime_state=mock_graph_runtime_state, - rag_retrieval=mock_rag_retrieval, ) # Act @@ -592,3 +587,104 @@ class TestFetchDatasetRetriever: # Assert assert version == "1" + + def test_resolve_metadata_filtering_conditions_templates( + self, + mock_graph_init_params, + mock_graph_runtime_state, + mock_rag_retrieval, + ): + """_resolve_metadata_filtering_conditions should expand {{#...#}} and keep numbers/None unchanged.""" + # Arrange + node_id = str(uuid.uuid4()) + config = { + "id": node_id, + "data": { + "title": "Knowledge Retrieval", + "type": "knowledge-retrieval", + "dataset_ids": [str(uuid.uuid4())], + "retrieval_mode": "multiple", + }, + } + # Variable in pool used by template + mock_graph_runtime_state.variable_pool.add(["start", "query"], StringSegment(value="readme")) + + node = KnowledgeRetrievalNode( + id=node_id, + config=config, + graph_init_params=mock_graph_init_params, + graph_runtime_state=mock_graph_runtime_state, + ) + + conditions = MetadataFilteringCondition( + logical_operator="and", + conditions=[ + Condition(name="document_name", comparison_operator="is", value="{{#start.query#}}"), + Condition(name="tags", comparison_operator="in", value=["x", "{{#start.query#}}"]), + Condition(name="year", comparison_operator="=", value=2025), + ], + ) + + # Act + resolved = node._resolve_metadata_filtering_conditions(conditions) + + # Assert + assert resolved.logical_operator == "and" + assert resolved.conditions[0].value == "readme" + assert isinstance(resolved.conditions[1].value, list) + assert resolved.conditions[1].value[1] == "readme" + assert resolved.conditions[2].value == 2025 + + def test_fetch_passes_resolved_metadata_conditions( + self, + mock_graph_init_params, + mock_graph_runtime_state, + mock_rag_retrieval, + ): + """_fetch_dataset_retriever should pass resolved metadata conditions into request.""" + # Arrange + query = "hi" + variables = {"query": query} + mock_graph_runtime_state.variable_pool.add(["start", "q"], StringSegment(value="readme")) + + node_data = KnowledgeRetrievalNodeData( + title="Knowledge Retrieval", + type="knowledge-retrieval", + dataset_ids=[str(uuid.uuid4())], + retrieval_mode="multiple", + multiple_retrieval_config=MultipleRetrievalConfig( + top_k=4, + score_threshold=0.0, + reranking_mode="reranking_model", + reranking_enable=True, + reranking_model=RerankingModelConfig(provider="cohere", model="rerank-v2"), + ), + metadata_filtering_mode="manual", + metadata_filtering_conditions=MetadataFilteringCondition( + logical_operator="and", + conditions=[ + Condition(name="document_name", comparison_operator="is", value="{{#start.q#}}"), + ], + ), + ) + + node_id = str(uuid.uuid4()) + config = {"id": node_id, "data": node_data.model_dump()} + node = KnowledgeRetrievalNode( + id=node_id, + config=config, + graph_init_params=mock_graph_init_params, + graph_runtime_state=mock_graph_runtime_state, + ) + + mock_rag_retrieval.knowledge_retrieval.return_value = [] + mock_rag_retrieval.llm_usage = LLMUsage.empty_usage() + + # Act + node._fetch_dataset_retriever(node_data=node_data, variables=variables) + + # Assert the passed request has resolved value + call_args = mock_rag_retrieval.knowledge_retrieval.call_args + request = call_args[1]["request"] + assert request.metadata_filtering_conditions is not None + assert request.metadata_filtering_conditions.conditions[0].value == "readme" diff --git a/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py b/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py index 25760ba352..d71e0921c1 100644 --- a/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/list_operator/node_spec.py @@ -4,7 +4,7 @@ import pytest from dify_graph.entities import GraphInitParams from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.nodes.list_operator.node import ListOperatorNode from dify_graph.runtime import GraphRuntimeState from dify_graph.variables import ArrayNumberSegment, ArrayStringSegment @@ -71,7 +71,7 @@ class TestListOperatorNode: graph_runtime_state=mock_graph_runtime_state, ) - assert node.node_type == NodeType.LIST_OPERATOR + assert node.node_type == BuiltinNodeTypes.LIST_OPERATOR assert node._node_data.title == "List Operator" def test_version(self): diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py index a3afd1ed5c..b0f0fd428b 100644 --- a/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py +++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_file_saver.py @@ -1,10 +1,10 @@ import uuid from typing import NamedTuple from unittest import mock +from unittest.mock import MagicMock import httpx import pytest -from sqlalchemy import Engine from core.helper import ssrf_proxy from core.tools import signature @@ -44,7 +44,6 @@ class TestFileSaverImpl: ) mock_tool_file.id = _gen_id() mocked_tool_file_manager = mock.MagicMock(spec=ToolFileManager) - mocked_engine = mock.MagicMock(spec=Engine) mocked_tool_file_manager.create_file_by_raw.return_value = mock_tool_file monkeypatch.setattr(FileSaverImpl, "_get_tool_file_manager", lambda _: mocked_tool_file_manager) @@ -53,11 +52,12 @@ class TestFileSaverImpl: # Since `File.generate_url` used `signature.sign_tool_file` directly, we also need to patch it here. monkeypatch.setattr(models, "sign_tool_file", mocked_sign_file) mocked_sign_file.return_value = mock_signed_url + http_client = MagicMock() storage_file_manager = FileSaverImpl( user_id=user_id, tenant_id=tenant_id, - engine_factory=mocked_engine, + http_client=http_client, ) file = storage_file_manager.save_binary_string(_PNG_DATA, mime_type, file_type) @@ -87,16 +87,18 @@ class TestFileSaverImpl: status_code=401, request=mock_request, ) + http_client = MagicMock() + http_client.get.return_value = mock_response + file_saver = FileSaverImpl( user_id=_gen_id(), tenant_id=_gen_id(), + http_client=http_client, ) - mock_get = mock.MagicMock(spec=ssrf_proxy.get, return_value=mock_response) - monkeypatch.setattr(ssrf_proxy, "get", mock_get) with pytest.raises(httpx.HTTPStatusError) as exc: file_saver.save_remote_url(_TEST_URL, FileType.IMAGE) - mock_get.assert_called_once_with(_TEST_URL) + http_client.get.assert_called_once_with(_TEST_URL) assert exc.value.response.status_code == 401 def test_save_remote_url_success(self, monkeypatch: pytest.MonkeyPatch): @@ -112,8 +114,10 @@ class TestFileSaverImpl: headers={"Content-Type": mime_type}, request=mock_request, ) + http_client = MagicMock() + http_client.get.return_value = mock_response - file_saver = FileSaverImpl(user_id=user_id, tenant_id=tenant_id) + file_saver = FileSaverImpl(user_id=user_id, tenant_id=tenant_id, http_client=http_client) mock_tool_file = ToolFile( user_id=user_id, tenant_id=tenant_id, diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py index 90308facc3..d56035b6bc 100644 --- a/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_node.py @@ -111,6 +111,7 @@ def llm_node( "id": "1", "data": llm_node_data.model_dump(), } + http_client = mock.MagicMock() node = LLMNode( id="1", config=node_config, @@ -120,6 +121,7 @@ def llm_node( model_factory=mock_model_factory, model_instance=mock.MagicMock(spec=ModelInstance), llm_file_saver=mock_file_saver, + http_client=http_client, ) return node @@ -632,6 +634,7 @@ def llm_node_for_multimodal(llm_node_data, graph_init_params, graph_runtime_stat "id": "1", "data": llm_node_data.model_dump(), } + http_client = mock.MagicMock() node = LLMNode( id="1", config=node_config, @@ -641,6 +644,7 @@ def llm_node_for_multimodal(llm_node_data, graph_init_params, graph_runtime_stat model_factory=mock_model_factory, model_instance=mock.MagicMock(spec=ModelInstance), llm_file_saver=mock_file_saver, + http_client=http_client, ) return node, mock_file_saver diff --git a/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py index 6831626f58..332a8761f9 100644 --- a/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py @@ -3,7 +3,7 @@ from unittest.mock import MagicMock import pytest from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom -from dify_graph.enums import ErrorStrategy, NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, WorkflowNodeExecutionStatus from dify_graph.graph import Graph from dify_graph.nodes.template_transform.template_renderer import TemplateRenderError from dify_graph.nodes.template_transform.template_transform_node import TemplateTransformNode @@ -65,7 +65,7 @@ class TestTemplateTransformNode: template_renderer=mock_renderer, ) - assert node.node_type == NodeType.TEMPLATE_TRANSFORM + assert node.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM assert node._node_data.title == "Template Transform" assert len(node._node_data.variables) == 2 assert node._node_data.template == "Hello {{ name }}, you are {{ age }} years old!" diff --git a/api/tests/unit_tests/core/workflow/nodes/test_base_node.py b/api/tests/unit_tests/core/workflow/nodes/test_base_node.py index 44abf430c0..2b0205fb7b 100644 --- a/api/tests/unit_tests/core/workflow/nodes/test_base_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/test_base_node.py @@ -4,8 +4,9 @@ import pytest from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from dify_graph.entities import GraphInitParams -from dify_graph.enums import NodeType -from dify_graph.nodes.base.entities import BaseNodeData +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.base.node import Node from dify_graph.runtime import GraphRuntimeState, VariablePool from dify_graph.system_variable import SystemVariable @@ -17,7 +18,7 @@ class _SampleNodeData(BaseNodeData): class _SampleNode(Node[_SampleNodeData]): - node_type = NodeType.ANSWER + node_type = BuiltinNodeTypes.ANSWER @classmethod def version(cls) -> str: @@ -40,13 +41,26 @@ def _build_context(graph_config: Mapping[str, object]) -> tuple[GraphInitParams, return init_params, runtime_state +def _build_node_config() -> NodeConfigDict: + return NodeConfigDictAdapter.validate_python( + { + "id": "node-1", + "data": { + "type": BuiltinNodeTypes.ANSWER, + "title": "Sample", + "foo": "bar", + }, + } + ) + + def test_node_hydrates_data_during_initialization(): graph_config: dict[str, object] = {} init_params, runtime_state = _build_context(graph_config) node = _SampleNode( id="node-1", - config={"id": "node-1", "data": {"title": "Sample", "foo": "bar"}}, + config=_build_node_config(), graph_init_params=init_params, graph_runtime_state=runtime_state, ) @@ -72,7 +86,7 @@ def test_node_accepts_invoke_from_enum(): node = _SampleNode( id="node-1", - config={"id": "node-1", "data": {"title": "Sample", "foo": "bar"}}, + config=_build_node_config(), graph_init_params=init_params, graph_runtime_state=runtime_state, ) @@ -91,7 +105,7 @@ def test_missing_generic_argument_raises_type_error(): with pytest.raises(TypeError): class _InvalidNode(Node): # type: ignore[type-abstract] - node_type = NodeType.ANSWER + node_type = BuiltinNodeTypes.ANSWER @classmethod def version(cls) -> str: @@ -99,3 +113,17 @@ def test_missing_generic_argument_raises_type_error(): def _run(self): raise NotImplementedError + + +def test_base_node_data_keeps_dict_style_access_compatibility(): + node_data = _SampleNodeData.model_validate( + { + "type": BuiltinNodeTypes.ANSWER, + "title": "Sample", + "foo": "bar", + } + ) + + assert node_data["foo"] == "bar" + assert node_data.get("foo") == "bar" + assert node_data.get("missing", "fallback") == "fallback" diff --git a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py index 5e20b1e12f..40754974c1 100644 --- a/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/test_document_extractor_node.py @@ -7,7 +7,7 @@ from docx.oxml.text.paragraph import CT_P from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from dify_graph.entities import GraphInitParams -from dify_graph.enums import NodeType, WorkflowNodeExecutionStatus +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus from dify_graph.file import File, FileTransferMethod from dify_graph.node_events import NodeRunResult from dify_graph.nodes.document_extractor import DocumentExtractorNode, DocumentExtractorNodeData @@ -16,6 +16,7 @@ from dify_graph.nodes.document_extractor.node import ( _extract_text_from_excel, _extract_text_from_pdf, _extract_text_from_plain_text, + _normalize_docx_zip, ) from dify_graph.variables import ArrayFileSegment from dify_graph.variables.segments import ArrayStringSegment @@ -86,6 +87,38 @@ def test_run_invalid_variable_type(document_extractor_node, mock_graph_runtime_s assert "is not an ArrayFileSegment" in result.error +def test_run_empty_file_list_returns_succeeded(document_extractor_node, mock_graph_runtime_state): + """Empty file list should return SUCCEEDED with empty documents and ArrayStringSegment([]).""" + document_extractor_node.graph_runtime_state = mock_graph_runtime_state + + # Provide an actual ArrayFileSegment with an empty list + mock_graph_runtime_state.variable_pool.get.return_value = ArrayFileSegment(value=[]) + + result = document_extractor_node._run() + + assert isinstance(result, NodeRunResult) + assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED, result.error + assert result.process_data.get("documents") == [] + assert result.outputs["text"] == ArrayStringSegment(value=[]) + + +def test_run_none_only_file_list_returns_succeeded(document_extractor_node, mock_graph_runtime_state): + """A file list containing only None (e.g., [None]) should be filtered to [] and succeed.""" + document_extractor_node.graph_runtime_state = mock_graph_runtime_state + + # Use a Mock to bypass type validation for None entries in the list + afs = Mock(spec=ArrayFileSegment) + afs.value = [None] + mock_graph_runtime_state.variable_pool.get.return_value = afs + + result = document_extractor_node._run() + + assert isinstance(result, NodeRunResult) + assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED, result.error + assert result.process_data.get("documents") == [] + assert result.outputs["text"] == ArrayStringSegment(value=[]) + + @pytest.mark.parametrize( ("mime_type", "file_content", "expected_text", "transfer_method", "extension"), [ @@ -217,7 +250,7 @@ def test_extract_text_from_docx(mock_document): def test_node_type(document_extractor_node): - assert document_extractor_node.node_type == NodeType.DOCUMENT_EXTRACTOR + assert document_extractor_node.node_type == BuiltinNodeTypes.DOCUMENT_EXTRACTOR @patch("pandas.ExcelFile") @@ -385,3 +418,58 @@ def test_extract_text_from_excel_numeric_type_column(mock_excel_file): expected_manual = "| 1.0 | 1.1 |\n| --- | --- |\n| Test | Test |\n\n" assert expected_manual == result + + +def _make_docx_zip(use_backslash: bool) -> bytes: + """Helper to build a minimal in-memory DOCX zip. + + When use_backslash=True the ZIP entry names use backslash separators + (as produced by Evernote on Windows), otherwise forward slashes are used. + """ + import zipfile + + sep = "\\" if use_backslash else "/" + buf = io.BytesIO() + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + zf.writestr("[Content_Types].xml", b"") + zf.writestr(f"_rels{sep}.rels", b"") + zf.writestr(f"word{sep}document.xml", b"") + zf.writestr(f"word{sep}_rels{sep}document.xml.rels", b"") + return buf.getvalue() + + +def test_normalize_docx_zip_replaces_backslashes(): + """ZIP entries with backslash separators must be rewritten to forward slashes.""" + import zipfile + + malformed = _make_docx_zip(use_backslash=True) + fixed = _normalize_docx_zip(malformed) + + with zipfile.ZipFile(io.BytesIO(fixed)) as zf: + names = zf.namelist() + + assert "word/document.xml" in names + assert "word/_rels/document.xml.rels" in names + # No entry should contain a backslash after normalization + assert all("\\" not in name for name in names) + + +def test_normalize_docx_zip_leaves_forward_slash_unchanged(): + """ZIP entries that already use forward slashes must not be modified.""" + import zipfile + + normal = _make_docx_zip(use_backslash=False) + fixed = _normalize_docx_zip(normal) + + with zipfile.ZipFile(io.BytesIO(fixed)) as zf: + names = zf.namelist() + + assert "word/document.xml" in names + assert "word/_rels/document.xml.rels" in names + + +def test_normalize_docx_zip_returns_original_on_bad_zip(): + """Non-zip bytes must be returned as-is without raising.""" + garbage = b"not a zip file at all" + result = _normalize_docx_zip(garbage) + assert result == garbage diff --git a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py index 041bd66d03..c746a945fe 100644 --- a/api/tests/unit_tests/core/workflow/nodes/test_if_else.py +++ b/api/tests/unit_tests/core/workflow/nodes/test_if_else.py @@ -60,7 +60,7 @@ def test_execute_if_else_result_true(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "if-else", @@ -154,7 +154,7 @@ def test_execute_if_else_result_false(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "if-else", @@ -328,7 +328,7 @@ def test_execute_if_else_boolean_conditions(condition: Condition): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_data = { "title": "Boolean Test", @@ -382,7 +382,7 @@ def test_execute_if_else_boolean_false_conditions(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_data = { "title": "Boolean False Test", @@ -450,7 +450,7 @@ def test_execute_if_else_boolean_cases_structure(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_data = { "title": "Boolean Cases Test", diff --git a/api/tests/unit_tests/core/workflow/nodes/test_loop_node.py b/api/tests/unit_tests/core/workflow/nodes/test_loop_node.py new file mode 100644 index 0000000000..6372583839 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/nodes/test_loop_node.py @@ -0,0 +1,52 @@ +from dify_graph.entities.graph_config import NodeConfigDictAdapter +from dify_graph.nodes.loop.entities import LoopNodeData +from dify_graph.nodes.loop.loop_node import LoopNode + + +def test_extract_variable_selector_to_variable_mapping_validates_child_node_configs(monkeypatch) -> None: + seen_configs: list[object] = [] + original_validate_python = NodeConfigDictAdapter.validate_python + + def record_validate_python(value: object): + seen_configs.append(value) + return original_validate_python(value) + + monkeypatch.setattr(NodeConfigDictAdapter, "validate_python", record_validate_python) + + child_node_config = { + "id": "answer-node", + "data": { + "type": "answer", + "title": "Answer", + "answer": "", + "loop_id": "loop-node", + }, + } + + LoopNode._extract_variable_selector_to_variable_mapping( + graph_config={ + "nodes": [ + { + "id": "loop-node", + "data": { + "type": "loop", + "title": "Loop", + "loop_count": 1, + "break_conditions": [], + "logical_operator": "and", + }, + }, + child_node_config, + ], + "edges": [], + }, + node_id="loop-node", + node_data=LoopNodeData( + title="Loop", + loop_count=1, + break_conditions=[], + logical_operator="and", + ), + ) + + assert seen_configs == [child_node_config] diff --git a/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py b/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py index 11554169e1..3cbd96dfef 100644 --- a/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/tool/test_tool_node.py @@ -31,6 +31,7 @@ def tool_node(monkeypatch) -> ToolNode: ops_stub.TraceTask = object # pragma: no cover - stub attribute monkeypatch.setitem(sys.modules, module_name, ops_stub) + from dify_graph.nodes.protocols import ToolFileManagerProtocol from dify_graph.nodes.tool.tool_node import ToolNode graph_config: dict[str, Any] = { @@ -69,11 +70,16 @@ def tool_node(monkeypatch) -> ToolNode: graph_runtime_state = GraphRuntimeState(variable_pool=variable_pool, start_at=0.0) config = graph_config["nodes"][0] + + # Provide a stub ToolFileManager to satisfy the updated ToolNode constructor + tool_file_manager_factory = MagicMock(spec=ToolFileManagerProtocol) + node = ToolNode( id="node-instance", config=config, graph_init_params=init_params, graph_runtime_state=graph_runtime_state, + tool_file_manager_factory=tool_file_manager_factory, ) return node diff --git a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py index 2cd3a38fa6..e69c05dc0b 100644 --- a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py +++ b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v1/test_variable_assigner_v1.py @@ -88,7 +88,7 @@ def test_overwrite_string_variable(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "node_id", @@ -188,7 +188,7 @@ def test_append_variable_to_array(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "node_id", @@ -277,7 +277,7 @@ def test_clear_array(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "node_id", diff --git a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py index 5b285c2681..6874f3fef1 100644 --- a/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py +++ b/api/tests/unit_tests/core/workflow/nodes/variable_assigner/v2/test_variable_assigner_v2.py @@ -118,7 +118,7 @@ def test_remove_first_from_array(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "node_id", @@ -206,7 +206,7 @@ def test_remove_last_from_array(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "node_id", @@ -291,7 +291,7 @@ def test_remove_first_from_empty_array(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "node_id", @@ -376,7 +376,7 @@ def test_remove_last_from_empty_array(): graph_init_params=init_params, graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "node_id", diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py index 410c4993e4..6be5bb23e8 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_entities.py @@ -1,7 +1,7 @@ import pytest from pydantic import ValidationError -from dify_graph.nodes.trigger_webhook.entities import ( +from core.workflow.nodes.trigger_webhook.entities import ( ContentType, Method, WebhookBodyParameter, @@ -210,9 +210,6 @@ def test_webhook_data_model_dump_with_alias(): def test_webhook_data_validation_errors(): """Test WebhookData validation errors.""" - # Title is required (inherited from BaseNodeData) - with pytest.raises(ValidationError): - WebhookData() # Invalid method with pytest.raises(ValidationError): @@ -254,6 +251,36 @@ def test_webhook_data_sequence_fields(): assert len(data.headers) == 1 # Should still be 1 +def test_webhook_data_rejects_non_string_header_types(): + """Headers should stay string-only because runtime does not coerce header values.""" + for param_type in ["number", "boolean", "object", "array[string]", "file"]: + with pytest.raises(ValidationError): + WebhookData( + title="Test", + headers=[WebhookParameter(name="X-Test", type=param_type)], + ) + + +def test_webhook_data_limits_query_param_types_to_scalar_values(): + """Query params only support scalar conversions in the current runtime.""" + data = WebhookData( + title="Test", + params=[ + WebhookParameter(name="count", type="number"), + WebhookParameter(name="enabled", type="boolean"), + ], + ) + assert data.params[0].type == "number" + assert data.params[1].type == "boolean" + + for param_type in ["object", "array[string]", "array[number]", "array[boolean]", "array[object]", "file"]: + with pytest.raises(ValidationError): + WebhookData( + title="Test", + params=[WebhookParameter(name="test", type=param_type)], + ) + + def test_webhook_data_sync_mode(): """Test WebhookData SyncMode nested enum.""" # Test that SyncMode enum exists and has expected value @@ -297,7 +324,7 @@ def test_webhook_body_parameter_edge_cases(): def test_webhook_data_inheritance(): """Test WebhookData inherits from BaseNodeData correctly.""" - from dify_graph.nodes.base import BaseNodeData + from dify_graph.entities.base_node_data import BaseNodeData # Test that WebhookData is a subclass of BaseNodeData assert issubclass(WebhookData, BaseNodeData) diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py index f2273e441e..ddf1af5a59 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_exceptions.py @@ -1,12 +1,12 @@ import pytest -from dify_graph.nodes.base.exc import BaseNodeError -from dify_graph.nodes.trigger_webhook.exc import ( +from core.workflow.nodes.trigger_webhook.exc import ( WebhookConfigError, WebhookNodeError, WebhookNotFoundError, WebhookTimeoutError, ) +from dify_graph.entities.exc import BaseNodeError def test_webhook_node_error_inheritance(): @@ -149,7 +149,7 @@ def test_webhook_error_attributes(): assert WebhookConfigError.__name__ == "WebhookConfigError" # Test that all error classes have proper __module__ - expected_module = "dify_graph.nodes.trigger_webhook.exc" + expected_module = "core.workflow.nodes.trigger_webhook.exc" assert WebhookNodeError.__module__ == expected_module assert WebhookTimeoutError.__module__ == expected_module assert WebhookNotFoundError.__module__ == expected_module diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py index c750e74182..78dd7ce0f3 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_file_conversion.py @@ -9,15 +9,15 @@ when passing files to downstream LLM nodes. from unittest.mock import Mock, patch from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom -from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams -from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.nodes.trigger_webhook.entities import ( +from core.workflow.nodes.trigger_webhook.entities import ( ContentType, Method, WebhookBodyParameter, WebhookData, ) -from dify_graph.nodes.trigger_webhook.node import TriggerWebhookNode +from core.workflow.nodes.trigger_webhook.node import TriggerWebhookNode +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams +from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus from dify_graph.runtime.graph_runtime_state import GraphRuntimeState from dify_graph.runtime.variable_pool import VariablePool from dify_graph.system_variable import SystemVariable @@ -130,8 +130,8 @@ def test_webhook_node_file_conversion_to_file_variable(): # Mock the file factory and variable factory with ( patch("factories.file_factory.build_from_mapping") as mock_file_factory, - patch("dify_graph.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory, - patch("dify_graph.nodes.trigger_webhook.node.FileVariable") as mock_file_variable, + patch("core.workflow.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory, + patch("core.workflow.nodes.trigger_webhook.node.FileVariable") as mock_file_variable, ): # Setup mocks mock_file_obj = Mock() @@ -322,8 +322,8 @@ def test_webhook_node_file_conversion_mixed_parameters(): with ( patch("factories.file_factory.build_from_mapping") as mock_file_factory, - patch("dify_graph.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory, - patch("dify_graph.nodes.trigger_webhook.node.FileVariable") as mock_file_variable, + patch("core.workflow.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory, + patch("core.workflow.nodes.trigger_webhook.node.FileVariable") as mock_file_variable, ): # Setup mocks for file mock_file_obj = Mock() @@ -390,8 +390,8 @@ def test_webhook_node_different_file_types(): with ( patch("factories.file_factory.build_from_mapping") as mock_file_factory, - patch("dify_graph.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory, - patch("dify_graph.nodes.trigger_webhook.node.FileVariable") as mock_file_variable, + patch("core.workflow.nodes.trigger_webhook.node.build_segment_with_type") as mock_segment_factory, + patch("core.workflow.nodes.trigger_webhook.node.FileVariable") as mock_file_variable, ): # Setup mocks for all files mock_file_objs = [Mock() for _ in range(3)] diff --git a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py index df13bbb92f..139f65d6c3 100644 --- a/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/webhook/test_webhook_node.py @@ -3,17 +3,18 @@ from unittest.mock import patch import pytest from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom -from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams -from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from dify_graph.file import File, FileTransferMethod, FileType -from dify_graph.nodes.trigger_webhook.entities import ( +from core.trigger.constants import TRIGGER_WEBHOOK_NODE_TYPE +from core.workflow.nodes.trigger_webhook.entities import ( ContentType, Method, WebhookBodyParameter, WebhookData, WebhookParameter, ) -from dify_graph.nodes.trigger_webhook.node import TriggerWebhookNode +from core.workflow.nodes.trigger_webhook.node import TriggerWebhookNode +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams +from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus +from dify_graph.file import File, FileTransferMethod, FileType from dify_graph.runtime.graph_runtime_state import GraphRuntimeState from dify_graph.runtime.variable_pool import VariablePool from dify_graph.system_variable import SystemVariable @@ -82,7 +83,7 @@ def test_webhook_node_basic_initialization(): node = create_webhook_node(data, variable_pool) - assert node.node_type.value == "trigger-webhook" + assert node.node_type == TRIGGER_WEBHOOK_NODE_TYPE assert node.version() == "1" assert node._get_title() == "Test Webhook" assert node._node_data.method == Method.POST diff --git a/api/tests/unit_tests/core/workflow/test_node_factory.py b/api/tests/unit_tests/core/workflow/test_node_factory.py new file mode 100644 index 0000000000..ab46126ca6 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/test_node_factory.py @@ -0,0 +1,587 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, patch, sentinel + +import pytest + +from core.app.entities.app_invoke_entities import DifyRunContext, InvokeFrom, UserFrom +from core.workflow import node_factory +from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY +from dify_graph.enums import BuiltinNodeTypes, NodeType, SystemVariableKey +from dify_graph.nodes.code.entities import CodeLanguage +from dify_graph.variables.segments import StringSegment + + +def _assert_typed_node_config(config, *, node_id: str, node_type: NodeType, version: str = "1") -> None: + assert config["id"] == node_id + assert isinstance(config["data"], BaseNodeData) + assert config["data"].type == node_type + assert config["data"].version == version + + +class TestFetchMemory: + @pytest.mark.parametrize( + ("conversation_id", "memory_config"), + [ + (None, object()), + ("conversation-id", None), + ], + ) + def test_returns_none_when_memory_or_conversation_is_missing(self, conversation_id, memory_config): + result = node_factory.fetch_memory( + conversation_id=conversation_id, + app_id="app-id", + node_data_memory=memory_config, + model_instance=sentinel.model_instance, + ) + + assert result is None + + def test_returns_none_when_conversation_does_not_exist(self, monkeypatch): + class FakeSelect: + def where(self, *_args): + return self + + class FakeSession: + def __init__(self, *_args, **_kwargs): + pass + + def __enter__(self): + return self + + def __exit__(self, *_args): + return False + + def scalar(self, _stmt): + return None + + monkeypatch.setattr(node_factory, "db", SimpleNamespace(engine=sentinel.engine)) + monkeypatch.setattr(node_factory, "select", MagicMock(return_value=FakeSelect())) + monkeypatch.setattr(node_factory, "Session", FakeSession) + + result = node_factory.fetch_memory( + conversation_id="conversation-id", + app_id="app-id", + node_data_memory=object(), + model_instance=sentinel.model_instance, + ) + + assert result is None + + def test_builds_token_buffer_memory_for_existing_conversation(self, monkeypatch): + conversation = sentinel.conversation + memory = sentinel.memory + + class FakeSelect: + def where(self, *_args): + return self + + class FakeSession: + def __init__(self, *_args, **_kwargs): + pass + + def __enter__(self): + return self + + def __exit__(self, *_args): + return False + + def scalar(self, _stmt): + return conversation + + token_buffer_memory = MagicMock(return_value=memory) + monkeypatch.setattr(node_factory, "db", SimpleNamespace(engine=sentinel.engine)) + monkeypatch.setattr(node_factory, "select", MagicMock(return_value=FakeSelect())) + monkeypatch.setattr(node_factory, "Session", FakeSession) + monkeypatch.setattr(node_factory, "TokenBufferMemory", token_buffer_memory) + + result = node_factory.fetch_memory( + conversation_id="conversation-id", + app_id="app-id", + node_data_memory=object(), + model_instance=sentinel.model_instance, + ) + + assert result is memory + token_buffer_memory.assert_called_once_with( + conversation=conversation, + model_instance=sentinel.model_instance, + ) + + +class TestDefaultWorkflowCodeExecutor: + def test_execute_delegates_to_code_executor(self, monkeypatch): + executor = node_factory.DefaultWorkflowCodeExecutor() + execute_workflow_code_template = MagicMock(return_value={"answer": "ok"}) + monkeypatch.setattr( + node_factory.CodeExecutor, + "execute_workflow_code_template", + execute_workflow_code_template, + ) + + result = executor.execute( + language=CodeLanguage.PYTHON3, + code="print('ok')", + inputs={"name": "workflow"}, + ) + + assert result == {"answer": "ok"} + execute_workflow_code_template.assert_called_once_with( + language=CodeLanguage.PYTHON3, + code="print('ok')", + inputs={"name": "workflow"}, + ) + + def test_is_execution_error_checks_code_execution_error_type(self): + executor = node_factory.DefaultWorkflowCodeExecutor() + + assert executor.is_execution_error(node_factory.CodeExecutionError("boom")) is True + assert executor.is_execution_error(RuntimeError("boom")) is False + + +class TestDifyNodeFactoryInit: + def test_init_builds_default_dependencies(self): + graph_init_params = SimpleNamespace(run_context={"context": "value"}) + graph_runtime_state = sentinel.graph_runtime_state + dify_context = SimpleNamespace(tenant_id="tenant-id") + template_renderer = sentinel.template_renderer + unstructured_api_config = sentinel.unstructured_api_config + http_request_config = sentinel.http_request_config + credentials_provider = sentinel.credentials_provider + model_factory = sentinel.model_factory + + with ( + patch.object( + node_factory.DifyNodeFactory, + "_resolve_dify_context", + return_value=dify_context, + ) as resolve_dify_context, + patch.object( + node_factory, + "CodeExecutorJinja2TemplateRenderer", + return_value=template_renderer, + ) as renderer_factory, + patch.object( + node_factory, + "UnstructuredApiConfig", + return_value=unstructured_api_config, + ), + patch.object( + node_factory, + "build_http_request_config", + return_value=http_request_config, + ), + patch.object( + node_factory, + "build_dify_model_access", + return_value=(credentials_provider, model_factory), + ) as build_dify_model_access, + ): + factory = node_factory.DifyNodeFactory( + graph_init_params=graph_init_params, + graph_runtime_state=graph_runtime_state, + ) + + resolve_dify_context.assert_called_once_with(graph_init_params.run_context) + build_dify_model_access.assert_called_once_with("tenant-id") + renderer_factory.assert_called_once() + assert renderer_factory.call_args.kwargs["code_executor"] is factory._code_executor + assert factory.graph_init_params is graph_init_params + assert factory.graph_runtime_state is graph_runtime_state + assert factory._dify_context is dify_context + assert factory._template_renderer is template_renderer + assert factory._document_extractor_unstructured_api_config is unstructured_api_config + assert factory._http_request_config is http_request_config + assert factory._llm_credentials_provider is credentials_provider + assert factory._llm_model_factory is model_factory + + +class TestDifyNodeFactoryResolveContext: + def test_requires_reserved_context_key(self): + with pytest.raises(ValueError, match=DIFY_RUN_CONTEXT_KEY): + node_factory.DifyNodeFactory._resolve_dify_context({}) + + def test_returns_existing_dify_context(self): + dify_context = DifyRunContext( + tenant_id="tenant-id", + app_id="app-id", + user_id="user-id", + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + ) + + result = node_factory.DifyNodeFactory._resolve_dify_context({DIFY_RUN_CONTEXT_KEY: dify_context}) + + assert result is dify_context + + def test_validates_mapping_context(self): + raw_context = { + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "tenant-id", + "app_id": "app-id", + "user_id": "user-id", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.DEBUGGER, + } + } + + result = node_factory.DifyNodeFactory._resolve_dify_context(raw_context) + + assert isinstance(result, DifyRunContext) + assert result.tenant_id == "tenant-id" + + +class TestDifyNodeFactoryCreateNode: + @pytest.fixture + def factory(self): + factory = object.__new__(node_factory.DifyNodeFactory) + factory.graph_init_params = sentinel.graph_init_params + factory.graph_runtime_state = sentinel.graph_runtime_state + factory._dify_context = SimpleNamespace(tenant_id="tenant-id", app_id="app-id") + factory._code_executor = sentinel.code_executor + factory._code_limits = sentinel.code_limits + factory._template_renderer = sentinel.template_renderer + factory._template_transform_max_output_length = 2048 + factory._http_request_http_client = sentinel.http_client + factory._http_request_tool_file_manager_factory = sentinel.tool_file_manager_factory + factory._http_request_file_manager = sentinel.file_manager + factory._document_extractor_unstructured_api_config = sentinel.unstructured_api_config + factory._http_request_config = sentinel.http_request_config + factory._llm_credentials_provider = sentinel.credentials_provider + factory._llm_model_factory = sentinel.model_factory + return factory + + def test_rejects_unknown_node_type(self, factory): + with pytest.raises(ValueError, match="No class mapping found for node type: missing"): + factory.create_node({"id": "node-id", "data": {"type": "missing"}}) + + def test_rejects_missing_class_mapping(self, monkeypatch, factory): + monkeypatch.setattr( + factory, + "_resolve_node_class", + MagicMock(side_effect=ValueError("No class mapping found for node type: start")), + ) + + with pytest.raises(ValueError, match="No class mapping found for node type: start"): + factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START}}) + + def test_rejects_missing_latest_class(self, monkeypatch, factory): + monkeypatch.setattr( + factory, + "_resolve_node_class", + MagicMock(side_effect=ValueError("No latest version class found for node type: start")), + ) + + with pytest.raises(ValueError, match="No latest version class found for node type: start"): + factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START}}) + + def test_uses_version_specific_class_when_available(self, monkeypatch, factory): + matched_node = sentinel.matched_node + latest_node_class = MagicMock(return_value=sentinel.latest_node) + matched_node_class = MagicMock(return_value=matched_node) + monkeypatch.setattr( + factory, + "_resolve_node_class", + MagicMock(return_value=matched_node_class), + ) + + result = factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START, "version": "9"}}) + + assert result is matched_node + matched_node_class.assert_called_once() + kwargs = matched_node_class.call_args.kwargs + assert kwargs["id"] == "node-id" + _assert_typed_node_config(kwargs["config"], node_id="node-id", node_type=BuiltinNodeTypes.START, version="9") + assert kwargs["graph_init_params"] is sentinel.graph_init_params + assert kwargs["graph_runtime_state"] is sentinel.graph_runtime_state + latest_node_class.assert_not_called() + + def test_falls_back_to_latest_class_when_version_specific_mapping_is_missing(self, monkeypatch, factory): + latest_node = sentinel.latest_node + latest_node_class = MagicMock(return_value=latest_node) + monkeypatch.setattr( + factory, + "_resolve_node_class", + MagicMock(return_value=latest_node_class), + ) + + result = factory.create_node({"id": "node-id", "data": {"type": BuiltinNodeTypes.START, "version": "9"}}) + + assert result is latest_node + latest_node_class.assert_called_once() + kwargs = latest_node_class.call_args.kwargs + assert kwargs["id"] == "node-id" + _assert_typed_node_config(kwargs["config"], node_id="node-id", node_type=BuiltinNodeTypes.START, version="9") + assert kwargs["graph_init_params"] is sentinel.graph_init_params + assert kwargs["graph_runtime_state"] is sentinel.graph_runtime_state + + @pytest.mark.parametrize( + ("node_type", "constructor_name"), + [ + (BuiltinNodeTypes.CODE, "CodeNode"), + (BuiltinNodeTypes.TEMPLATE_TRANSFORM, "TemplateTransformNode"), + (BuiltinNodeTypes.HTTP_REQUEST, "HttpRequestNode"), + (BuiltinNodeTypes.HUMAN_INPUT, "HumanInputNode"), + (KNOWLEDGE_INDEX_NODE_TYPE, "KnowledgeIndexNode"), + (BuiltinNodeTypes.DATASOURCE, "DatasourceNode"), + (BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "KnowledgeRetrievalNode"), + (BuiltinNodeTypes.DOCUMENT_EXTRACTOR, "DocumentExtractorNode"), + ], + ) + def test_creates_specialized_nodes(self, monkeypatch, factory, node_type, constructor_name): + created_node = object() + constructor = MagicMock(name=constructor_name, return_value=created_node) + monkeypatch.setattr( + factory, + "_resolve_node_class", + MagicMock(return_value=constructor), + ) + + if constructor_name == "HumanInputNode": + form_repository = sentinel.form_repository + form_repository_impl = MagicMock(return_value=form_repository) + monkeypatch.setattr( + node_factory, + "HumanInputFormRepositoryImpl", + form_repository_impl, + ) + + node_config = {"id": "node-id", "data": {"type": node_type}} + result = factory.create_node(node_config) + + assert result is created_node + kwargs = constructor.call_args.kwargs + assert kwargs["id"] == "node-id" + _assert_typed_node_config(kwargs["config"], node_id="node-id", node_type=node_type) + assert kwargs["graph_init_params"] is sentinel.graph_init_params + assert kwargs["graph_runtime_state"] is sentinel.graph_runtime_state + + if constructor_name == "CodeNode": + assert kwargs["code_executor"] is sentinel.code_executor + assert kwargs["code_limits"] is sentinel.code_limits + elif constructor_name == "TemplateTransformNode": + assert kwargs["template_renderer"] is sentinel.template_renderer + assert kwargs["max_output_length"] == 2048 + elif constructor_name == "HttpRequestNode": + assert kwargs["http_request_config"] is sentinel.http_request_config + assert kwargs["http_client"] is sentinel.http_client + assert kwargs["tool_file_manager_factory"] is sentinel.tool_file_manager_factory + assert kwargs["file_manager"] is sentinel.file_manager + elif constructor_name == "HumanInputNode": + assert kwargs["form_repository"] is form_repository + form_repository_impl.assert_called_once_with(tenant_id="tenant-id") + elif constructor_name == "DocumentExtractorNode": + assert kwargs["unstructured_api_config"] is sentinel.unstructured_api_config + assert kwargs["http_client"] is sentinel.http_client + + @pytest.mark.parametrize( + ("node_type", "constructor_name", "expected_extra_kwargs"), + [ + (BuiltinNodeTypes.LLM, "LLMNode", {"http_client": sentinel.http_client}), + (BuiltinNodeTypes.QUESTION_CLASSIFIER, "QuestionClassifierNode", {"http_client": sentinel.http_client}), + (BuiltinNodeTypes.PARAMETER_EXTRACTOR, "ParameterExtractorNode", {}), + ], + ) + def test_creates_model_backed_nodes( + self, + monkeypatch, + factory, + node_type, + constructor_name, + expected_extra_kwargs, + ): + created_node = object() + constructor = MagicMock(name=constructor_name, return_value=created_node) + monkeypatch.setattr( + factory, + "_resolve_node_class", + MagicMock(return_value=constructor), + ) + llm_init_kwargs = { + "credentials_provider": sentinel.credentials_provider, + "model_factory": sentinel.model_factory, + "model_instance": sentinel.model_instance, + "memory": sentinel.memory, + **expected_extra_kwargs, + } + build_llm_init_kwargs = MagicMock(return_value=llm_init_kwargs) + factory._build_llm_compatible_node_init_kwargs = build_llm_init_kwargs + + node_config = {"id": "node-id", "data": {"type": node_type}} + result = factory.create_node(node_config) + + assert result is created_node + build_llm_init_kwargs.assert_called_once() + helper_kwargs = build_llm_init_kwargs.call_args.kwargs + assert helper_kwargs["node_class"] is constructor + assert isinstance(helper_kwargs["node_data"], BaseNodeData) + assert helper_kwargs["node_data"].type == node_type + assert helper_kwargs["include_http_client"] is (node_type != BuiltinNodeTypes.PARAMETER_EXTRACTOR) + + constructor_kwargs = constructor.call_args.kwargs + assert constructor_kwargs["id"] == "node-id" + _assert_typed_node_config(constructor_kwargs["config"], node_id="node-id", node_type=node_type) + assert constructor_kwargs["graph_init_params"] is sentinel.graph_init_params + assert constructor_kwargs["graph_runtime_state"] is sentinel.graph_runtime_state + assert constructor_kwargs["credentials_provider"] is sentinel.credentials_provider + assert constructor_kwargs["model_factory"] is sentinel.model_factory + assert constructor_kwargs["model_instance"] is sentinel.model_instance + assert constructor_kwargs["memory"] is sentinel.memory + for key, value in expected_extra_kwargs.items(): + assert constructor_kwargs[key] is value + + +class TestDifyNodeFactoryModelInstance: + @pytest.fixture + def factory(self): + factory = object.__new__(node_factory.DifyNodeFactory) + factory._llm_credentials_provider = MagicMock() + factory._llm_model_factory = MagicMock() + return factory + + @pytest.fixture + def llm_model_setup(self, factory): + def _configure( + *, + completion_params=None, + has_provider_model=True, + model_schema=sentinel.model_schema, + ): + credentials = {"api_key": "secret"} + node_data_model = SimpleNamespace( + provider="provider", + name="model", + mode="chat", + completion_params=completion_params or {}, + ) + node_data = SimpleNamespace(model=node_data_model) + provider_model = MagicMock() if has_provider_model else None + provider_model_bundle = SimpleNamespace( + configuration=SimpleNamespace(get_provider_model=MagicMock(return_value=provider_model)) + ) + model_type_instance = MagicMock() + model_type_instance.get_model_schema.return_value = model_schema + model_instance = SimpleNamespace( + provider_model_bundle=provider_model_bundle, + model_type_instance=model_type_instance, + provider=None, + model_name=None, + credentials=None, + parameters=None, + stop=None, + ) + factory._llm_credentials_provider.fetch.return_value = credentials + factory._llm_model_factory.init_model_instance.return_value = model_instance + return SimpleNamespace( + node_data=node_data, + credentials=credentials, + provider_model=provider_model, + model_type_instance=model_type_instance, + model_instance=model_instance, + ) + + return _configure + + def test_requires_llm_mode(self, factory): + node_data = SimpleNamespace( + model=SimpleNamespace( + provider="provider", + name="model", + mode="", + completion_params={}, + ) + ) + + with pytest.raises(node_factory.LLMModeRequiredError, match="LLM mode is required"): + factory._build_model_instance_for_llm_node(node_data) + + def test_raises_when_provider_model_is_missing(self, factory, llm_model_setup): + setup = llm_model_setup(has_provider_model=False) + + with pytest.raises(node_factory.ModelNotExistError, match="Model model not exist"): + factory._build_model_instance_for_llm_node(setup.node_data) + + def test_raises_when_model_schema_is_missing(self, factory, llm_model_setup): + setup = llm_model_setup(model_schema=None) + + with pytest.raises(node_factory.ModelNotExistError, match="Model model not exist"): + factory._build_model_instance_for_llm_node(setup.node_data) + + setup.provider_model.raise_for_status.assert_called_once() + + def test_builds_model_instance_and_normalizes_stop_tokens(self, factory, llm_model_setup): + setup = llm_model_setup( + completion_params={"temperature": 0.3, "stop": "not-a-list"}, + model_schema={"schema": "value"}, + ) + + result = factory._build_model_instance_for_llm_node(setup.node_data) + + assert result is setup.model_instance + assert result.provider == "provider" + assert result.model_name == "model" + assert result.credentials == setup.credentials + assert result.parameters == {"temperature": 0.3} + assert result.stop == () + assert result.model_type_instance is setup.model_type_instance + setup.provider_model.raise_for_status.assert_called_once() + + +class TestDifyNodeFactoryMemory: + @pytest.fixture + def factory(self): + factory = object.__new__(node_factory.DifyNodeFactory) + factory._dify_context = SimpleNamespace(app_id="app-id") + factory.graph_runtime_state = SimpleNamespace(variable_pool=MagicMock()) + return factory + + def test_returns_none_when_memory_is_not_configured(self, factory): + result = factory._build_memory_for_llm_node( + node_data=SimpleNamespace(memory=None), + model_instance=sentinel.model_instance, + ) + + assert result is None + factory.graph_runtime_state.variable_pool.get.assert_not_called() + + def test_uses_string_segment_conversation_id(self, monkeypatch, factory): + memory_config = sentinel.memory_config + factory.graph_runtime_state.variable_pool.get.return_value = StringSegment(value="conversation-id") + fetch_memory = MagicMock(return_value=sentinel.memory) + monkeypatch.setattr(node_factory, "fetch_memory", fetch_memory) + + result = factory._build_memory_for_llm_node( + node_data=SimpleNamespace(memory=memory_config), + model_instance=sentinel.model_instance, + ) + + assert result is sentinel.memory + factory.graph_runtime_state.variable_pool.get.assert_called_once_with( + ["sys", SystemVariableKey.CONVERSATION_ID] + ) + fetch_memory.assert_called_once_with( + conversation_id="conversation-id", + app_id="app-id", + node_data_memory=memory_config, + model_instance=sentinel.model_instance, + ) + + def test_ignores_non_string_segment_conversation_ids(self, monkeypatch, factory): + memory_config = sentinel.memory_config + factory.graph_runtime_state.variable_pool.get.return_value = sentinel.segment + fetch_memory = MagicMock(return_value=sentinel.memory) + monkeypatch.setattr(node_factory, "fetch_memory", fetch_memory) + + result = factory._build_memory_for_llm_node( + node_data=SimpleNamespace(memory=memory_config), + model_instance=sentinel.model_instance, + ) + + assert result is sentinel.memory + fetch_memory.assert_called_once_with( + conversation_id=None, + app_id="app-id", + node_data_memory=memory_config, + model_instance=sentinel.model_instance, + ) diff --git a/api/tests/unit_tests/core/workflow/test_node_mapping_bootstrap.py b/api/tests/unit_tests/core/workflow/test_node_mapping_bootstrap.py new file mode 100644 index 0000000000..8de45257ec --- /dev/null +++ b/api/tests/unit_tests/core/workflow/test_node_mapping_bootstrap.py @@ -0,0 +1,43 @@ +import os +import subprocess +import sys +import textwrap +from pathlib import Path + + +def test_moved_core_nodes_resolve_after_importing_production_entrypoints(): + api_root = Path(__file__).resolve().parents[4] + script = textwrap.dedent( + """ + from core.app.apps import workflow_app_runner + from core.workflow import workflow_entry + from core.workflow.nodes.knowledge_index import KNOWLEDGE_INDEX_NODE_TYPE + from core.workflow.node_factory import DifyNodeFactory, NODE_TYPE_CLASSES_MAPPING + from dify_graph.enums import BuiltinNodeTypes + from services import workflow_service + from services.rag_pipeline import rag_pipeline + + _ = workflow_entry, workflow_app_runner, workflow_service, rag_pipeline + + expected = ( + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, + KNOWLEDGE_INDEX_NODE_TYPE, + BuiltinNodeTypes.DATASOURCE, + ) + + for node_type in expected: + assert node_type in NODE_TYPE_CLASSES_MAPPING, node_type + resolved = DifyNodeFactory._resolve_node_class(node_type=node_type, node_version="1") + assert resolved.__module__.startswith("core.workflow.nodes."), resolved.__module__ + """ + ) + completed = subprocess.run( + [sys.executable, "-c", script], + cwd=api_root, + env=os.environ.copy(), + capture_output=True, + text=True, + check=False, + ) + + assert completed.returncode == 0, completed.stderr or completed.stdout diff --git a/api/tests/unit_tests/core/workflow/test_workflow_entry.py b/api/tests/unit_tests/core/workflow/test_workflow_entry.py index 0aa6ec3f45..93ba7f3333 100644 --- a/api/tests/unit_tests/core/workflow/test_workflow_entry.py +++ b/api/tests/unit_tests/core/workflow/test_workflow_entry.py @@ -9,6 +9,7 @@ from dify_graph.constants import ( CONVERSATION_VARIABLE_NODE_ID, ENVIRONMENT_VARIABLE_NODE_ID, ) +from dify_graph.entities.graph_config import NodeConfigDictAdapter from dify_graph.file.enums import FileType from dify_graph.file.models import File, FileTransferMethod from dify_graph.nodes.code.code_node import CodeNode @@ -124,7 +125,7 @@ class TestWorkflowEntry: def get_node_config_by_id(self, target_id: str): assert target_id == node_id - return node_config + return NodeConfigDictAdapter.validate_python(node_config) workflow = StubWorkflow() variable_pool = VariablePool(system_variables=SystemVariable.default(), user_inputs={}) diff --git a/api/tests/unit_tests/core/workflow/test_workflow_entry_helpers.py b/api/tests/unit_tests/core/workflow/test_workflow_entry_helpers.py new file mode 100644 index 0000000000..dc4c7a00c5 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/test_workflow_entry_helpers.py @@ -0,0 +1,657 @@ +from collections import UserString +from types import SimpleNamespace +from unittest.mock import MagicMock, patch, sentinel + +import pytest + +from core.app.apps.exc import GenerateTaskStoppedError +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom +from core.workflow import workflow_entry +from dify_graph.entities.graph_config import NodeConfigDictAdapter +from dify_graph.enums import NodeType +from dify_graph.errors import WorkflowNodeRunFailedError +from dify_graph.file.enums import FileTransferMethod, FileType +from dify_graph.file.models import File +from dify_graph.graph_events import GraphRunFailedEvent +from dify_graph.nodes import BuiltinNodeTypes +from dify_graph.runtime import ChildGraphNotFoundError + + +def _build_typed_node_config(node_type: NodeType): + return NodeConfigDictAdapter.validate_python({"id": "node-id", "data": {"type": node_type}}) + + +class TestWorkflowChildEngineBuilder: + @pytest.mark.parametrize( + ("graph_config", "node_id", "expected"), + [ + ({"nodes": [{"id": "root"}]}, "root", True), + ({"nodes": [{"id": "root"}]}, "other", False), + ({"nodes": "invalid"}, "root", None), + ({"nodes": ["invalid"]}, "root", None), + ], + ) + def test_has_node_id(self, graph_config, node_id, expected): + result = workflow_entry._WorkflowChildEngineBuilder._has_node_id(graph_config, node_id) + + assert result is expected + + def test_build_child_engine_raises_when_root_node_is_missing(self): + builder = workflow_entry._WorkflowChildEngineBuilder() + + with patch.object(workflow_entry, "DifyNodeFactory", return_value=sentinel.factory): + with pytest.raises(ChildGraphNotFoundError, match="child graph root node 'missing' not found"): + builder.build_child_engine( + workflow_id="workflow-id", + graph_init_params=sentinel.graph_init_params, + graph_runtime_state=sentinel.graph_runtime_state, + graph_config={"nodes": []}, + root_node_id="missing", + ) + + def test_build_child_engine_constructs_graph_engine_and_layers(self): + builder = workflow_entry._WorkflowChildEngineBuilder() + child_graph = sentinel.child_graph + child_engine = MagicMock() + quota_layer = sentinel.quota_layer + additional_layers = [sentinel.layer_one, sentinel.layer_two] + + with ( + patch.object(workflow_entry, "DifyNodeFactory", return_value=sentinel.factory) as dify_node_factory, + patch.object(workflow_entry.Graph, "init", return_value=child_graph) as graph_init, + patch.object(workflow_entry, "GraphEngine", return_value=child_engine) as graph_engine_cls, + patch.object(workflow_entry, "GraphEngineConfig", return_value=sentinel.graph_engine_config), + patch.object(workflow_entry, "InMemoryChannel", return_value=sentinel.command_channel), + patch.object(workflow_entry, "LLMQuotaLayer", return_value=quota_layer), + ): + result = builder.build_child_engine( + workflow_id="workflow-id", + graph_init_params=sentinel.graph_init_params, + graph_runtime_state=sentinel.graph_runtime_state, + graph_config={"nodes": [{"id": "root"}]}, + root_node_id="root", + layers=additional_layers, + ) + + assert result is child_engine + dify_node_factory.assert_called_once_with( + graph_init_params=sentinel.graph_init_params, + graph_runtime_state=sentinel.graph_runtime_state, + ) + graph_init.assert_called_once_with( + graph_config={"nodes": [{"id": "root"}]}, + node_factory=sentinel.factory, + root_node_id="root", + ) + graph_engine_cls.assert_called_once_with( + workflow_id="workflow-id", + graph=child_graph, + graph_runtime_state=sentinel.graph_runtime_state, + command_channel=sentinel.command_channel, + config=sentinel.graph_engine_config, + child_engine_builder=builder, + ) + assert child_engine.layer.call_args_list == [ + ((quota_layer,), {}), + ((sentinel.layer_one,), {}), + ((sentinel.layer_two,), {}), + ] + + +class TestWorkflowEntryInit: + def test_rejects_call_depth_above_limit(self): + call_depth = workflow_entry.dify_config.WORKFLOW_CALL_MAX_DEPTH + 1 + + with pytest.raises(ValueError, match="Max workflow call depth"): + workflow_entry.WorkflowEntry( + tenant_id="tenant-id", + app_id="app-id", + workflow_id="workflow-id", + graph_config={"nodes": [], "edges": []}, + graph=sentinel.graph, + user_id="user-id", + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + call_depth=call_depth, + variable_pool=sentinel.variable_pool, + graph_runtime_state=sentinel.graph_runtime_state, + ) + + def test_applies_debug_and_observability_layers(self): + graph_engine = MagicMock() + debug_layer = sentinel.debug_layer + execution_limits_layer = sentinel.execution_limits_layer + llm_quota_layer = sentinel.llm_quota_layer + observability_layer = sentinel.observability_layer + + with ( + patch.object(workflow_entry.dify_config, "DEBUG", True), + patch.object(workflow_entry.dify_config, "ENABLE_OTEL", False), + patch.object(workflow_entry, "is_instrument_flag_enabled", return_value=True), + patch.object(workflow_entry, "GraphEngine", return_value=graph_engine) as graph_engine_cls, + patch.object(workflow_entry, "GraphEngineConfig", return_value=sentinel.graph_engine_config), + patch.object(workflow_entry, "InMemoryChannel", return_value=sentinel.command_channel), + patch.object(workflow_entry, "DebugLoggingLayer", return_value=debug_layer) as debug_logging_layer, + patch.object( + workflow_entry, + "ExecutionLimitsLayer", + return_value=execution_limits_layer, + ) as execution_limits_layer_cls, + patch.object(workflow_entry, "LLMQuotaLayer", return_value=llm_quota_layer), + patch.object(workflow_entry, "ObservabilityLayer", return_value=observability_layer), + ): + entry = workflow_entry.WorkflowEntry( + tenant_id="tenant-id", + app_id="app-id", + workflow_id="workflow-id-123456", + graph_config={"nodes": [], "edges": []}, + graph=sentinel.graph, + user_id="user-id", + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + call_depth=0, + variable_pool=sentinel.variable_pool, + graph_runtime_state=sentinel.graph_runtime_state, + command_channel=None, + ) + + assert entry.command_channel is sentinel.command_channel + graph_engine_cls.assert_called_once_with( + workflow_id="workflow-id-123456", + graph=sentinel.graph, + graph_runtime_state=sentinel.graph_runtime_state, + command_channel=sentinel.command_channel, + config=sentinel.graph_engine_config, + child_engine_builder=entry._child_engine_builder, + ) + debug_logging_layer.assert_called_once_with( + level="DEBUG", + include_inputs=True, + include_outputs=True, + include_process_data=False, + logger_name="GraphEngine.Debug.workflow", + ) + execution_limits_layer_cls.assert_called_once_with( + max_steps=workflow_entry.dify_config.WORKFLOW_MAX_EXECUTION_STEPS, + max_time=workflow_entry.dify_config.WORKFLOW_MAX_EXECUTION_TIME, + ) + assert graph_engine.layer.call_args_list == [ + ((debug_layer,), {}), + ((execution_limits_layer,), {}), + ((llm_quota_layer,), {}), + ((observability_layer,), {}), + ] + + +class TestWorkflowEntryRun: + def test_run_swallows_generate_task_stopped_errors(self): + entry = object.__new__(workflow_entry.WorkflowEntry) + entry.graph_engine = MagicMock() + entry.graph_engine.run.side_effect = GenerateTaskStoppedError() + + assert list(entry.run()) == [] + + def test_run_emits_failed_event_for_unexpected_errors(self): + entry = object.__new__(workflow_entry.WorkflowEntry) + entry.graph_engine = MagicMock() + entry.graph_engine.run.side_effect = RuntimeError("boom") + + events = list(entry.run()) + + assert len(events) == 1 + assert isinstance(events[0], GraphRunFailedEvent) + assert events[0].error == "boom" + + +class TestWorkflowEntrySingleStepRun: + def test_uses_empty_mapping_when_selector_extraction_is_not_implemented(self): + class FakeNode: + id = "node-id" + title = "Node Title" + node_type = "fake" + + @staticmethod + def version(): + return "1" + + @staticmethod + def extract_variable_selector_to_variable_mapping(**_kwargs): + raise NotImplementedError + + with ( + patch.object(workflow_entry, "GraphInitParams", return_value=sentinel.graph_init_params), + patch.object(workflow_entry, "GraphRuntimeState", return_value=sentinel.graph_runtime_state), + patch.object(workflow_entry, "build_dify_run_context", return_value={"_dify": "context"}), + patch.object(workflow_entry.time, "perf_counter", return_value=123.0), + patch.object(workflow_entry, "DifyNodeFactory") as dify_node_factory, + patch.object(workflow_entry, "load_into_variable_pool") as load_into_variable_pool, + patch.object( + workflow_entry.WorkflowEntry, + "mapping_user_inputs_to_variable_pool", + ) as mapping_user_inputs_to_variable_pool, + patch.object( + workflow_entry.WorkflowEntry, + "_traced_node_run", + return_value=iter(["event"]), + ), + ): + dify_node_factory.return_value.create_node.return_value = FakeNode() + workflow = SimpleNamespace( + tenant_id="tenant-id", + app_id="app-id", + id="workflow-id", + graph_dict={"nodes": [], "edges": []}, + get_node_config_by_id=lambda _node_id: _build_typed_node_config(BuiltinNodeTypes.START), + ) + + node, generator = workflow_entry.WorkflowEntry.single_step_run( + workflow=workflow, + node_id="node-id", + user_id="user-id", + user_inputs={"question": "hello"}, + variable_pool=sentinel.variable_pool, + ) + + assert node.id == "node-id" + assert list(generator) == ["event"] + load_into_variable_pool.assert_called_once_with( + variable_loader=workflow_entry.DUMMY_VARIABLE_LOADER, + variable_pool=sentinel.variable_pool, + variable_mapping={}, + user_inputs={"question": "hello"}, + ) + mapping_user_inputs_to_variable_pool.assert_called_once_with( + variable_mapping={}, + user_inputs={"question": "hello"}, + variable_pool=sentinel.variable_pool, + tenant_id="tenant-id", + ) + + def test_skips_user_input_mapping_for_datasource_nodes(self): + class FakeDatasourceNode: + id = "node-id" + node_type = "datasource" + + @staticmethod + def version(): + return "1" + + @staticmethod + def extract_variable_selector_to_variable_mapping(**_kwargs): + return {"question": ["node", "question"]} + + with ( + patch.object(workflow_entry, "GraphInitParams", return_value=sentinel.graph_init_params), + patch.object(workflow_entry, "GraphRuntimeState", return_value=sentinel.graph_runtime_state), + patch.object(workflow_entry, "build_dify_run_context", return_value={"_dify": "context"}), + patch.object(workflow_entry.time, "perf_counter", return_value=123.0), + patch.object(workflow_entry, "DifyNodeFactory") as dify_node_factory, + patch.object(workflow_entry, "load_into_variable_pool") as load_into_variable_pool, + patch.object( + workflow_entry.WorkflowEntry, + "mapping_user_inputs_to_variable_pool", + ) as mapping_user_inputs_to_variable_pool, + patch.object( + workflow_entry.WorkflowEntry, + "_traced_node_run", + return_value=iter(["event"]), + ), + ): + dify_node_factory.return_value.create_node.return_value = FakeDatasourceNode() + workflow = SimpleNamespace( + tenant_id="tenant-id", + app_id="app-id", + id="workflow-id", + graph_dict={"nodes": [], "edges": []}, + get_node_config_by_id=lambda _node_id: _build_typed_node_config(BuiltinNodeTypes.DATASOURCE), + ) + + node, generator = workflow_entry.WorkflowEntry.single_step_run( + workflow=workflow, + node_id="node-id", + user_id="user-id", + user_inputs={"question": "hello"}, + variable_pool=sentinel.variable_pool, + ) + + assert node.id == "node-id" + assert list(generator) == ["event"] + load_into_variable_pool.assert_called_once() + mapping_user_inputs_to_variable_pool.assert_not_called() + + def test_wraps_traced_node_run_failures(self): + class FakeNode: + id = "node-id" + title = "Node Title" + node_type = "fake" + + @staticmethod + def extract_variable_selector_to_variable_mapping(**_kwargs): + return {} + + @staticmethod + def version(): + return "1" + + with ( + patch.object(workflow_entry, "GraphInitParams", return_value=sentinel.graph_init_params), + patch.object(workflow_entry, "GraphRuntimeState", return_value=sentinel.graph_runtime_state), + patch.object(workflow_entry, "build_dify_run_context", return_value={"_dify": "context"}), + patch.object(workflow_entry.time, "perf_counter", return_value=123.0), + patch.object(workflow_entry, "DifyNodeFactory") as dify_node_factory, + patch.object(workflow_entry, "load_into_variable_pool"), + patch.object(workflow_entry.WorkflowEntry, "mapping_user_inputs_to_variable_pool"), + patch.object( + workflow_entry.WorkflowEntry, + "_traced_node_run", + side_effect=RuntimeError("boom"), + ), + ): + dify_node_factory.return_value.create_node.return_value = FakeNode() + workflow = SimpleNamespace( + tenant_id="tenant-id", + app_id="app-id", + id="workflow-id", + graph_dict={"nodes": [], "edges": []}, + get_node_config_by_id=lambda _node_id: _build_typed_node_config(BuiltinNodeTypes.START), + ) + + with pytest.raises(WorkflowNodeRunFailedError): + workflow_entry.WorkflowEntry.single_step_run( + workflow=workflow, + node_id="node-id", + user_id="user-id", + user_inputs={}, + variable_pool=sentinel.variable_pool, + ) + + +class TestWorkflowEntryHelpers: + def test_create_single_node_graph_builds_start_edge(self): + graph = workflow_entry.WorkflowEntry._create_single_node_graph( + node_id="target-node", + node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR}, + node_width=320, + node_height=180, + ) + + assert graph["nodes"][0]["id"] == "start" + assert graph["nodes"][1]["id"] == "target-node" + assert graph["nodes"][1]["width"] == 320 + assert graph["nodes"][1]["height"] == 180 + assert graph["edges"] == [ + { + "source": "start", + "target": "target-node", + "sourceHandle": "source", + "targetHandle": "target", + } + ] + + def test_run_free_node_rejects_unsupported_types(self): + with pytest.raises(ValueError, match="Node type start not supported"): + workflow_entry.WorkflowEntry.run_free_node( + node_data={"type": BuiltinNodeTypes.START}, + node_id="node-id", + tenant_id="tenant-id", + user_id="user-id", + user_inputs={}, + ) + + def test_run_free_node_rejects_missing_node_class(self, monkeypatch): + monkeypatch.setattr( + workflow_entry, + "resolve_workflow_node_class", + MagicMock(return_value=None), + ) + + with pytest.raises(ValueError, match="Node class not found for node type parameter-extractor"): + workflow_entry.WorkflowEntry.run_free_node( + node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR}, + node_id="node-id", + tenant_id="tenant-id", + user_id="user-id", + user_inputs={}, + ) + + def test_run_free_node_uses_empty_mapping_when_selector_extraction_is_not_implemented(self, monkeypatch): + class FakeNodeClass: + @staticmethod + def extract_variable_selector_to_variable_mapping(**_kwargs): + raise NotImplementedError + + class FakeNode: + id = "node-id" + title = "Node Title" + node_type = "parameter-extractor" + + @staticmethod + def version(): + return "1" + + dify_node_factory = MagicMock() + dify_node_factory.create_node.return_value = FakeNode() + monkeypatch.setattr( + workflow_entry, + "resolve_workflow_node_class", + MagicMock(return_value=FakeNodeClass), + ) + + with ( + patch.object(workflow_entry.SystemVariable, "default", return_value=sentinel.system_variables), + patch.object(workflow_entry, "VariablePool", return_value=sentinel.variable_pool) as variable_pool_cls, + patch.object( + workflow_entry, "GraphInitParams", return_value=sentinel.graph_init_params + ) as graph_init_params, + patch.object(workflow_entry, "GraphRuntimeState", return_value=sentinel.graph_runtime_state), + patch.object( + workflow_entry, "build_dify_run_context", return_value={"_dify": "context"} + ) as build_dify_run_context, + patch.object(workflow_entry.time, "perf_counter", return_value=123.0), + patch.object(workflow_entry, "DifyNodeFactory", return_value=dify_node_factory) as dify_node_factory_cls, + patch.object( + workflow_entry.WorkflowEntry, + "mapping_user_inputs_to_variable_pool", + ) as mapping_user_inputs_to_variable_pool, + patch.object( + workflow_entry.WorkflowEntry, + "_traced_node_run", + return_value=iter(["event"]), + ), + ): + node, generator = workflow_entry.WorkflowEntry.run_free_node( + node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR, "title": "Node"}, + node_id="node-id", + tenant_id="tenant-id", + user_id="user-id", + user_inputs={"question": "hello"}, + ) + + assert node.id == "node-id" + assert list(generator) == ["event"] + variable_pool_cls.assert_called_once_with( + system_variables=sentinel.system_variables, + user_inputs={}, + environment_variables=[], + ) + build_dify_run_context.assert_called_once_with( + tenant_id="tenant-id", + app_id="", + user_id="user-id", + user_from=UserFrom.ACCOUNT, + invoke_from=InvokeFrom.DEBUGGER, + ) + graph_init_params.assert_called_once_with( + workflow_id="", + graph_config=workflow_entry.WorkflowEntry._create_single_node_graph( + "node-id", {"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR, "title": "Node"} + ), + run_context={"_dify": "context"}, + call_depth=0, + ) + dify_node_factory_cls.assert_called_once_with( + graph_init_params=sentinel.graph_init_params, + graph_runtime_state=sentinel.graph_runtime_state, + ) + mapping_user_inputs_to_variable_pool.assert_called_once_with( + variable_mapping={}, + user_inputs={"question": "hello"}, + variable_pool=sentinel.variable_pool, + tenant_id="tenant-id", + ) + + def test_run_free_node_wraps_execution_failures(self, monkeypatch): + class FakeNodeClass: + @staticmethod + def extract_variable_selector_to_variable_mapping(**_kwargs): + return {} + + class FakeNode: + id = "node-id" + title = "Node Title" + node_type = "parameter-extractor" + + @staticmethod + def version(): + return "1" + + dify_node_factory = MagicMock() + dify_node_factory.create_node.return_value = FakeNode() + monkeypatch.setattr( + workflow_entry, + "resolve_workflow_node_class", + MagicMock(return_value=FakeNodeClass), + ) + + with ( + patch.object(workflow_entry.SystemVariable, "default", return_value=sentinel.system_variables), + patch.object(workflow_entry, "VariablePool", return_value=sentinel.variable_pool), + patch.object(workflow_entry, "GraphInitParams", return_value=sentinel.graph_init_params), + patch.object(workflow_entry, "GraphRuntimeState", return_value=sentinel.graph_runtime_state), + patch.object(workflow_entry, "build_dify_run_context", return_value={"_dify": "context"}), + patch.object(workflow_entry.time, "perf_counter", return_value=123.0), + patch.object(workflow_entry, "DifyNodeFactory", return_value=dify_node_factory), + patch.object( + workflow_entry.WorkflowEntry, + "mapping_user_inputs_to_variable_pool", + side_effect=RuntimeError("boom"), + ), + ): + with pytest.raises(WorkflowNodeRunFailedError, match="Node Title run failed: boom"): + workflow_entry.WorkflowEntry.run_free_node( + node_data={"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR, "title": "Node"}, + node_id="node-id", + tenant_id="tenant-id", + user_id="user-id", + user_inputs={"question": "hello"}, + ) + + def test_handle_special_values_serializes_nested_files(self): + file = File( + tenant_id="tenant-id", + type=FileType.IMAGE, + transfer_method=FileTransferMethod.REMOTE_URL, + remote_url="https://example.com/image.png", + filename="image.png", + extension=".png", + ) + + result = workflow_entry.WorkflowEntry.handle_special_values({"file": file, "nested": {"files": [file]}}) + + assert result == { + "file": file.to_dict(), + "nested": {"files": [file.to_dict()]}, + } + + def test_handle_special_values_returns_none_for_none(self): + assert workflow_entry.WorkflowEntry._handle_special_values(None) is None + + def test_handle_special_values_returns_scalar_as_is(self): + assert workflow_entry.WorkflowEntry._handle_special_values("plain-text") == "plain-text" + + +class TestMappingUserInputsBranches: + def test_rejects_invalid_node_variable_key(self): + class EmptySplitKey(UserString): + def split(self, _sep=None): + return [] + + with pytest.raises(ValueError, match="Invalid node variable broken"): + workflow_entry.WorkflowEntry.mapping_user_inputs_to_variable_pool( + variable_mapping={EmptySplitKey("broken"): ["node", "input"]}, + user_inputs={}, + variable_pool=MagicMock(), + tenant_id="tenant-id", + ) + + def test_skips_none_user_input_when_variable_already_exists(self): + variable_pool = MagicMock() + variable_pool.get.return_value = None + + workflow_entry.WorkflowEntry.mapping_user_inputs_to_variable_pool( + variable_mapping={"node.input": ["target", "input"]}, + user_inputs={"node.input": None}, + variable_pool=variable_pool, + tenant_id="tenant-id", + ) + + variable_pool.add.assert_not_called() + + def test_merges_structured_output_values(self): + variable_pool = MagicMock() + variable_pool.get.side_effect = [ + None, + SimpleNamespace(value={"existing": "value"}), + ] + + workflow_entry.WorkflowEntry.mapping_user_inputs_to_variable_pool( + variable_mapping={"node.answer": ["target", "structured_output", "answer"]}, + user_inputs={"node.answer": "new-value"}, + variable_pool=variable_pool, + tenant_id="tenant-id", + ) + + variable_pool.add.assert_called_once_with( + ["target", "structured_output"], + {"existing": "value", "answer": "new-value"}, + ) + + +class TestWorkflowEntryTracing: + def test_traced_node_run_reports_success(self): + layer = MagicMock() + + class FakeNode: + def ensure_execution_id(self): + return None + + def run(self): + yield "event" + + with patch.object(workflow_entry, "ObservabilityLayer", return_value=layer): + events = list(workflow_entry.WorkflowEntry._traced_node_run(FakeNode())) + + assert events == ["event"] + layer.on_graph_start.assert_called_once_with() + layer.on_node_run_start.assert_called_once() + layer.on_node_run_end.assert_called_once_with( + layer.on_node_run_start.call_args.args[0], + None, + ) + + def test_traced_node_run_reports_errors(self): + layer = MagicMock() + + class FakeNode: + def ensure_execution_id(self): + return None + + def run(self): + raise RuntimeError("boom") + yield + + with patch.object(workflow_entry, "ObservabilityLayer", return_value=layer): + with pytest.raises(RuntimeError, match="boom"): + list(workflow_entry.WorkflowEntry._traced_node_run(FakeNode())) + + assert isinstance(layer.on_node_run_end.call_args.args[1], RuntimeError) diff --git a/api/tests/unit_tests/dify_graph/model_runtime/__base/__init__.py b/api/tests/unit_tests/dify_graph/model_runtime/__base/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/model_runtime/__base/test_increase_tool_call.py b/api/tests/unit_tests/dify_graph/model_runtime/__base/test_increase_tool_call.py similarity index 100% rename from api/tests/unit_tests/core/model_runtime/__base/test_increase_tool_call.py rename to api/tests/unit_tests/dify_graph/model_runtime/__base/test_increase_tool_call.py diff --git a/api/tests/unit_tests/core/model_runtime/__base/test_large_language_model_non_stream_parsing.py b/api/tests/unit_tests/dify_graph/model_runtime/__base/test_large_language_model_non_stream_parsing.py similarity index 100% rename from api/tests/unit_tests/core/model_runtime/__base/test_large_language_model_non_stream_parsing.py rename to api/tests/unit_tests/dify_graph/model_runtime/__base/test_large_language_model_non_stream_parsing.py diff --git a/api/tests/unit_tests/dify_graph/model_runtime/__init__.py b/api/tests/unit_tests/dify_graph/model_runtime/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/dify_graph/model_runtime/callbacks/test_base_callback.py b/api/tests/unit_tests/dify_graph/model_runtime/callbacks/test_base_callback.py new file mode 100644 index 0000000000..2410d16d63 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/callbacks/test_base_callback.py @@ -0,0 +1,964 @@ +"""Comprehensive unit tests for core/model_runtime/callbacks/base_callback.py""" + +from unittest.mock import MagicMock, patch + +import pytest + +from dify_graph.model_runtime.callbacks.base_callback import ( + _TEXT_COLOR_MAPPING, + Callback, +) +from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk +from dify_graph.model_runtime.entities.message_entities import PromptMessage, PromptMessageTool + +# --------------------------------------------------------------------------- +# Concrete implementation of the abstract Callback for testing +# --------------------------------------------------------------------------- + + +class ConcreteCallback(Callback): + """A minimal concrete subclass that satisfies all abstract methods.""" + + def __init__(self, raise_error: bool = False): + self.raise_error = raise_error + # Track invocations + self.before_invoke_calls: list[dict] = [] + self.new_chunk_calls: list[dict] = [] + self.after_invoke_calls: list[dict] = [] + self.invoke_error_calls: list[dict] = [] + + def on_before_invoke( + self, + llm_instance, + model, + credentials, + prompt_messages, + model_parameters, + tools=None, + stop=None, + stream=True, + user=None, + ): + self.before_invoke_calls.append( + { + "llm_instance": llm_instance, + "model": model, + "credentials": credentials, + "prompt_messages": prompt_messages, + "model_parameters": model_parameters, + "tools": tools, + "stop": stop, + "stream": stream, + "user": user, + } + ) + # To cover the 'raise NotImplementedError()' in the base class + try: + super().on_before_invoke( + llm_instance, model, credentials, prompt_messages, model_parameters, tools, stop, stream, user + ) + except NotImplementedError: + pass + + def on_new_chunk( + self, + llm_instance, + chunk, + model, + credentials, + prompt_messages, + model_parameters, + tools=None, + stop=None, + stream=True, + user=None, + ): + self.new_chunk_calls.append( + { + "llm_instance": llm_instance, + "chunk": chunk, + "model": model, + "credentials": credentials, + "prompt_messages": prompt_messages, + "model_parameters": model_parameters, + "tools": tools, + "stop": stop, + "stream": stream, + "user": user, + } + ) + try: + super().on_new_chunk( + llm_instance, chunk, model, credentials, prompt_messages, model_parameters, tools, stop, stream, user + ) + except NotImplementedError: + pass + + def on_after_invoke( + self, + llm_instance, + result, + model, + credentials, + prompt_messages, + model_parameters, + tools=None, + stop=None, + stream=True, + user=None, + ): + self.after_invoke_calls.append( + { + "llm_instance": llm_instance, + "result": result, + "model": model, + "credentials": credentials, + "prompt_messages": prompt_messages, + "model_parameters": model_parameters, + "tools": tools, + "stop": stop, + "stream": stream, + "user": user, + } + ) + try: + super().on_after_invoke( + llm_instance, result, model, credentials, prompt_messages, model_parameters, tools, stop, stream, user + ) + except NotImplementedError: + pass + + def on_invoke_error( + self, + llm_instance, + ex, + model, + credentials, + prompt_messages, + model_parameters, + tools=None, + stop=None, + stream=True, + user=None, + ): + self.invoke_error_calls.append( + { + "llm_instance": llm_instance, + "ex": ex, + "model": model, + "credentials": credentials, + "prompt_messages": prompt_messages, + "model_parameters": model_parameters, + "tools": tools, + "stop": stop, + "stream": stream, + "user": user, + } + ) + try: + super().on_invoke_error( + llm_instance, ex, model, credentials, prompt_messages, model_parameters, tools, stop, stream, user + ) + except NotImplementedError: + pass + + +# --------------------------------------------------------------------------- +# A subclass that deliberately leaves abstract methods un-implemented, +# used to verify that instantiation raises TypeError. +# --------------------------------------------------------------------------- + + +# =========================================================================== +# Tests for _TEXT_COLOR_MAPPING module-level constant +# =========================================================================== + + +class TestTextColorMapping: + """Tests for the module-level _TEXT_COLOR_MAPPING dictionary.""" + + def test_contains_all_expected_colors(self): + expected_keys = {"blue", "yellow", "pink", "green", "red"} + assert set(_TEXT_COLOR_MAPPING.keys()) == expected_keys + + def test_blue_escape_code(self): + assert _TEXT_COLOR_MAPPING["blue"] == "36;1" + + def test_yellow_escape_code(self): + assert _TEXT_COLOR_MAPPING["yellow"] == "33;1" + + def test_pink_escape_code(self): + assert _TEXT_COLOR_MAPPING["pink"] == "38;5;200" + + def test_green_escape_code(self): + assert _TEXT_COLOR_MAPPING["green"] == "32;1" + + def test_red_escape_code(self): + assert _TEXT_COLOR_MAPPING["red"] == "31;1" + + def test_mapping_is_dict(self): + assert isinstance(_TEXT_COLOR_MAPPING, dict) + + def test_all_values_are_strings(self): + for key, value in _TEXT_COLOR_MAPPING.items(): + assert isinstance(value, str), f"Value for {key!r} should be str" + + +# =========================================================================== +# Tests for the Callback ABC itself +# =========================================================================== + + +class TestCallbackAbstract: + """Tests verifying Callback is a proper ABC.""" + + def test_cannot_instantiate_abstract_class_directly(self): + """Callback cannot be instantiated since it has abstract methods.""" + with pytest.raises(TypeError): + Callback() # type: ignore[abstract] + + def test_concrete_subclass_can_be_instantiated(self): + cb = ConcreteCallback() + assert isinstance(cb, Callback) + + def test_default_raise_error_is_false(self): + cb = ConcreteCallback() + assert cb.raise_error is False + + def test_raise_error_can_be_set_to_true(self): + cb = ConcreteCallback(raise_error=True) + assert cb.raise_error is True + + def test_subclass_missing_on_before_invoke_raises_type_error(self): + """A subclass missing any single abstract method cannot be instantiated.""" + + class IncompleteCallback(Callback): + def on_new_chunk(self, *a, **kw): ... + def on_after_invoke(self, *a, **kw): ... + def on_invoke_error(self, *a, **kw): ... + + with pytest.raises(TypeError): + IncompleteCallback() # type: ignore[abstract] + + def test_subclass_missing_on_new_chunk_raises_type_error(self): + class IncompleteCallback(Callback): + def on_before_invoke(self, *a, **kw): ... + def on_after_invoke(self, *a, **kw): ... + def on_invoke_error(self, *a, **kw): ... + + with pytest.raises(TypeError): + IncompleteCallback() # type: ignore[abstract] + + def test_subclass_missing_on_after_invoke_raises_type_error(self): + class IncompleteCallback(Callback): + def on_before_invoke(self, *a, **kw): ... + def on_new_chunk(self, *a, **kw): ... + def on_invoke_error(self, *a, **kw): ... + + with pytest.raises(TypeError): + IncompleteCallback() # type: ignore[abstract] + + def test_subclass_missing_on_invoke_error_raises_type_error(self): + class IncompleteCallback(Callback): + def on_before_invoke(self, *a, **kw): ... + def on_new_chunk(self, *a, **kw): ... + def on_after_invoke(self, *a, **kw): ... + + with pytest.raises(TypeError): + IncompleteCallback() # type: ignore[abstract] + + +# =========================================================================== +# Tests for on_before_invoke +# =========================================================================== + + +class TestOnBeforeInvoke: + """Tests for the on_before_invoke callback method.""" + + def setup_method(self): + self.cb = ConcreteCallback() + self.llm_instance = MagicMock() + self.model = "gpt-4" + self.credentials = {"api_key": "sk-test"} + self.prompt_messages = [MagicMock(spec=PromptMessage)] + self.model_parameters = {"temperature": 0.7} + + def test_on_before_invoke_called_with_required_args(self): + self.cb.on_before_invoke( + llm_instance=self.llm_instance, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert len(self.cb.before_invoke_calls) == 1 + call = self.cb.before_invoke_calls[0] + assert call["llm_instance"] is self.llm_instance + assert call["model"] == self.model + assert call["credentials"] == self.credentials + assert call["prompt_messages"] is self.prompt_messages + assert call["model_parameters"] is self.model_parameters + + def test_on_before_invoke_defaults_tools_none(self): + self.cb.on_before_invoke( + llm_instance=self.llm_instance, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.before_invoke_calls[0]["tools"] is None + + def test_on_before_invoke_defaults_stop_none(self): + self.cb.on_before_invoke( + llm_instance=self.llm_instance, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.before_invoke_calls[0]["stop"] is None + + def test_on_before_invoke_defaults_stream_true(self): + self.cb.on_before_invoke( + llm_instance=self.llm_instance, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.before_invoke_calls[0]["stream"] is True + + def test_on_before_invoke_defaults_user_none(self): + self.cb.on_before_invoke( + llm_instance=self.llm_instance, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.before_invoke_calls[0]["user"] is None + + def test_on_before_invoke_with_all_optional_args(self): + tools = [MagicMock(spec=PromptMessageTool)] + stop = ["stop1", "stop2"] + self.cb.on_before_invoke( + llm_instance=self.llm_instance, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + tools=tools, + stop=stop, + stream=False, + user="user-123", + ) + call = self.cb.before_invoke_calls[0] + assert call["tools"] is tools + assert call["stop"] == stop + assert call["stream"] is False + assert call["user"] == "user-123" + + def test_on_before_invoke_called_multiple_times(self): + for i in range(3): + self.cb.on_before_invoke( + llm_instance=self.llm_instance, + model=f"model-{i}", + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert len(self.cb.before_invoke_calls) == 3 + assert self.cb.before_invoke_calls[2]["model"] == "model-2" + + +# =========================================================================== +# Tests for on_new_chunk +# =========================================================================== + + +class TestOnNewChunk: + """Tests for the on_new_chunk callback method.""" + + def setup_method(self): + self.cb = ConcreteCallback() + self.llm_instance = MagicMock() + self.chunk = MagicMock(spec=LLMResultChunk) + self.model = "gpt-3.5-turbo" + self.credentials = {"api_key": "sk-test"} + self.prompt_messages = [MagicMock(spec=PromptMessage)] + self.model_parameters = {"max_tokens": 256} + + def test_on_new_chunk_called_with_required_args(self): + self.cb.on_new_chunk( + llm_instance=self.llm_instance, + chunk=self.chunk, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert len(self.cb.new_chunk_calls) == 1 + call = self.cb.new_chunk_calls[0] + assert call["llm_instance"] is self.llm_instance + assert call["chunk"] is self.chunk + assert call["model"] == self.model + assert call["credentials"] == self.credentials + + def test_on_new_chunk_defaults_tools_none(self): + self.cb.on_new_chunk( + llm_instance=self.llm_instance, + chunk=self.chunk, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.new_chunk_calls[0]["tools"] is None + + def test_on_new_chunk_defaults_stop_none(self): + self.cb.on_new_chunk( + llm_instance=self.llm_instance, + chunk=self.chunk, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.new_chunk_calls[0]["stop"] is None + + def test_on_new_chunk_defaults_stream_true(self): + self.cb.on_new_chunk( + llm_instance=self.llm_instance, + chunk=self.chunk, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.new_chunk_calls[0]["stream"] is True + + def test_on_new_chunk_defaults_user_none(self): + self.cb.on_new_chunk( + llm_instance=self.llm_instance, + chunk=self.chunk, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.new_chunk_calls[0]["user"] is None + + def test_on_new_chunk_with_all_optional_args(self): + tools = [MagicMock(spec=PromptMessageTool)] + stop = ["END"] + self.cb.on_new_chunk( + llm_instance=self.llm_instance, + chunk=self.chunk, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + tools=tools, + stop=stop, + stream=False, + user="chunk-user", + ) + call = self.cb.new_chunk_calls[0] + assert call["tools"] is tools + assert call["stop"] == stop + assert call["stream"] is False + assert call["user"] == "chunk-user" + + def test_on_new_chunk_called_multiple_times(self): + for i in range(5): + self.cb.on_new_chunk( + llm_instance=self.llm_instance, + chunk=self.chunk, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert len(self.cb.new_chunk_calls) == 5 + + +# =========================================================================== +# Tests for on_after_invoke +# =========================================================================== + + +class TestOnAfterInvoke: + """Tests for the on_after_invoke callback method.""" + + def setup_method(self): + self.cb = ConcreteCallback() + self.llm_instance = MagicMock() + self.result = MagicMock(spec=LLMResult) + self.model = "claude-3" + self.credentials = {"api_key": "anthropic-key"} + self.prompt_messages = [MagicMock(spec=PromptMessage)] + self.model_parameters = {"temperature": 1.0} + + def test_on_after_invoke_called_with_required_args(self): + self.cb.on_after_invoke( + llm_instance=self.llm_instance, + result=self.result, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert len(self.cb.after_invoke_calls) == 1 + call = self.cb.after_invoke_calls[0] + assert call["llm_instance"] is self.llm_instance + assert call["result"] is self.result + assert call["model"] == self.model + assert call["credentials"] is self.credentials + + def test_on_after_invoke_defaults_tools_none(self): + self.cb.on_after_invoke( + llm_instance=self.llm_instance, + result=self.result, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.after_invoke_calls[0]["tools"] is None + + def test_on_after_invoke_defaults_stop_none(self): + self.cb.on_after_invoke( + llm_instance=self.llm_instance, + result=self.result, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.after_invoke_calls[0]["stop"] is None + + def test_on_after_invoke_defaults_stream_true(self): + self.cb.on_after_invoke( + llm_instance=self.llm_instance, + result=self.result, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.after_invoke_calls[0]["stream"] is True + + def test_on_after_invoke_defaults_user_none(self): + self.cb.on_after_invoke( + llm_instance=self.llm_instance, + result=self.result, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.after_invoke_calls[0]["user"] is None + + def test_on_after_invoke_with_all_optional_args(self): + tools = [MagicMock(spec=PromptMessageTool)] + stop = ["STOP"] + self.cb.on_after_invoke( + llm_instance=self.llm_instance, + result=self.result, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + tools=tools, + stop=stop, + stream=False, + user="after-user", + ) + call = self.cb.after_invoke_calls[0] + assert call["tools"] is tools + assert call["stop"] == stop + assert call["stream"] is False + assert call["user"] == "after-user" + + +# =========================================================================== +# Tests for on_invoke_error +# =========================================================================== + + +class TestOnInvokeError: + """Tests for the on_invoke_error callback method.""" + + def setup_method(self): + self.cb = ConcreteCallback() + self.llm_instance = MagicMock() + self.ex = ValueError("something went wrong") + self.model = "gemini-pro" + self.credentials = {"api_key": "google-key"} + self.prompt_messages = [MagicMock(spec=PromptMessage)] + self.model_parameters = {"top_p": 0.9} + + def test_on_invoke_error_called_with_required_args(self): + self.cb.on_invoke_error( + llm_instance=self.llm_instance, + ex=self.ex, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert len(self.cb.invoke_error_calls) == 1 + call = self.cb.invoke_error_calls[0] + assert call["llm_instance"] is self.llm_instance + assert call["ex"] is self.ex + assert call["model"] == self.model + assert call["credentials"] is self.credentials + + def test_on_invoke_error_defaults_tools_none(self): + self.cb.on_invoke_error( + llm_instance=self.llm_instance, + ex=self.ex, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.invoke_error_calls[0]["tools"] is None + + def test_on_invoke_error_defaults_stop_none(self): + self.cb.on_invoke_error( + llm_instance=self.llm_instance, + ex=self.ex, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.invoke_error_calls[0]["stop"] is None + + def test_on_invoke_error_defaults_stream_true(self): + self.cb.on_invoke_error( + llm_instance=self.llm_instance, + ex=self.ex, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.invoke_error_calls[0]["stream"] is True + + def test_on_invoke_error_defaults_user_none(self): + self.cb.on_invoke_error( + llm_instance=self.llm_instance, + ex=self.ex, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert self.cb.invoke_error_calls[0]["user"] is None + + def test_on_invoke_error_with_all_optional_args(self): + tools = [MagicMock(spec=PromptMessageTool)] + stop = ["HALT"] + self.cb.on_invoke_error( + llm_instance=self.llm_instance, + ex=self.ex, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + tools=tools, + stop=stop, + stream=False, + user="error-user", + ) + call = self.cb.invoke_error_calls[0] + assert call["tools"] is tools + assert call["stop"] == stop + assert call["stream"] is False + assert call["user"] == "error-user" + + def test_on_invoke_error_accepts_various_exception_types(self): + for exc in [RuntimeError("r"), KeyError("k"), Exception("e")]: + self.cb.on_invoke_error( + llm_instance=self.llm_instance, + ex=exc, + model=self.model, + credentials=self.credentials, + prompt_messages=self.prompt_messages, + model_parameters=self.model_parameters, + ) + assert len(self.cb.invoke_error_calls) == 3 + + +# =========================================================================== +# Tests for print_text (concrete method on Callback) +# =========================================================================== + + +class TestPrintText: + """Tests for the concrete print_text method.""" + + def setup_method(self): + self.cb = ConcreteCallback() + + def test_print_text_without_color_prints_plain_text(self, capsys): + self.cb.print_text("hello world") + captured = capsys.readouterr() + assert captured.out == "hello world" + + def test_print_text_with_color_prints_colored_text(self, capsys): + self.cb.print_text("colored text", color="blue") + captured = capsys.readouterr() + # Should contain ANSI escape sequences + assert "colored text" in captured.out + assert "\001b[" in captured.out or "\033[" in captured.out or "\x1b[" in captured.out + + def test_print_text_without_color_no_ansi(self, capsys): + self.cb.print_text("plain text", color=None) + captured = capsys.readouterr() + assert captured.out == "plain text" + # No ANSI escape sequences + assert "\x1b" not in captured.out + + def test_print_text_default_end_is_empty_string(self, capsys): + self.cb.print_text("no newline") + captured = capsys.readouterr() + assert not captured.out.endswith("\n") + + def test_print_text_with_custom_end(self, capsys): + self.cb.print_text("with newline", end="\n") + captured = capsys.readouterr() + assert captured.out.endswith("\n") + + def test_print_text_with_empty_string(self, capsys): + self.cb.print_text("", color=None) + captured = capsys.readouterr() + assert captured.out == "" + + @pytest.mark.parametrize("color", ["blue", "yellow", "pink", "green", "red"]) + def test_print_text_all_colors_work(self, color, capsys): + """Verify no KeyError is thrown for any valid color.""" + self.cb.print_text("test", color=color) + captured = capsys.readouterr() + assert "test" in captured.out + + def test_print_text_calls_get_colored_text_when_color_given(self): + with patch.object(self.cb, "_get_colored_text", return_value="[COLORED]") as mock_gct: + with patch("builtins.print") as mock_print: + self.cb.print_text("hello", color="green") + mock_gct.assert_called_once_with("hello", "green") + mock_print.assert_called_once_with("[COLORED]", end="") + + def test_print_text_does_not_call_get_colored_text_when_no_color(self): + with patch.object(self.cb, "_get_colored_text") as mock_gct: + with patch("builtins.print"): + self.cb.print_text("hello", color=None) + mock_gct.assert_not_called() + + def test_print_text_passes_end_to_print(self): + with patch("builtins.print") as mock_print: + self.cb.print_text("text", end="---") + mock_print.assert_called_once_with("text", end="---") + + +# =========================================================================== +# Tests for _get_colored_text (private helper method) +# =========================================================================== + + +class TestGetColoredText: + """Tests for the _get_colored_text private method.""" + + def setup_method(self): + self.cb = ConcreteCallback() + + @pytest.mark.parametrize(("color", "expected_code"), list(_TEXT_COLOR_MAPPING.items())) + def test_get_colored_text_uses_correct_escape_code(self, color, expected_code): + result = self.cb._get_colored_text("text", color) + assert expected_code in result + + @pytest.mark.parametrize("color", ["blue", "yellow", "pink", "green", "red"]) + def test_get_colored_text_contains_input_text(self, color): + result = self.cb._get_colored_text("hello", color) + assert "hello" in result + + @pytest.mark.parametrize("color", ["blue", "yellow", "pink", "green", "red"]) + def test_get_colored_text_starts_with_escape(self, color): + result = self.cb._get_colored_text("text", color) + # Should start with an ANSI escape (\x1b or \u001b) + assert result.startswith("\x1b[") or result.startswith("\u001b[") + + @pytest.mark.parametrize("color", ["blue", "yellow", "pink", "green", "red"]) + def test_get_colored_text_ends_with_reset(self, color): + result = self.cb._get_colored_text("text", color) + # Should end with the ANSI reset code + assert result.endswith("\x1b[0m") or result.endswith("\u001b[0m") + + def test_get_colored_text_returns_string(self): + result = self.cb._get_colored_text("text", "blue") + assert isinstance(result, str) + + def test_get_colored_text_blue_exact_format(self): + result = self.cb._get_colored_text("hello", "blue") + expected = f"\u001b[{_TEXT_COLOR_MAPPING['blue']}m\033[1;3mhello\u001b[0m" + assert result == expected + + def test_get_colored_text_red_exact_format(self): + result = self.cb._get_colored_text("error", "red") + expected = f"\u001b[{_TEXT_COLOR_MAPPING['red']}m\033[1;3merror\u001b[0m" + assert result == expected + + def test_get_colored_text_green_exact_format(self): + result = self.cb._get_colored_text("ok", "green") + expected = f"\u001b[{_TEXT_COLOR_MAPPING['green']}m\033[1;3mok\u001b[0m" + assert result == expected + + def test_get_colored_text_yellow_exact_format(self): + result = self.cb._get_colored_text("warn", "yellow") + expected = f"\u001b[{_TEXT_COLOR_MAPPING['yellow']}m\033[1;3mwarn\u001b[0m" + assert result == expected + + def test_get_colored_text_pink_exact_format(self): + result = self.cb._get_colored_text("info", "pink") + expected = f"\u001b[{_TEXT_COLOR_MAPPING['pink']}m\033[1;3minfo\u001b[0m" + assert result == expected + + def test_get_colored_text_empty_string(self): + result = self.cb._get_colored_text("", "blue") + assert isinstance(result, str) + # Empty text should still have escape codes + assert _TEXT_COLOR_MAPPING["blue"] in result + + def test_get_colored_text_invalid_color_raises_key_error(self): + with pytest.raises(KeyError): + self.cb._get_colored_text("text", "purple") + + def test_get_colored_text_with_special_characters(self): + special = "hello\nworld\ttab" + result = self.cb._get_colored_text(special, "blue") + assert special in result + + def test_get_colored_text_with_long_text(self): + long_text = "a" * 10000 + result = self.cb._get_colored_text(long_text, "green") + assert long_text in result + + +# =========================================================================== +# Integration-style tests: full workflow through a ConcreteCallback +# =========================================================================== + + +class TestConcreteCallbackIntegration: + """End-to-end workflow tests using ConcreteCallback.""" + + def test_full_invocation_lifecycle(self): + """Simulate a complete LLM invocation lifecycle through all callbacks.""" + cb = ConcreteCallback() + llm_instance = MagicMock() + model = "gpt-4o" + credentials = {"api_key": "sk-xyz"} + prompt_messages = [MagicMock(spec=PromptMessage)] + model_parameters = {"temperature": 0.5} + tools = [MagicMock(spec=PromptMessageTool)] + stop = [""] + user = "user-abc" + + # 1. Before invoke + cb.on_before_invoke( + llm_instance=llm_instance, + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=True, + user=user, + ) + + # 2. Multiple chunks during streaming + for i in range(3): + chunk = MagicMock(spec=LLMResultChunk) + cb.on_new_chunk( + llm_instance=llm_instance, + chunk=chunk, + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=True, + user=user, + ) + + # 3. After invoke + result = MagicMock(spec=LLMResult) + cb.on_after_invoke( + llm_instance=llm_instance, + result=result, + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + tools=tools, + stop=stop, + stream=True, + user=user, + ) + + assert len(cb.before_invoke_calls) == 1 + assert len(cb.new_chunk_calls) == 3 + assert len(cb.after_invoke_calls) == 1 + assert len(cb.invoke_error_calls) == 0 + + def test_error_lifecycle(self): + """Simulate an invoke that results in an error.""" + cb = ConcreteCallback() + llm_instance = MagicMock() + model = "gpt-4" + credentials = {} + prompt_messages = [] + model_parameters = {} + + cb.on_before_invoke( + llm_instance=llm_instance, + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + ) + + ex = RuntimeError("API timeout") + cb.on_invoke_error( + llm_instance=llm_instance, + ex=ex, + model=model, + credentials=credentials, + prompt_messages=prompt_messages, + model_parameters=model_parameters, + ) + + assert len(cb.before_invoke_calls) == 1 + assert len(cb.invoke_error_calls) == 1 + assert cb.invoke_error_calls[0]["ex"] is ex + assert len(cb.after_invoke_calls) == 0 + + def test_print_text_with_color_in_integration(self, capsys): + """verify print_text works correctly in a concrete instance.""" + cb = ConcreteCallback() + cb.print_text("SUCCESS", color="green", end="\n") + captured = capsys.readouterr() + assert "SUCCESS" in captured.out + assert "\n" in captured.out + + def test_print_text_no_color_in_integration(self, capsys): + cb = ConcreteCallback() + cb.print_text("plain output") + captured = capsys.readouterr() + assert captured.out == "plain output" diff --git a/api/tests/unit_tests/dify_graph/model_runtime/callbacks/test_logging_callback.py b/api/tests/unit_tests/dify_graph/model_runtime/callbacks/test_logging_callback.py new file mode 100644 index 0000000000..0c6c1fd191 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/callbacks/test_logging_callback.py @@ -0,0 +1,700 @@ +""" +Comprehensive unit tests for core/model_runtime/callbacks/logging_callback.py + +Coverage targets: + - LoggingCallback.on_before_invoke (all branches: stop, tools, user, stream, + prompt_message.name, model_parameters) + - LoggingCallback.on_new_chunk (writes to stdout) + - LoggingCallback.on_after_invoke (all branches: tool_calls present / absent) + - LoggingCallback.on_invoke_error (logs exception via logger.exception) +""" + +from __future__ import annotations + +import json +from collections.abc import Sequence +from decimal import Decimal +from unittest.mock import MagicMock, patch + +import pytest + +from dify_graph.model_runtime.callbacks.logging_callback import LoggingCallback +from dify_graph.model_runtime.entities.llm_entities import ( + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, + LLMUsage, +) +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + PromptMessageTool, + SystemPromptMessage, + UserPromptMessage, +) + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + + +def _make_usage() -> LLMUsage: + """Return a minimal LLMUsage instance.""" + return LLMUsage( + prompt_tokens=10, + prompt_unit_price=Decimal("0.001"), + prompt_price_unit=Decimal("0.001"), + prompt_price=Decimal("0.01"), + completion_tokens=20, + completion_unit_price=Decimal("0.002"), + completion_price_unit=Decimal("0.002"), + completion_price=Decimal("0.04"), + total_tokens=30, + total_price=Decimal("0.05"), + currency="USD", + latency=0.5, + ) + + +def _make_llm_result( + content: str = "hello world", + tool_calls: list | None = None, + model: str = "gpt-4", + system_fingerprint: str | None = "fp-abc", +) -> LLMResult: + """Return an LLMResult with an AssistantPromptMessage.""" + assistant_msg = AssistantPromptMessage( + content=content, + tool_calls=tool_calls or [], + ) + return LLMResult( + model=model, + message=assistant_msg, + usage=_make_usage(), + system_fingerprint=system_fingerprint, + ) + + +def _make_chunk(content: str = "chunk-text") -> LLMResultChunk: + """Return a minimal LLMResultChunk.""" + return LLMResultChunk( + model="gpt-4", + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage(content=content), + ), + ) + + +def _make_user_prompt(content: str = "Hello!", name: str | None = None) -> UserPromptMessage: + return UserPromptMessage(content=content, name=name) + + +def _make_system_prompt(content: str = "You are helpful.") -> SystemPromptMessage: + return SystemPromptMessage(content=content) + + +def _make_tool(name: str = "my_tool") -> PromptMessageTool: + return PromptMessageTool(name=name, description="A tool", parameters={}) + + +def _make_tool_call( + call_id: str = "call-1", + func_name: str = "some_func", + arguments: str = '{"key": "value"}', +) -> AssistantPromptMessage.ToolCall: + return AssistantPromptMessage.ToolCall( + id=call_id, + type="function", + function=AssistantPromptMessage.ToolCall.ToolCallFunction(name=func_name, arguments=arguments), + ) + + +# --------------------------------------------------------------------------- +# Fixture: shared LoggingCallback instance (no heavy state) +# --------------------------------------------------------------------------- + + +@pytest.fixture +def cb() -> LoggingCallback: + return LoggingCallback() + + +@pytest.fixture +def llm_instance() -> MagicMock: + return MagicMock() + + +# =========================================================================== +# Tests for on_before_invoke +# =========================================================================== + + +class TestOnBeforeInvoke: + """Tests for LoggingCallback.on_before_invoke.""" + + def _invoke( + self, + cb: LoggingCallback, + llm_instance: MagicMock, + *, + model: str = "gpt-4", + credentials: dict | None = None, + prompt_messages: list | None = None, + model_parameters: dict | None = None, + tools: list[PromptMessageTool] | None = None, + stop: Sequence[str] | None = None, + stream: bool = True, + user: str | None = None, + ): + cb.on_before_invoke( + llm_instance=llm_instance, + model=model, + credentials=credentials or {}, + prompt_messages=prompt_messages or [], + model_parameters=model_parameters or {}, + tools=tools, + stop=stop, + stream=stream, + user=user, + ) + + def test_minimal_call_does_not_raise(self, cb: LoggingCallback, llm_instance: MagicMock): + """Calling with bare-minimum args should not raise.""" + self._invoke(cb, llm_instance) + + def test_model_name_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """The model name must appear in print_text calls.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, model="claude-3") + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "claude-3" in calls_text + + def test_model_parameters_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """Each key-value pair of model_parameters must be printed.""" + params = {"temperature": 0.7, "max_tokens": 512} + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, model_parameters=params) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "temperature" in calls_text + assert "0.7" in calls_text + assert "max_tokens" in calls_text + assert "512" in calls_text + + def test_empty_model_parameters(self, cb: LoggingCallback, llm_instance: MagicMock): + """Empty model_parameters dict should not raise.""" + self._invoke(cb, llm_instance, model_parameters={}) + + # ------------------------------------------------------------------ + # stop branch + # ------------------------------------------------------------------ + + def test_stop_branch_printed_when_provided(self, cb: LoggingCallback, llm_instance: MagicMock): + """stop words must appear in output when provided.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, stop=["STOP", "END"]) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "stop" in calls_text + + def test_stop_branch_skipped_when_none(self, cb: LoggingCallback, llm_instance: MagicMock): + """When stop=None the stop line must NOT appear.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, stop=None) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "\tstop:" not in calls_text + + def test_stop_branch_skipped_when_empty_list(self, cb: LoggingCallback, llm_instance: MagicMock): + """When stop=[] (falsy) the stop line must NOT appear.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, stop=[]) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "\tstop:" not in calls_text + + # ------------------------------------------------------------------ + # tools branch + # ------------------------------------------------------------------ + + def test_tools_branch_printed_when_provided(self, cb: LoggingCallback, llm_instance: MagicMock): + """Tool names must appear in output when tools are provided.""" + tools = [_make_tool("search"), _make_tool("calculate")] + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, tools=tools) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "search" in calls_text + assert "calculate" in calls_text + + def test_tools_branch_skipped_when_none(self, cb: LoggingCallback, llm_instance: MagicMock): + """When tools=None the Tools section must NOT appear.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, tools=None) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "Tools:" not in calls_text + + def test_tools_branch_skipped_when_empty_list(self, cb: LoggingCallback, llm_instance: MagicMock): + """When tools=[] (falsy) the Tools section must NOT appear.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, tools=[]) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "Tools:" not in calls_text + + # ------------------------------------------------------------------ + # user branch + # ------------------------------------------------------------------ + + def test_user_printed_when_provided(self, cb: LoggingCallback, llm_instance: MagicMock): + """User string must appear in output when provided.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, user="alice") + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "alice" in calls_text + + def test_user_skipped_when_none(self, cb: LoggingCallback, llm_instance: MagicMock): + """When user=None the User line must NOT appear.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, user=None) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "User:" not in calls_text + + # ------------------------------------------------------------------ + # stream branch + # ------------------------------------------------------------------ + + def test_stream_true_prints_new_chunk_header(self, cb: LoggingCallback, llm_instance: MagicMock): + """When stream=True the [on_llm_new_chunk] marker must be printed.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, stream=True) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "[on_llm_new_chunk]" in calls_text + + def test_stream_false_no_new_chunk_header(self, cb: LoggingCallback, llm_instance: MagicMock): + """When stream=False the [on_llm_new_chunk] marker must NOT appear.""" + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, stream=False) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "[on_llm_new_chunk]" not in calls_text + + # ------------------------------------------------------------------ + # prompt_messages branch + # ------------------------------------------------------------------ + + def test_prompt_message_with_name_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """When a PromptMessage has a name it must be printed.""" + msg = _make_user_prompt("hi", name="bob") + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, prompt_messages=[msg]) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "bob" in calls_text + + def test_prompt_message_without_name_skips_name_line(self, cb: LoggingCallback, llm_instance: MagicMock): + """When a PromptMessage has no name the name line must NOT appear.""" + msg = _make_user_prompt("hi", name=None) + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, prompt_messages=[msg]) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "\tname:" not in calls_text + + def test_prompt_message_role_and_content_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """Role and content of each PromptMessage must appear in output.""" + msg = _make_system_prompt("Be concise.") + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, prompt_messages=[msg]) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "system" in calls_text + assert "Be concise." in calls_text + + def test_multiple_prompt_messages_all_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """All entries in prompt_messages are iterated and printed.""" + msgs = [ + _make_system_prompt("sys"), + _make_user_prompt("user msg"), + ] + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, prompt_messages=msgs) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "sys" in calls_text + assert "user msg" in calls_text + + # ------------------------------------------------------------------ + # Combination: everything provided + # ------------------------------------------------------------------ + + def test_all_optional_fields_combined(self, cb: LoggingCallback, llm_instance: MagicMock): + """Supply stop, tools, user, multiple params, named message – no exception.""" + msgs = [_make_user_prompt("question", name="alice")] + tools = [_make_tool("tool_a")] + with patch.object(cb, "print_text"): + self._invoke( + cb, + llm_instance, + model="gpt-3.5", + model_parameters={"temperature": 1.0, "top_p": 0.9}, + tools=tools, + stop=["DONE"], + stream=True, + user="alice", + prompt_messages=msgs, + ) + + +# =========================================================================== +# Tests for on_new_chunk +# =========================================================================== + + +class TestOnNewChunk: + """Tests for LoggingCallback.on_new_chunk.""" + + def test_chunk_content_written_to_stdout(self, cb: LoggingCallback, llm_instance: MagicMock): + """on_new_chunk must write the chunk's text content to sys.stdout.""" + chunk = _make_chunk("hello from LLM") + written = [] + + with patch("sys.stdout") as mock_stdout: + mock_stdout.write.side_effect = written.append + cb.on_new_chunk( + llm_instance=llm_instance, + chunk=chunk, + model="gpt-4", + credentials={}, + prompt_messages=[], + model_parameters={}, + ) + mock_stdout.write.assert_called_once_with("hello from LLM") + mock_stdout.flush.assert_called_once() + + def test_chunk_content_empty_string(self, cb: LoggingCallback, llm_instance: MagicMock): + """Works correctly even when the chunk content is an empty string.""" + chunk = _make_chunk("") + with patch("sys.stdout") as mock_stdout: + cb.on_new_chunk( + llm_instance=llm_instance, + chunk=chunk, + model="gpt-4", + credentials={}, + prompt_messages=[], + model_parameters={}, + ) + mock_stdout.write.assert_called_once_with("") + mock_stdout.flush.assert_called_once() + + def test_chunk_passes_all_optional_params(self, cb: LoggingCallback, llm_instance: MagicMock): + """All optional parameters are accepted without errors.""" + chunk = _make_chunk("data") + with patch("sys.stdout"): + cb.on_new_chunk( + llm_instance=llm_instance, + chunk=chunk, + model="gpt-4", + credentials={"key": "secret"}, + prompt_messages=[_make_user_prompt("q")], + model_parameters={"temperature": 0.5}, + tools=[_make_tool("t1")], + stop=["EOS"], + stream=True, + user="bob", + ) + + +# =========================================================================== +# Tests for on_after_invoke +# =========================================================================== + + +class TestOnAfterInvoke: + """Tests for LoggingCallback.on_after_invoke.""" + + def _invoke( + self, + cb: LoggingCallback, + llm_instance: MagicMock, + result: LLMResult, + **kwargs, + ): + cb.on_after_invoke( + llm_instance=llm_instance, + result=result, + model=result.model, + credentials={}, + prompt_messages=[], + model_parameters={}, + **kwargs, + ) + + def test_basic_result_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """After-invoke header, content, model, usage, fingerprint must be printed.""" + result = _make_llm_result() + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, result) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "[on_llm_after_invoke]" in calls_text + assert "hello world" in calls_text + assert "gpt-4" in calls_text + assert "fp-abc" in calls_text + + def test_no_tool_calls_skips_tool_call_block(self, cb: LoggingCallback, llm_instance: MagicMock): + """When there are no tool_calls the 'Tool calls:' block must NOT appear.""" + result = _make_llm_result(tool_calls=[]) + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, result) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "Tool calls:" not in calls_text + + def test_with_tool_calls_prints_all_fields(self, cb: LoggingCallback, llm_instance: MagicMock): + """When tool_calls exist their id, name, and JSON arguments must be printed.""" + tc = _make_tool_call( + call_id="call-xyz", + func_name="fetch_data", + arguments='{"url": "https://example.com"}', + ) + result = _make_llm_result(tool_calls=[tc]) + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, result) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "Tool calls:" in calls_text + assert "call-xyz" in calls_text + assert "fetch_data" in calls_text + # arguments should be JSON-dumped + assert "https://example.com" in calls_text + + def test_multiple_tool_calls_all_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """All tool calls in the list must be iterated.""" + tcs = [ + _make_tool_call("id-1", "func_a", '{"a": 1}'), + _make_tool_call("id-2", "func_b", '{"b": 2}'), + ] + result = _make_llm_result(tool_calls=tcs) + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, result) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "id-1" in calls_text + assert "func_a" in calls_text + assert "id-2" in calls_text + assert "func_b" in calls_text + + def test_system_fingerprint_none_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """When system_fingerprint is None it should still be printed (as None).""" + result = _make_llm_result(system_fingerprint=None) + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, result) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "System Fingerprint: None" in calls_text + + def test_usage_printed(self, cb: LoggingCallback, llm_instance: MagicMock): + """The usage object must appear in the printed output.""" + result = _make_llm_result() + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, result) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "Usage:" in calls_text + + def test_tool_call_arguments_are_json_dumped(self, cb: LoggingCallback, llm_instance: MagicMock): + """Verify json.dumps is applied to the arguments field (a string).""" + raw_args = '{"x": 42}' + tc = _make_tool_call(arguments=raw_args) + result = _make_llm_result(tool_calls=[tc]) + with patch.object(cb, "print_text") as mock_print: + self._invoke(cb, llm_instance, result) + + # Check if any call to print_text included the expected (json-encoded) arguments + # json.dumps(raw_args) produces a string starting and ending with quotes + expected_substring = json.dumps(raw_args) + found = any(expected_substring in str(call.args[0]) for call in mock_print.call_args_list) + assert found, f"Expected {expected_substring} to be printed in one of the calls" + + def test_optional_params_accepted(self, cb: LoggingCallback, llm_instance: MagicMock): + """All optional parameters should be accepted without error.""" + result = _make_llm_result() + cb.on_after_invoke( + llm_instance=llm_instance, + result=result, + model=result.model, + credentials={"key": "secret"}, + prompt_messages=[_make_user_prompt("q")], + model_parameters={"temperature": 0.9}, + tools=[_make_tool("t")], + stop=[""], + stream=False, + user="carol", + ) + + +# =========================================================================== +# Tests for on_invoke_error +# =========================================================================== + + +class TestOnInvokeError: + """Tests for LoggingCallback.on_invoke_error.""" + + def _invoke_error( + self, + cb: LoggingCallback, + llm_instance: MagicMock, + ex: Exception, + **kwargs, + ): + cb.on_invoke_error( + llm_instance=llm_instance, + ex=ex, + model="gpt-4", + credentials={}, + prompt_messages=[], + model_parameters={}, + **kwargs, + ) + + def test_prints_error_header(self, cb: LoggingCallback, llm_instance: MagicMock): + """The [on_llm_invoke_error] banner must be printed.""" + with patch.object(cb, "print_text") as mock_print: + with patch("dify_graph.model_runtime.callbacks.logging_callback.logger") as mock_logger: + self._invoke_error(cb, llm_instance, RuntimeError("boom")) + calls_text = " ".join(str(c) for c in mock_print.call_args_list) + assert "[on_llm_invoke_error]" in calls_text + + def test_exception_logged_via_logger_exception(self, cb: LoggingCallback, llm_instance: MagicMock): + """logger.exception must be called with the exception.""" + ex = ValueError("something went wrong") + with patch.object(cb, "print_text"): + with patch("dify_graph.model_runtime.callbacks.logging_callback.logger") as mock_logger: + self._invoke_error(cb, llm_instance, ex) + mock_logger.exception.assert_called_once_with(ex) + + def test_exception_type_variety(self, cb: LoggingCallback, llm_instance: MagicMock): + """Works with any exception type (TypeError, IOError, etc.).""" + for exc_cls in (TypeError, IOError, KeyError, Exception): + ex = exc_cls("error") + with patch.object(cb, "print_text"): + with patch("dify_graph.model_runtime.callbacks.logging_callback.logger") as mock_logger: + self._invoke_error(cb, llm_instance, ex) + mock_logger.exception.assert_called_once_with(ex) + + def test_optional_params_accepted(self, cb: LoggingCallback, llm_instance: MagicMock): + """All optional parameters should be accepted without error.""" + ex = RuntimeError("fail") + with patch.object(cb, "print_text"): + with patch("dify_graph.model_runtime.callbacks.logging_callback.logger"): + cb.on_invoke_error( + llm_instance=llm_instance, + ex=ex, + model="gpt-4", + credentials={"key": "secret"}, + prompt_messages=[_make_user_prompt("q")], + model_parameters={"temperature": 0.7}, + tools=[_make_tool("t")], + stop=["STOP"], + stream=True, + user="dave", + ) + + +# =========================================================================== +# Tests for print_text (inherited from Callback, exercised through LoggingCallback) +# =========================================================================== + + +class TestPrintText: + """Verify that print_text from the Callback base class works correctly.""" + + def test_print_text_with_color(self, cb: LoggingCallback, capsys): + """print_text with a known colour should emit an ANSI escape sequence.""" + cb.print_text("hello", color="blue") + captured = capsys.readouterr() + assert "hello" in captured.out + # ANSI escape codes should be present + assert "\x1b[" in captured.out + + def test_print_text_without_color(self, cb: LoggingCallback, capsys): + """print_text without colour should print plain text.""" + cb.print_text("plain text") + captured = capsys.readouterr() + assert "plain text" in captured.out + + def test_print_text_all_colours(self, cb: LoggingCallback, capsys): + """Verify all supported colour keys don't raise.""" + for colour in ("blue", "yellow", "pink", "green", "red"): + cb.print_text("x", color=colour) + captured = capsys.readouterr() + # All outputs should contain 'x' (5 calls) + assert captured.out.count("x") >= 5 + + +# =========================================================================== +# Integration-style test: real print_text called (no mocking) +# =========================================================================== + + +class TestLoggingCallbackIntegration: + """Light integration tests – real print_text calls, just checking no exceptions.""" + + def test_on_before_invoke_full_run(self, capsys): + """Full on_before_invoke run with all optional fields – verifies real output.""" + cb = LoggingCallback() + llm = MagicMock() + msgs = [_make_user_prompt("Who are you?", name="tester")] + tools = [_make_tool("calculator")] + cb.on_before_invoke( + llm_instance=llm, + model="gpt-4-turbo", + credentials={"api_key": "sk-xxx"}, + prompt_messages=msgs, + model_parameters={"temperature": 0.8}, + tools=tools, + stop=["STOP"], + stream=True, + user="test_user", + ) + captured = capsys.readouterr() + assert "gpt-4-turbo" in captured.out + assert "calculator" in captured.out + assert "test_user" in captured.out + assert "STOP" in captured.out + assert "tester" in captured.out + + def test_on_new_chunk_full_run(self, capsys): + """Full on_new_chunk run – verifies real stdout write.""" + cb = LoggingCallback() + chunk = _make_chunk("streaming token") + cb.on_new_chunk( + llm_instance=MagicMock(), + chunk=chunk, + model="gpt-4", + credentials={}, + prompt_messages=[], + model_parameters={}, + ) + captured = capsys.readouterr() + assert "streaming token" in captured.out + + def test_on_after_invoke_full_run_with_tool_calls(self, capsys): + """Full on_after_invoke run with tool calls – verifies real output.""" + cb = LoggingCallback() + tc = _make_tool_call("call-99", "do_thing", '{"n": 5}') + result = _make_llm_result(content="result content", tool_calls=[tc], system_fingerprint="fp-xyz") + cb.on_after_invoke( + llm_instance=MagicMock(), + result=result, + model=result.model, + credentials={}, + prompt_messages=[], + model_parameters={}, + ) + captured = capsys.readouterr() + assert "result content" in captured.out + assert "call-99" in captured.out + assert "do_thing" in captured.out + assert "fp-xyz" in captured.out + + def test_on_invoke_error_full_run(self, capsys): + """Full on_invoke_error run – just verifies no exception is raised.""" + cb = LoggingCallback() + ex = RuntimeError("something bad happened") + # logger.exception writes to stderr; we just confirm it doesn't crash + cb.on_invoke_error( + llm_instance=MagicMock(), + ex=ex, + model="gpt-4", + credentials={}, + prompt_messages=[], + model_parameters={}, + ) + captured = capsys.readouterr() + assert "[on_llm_invoke_error]" in captured.out diff --git a/api/tests/unit_tests/dify_graph/model_runtime/entities/test_common_entities.py b/api/tests/unit_tests/dify_graph/model_runtime/entities/test_common_entities.py new file mode 100644 index 0000000000..db147fb0cd --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/entities/test_common_entities.py @@ -0,0 +1,35 @@ +from dify_graph.model_runtime.entities.common_entities import I18nObject + + +class TestI18nObject: + def test_i18n_object_with_both_languages(self): + """ + Test I18nObject when both zh_Hans and en_US are provided. + """ + i18n = I18nObject(zh_Hans="你好", en_US="Hello") + assert i18n.zh_Hans == "你好" + assert i18n.en_US == "Hello" + + def test_i18n_object_fallback_to_en_us(self): + """ + Test I18nObject when zh_Hans is missing, it should fallback to en_US. + """ + i18n = I18nObject(en_US="Hello") + assert i18n.zh_Hans == "Hello" + assert i18n.en_US == "Hello" + + def test_i18n_object_with_none_zh_hans(self): + """ + Test I18nObject when zh_Hans is None, it should fallback to en_US. + """ + i18n = I18nObject(zh_Hans=None, en_US="Hello") + assert i18n.zh_Hans == "Hello" + assert i18n.en_US == "Hello" + + def test_i18n_object_with_empty_zh_hans(self): + """ + Test I18nObject when zh_Hans is an empty string, it should fallback to en_US. + """ + i18n = I18nObject(zh_Hans="", en_US="Hello") + assert i18n.zh_Hans == "Hello" + assert i18n.en_US == "Hello" diff --git a/api/tests/unit_tests/core/model_runtime/entities/test_llm_entities.py b/api/tests/unit_tests/dify_graph/model_runtime/entities/test_llm_entities.py similarity index 100% rename from api/tests/unit_tests/core/model_runtime/entities/test_llm_entities.py rename to api/tests/unit_tests/dify_graph/model_runtime/entities/test_llm_entities.py diff --git a/api/tests/unit_tests/dify_graph/model_runtime/entities/test_message_entities.py b/api/tests/unit_tests/dify_graph/model_runtime/entities/test_message_entities.py new file mode 100644 index 0000000000..a96a38f5cd --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/entities/test_message_entities.py @@ -0,0 +1,210 @@ +import pytest + +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + AudioPromptMessageContent, + DocumentPromptMessageContent, + ImagePromptMessageContent, + PromptMessageContent, + PromptMessageContentType, + PromptMessageFunction, + PromptMessageRole, + PromptMessageTool, + SystemPromptMessage, + TextPromptMessageContent, + ToolPromptMessage, + UserPromptMessage, + VideoPromptMessageContent, +) + + +class TestPromptMessageRole: + def test_value_of(self): + assert PromptMessageRole.value_of("system") == PromptMessageRole.SYSTEM + assert PromptMessageRole.value_of("user") == PromptMessageRole.USER + assert PromptMessageRole.value_of("assistant") == PromptMessageRole.ASSISTANT + assert PromptMessageRole.value_of("tool") == PromptMessageRole.TOOL + + with pytest.raises(ValueError, match="invalid prompt message type value invalid"): + PromptMessageRole.value_of("invalid") + + +class TestPromptMessageEntities: + def test_prompt_message_tool(self): + tool = PromptMessageTool(name="test_tool", description="test desc", parameters={"foo": "bar"}) + assert tool.name == "test_tool" + assert tool.description == "test desc" + assert tool.parameters == {"foo": "bar"} + + def test_prompt_message_function(self): + tool = PromptMessageTool(name="test_tool", description="test desc", parameters={"foo": "bar"}) + func = PromptMessageFunction(function=tool) + assert func.type == "function" + assert func.function == tool + + +class TestPromptMessageContent: + def test_text_content(self): + content = TextPromptMessageContent(data="hello") + assert content.type == PromptMessageContentType.TEXT + assert content.data == "hello" + + def test_image_content(self): + content = ImagePromptMessageContent( + format="jpg", base64_data="abc", mime_type="image/jpeg", detail=ImagePromptMessageContent.DETAIL.HIGH + ) + assert content.type == PromptMessageContentType.IMAGE + assert content.detail == ImagePromptMessageContent.DETAIL.HIGH + assert content.data == "data:image/jpeg;base64,abc" + + def test_image_content_url(self): + content = ImagePromptMessageContent(format="jpg", url="https://example.com/image.jpg", mime_type="image/jpeg") + assert content.data == "https://example.com/image.jpg" + + def test_audio_content(self): + content = AudioPromptMessageContent(format="mp3", base64_data="abc", mime_type="audio/mpeg") + assert content.type == PromptMessageContentType.AUDIO + assert content.data == "data:audio/mpeg;base64,abc" + + def test_video_content(self): + content = VideoPromptMessageContent(format="mp4", base64_data="abc", mime_type="video/mp4") + assert content.type == PromptMessageContentType.VIDEO + assert content.data == "data:video/mp4;base64,abc" + + def test_document_content(self): + content = DocumentPromptMessageContent(format="pdf", base64_data="abc", mime_type="application/pdf") + assert content.type == PromptMessageContentType.DOCUMENT + assert content.data == "data:application/pdf;base64,abc" + + +class TestPromptMessages: + def test_user_prompt_message(self): + msg = UserPromptMessage(content="hello") + assert msg.role == PromptMessageRole.USER + assert msg.content == "hello" + assert msg.is_empty() is False + assert msg.get_text_content() == "hello" + + def test_user_prompt_message_complex_content(self): + content = [TextPromptMessageContent(data="hello "), TextPromptMessageContent(data="world")] + msg = UserPromptMessage(content=content) + assert msg.get_text_content() == "hello world" + + # Test validation from dict + msg2 = UserPromptMessage(content=[{"type": "text", "data": "hi"}]) + assert isinstance(msg2.content[0], TextPromptMessageContent) + assert msg2.content[0].data == "hi" + + def test_prompt_message_empty(self): + msg = UserPromptMessage(content=None) + assert msg.is_empty() is True + assert msg.get_text_content() == "" + + def test_assistant_prompt_message(self): + msg = AssistantPromptMessage(content="thinking...") + assert msg.role == PromptMessageRole.ASSISTANT + assert msg.is_empty() is False + + tool_call = AssistantPromptMessage.ToolCall( + id="call_1", + type="function", + function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="test", arguments="{}"), + ) + msg_with_tools = AssistantPromptMessage(content=None, tool_calls=[tool_call]) + assert msg_with_tools.is_empty() is False + assert msg_with_tools.role == PromptMessageRole.ASSISTANT + + def test_assistant_tool_call_id_transform(self): + tool_call = AssistantPromptMessage.ToolCall( + id=123, + type="function", + function=AssistantPromptMessage.ToolCall.ToolCallFunction(name="test", arguments="{}"), + ) + assert tool_call.id == "123" + + def test_system_prompt_message(self): + msg = SystemPromptMessage(content="you are a bot") + assert msg.role == PromptMessageRole.SYSTEM + assert msg.content == "you are a bot" + + def test_tool_prompt_message(self): + # Case 1: Both content and tool_call_id are present + msg = ToolPromptMessage(content="result", tool_call_id="call_1") + assert msg.role == PromptMessageRole.TOOL + assert msg.tool_call_id == "call_1" + assert msg.is_empty() is False + + # Case 2: Content is present, but tool_call_id is empty + msg_content_only = ToolPromptMessage(content="result", tool_call_id="") + assert msg_content_only.is_empty() is False + + # Case 3: Content is None, but tool_call_id is present + msg_id_only = ToolPromptMessage(content=None, tool_call_id="call_1") + assert msg_id_only.is_empty() is False + + # Case 4: Both content and tool_call_id are empty + msg_empty = ToolPromptMessage(content=None, tool_call_id="") + assert msg_empty.is_empty() is True + + def test_prompt_message_validation_errors(self): + with pytest.raises(KeyError): + # Invalid content type in list + UserPromptMessage(content=[{"type": "invalid", "data": "foo"}]) + + with pytest.raises(ValueError, match="invalid prompt message"): + # Not a dict or PromptMessageContent + UserPromptMessage(content=[123]) + + def test_prompt_message_serialization(self): + # Case: content is None + assert UserPromptMessage(content=None).serialize_content(None) is None + + # Case: content is str + assert UserPromptMessage(content="hello").serialize_content("hello") == "hello" + + # Case: content is list of dict + content_list = [{"type": "text", "data": "hi"}] + msg = UserPromptMessage(content=content_list) + assert msg.serialize_content(msg.content) == [{"type": PromptMessageContentType.TEXT, "data": "hi"}] + + # Case: content is Sequence but not list (e.g. tuple) + # To hit line 204, we can call serialize_content manually or + # try to pass a type that pydantic doesn't convert to list in its internal state. + # Actually, let's just call it manually on the instance. + msg = UserPromptMessage(content="test") + content_tuple = (TextPromptMessageContent(data="hi"),) + assert msg.serialize_content(content_tuple) == content_tuple + + def test_prompt_message_mixed_content_validation(self): + # Test branch: isinstance(prompt, PromptMessageContent) + # but not (TextPromptMessageContent | MultiModalPromptMessageContent) + # Line 187: prompt = CONTENT_TYPE_MAPPING[prompt.type].model_validate(prompt.model_dump()) + + # We need a PromptMessageContent that is NOT Text or MultiModal. + # But PromptMessageContentUnionTypes discriminator handles this usually. + # We can bypass high-level validation by passing the object directly in a list. + + class MockContent(PromptMessageContent): + type: PromptMessageContentType = PromptMessageContentType.TEXT + data: str + + mock_item = MockContent(data="test") + msg = UserPromptMessage(content=[mock_item]) + # It should hit line 187 and convert to TextPromptMessageContent + assert isinstance(msg.content[0], TextPromptMessageContent) + assert msg.content[0].data == "test" + + def test_prompt_message_get_text_content_branches(self): + # content is None + msg_none = UserPromptMessage(content=None) + assert msg_none.get_text_content() == "" + + # content is list but no text content + image = ImagePromptMessageContent(format="jpg", base64_data="abc", mime_type="image/jpeg") + msg_image = UserPromptMessage(content=[image]) + assert msg_image.get_text_content() == "" + + # content is list with mixed + text = TextPromptMessageContent(data="hello") + msg_mixed = UserPromptMessage(content=[text, image]) + assert msg_mixed.get_text_content() == "hello" diff --git a/api/tests/unit_tests/dify_graph/model_runtime/entities/test_model_entities.py b/api/tests/unit_tests/dify_graph/model_runtime/entities/test_model_entities.py new file mode 100644 index 0000000000..3d03361f2a --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/entities/test_model_entities.py @@ -0,0 +1,220 @@ +from decimal import Decimal + +import pytest + +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import ( + AIModelEntity, + DefaultParameterName, + FetchFrom, + ModelFeature, + ModelPropertyKey, + ModelType, + ModelUsage, + ParameterRule, + ParameterType, + PriceConfig, + PriceInfo, + PriceType, + ProviderModel, +) + + +class TestModelType: + def test_value_of(self): + assert ModelType.value_of("text-generation") == ModelType.LLM + assert ModelType.value_of(ModelType.LLM) == ModelType.LLM + assert ModelType.value_of("embeddings") == ModelType.TEXT_EMBEDDING + assert ModelType.value_of(ModelType.TEXT_EMBEDDING) == ModelType.TEXT_EMBEDDING + assert ModelType.value_of("reranking") == ModelType.RERANK + assert ModelType.value_of(ModelType.RERANK) == ModelType.RERANK + assert ModelType.value_of("speech2text") == ModelType.SPEECH2TEXT + assert ModelType.value_of(ModelType.SPEECH2TEXT) == ModelType.SPEECH2TEXT + assert ModelType.value_of("tts") == ModelType.TTS + assert ModelType.value_of(ModelType.TTS) == ModelType.TTS + assert ModelType.value_of(ModelType.MODERATION) == ModelType.MODERATION + + with pytest.raises(ValueError, match="invalid origin model type invalid"): + ModelType.value_of("invalid") + + def test_to_origin_model_type(self): + assert ModelType.LLM.to_origin_model_type() == "text-generation" + assert ModelType.TEXT_EMBEDDING.to_origin_model_type() == "embeddings" + assert ModelType.RERANK.to_origin_model_type() == "reranking" + assert ModelType.SPEECH2TEXT.to_origin_model_type() == "speech2text" + assert ModelType.TTS.to_origin_model_type() == "tts" + assert ModelType.MODERATION.to_origin_model_type() == "moderation" + + # Testing the else branch in to_origin_model_type + # Since it's a StrEnum, it's hard to get an invalid value here unless we mock or Force it. + # But if we look at the implementation: + # if self == self.LLM: ... elif ... else: raise ValueError + # We can try to create a "dummy" member if possible, or just skip it if we have 100% coverage otherwise. + # Actually, adding a new member to an enum at runtime is possible but messy. + # Let's see if we can trigger it. + + +class TestFetchFrom: + def test_values(self): + assert FetchFrom.PREDEFINED_MODEL == "predefined-model" + assert FetchFrom.CUSTOMIZABLE_MODEL == "customizable-model" + + +class TestModelFeature: + def test_values(self): + assert ModelFeature.TOOL_CALL == "tool-call" + assert ModelFeature.MULTI_TOOL_CALL == "multi-tool-call" + assert ModelFeature.AGENT_THOUGHT == "agent-thought" + assert ModelFeature.VISION == "vision" + assert ModelFeature.STREAM_TOOL_CALL == "stream-tool-call" + assert ModelFeature.DOCUMENT == "document" + assert ModelFeature.VIDEO == "video" + assert ModelFeature.AUDIO == "audio" + assert ModelFeature.STRUCTURED_OUTPUT == "structured-output" + + +class TestDefaultParameterName: + def test_value_of(self): + assert DefaultParameterName.value_of("temperature") == DefaultParameterName.TEMPERATURE + assert DefaultParameterName.value_of("top_p") == DefaultParameterName.TOP_P + + with pytest.raises(ValueError, match="invalid parameter name invalid"): + DefaultParameterName.value_of("invalid") + + +class TestParameterType: + def test_values(self): + assert ParameterType.FLOAT == "float" + assert ParameterType.INT == "int" + assert ParameterType.STRING == "string" + assert ParameterType.BOOLEAN == "boolean" + assert ParameterType.TEXT == "text" + + +class TestModelPropertyKey: + def test_values(self): + assert ModelPropertyKey.MODE == "mode" + assert ModelPropertyKey.CONTEXT_SIZE == "context_size" + + +class TestProviderModel: + def test_provider_model(self): + model = ProviderModel( + model="gpt-4", + label=I18nObject(en_US="GPT-4"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 8192}, + ) + assert model.model == "gpt-4" + assert model.support_structure_output is False + + model_with_features = ProviderModel( + model="gpt-4", + label=I18nObject(en_US="GPT-4"), + model_type=ModelType.LLM, + features=[ModelFeature.STRUCTURED_OUTPUT], + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 8192}, + ) + assert model_with_features.support_structure_output is True + + +class TestParameterRule: + def test_parameter_rule(self): + rule = ParameterRule( + name="temperature", + label=I18nObject(en_US="Temperature"), + type=ParameterType.FLOAT, + default=0.7, + min=0.0, + max=1.0, + precision=2, + ) + assert rule.name == "temperature" + assert rule.default == 0.7 + + +class TestPriceConfig: + def test_price_config(self): + config = PriceConfig(input=Decimal("0.01"), output=Decimal("0.02"), unit=Decimal("0.001"), currency="USD") + assert config.input == Decimal("0.01") + assert config.output == Decimal("0.02") + + +class TestAIModelEntity: + def test_ai_model_entity_no_json_schema(self): + entity = AIModelEntity( + model="gpt-4", + label=I18nObject(en_US="GPT-4"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 8192}, + parameter_rules=[ + ParameterRule(name="temperature", label=I18nObject(en_US="Temperature"), type=ParameterType.FLOAT) + ], + ) + assert ModelFeature.STRUCTURED_OUTPUT not in (entity.features or []) + + def test_ai_model_entity_with_json_schema(self): + # Case: json_schema in parameter rules, features is None + entity = AIModelEntity( + model="gpt-4", + label=I18nObject(en_US="GPT-4"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 8192}, + parameter_rules=[ + ParameterRule(name="json_schema", label=I18nObject(en_US="JSON Schema"), type=ParameterType.STRING) + ], + ) + assert ModelFeature.STRUCTURED_OUTPUT in entity.features + + def test_ai_model_entity_with_json_schema_and_features_empty(self): + # Case: json_schema in parameter rules, features is empty list + entity = AIModelEntity( + model="gpt-4", + label=I18nObject(en_US="GPT-4"), + model_type=ModelType.LLM, + features=[], + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 8192}, + parameter_rules=[ + ParameterRule(name="json_schema", label=I18nObject(en_US="JSON Schema"), type=ParameterType.STRING) + ], + ) + assert ModelFeature.STRUCTURED_OUTPUT in entity.features + + def test_ai_model_entity_with_json_schema_and_other_features(self): + # Case: json_schema in parameter rules, features has other things + entity = AIModelEntity( + model="gpt-4", + label=I18nObject(en_US="GPT-4"), + model_type=ModelType.LLM, + features=[ModelFeature.VISION], + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 8192}, + parameter_rules=[ + ParameterRule(name="json_schema", label=I18nObject(en_US="JSON Schema"), type=ParameterType.STRING) + ], + ) + assert ModelFeature.STRUCTURED_OUTPUT in entity.features + assert ModelFeature.VISION in entity.features + + +class TestModelUsage: + def test_model_usage(self): + usage = ModelUsage() + assert isinstance(usage, ModelUsage) + + +class TestPriceType: + def test_values(self): + assert PriceType.INPUT == "input" + assert PriceType.OUTPUT == "output" + + +class TestPriceInfo: + def test_price_info(self): + info = PriceInfo(unit_price=Decimal("0.01"), unit=Decimal(1000), total_amount=Decimal("0.05"), currency="USD") + assert info.total_amount == Decimal("0.05") diff --git a/api/tests/unit_tests/dify_graph/model_runtime/errors/test_invoke.py b/api/tests/unit_tests/dify_graph/model_runtime/errors/test_invoke.py new file mode 100644 index 0000000000..af62b2a84c --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/errors/test_invoke.py @@ -0,0 +1,63 @@ +from dify_graph.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +class TestInvokeErrors: + def test_invoke_error_with_description(self): + error = InvokeError("Custom description") + assert error.description == "Custom description" + assert str(error) == "Custom description" + assert isinstance(error, ValueError) + + def test_invoke_error_without_description(self): + error = InvokeError() + assert error.description is None + assert str(error) == "InvokeError" + + def test_invoke_connection_error(self): + # Now preserves class-level description + error = InvokeConnectionError() + assert error.description == "Connection Error" + assert str(error) == "Connection Error" + assert isinstance(error, InvokeError) + + # Test with explicit description + error_with_desc = InvokeConnectionError("Connection Error") + assert error_with_desc.description == "Connection Error" + assert str(error_with_desc) == "Connection Error" + + def test_invoke_server_unavailable_error(self): + error = InvokeServerUnavailableError() + assert error.description == "Server Unavailable Error" + assert str(error) == "Server Unavailable Error" + assert isinstance(error, InvokeError) + + def test_invoke_rate_limit_error(self): + error = InvokeRateLimitError() + assert error.description == "Rate Limit Error" + assert str(error) == "Rate Limit Error" + assert isinstance(error, InvokeError) + + def test_invoke_authorization_error(self): + error = InvokeAuthorizationError() + assert error.description == "Incorrect model credentials provided, please check and try again. " + assert str(error) == "Incorrect model credentials provided, please check and try again. " + assert isinstance(error, InvokeError) + + def test_invoke_bad_request_error(self): + error = InvokeBadRequestError() + assert error.description == "Bad Request Error" + assert str(error) == "Bad Request Error" + assert isinstance(error, InvokeError) + + def test_invoke_error_inheritance(self): + # Test that we can override the default description in subclasses + error = InvokeBadRequestError("Overridden Error") + assert error.description == "Overridden Error" + assert str(error) == "Overridden Error" diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_ai_model.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_ai_model.py new file mode 100644 index 0000000000..382dce876e --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_ai_model.py @@ -0,0 +1,336 @@ +import decimal +from unittest.mock import MagicMock, patch + +import pytest +from redis import RedisError + +from core.plugin.entities.plugin_daemon import PluginDaemonInnerError, PluginModelProviderEntity +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import ( + AIModelEntity, + DefaultParameterName, + FetchFrom, + ModelPropertyKey, + ModelType, + ParameterRule, + ParameterType, + PriceConfig, + PriceType, +) +from dify_graph.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) +from dify_graph.model_runtime.model_providers.__base.ai_model import AIModel + + +class TestAIModel: + @pytest.fixture + def mock_plugin_model_provider(self): + return MagicMock(spec=PluginModelProviderEntity) + + @pytest.fixture + def ai_model(self, mock_plugin_model_provider): + return AIModel( + tenant_id="tenant_123", + model_type=ModelType.LLM, + plugin_id="plugin_123", + provider_name="test_provider", + plugin_model_provider=mock_plugin_model_provider, + ) + + def test_invoke_error_mapping(self, ai_model): + mapping = ai_model._invoke_error_mapping + assert InvokeConnectionError in mapping + assert InvokeServerUnavailableError in mapping + assert InvokeRateLimitError in mapping + assert InvokeAuthorizationError in mapping + assert InvokeBadRequestError in mapping + assert PluginDaemonInnerError in mapping + assert ValueError in mapping + + def test_transform_invoke_error(self, ai_model): + # Case: mapped error (InvokeAuthorizationError) + err = Exception("Original error") + with patch.object(AIModel, "_invoke_error_mapping", {InvokeAuthorizationError: [Exception]}): + transformed = ai_model._transform_invoke_error(err) + assert isinstance(transformed, InvokeAuthorizationError) + assert "Incorrect model credentials provided" in str(transformed.description) + + # Case: mapped error (InvokeError subclass) + with patch.object(AIModel, "_invoke_error_mapping", {InvokeRateLimitError("Rate limit"): [Exception]}): + transformed = ai_model._transform_invoke_error(err) + assert isinstance(transformed, InvokeError) + assert "[test_provider]" in transformed.description + + # Case: mapped error (not InvokeError) + class CustomNonInvokeError(Exception): + pass + + with patch.object(AIModel, "_invoke_error_mapping", {CustomNonInvokeError: [Exception]}): + transformed = ai_model._transform_invoke_error(err) + assert transformed == err + + # Case: unmapped error + unmapped_err = Exception("Unmapped") + transformed = ai_model._transform_invoke_error(unmapped_err) + assert isinstance(transformed, InvokeError) + assert "Error: Unmapped" in transformed.description + + def test_get_price(self, ai_model): + model_name = "test_model" + credentials = {"key": "value"} + + # Mock get_model_schema + mock_schema = MagicMock(spec=AIModelEntity) + mock_schema.pricing = PriceConfig( + input=decimal.Decimal("0.002"), + output=decimal.Decimal("0.004"), + unit=decimal.Decimal(1000), # 1000 tokens per unit + currency="USD", + ) + + with patch.object(AIModel, "get_model_schema", return_value=mock_schema): + # Test INPUT + price_info = ai_model.get_price(model_name, credentials, PriceType.INPUT, 2000) + assert price_info.unit_price == decimal.Decimal("0.002") + + # Test OUTPUT + price_info = ai_model.get_price(model_name, credentials, PriceType.OUTPUT, 2000) + assert price_info.unit_price == decimal.Decimal("0.004") + + # Case: unit_price is None (returns zeroed PriceInfo) + mock_schema.pricing = None + with patch.object(AIModel, "get_model_schema", return_value=mock_schema): + price_info = ai_model.get_price(model_name, credentials, PriceType.INPUT, 1000) + assert price_info.total_amount == decimal.Decimal("0.0") + + def test_get_price_no_price_config_error(self, ai_model): + model_name = "test_model" + + # We need it to be truthy at line 107 and 112 but falsy at line 127. + class ChangingPriceConfig: + def __init__(self): + self.input = decimal.Decimal("0.01") + self.unit = decimal.Decimal(1) + self.currency = "USD" + self.called = 0 + + def __bool__(self): + self.called += 1 + return self.called <= 2 + + mock_schema = MagicMock() + mock_schema.pricing = ChangingPriceConfig() + + with patch.object(AIModel, "get_model_schema", return_value=mock_schema): + with pytest.raises(ValueError) as excinfo: + ai_model.get_price(model_name, {}, PriceType.INPUT, 1000) + assert "Price config not found" in str(excinfo.value) + + def test_get_model_schema_cache_hit(self, ai_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + + mock_schema = AIModelEntity( + model="test_model", + label=I18nObject(en_US="Test Model"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[], + ) + + with patch("dify_graph.model_runtime.model_providers.__base.ai_model.redis_client") as mock_redis: + mock_redis.get.return_value = mock_schema.model_dump_json().encode() + + schema = ai_model.get_model_schema(model_name, credentials) + + assert schema.model == "test_model" + mock_redis.get.assert_called_once() + + def test_get_model_schema_cache_miss(self, ai_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + + mock_schema = AIModelEntity( + model="test_model", + label=I18nObject(en_US="Test Model"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[], + ) + + with ( + patch("dify_graph.model_runtime.model_providers.__base.ai_model.redis_client") as mock_redis, + patch("core.plugin.impl.model.PluginModelClient") as mock_client, + ): + mock_redis.get.return_value = None + mock_manager = mock_client.return_value + mock_manager.get_model_schema.return_value = mock_schema + + schema = ai_model.get_model_schema(model_name, credentials) + + assert schema == mock_schema + mock_manager.get_model_schema.assert_called_once() + mock_redis.setex.assert_called_once() + + def test_get_model_schema_redis_error(self, ai_model): + model_name = "test_model" + + with ( + patch("dify_graph.model_runtime.model_providers.__base.ai_model.redis_client") as mock_redis, + patch("core.plugin.impl.model.PluginModelClient") as mock_client, + ): + mock_redis.get.side_effect = RedisError("Connection refused") + mock_manager = mock_client.return_value + mock_manager.get_model_schema.return_value = None + + schema = ai_model.get_model_schema(model_name, {}) + + assert schema is None + mock_manager.get_model_schema.assert_called_once() + + def test_get_model_schema_validation_error(self, ai_model): + model_name = "test_model" + + with ( + patch("dify_graph.model_runtime.model_providers.__base.ai_model.redis_client") as mock_redis, + patch("core.plugin.impl.model.PluginModelClient") as mock_client, + ): + mock_redis.get.return_value = b"invalid json" + mock_manager = mock_client.return_value + mock_manager.get_model_schema.return_value = None + + # This should trigger ValidationError at line 166 and go to delete() + schema = ai_model.get_model_schema(model_name, {}) + + assert schema is None + mock_redis.delete.assert_called() + + def test_get_model_schema_redis_delete_error(self, ai_model): + model_name = "test_model" + + with ( + patch("dify_graph.model_runtime.model_providers.__base.ai_model.redis_client") as mock_redis, + patch("core.plugin.impl.model.PluginModelClient") as mock_client, + ): + mock_redis.get.return_value = b'{"invalid": "schema"}' + mock_redis.delete.side_effect = RedisError("Delete failed") + mock_manager = mock_client.return_value + mock_manager.get_model_schema.return_value = None + + schema = ai_model.get_model_schema(model_name, {}) + + assert schema is None + mock_redis.delete.assert_called() + + def test_get_model_schema_redis_setex_error(self, ai_model): + model_name = "test_model" + mock_schema = AIModelEntity( + model="test_model", + label=I18nObject(en_US="Test Model"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[], + ) + + with ( + patch("dify_graph.model_runtime.model_providers.__base.ai_model.redis_client") as mock_redis, + patch("core.plugin.impl.model.PluginModelClient") as mock_client, + ): + mock_redis.get.return_value = None + mock_redis.setex.side_effect = RuntimeError("Setex failed") + mock_manager = mock_client.return_value + mock_manager.get_model_schema.return_value = mock_schema + + schema = ai_model.get_model_schema(model_name, {}) + + assert schema == mock_schema + mock_redis.setex.assert_called() + + def test_get_customizable_model_schema_from_credentials_template_mapping_value_error(self, ai_model): + model_name = "test_model" + + mock_schema = AIModelEntity( + model="test_model", + label=I18nObject(en_US="Test Model"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[ + ParameterRule( + name="invalid", + use_template="invalid_template_name", + label=I18nObject(en_US="Invalid"), + type=ParameterType.FLOAT, + ) + ], + ) + + with patch.object(AIModel, "get_customizable_model_schema", return_value=mock_schema): + schema = ai_model.get_customizable_model_schema_from_credentials(model_name, {}) + assert schema.parameter_rules[0].use_template == "invalid_template_name" + + def test_get_customizable_model_schema_from_credentials(self, ai_model): + model_name = "test_model" + + mock_schema = AIModelEntity( + model="test_model", + label=I18nObject(en_US="Test Model"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.CUSTOMIZABLE_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[ + ParameterRule( + name="temp", use_template="temperature", label=I18nObject(en_US="Temp"), type=ParameterType.FLOAT + ), + ParameterRule( + name="top_p", + use_template="top_p", + label=I18nObject(en_US="Top P"), + type=ParameterType.FLOAT, + help=I18nObject(en_US=""), + ), + ParameterRule( + name="max_tokens", + use_template="max_tokens", + label=I18nObject(en_US="Max Tokens"), + type=ParameterType.INT, + help=I18nObject(en_US="", zh_Hans=""), + ), + ParameterRule(name="custom", label=I18nObject(en_US="Custom"), type=ParameterType.STRING), + ], + ) + + with patch.object(AIModel, "get_customizable_model_schema", return_value=mock_schema): + schema = ai_model.get_customizable_model_schema_from_credentials(model_name, {}) + + assert schema.parameter_rules[0].max == 1.0 + assert schema.parameter_rules[1].help.en_US != "" + assert schema.parameter_rules[2].help.zh_Hans != "" + assert schema.parameter_rules[3].use_template is None + + def test_get_customizable_model_schema_from_credentials_none(self, ai_model): + with patch.object(AIModel, "get_customizable_model_schema", return_value=None): + schema = ai_model.get_customizable_model_schema_from_credentials("model", {}) + assert schema is None + + def test_get_customizable_model_schema_default(self, ai_model): + assert ai_model.get_customizable_model_schema("model", {}) is None + + def test_get_default_parameter_rule_variable_map(self, ai_model): + # Valid + res = ai_model._get_default_parameter_rule_variable_map(DefaultParameterName.TEMPERATURE) + assert res["default"] == 0.0 + + # Invalid + with pytest.raises(Exception) as excinfo: + ai_model._get_default_parameter_rule_variable_map("invalid_name") + assert "Invalid model parameter rule name" in str(excinfo.value) diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_large_language_model.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_large_language_model.py new file mode 100644 index 0000000000..a692f8023a --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_large_language_model.py @@ -0,0 +1,476 @@ +import logging +from collections.abc import Generator, Iterator, Sequence +from dataclasses import dataclass, field +from datetime import datetime +from decimal import Decimal +from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock + +import pytest + +import dify_graph.model_runtime.model_providers.__base.large_language_model as llm_module + +# Access large_language_model members via llm_module to avoid partial import issues in CI +from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from dify_graph.model_runtime.callbacks.base_callback import Callback +from dify_graph.model_runtime.entities.llm_entities import ( + LLMResult, + LLMResultChunk, + LLMResultChunkDelta, + LLMUsage, +) +from dify_graph.model_runtime.entities.message_entities import ( + AssistantPromptMessage, + PromptMessage, + TextPromptMessageContent, + UserPromptMessage, +) +from dify_graph.model_runtime.entities.model_entities import ModelType, PriceInfo +from dify_graph.model_runtime.model_providers.__base.large_language_model import _build_llm_result_from_chunks + + +def _usage(prompt_tokens: int = 1, completion_tokens: int = 2) -> LLMUsage: + return LLMUsage( + prompt_tokens=prompt_tokens, + prompt_unit_price=Decimal("0.001"), + prompt_price_unit=Decimal(1), + prompt_price=Decimal(prompt_tokens) * Decimal("0.001"), + completion_tokens=completion_tokens, + completion_unit_price=Decimal("0.002"), + completion_price_unit=Decimal(1), + completion_price=Decimal(completion_tokens) * Decimal("0.002"), + total_tokens=prompt_tokens + completion_tokens, + total_price=Decimal(prompt_tokens) * Decimal("0.001") + Decimal(completion_tokens) * Decimal("0.002"), + currency="USD", + latency=0.0, + ) + + +def _tool_call_delta( + *, + tool_call_id: str, + tool_type: str = "function", + function_name: str = "", + function_arguments: str = "", +) -> AssistantPromptMessage.ToolCall: + return AssistantPromptMessage.ToolCall( + id=tool_call_id, + type=tool_type, + function=AssistantPromptMessage.ToolCall.ToolCallFunction(name=function_name, arguments=function_arguments), + ) + + +def _chunk( + *, + model: str = "test-model", + content: str | list[Any] | None = None, + tool_calls: list[AssistantPromptMessage.ToolCall] | None = None, + usage: LLMUsage | None = None, + system_fingerprint: str | None = None, +) -> LLMResultChunk: + return LLMResultChunk( + model=model, + system_fingerprint=system_fingerprint, + delta=LLMResultChunkDelta( + index=0, + message=AssistantPromptMessage(content=content, tool_calls=tool_calls or []), + usage=usage, + ), + ) + + +@dataclass +class SpyCallback(Callback): + raise_error: bool = False + before: list[dict[str, Any]] = field(default_factory=list) + new_chunk: list[dict[str, Any]] = field(default_factory=list) + after: list[dict[str, Any]] = field(default_factory=list) + error: list[dict[str, Any]] = field(default_factory=list) + + def on_before_invoke(self, **kwargs: Any) -> None: # type: ignore[override] + self.before.append(kwargs) + + def on_new_chunk(self, **kwargs: Any) -> None: # type: ignore[override] + self.new_chunk.append(kwargs) + + def on_after_invoke(self, **kwargs: Any) -> None: # type: ignore[override] + self.after.append(kwargs) + + def on_invoke_error(self, **kwargs: Any) -> None: # type: ignore[override] + self.error.append(kwargs) + + +class _TestLLM(llm_module.LargeLanguageModel): + def get_price(self, model: str, credentials: dict, price_type: Any, tokens: int) -> PriceInfo: # type: ignore[override] + return PriceInfo( + unit_price=Decimal("0.01"), + unit=Decimal(1), + total_amount=Decimal(tokens) * Decimal("0.01"), + currency="USD", + ) + + def _transform_invoke_error(self, error: Exception) -> Exception: # type: ignore[override] + return RuntimeError(f"transformed: {error}") + + +@pytest.fixture +def llm() -> _TestLLM: + plugin_provider = PluginModelProviderEntity.model_construct( + id="provider-id", + created_at=datetime.now(), + updated_at=datetime.now(), + provider="provider", + tenant_id="tenant", + plugin_unique_identifier="plugin-uid", + plugin_id="plugin-id", + declaration=MagicMock(), + ) + return _TestLLM.model_construct( + tenant_id="tenant", + model_type=ModelType.LLM, + plugin_id="plugin-id", + provider_name="provider", + plugin_model_provider=plugin_provider, + started_at=1.0, + ) + + +def test_gen_tool_call_id_is_uuid_based(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(llm_module.uuid, "uuid4", lambda: SimpleNamespace(hex="abc123")) + assert llm_module._gen_tool_call_id() == "chatcmpl-tool-abc123" + + +def test_run_callbacks_no_callbacks_noop() -> None: + invoked: list[int] = [] + llm_module._run_callbacks(None, event="x", invoke=lambda _: invoked.append(1)) + llm_module._run_callbacks([], event="x", invoke=lambda _: invoked.append(1)) + assert invoked == [] + + +def test_run_callbacks_swallows_error_when_raise_error_false(caplog: pytest.LogCaptureFixture) -> None: + class Boom: + raise_error = False + + caplog.set_level(logging.WARNING) + llm_module._run_callbacks( + [Boom()], event="on_before_invoke", invoke=lambda _: (_ for _ in ()).throw(ValueError("boom")) + ) + assert any("Callback" in record.message and "failed with error" in record.message for record in caplog.records) + + +def test_run_callbacks_reraises_when_raise_error_true() -> None: + class Boom: + raise_error = True + + with pytest.raises(ValueError, match="boom"): + llm_module._run_callbacks( + [Boom()], event="on_before_invoke", invoke=lambda _: (_ for _ in ()).throw(ValueError("boom")) + ) + + +def test_get_or_create_tool_call_empty_id_returns_last() -> None: + calls = [ + _tool_call_delta(tool_call_id="id1", function_name="a"), + _tool_call_delta(tool_call_id="id2", function_name="b"), + ] + assert llm_module._get_or_create_tool_call(calls, "") is calls[-1] + + +def test_get_or_create_tool_call_empty_id_without_existing_raises() -> None: + with pytest.raises(ValueError, match="tool_call_id is empty"): + llm_module._get_or_create_tool_call([], "") + + +def test_get_or_create_tool_call_creates_if_missing() -> None: + calls: list[AssistantPromptMessage.ToolCall] = [] + tool_call = llm_module._get_or_create_tool_call(calls, "new-id") + assert tool_call.id == "new-id" + assert tool_call.function.name == "" + assert tool_call.function.arguments == "" + assert calls == [tool_call] + + +def test_get_or_create_tool_call_returns_existing_when_found() -> None: + existing = _tool_call_delta(tool_call_id="same-id", function_name="fn", function_arguments="{}") + calls = [existing] + assert llm_module._get_or_create_tool_call(calls, "same-id") is existing + + +def test_merge_tool_call_delta_updates_fields_and_appends_arguments() -> None: + tool_call = _tool_call_delta(tool_call_id="id", tool_type="function", function_name="x", function_arguments="{") + delta = _tool_call_delta(tool_call_id="id2", tool_type="function", function_name="y", function_arguments="}") + llm_module._merge_tool_call_delta(tool_call, delta) + assert tool_call.id == "id2" + assert tool_call.type == "function" + assert tool_call.function.name == "y" + assert tool_call.function.arguments == "{}" + + +def test_increase_tool_call_generates_id_when_missing(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(llm_module.uuid, "uuid4", lambda: SimpleNamespace(hex="fixed")) + delta = _tool_call_delta(tool_call_id="", function_name="fn", function_arguments="{") + existing: list[AssistantPromptMessage.ToolCall] = [] + llm_module._increase_tool_call([delta], existing) + assert len(existing) == 1 + assert existing[0].id == "chatcmpl-tool-fixed" + assert existing[0].function.name == "fn" + assert existing[0].function.arguments == "{" + + +def test_increase_tool_call_merges_incremental_arguments() -> None: + existing: list[AssistantPromptMessage.ToolCall] = [] + llm_module._increase_tool_call( + [_tool_call_delta(tool_call_id="id", function_name="fn", function_arguments="{")], existing + ) + llm_module._increase_tool_call( + [_tool_call_delta(tool_call_id="id", function_name="", function_arguments="}")], existing + ) + assert len(existing) == 1 + assert existing[0].function.name == "fn" + assert existing[0].function.arguments == "{}" + + +@pytest.mark.parametrize( + ("content", "expected_type"), + [ + ("hello", str), + ([TextPromptMessageContent(data="hello")], list), + ], +) +def test_build_llm_result_from_chunks_accumulates_and_raises_error( + content: str | list[TextPromptMessageContent], + expected_type: type, + monkeypatch: pytest.MonkeyPatch, + caplog: pytest.LogCaptureFixture, +) -> None: + monkeypatch.setattr(llm_module.uuid, "uuid4", lambda: SimpleNamespace(hex="drain")) + caplog.set_level(logging.DEBUG) + + tool_delta = _tool_call_delta(tool_call_id="", function_name="fn", function_arguments="{}") + first = _chunk(content=content, tool_calls=[tool_delta], usage=_usage(3, 4), system_fingerprint="fp1") + + def iter_with_error() -> Iterator[LLMResultChunk]: + yield first + raise RuntimeError("drain boom") + + with pytest.raises(RuntimeError, match="drain boom"): + _build_llm_result_from_chunks( + model="m", prompt_messages=[UserPromptMessage(content="u")], chunks=iter_with_error() + ) + + assert any("Error while consuming non-stream plugin chunk iterator" in record.message for record in caplog.records) + + +def test_build_llm_result_from_chunks_empty_iterator() -> None: + def empty() -> Iterator[LLMResultChunk]: + if False: # pragma: no cover + yield _chunk() + return + + result = _build_llm_result_from_chunks(model="m", prompt_messages=[], chunks=empty()) + assert result.message.content == [] + assert result.usage.total_tokens == 0 + assert result.system_fingerprint is None + + +def test_build_llm_result_from_chunks_accumulates_all_chunks() -> None: + chunks = iter([_chunk(content="first"), _chunk(content="second")]) + result = _build_llm_result_from_chunks(model="m", prompt_messages=[], chunks=chunks) + assert result.message.content == "firstsecond" + + +def test_invoke_llm_via_plugin_passes_list_converted_stop(monkeypatch: pytest.MonkeyPatch) -> None: + invoked: dict[str, Any] = {} + + class FakePluginModelClient: + def invoke_llm(self, **kwargs: Any) -> str: + invoked.update(kwargs) + return "ok" + + import core.plugin.impl.model as plugin_model_module + + monkeypatch.setattr(plugin_model_module, "PluginModelClient", FakePluginModelClient) + + prompt_messages: Sequence[PromptMessage] = (UserPromptMessage(content="hi"),) + result = llm_module._invoke_llm_via_plugin( + tenant_id="t", + user_id="u", + plugin_id="p", + provider="prov", + model="m", + credentials={"k": "v"}, + model_parameters={"temp": 1}, + prompt_messages=prompt_messages, + tools=None, + stop=("a", "b"), + stream=True, + ) + + assert result == "ok" + assert invoked["prompt_messages"] == list(prompt_messages) + assert invoked["stop"] == ["a", "b"] + + +def test_normalize_non_stream_plugin_result_passthrough_llmresult() -> None: + llm_result = LLMResult(model="m", message=AssistantPromptMessage(content="x"), usage=_usage()) + assert ( + llm_module._normalize_non_stream_plugin_result(model="m", prompt_messages=[], result=llm_result) is llm_result + ) + + +def test_normalize_non_stream_plugin_result_builds_from_chunks() -> None: + chunks = iter([_chunk(content="hello", usage=_usage(1, 1))]) + result = llm_module._normalize_non_stream_plugin_result( + model="m", prompt_messages=[UserPromptMessage(content="u")], result=chunks + ) + assert isinstance(result, LLMResult) + assert result.message.content == "hello" + + +def test_invoke_non_stream_normalizes_and_sets_prompt_messages(llm: _TestLLM, monkeypatch: pytest.MonkeyPatch) -> None: + plugin_result = LLMResult(model="m", message=AssistantPromptMessage(content="x"), usage=_usage()) + monkeypatch.setattr( + "dify_graph.model_runtime.model_providers.__base.large_language_model._invoke_llm_via_plugin", + lambda **_: plugin_result, + ) + cb = SpyCallback() + prompt_messages = [UserPromptMessage(content="hi")] + result = llm.invoke(model="m", credentials={}, prompt_messages=prompt_messages, stream=False, callbacks=[cb]) + assert isinstance(result, LLMResult) + assert result.prompt_messages == prompt_messages + assert len(cb.before) == 1 + assert len(cb.after) == 1 + assert cb.after[0]["result"].prompt_messages == prompt_messages + + +def test_invoke_stream_wraps_generator_and_triggers_callbacks(llm: _TestLLM, monkeypatch: pytest.MonkeyPatch) -> None: + plugin_chunks = iter( + [ + _chunk(model="m1", content="a"), + _chunk( + model="m2", content=[TextPromptMessageContent(data="b")], usage=_usage(2, 3), system_fingerprint="fp" + ), + _chunk(model="m3", content=None), + ] + ) + monkeypatch.setattr( + "dify_graph.model_runtime.model_providers.__base.large_language_model._invoke_llm_via_plugin", + lambda **_: plugin_chunks, + ) + + cb = SpyCallback() + prompt_messages = [UserPromptMessage(content="hi")] + gen = llm.invoke(model="m", credentials={}, prompt_messages=prompt_messages, stream=True, callbacks=[cb]) + + assert isinstance(gen, Generator) + chunks = list(gen) + assert len(chunks) == 3 + assert all(chunk.prompt_messages == prompt_messages for chunk in chunks) + assert len(cb.before) == 1 + assert len(cb.new_chunk) == 3 + assert len(cb.after) == 1 + final_result: LLMResult = cb.after[0]["result"] + assert final_result.model == "m3" + assert final_result.system_fingerprint == "fp" + assert isinstance(final_result.message.content, list) + assert [c.data for c in final_result.message.content] == ["a", "b"] + assert final_result.usage.total_tokens == 5 + + +def test_invoke_triggers_error_callbacks_and_raises_transformed(llm: _TestLLM, monkeypatch: pytest.MonkeyPatch) -> None: + def boom(**_: Any) -> Any: + raise ValueError("plugin down") + + monkeypatch.setattr( + "dify_graph.model_runtime.model_providers.__base.large_language_model._invoke_llm_via_plugin", boom + ) + cb = SpyCallback() + with pytest.raises(RuntimeError, match="transformed: plugin down"): + llm.invoke( + model="m", credentials={}, prompt_messages=[UserPromptMessage(content="x")], stream=False, callbacks=[cb] + ) + assert len(cb.error) == 1 + assert isinstance(cb.error[0]["ex"], ValueError) + + +def test_invoke_raises_not_implemented_for_unsupported_result_type( + llm: _TestLLM, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(llm_module, "_invoke_llm_via_plugin", lambda **_: "not-a-result") + monkeypatch.setattr(llm_module, "_normalize_non_stream_plugin_result", lambda **_: "not-a-result") + with pytest.raises(NotImplementedError, match="unsupported invoke result type"): + llm.invoke(model="m", credentials={}, prompt_messages=[UserPromptMessage(content="x")], stream=False) + + +def test_invoke_appends_logging_callback_in_debug(llm: _TestLLM, monkeypatch: pytest.MonkeyPatch) -> None: + captured_callbacks: list[list[Callback]] = [] + + class FakeLoggingCallback(SpyCallback): + pass + + monkeypatch.setattr(llm_module, "LoggingCallback", FakeLoggingCallback) + monkeypatch.setattr(llm_module.dify_config, "DEBUG", True) + monkeypatch.setattr( + "dify_graph.model_runtime.model_providers.__base.large_language_model._invoke_llm_via_plugin", + lambda **_: LLMResult(model="m", message=AssistantPromptMessage(content="x"), usage=_usage()), + ) + + original_trigger = llm._trigger_before_invoke_callbacks + + def spy_trigger(*args: Any, **kwargs: Any) -> None: + captured_callbacks.append(list(kwargs["callbacks"])) + original_trigger(*args, **kwargs) + + monkeypatch.setattr(llm, "_trigger_before_invoke_callbacks", spy_trigger) + llm.invoke(model="m", credentials={}, prompt_messages=[UserPromptMessage(content="x")], stream=False) + assert any(isinstance(cb, FakeLoggingCallback) for cb in captured_callbacks[0]) + + +def test_get_num_tokens_returns_0_when_plugin_disabled(llm: _TestLLM, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(llm_module.dify_config, "PLUGIN_BASED_TOKEN_COUNTING_ENABLED", False) + assert llm.get_num_tokens(model="m", credentials={}, prompt_messages=[UserPromptMessage(content="x")]) == 0 + + +def test_get_num_tokens_uses_plugin_when_enabled(llm: _TestLLM, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(llm_module.dify_config, "PLUGIN_BASED_TOKEN_COUNTING_ENABLED", True) + + class FakePluginModelClient: + def get_llm_num_tokens(self, **kwargs: Any) -> int: + assert kwargs["tenant_id"] == "tenant" + assert kwargs["plugin_id"] == "plugin-id" + assert kwargs["provider"] == "provider" + assert kwargs["model_type"] == "llm" + return 42 + + import core.plugin.impl.model as plugin_model_module + + monkeypatch.setattr(plugin_model_module, "PluginModelClient", FakePluginModelClient) + assert llm.get_num_tokens(model="m", credentials={}, prompt_messages=[UserPromptMessage(content="x")]) == 42 + + +def test_calc_response_usage_uses_prices_and_latency(llm: _TestLLM, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(llm_module.time, "perf_counter", lambda: 4.5) + llm.started_at = 1.0 + usage = llm.calc_response_usage(model="m", credentials={}, prompt_tokens=10, completion_tokens=5) + assert usage.total_tokens == 15 + assert usage.total_price == Decimal("0.15") + assert usage.latency == 3.5 + + +def test_invoke_result_generator_raises_transformed_on_iteration_error(llm: _TestLLM) -> None: + def broken() -> Iterator[LLMResultChunk]: + yield _chunk(content="ok") + raise ValueError("chunk stream broken") + + gen = llm._invoke_result_generator( + model="m", + result=broken(), + credentials={}, + prompt_messages=[UserPromptMessage(content="u")], + model_parameters={}, + callbacks=[SpyCallback()], + ) + + with pytest.raises(RuntimeError, match="transformed: chunk stream broken"): + list(gen) diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_moderation_model.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_moderation_model.py new file mode 100644 index 0000000000..6ccc44ceb8 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_moderation_model.py @@ -0,0 +1,90 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from dify_graph.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.errors.invoke import InvokeError +from dify_graph.model_runtime.model_providers.__base.moderation_model import ModerationModel + + +class TestModerationModel: + @pytest.fixture + def mock_plugin_model_provider(self): + return MagicMock(spec=PluginModelProviderEntity) + + @pytest.fixture + def moderation_model(self, mock_plugin_model_provider): + return ModerationModel( + tenant_id="tenant_123", + model_type=ModelType.MODERATION, + plugin_id="plugin_123", + provider_name="test_provider", + plugin_model_provider=mock_plugin_model_provider, + ) + + def test_model_type(self, moderation_model): + assert moderation_model.model_type == ModelType.MODERATION + + def test_invoke_success(self, moderation_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + text = "test text" + user = "user_123" + + with ( + patch("core.plugin.impl.model.PluginModelClient") as mock_client_class, + patch("time.perf_counter", return_value=1.0), + ): + mock_client = mock_client_class.return_value + mock_client.invoke_moderation.return_value = True + + result = moderation_model.invoke(model=model_name, credentials=credentials, text=text, user=user) + + assert result is True + assert moderation_model.started_at == 1.0 + mock_client.invoke_moderation.assert_called_once_with( + tenant_id="tenant_123", + user_id="user_123", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + text=text, + ) + + def test_invoke_success_no_user(self, moderation_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + text = "test text" + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_moderation.return_value = False + + result = moderation_model.invoke(model=model_name, credentials=credentials, text=text) + + assert result is False + mock_client.invoke_moderation.assert_called_once_with( + tenant_id="tenant_123", + user_id="unknown", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + text=text, + ) + + def test_invoke_exception(self, moderation_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + text = "test text" + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_moderation.side_effect = Exception("Test error") + + with pytest.raises(InvokeError) as excinfo: + moderation_model.invoke(model=model_name, credentials=credentials, text=text) + + assert "[test_provider] Error: Test error" in str(excinfo.value.description) diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_rerank_model.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_rerank_model.py new file mode 100644 index 0000000000..67828894b3 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_rerank_model.py @@ -0,0 +1,181 @@ +from datetime import datetime +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from dify_graph.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.entities.rerank_entities import RerankDocument, RerankResult +from dify_graph.model_runtime.model_providers.__base.rerank_model import RerankModel + + +@pytest.fixture +def rerank_model() -> RerankModel: + plugin_provider = PluginModelProviderEntity.model_construct( + id="provider-id", + created_at=datetime.now(), + updated_at=datetime.now(), + provider="provider", + tenant_id="tenant", + plugin_unique_identifier="plugin-uid", + plugin_id="plugin-id", + declaration=MagicMock(), + ) + return RerankModel.model_construct( + tenant_id="tenant", + model_type=ModelType.RERANK, + plugin_id="plugin-id", + provider_name="provider", + plugin_model_provider=plugin_provider, + ) + + +def test_model_type_is_rerank_by_default() -> None: + plugin_provider = PluginModelProviderEntity.model_construct( + id="provider-id", + created_at=datetime.now(), + updated_at=datetime.now(), + provider="provider", + tenant_id="tenant", + plugin_unique_identifier="plugin-uid", + plugin_id="plugin-id", + declaration=MagicMock(), + ) + model = RerankModel( + tenant_id="tenant", + plugin_id="plugin-id", + provider_name="provider", + plugin_model_provider=plugin_provider, + ) + assert model.model_type == ModelType.RERANK + + +def test_invoke_calls_plugin_and_passes_args(rerank_model: RerankModel, monkeypatch: pytest.MonkeyPatch) -> None: + expected = RerankResult(model="rerank", docs=[RerankDocument(index=0, text="a", score=0.5)]) + + class FakePluginModelClient: + def __init__(self) -> None: + self.invoke_rerank_called_with: dict[str, Any] | None = None + + def invoke_rerank(self, **kwargs: Any) -> RerankResult: + self.invoke_rerank_called_with = kwargs + return expected + + import core.plugin.impl.model as plugin_model_module + + fake_client = FakePluginModelClient() + monkeypatch.setattr(plugin_model_module, "PluginModelClient", lambda: fake_client) + + result = rerank_model.invoke( + model="rerank", + credentials={"k": "v"}, + query="q", + docs=["d1", "d2"], + score_threshold=0.2, + top_n=10, + user="user-1", + ) + + assert result == expected + assert fake_client.invoke_rerank_called_with == { + "tenant_id": "tenant", + "user_id": "user-1", + "plugin_id": "plugin-id", + "provider": "provider", + "model": "rerank", + "credentials": {"k": "v"}, + "query": "q", + "docs": ["d1", "d2"], + "score_threshold": 0.2, + "top_n": 10, + } + + +def test_invoke_uses_unknown_user_when_not_provided(rerank_model: RerankModel, monkeypatch: pytest.MonkeyPatch) -> None: + class FakePluginModelClient: + def __init__(self) -> None: + self.kwargs: dict[str, Any] | None = None + + def invoke_rerank(self, **kwargs: Any) -> RerankResult: + self.kwargs = kwargs + return RerankResult(model="m", docs=[]) + + import core.plugin.impl.model as plugin_model_module + + fake_client = FakePluginModelClient() + monkeypatch.setattr(plugin_model_module, "PluginModelClient", lambda: fake_client) + + rerank_model.invoke(model="m", credentials={}, query="q", docs=["d"]) + assert fake_client.kwargs is not None + assert fake_client.kwargs["user_id"] == "unknown" + + +def test_invoke_transforms_and_raises_on_plugin_error( + rerank_model: RerankModel, monkeypatch: pytest.MonkeyPatch +) -> None: + class FakePluginModelClient: + def invoke_rerank(self, **_: Any) -> RerankResult: + raise ValueError("plugin down") + + import core.plugin.impl.model as plugin_model_module + + monkeypatch.setattr(plugin_model_module, "PluginModelClient", FakePluginModelClient) + monkeypatch.setattr(rerank_model, "_transform_invoke_error", lambda e: RuntimeError(f"transformed: {e}")) + + with pytest.raises(RuntimeError, match="transformed: plugin down"): + rerank_model.invoke(model="m", credentials={}, query="q", docs=["d"]) + + +def test_invoke_multimodal_calls_plugin_and_passes_args( + rerank_model: RerankModel, monkeypatch: pytest.MonkeyPatch +) -> None: + expected = RerankResult(model="mm", docs=[RerankDocument(index=0, text="x", score=0.9)]) + + class FakePluginModelClient: + def __init__(self) -> None: + self.invoke_multimodal_rerank_called_with: dict[str, Any] | None = None + + def invoke_multimodal_rerank(self, **kwargs: Any) -> RerankResult: + self.invoke_multimodal_rerank_called_with = kwargs + return expected + + import core.plugin.impl.model as plugin_model_module + + fake_client = FakePluginModelClient() + monkeypatch.setattr(plugin_model_module, "PluginModelClient", lambda: fake_client) + + query = {"type": "text", "text": "q"} + docs = [{"type": "text", "text": "d1"}] + result = rerank_model.invoke_multimodal_rerank( + model="mm", + credentials={"k": "v"}, + query=query, + docs=docs, + score_threshold=None, + top_n=None, + user=None, + ) + + assert result == expected + assert fake_client.invoke_multimodal_rerank_called_with is not None + assert fake_client.invoke_multimodal_rerank_called_with["tenant_id"] == "tenant" + assert fake_client.invoke_multimodal_rerank_called_with["user_id"] == "unknown" + assert fake_client.invoke_multimodal_rerank_called_with["query"] == query + assert fake_client.invoke_multimodal_rerank_called_with["docs"] == docs + + +def test_invoke_multimodal_transforms_and_raises_on_plugin_error( + rerank_model: RerankModel, monkeypatch: pytest.MonkeyPatch +) -> None: + class FakePluginModelClient: + def invoke_multimodal_rerank(self, **_: Any) -> RerankResult: + raise ValueError("plugin down") + + import core.plugin.impl.model as plugin_model_module + + monkeypatch.setattr(plugin_model_module, "PluginModelClient", FakePluginModelClient) + monkeypatch.setattr(rerank_model, "_transform_invoke_error", lambda e: RuntimeError(f"transformed: {e}")) + + with pytest.raises(RuntimeError, match="transformed: plugin down"): + rerank_model.invoke_multimodal_rerank(model="m", credentials={}, query={"q": 1}, docs=[{"d": 1}]) diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_speech2text_model.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_speech2text_model.py new file mode 100644 index 0000000000..f891718dc6 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_speech2text_model.py @@ -0,0 +1,87 @@ +from io import BytesIO +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from dify_graph.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.errors.invoke import InvokeError +from dify_graph.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel + + +class TestSpeech2TextModel: + @pytest.fixture + def mock_plugin_model_provider(self): + return MagicMock(spec=PluginModelProviderEntity) + + @pytest.fixture + def speech2text_model(self, mock_plugin_model_provider): + return Speech2TextModel( + tenant_id="tenant_123", + model_type=ModelType.SPEECH2TEXT, + plugin_id="plugin_123", + provider_name="test_provider", + plugin_model_provider=mock_plugin_model_provider, + ) + + def test_model_type(self, speech2text_model): + assert speech2text_model.model_type == ModelType.SPEECH2TEXT + + def test_invoke_success(self, speech2text_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + file = BytesIO(b"audio data") + user = "user_123" + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_speech_to_text.return_value = "transcribed text" + + result = speech2text_model.invoke(model=model_name, credentials=credentials, file=file, user=user) + + assert result == "transcribed text" + mock_client.invoke_speech_to_text.assert_called_once_with( + tenant_id="tenant_123", + user_id="user_123", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + file=file, + ) + + def test_invoke_success_no_user(self, speech2text_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + file = BytesIO(b"audio data") + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_speech_to_text.return_value = "transcribed text" + + result = speech2text_model.invoke(model=model_name, credentials=credentials, file=file) + + assert result == "transcribed text" + mock_client.invoke_speech_to_text.assert_called_once_with( + tenant_id="tenant_123", + user_id="unknown", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + file=file, + ) + + def test_invoke_exception(self, speech2text_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + file = BytesIO(b"audio data") + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_speech_to_text.side_effect = Exception("Test error") + + with pytest.raises(InvokeError) as excinfo: + speech2text_model.invoke(model=model_name, credentials=credentials, file=file) + + assert "[test_provider] Error: Test error" in str(excinfo.value.description) diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_text_embedding_model.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_text_embedding_model.py new file mode 100644 index 0000000000..c8f0a2ad49 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_text_embedding_model.py @@ -0,0 +1,185 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.entities.embedding_type import EmbeddingInputType +from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey, ModelType +from dify_graph.model_runtime.entities.text_embedding_entities import EmbeddingResult +from dify_graph.model_runtime.errors.invoke import InvokeError +from dify_graph.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel + + +class TestTextEmbeddingModel: + @pytest.fixture + def mock_plugin_model_provider(self): + return MagicMock(spec=PluginModelProviderEntity) + + @pytest.fixture + def text_embedding_model(self, mock_plugin_model_provider): + return TextEmbeddingModel( + tenant_id="tenant_123", + model_type=ModelType.TEXT_EMBEDDING, + plugin_id="plugin_123", + provider_name="test_provider", + plugin_model_provider=mock_plugin_model_provider, + ) + + def test_model_type(self, text_embedding_model): + assert text_embedding_model.model_type == ModelType.TEXT_EMBEDDING + + def test_invoke_with_texts(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + texts = ["hello", "world"] + user = "user_123" + expected_result = MagicMock(spec=EmbeddingResult) + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_text_embedding.return_value = expected_result + + result = text_embedding_model.invoke(model=model_name, credentials=credentials, texts=texts, user=user) + + assert result == expected_result + mock_client.invoke_text_embedding.assert_called_once_with( + tenant_id="tenant_123", + user_id="user_123", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + texts=texts, + input_type=EmbeddingInputType.DOCUMENT, + ) + + def test_invoke_with_multimodel_documents(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + multimodel_documents = [{"type": "text", "text": "hello"}] + expected_result = MagicMock(spec=EmbeddingResult) + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_multimodal_embedding.return_value = expected_result + + result = text_embedding_model.invoke( + model=model_name, credentials=credentials, multimodel_documents=multimodel_documents + ) + + assert result == expected_result + mock_client.invoke_multimodal_embedding.assert_called_once_with( + tenant_id="tenant_123", + user_id="unknown", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + documents=multimodel_documents, + input_type=EmbeddingInputType.DOCUMENT, + ) + + def test_invoke_no_input(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + + with pytest.raises(ValueError) as excinfo: + text_embedding_model.invoke(model=model_name, credentials=credentials) + + assert "No texts or files provided" in str(excinfo.value) + + def test_invoke_precedence(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + texts = ["hello"] + multimodel_documents = [{"type": "text", "text": "world"}] + expected_result = MagicMock(spec=EmbeddingResult) + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_text_embedding.return_value = expected_result + + result = text_embedding_model.invoke( + model=model_name, credentials=credentials, texts=texts, multimodel_documents=multimodel_documents + ) + + assert result == expected_result + mock_client.invoke_text_embedding.assert_called_once() + mock_client.invoke_multimodal_embedding.assert_not_called() + + def test_invoke_exception(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + texts = ["hello"] + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_text_embedding.side_effect = Exception("Test error") + + with pytest.raises(InvokeError) as excinfo: + text_embedding_model.invoke(model=model_name, credentials=credentials, texts=texts) + + assert "[test_provider] Error: Test error" in str(excinfo.value.description) + + def test_get_num_tokens(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + texts = ["hello", "world"] + expected_tokens = [1, 1] + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.get_text_embedding_num_tokens.return_value = expected_tokens + + result = text_embedding_model.get_num_tokens(model=model_name, credentials=credentials, texts=texts) + + assert result == expected_tokens + mock_client.get_text_embedding_num_tokens.assert_called_once_with( + tenant_id="tenant_123", + user_id="unknown", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + texts=texts, + ) + + def test_get_context_size(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + + # Test case 1: Context size in schema + mock_schema = MagicMock() + mock_schema.model_properties = {ModelPropertyKey.CONTEXT_SIZE: 2048} + + with patch.object(TextEmbeddingModel, "get_model_schema", return_value=mock_schema): + assert text_embedding_model._get_context_size(model_name, credentials) == 2048 + + # Test case 2: No schema + with patch.object(TextEmbeddingModel, "get_model_schema", return_value=None): + assert text_embedding_model._get_context_size(model_name, credentials) == 1000 + + # Test case 3: Context size NOT in schema properties + mock_schema.model_properties = {} + with patch.object(TextEmbeddingModel, "get_model_schema", return_value=mock_schema): + assert text_embedding_model._get_context_size(model_name, credentials) == 1000 + + def test_get_max_chunks(self, text_embedding_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + + # Test case 1: Max chunks in schema + mock_schema = MagicMock() + mock_schema.model_properties = {ModelPropertyKey.MAX_CHUNKS: 10} + + with patch.object(TextEmbeddingModel, "get_model_schema", return_value=mock_schema): + assert text_embedding_model._get_max_chunks(model_name, credentials) == 10 + + # Test case 2: No schema + with patch.object(TextEmbeddingModel, "get_model_schema", return_value=None): + assert text_embedding_model._get_max_chunks(model_name, credentials) == 1 + + # Test case 3: Max chunks NOT in schema properties + mock_schema.model_properties = {} + with patch.object(TextEmbeddingModel, "get_model_schema", return_value=mock_schema): + assert text_embedding_model._get_max_chunks(model_name, credentials) == 1 diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_tts_model.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_tts_model.py new file mode 100644 index 0000000000..b1aca9baa3 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/test_tts_model.py @@ -0,0 +1,131 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from dify_graph.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.errors.invoke import InvokeError +from dify_graph.model_runtime.model_providers.__base.tts_model import TTSModel + + +class TestTTSModel: + @pytest.fixture + def mock_plugin_model_provider(self): + return MagicMock(spec=PluginModelProviderEntity) + + @pytest.fixture + def tts_model(self, mock_plugin_model_provider): + return TTSModel( + tenant_id="tenant_123", + model_type=ModelType.TTS, + plugin_id="plugin_123", + provider_name="test_provider", + plugin_model_provider=mock_plugin_model_provider, + ) + + def test_model_type(self, tts_model): + assert tts_model.model_type == ModelType.TTS + + def test_invoke_success(self, tts_model): + model_name = "test_model" + tenant_id = "ignored_tenant_id" + credentials = {"api_key": "abc"} + content_text = "Hello world" + voice = "alloy" + user = "user_123" + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_tts.return_value = [b"audio_chunk"] + + result = tts_model.invoke( + model=model_name, + tenant_id=tenant_id, + credentials=credentials, + content_text=content_text, + voice=voice, + user=user, + ) + + assert list(result) == [b"audio_chunk"] + mock_client.invoke_tts.assert_called_once_with( + tenant_id="tenant_123", + user_id="user_123", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + content_text=content_text, + voice=voice, + ) + + def test_invoke_success_no_user(self, tts_model): + model_name = "test_model" + tenant_id = "ignored_tenant_id" + credentials = {"api_key": "abc"} + content_text = "Hello world" + voice = "alloy" + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_tts.return_value = [b"audio_chunk"] + + result = tts_model.invoke( + model=model_name, tenant_id=tenant_id, credentials=credentials, content_text=content_text, voice=voice + ) + + assert list(result) == [b"audio_chunk"] + mock_client.invoke_tts.assert_called_once_with( + tenant_id="tenant_123", + user_id="unknown", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + content_text=content_text, + voice=voice, + ) + + def test_invoke_exception(self, tts_model): + model_name = "test_model" + tenant_id = "ignored_tenant_id" + credentials = {"api_key": "abc"} + content_text = "Hello world" + voice = "alloy" + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.invoke_tts.side_effect = Exception("Test error") + + with pytest.raises(InvokeError) as excinfo: + tts_model.invoke( + model=model_name, + tenant_id=tenant_id, + credentials=credentials, + content_text=content_text, + voice=voice, + ) + + assert "[test_provider] Error: Test error" in str(excinfo.value.description) + + def test_get_tts_model_voices(self, tts_model): + model_name = "test_model" + credentials = {"api_key": "abc"} + language = "en-US" + + with patch("core.plugin.impl.model.PluginModelClient") as mock_client_class: + mock_client = mock_client_class.return_value + mock_client.get_tts_model_voices.return_value = [{"name": "Voice1"}] + + result = tts_model.get_tts_model_voices(model=model_name, credentials=credentials, language=language) + + assert result == [{"name": "Voice1"}] + mock_client.get_tts_model_voices.assert_called_once_with( + tenant_id="tenant_123", + user_id="unknown", + plugin_id="plugin_123", + provider="test_provider", + model=model_name, + credentials=credentials, + language=language, + ) diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/tokenizers/test_gpt2_tokenizer.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/tokenizers/test_gpt2_tokenizer.py new file mode 100644 index 0000000000..dde6ea02b5 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/__base/tokenizers/test_gpt2_tokenizer.py @@ -0,0 +1,96 @@ +from unittest.mock import MagicMock, patch + +import dify_graph.model_runtime.model_providers.__base.tokenizers.gpt2_tokenizer as gpt2_tokenizer_module +from dify_graph.model_runtime.model_providers.__base.tokenizers.gpt2_tokenizer import GPT2Tokenizer + + +class TestGPT2Tokenizer: + def setup_method(self): + # Reset the global tokenizer before each test to ensure we test initialization + gpt2_tokenizer_module._tokenizer = None + + def test_get_encoder_tiktoken(self): + """ + Test that get_encoder successfully uses tiktoken when available. + """ + mock_encoding = MagicMock() + # Mock tiktoken to be sure it's used + with patch("tiktoken.get_encoding", return_value=mock_encoding) as mock_get_encoding: + encoder = GPT2Tokenizer.get_encoder() + assert encoder == mock_encoding + mock_get_encoding.assert_called_once_with("gpt2") + + # Verify singleton behavior within the same test + encoder2 = GPT2Tokenizer.get_encoder() + assert encoder2 is encoder + assert mock_get_encoding.call_count == 1 + + def test_get_encoder_tiktoken_fallback(self): + """ + Test that get_encoder falls back to transformers when tiktoken fails. + """ + # patch tiktoken.get_encoding to raise an exception + with patch("tiktoken.get_encoding", side_effect=Exception("Tiktoken failure")): + # patch transformers.GPT2Tokenizer + with patch("transformers.GPT2Tokenizer.from_pretrained") as mock_from_pretrained: + mock_transformer_tokenizer = MagicMock() + mock_from_pretrained.return_value = mock_transformer_tokenizer + + with patch( + "dify_graph.model_runtime.model_providers.__base.tokenizers.gpt2_tokenizer.logger" + ) as mock_logger: + encoder = GPT2Tokenizer.get_encoder() + + assert encoder == mock_transformer_tokenizer + mock_from_pretrained.assert_called_once() + mock_logger.info.assert_called_once_with("Fallback to Transformers' GPT-2 tokenizer from tiktoken") + + def test_get_num_tokens(self): + """ + Test get_num_tokens returns the correct count. + """ + mock_encoder = MagicMock() + mock_encoder.encode.return_value = [1, 2, 3, 4, 5] + + with patch.object(GPT2Tokenizer, "get_encoder", return_value=mock_encoder): + tokens_count = GPT2Tokenizer.get_num_tokens("test text") + assert tokens_count == 5 + mock_encoder.encode.assert_called_once_with("test text") + + def test_get_num_tokens_by_gpt2_direct(self): + """ + Test _get_num_tokens_by_gpt2 directly. + """ + mock_encoder = MagicMock() + mock_encoder.encode.return_value = [1, 2] + + with patch.object(GPT2Tokenizer, "get_encoder", return_value=mock_encoder): + tokens_count = GPT2Tokenizer._get_num_tokens_by_gpt2("hello") + assert tokens_count == 2 + mock_encoder.encode.assert_called_once_with("hello") + + def test_get_encoder_already_initialized(self): + """ + Test that if _tokenizer is already set, it returns it immediately. + """ + mock_existing_tokenizer = MagicMock() + gpt2_tokenizer_module._tokenizer = mock_existing_tokenizer + + # Tiktoken should not be called if already initialized + with patch("tiktoken.get_encoding") as mock_get_encoding: + encoder = GPT2Tokenizer.get_encoder() + assert encoder == mock_existing_tokenizer + mock_get_encoding.assert_not_called() + + def test_get_encoder_thread_safety(self): + """ + Simple test to ensure the lock is used. + """ + mock_encoding = MagicMock() + with patch("tiktoken.get_encoding", return_value=mock_encoding): + # We patch the lock in the module + with patch("dify_graph.model_runtime.model_providers.__base.tokenizers.gpt2_tokenizer._lock") as mock_lock: + encoder = GPT2Tokenizer.get_encoder() + assert encoder == mock_encoding + mock_lock.__enter__.assert_called_once() + mock_lock.__exit__.assert_called_once() diff --git a/api/tests/unit_tests/dify_graph/model_runtime/model_providers/test_model_provider_factory.py b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/test_model_provider_factory.py new file mode 100644 index 0000000000..1ad0210375 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/model_providers/test_model_provider_factory.py @@ -0,0 +1,522 @@ +import logging +from datetime import datetime +from threading import Lock +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest +from redis import RedisError + +import contexts +from core.plugin.entities.plugin_daemon import PluginModelProviderEntity +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import ( + AIModelEntity, + FetchFrom, + ModelPropertyKey, + ModelType, +) +from dify_graph.model_runtime.entities.provider_entities import ConfigurateMethod, ProviderEntity +from dify_graph.model_runtime.model_providers.model_provider_factory import ModelProviderFactory + + +def _provider_entity( + *, + provider: str, + supported_model_types: list[ModelType] | None = None, + models: list[AIModelEntity] | None = None, + icon_small: I18nObject | None = None, + icon_small_dark: I18nObject | None = None, +) -> ProviderEntity: + return ProviderEntity( + provider=provider, + label=I18nObject(en_US=provider), + supported_model_types=supported_model_types or [ModelType.LLM], + configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], + models=models or [], + icon_small=icon_small, + icon_small_dark=icon_small_dark, + ) + + +def _plugin_provider( + *, plugin_id: str, declaration: ProviderEntity, provider: str = "provider" +) -> PluginModelProviderEntity: + return PluginModelProviderEntity.model_construct( + id=f"{plugin_id}-id", + created_at=datetime.now(), + updated_at=datetime.now(), + provider=provider, + tenant_id="tenant", + plugin_unique_identifier=f"{plugin_id}-uid", + plugin_id=plugin_id, + declaration=declaration, + ) + + +@pytest.fixture(autouse=True) +def _reset_plugin_model_provider_context() -> None: + contexts.plugin_model_providers_lock.set(Lock()) + contexts.plugin_model_providers.set(None) + + +@pytest.fixture +def fake_plugin_manager(monkeypatch: pytest.MonkeyPatch) -> MagicMock: + manager = MagicMock() + + import core.plugin.impl.model as plugin_model_module + + monkeypatch.setattr(plugin_model_module, "PluginModelClient", lambda: manager) + return manager + + +@pytest.fixture +def factory(fake_plugin_manager: MagicMock) -> ModelProviderFactory: + return ModelProviderFactory(tenant_id="tenant") + + +def test_get_plugin_model_providers_initializes_context_on_lookup_error( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock, monkeypatch: pytest.MonkeyPatch +) -> None: + declaration = _provider_entity(provider="openai") + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=declaration) + ] + + original_get = contexts.plugin_model_providers.get + calls = {"n": 0} + + def flaky_get() -> Any: + calls["n"] += 1 + if calls["n"] == 1: + raise LookupError + return original_get() + + monkeypatch.setattr(contexts.plugin_model_providers, "get", flaky_get) + + providers = factory.get_plugin_model_providers() + assert len(providers) == 1 + assert providers[0].declaration.provider == "langgenius/openai/openai" + + +def test_get_plugin_model_providers_caches_and_does_not_refetch( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock +) -> None: + declaration = _provider_entity(provider="openai") + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=declaration) + ] + + first = factory.get_plugin_model_providers() + second = factory.get_plugin_model_providers() + + assert first is second + fake_plugin_manager.fetch_model_providers.assert_called_once_with("tenant") + + +def test_get_providers_returns_declarations(factory: ModelProviderFactory, fake_plugin_manager: MagicMock) -> None: + d1 = _provider_entity(provider="openai") + d2 = _provider_entity(provider="anthropic") + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=d1), + _plugin_provider(plugin_id="langgenius/anthropic", declaration=d2), + ] + + providers = factory.get_providers() + assert [p.provider for p in providers] == ["langgenius/openai/openai", "langgenius/anthropic/anthropic"] + + +def test_get_plugin_model_provider_converts_short_provider_id( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock +) -> None: + declaration = _provider_entity(provider="openai") + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=declaration) + ] + + provider = factory.get_plugin_model_provider("openai") + assert provider.declaration.provider == "langgenius/openai/openai" + + +def test_get_plugin_model_provider_raises_on_invalid_provider( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock +) -> None: + declaration = _provider_entity(provider="openai") + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=declaration) + ] + + with pytest.raises(ValueError, match="Invalid provider"): + factory.get_plugin_model_provider("langgenius/unknown/unknown") + + +def test_get_provider_schema_returns_declaration(factory: ModelProviderFactory, fake_plugin_manager: MagicMock) -> None: + declaration = _provider_entity(provider="openai") + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=declaration) + ] + + schema = factory.get_provider_schema("openai") + assert schema.provider == "langgenius/openai/openai" + + +def test_provider_credentials_validate_errors_when_schema_missing( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + schema = _provider_entity(provider="openai") + schema.provider_credential_schema = None + monkeypatch.setattr( + factory, + "get_plugin_model_provider", + lambda **_: _plugin_provider(plugin_id="langgenius/openai", declaration=schema), + ) + + with pytest.raises(ValueError, match="does not have provider_credential_schema"): + factory.provider_credentials_validate(provider="openai", credentials={"x": "y"}) + + +def test_provider_credentials_validate_filters_and_calls_plugin_validation( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock, monkeypatch: pytest.MonkeyPatch +) -> None: + schema = _provider_entity(provider="openai") + schema.provider_credential_schema = MagicMock() + plugin_provider = _plugin_provider(plugin_id="langgenius/openai", declaration=schema) + monkeypatch.setattr(factory, "get_plugin_model_provider", lambda **_: plugin_provider) + + fake_validator = MagicMock() + fake_validator.validate_and_filter.return_value = {"filtered": True} + monkeypatch.setattr( + "dify_graph.model_runtime.model_providers.model_provider_factory.ProviderCredentialSchemaValidator", + lambda _: fake_validator, + ) + + filtered = factory.provider_credentials_validate(provider="openai", credentials={"raw": True}) + assert filtered == {"filtered": True} + fake_plugin_manager.validate_provider_credentials.assert_called_once() + kwargs = fake_plugin_manager.validate_provider_credentials.call_args.kwargs + assert kwargs["plugin_id"] == "langgenius/openai" + assert kwargs["provider"] == "provider" + assert kwargs["credentials"] == {"filtered": True} + + +def test_model_credentials_validate_errors_when_schema_missing( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + schema = _provider_entity(provider="openai") + schema.model_credential_schema = None + monkeypatch.setattr( + factory, + "get_plugin_model_provider", + lambda **_: _plugin_provider(plugin_id="langgenius/openai", declaration=schema), + ) + + with pytest.raises(ValueError, match="does not have model_credential_schema"): + factory.model_credentials_validate( + provider="openai", model_type=ModelType.LLM, model="m", credentials={"x": "y"} + ) + + +def test_model_credentials_validate_filters_and_calls_plugin_validation( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock, monkeypatch: pytest.MonkeyPatch +) -> None: + schema = _provider_entity(provider="openai") + schema.model_credential_schema = MagicMock() + plugin_provider = _plugin_provider(plugin_id="langgenius/openai", declaration=schema) + monkeypatch.setattr(factory, "get_plugin_model_provider", lambda **_: plugin_provider) + + fake_validator = MagicMock() + fake_validator.validate_and_filter.return_value = {"filtered": True} + monkeypatch.setattr( + "dify_graph.model_runtime.model_providers.model_provider_factory.ModelCredentialSchemaValidator", + lambda *_: fake_validator, + ) + + filtered = factory.model_credentials_validate( + provider="openai", model_type=ModelType.TEXT_EMBEDDING, model="m", credentials={"raw": True} + ) + assert filtered == {"filtered": True} + kwargs = fake_plugin_manager.validate_model_credentials.call_args.kwargs + assert kwargs["plugin_id"] == "langgenius/openai" + assert kwargs["provider"] == "provider" + assert kwargs["model_type"] == "text-embedding" + assert kwargs["model"] == "m" + assert kwargs["credentials"] == {"filtered": True} + + +def test_get_model_schema_cache_hit(factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch) -> None: + model_schema = AIModelEntity( + model="m", + label=I18nObject(en_US="m"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[], + ) + + monkeypatch.setattr(factory, "get_plugin_id_and_provider_name_from_provider", lambda *_: ("pid", "prov")) + + with patch("dify_graph.model_runtime.model_providers.model_provider_factory.redis_client") as mock_redis: + mock_redis.get.return_value = model_schema.model_dump_json().encode() + assert ( + factory.get_model_schema(provider="x", model_type=ModelType.LLM, model="m", credentials={"k": "v"}) + == model_schema + ) + + +def test_get_model_schema_cache_invalid_json_deletes_key( + factory: ModelProviderFactory, caplog: pytest.LogCaptureFixture +) -> None: + caplog.set_level(logging.WARNING) + + with patch("dify_graph.model_runtime.model_providers.model_provider_factory.redis_client") as mock_redis: + mock_redis.get.return_value = b'{"model":"m"}' + factory.plugin_model_manager.get_model_schema.return_value = None + factory.get_plugin_id_and_provider_name_from_provider = lambda *_: ("pid", "prov") # type: ignore[method-assign] + assert factory.get_model_schema(provider="x", model_type=ModelType.LLM, model="m", credentials=None) is None + assert mock_redis.delete.called + assert any("Failed to validate cached plugin model schema" in r.message for r in caplog.records) + + +def test_get_model_schema_cache_delete_redis_error_is_logged( + factory: ModelProviderFactory, caplog: pytest.LogCaptureFixture +) -> None: + caplog.set_level(logging.WARNING) + + with patch("dify_graph.model_runtime.model_providers.model_provider_factory.redis_client") as mock_redis: + mock_redis.get.return_value = b'{"model":"m"}' + mock_redis.delete.side_effect = RedisError("nope") + factory.plugin_model_manager.get_model_schema.return_value = None + factory.get_plugin_id_and_provider_name_from_provider = lambda *_: ("pid", "prov") # type: ignore[method-assign] + factory.get_model_schema(provider="x", model_type=ModelType.LLM, model="m", credentials=None) + assert any("Failed to delete invalid plugin model schema cache" in r.message for r in caplog.records) + + +def test_get_model_schema_redis_get_error_falls_back_to_plugin( + factory: ModelProviderFactory, caplog: pytest.LogCaptureFixture +) -> None: + caplog.set_level(logging.WARNING) + factory.get_plugin_id_and_provider_name_from_provider = lambda *_: ("pid", "prov") # type: ignore[method-assign] + factory.plugin_model_manager.get_model_schema.return_value = None + + with patch("dify_graph.model_runtime.model_providers.model_provider_factory.redis_client") as mock_redis: + mock_redis.get.side_effect = RedisError("down") + assert factory.get_model_schema(provider="x", model_type=ModelType.LLM, model="m", credentials=None) is None + assert any("Failed to read plugin model schema cache" in r.message for r in caplog.records) + + +def test_get_model_schema_cache_miss_sets_cache_and_handles_setex_error( + factory: ModelProviderFactory, caplog: pytest.LogCaptureFixture +) -> None: + caplog.set_level(logging.WARNING) + factory.get_plugin_id_and_provider_name_from_provider = lambda *_: ("pid", "prov") # type: ignore[method-assign] + + model_schema = AIModelEntity( + model="m", + label=I18nObject(en_US="m"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[], + ) + factory.plugin_model_manager.get_model_schema.return_value = model_schema + + with patch("dify_graph.model_runtime.model_providers.model_provider_factory.redis_client") as mock_redis: + mock_redis.get.return_value = None + mock_redis.setex.side_effect = RedisError("nope") + assert ( + factory.get_model_schema(provider="x", model_type=ModelType.LLM, model="m", credentials=None) + == model_schema + ) + assert any("Failed to write plugin model schema cache" in r.message for r in caplog.records) + + +@pytest.mark.parametrize( + ("model_type", "expected_class"), + [ + (ModelType.LLM, "LargeLanguageModel"), + (ModelType.TEXT_EMBEDDING, "TextEmbeddingModel"), + (ModelType.RERANK, "RerankModel"), + (ModelType.SPEECH2TEXT, "Speech2TextModel"), + (ModelType.MODERATION, "ModerationModel"), + (ModelType.TTS, "TTSModel"), + ], +) +def test_get_model_type_instance_dispatches_by_type( + factory: ModelProviderFactory, model_type: ModelType, expected_class: str, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(factory, "get_plugin_id_and_provider_name_from_provider", lambda *_: ("pid", "prov")) + monkeypatch.setattr(factory, "get_plugin_model_provider", lambda *_: MagicMock(spec=PluginModelProviderEntity)) + + sentinel = object() + monkeypatch.setattr( + f"dify_graph.model_runtime.model_providers.model_provider_factory.{expected_class}", + MagicMock(model_validate=lambda _: sentinel), + ) + + assert factory.get_model_type_instance("langgenius/openai/openai", model_type) is sentinel + + +def test_get_model_type_instance_raises_on_unsupported( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + monkeypatch.setattr(factory, "get_plugin_id_and_provider_name_from_provider", lambda *_: ("pid", "prov")) + monkeypatch.setattr(factory, "get_plugin_model_provider", lambda *_: MagicMock(spec=PluginModelProviderEntity)) + + class UnknownModelType: + pass + + with pytest.raises(ValueError, match="Unsupported model type"): + factory.get_model_type_instance("langgenius/openai/openai", UnknownModelType()) # type: ignore[arg-type] + + +def test_get_models_filters_by_provider_and_model_type( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock +) -> None: + llm = AIModelEntity( + model="m1", + label=I18nObject(en_US="m1"), + model_type=ModelType.LLM, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[], + ) + embed = AIModelEntity( + model="e1", + label=I18nObject(en_US="e1"), + model_type=ModelType.TEXT_EMBEDDING, + fetch_from=FetchFrom.PREDEFINED_MODEL, + model_properties={ModelPropertyKey.CONTEXT_SIZE: 1024}, + parameter_rules=[], + ) + + openai = _provider_entity( + provider="openai", supported_model_types=[ModelType.LLM, ModelType.TEXT_EMBEDDING], models=[llm, embed] + ) + anthropic = _provider_entity(provider="anthropic", supported_model_types=[ModelType.LLM], models=[llm]) + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=openai), + _plugin_provider(plugin_id="langgenius/anthropic", declaration=anthropic), + ] + + # ModelType filter picks only matching models + providers = factory.get_models(model_type=ModelType.TEXT_EMBEDDING) + assert len(providers) == 1 + assert providers[0].provider == "langgenius/openai/openai" + assert [m.model for m in providers[0].models] == ["e1"] + + # Provider filter excludes others + providers = factory.get_models(provider="langgenius/anthropic/anthropic", model_type=ModelType.LLM) + assert len(providers) == 1 + assert providers[0].provider == "langgenius/anthropic/anthropic" + + +def test_get_models_provider_filter_skips_non_matching( + factory: ModelProviderFactory, fake_plugin_manager: MagicMock +) -> None: + openai = _provider_entity(provider="openai") + anthropic = _provider_entity(provider="anthropic") + fake_plugin_manager.fetch_model_providers.return_value = [ + _plugin_provider(plugin_id="langgenius/openai", declaration=openai), + _plugin_provider(plugin_id="langgenius/anthropic", declaration=anthropic), + ] + + providers = factory.get_models(provider="langgenius/not-exist/not-exist", model_type=ModelType.LLM) + assert providers == [] + + +def test_get_provider_icon_fetches_asset_and_returns_mime_type( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + provider_schema = _provider_entity( + provider="langgenius/openai/openai", + icon_small=I18nObject(en_US="icon.png", zh_Hans="icon-zh.png"), + icon_small_dark=I18nObject(en_US="dark.svg", zh_Hans="dark-zh.svg"), + ) + monkeypatch.setattr(factory, "get_provider_schema", lambda *_: provider_schema) + + class FakePluginAssetManager: + def fetch_asset(self, tenant_id: str, id: str) -> bytes: + assert tenant_id == "tenant" + return f"bytes:{id}".encode() + + import core.plugin.impl.asset as asset_module + + monkeypatch.setattr(asset_module, "PluginAssetManager", FakePluginAssetManager) + + data, mime = factory.get_provider_icon("openai", "icon_small", "en_US") + assert data == b"bytes:icon.png" + assert mime == "image/png" + + data, mime = factory.get_provider_icon("openai", "icon_small_dark", "zh_Hans") + assert data == b"bytes:dark-zh.svg" + assert mime == "image/svg+xml" + + +def test_get_provider_icon_uses_zh_hans_for_small_and_en_us_for_dark( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + provider_schema = _provider_entity( + provider="langgenius/openai/openai", + icon_small=I18nObject(en_US="icon-en.png", zh_Hans="icon-zh.png"), + icon_small_dark=I18nObject(en_US="dark-en.svg", zh_Hans="dark-zh.svg"), + ) + monkeypatch.setattr(factory, "get_provider_schema", lambda *_: provider_schema) + + class FakePluginAssetManager: + def fetch_asset(self, tenant_id: str, id: str) -> bytes: + return id.encode() + + import core.plugin.impl.asset as asset_module + + monkeypatch.setattr(asset_module, "PluginAssetManager", FakePluginAssetManager) + + data, _ = factory.get_provider_icon("openai", "icon_small", "zh_Hans") + assert data == b"icon-zh.png" + + data, _ = factory.get_provider_icon("openai", "icon_small_dark", "en_US") + assert data == b"dark-en.svg" + + +def test_get_provider_icon_raises_for_missing_icons( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + provider_schema = _provider_entity(provider="langgenius/openai/openai") + monkeypatch.setattr(factory, "get_provider_schema", lambda *_: provider_schema) + + with pytest.raises(ValueError, match="does not have small icon"): + factory.get_provider_icon("openai", "icon_small", "en_US") + + with pytest.raises(ValueError, match="does not have small dark icon"): + factory.get_provider_icon("openai", "icon_small_dark", "en_US") + + +def test_get_provider_icon_raises_for_unsupported_icon_type( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + provider_schema = _provider_entity( + provider="langgenius/openai/openai", + icon_small=I18nObject(en_US="", zh_Hans=""), + ) + monkeypatch.setattr(factory, "get_provider_schema", lambda *_: provider_schema) + with pytest.raises(ValueError, match="Unsupported icon type"): + factory.get_provider_icon("openai", "nope", "en_US") + + +def test_get_provider_icon_raises_when_file_name_missing( + factory: ModelProviderFactory, monkeypatch: pytest.MonkeyPatch +) -> None: + provider_schema = _provider_entity( + provider="langgenius/openai/openai", + icon_small=I18nObject(en_US="", zh_Hans=""), + ) + monkeypatch.setattr(factory, "get_provider_schema", lambda *_: provider_schema) + with pytest.raises(ValueError, match="does not have icon"): + factory.get_provider_icon("openai", "icon_small", "en_US") + + +def test_get_plugin_id_and_provider_name_from_provider_handles_google_special_case( + factory: ModelProviderFactory, +) -> None: + plugin_id, provider_name = factory.get_plugin_id_and_provider_name_from_provider("google") + assert plugin_id == "langgenius/gemini" + assert provider_name == "google" diff --git a/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_common_validator.py b/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_common_validator.py new file mode 100644 index 0000000000..6d52457c8c --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_common_validator.py @@ -0,0 +1,201 @@ +import pytest + +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.provider_entities import ( + CredentialFormSchema, + FormOption, + FormShowOnObject, + FormType, +) +from dify_graph.model_runtime.schema_validators.common_validator import CommonValidator + + +class TestCommonValidator: + def test_validate_credential_form_schema_required_missing(self): + validator = CommonValidator() + schema = CredentialFormSchema( + variable="api_key", label=I18nObject(en_US="API Key"), type=FormType.TEXT_INPUT, required=True + ) + with pytest.raises(ValueError, match="Variable api_key is required"): + validator._validate_credential_form_schema(schema, {}) + + def test_validate_credential_form_schema_not_required_missing_with_default(self): + validator = CommonValidator() + schema = CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key"), + type=FormType.TEXT_INPUT, + required=False, + default="default_value", + ) + assert validator._validate_credential_form_schema(schema, {}) == "default_value" + + def test_validate_credential_form_schema_not_required_missing_no_default(self): + validator = CommonValidator() + schema = CredentialFormSchema( + variable="api_key", label=I18nObject(en_US="API Key"), type=FormType.TEXT_INPUT, required=False + ) + assert validator._validate_credential_form_schema(schema, {}) is None + + def test_validate_credential_form_schema_max_length_exceeded(self): + validator = CommonValidator() + schema = CredentialFormSchema( + variable="api_key", label=I18nObject(en_US="API Key"), type=FormType.TEXT_INPUT, max_length=5 + ) + with pytest.raises(ValueError, match="Variable api_key length should not be greater than 5"): + validator._validate_credential_form_schema(schema, {"api_key": "123456"}) + + def test_validate_credential_form_schema_not_string(self): + validator = CommonValidator() + schema = CredentialFormSchema(variable="api_key", label=I18nObject(en_US="API Key"), type=FormType.TEXT_INPUT) + with pytest.raises(ValueError, match="Variable api_key should be string"): + validator._validate_credential_form_schema(schema, {"api_key": 123}) + + def test_validate_credential_form_schema_select_invalid_option(self): + validator = CommonValidator() + schema = CredentialFormSchema( + variable="mode", + label=I18nObject(en_US="Mode"), + type=FormType.SELECT, + options=[ + FormOption(label=I18nObject(en_US="Fast"), value="fast"), + FormOption(label=I18nObject(en_US="Slow"), value="slow"), + ], + ) + with pytest.raises(ValueError, match="Variable mode is not in options"): + validator._validate_credential_form_schema(schema, {"mode": "medium"}) + + def test_validate_credential_form_schema_select_valid_option(self): + validator = CommonValidator() + schema = CredentialFormSchema( + variable="mode", + label=I18nObject(en_US="Mode"), + type=FormType.SELECT, + options=[ + FormOption(label=I18nObject(en_US="Fast"), value="fast"), + FormOption(label=I18nObject(en_US="Slow"), value="slow"), + ], + ) + assert validator._validate_credential_form_schema(schema, {"mode": "fast"}) == "fast" + + def test_validate_credential_form_schema_switch_invalid(self): + validator = CommonValidator() + schema = CredentialFormSchema(variable="enabled", label=I18nObject(en_US="Enabled"), type=FormType.SWITCH) + with pytest.raises(ValueError, match="Variable enabled should be true or false"): + validator._validate_credential_form_schema(schema, {"enabled": "maybe"}) + + def test_validate_credential_form_schema_switch_valid(self): + validator = CommonValidator() + schema = CredentialFormSchema(variable="enabled", label=I18nObject(en_US="Enabled"), type=FormType.SWITCH) + assert validator._validate_credential_form_schema(schema, {"enabled": "true"}) is True + assert validator._validate_credential_form_schema(schema, {"enabled": "FALSE"}) is False + + def test_validate_and_filter_credential_form_schemas_with_show_on(self): + validator = CommonValidator() + schemas = [ + CredentialFormSchema( + variable="auth_type", + label=I18nObject(en_US="Auth Type"), + type=FormType.SELECT, + options=[ + FormOption(label=I18nObject(en_US="API Key"), value="api_key"), + FormOption(label=I18nObject(en_US="OAuth"), value="oauth"), + ], + ), + CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key"), + type=FormType.TEXT_INPUT, + show_on=[FormShowOnObject(variable="auth_type", value="api_key")], + ), + CredentialFormSchema( + variable="client_id", + label=I18nObject(en_US="Client ID"), + type=FormType.TEXT_INPUT, + show_on=[FormShowOnObject(variable="auth_type", value="oauth")], + ), + ] + + # Case 1: auth_type = api_key + credentials = {"auth_type": "api_key", "api_key": "my_secret"} + result = validator._validate_and_filter_credential_form_schemas(schemas, credentials) + assert "auth_type" in result + assert "api_key" in result + assert "client_id" not in result + assert result["api_key"] == "my_secret" + + # Case 2: auth_type = oauth + credentials = {"auth_type": "oauth", "client_id": "my_client"} + result = validator._validate_and_filter_credential_form_schemas(schemas, credentials) + # Note: 'auth_type' contains 'oauth'. 'result' contains keys that pass validation. + # Since 'oauth' is not an empty string, it is in result. + assert "auth_type" in result + assert "api_key" not in result + assert "client_id" in result + assert result["client_id"] == "my_client" + + def test_validate_and_filter_show_on_missing_variable(self): + validator = CommonValidator() + schemas = [ + CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key"), + type=FormType.TEXT_INPUT, + show_on=[FormShowOnObject(variable="auth_type", value="api_key")], + ) + ] + # auth_type is missing in credentials, so api_key should be filtered out + result = validator._validate_and_filter_credential_form_schemas(schemas, {}) + assert result == {} + + def test_validate_and_filter_show_on_mismatch_value(self): + validator = CommonValidator() + schemas = [ + CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key"), + type=FormType.TEXT_INPUT, + show_on=[FormShowOnObject(variable="auth_type", value="api_key")], + ) + ] + # auth_type is oauth, which doesn't match show_on + result = validator._validate_and_filter_credential_form_schemas(schemas, {"auth_type": "oauth"}) + assert result == {} + + def test_validate_and_filter_multiple_show_on(self): + validator = CommonValidator() + schemas = [ + CredentialFormSchema( + variable="target", + label=I18nObject(en_US="Target"), + type=FormType.TEXT_INPUT, + show_on=[FormShowOnObject(variable="v1", value="a"), FormShowOnObject(variable="v2", value="b")], + ) + ] + # Both match + assert "target" in validator._validate_and_filter_credential_form_schemas( + schemas, {"v1": "a", "v2": "b", "target": "val"} + ) + # One mismatch + assert "target" not in validator._validate_and_filter_credential_form_schemas( + schemas, {"v1": "a", "v2": "c", "target": "val"} + ) + # One missing + assert "target" not in validator._validate_and_filter_credential_form_schemas( + schemas, {"v1": "a", "target": "val"} + ) + + def test_validate_and_filter_skips_falsy_results(self): + validator = CommonValidator() + schemas = [ + CredentialFormSchema(variable="enabled", label=I18nObject(en_US="Enabled"), type=FormType.SWITCH), + CredentialFormSchema( + variable="empty_str", label=I18nObject(en_US="Empty"), type=FormType.TEXT_INPUT, required=False + ), + ] + # Result of false switch is False. if result: is false. Not added. + # Result of empty string is "", if result: is false. Not added. + credentials = {"enabled": "false", "empty_str": ""} + result = validator._validate_and_filter_credential_form_schemas(schemas, credentials) + assert "enabled" not in result + assert "empty_str" not in result diff --git a/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_model_credential_schema_validator.py b/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_model_credential_schema_validator.py new file mode 100644 index 0000000000..bab2805276 --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_model_credential_schema_validator.py @@ -0,0 +1,233 @@ +import pytest + +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.entities.provider_entities import ( + CredentialFormSchema, + FieldModelSchema, + FormOption, + FormShowOnObject, + FormType, + ModelCredentialSchema, +) +from dify_graph.model_runtime.schema_validators.model_credential_schema_validator import ModelCredentialSchemaValidator + + +def test_validate_and_filter_with_none_schema(): + validator = ModelCredentialSchemaValidator(ModelType.LLM, None) + with pytest.raises(ValueError, match="Model credential schema is None"): + validator.validate_and_filter({}) + + +def test_validate_and_filter_success(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key", zh_Hans="API Key"), + type=FormType.SECRET_INPUT, + required=True, + ), + CredentialFormSchema( + variable="optional_field", + label=I18nObject(en_US="Optional", zh_Hans="可选"), + type=FormType.TEXT_INPUT, + required=False, + default="default_val", + ), + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + credentials = {"api_key": "sk-123456"} + result = validator.validate_and_filter(credentials) + + assert result["api_key"] == "sk-123456" + assert result["optional_field"] == "default_val" + assert credentials["__model_type"] == ModelType.LLM.value + + +def test_validate_and_filter_with_show_on(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="mode", label=I18nObject(en_US="Mode", zh_Hans="模式"), type=FormType.TEXT_INPUT, required=True + ), + CredentialFormSchema( + variable="conditional_field", + label=I18nObject(en_US="Conditional", zh_Hans="条件"), + type=FormType.TEXT_INPUT, + required=True, + show_on=[FormShowOnObject(variable="mode", value="advanced")], + ), + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + # mode is 'simple', conditional_field should be filtered out + credentials = {"mode": "simple", "conditional_field": "secret"} + result = validator.validate_and_filter(credentials) + assert "conditional_field" not in result + assert result["mode"] == "simple" + + # mode is 'advanced', conditional_field should be kept + credentials = {"mode": "advanced", "conditional_field": "secret"} + result = validator.validate_and_filter(credentials) + assert result["conditional_field"] == "secret" + assert result["mode"] == "advanced" + + # show_on variable missing in credentials + credentials = {"conditional_field": "secret"} # mode missing + with pytest.raises(ValueError, match="Variable mode is required"): # because mode is required in schema + validator.validate_and_filter(credentials) + + +def test_validate_and_filter_show_on_missing_trigger_var(): + # specifically test all_show_on_match = False when variable not in credentials + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="optional_trigger", + label=I18nObject(en_US="Optional Trigger", zh_Hans="可选触发"), + type=FormType.TEXT_INPUT, + required=False, + ), + CredentialFormSchema( + variable="conditional_field", + label=I18nObject(en_US="Conditional", zh_Hans="条件"), + type=FormType.TEXT_INPUT, + required=False, + show_on=[FormShowOnObject(variable="optional_trigger", value="active")], + ), + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + # optional_trigger missing, conditional_field should be skipped + result = validator.validate_and_filter({"conditional_field": "val"}) + assert "conditional_field" not in result + + +def test_common_validator_logic_required(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="api_key", + label=I18nObject(en_US="API Key", zh_Hans="API Key"), + type=FormType.SECRET_INPUT, + required=True, + ) + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + with pytest.raises(ValueError, match="Variable api_key is required"): + validator.validate_and_filter({}) + + with pytest.raises(ValueError, match="Variable api_key is required"): + validator.validate_and_filter({"api_key": ""}) + + +def test_common_validator_logic_max_length(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="key", + label=I18nObject(en_US="Key", zh_Hans="Key"), + type=FormType.TEXT_INPUT, + required=True, + max_length=5, + ) + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + with pytest.raises(ValueError, match="Variable key length should not be greater than 5"): + validator.validate_and_filter({"key": "123456"}) + + +def test_common_validator_logic_invalid_type(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="key", label=I18nObject(en_US="Key", zh_Hans="Key"), type=FormType.TEXT_INPUT, required=True + ) + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + with pytest.raises(ValueError, match="Variable key should be string"): + validator.validate_and_filter({"key": 123}) + + +def test_common_validator_logic_switch(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="enabled", + label=I18nObject(en_US="Enabled", zh_Hans="启用"), + type=FormType.SWITCH, + required=True, + ) + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + result = validator.validate_and_filter({"enabled": "true"}) + assert result["enabled"] is True + + result = validator.validate_and_filter({"enabled": "false"}) + assert "enabled" not in result + + with pytest.raises(ValueError, match="Variable enabled should be true or false"): + validator.validate_and_filter({"enabled": "not_a_bool"}) + + +def test_common_validator_logic_options(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="choice", + label=I18nObject(en_US="Choice", zh_Hans="选择"), + type=FormType.SELECT, + required=True, + options=[ + FormOption(label=I18nObject(en_US="A", zh_Hans="A"), value="a"), + FormOption(label=I18nObject(en_US="B", zh_Hans="B"), value="b"), + ], + ) + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + result = validator.validate_and_filter({"choice": "a"}) + assert result["choice"] == "a" + + with pytest.raises(ValueError, match="Variable choice is not in options"): + validator.validate_and_filter({"choice": "c"}) + + +def test_validate_and_filter_optional_no_default(): + schema = ModelCredentialSchema( + model=FieldModelSchema(label=I18nObject(en_US="Model", zh_Hans="模型")), + credential_form_schemas=[ + CredentialFormSchema( + variable="optional", + label=I18nObject(en_US="Optional", zh_Hans="可选"), + type=FormType.TEXT_INPUT, + required=False, + ) + ], + ) + validator = ModelCredentialSchemaValidator(ModelType.LLM, schema) + + result = validator.validate_and_filter({}) + assert "optional" not in result diff --git a/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_provider_credential_schema_validator.py b/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_provider_credential_schema_validator.py new file mode 100644 index 0000000000..043306840f --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/schema_validators/test_provider_credential_schema_validator.py @@ -0,0 +1,72 @@ +import pytest + +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.provider_entities import CredentialFormSchema, FormType, ProviderCredentialSchema +from dify_graph.model_runtime.schema_validators.provider_credential_schema_validator import ( + ProviderCredentialSchemaValidator, +) + + +class TestProviderCredentialSchemaValidator: + def test_validate_and_filter_success(self): + # Setup schema + schema = ProviderCredentialSchema( + credential_form_schemas=[ + CredentialFormSchema( + variable="api_key", label=I18nObject(en_US="API Key"), type=FormType.TEXT_INPUT, required=True + ), + CredentialFormSchema( + variable="endpoint", + label=I18nObject(en_US="Endpoint"), + type=FormType.TEXT_INPUT, + required=False, + default="https://api.example.com", + ), + ] + ) + validator = ProviderCredentialSchemaValidator(schema) + + # Test valid credentials + credentials = {"api_key": "my-secret-key"} + result = validator.validate_and_filter(credentials) + + assert result == {"api_key": "my-secret-key", "endpoint": "https://api.example.com"} + + def test_validate_and_filter_missing_required(self): + # Setup schema + schema = ProviderCredentialSchema( + credential_form_schemas=[ + CredentialFormSchema( + variable="api_key", label=I18nObject(en_US="API Key"), type=FormType.TEXT_INPUT, required=True + ) + ] + ) + validator = ProviderCredentialSchemaValidator(schema) + + # Test missing required credentials + with pytest.raises(ValueError, match="Variable api_key is required"): + validator.validate_and_filter({}) + + def test_validate_and_filter_extra_fields_filtered(self): + # Setup schema + schema = ProviderCredentialSchema( + credential_form_schemas=[ + CredentialFormSchema( + variable="api_key", label=I18nObject(en_US="API Key"), type=FormType.TEXT_INPUT, required=True + ) + ] + ) + validator = ProviderCredentialSchemaValidator(schema) + + # Test credentials with extra fields + credentials = {"api_key": "my-secret-key", "extra_field": "should-be-filtered"} + result = validator.validate_and_filter(credentials) + + assert "api_key" in result + assert "extra_field" not in result + assert result == {"api_key": "my-secret-key"} + + def test_init(self): + schema = ProviderCredentialSchema(credential_form_schemas=[]) + validator = ProviderCredentialSchemaValidator(schema) + assert validator.provider_credential_schema == schema diff --git a/api/tests/unit_tests/dify_graph/model_runtime/utils/test_encoders.py b/api/tests/unit_tests/dify_graph/model_runtime/utils/test_encoders.py new file mode 100644 index 0000000000..1ce8765a3b --- /dev/null +++ b/api/tests/unit_tests/dify_graph/model_runtime/utils/test_encoders.py @@ -0,0 +1,231 @@ +import dataclasses +import datetime +from collections import deque +from decimal import Decimal +from enum import Enum +from ipaddress import IPv4Address, IPv4Interface, IPv4Network, IPv6Address, IPv6Interface, IPv6Network +from pathlib import Path, PurePath +from re import compile +from typing import Any +from unittest.mock import MagicMock +from uuid import UUID + +import pytest +from pydantic import BaseModel, ConfigDict +from pydantic.networks import AnyUrl, NameEmail +from pydantic.types import SecretBytes, SecretStr +from pydantic_core import Url +from pydantic_extra_types.color import Color + +from dify_graph.model_runtime.utils.encoders import ( + _model_dump, + decimal_encoder, + generate_encoders_by_class_tuples, + isoformat, + jsonable_encoder, +) + + +class MockEnum(Enum): + A = "a" + B = "b" + + +class MockPydanticModel(BaseModel): + model_config = ConfigDict(populate_by_name=True) + name: str + age: int + + +@dataclasses.dataclass +class MockDataclass: + name: str + value: Any + + +class MockWithDict: + def __init__(self, data): + self.data = data + + def __iter__(self): + return iter(self.data.items()) + + +class MockWithVars: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + setattr(self, k, v) + + +class TestEncoders: + def test_model_dump(self): + model = MockPydanticModel(name="test", age=20) + result = _model_dump(model) + assert result == {"name": "test", "age": 20} + + def test_isoformat(self): + d = datetime.date(2023, 1, 1) + assert isoformat(d) == "2023-01-01" + t = datetime.time(12, 0, 0) + assert isoformat(t) == "12:00:00" + + def test_decimal_encoder(self): + assert decimal_encoder(Decimal("1.0")) == 1.0 + assert decimal_encoder(Decimal(1)) == 1 + assert decimal_encoder(Decimal("1.5")) == 1.5 + assert decimal_encoder(Decimal(0)) == 0 + assert decimal_encoder(Decimal(-1)) == -1 + + def test_generate_encoders_by_class_tuples(self): + type_map = {int: str, float: str, str: int} + result = generate_encoders_by_class_tuples(type_map) + assert result[str] == (int, float) + assert result[int] == (str,) + + def test_jsonable_encoder_basic_types(self): + assert jsonable_encoder("string") == "string" + assert jsonable_encoder(123) == 123 + assert jsonable_encoder(1.23) == 1.23 + assert jsonable_encoder(None) is None + + def test_jsonable_encoder_pydantic(self): + model = MockPydanticModel(name="test", age=20) + assert jsonable_encoder(model) == {"name": "test", "age": 20} + + def test_jsonable_encoder_pydantic_root(self): + # Manually create a mock that behaves like a model with __root__ + # because Pydantic v2 handles root differently, but the code checks for "__root__" + model = MagicMock(spec=BaseModel) + # _model_dump(obj, mode="json", ...) -> model.model_dump(mode="json", ...) + model.model_dump.return_value = {"__root__": [1, 2, 3]} + assert jsonable_encoder(model) == [1, 2, 3] + + def test_jsonable_encoder_dataclass(self): + obj = MockDataclass(name="test", value=1) + assert jsonable_encoder(obj) == {"name": "test", "value": 1} + # Test dataclass type (should not be treated as instance) + # It should fall back to vars() or dict() or at least not crash + with pytest.raises(ValueError): + jsonable_encoder(MockDataclass) + + def test_jsonable_encoder_enum(self): + assert jsonable_encoder(MockEnum.A) == "a" + + def test_jsonable_encoder_path(self): + assert jsonable_encoder(Path("/tmp/test")) == "/tmp/test" + assert jsonable_encoder(PurePath("/tmp/test")) == "/tmp/test" + + def test_jsonable_encoder_decimal(self): + # In jsonable_encoder, Decimal is formatted as string via format(obj, "f") + assert jsonable_encoder(Decimal("1.23")) == "1.23" + assert jsonable_encoder(Decimal("1.000")) == "1.000" + + def test_jsonable_encoder_dict(self): + d = {"a": 1, "b": [2, 3], "_sa_instance": "hidden"} + assert jsonable_encoder(d) == {"a": 1, "b": [2, 3]} + assert jsonable_encoder(d, sqlalchemy_safe=False) == {"a": 1, "b": [2, 3], "_sa_instance": "hidden"} + + d_with_none = {"a": 1, "b": None} + assert jsonable_encoder(d_with_none, exclude_none=True) == {"a": 1} + assert jsonable_encoder(d_with_none, exclude_none=False) == {"a": 1, "b": None} + + def test_jsonable_encoder_collections(self): + assert jsonable_encoder([1, 2]) == [1, 2] + assert jsonable_encoder((1, 2)) == [1, 2] + assert jsonable_encoder({1, 2}) == [1, 2] + assert jsonable_encoder(frozenset([1, 2])) == [1, 2] + assert jsonable_encoder(deque([1, 2])) == [1, 2] + + def gen(): + yield 1 + yield 2 + + assert jsonable_encoder(gen()) == [1, 2] + + def test_jsonable_encoder_custom_encoder(self): + custom = {int: lambda x: str(x + 1)} + assert jsonable_encoder(1, custom_encoder=custom) == "2" + + # Test subclass matching for custom encoder + class SubInt(int): + pass + + assert jsonable_encoder(SubInt(1), custom_encoder=custom) == "2" + + def test_jsonable_encoder_special_types(self): + # These hit ENCODERS_BY_TYPE or encoders_by_class_tuples + assert jsonable_encoder(b"bytes") == "bytes" + assert jsonable_encoder(Color("red")) == "red" + + dt = datetime.datetime(2023, 1, 1, 12, 0, 0) + assert jsonable_encoder(dt) == dt.isoformat() + + date = datetime.date(2023, 1, 1) + assert jsonable_encoder(date) == date.isoformat() + + time = datetime.time(12, 0, 0) + assert jsonable_encoder(time) == time.isoformat() + + td = datetime.timedelta(seconds=60) + assert jsonable_encoder(td) == 60.0 + + assert jsonable_encoder(IPv4Address("127.0.0.1")) == "127.0.0.1" + assert jsonable_encoder(IPv4Interface("127.0.0.1/24")) == "127.0.0.1/24" + assert jsonable_encoder(IPv4Network("127.0.0.0/24")) == "127.0.0.0/24" + assert jsonable_encoder(IPv6Address("::1")) == "::1" + assert jsonable_encoder(IPv6Interface("::1/128")) == "::1/128" + assert jsonable_encoder(IPv6Network("::/128")) == "::/128" + + assert jsonable_encoder(NameEmail(name="test", email="test@example.com")) == "test " + + assert jsonable_encoder(compile("abc")) == "abc" + + # Secret types + # Check what they actually return in this environment + res_bytes = jsonable_encoder(SecretBytes(b"secret")) + assert "**********" in res_bytes + + res_str = jsonable_encoder(SecretStr("secret")) + assert res_str == "**********" + + u = UUID("12345678-1234-5678-1234-567812345678") + assert jsonable_encoder(u) == str(u) + + url = AnyUrl("https://example.com") + assert jsonable_encoder(url) == "https://example.com/" + + purl = Url("https://example.com") + assert jsonable_encoder(purl) == "https://example.com/" + + def test_jsonable_encoder_fallback(self): + # dict(obj) success + obj_dict = MockWithDict({"a": 1}) + assert jsonable_encoder(obj_dict) == {"a": 1} + + # vars(obj) success + obj_vars = MockWithVars(x=10, y=20) + assert jsonable_encoder(obj_vars) == {"x": 10, "y": 20} + + # error fallback + class ReallyUnserializable: + __slots__ = ["__weakref__"] # No __dict__ + + def __iter__(self): + raise TypeError("not iterable") + + with pytest.raises(ValueError) as exc: + jsonable_encoder(ReallyUnserializable()) + assert "not iterable" in str(exc.value) + + def test_jsonable_encoder_nested(self): + data = { + "model": MockPydanticModel(name="test", age=20), + "list": [Decimal("1.1"), {MockEnum.A: Path("/tmp")}], + "set": {1, 2}, + } + expected = { + "model": {"name": "test", "age": 20}, + "list": ["1.1", {"a": "/tmp"}], + "set": [1, 2], + } + assert jsonable_encoder(data) == expected diff --git a/api/tests/unit_tests/extensions/otel/test_celery_sqlcommenter.py b/api/tests/unit_tests/extensions/otel/test_celery_sqlcommenter.py new file mode 100644 index 0000000000..7a537b0502 --- /dev/null +++ b/api/tests/unit_tests/extensions/otel/test_celery_sqlcommenter.py @@ -0,0 +1,172 @@ +"""Tests for Celery SQL comment context injection.""" + +from unittest.mock import MagicMock, patch + +from opentelemetry import context + + +class TestBuildCelerySqlcommenterTags: + """Tests for _build_celery_sqlcommenter_tags.""" + + def test_includes_framework_and_task_name(self): + """Tags include celery framework version and task name.""" + from extensions.otel.celery_sqlcommenter import _build_celery_sqlcommenter_tags + + task = MagicMock() + task.name = "tasks.async_workflow_tasks.execute_workflow_team" + task.request = MagicMock() + task.request.retries = 0 + task.request.delivery_info = {} + + with patch("extensions.otel.celery_sqlcommenter._get_traceparent", return_value=None): + tags = _build_celery_sqlcommenter_tags(task) + + assert "framework" in tags + assert tags["framework"].startswith("celery:") + assert tags["task_name"] == "tasks.async_workflow_tasks.execute_workflow_team" + + def test_includes_celery_retries_when_nonzero(self): + """celery_retries is included when retries > 0.""" + from extensions.otel.celery_sqlcommenter import _build_celery_sqlcommenter_tags + + task = MagicMock() + task.name = "tasks.my_task" + task.request = MagicMock() + task.request.retries = 3 + task.request.delivery_info = {} + + with patch("extensions.otel.celery_sqlcommenter._get_traceparent", return_value=None): + tags = _build_celery_sqlcommenter_tags(task) + + assert tags["celery_retries"] == 3 + + def test_omits_celery_retries_when_zero(self): + """celery_retries is omitted when retries is 0.""" + from extensions.otel.celery_sqlcommenter import _build_celery_sqlcommenter_tags + + task = MagicMock() + task.name = "tasks.my_task" + task.request = MagicMock() + task.request.retries = 0 + task.request.delivery_info = {} + + with patch("extensions.otel.celery_sqlcommenter._get_traceparent", return_value=None): + tags = _build_celery_sqlcommenter_tags(task) + + assert "celery_retries" not in tags + + def test_includes_routing_key_from_delivery_info(self): + """routing_key is included when present in delivery_info.""" + from extensions.otel.celery_sqlcommenter import _build_celery_sqlcommenter_tags + + task = MagicMock() + task.name = "tasks.my_task" + task.request = MagicMock() + task.request.retries = 0 + task.request.delivery_info = {"routing_key": "workflow_based_app_execution"} + + with patch("extensions.otel.celery_sqlcommenter._get_traceparent", return_value=None): + tags = _build_celery_sqlcommenter_tags(task) + + assert tags["routing_key"] == "workflow_based_app_execution" + + def test_includes_traceparent_when_available(self): + """traceparent is included when injectable from current context.""" + from extensions.otel.celery_sqlcommenter import _build_celery_sqlcommenter_tags + + task = MagicMock() + task.name = "tasks.my_task" + task.request = MagicMock() + task.request.retries = 0 + task.request.delivery_info = {} + + traceparent = "00-5db86c23fa8d05b67db315694b518684-737bbf30cdcda066-00" + with patch( + "extensions.otel.celery_sqlcommenter._get_traceparent", + return_value=traceparent, + ): + tags = _build_celery_sqlcommenter_tags(task) + + assert tags["traceparent"] == traceparent + + def test_handles_task_without_request(self): + """Gracefully handles task without request attribute.""" + from extensions.otel.celery_sqlcommenter import _build_celery_sqlcommenter_tags + + task = MagicMock() + task.name = "tasks.my_task" + del task.request + + with patch("extensions.otel.celery_sqlcommenter._get_traceparent", return_value=None): + tags = _build_celery_sqlcommenter_tags(task) + + assert "framework" in tags + assert "task_name" in tags + + +class TestTaskPrerunPostrunHandlers: + """Tests for task_prerun and task_postrun signal handlers.""" + + def test_prerun_sets_context_postrun_detaches(self): + """task_prerun attaches SQLCOMMENTER context; task_postrun detaches it.""" + from extensions.otel.celery_sqlcommenter import ( + _SQLCOMMENTER_CONTEXT_KEY, + _TOKEN_ATTR, + _on_task_postrun, + _on_task_prerun, + ) + + clean_ctx = context.set_value(_SQLCOMMENTER_CONTEXT_KEY, None) + token = context.attach(clean_ctx) + try: + task = MagicMock() + task.name = "tasks.async_workflow_tasks.execute_workflow_team" + task.request = MagicMock() + task.request.retries = 1 + task.request.delivery_info = {"routing_key": "workflow_based_app_execution"} + + with patch( + "extensions.otel.celery_sqlcommenter._get_traceparent", + return_value="00-abc123-def456-00", + ): + _on_task_prerun(task=task) + + tags = context.get_value(_SQLCOMMENTER_CONTEXT_KEY) + assert tags is not None + assert tags["framework"].startswith("celery:") + assert tags["task_name"] == "tasks.async_workflow_tasks.execute_workflow_team" + assert tags["celery_retries"] == 1 + assert tags["routing_key"] == "workflow_based_app_execution" + assert tags["traceparent"] == "00-abc123-def456-00" + assert hasattr(task, _TOKEN_ATTR) + + _on_task_postrun(task=task) + + tags_after = context.get_value(_SQLCOMMENTER_CONTEXT_KEY) + assert tags_after is None + assert not hasattr(task, _TOKEN_ATTR) + finally: + context.detach(token) + + def test_prerun_skips_when_no_task(self): + """prerun does nothing when task is missing from kwargs.""" + from extensions.otel.celery_sqlcommenter import ( + _SQLCOMMENTER_CONTEXT_KEY, + _on_task_prerun, + ) + + clean_ctx = context.set_value(_SQLCOMMENTER_CONTEXT_KEY, None) + token = context.attach(clean_ctx) + try: + _on_task_prerun() + tags = context.get_value(_SQLCOMMENTER_CONTEXT_KEY) + assert tags is None + finally: + context.detach(token) + + def test_postrun_skips_when_no_token(self): + """postrun does nothing when task has no token (e.g. prerun was skipped).""" + from extensions.otel.celery_sqlcommenter import _on_task_postrun + + task = MagicMock() + _on_task_postrun(task=task) diff --git a/api/tests/unit_tests/libs/test_cron_compatibility.py b/api/tests/unit_tests/libs/test_cron_compatibility.py index 61103d7935..6f3a94f6dc 100644 --- a/api/tests/unit_tests/libs/test_cron_compatibility.py +++ b/api/tests/unit_tests/libs/test_cron_compatibility.py @@ -294,7 +294,7 @@ class TestFrontendBackendIntegration(unittest.TestCase): def test_schedule_service_integration(self): """Test integration with ScheduleService patterns.""" - from dify_graph.nodes.trigger_schedule.entities import VisualConfig + from core.workflow.nodes.trigger_schedule.entities import VisualConfig from services.trigger.schedule_service import ScheduleService # Test enhanced syntax through visual config conversion diff --git a/api/tests/unit_tests/models/test_account_models.py b/api/tests/unit_tests/models/test_account_models.py index cc311d447f..1726fc2e8b 100644 --- a/api/tests/unit_tests/models/test_account_models.py +++ b/api/tests/unit_tests/models/test_account_models.py @@ -98,7 +98,7 @@ class TestAccountModelValidation: ) # Assert - assert account.status == "active" + assert account.status == AccountStatus.ACTIVE def test_account_get_status_method(self): """Test the get_status method returns AccountStatus enum.""" @@ -106,7 +106,7 @@ class TestAccountModelValidation: account = Account( name="Test User", email="test@example.com", - status="pending", + status=AccountStatus.PENDING, ) # Act diff --git a/api/tests/unit_tests/models/test_workflow_models.py b/api/tests/unit_tests/models/test_workflow_models.py index f66f0b657d..4fcef34549 100644 --- a/api/tests/unit_tests/models/test_workflow_models.py +++ b/api/tests/unit_tests/models/test_workflow_models.py @@ -15,7 +15,7 @@ from uuid import uuid4 import pytest from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowExecutionStatus, WorkflowNodeExecutionStatus, ) @@ -471,7 +471,7 @@ class TestNodeExecutionRelationships: workflow_run_id=workflow_run_id, index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start Node", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -484,7 +484,7 @@ class TestNodeExecutionRelationships: assert node_execution.workflow_id == workflow_id assert node_execution.workflow_run_id == workflow_run_id assert node_execution.node_id == "start" - assert node_execution.node_type == NodeType.START.value + assert node_execution.node_type == BuiltinNodeTypes.START assert node_execution.index == 1 def test_node_execution_with_predecessor_relationship(self): @@ -503,7 +503,7 @@ class TestNodeExecutionRelationships: index=2, predecessor_node_id=predecessor_node_id, node_id=current_node_id, - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="LLM Node", status=WorkflowNodeExecutionStatus.RUNNING.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -526,7 +526,7 @@ class TestNodeExecutionRelationships: workflow_run_id=None, # Single-step has no workflow run index=1, node_id="llm_test", - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="Test LLM", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -553,7 +553,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="llm_1", - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="LLM Node", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -579,7 +579,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="code_1", - node_type=NodeType.CODE.value, + node_type=BuiltinNodeTypes.CODE, title="Code Node", status=WorkflowNodeExecutionStatus.RUNNING.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -610,7 +610,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=3, node_id="code_1", - node_type=NodeType.CODE.value, + node_type=BuiltinNodeTypes.CODE, title="Code Node", status=WorkflowNodeExecutionStatus.FAILED.value, error=error_message, @@ -641,7 +641,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="llm_1", - node_type=NodeType.LLM.value, + node_type=BuiltinNodeTypes.LLM, title="LLM Node", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -664,7 +664,7 @@ class TestNodeExecutionRelationships: workflow_run_id=str(uuid4()), index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -682,12 +682,12 @@ class TestNodeExecutionRelationships: """Test node execution with different node types.""" # Test various node types node_types = [ - (NodeType.START, "Start Node"), - (NodeType.LLM, "LLM Node"), - (NodeType.CODE, "Code Node"), - (NodeType.TOOL, "Tool Node"), - (NodeType.IF_ELSE, "Conditional Node"), - (NodeType.END, "End Node"), + (BuiltinNodeTypes.START, "Start Node"), + (BuiltinNodeTypes.LLM, "LLM Node"), + (BuiltinNodeTypes.CODE, "Code Node"), + (BuiltinNodeTypes.TOOL, "Tool Node"), + (BuiltinNodeTypes.IF_ELSE, "Conditional Node"), + (BuiltinNodeTypes.END, "End Node"), ] for node_type, title in node_types: @@ -699,8 +699,8 @@ class TestNodeExecutionRelationships: triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value, workflow_run_id=str(uuid4()), index=1, - node_id=f"{node_type.value}_1", - node_type=node_type.value, + node_id=f"{node_type}_1", + node_type=node_type, title=title, status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -708,7 +708,7 @@ class TestNodeExecutionRelationships: ) # Assert - assert node_execution.node_type == node_type.value + assert node_execution.node_type == node_type assert node_execution.title == title @@ -1004,7 +1004,7 @@ class TestGraphConfigurationValidation: workflow_run_id=str(uuid4()), index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, @@ -1029,7 +1029,7 @@ class TestGraphConfigurationValidation: workflow_run_id=str(uuid4()), index=1, node_id="start", - node_type=NodeType.START.value, + node_type=BuiltinNodeTypes.START, title="Start", status=WorkflowNodeExecutionStatus.SUCCEEDED.value, created_by_role=CreatorUserRole.ACCOUNT.value, diff --git a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py index 06703b8e38..086d1ac52e 100644 --- a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py +++ b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_repository.py @@ -17,7 +17,7 @@ from dify_graph.entities import ( WorkflowNodeExecution, ) from dify_graph.enums import ( - NodeType, + BuiltinNodeTypes, WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus, ) @@ -230,7 +230,7 @@ def test_to_db_model(repository): index=1, predecessor_node_id="test-predecessor-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", inputs={"input_key": "input_value"}, process_data={"process_key": "process_value"}, @@ -298,7 +298,7 @@ def test_to_domain_model(repository): db_model.predecessor_node_id = "test-predecessor-id" db_model.node_execution_id = "test-node-execution-id" db_model.node_id = "test-node-id" - db_model.node_type = NodeType.START + db_model.node_type = BuiltinNodeTypes.START db_model.title = "Test Node" db_model.inputs = json.dumps(inputs_dict) db_model.process_data = json.dumps(process_data_dict) @@ -324,7 +324,7 @@ def test_to_domain_model(repository): assert domain_model.predecessor_node_id == db_model.predecessor_node_id assert domain_model.node_execution_id == db_model.node_execution_id assert domain_model.node_id == db_model.node_id - assert domain_model.node_type == NodeType(db_model.node_type) + assert domain_model.node_type == db_model.node_type assert domain_model.title == db_model.title assert domain_model.inputs == inputs_dict assert domain_model.process_data == process_data_dict diff --git a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py index 95a7751273..e01fb8456f 100644 --- a/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py +++ b/api/tests/unit_tests/repositories/workflow_node_execution/test_sqlalchemy_workflow_node_execution_repository.py @@ -12,7 +12,7 @@ from core.repositories.sqlalchemy_workflow_node_execution_repository import ( SQLAlchemyWorkflowNodeExecutionRepository, ) from dify_graph.entities.workflow_node_execution import WorkflowNodeExecution -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from models import Account, WorkflowNodeExecutionModel, WorkflowNodeExecutionTriggeredFrom @@ -62,7 +62,7 @@ class TestSQLAlchemyWorkflowNodeExecutionRepositoryProcessData: workflow_id="test-workflow-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", process_data=process_data, created_at=datetime.now(), diff --git a/api/tests/unit_tests/services/auth/test_jina_auth_standalone_module.py b/api/tests/unit_tests/services/auth/test_jina_auth_standalone_module.py new file mode 100644 index 0000000000..c2fcd71875 --- /dev/null +++ b/api/tests/unit_tests/services/auth/test_jina_auth_standalone_module.py @@ -0,0 +1,157 @@ +from __future__ import annotations + +import importlib.util +import sys +from pathlib import Path +from types import ModuleType +from unittest.mock import MagicMock + +import httpx +import pytest + + +@pytest.fixture(scope="module") +def jina_module() -> ModuleType: + """ + Load `api/services/auth/jina.py` as a standalone module. + + This repo contains both `services/auth/jina.py` and a package at + `services/auth/jina/`, so importing `services.auth.jina` can be ambiguous. + """ + + module_path = Path(__file__).resolve().parents[4] / "services" / "auth" / "jina.py" + # Use a stable module name so pytest-cov can target it with `--cov=services.auth.jina_file`. + spec = importlib.util.spec_from_file_location("services.auth.jina_file", module_path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +def _credentials(api_key: str | None = "test_api_key_123", auth_type: str = "bearer") -> dict: + config: dict = {} if api_key is None else {"api_key": api_key} + return {"auth_type": auth_type, "config": config} + + +def test_init_valid_bearer_credentials(jina_module: ModuleType) -> None: + auth = jina_module.JinaAuth(_credentials()) + assert auth.api_key == "test_api_key_123" + assert auth.credentials["auth_type"] == "bearer" + + +def test_init_rejects_invalid_auth_type(jina_module: ModuleType) -> None: + with pytest.raises(ValueError, match="Invalid auth type.*Bearer"): + jina_module.JinaAuth(_credentials(auth_type="basic")) + + +@pytest.mark.parametrize("credentials", [{"auth_type": "bearer", "config": {}}, {"auth_type": "bearer"}]) +def test_init_requires_api_key(jina_module: ModuleType, credentials: dict) -> None: + with pytest.raises(ValueError, match="No API key provided"): + jina_module.JinaAuth(credentials) + + +def test_prepare_headers_includes_bearer_api_key(jina_module: ModuleType) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + assert auth._prepare_headers() == {"Content-Type": "application/json", "Authorization": "Bearer k"} + + +def test_post_request_calls_httpx(jina_module: ModuleType, monkeypatch: pytest.MonkeyPatch) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + post_mock = MagicMock(name="httpx.post") + monkeypatch.setattr(jina_module.httpx, "post", post_mock) + + auth._post_request("https://r.jina.ai", {"url": "https://example.com"}, {"h": "v"}) + post_mock.assert_called_once_with("https://r.jina.ai", headers={"h": "v"}, json={"url": "https://example.com"}) + + +def test_validate_credentials_success(jina_module: ModuleType, monkeypatch: pytest.MonkeyPatch) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + + response = MagicMock() + response.status_code = 200 + post_mock = MagicMock(return_value=response) + monkeypatch.setattr(jina_module.httpx, "post", post_mock) + + assert auth.validate_credentials() is True + post_mock.assert_called_once_with( + "https://r.jina.ai", + headers={"Content-Type": "application/json", "Authorization": "Bearer k"}, + json={"url": "https://example.com"}, + ) + + +def test_validate_credentials_non_200_raises_via_handle_error( + jina_module: ModuleType, monkeypatch: pytest.MonkeyPatch +) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + + response = MagicMock() + response.status_code = 402 + response.json.return_value = {"error": "Payment required"} + monkeypatch.setattr(jina_module.httpx, "post", MagicMock(return_value=response)) + + with pytest.raises(Exception, match="Status code: 402.*Payment required"): + auth.validate_credentials() + + +@pytest.mark.parametrize("status_code", [402, 409, 500]) +def test_handle_error_statuses_use_response_json(jina_module: ModuleType, status_code: int) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + response = MagicMock() + response.status_code = status_code + response.json.return_value = {"error": "boom"} + + with pytest.raises(Exception, match=f"Status code: {status_code}.*boom"): + auth._handle_error(response) + + +def test_handle_error_statuses_default_unknown_error(jina_module: ModuleType) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + response = MagicMock() + response.status_code = 402 + response.json.return_value = {} + + with pytest.raises(Exception, match="Unknown error occurred"): + auth._handle_error(response) + + +def test_handle_error_with_text_json_body(jina_module: ModuleType) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + response = MagicMock() + response.status_code = 403 + response.text = '{"error": "Forbidden"}' + + with pytest.raises(Exception, match="Status code: 403.*Forbidden"): + auth._handle_error(response) + + +def test_handle_error_with_text_json_body_missing_error(jina_module: ModuleType) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + response = MagicMock() + response.status_code = 403 + response.text = "{}" + + with pytest.raises(Exception, match="Unknown error occurred"): + auth._handle_error(response) + + +def test_handle_error_without_text_raises_unexpected(jina_module: ModuleType) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + response = MagicMock() + response.status_code = 404 + response.text = "" + + with pytest.raises(Exception, match="Unexpected error occurred.*404"): + auth._handle_error(response) + + +def test_validate_credentials_propagates_network_errors( + jina_module: ModuleType, monkeypatch: pytest.MonkeyPatch +) -> None: + auth = jina_module.JinaAuth(_credentials(api_key="k")) + monkeypatch.setattr(jina_module.httpx, "post", MagicMock(side_effect=httpx.ConnectError("boom"))) + + with pytest.raises(httpx.ConnectError, match="boom"): + auth.validate_credentials() diff --git a/api/tests/unit_tests/services/dataset_permission_service.py b/api/tests/unit_tests/services/dataset_permission_service.py index b687f472a5..e098e90455 100644 --- a/api/tests/unit_tests/services/dataset_permission_service.py +++ b/api/tests/unit_tests/services/dataset_permission_service.py @@ -258,323 +258,6 @@ class DatasetPermissionTestDataFactory: return [{"user_id": user_id} for user_id in user_ids] -# ============================================================================ -# Tests for get_dataset_partial_member_list -# ============================================================================ - - -class TestDatasetPermissionServiceGetPartialMemberList: - """ - Comprehensive unit tests for DatasetPermissionService.get_dataset_partial_member_list method. - - This test class covers the retrieval of partial member lists for datasets, - which returns a list of account IDs that have explicit permissions for - a given dataset. - - The get_dataset_partial_member_list method: - 1. Queries DatasetPermission table for the dataset ID - 2. Selects account_id values - 3. Returns list of account IDs - - Test scenarios include: - - Retrieving list with multiple members - - Retrieving list with single member - - Retrieving empty list (no partial members) - - Database query validation - """ - - @pytest.fixture - def mock_db_session(self): - """ - Mock database session for testing. - - Provides a mocked database session that can be used to verify - query construction and execution. - """ - with patch("services.dataset_service.db.session") as mock_db: - yield mock_db - - def test_get_dataset_partial_member_list_with_members(self, mock_db_session): - """ - Test retrieving partial member list with multiple members. - - Verifies that when a dataset has multiple partial members, all - account IDs are returned correctly. - - This test ensures: - - Query is constructed correctly - - All account IDs are returned - - Database query is executed - """ - # Arrange - dataset_id = "dataset-123" - expected_account_ids = ["user-456", "user-789", "user-012"] - - # Mock the scalars query to return account IDs - mock_scalars_result = Mock() - mock_scalars_result.all.return_value = expected_account_ids - mock_db_session.scalars.return_value = mock_scalars_result - - # Act - result = DatasetPermissionService.get_dataset_partial_member_list(dataset_id) - - # Assert - assert result == expected_account_ids - assert len(result) == 3 - - # Verify query was executed - mock_db_session.scalars.assert_called_once() - - def test_get_dataset_partial_member_list_with_single_member(self, mock_db_session): - """ - Test retrieving partial member list with single member. - - Verifies that when a dataset has only one partial member, the - single account ID is returned correctly. - - This test ensures: - - Query works correctly for single member - - Result is a list with one element - - Database query is executed - """ - # Arrange - dataset_id = "dataset-123" - expected_account_ids = ["user-456"] - - # Mock the scalars query to return single account ID - mock_scalars_result = Mock() - mock_scalars_result.all.return_value = expected_account_ids - mock_db_session.scalars.return_value = mock_scalars_result - - # Act - result = DatasetPermissionService.get_dataset_partial_member_list(dataset_id) - - # Assert - assert result == expected_account_ids - assert len(result) == 1 - - # Verify query was executed - mock_db_session.scalars.assert_called_once() - - def test_get_dataset_partial_member_list_empty(self, mock_db_session): - """ - Test retrieving partial member list when no members exist. - - Verifies that when a dataset has no partial members, an empty - list is returned. - - This test ensures: - - Empty list is returned correctly - - Query is executed even when no results - - No errors are raised - """ - # Arrange - dataset_id = "dataset-123" - - # Mock the scalars query to return empty list - mock_scalars_result = Mock() - mock_scalars_result.all.return_value = [] - mock_db_session.scalars.return_value = mock_scalars_result - - # Act - result = DatasetPermissionService.get_dataset_partial_member_list(dataset_id) - - # Assert - assert result == [] - assert len(result) == 0 - - # Verify query was executed - mock_db_session.scalars.assert_called_once() - - -# ============================================================================ -# Tests for update_partial_member_list -# ============================================================================ - - -class TestDatasetPermissionServiceUpdatePartialMemberList: - """ - Comprehensive unit tests for DatasetPermissionService.update_partial_member_list method. - - This test class covers the update of partial member lists for datasets, - which replaces the existing partial member list with a new one. - - The update_partial_member_list method: - 1. Deletes all existing DatasetPermission records for the dataset - 2. Creates new DatasetPermission records for each user in the list - 3. Adds all new permissions to the session - 4. Commits the transaction - 5. Rolls back on error - - Test scenarios include: - - Adding new partial members - - Updating existing partial members - - Replacing entire member list - - Handling empty member list - - Database transaction handling - - Error handling and rollback - """ - - @pytest.fixture - def mock_db_session(self): - """ - Mock database session for testing. - - Provides a mocked database session that can be used to verify - database operations including queries, adds, commits, and rollbacks. - """ - with patch("services.dataset_service.db.session") as mock_db: - yield mock_db - - def test_update_partial_member_list_add_new_members(self, mock_db_session): - """ - Test adding new partial members to a dataset. - - Verifies that when updating with new members, the old members - are deleted and new members are added correctly. - - This test ensures: - - Old permissions are deleted - - New permissions are created - - All permissions are added to session - - Transaction is committed - """ - # Arrange - tenant_id = "tenant-123" - dataset_id = "dataset-123" - user_list = DatasetPermissionTestDataFactory.create_user_list_mock(["user-456", "user-789"]) - - # Mock the query delete operation - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_query.delete.return_value = None - mock_db_session.query.return_value = mock_query - - # Act - DatasetPermissionService.update_partial_member_list(tenant_id, dataset_id, user_list) - - # Assert - # Verify old permissions were deleted - mock_db_session.query.assert_called() - mock_query.where.assert_called() - - # Verify new permissions were added - mock_db_session.add_all.assert_called_once() - - # Verify transaction was committed - mock_db_session.commit.assert_called_once() - - # Verify no rollback occurred - mock_db_session.rollback.assert_not_called() - - def test_update_partial_member_list_replace_existing(self, mock_db_session): - """ - Test replacing existing partial members with new ones. - - Verifies that when updating with a different member list, the - old members are removed and new members are added. - - This test ensures: - - Old permissions are deleted - - New permissions replace old ones - - Transaction is committed successfully - """ - # Arrange - tenant_id = "tenant-123" - dataset_id = "dataset-123" - user_list = DatasetPermissionTestDataFactory.create_user_list_mock(["user-999", "user-888"]) - - # Mock the query delete operation - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_query.delete.return_value = None - mock_db_session.query.return_value = mock_query - - # Act - DatasetPermissionService.update_partial_member_list(tenant_id, dataset_id, user_list) - - # Assert - # Verify old permissions were deleted - mock_db_session.query.assert_called() - - # Verify new permissions were added - mock_db_session.add_all.assert_called_once() - - # Verify transaction was committed - mock_db_session.commit.assert_called_once() - - def test_update_partial_member_list_empty_list(self, mock_db_session): - """ - Test updating with empty member list (clearing all members). - - Verifies that when updating with an empty list, all existing - permissions are deleted and no new permissions are added. - - This test ensures: - - Old permissions are deleted - - No new permissions are added - - Transaction is committed - """ - # Arrange - tenant_id = "tenant-123" - dataset_id = "dataset-123" - user_list = [] - - # Mock the query delete operation - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_query.delete.return_value = None - mock_db_session.query.return_value = mock_query - - # Act - DatasetPermissionService.update_partial_member_list(tenant_id, dataset_id, user_list) - - # Assert - # Verify old permissions were deleted - mock_db_session.query.assert_called() - - # Verify add_all was called with empty list - mock_db_session.add_all.assert_called_once_with([]) - - # Verify transaction was committed - mock_db_session.commit.assert_called_once() - - def test_update_partial_member_list_database_error_rollback(self, mock_db_session): - """ - Test error handling and rollback on database error. - - Verifies that when a database error occurs during the update, - the transaction is rolled back and the error is re-raised. - - This test ensures: - - Error is caught and handled - - Transaction is rolled back - - Error is re-raised - - No commit occurs after error - """ - # Arrange - tenant_id = "tenant-123" - dataset_id = "dataset-123" - user_list = DatasetPermissionTestDataFactory.create_user_list_mock(["user-456"]) - - # Mock the query delete operation - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_query.delete.return_value = None - mock_db_session.query.return_value = mock_query - - # Mock commit to raise an error - database_error = Exception("Database connection error") - mock_db_session.commit.side_effect = database_error - - # Act & Assert - with pytest.raises(Exception, match="Database connection error"): - DatasetPermissionService.update_partial_member_list(tenant_id, dataset_id, user_list) - - # Verify rollback was called - mock_db_session.rollback.assert_called_once() - - # ============================================================================ # Tests for check_permission # ============================================================================ @@ -776,144 +459,6 @@ class TestDatasetPermissionServiceCheckPermission: mock_get_partial_member_list.assert_called_once_with(dataset.id) -# ============================================================================ -# Tests for clear_partial_member_list -# ============================================================================ - - -class TestDatasetPermissionServiceClearPartialMemberList: - """ - Comprehensive unit tests for DatasetPermissionService.clear_partial_member_list method. - - This test class covers the clearing of partial member lists, which removes - all DatasetPermission records for a given dataset. - - The clear_partial_member_list method: - 1. Deletes all DatasetPermission records for the dataset - 2. Commits the transaction - 3. Rolls back on error - - Test scenarios include: - - Clearing list with existing members - - Clearing empty list (no members) - - Database transaction handling - - Error handling and rollback - """ - - @pytest.fixture - def mock_db_session(self): - """ - Mock database session for testing. - - Provides a mocked database session that can be used to verify - database operations including queries, deletes, commits, and rollbacks. - """ - with patch("services.dataset_service.db.session") as mock_db: - yield mock_db - - def test_clear_partial_member_list_success(self, mock_db_session): - """ - Test successful clearing of partial member list. - - Verifies that when clearing a partial member list, all permissions - are deleted and the transaction is committed. - - This test ensures: - - All permissions are deleted - - Transaction is committed - - No errors are raised - """ - # Arrange - dataset_id = "dataset-123" - - # Mock the query delete operation - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_query.delete.return_value = None - mock_db_session.query.return_value = mock_query - - # Act - DatasetPermissionService.clear_partial_member_list(dataset_id) - - # Assert - # Verify query was executed - mock_db_session.query.assert_called() - - # Verify delete was called - mock_query.where.assert_called() - mock_query.delete.assert_called_once() - - # Verify transaction was committed - mock_db_session.commit.assert_called_once() - - # Verify no rollback occurred - mock_db_session.rollback.assert_not_called() - - def test_clear_partial_member_list_empty_list(self, mock_db_session): - """ - Test clearing partial member list when no members exist. - - Verifies that when clearing an already empty list, the operation - completes successfully without errors. - - This test ensures: - - Operation works correctly for empty lists - - Transaction is committed - - No errors are raised - """ - # Arrange - dataset_id = "dataset-123" - - # Mock the query delete operation - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_query.delete.return_value = None - mock_db_session.query.return_value = mock_query - - # Act - DatasetPermissionService.clear_partial_member_list(dataset_id) - - # Assert - # Verify query was executed - mock_db_session.query.assert_called() - - # Verify transaction was committed - mock_db_session.commit.assert_called_once() - - def test_clear_partial_member_list_database_error_rollback(self, mock_db_session): - """ - Test error handling and rollback on database error. - - Verifies that when a database error occurs during clearing, - the transaction is rolled back and the error is re-raised. - - This test ensures: - - Error is caught and handled - - Transaction is rolled back - - Error is re-raised - - No commit occurs after error - """ - # Arrange - dataset_id = "dataset-123" - - # Mock the query delete operation - mock_query = Mock() - mock_query.where.return_value = mock_query - mock_query.delete.return_value = None - mock_db_session.query.return_value = mock_query - - # Mock commit to raise an error - database_error = Exception("Database connection error") - mock_db_session.commit.side_effect = database_error - - # Act & Assert - with pytest.raises(Exception, match="Database connection error"): - DatasetPermissionService.clear_partial_member_list(dataset_id) - - # Verify rollback was called - mock_db_session.rollback.assert_called_once() - - # ============================================================================ # Tests for DatasetService.check_dataset_permission # ============================================================================ @@ -1047,72 +592,6 @@ class TestDatasetServiceCheckDatasetPermission: with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): DatasetService.check_dataset_permission(dataset, user) - def test_check_dataset_permission_partial_members_with_permission_success(self, mock_db_session): - """ - Test that user with explicit permission can access partial_members dataset. - - Verifies that when a user has an explicit DatasetPermission record - for a partial_members dataset, they can access it successfully. - - This test ensures: - - Explicit permissions are checked correctly - - Users with permissions can access - - Database query is executed - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="other-user-456", # Not the creator - ) - - # Mock permission query to return permission record - mock_permission = DatasetPermissionTestDataFactory.create_dataset_permission_mock( - dataset_id=dataset.id, account_id=user.id - ) - mock_query = Mock() - mock_query.filter_by.return_value = mock_query - mock_query.first.return_value = mock_permission - mock_db_session.query.return_value = mock_query - - # Act (should not raise) - DatasetService.check_dataset_permission(dataset, user) - - # Assert - # Verify permission query was executed - mock_db_session.query.assert_called() - - def test_check_dataset_permission_partial_members_without_permission_error(self, mock_db_session): - """ - Test error when user without permission tries to access partial_members dataset. - - Verifies that when a user does not have an explicit DatasetPermission - record for a partial_members dataset, a NoPermissionError is raised. - - This test ensures: - - Missing permissions are detected - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="other-user-456", # Not the creator - ) - - # Mock permission query to return None (no permission) - mock_query = Mock() - mock_query.filter_by.return_value = mock_query - mock_query.first.return_value = None # No permission found - mock_db_session.query.return_value = mock_query - - # Act & Assert - with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): - DatasetService.check_dataset_permission(dataset, user) - def test_check_dataset_permission_partial_members_creator_success(self, mock_db_session): """ Test that creator can access partial_members dataset without explicit permission. @@ -1311,72 +790,6 @@ class TestDatasetServiceCheckDatasetOperatorPermission: with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - def test_check_dataset_operator_permission_partial_members_with_permission_success(self, mock_db_session): - """ - Test that user with explicit permission can access partial_members dataset. - - Verifies that when a user has an explicit DatasetPermission record - for a partial_members dataset, they can access it successfully. - - This test ensures: - - Explicit permissions are checked correctly - - Users with permissions can access - - Database query is executed - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="other-user-456", # Not the creator - ) - - # Mock permission query to return permission records - mock_permission = DatasetPermissionTestDataFactory.create_dataset_permission_mock( - dataset_id=dataset.id, account_id=user.id - ) - mock_query = Mock() - mock_query.filter_by.return_value = mock_query - mock_query.all.return_value = [mock_permission] # User has permission - mock_db_session.query.return_value = mock_query - - # Act (should not raise) - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - - # Assert - # Verify permission query was executed - mock_db_session.query.assert_called() - - def test_check_dataset_operator_permission_partial_members_without_permission_error(self, mock_db_session): - """ - Test error when user without permission tries to access partial_members dataset. - - Verifies that when a user does not have an explicit DatasetPermission - record for a partial_members dataset, a NoPermissionError is raised. - - This test ensures: - - Missing permissions are detected - - Error message is clear - - Error type is correct - """ - # Arrange - user = DatasetPermissionTestDataFactory.create_user_mock(user_id="user-123", role=TenantAccountRole.NORMAL) - dataset = DatasetPermissionTestDataFactory.create_dataset_mock( - tenant_id="tenant-123", - permission=DatasetPermissionEnum.PARTIAL_TEAM, - created_by="other-user-456", # Not the creator - ) - - # Mock permission query to return empty list (no permission) - mock_query = Mock() - mock_query.filter_by.return_value = mock_query - mock_query.all.return_value = [] # No permissions found - mock_db_session.query.return_value = mock_query - - # Act & Assert - with pytest.raises(NoPermissionError, match="You do not have permission to access this dataset"): - DatasetService.check_dataset_operator_permission(user=user, dataset=dataset) - # ============================================================================ # Additional Documentation and Notes diff --git a/api/tests/unit_tests/services/enterprise/test_plugin_manager_service.py b/api/tests/unit_tests/services/enterprise/test_plugin_manager_service.py new file mode 100644 index 0000000000..d5f34d00b9 --- /dev/null +++ b/api/tests/unit_tests/services/enterprise/test_plugin_manager_service.py @@ -0,0 +1,93 @@ +"""Unit tests for PluginManagerService. + +This module covers the pre-uninstall plugin hook behavior: +- Successful API call: no exception raised, correct request sent +- API failure: soft-fail (logs and does not re-raise) +""" + +from unittest.mock import patch + +from httpx import HTTPStatusError + +from configs import dify_config +from services.enterprise.plugin_manager_service import ( + PluginManagerService, + PreUninstallPluginRequest, +) + + +class TestTryPreUninstallPlugin: + def test_try_pre_uninstall_plugin_success(self): + body = PreUninstallPluginRequest( + tenant_id="tenant-123", + plugin_unique_identifier="com.example.my_plugin", + ) + + with patch( + "services.enterprise.plugin_manager_service.EnterprisePluginManagerRequest.send_request" + ) as mock_send_request: + mock_send_request.return_value = {} + + PluginManagerService.try_pre_uninstall_plugin(body) + + mock_send_request.assert_called_once_with( + "POST", + "/pre-uninstall-plugin", + json={"tenant_id": "tenant-123", "plugin_unique_identifier": "com.example.my_plugin"}, + raise_for_status=True, + timeout=dify_config.ENTERPRISE_REQUEST_TIMEOUT, + ) + + def test_try_pre_uninstall_plugin_http_error_soft_fails(self): + body = PreUninstallPluginRequest( + tenant_id="tenant-456", + plugin_unique_identifier="com.example.other_plugin", + ) + + with ( + patch( + "services.enterprise.plugin_manager_service.EnterprisePluginManagerRequest.send_request" + ) as mock_send_request, + patch("services.enterprise.plugin_manager_service.logger") as mock_logger, + ): + mock_send_request.side_effect = HTTPStatusError( + "502 Bad Gateway", + request=None, + response=None, + ) + + PluginManagerService.try_pre_uninstall_plugin(body) + + mock_send_request.assert_called_once_with( + "POST", + "/pre-uninstall-plugin", + json={"tenant_id": "tenant-456", "plugin_unique_identifier": "com.example.other_plugin"}, + raise_for_status=True, + timeout=dify_config.ENTERPRISE_REQUEST_TIMEOUT, + ) + mock_logger.exception.assert_called_once() + + def test_try_pre_uninstall_plugin_generic_exception_soft_fails(self): + body = PreUninstallPluginRequest( + tenant_id="tenant-789", + plugin_unique_identifier="com.example.failing_plugin", + ) + + with ( + patch( + "services.enterprise.plugin_manager_service.EnterprisePluginManagerRequest.send_request" + ) as mock_send_request, + patch("services.enterprise.plugin_manager_service.logger") as mock_logger, + ): + mock_send_request.side_effect = ConnectionError("network unreachable") + + PluginManagerService.try_pre_uninstall_plugin(body) + + mock_send_request.assert_called_once_with( + "POST", + "/pre-uninstall-plugin", + json={"tenant_id": "tenant-789", "plugin_unique_identifier": "com.example.failing_plugin"}, + raise_for_status=True, + timeout=dify_config.ENTERPRISE_REQUEST_TIMEOUT, + ) + mock_logger.exception.assert_called_once() diff --git a/api/tests/unit_tests/services/plugin/__init__.py b/api/tests/unit_tests/services/plugin/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/services/plugin/conftest.py b/api/tests/unit_tests/services/plugin/conftest.py new file mode 100644 index 0000000000..80c6077b0c --- /dev/null +++ b/api/tests/unit_tests/services/plugin/conftest.py @@ -0,0 +1,39 @@ +"""Shared fixtures for services.plugin test suite.""" + +from __future__ import annotations + +from unittest.mock import MagicMock + +import pytest + +from services.feature_service import PluginInstallationScope + + +def make_features( + restrict_to_marketplace: bool = False, + scope: PluginInstallationScope = PluginInstallationScope.ALL, +) -> MagicMock: + """Create a mock FeatureService.get_system_features() result.""" + features = MagicMock() + features.plugin_installation_permission.restrict_to_marketplace_only = restrict_to_marketplace + features.plugin_installation_permission.plugin_installation_scope = scope + return features + + +@pytest.fixture +def mock_installer(monkeypatch): + """Patch PluginInstaller at the service import site.""" + mock = MagicMock() + monkeypatch.setattr("services.plugin.plugin_service.PluginInstaller", lambda: mock) + return mock + + +@pytest.fixture +def mock_features(): + """Patch FeatureService to return permissive defaults.""" + from unittest.mock import patch + + features = make_features() + with patch("services.plugin.plugin_service.FeatureService") as mock_fs: + mock_fs.get_system_features.return_value = features + yield features diff --git a/api/tests/unit_tests/services/plugin/test_dependencies_analysis.py b/api/tests/unit_tests/services/plugin/test_dependencies_analysis.py new file mode 100644 index 0000000000..8f0886769c --- /dev/null +++ b/api/tests/unit_tests/services/plugin/test_dependencies_analysis.py @@ -0,0 +1,172 @@ +"""Tests for services.plugin.dependencies_analysis.DependenciesAnalysisService. + +Covers: provider ID resolution, leaked dependency detection with version +extraction, dependency generation from multiple sources, and latest +dependencies via marketplace. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin import PluginDependency, PluginInstallationSource +from services.plugin.dependencies_analysis import DependenciesAnalysisService + + +class TestAnalyzeToolDependency: + def test_valid_three_part_id(self): + result = DependenciesAnalysisService.analyze_tool_dependency("langgenius/google/google") + assert result == "langgenius/google" + + def test_single_part_expands_to_langgenius(self): + result = DependenciesAnalysisService.analyze_tool_dependency("websearch") + assert result == "langgenius/websearch" + + def test_invalid_format_raises(self): + with pytest.raises(ValueError): + DependenciesAnalysisService.analyze_tool_dependency("bad/format") + + +class TestAnalyzeModelProviderDependency: + def test_valid_three_part_id(self): + result = DependenciesAnalysisService.analyze_model_provider_dependency("langgenius/openai/openai") + assert result == "langgenius/openai" + + def test_google_maps_to_gemini(self): + result = DependenciesAnalysisService.analyze_model_provider_dependency("langgenius/google/google") + assert result == "langgenius/gemini" + + def test_single_part_expands(self): + result = DependenciesAnalysisService.analyze_model_provider_dependency("anthropic") + assert result == "langgenius/anthropic" + + +class TestGetLeakedDependencies: + def _make_dependency(self, identifier: str, dep_type=PluginDependency.Type.Marketplace): + return PluginDependency( + type=dep_type, + value=PluginDependency.Marketplace(marketplace_plugin_unique_identifier=identifier), + ) + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_returns_empty_when_all_present(self, mock_installer_cls): + mock_installer_cls.return_value.fetch_missing_dependencies.return_value = [] + deps = [self._make_dependency("org/plugin:1.0.0@hash")] + + result = DependenciesAnalysisService.get_leaked_dependencies("t1", deps) + + assert result == [] + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_returns_missing_with_version_extracted(self, mock_installer_cls): + missing = MagicMock() + missing.plugin_unique_identifier = "org/plugin:1.2.3@hash" + missing.current_identifier = "org/plugin:1.0.0@oldhash" + mock_installer_cls.return_value.fetch_missing_dependencies.return_value = [missing] + + deps = [self._make_dependency("org/plugin:1.2.3@hash")] + + result = DependenciesAnalysisService.get_leaked_dependencies("t1", deps) + + assert len(result) == 1 + assert result[0].value.version == "1.2.3" + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_skips_present_dependencies(self, mock_installer_cls): + missing = MagicMock() + missing.plugin_unique_identifier = "org/missing:1.0.0@hash" + missing.current_identifier = None + mock_installer_cls.return_value.fetch_missing_dependencies.return_value = [missing] + + deps = [ + self._make_dependency("org/present:1.0.0@hash"), + self._make_dependency("org/missing:1.0.0@hash"), + ] + + result = DependenciesAnalysisService.get_leaked_dependencies("t1", deps) + + assert len(result) == 1 + + +class TestGenerateDependencies: + def _make_installation(self, source, identifier, meta=None): + install = MagicMock() + install.source = source + install.plugin_unique_identifier = identifier + install.meta = meta or {} + return install + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_github_source(self, mock_installer_cls): + install = self._make_installation( + PluginInstallationSource.Github, + "org/plugin:1.0.0@hash", + {"repo": "org/repo", "version": "v1.0", "package": "plugin.difypkg"}, + ) + mock_installer_cls.return_value.fetch_plugin_installation_by_ids.return_value = [install] + + result = DependenciesAnalysisService.generate_dependencies("t1", ["p1"]) + + assert len(result) == 1 + assert result[0].type == PluginDependency.Type.Github + assert result[0].value.repo == "org/repo" + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_marketplace_source(self, mock_installer_cls): + install = self._make_installation(PluginInstallationSource.Marketplace, "org/plugin:1.0.0@hash") + mock_installer_cls.return_value.fetch_plugin_installation_by_ids.return_value = [install] + + result = DependenciesAnalysisService.generate_dependencies("t1", ["p1"]) + + assert result[0].type == PluginDependency.Type.Marketplace + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_package_source(self, mock_installer_cls): + install = self._make_installation(PluginInstallationSource.Package, "org/plugin:1.0.0@hash") + mock_installer_cls.return_value.fetch_plugin_installation_by_ids.return_value = [install] + + result = DependenciesAnalysisService.generate_dependencies("t1", ["p1"]) + + assert result[0].type == PluginDependency.Type.Package + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_remote_source_raises(self, mock_installer_cls): + install = self._make_installation(PluginInstallationSource.Remote, "org/plugin:1.0.0@hash") + mock_installer_cls.return_value.fetch_plugin_installation_by_ids.return_value = [install] + + with pytest.raises(ValueError, match="remote plugin"): + DependenciesAnalysisService.generate_dependencies("t1", ["p1"]) + + @patch("services.plugin.dependencies_analysis.PluginInstaller") + def test_deduplicates_input_ids(self, mock_installer_cls): + mock_installer_cls.return_value.fetch_plugin_installation_by_ids.return_value = [] + + DependenciesAnalysisService.generate_dependencies("t1", ["p1", "p1", "p2"]) + + call_args = mock_installer_cls.return_value.fetch_plugin_installation_by_ids.call_args[0] + assert len(call_args[1]) == 2 # deduplicated + + +class TestGenerateLatestDependencies: + @patch("services.plugin.dependencies_analysis.dify_config") + def test_returns_empty_when_marketplace_disabled(self, mock_config): + mock_config.MARKETPLACE_ENABLED = False + + result = DependenciesAnalysisService.generate_latest_dependencies(["p1"]) + + assert result == [] + + @patch("services.plugin.dependencies_analysis.marketplace") + @patch("services.plugin.dependencies_analysis.dify_config") + def test_returns_marketplace_deps_when_enabled(self, mock_config, mock_marketplace): + mock_config.MARKETPLACE_ENABLED = True + manifest = MagicMock() + manifest.latest_package_identifier = "org/plugin:2.0.0@newhash" + mock_marketplace.batch_fetch_plugin_manifests.return_value = [manifest] + + result = DependenciesAnalysisService.generate_latest_dependencies(["p1"]) + + assert len(result) == 1 + assert result[0].type == PluginDependency.Type.Marketplace diff --git a/api/tests/unit_tests/services/plugin/test_endpoint_service.py b/api/tests/unit_tests/services/plugin/test_endpoint_service.py new file mode 100644 index 0000000000..ddf80c8017 --- /dev/null +++ b/api/tests/unit_tests/services/plugin/test_endpoint_service.py @@ -0,0 +1,41 @@ +"""Tests for services.plugin.endpoint_service.EndpointService. + +Smoke tests to confirm delegation to PluginEndpointClient. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from services.plugin.endpoint_service import EndpointService + + +class TestEndpointServiceDelegation: + @patch("services.plugin.endpoint_service.PluginEndpointClient") + def test_create_delegates_correctly(self, mock_client_cls): + expected = MagicMock() + mock_client_cls.return_value.create_endpoint.return_value = expected + + result = EndpointService.create_endpoint("t1", "u1", "uid-1", "my-endpoint", {"key": "val"}) + + assert result is expected + mock_client_cls.return_value.create_endpoint.assert_called_once_with( + tenant_id="t1", user_id="u1", plugin_unique_identifier="uid-1", name="my-endpoint", settings={"key": "val"} + ) + + @patch("services.plugin.endpoint_service.PluginEndpointClient") + def test_list_delegates_correctly(self, mock_client_cls): + expected = MagicMock() + mock_client_cls.return_value.list_endpoints.return_value = expected + + result = EndpointService.list_endpoints("t1", "u1", 1, 10) + + assert result is expected + + @patch("services.plugin.endpoint_service.PluginEndpointClient") + def test_enable_disable_delegates(self, mock_client_cls): + EndpointService.enable_endpoint("t1", "u1", "ep-1") + mock_client_cls.return_value.enable_endpoint.assert_called_once() + + EndpointService.disable_endpoint("t1", "u1", "ep-2") + mock_client_cls.return_value.disable_endpoint.assert_called_once() diff --git a/api/tests/unit_tests/services/plugin/test_oauth_service.py b/api/tests/unit_tests/services/plugin/test_oauth_service.py new file mode 100644 index 0000000000..27df4556bc --- /dev/null +++ b/api/tests/unit_tests/services/plugin/test_oauth_service.py @@ -0,0 +1,90 @@ +"""Tests for services.plugin.oauth_service.OAuthProxyService. + +Covers: CSRF proxy context creation with Redis TTL, context consumption +with one-time use semantics, and validation error paths. +""" + +from __future__ import annotations + +import json + +import pytest + +from services.plugin.oauth_service import OAuthProxyService + + +class TestCreateProxyContext: + def test_stores_context_in_redis_with_ttl(self): + context_id = OAuthProxyService.create_proxy_context( + user_id="u1", tenant_id="t1", plugin_id="p1", provider="github" + ) + + assert context_id # non-empty UUID string + from extensions.ext_redis import redis_client + + redis_client.setex.assert_called_once() + call_args = redis_client.setex.call_args + key = call_args[0][0] + ttl = call_args[0][1] + stored_data = json.loads(call_args[0][2]) + + assert key.startswith("oauth_proxy_context:") + assert ttl == 5 * 60 + assert stored_data["user_id"] == "u1" + assert stored_data["tenant_id"] == "t1" + assert stored_data["plugin_id"] == "p1" + assert stored_data["provider"] == "github" + + def test_includes_credential_id_when_provided(self): + OAuthProxyService.create_proxy_context( + user_id="u1", tenant_id="t1", plugin_id="p1", provider="github", credential_id="cred-1" + ) + + from extensions.ext_redis import redis_client + + stored_data = json.loads(redis_client.setex.call_args[0][2]) + assert stored_data["credential_id"] == "cred-1" + + def test_excludes_credential_id_when_none(self): + OAuthProxyService.create_proxy_context(user_id="u1", tenant_id="t1", plugin_id="p1", provider="github") + + from extensions.ext_redis import redis_client + + stored_data = json.loads(redis_client.setex.call_args[0][2]) + assert "credential_id" not in stored_data + + def test_includes_extra_data(self): + OAuthProxyService.create_proxy_context( + user_id="u1", tenant_id="t1", plugin_id="p1", provider="github", extra_data={"scope": "repo"} + ) + + from extensions.ext_redis import redis_client + + stored_data = json.loads(redis_client.setex.call_args[0][2]) + assert stored_data["scope"] == "repo" + + +class TestUseProxyContext: + def test_raises_when_context_id_empty(self): + with pytest.raises(ValueError, match="context_id is required"): + OAuthProxyService.use_proxy_context("") + + def test_raises_when_context_not_found(self): + from extensions.ext_redis import redis_client + + redis_client.get.return_value = None + + with pytest.raises(ValueError, match="context_id is invalid"): + OAuthProxyService.use_proxy_context("nonexistent-id") + + def test_returns_data_and_deletes_key(self): + from extensions.ext_redis import redis_client + + stored = {"user_id": "u1", "tenant_id": "t1", "plugin_id": "p1", "provider": "github"} + redis_client.get.return_value = json.dumps(stored).encode() + + result = OAuthProxyService.use_proxy_context("valid-id") + + assert result == stored + expected_key = "oauth_proxy_context:valid-id" + redis_client.delete.assert_called_once_with(expected_key) diff --git a/api/tests/unit_tests/services/plugin/test_plugin_parameter_service.py b/api/tests/unit_tests/services/plugin/test_plugin_parameter_service.py new file mode 100644 index 0000000000..bfa9fe976b --- /dev/null +++ b/api/tests/unit_tests/services/plugin/test_plugin_parameter_service.py @@ -0,0 +1,216 @@ +"""Tests for services.plugin.plugin_parameter_service.PluginParameterService. + +Covers: dynamic select options via tool and trigger credential paths, +HIDDEN_VALUE replacement, and error handling for missing records. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from services.plugin.plugin_parameter_service import PluginParameterService + + +class TestGetDynamicSelectOptionsTool: + @patch("services.plugin.plugin_parameter_service.DynamicSelectClient") + @patch("services.plugin.plugin_parameter_service.ToolManager") + def test_no_credentials_needed(self, mock_tool_mgr, mock_client_cls): + provider_ctrl = MagicMock() + provider_ctrl.need_credentials = False + mock_tool_mgr.get_builtin_provider.return_value = provider_ctrl + mock_client_cls.return_value.fetch_dynamic_select_options.return_value.options = ["opt1"] + + result = PluginParameterService.get_dynamic_select_options( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="google", + action="search", + parameter="engine", + credential_id=None, + provider_type="tool", + ) + + assert result == ["opt1"] + call_kwargs = mock_client_cls.return_value.fetch_dynamic_select_options.call_args + assert call_kwargs[0][5] == {} # empty credentials + + @patch("services.plugin.plugin_parameter_service.DynamicSelectClient") + @patch("services.plugin.plugin_parameter_service.create_tool_provider_encrypter") + @patch("services.plugin.plugin_parameter_service.db") + @patch("services.plugin.plugin_parameter_service.ToolManager") + def test_fetches_credentials_with_credential_id(self, mock_tool_mgr, mock_db, mock_encrypter_fn, mock_client_cls): + provider_ctrl = MagicMock() + provider_ctrl.need_credentials = True + mock_tool_mgr.get_builtin_provider.return_value = provider_ctrl + encrypter = MagicMock() + encrypter.decrypt.return_value = {"api_key": "decrypted"} + mock_encrypter_fn.return_value = (encrypter, None) + + # Mock the Session/query chain + db_record = MagicMock() + db_record.credentials = {"api_key": "encrypted"} + db_record.credential_type = "api_key" + + with patch("services.plugin.plugin_parameter_service.Session") as mock_session_cls: + mock_session = MagicMock() + mock_session_cls.return_value.__enter__ = MagicMock(return_value=mock_session) + mock_session_cls.return_value.__exit__ = MagicMock(return_value=False) + mock_session.query.return_value.where.return_value.first.return_value = db_record + mock_client_cls.return_value.fetch_dynamic_select_options.return_value.options = ["opt1"] + + result = PluginParameterService.get_dynamic_select_options( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="google", + action="search", + parameter="engine", + credential_id="cred-1", + provider_type="tool", + ) + + assert result == ["opt1"] + + @patch("services.plugin.plugin_parameter_service.create_tool_provider_encrypter") + @patch("services.plugin.plugin_parameter_service.db") + @patch("services.plugin.plugin_parameter_service.ToolManager") + def test_raises_when_tool_provider_not_found(self, mock_tool_mgr, mock_db, mock_encrypter_fn): + provider_ctrl = MagicMock() + provider_ctrl.need_credentials = True + mock_tool_mgr.get_builtin_provider.return_value = provider_ctrl + mock_encrypter_fn.return_value = (MagicMock(), None) + + with patch("services.plugin.plugin_parameter_service.Session") as mock_session_cls: + mock_session = MagicMock() + mock_session_cls.return_value.__enter__ = MagicMock(return_value=mock_session) + mock_session_cls.return_value.__exit__ = MagicMock(return_value=False) + mock_session.query.return_value.where.return_value.order_by.return_value.first.return_value = None + + with pytest.raises(ValueError, match="not found"): + PluginParameterService.get_dynamic_select_options( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="google", + action="search", + parameter="engine", + credential_id=None, + provider_type="tool", + ) + + +class TestGetDynamicSelectOptionsTrigger: + @patch("services.plugin.plugin_parameter_service.DynamicSelectClient") + @patch("services.plugin.plugin_parameter_service.TriggerSubscriptionBuilderService") + def test_uses_subscription_builder_when_credential_id(self, mock_builder_svc, mock_client_cls): + sub = MagicMock() + sub.credentials = {"token": "abc"} + sub.credential_type = "api_key" + mock_builder_svc.get_subscription_builder.return_value = sub + mock_client_cls.return_value.fetch_dynamic_select_options.return_value.options = ["opt"] + + result = PluginParameterService.get_dynamic_select_options( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="github", + action="on_push", + parameter="branch", + credential_id="builder-1", + provider_type="trigger", + ) + + assert result == ["opt"] + + @patch("services.plugin.plugin_parameter_service.DynamicSelectClient") + @patch("services.plugin.plugin_parameter_service.TriggerProviderService") + @patch("services.plugin.plugin_parameter_service.TriggerSubscriptionBuilderService") + def test_falls_back_to_trigger_service(self, mock_builder_svc, mock_provider_svc, mock_client_cls): + mock_builder_svc.get_subscription_builder.return_value = None + trigger_sub = MagicMock() + api_entity = MagicMock() + api_entity.credentials = {"token": "abc"} + api_entity.credential_type = "api_key" + trigger_sub.to_api_entity.return_value = api_entity + mock_provider_svc.get_subscription_by_id.return_value = trigger_sub + mock_client_cls.return_value.fetch_dynamic_select_options.return_value.options = ["opt"] + + result = PluginParameterService.get_dynamic_select_options( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="github", + action="on_push", + parameter="branch", + credential_id="sub-1", + provider_type="trigger", + ) + + assert result == ["opt"] + + @patch("services.plugin.plugin_parameter_service.TriggerProviderService") + @patch("services.plugin.plugin_parameter_service.TriggerSubscriptionBuilderService") + def test_raises_when_no_subscription_found(self, mock_builder_svc, mock_provider_svc): + mock_builder_svc.get_subscription_builder.return_value = None + mock_provider_svc.get_subscription_by_id.return_value = None + + with pytest.raises(ValueError, match="not found"): + PluginParameterService.get_dynamic_select_options( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="github", + action="on_push", + parameter="branch", + credential_id="nonexistent", + provider_type="trigger", + ) + + +class TestGetDynamicSelectOptionsWithCredentials: + @patch("services.plugin.plugin_parameter_service.DynamicSelectClient") + @patch("services.plugin.plugin_parameter_service.TriggerProviderService") + def test_replaces_hidden_values(self, mock_provider_svc, mock_client_cls): + from constants import HIDDEN_VALUE + + original = MagicMock() + original.credentials = {"token": "real-secret", "name": "real-name"} + original.credential_type = "api_key" + mock_provider_svc.get_subscription_by_id.return_value = original + mock_client_cls.return_value.fetch_dynamic_select_options.return_value.options = ["opt"] + + result = PluginParameterService.get_dynamic_select_options_with_credentials( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="github", + action="on_push", + parameter="branch", + credential_id="cred-1", + credentials={"token": HIDDEN_VALUE, "name": "new-name"}, + ) + + assert result == ["opt"] + call_args = mock_client_cls.return_value.fetch_dynamic_select_options.call_args[0] + resolved = call_args[5] + assert resolved["token"] == "real-secret" # replaced + assert resolved["name"] == "new-name" # kept as-is + + @patch("services.plugin.plugin_parameter_service.TriggerProviderService") + def test_raises_when_subscription_not_found(self, mock_provider_svc): + mock_provider_svc.get_subscription_by_id.return_value = None + + with pytest.raises(ValueError, match="not found"): + PluginParameterService.get_dynamic_select_options_with_credentials( + tenant_id="t1", + user_id="u1", + plugin_id="p1", + provider="github", + action="on_push", + parameter="branch", + credential_id="nonexistent", + credentials={"token": "val"}, + ) diff --git a/api/tests/unit_tests/services/plugin/test_plugin_service.py b/api/tests/unit_tests/services/plugin/test_plugin_service.py new file mode 100644 index 0000000000..09b9ab498b --- /dev/null +++ b/api/tests/unit_tests/services/plugin/test_plugin_service.py @@ -0,0 +1,357 @@ +"""Tests for services.plugin.plugin_service.PluginService. + +Covers: version caching with Redis, install permission/scope gates, +icon URL construction, asset retrieval with MIME guessing, plugin +verification, marketplace upgrade flows, and uninstall with credential cleanup. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin import PluginInstallationSource +from core.plugin.entities.plugin_daemon import PluginVerification +from services.errors.plugin import PluginInstallationForbiddenError +from services.feature_service import PluginInstallationScope +from services.plugin.plugin_service import PluginService +from tests.unit_tests.services.plugin.conftest import make_features + + +class TestFetchLatestPluginVersion: + @patch("services.plugin.plugin_service.marketplace") + @patch("services.plugin.plugin_service.redis_client") + def test_returns_cached_version(self, mock_redis, mock_marketplace): + cached_json = PluginService.LatestPluginCache( + plugin_id="p1", + version="1.0.0", + unique_identifier="uid-1", + status="active", + deprecated_reason="", + alternative_plugin_id="", + ).model_dump_json() + mock_redis.get.return_value = cached_json + + result = PluginService.fetch_latest_plugin_version(["p1"]) + + assert result["p1"].version == "1.0.0" + mock_marketplace.batch_fetch_plugin_manifests.assert_not_called() + + @patch("services.plugin.plugin_service.marketplace") + @patch("services.plugin.plugin_service.redis_client") + def test_fetches_from_marketplace_on_cache_miss(self, mock_redis, mock_marketplace): + mock_redis.get.return_value = None + manifest = MagicMock() + manifest.plugin_id = "p1" + manifest.latest_version = "2.0.0" + manifest.latest_package_identifier = "uid-2" + manifest.status = "active" + manifest.deprecated_reason = "" + manifest.alternative_plugin_id = "" + mock_marketplace.batch_fetch_plugin_manifests.return_value = [manifest] + + result = PluginService.fetch_latest_plugin_version(["p1"]) + + assert result["p1"].version == "2.0.0" + mock_redis.setex.assert_called_once() + + @patch("services.plugin.plugin_service.marketplace") + @patch("services.plugin.plugin_service.redis_client") + def test_returns_none_for_unknown_plugin(self, mock_redis, mock_marketplace): + mock_redis.get.return_value = None + mock_marketplace.batch_fetch_plugin_manifests.return_value = [] + + result = PluginService.fetch_latest_plugin_version(["unknown"]) + + assert result["unknown"] is None + + @patch("services.plugin.plugin_service.marketplace") + @patch("services.plugin.plugin_service.redis_client") + def test_handles_marketplace_exception_gracefully(self, mock_redis, mock_marketplace): + mock_redis.get.return_value = None + mock_marketplace.batch_fetch_plugin_manifests.side_effect = RuntimeError("network error") + + result = PluginService.fetch_latest_plugin_version(["p1"]) + + assert result == {} + + +class TestCheckMarketplaceOnlyPermission: + @patch("services.plugin.plugin_service.FeatureService") + def test_raises_when_restricted(self, mock_fs): + mock_fs.get_system_features.return_value = make_features(restrict_to_marketplace=True) + + with pytest.raises(PluginInstallationForbiddenError): + PluginService._check_marketplace_only_permission() + + @patch("services.plugin.plugin_service.FeatureService") + def test_passes_when_not_restricted(self, mock_fs): + mock_fs.get_system_features.return_value = make_features(restrict_to_marketplace=False) + + PluginService._check_marketplace_only_permission() # should not raise + + +class TestCheckPluginInstallationScope: + @patch("services.plugin.plugin_service.FeatureService") + def test_official_only_allows_langgenius(self, mock_fs): + mock_fs.get_system_features.return_value = make_features(scope=PluginInstallationScope.OFFICIAL_ONLY) + verification = MagicMock() + verification.authorized_category = PluginVerification.AuthorizedCategory.Langgenius + + PluginService._check_plugin_installation_scope(verification) # should not raise + + @patch("services.plugin.plugin_service.FeatureService") + def test_official_only_rejects_third_party(self, mock_fs): + mock_fs.get_system_features.return_value = make_features(scope=PluginInstallationScope.OFFICIAL_ONLY) + + with pytest.raises(PluginInstallationForbiddenError): + PluginService._check_plugin_installation_scope(None) + + @patch("services.plugin.plugin_service.FeatureService") + def test_official_and_partners_allows_partner(self, mock_fs): + mock_fs.get_system_features.return_value = make_features( + scope=PluginInstallationScope.OFFICIAL_AND_SPECIFIC_PARTNERS + ) + verification = MagicMock() + verification.authorized_category = PluginVerification.AuthorizedCategory.Partner + + PluginService._check_plugin_installation_scope(verification) # should not raise + + @patch("services.plugin.plugin_service.FeatureService") + def test_official_and_partners_rejects_none(self, mock_fs): + mock_fs.get_system_features.return_value = make_features( + scope=PluginInstallationScope.OFFICIAL_AND_SPECIFIC_PARTNERS + ) + + with pytest.raises(PluginInstallationForbiddenError): + PluginService._check_plugin_installation_scope(None) + + @patch("services.plugin.plugin_service.FeatureService") + def test_none_scope_always_raises(self, mock_fs): + mock_fs.get_system_features.return_value = make_features(scope=PluginInstallationScope.NONE) + verification = MagicMock() + verification.authorized_category = PluginVerification.AuthorizedCategory.Langgenius + + with pytest.raises(PluginInstallationForbiddenError): + PluginService._check_plugin_installation_scope(verification) + + @patch("services.plugin.plugin_service.FeatureService") + def test_all_scope_passes_any(self, mock_fs): + mock_fs.get_system_features.return_value = make_features(scope=PluginInstallationScope.ALL) + + PluginService._check_plugin_installation_scope(None) # should not raise + + +class TestGetPluginIconUrl: + @patch("services.plugin.plugin_service.dify_config") + def test_constructs_url_with_params(self, mock_config): + mock_config.CONSOLE_API_URL = "https://console.example.com" + + url = PluginService.get_plugin_icon_url("tenant-1", "icon.svg") + + assert "tenant_id=tenant-1" in url + assert "filename=icon.svg" in url + assert "/plugin/icon" in url + + +class TestGetAsset: + @patch("services.plugin.plugin_service.PluginAssetManager") + def test_returns_bytes_and_guessed_mime(self, mock_asset_cls): + mock_asset_cls.return_value.fetch_asset.return_value = b"" + + data, mime = PluginService.get_asset("t1", "icon.svg") + + assert data == b"" + assert "svg" in mime + + @patch("services.plugin.plugin_service.PluginAssetManager") + def test_fallback_to_octet_stream_for_unknown(self, mock_asset_cls): + mock_asset_cls.return_value.fetch_asset.return_value = b"\x00" + + _, mime = PluginService.get_asset("t1", "unknown_file") + + assert mime == "application/octet-stream" + + +class TestIsPluginVerified: + @patch("services.plugin.plugin_service.PluginInstaller") + def test_returns_true_when_verified(self, mock_installer_cls): + mock_installer_cls.return_value.fetch_plugin_manifest.return_value.verified = True + + assert PluginService.is_plugin_verified("t1", "uid-1") is True + + @patch("services.plugin.plugin_service.PluginInstaller") + def test_returns_false_on_exception(self, mock_installer_cls): + mock_installer_cls.return_value.fetch_plugin_manifest.side_effect = RuntimeError("not found") + + assert PluginService.is_plugin_verified("t1", "uid-1") is False + + +class TestUpgradePluginWithMarketplace: + @patch("services.plugin.plugin_service.dify_config") + def test_raises_when_marketplace_disabled(self, mock_config): + mock_config.MARKETPLACE_ENABLED = False + + with pytest.raises(ValueError, match="marketplace is not enabled"): + PluginService.upgrade_plugin_with_marketplace("t1", "old-uid", "new-uid") + + @patch("services.plugin.plugin_service.dify_config") + def test_raises_when_same_identifier(self, mock_config): + mock_config.MARKETPLACE_ENABLED = True + + with pytest.raises(ValueError, match="same plugin"): + PluginService.upgrade_plugin_with_marketplace("t1", "same-uid", "same-uid") + + @patch("services.plugin.plugin_service.marketplace") + @patch("services.plugin.plugin_service.FeatureService") + @patch("services.plugin.plugin_service.PluginInstaller") + @patch("services.plugin.plugin_service.dify_config") + def test_skips_download_when_already_installed(self, mock_config, mock_installer_cls, mock_fs, mock_marketplace): + mock_config.MARKETPLACE_ENABLED = True + mock_fs.get_system_features.return_value = make_features() + installer = mock_installer_cls.return_value + installer.fetch_plugin_manifest.return_value = MagicMock() # no exception = already installed + installer.upgrade_plugin.return_value = MagicMock() + + PluginService.upgrade_plugin_with_marketplace("t1", "old-uid", "new-uid") + + mock_marketplace.record_install_plugin_event.assert_called_once_with("new-uid") + installer.upgrade_plugin.assert_called_once() + + @patch("services.plugin.plugin_service.download_plugin_pkg") + @patch("services.plugin.plugin_service.FeatureService") + @patch("services.plugin.plugin_service.PluginInstaller") + @patch("services.plugin.plugin_service.dify_config") + def test_downloads_when_not_installed(self, mock_config, mock_installer_cls, mock_fs, mock_download): + mock_config.MARKETPLACE_ENABLED = True + mock_fs.get_system_features.return_value = make_features() + installer = mock_installer_cls.return_value + installer.fetch_plugin_manifest.side_effect = RuntimeError("not found") + mock_download.return_value = b"pkg-bytes" + upload_resp = MagicMock() + upload_resp.verification = None + installer.upload_pkg.return_value = upload_resp + installer.upgrade_plugin.return_value = MagicMock() + + PluginService.upgrade_plugin_with_marketplace("t1", "old-uid", "new-uid") + + mock_download.assert_called_once_with("new-uid") + installer.upload_pkg.assert_called_once() + + +class TestUpgradePluginWithGithub: + @patch("services.plugin.plugin_service.FeatureService") + @patch("services.plugin.plugin_service.PluginInstaller") + def test_checks_marketplace_permission_and_delegates(self, mock_installer_cls, mock_fs): + mock_fs.get_system_features.return_value = make_features() + installer = mock_installer_cls.return_value + installer.upgrade_plugin.return_value = MagicMock() + + PluginService.upgrade_plugin_with_github("t1", "old-uid", "new-uid", "org/repo", "v1", "pkg.difypkg") + + installer.upgrade_plugin.assert_called_once() + call_args = installer.upgrade_plugin.call_args + assert call_args[0][3] == PluginInstallationSource.Github + + +class TestUploadPkg: + @patch("services.plugin.plugin_service.FeatureService") + @patch("services.plugin.plugin_service.PluginInstaller") + def test_runs_permission_and_scope_checks(self, mock_installer_cls, mock_fs): + mock_fs.get_system_features.return_value = make_features() + upload_resp = MagicMock() + upload_resp.verification = None + mock_installer_cls.return_value.upload_pkg.return_value = upload_resp + + result = PluginService.upload_pkg("t1", b"pkg-bytes") + + assert result is upload_resp + + +class TestInstallFromMarketplacePkg: + @patch("services.plugin.plugin_service.dify_config") + def test_raises_when_marketplace_disabled(self, mock_config): + mock_config.MARKETPLACE_ENABLED = False + + with pytest.raises(ValueError, match="marketplace is not enabled"): + PluginService.install_from_marketplace_pkg("t1", ["uid-1"]) + + @patch("services.plugin.plugin_service.download_plugin_pkg") + @patch("services.plugin.plugin_service.FeatureService") + @patch("services.plugin.plugin_service.PluginInstaller") + @patch("services.plugin.plugin_service.dify_config") + def test_downloads_when_not_cached(self, mock_config, mock_installer_cls, mock_fs, mock_download): + mock_config.MARKETPLACE_ENABLED = True + mock_fs.get_system_features.return_value = make_features() + installer = mock_installer_cls.return_value + installer.fetch_plugin_manifest.side_effect = RuntimeError("not found") + mock_download.return_value = b"pkg" + upload_resp = MagicMock() + upload_resp.verification = None + upload_resp.unique_identifier = "resolved-uid" + installer.upload_pkg.return_value = upload_resp + installer.install_from_identifiers.return_value = "task-id" + + result = PluginService.install_from_marketplace_pkg("t1", ["uid-1"]) + + assert result == "task-id" + installer.install_from_identifiers.assert_called_once() + call_args = installer.install_from_identifiers.call_args[0] + assert call_args[1] == ["resolved-uid"] # uses response uid, not input + + @patch("services.plugin.plugin_service.FeatureService") + @patch("services.plugin.plugin_service.PluginInstaller") + @patch("services.plugin.plugin_service.dify_config") + def test_uses_cached_when_already_downloaded(self, mock_config, mock_installer_cls, mock_fs): + mock_config.MARKETPLACE_ENABLED = True + mock_fs.get_system_features.return_value = make_features() + installer = mock_installer_cls.return_value + installer.fetch_plugin_manifest.return_value = MagicMock() + decode_resp = MagicMock() + decode_resp.verification = None + installer.decode_plugin_from_identifier.return_value = decode_resp + installer.install_from_identifiers.return_value = "task-id" + + PluginService.install_from_marketplace_pkg("t1", ["uid-1"]) + + installer.install_from_identifiers.assert_called_once() + call_args = installer.install_from_identifiers.call_args[0] + assert call_args[1] == ["uid-1"] # uses original uid + + +class TestUninstall: + @patch("services.plugin.plugin_service.PluginInstaller") + def test_direct_uninstall_when_plugin_not_found(self, mock_installer_cls): + installer = mock_installer_cls.return_value + installer.list_plugins.return_value = [] + installer.uninstall.return_value = True + + result = PluginService.uninstall("t1", "install-1") + + assert result is True + installer.uninstall.assert_called_once_with("t1", "install-1") + + @patch("services.plugin.plugin_service.db") + @patch("services.plugin.plugin_service.PluginInstaller") + def test_cleans_credentials_when_plugin_found(self, mock_installer_cls, mock_db): + plugin = MagicMock() + plugin.installation_id = "install-1" + plugin.plugin_id = "org/myplugin" + installer = mock_installer_cls.return_value + installer.list_plugins.return_value = [plugin] + installer.uninstall.return_value = True + + # Mock Session context manager + mock_session = MagicMock() + mock_db.engine = MagicMock() + mock_session.scalars.return_value.all.return_value = [] # no credentials found + + with patch("services.plugin.plugin_service.Session") as mock_session_cls: + mock_session_cls.return_value.__enter__ = MagicMock(return_value=mock_session) + mock_session_cls.return_value.__exit__ = MagicMock(return_value=False) + + result = PluginService.uninstall("t1", "install-1") + + assert result is True + installer.uninstall.assert_called_once() diff --git a/api/tests/unit_tests/services/recommend_app/__init__.py b/api/tests/unit_tests/services/recommend_app/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/services/recommend_app/test_buildin_retrieval.py b/api/tests/unit_tests/services/recommend_app/test_buildin_retrieval.py new file mode 100644 index 0000000000..770344aa39 --- /dev/null +++ b/api/tests/unit_tests/services/recommend_app/test_buildin_retrieval.py @@ -0,0 +1,91 @@ +import json +from unittest.mock import MagicMock, patch + +import pytest + +from services.recommend_app.buildin.buildin_retrieval import BuildInRecommendAppRetrieval +from services.recommend_app.recommend_app_type import RecommendAppType + +SAMPLE_BUILTIN_DATA = { + "recommended_apps": { + "en-US": {"categories": ["writing"], "apps": [{"id": "app-1"}]}, + "zh-Hans": {"categories": ["search"], "apps": [{"id": "app-2"}]}, + }, + "app_details": { + "app-1": {"id": "app-1", "name": "Writer", "mode": "chat"}, + "app-2": {"id": "app-2", "name": "Searcher", "mode": "workflow"}, + }, +} + + +@pytest.fixture(autouse=True) +def _reset_cache(): + BuildInRecommendAppRetrieval.builtin_data = None + yield + BuildInRecommendAppRetrieval.builtin_data = None + + +class TestBuildInRecommendAppRetrieval: + def test_get_type(self): + retrieval = BuildInRecommendAppRetrieval() + assert retrieval.get_type() == RecommendAppType.BUILDIN + + def test_get_recommended_apps_and_categories_delegates(self): + with patch.object( + BuildInRecommendAppRetrieval, + "fetch_recommended_apps_from_builtin", + return_value={"apps": []}, + ) as mock_fetch: + retrieval = BuildInRecommendAppRetrieval() + result = retrieval.get_recommended_apps_and_categories("en-US") + mock_fetch.assert_called_once_with("en-US") + assert result == {"apps": []} + + def test_get_recommend_app_detail_delegates(self): + with patch.object( + BuildInRecommendAppRetrieval, + "fetch_recommended_app_detail_from_builtin", + return_value={"id": "app-1"}, + ) as mock_fetch: + retrieval = BuildInRecommendAppRetrieval() + result = retrieval.get_recommend_app_detail("app-1") + mock_fetch.assert_called_once_with("app-1") + assert result == {"id": "app-1"} + + def test_get_builtin_data_reads_json_and_caches(self, tmp_path): + json_file = tmp_path / "constants" / "recommended_apps.json" + json_file.parent.mkdir(parents=True) + json_file.write_text(json.dumps(SAMPLE_BUILTIN_DATA)) + + mock_app = MagicMock() + mock_app.root_path = str(tmp_path) + + with patch( + "services.recommend_app.buildin.buildin_retrieval.current_app", + mock_app, + ): + first = BuildInRecommendAppRetrieval._get_builtin_data() + second = BuildInRecommendAppRetrieval._get_builtin_data() + + assert first == SAMPLE_BUILTIN_DATA + assert first is second + + def test_fetch_recommended_apps_from_builtin(self): + BuildInRecommendAppRetrieval.builtin_data = SAMPLE_BUILTIN_DATA + result = BuildInRecommendAppRetrieval.fetch_recommended_apps_from_builtin("en-US") + assert result == SAMPLE_BUILTIN_DATA["recommended_apps"]["en-US"] + + def test_fetch_recommended_apps_from_builtin_missing_language(self): + BuildInRecommendAppRetrieval.builtin_data = SAMPLE_BUILTIN_DATA + result = BuildInRecommendAppRetrieval.fetch_recommended_apps_from_builtin("fr-FR") + assert result == {} + + def test_fetch_recommended_app_detail_from_builtin(self): + BuildInRecommendAppRetrieval.builtin_data = SAMPLE_BUILTIN_DATA + result = BuildInRecommendAppRetrieval.fetch_recommended_app_detail_from_builtin("app-1") + assert result == {"id": "app-1", "name": "Writer", "mode": "chat"} + + def test_fetch_recommended_app_detail_from_builtin_missing(self): + BuildInRecommendAppRetrieval.builtin_data = SAMPLE_BUILTIN_DATA + result = BuildInRecommendAppRetrieval.fetch_recommended_app_detail_from_builtin("nonexistent") + assert result is None diff --git a/api/tests/unit_tests/services/recommend_app/test_database_retrieval.py b/api/tests/unit_tests/services/recommend_app/test_database_retrieval.py new file mode 100644 index 0000000000..5d21665f75 --- /dev/null +++ b/api/tests/unit_tests/services/recommend_app/test_database_retrieval.py @@ -0,0 +1,145 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from services.recommend_app.database.database_retrieval import DatabaseRecommendAppRetrieval +from services.recommend_app.recommend_app_type import RecommendAppType + + +class TestDatabaseRecommendAppRetrieval: + def test_get_type(self): + assert DatabaseRecommendAppRetrieval().get_type() == RecommendAppType.DATABASE + + def test_get_recommended_apps_delegates(self): + with patch.object( + DatabaseRecommendAppRetrieval, + "fetch_recommended_apps_from_db", + return_value={"recommended_apps": [], "categories": []}, + ) as mock_fetch: + result = DatabaseRecommendAppRetrieval().get_recommended_apps_and_categories("en-US") + mock_fetch.assert_called_once_with("en-US") + assert result == {"recommended_apps": [], "categories": []} + + def test_get_recommend_app_detail_delegates(self): + with patch.object( + DatabaseRecommendAppRetrieval, + "fetch_recommended_app_detail_from_db", + return_value={"id": "app-1"}, + ) as mock_fetch: + result = DatabaseRecommendAppRetrieval().get_recommend_app_detail("app-1") + mock_fetch.assert_called_once_with("app-1") + assert result == {"id": "app-1"} + + +class TestFetchRecommendedAppsFromDb: + def _make_recommended_app(self, app_id, category, is_public=True, has_site=True): + site = ( + SimpleNamespace( + description="desc", + copyright="copy", + privacy_policy="pp", + custom_disclaimer="cd", + ) + if has_site + else None + ) + app = ( + SimpleNamespace(is_public=is_public, site=site) + if is_public + else SimpleNamespace(is_public=False, site=site) + ) + return SimpleNamespace( + id=f"rec-{app_id}", + app=app, + app_id=app_id, + category=category, + position=1, + is_listed=True, + ) + + @patch("services.recommend_app.database.database_retrieval.db") + def test_returns_apps_and_sorted_categories(self, mock_db): + rec1 = self._make_recommended_app("a1", "writing") + rec2 = self._make_recommended_app("a2", "assistant") + mock_db.session.scalars.return_value.all.return_value = [rec1, rec2] + + result = DatabaseRecommendAppRetrieval.fetch_recommended_apps_from_db("en-US") + + assert len(result["recommended_apps"]) == 2 + assert result["categories"] == ["assistant", "writing"] + + @patch("services.recommend_app.database.database_retrieval.db") + def test_falls_back_to_default_language_when_empty(self, mock_db): + mock_db.session.scalars.return_value.all.side_effect = [ + [], + [self._make_recommended_app("a1", "chat")], + ] + + result = DatabaseRecommendAppRetrieval.fetch_recommended_apps_from_db("fr-FR") + + assert len(result["recommended_apps"]) == 1 + assert mock_db.session.scalars.call_count == 2 + + @patch("services.recommend_app.database.database_retrieval.db") + def test_skips_non_public_apps(self, mock_db): + rec = self._make_recommended_app("a1", "chat", is_public=False) + mock_db.session.scalars.return_value.all.return_value = [rec] + + result = DatabaseRecommendAppRetrieval.fetch_recommended_apps_from_db("en-US") + + assert result["recommended_apps"] == [] + + @patch("services.recommend_app.database.database_retrieval.db") + def test_skips_apps_without_site(self, mock_db): + rec = self._make_recommended_app("a1", "chat", has_site=False) + mock_db.session.scalars.return_value.all.return_value = [rec] + + result = DatabaseRecommendAppRetrieval.fetch_recommended_apps_from_db("en-US") + + assert result["recommended_apps"] == [] + + +class TestFetchRecommendedAppDetailFromDb: + @patch("services.recommend_app.database.database_retrieval.db") + def test_returns_none_when_not_listed(self, mock_db): + mock_db.session.query.return_value.where.return_value.first.return_value = None + + result = DatabaseRecommendAppRetrieval.fetch_recommended_app_detail_from_db("app-1") + + assert result is None + + @patch("services.recommend_app.database.database_retrieval.AppDslService") + @patch("services.recommend_app.database.database_retrieval.db") + def test_returns_none_when_app_not_public(self, mock_db, mock_dsl): + rec_chain = MagicMock() + rec_chain.where.return_value.first.return_value = SimpleNamespace(app_id="app-1") + app_chain = MagicMock() + app_chain.where.return_value.first.return_value = SimpleNamespace(id="app-1", is_public=False) + mock_db.session.query.side_effect = [rec_chain, app_chain] + + result = DatabaseRecommendAppRetrieval.fetch_recommended_app_detail_from_db("app-1") + + assert result is None + + @patch("services.recommend_app.database.database_retrieval.AppDslService") + @patch("services.recommend_app.database.database_retrieval.db") + def test_returns_detail_on_success(self, mock_db, mock_dsl): + app_model = SimpleNamespace( + id="app-1", + name="My App", + icon="icon.png", + icon_background="#fff", + mode="chat", + is_public=True, + ) + rec_chain = MagicMock() + rec_chain.where.return_value.first.return_value = SimpleNamespace(app_id="app-1") + app_chain = MagicMock() + app_chain.where.return_value.first.return_value = app_model + mock_db.session.query.side_effect = [rec_chain, app_chain] + mock_dsl.export_dsl.return_value = "exported_yaml" + + result = DatabaseRecommendAppRetrieval.fetch_recommended_app_detail_from_db("app-1") + + assert result["id"] == "app-1" + assert result["name"] == "My App" + assert result["export_data"] == "exported_yaml" diff --git a/api/tests/unit_tests/services/recommend_app/test_recommend_app_factory.py b/api/tests/unit_tests/services/recommend_app/test_recommend_app_factory.py new file mode 100644 index 0000000000..036cba0cc0 --- /dev/null +++ b/api/tests/unit_tests/services/recommend_app/test_recommend_app_factory.py @@ -0,0 +1,28 @@ +import pytest + +from services.recommend_app.buildin.buildin_retrieval import BuildInRecommendAppRetrieval +from services.recommend_app.database.database_retrieval import DatabaseRecommendAppRetrieval +from services.recommend_app.recommend_app_factory import RecommendAppRetrievalFactory +from services.recommend_app.remote.remote_retrieval import RemoteRecommendAppRetrieval + + +class TestRecommendAppRetrievalFactory: + @pytest.mark.parametrize( + ("mode", "expected_class"), + [ + ("remote", RemoteRecommendAppRetrieval), + ("builtin", BuildInRecommendAppRetrieval), + ("db", DatabaseRecommendAppRetrieval), + ], + ) + def test_factory_returns_correct_class(self, mode, expected_class): + result = RecommendAppRetrievalFactory.get_recommend_app_factory(mode) + assert result is expected_class + + def test_factory_raises_for_unknown_mode(self): + with pytest.raises(ValueError, match="invalid fetch recommended apps mode"): + RecommendAppRetrievalFactory.get_recommend_app_factory("invalid_mode") + + def test_get_buildin_recommend_app_retrieval(self): + result = RecommendAppRetrievalFactory.get_buildin_recommend_app_retrieval() + assert result is BuildInRecommendAppRetrieval diff --git a/api/tests/unit_tests/services/recommend_app/test_recommend_app_type.py b/api/tests/unit_tests/services/recommend_app/test_recommend_app_type.py new file mode 100644 index 0000000000..08f72a6f77 --- /dev/null +++ b/api/tests/unit_tests/services/recommend_app/test_recommend_app_type.py @@ -0,0 +1,18 @@ +from services.recommend_app.recommend_app_type import RecommendAppType + + +def test_enum_values(): + assert RecommendAppType.REMOTE == "remote" + assert RecommendAppType.BUILDIN == "builtin" + assert RecommendAppType.DATABASE == "db" + + +def test_enum_membership(): + assert "remote" in RecommendAppType.__members__.values() + assert "builtin" in RecommendAppType.__members__.values() + assert "db" in RecommendAppType.__members__.values() + + +def test_enum_is_str(): + for member in RecommendAppType: + assert isinstance(member, str) diff --git a/api/tests/unit_tests/services/recommend_app/test_remote_retrieval.py b/api/tests/unit_tests/services/recommend_app/test_remote_retrieval.py new file mode 100644 index 0000000000..e322fbed4c --- /dev/null +++ b/api/tests/unit_tests/services/recommend_app/test_remote_retrieval.py @@ -0,0 +1,120 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from services.recommend_app.recommend_app_type import RecommendAppType +from services.recommend_app.remote.remote_retrieval import RemoteRecommendAppRetrieval + + +class TestRemoteRecommendAppRetrieval: + def test_get_type(self): + assert RemoteRecommendAppRetrieval().get_type() == RecommendAppType.REMOTE + + @patch.object( + RemoteRecommendAppRetrieval, + "fetch_recommended_app_detail_from_dify_official", + return_value={"id": "app-1"}, + ) + def test_get_recommend_app_detail_success(self, mock_fetch): + result = RemoteRecommendAppRetrieval().get_recommend_app_detail("app-1") + assert result == {"id": "app-1"} + mock_fetch.assert_called_once_with("app-1") + + @patch( + "services.recommend_app.remote.remote_retrieval" + ".BuildInRecommendAppRetrieval.fetch_recommended_app_detail_from_builtin", + return_value={"id": "fallback"}, + ) + @patch.object( + RemoteRecommendAppRetrieval, + "fetch_recommended_app_detail_from_dify_official", + side_effect=ConnectionError("timeout"), + ) + def test_get_recommend_app_detail_falls_back_on_error(self, mock_fetch, mock_builtin): + result = RemoteRecommendAppRetrieval().get_recommend_app_detail("app-1") + assert result == {"id": "fallback"} + mock_builtin.assert_called_once_with("app-1") + + @patch.object( + RemoteRecommendAppRetrieval, + "fetch_recommended_apps_from_dify_official", + return_value={"recommended_apps": [], "categories": []}, + ) + def test_get_recommended_apps_success(self, mock_fetch): + result = RemoteRecommendAppRetrieval().get_recommended_apps_and_categories("en-US") + assert result == {"recommended_apps": [], "categories": []} + + @patch( + "services.recommend_app.remote.remote_retrieval" + ".BuildInRecommendAppRetrieval.fetch_recommended_apps_from_builtin", + return_value={"recommended_apps": [{"id": "builtin"}]}, + ) + @patch.object( + RemoteRecommendAppRetrieval, + "fetch_recommended_apps_from_dify_official", + side_effect=ValueError("server error"), + ) + def test_get_recommended_apps_falls_back_on_error(self, mock_fetch, mock_builtin): + result = RemoteRecommendAppRetrieval().get_recommended_apps_and_categories("en-US") + assert result == {"recommended_apps": [{"id": "builtin"}]} + + +class TestFetchFromDifyOfficial: + @patch("services.recommend_app.remote.remote_retrieval.dify_config") + @patch("services.recommend_app.remote.remote_retrieval.httpx.get") + def test_detail_returns_json_on_200(self, mock_get, mock_config): + mock_config.HOSTED_FETCH_APP_TEMPLATES_REMOTE_DOMAIN = "https://example.com" + mock_response = MagicMock(status_code=200) + mock_response.json.return_value = {"id": "app-1", "name": "Test"} + mock_get.return_value = mock_response + + result = RemoteRecommendAppRetrieval.fetch_recommended_app_detail_from_dify_official("app-1") + + assert result == {"id": "app-1", "name": "Test"} + mock_get.assert_called_once() + + @patch("services.recommend_app.remote.remote_retrieval.dify_config") + @patch("services.recommend_app.remote.remote_retrieval.httpx.get") + def test_detail_returns_none_on_non_200(self, mock_get, mock_config): + mock_config.HOSTED_FETCH_APP_TEMPLATES_REMOTE_DOMAIN = "https://example.com" + mock_get.return_value = MagicMock(status_code=404) + + result = RemoteRecommendAppRetrieval.fetch_recommended_app_detail_from_dify_official("app-1") + + assert result is None + + @patch("services.recommend_app.remote.remote_retrieval.dify_config") + @patch("services.recommend_app.remote.remote_retrieval.httpx.get") + def test_apps_returns_sorted_categories_on_200(self, mock_get, mock_config): + mock_config.HOSTED_FETCH_APP_TEMPLATES_REMOTE_DOMAIN = "https://example.com" + mock_response = MagicMock(status_code=200) + mock_response.json.return_value = { + "recommended_apps": [], + "categories": ["writing", "agent", "chat"], + } + mock_get.return_value = mock_response + + result = RemoteRecommendAppRetrieval.fetch_recommended_apps_from_dify_official("en-US") + + assert result["categories"] == ["agent", "chat", "writing"] + + @patch("services.recommend_app.remote.remote_retrieval.dify_config") + @patch("services.recommend_app.remote.remote_retrieval.httpx.get") + def test_apps_raises_on_non_200(self, mock_get, mock_config): + mock_config.HOSTED_FETCH_APP_TEMPLATES_REMOTE_DOMAIN = "https://example.com" + mock_get.return_value = MagicMock(status_code=500) + + with pytest.raises(ValueError, match="fetch recommended apps failed"): + RemoteRecommendAppRetrieval.fetch_recommended_apps_from_dify_official("en-US") + + @patch("services.recommend_app.remote.remote_retrieval.dify_config") + @patch("services.recommend_app.remote.remote_retrieval.httpx.get") + def test_apps_without_categories_key(self, mock_get, mock_config): + mock_config.HOSTED_FETCH_APP_TEMPLATES_REMOTE_DOMAIN = "https://example.com" + mock_response = MagicMock(status_code=200) + mock_response.json.return_value = {"recommended_apps": []} + mock_get.return_value = mock_response + + result = RemoteRecommendAppRetrieval.fetch_recommended_apps_from_dify_official("en-US") + + assert "categories" not in result diff --git a/api/tests/unit_tests/services/retention/conversation/test_messages_clean_service.py b/api/tests/unit_tests/services/retention/conversation/test_messages_clean_service.py new file mode 100644 index 0000000000..a34defeba9 --- /dev/null +++ b/api/tests/unit_tests/services/retention/conversation/test_messages_clean_service.py @@ -0,0 +1,309 @@ +import datetime +import os +from unittest.mock import MagicMock, patch + +import pytest + +from services.retention.conversation.messages_clean_policy import ( + BillingDisabledPolicy, +) +from services.retention.conversation.messages_clean_service import MessagesCleanService + + +class TestMessagesCleanService: + @pytest.fixture(autouse=True) + def mock_db_engine(self): + with patch("services.retention.conversation.messages_clean_service.db") as mock_db: + mock_db.engine = MagicMock() + yield mock_db.engine + + @pytest.fixture + def mock_db_session(self, mock_db_engine): + with patch("services.retention.conversation.messages_clean_service.Session") as mock_session_cls: + mock_session = MagicMock() + mock_session_cls.return_value.__enter__.return_value = mock_session + yield mock_session + + @pytest.fixture + def mock_policy(self): + policy = MagicMock(spec=BillingDisabledPolicy) + return policy + + def test_run_calls_clean_messages(self, mock_policy): + service = MessagesCleanService( + policy=mock_policy, + end_before=datetime.datetime.now(), + batch_size=10, + ) + with patch.object(service, "_clean_messages_by_time_range") as mock_clean: + mock_clean.return_value = {"total_deleted": 5} + result = service.run() + assert result == {"total_deleted": 5} + mock_clean.assert_called_once() + + def test_clean_messages_by_time_range_basic(self, mock_db_session, mock_policy): + # Arrange + end_before = datetime.datetime(2024, 1, 1, 12, 0, 0) + service = MessagesCleanService( + policy=mock_policy, + end_before=end_before, + batch_size=10, + ) + + mock_db_session.execute.side_effect = [ + MagicMock(all=lambda: [("msg1", "app1", datetime.datetime(2024, 1, 1, 10, 0, 0))]), # messages + MagicMock(all=lambda: [MagicMock(id="app1", tenant_id="tenant1")]), # apps + MagicMock( + rowcount=1 + ), # delete relations (this is wrong, relations delete doesn't use rowcount here, but execute) + MagicMock(rowcount=1), # delete relations + MagicMock(rowcount=1), # delete relations + MagicMock(rowcount=1), # delete relations + MagicMock(rowcount=1), # delete relations + MagicMock(rowcount=1), # delete relations + MagicMock(rowcount=1), # delete relations + MagicMock(rowcount=1), # delete relations + MagicMock(rowcount=1), # delete messages + MagicMock(all=lambda: []), # next batch empty + ] + + # Reset side_effect to be more robust + # The service calls session.execute for: + # 1. Fetch messages + # 2. Fetch apps + # 3. Batch delete relations (8 calls if IDs exist) + # 4. Delete messages + + mock_returns = [ + MagicMock(all=lambda: [("msg1", "app1", datetime.datetime(2024, 1, 1, 10, 0, 0))]), # fetch messages + MagicMock(all=lambda: [MagicMock(id="app1", tenant_id="tenant1")]), # fetch apps + ] + # 8 deletes for relations + mock_returns.extend([MagicMock() for _ in range(8)]) + # 1 delete for messages + mock_returns.append(MagicMock(rowcount=1)) + # Final fetch messages (empty) + mock_returns.append(MagicMock(all=lambda: [])) + + mock_db_session.execute.side_effect = mock_returns + mock_policy.filter_message_ids.return_value = ["msg1"] + + # Act + with patch("services.retention.conversation.messages_clean_service.time.sleep"): + stats = service.run() + + # Assert + assert stats["total_messages"] == 1 + assert stats["total_deleted"] == 1 + assert stats["batches"] == 2 + + def test_clean_messages_by_time_range_with_start_from(self, mock_db_session, mock_policy): + start_from = datetime.datetime(2024, 1, 1, 0, 0, 0) + end_before = datetime.datetime(2024, 1, 1, 12, 0, 0) + service = MessagesCleanService( + policy=mock_policy, + start_from=start_from, + end_before=end_before, + batch_size=10, + ) + + mock_db_session.execute.side_effect = [ + MagicMock(all=lambda: []), # No messages + ] + + stats = service.run() + assert stats["total_messages"] == 0 + + def test_clean_messages_by_time_range_with_cursor(self, mock_db_session, mock_policy): + # Test pagination with cursor + end_before = datetime.datetime(2024, 1, 1, 12, 0, 0) + service = MessagesCleanService( + policy=mock_policy, + end_before=end_before, + batch_size=1, + ) + + msg1_time = datetime.datetime(2024, 1, 1, 10, 0, 0) + msg2_time = datetime.datetime(2024, 1, 1, 11, 0, 0) + + mock_returns = [] + # Batch 1 + mock_returns.append(MagicMock(all=lambda: [("msg1", "app1", msg1_time)])) + mock_returns.append(MagicMock(all=lambda: [MagicMock(id="app1", tenant_id="tenant1")])) + mock_returns.extend([MagicMock() for _ in range(8)]) # relations + mock_returns.append(MagicMock(rowcount=1)) # messages + + # Batch 2 + mock_returns.append(MagicMock(all=lambda: [("msg2", "app1", msg2_time)])) + mock_returns.append(MagicMock(all=lambda: [MagicMock(id="app1", tenant_id="tenant1")])) + mock_returns.extend([MagicMock() for _ in range(8)]) # relations + mock_returns.append(MagicMock(rowcount=1)) # messages + + # Batch 3 + mock_returns.append(MagicMock(all=lambda: [])) + + mock_db_session.execute.side_effect = mock_returns + mock_policy.filter_message_ids.return_value = ["msg1"] # Simplified + + with patch("services.retention.conversation.messages_clean_service.time.sleep"): + stats = service.run() + + assert stats["batches"] == 3 + assert stats["total_messages"] == 2 + + def test_clean_messages_by_time_range_dry_run(self, mock_db_session, mock_policy): + service = MessagesCleanService( + policy=mock_policy, + end_before=datetime.datetime.now(), + batch_size=10, + dry_run=True, + ) + + mock_db_session.execute.side_effect = [ + MagicMock(all=lambda: [("msg1", "app1", datetime.datetime.now())]), # messages + MagicMock(all=lambda: [MagicMock(id="app1", tenant_id="tenant1")]), # apps + MagicMock(all=lambda: []), # next batch empty + ] + mock_policy.filter_message_ids.return_value = ["msg1"] + + with patch("services.retention.conversation.messages_clean_service.random.sample") as mock_sample: + mock_sample.return_value = ["msg1"] + stats = service.run() + assert stats["filtered_messages"] == 1 + assert stats["total_deleted"] == 0 # Dry run + mock_sample.assert_called() + + def test_clean_messages_by_time_range_no_apps_found(self, mock_db_session, mock_policy): + service = MessagesCleanService( + policy=mock_policy, + end_before=datetime.datetime.now(), + batch_size=10, + ) + + mock_db_session.execute.side_effect = [ + MagicMock(all=lambda: [("msg1", "app1", datetime.datetime.now())]), # messages + MagicMock(all=lambda: []), # apps NOT found + MagicMock(all=lambda: []), # next batch empty + ] + + stats = service.run() + assert stats["total_messages"] == 1 + assert stats["total_deleted"] == 0 + + def test_clean_messages_by_time_range_no_app_ids(self, mock_db_session, mock_policy): + service = MessagesCleanService( + policy=mock_policy, + end_before=datetime.datetime.now(), + batch_size=10, + ) + + mock_db_session.execute.side_effect = [ + MagicMock(all=lambda: [("msg1", "app1", datetime.datetime.now())]), # messages + MagicMock(all=lambda: []), # next batch empty + ] + + # We need to successfully execute line 228 and 229, then return empty at 251. + # line 228: raw_messages = list(session.execute(msg_stmt).all()) + # line 251: app_ids = list({msg.app_id for msg in messages}) + + calls = [] + + def list_side_effect(arg): + calls.append(arg) + if len(calls) == 2: # This is the second call to list() in the loop + return [] + return list(arg) + + with patch("services.retention.conversation.messages_clean_service.list", side_effect=list_side_effect): + stats = service.run() + assert stats["batches"] == 2 + assert stats["total_messages"] == 1 + + def test_from_time_range_validation(self, mock_policy): + now = datetime.datetime.now() + # Test start_from >= end_before + with pytest.raises(ValueError, match="start_from .* must be less than end_before"): + MessagesCleanService.from_time_range(mock_policy, now, now) + + # Test batch_size <= 0 + with pytest.raises(ValueError, match="batch_size .* must be greater than 0"): + MessagesCleanService.from_time_range(mock_policy, now - datetime.timedelta(days=1), now, batch_size=0) + + def test_from_time_range_success(self, mock_policy): + start = datetime.datetime(2024, 1, 1) + end = datetime.datetime(2024, 2, 1) + # Mock logger to avoid actual logging if needed, though it's fine + service = MessagesCleanService.from_time_range(mock_policy, start, end) + assert service._start_from == start + assert service._end_before == end + + def test_from_days_validation(self, mock_policy): + # Test days < 0 + with pytest.raises(ValueError, match="days .* must be greater than or equal to 0"): + MessagesCleanService.from_days(mock_policy, days=-1) + + # Test batch_size <= 0 + with pytest.raises(ValueError, match="batch_size .* must be greater than 0"): + MessagesCleanService.from_days(mock_policy, days=30, batch_size=0) + + def test_from_days_success(self, mock_policy): + with patch("services.retention.conversation.messages_clean_service.naive_utc_now") as mock_now: + fixed_now = datetime.datetime(2024, 6, 1) + mock_now.return_value = fixed_now + + service = MessagesCleanService.from_days(mock_policy, days=10) + assert service._start_from is None + assert service._end_before == fixed_now - datetime.timedelta(days=10) + + def test_clean_messages_by_time_range_no_messages_to_delete(self, mock_db_session, mock_policy): + service = MessagesCleanService( + policy=mock_policy, + end_before=datetime.datetime.now(), + batch_size=10, + ) + + mock_db_session.execute.side_effect = [ + MagicMock(all=lambda: [("msg1", "app1", datetime.datetime.now())]), # messages + MagicMock(all=lambda: [MagicMock(id="app1", tenant_id="tenant1")]), # apps + MagicMock(all=lambda: []), # next batch empty + ] + mock_policy.filter_message_ids.return_value = [] # Policy says NO + + stats = service.run() + assert stats["total_messages"] == 1 + assert stats["filtered_messages"] == 0 + assert stats["total_deleted"] == 0 + + def test_batch_delete_message_relations_empty(self, mock_db_session): + MessagesCleanService._batch_delete_message_relations(mock_db_session, []) + mock_db_session.execute.assert_not_called() + + def test_batch_delete_message_relations_with_ids(self, mock_db_session): + MessagesCleanService._batch_delete_message_relations(mock_db_session, ["msg1", "msg2"]) + assert mock_db_session.execute.call_count == 8 # 8 tables to clean up + + @patch.dict(os.environ, {"SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_MAX_INTERVAL": "500"}) + def test_clean_messages_interval_from_env(self, mock_db_session, mock_policy): + service = MessagesCleanService( + policy=mock_policy, + end_before=datetime.datetime.now(), + batch_size=10, + ) + + mock_returns = [ + MagicMock(all=lambda: [("msg1", "app1", datetime.datetime.now())]), # messages + MagicMock(all=lambda: [MagicMock(id="app1", tenant_id="tenant1")]), # apps + ] + mock_returns.extend([MagicMock() for _ in range(8)]) # relations + mock_returns.append(MagicMock(rowcount=1)) # messages + mock_returns.append(MagicMock(all=lambda: [])) # next batch empty + + mock_db_session.execute.side_effect = mock_returns + mock_policy.filter_message_ids.return_value = ["msg1"] + + with patch("services.retention.conversation.messages_clean_service.time.sleep") as mock_sleep: + with patch("services.retention.conversation.messages_clean_service.random.uniform") as mock_uniform: + mock_uniform.return_value = 300.0 + service.run() + mock_uniform.assert_called_with(0, 500) + mock_sleep.assert_called_with(0.3) diff --git a/api/tests/unit_tests/services/retention/workflow_run/test_clear_free_plan_expired_workflow_run_logs.py b/api/tests/unit_tests/services/retention/workflow_run/test_clear_free_plan_expired_workflow_run_logs.py new file mode 100644 index 0000000000..0013cde79e --- /dev/null +++ b/api/tests/unit_tests/services/retention/workflow_run/test_clear_free_plan_expired_workflow_run_logs.py @@ -0,0 +1,499 @@ +""" +Unit tests for WorkflowRunCleanup service. +""" + +import datetime +from unittest.mock import MagicMock, patch + +import pytest + +from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup + + +def make_run(tenant_id: str = "t1", run_id: str = "r1", created_at: datetime.datetime | None = None): + run = MagicMock() + run.tenant_id = tenant_id + run.id = run_id + run.created_at = created_at or datetime.datetime(2024, 1, 1, tzinfo=datetime.UTC) + return run + + +@pytest.fixture +def mock_repo(): + return MagicMock() + + +@pytest.fixture +def cleanup(mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + yield WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + + +# --------------------------------------------------------------------------- +# Constructor validation +# --------------------------------------------------------------------------- + + +class TestWorkflowRunCleanupInit: + def test_only_start_from_raises(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + with pytest.raises(ValueError, match="both set or both omitted"): + WorkflowRunCleanup( + days=30, + batch_size=10, + start_from=datetime.datetime(2024, 1, 1), + workflow_run_repo=mock_repo, + ) + + def test_only_end_before_raises(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + with pytest.raises(ValueError, match="both set or both omitted"): + WorkflowRunCleanup( + days=30, + batch_size=10, + end_before=datetime.datetime(2024, 1, 1), + workflow_run_repo=mock_repo, + ) + + def test_end_before_not_greater_than_start_raises(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + with pytest.raises(ValueError, match="end_before must be greater than start_from"): + WorkflowRunCleanup( + days=30, + batch_size=10, + start_from=datetime.datetime(2024, 6, 1), + end_before=datetime.datetime(2024, 1, 1), + workflow_run_repo=mock_repo, + ) + + def test_equal_start_end_raises(self, mock_repo): + dt = datetime.datetime(2024, 1, 1) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + with pytest.raises(ValueError): + WorkflowRunCleanup(days=30, batch_size=10, start_from=dt, end_before=dt, workflow_run_repo=mock_repo) + + def test_zero_batch_size_raises(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + with pytest.raises(ValueError, match="batch_size must be greater than 0"): + WorkflowRunCleanup(days=30, batch_size=0, workflow_run_repo=mock_repo) + + def test_negative_batch_size_raises(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + with pytest.raises(ValueError): + WorkflowRunCleanup(days=30, batch_size=-1, workflow_run_repo=mock_repo) + + def test_valid_window_init(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 7 + cfg.BILLING_ENABLED = False + start = datetime.datetime(2024, 1, 1) + end = datetime.datetime(2024, 6, 1) + c = WorkflowRunCleanup(days=30, batch_size=5, start_from=start, end_before=end, workflow_run_repo=mock_repo) + assert c.window_start == start + assert c.window_end == end + + +# --------------------------------------------------------------------------- +# _empty_related_counts / _format_related_counts +# --------------------------------------------------------------------------- + + +class TestStaticHelpers: + def test_empty_related_counts(self): + counts = WorkflowRunCleanup._empty_related_counts() + assert counts == { + "node_executions": 0, + "offloads": 0, + "app_logs": 0, + "trigger_logs": 0, + "pauses": 0, + "pause_reasons": 0, + } + + def test_format_related_counts(self): + counts = { + "node_executions": 1, + "offloads": 2, + "app_logs": 3, + "trigger_logs": 4, + "pauses": 5, + "pause_reasons": 6, + } + result = WorkflowRunCleanup._format_related_counts(counts) + assert "node_executions 1" in result + assert "offloads 2" in result + assert "trigger_logs 4" in result + + +# --------------------------------------------------------------------------- +# _expiration_datetime +# --------------------------------------------------------------------------- + + +class TestExpirationDatetime: + def test_negative_returns_none(self, cleanup): + assert cleanup._expiration_datetime("t1", -1) is None + + def test_valid_timestamp(self, cleanup): + ts = int(datetime.datetime(2025, 1, 1, tzinfo=datetime.UTC).timestamp()) + result = cleanup._expiration_datetime("t1", ts) + assert result is not None + assert result.year == 2025 + + def test_overflow_returns_none(self, cleanup): + result = cleanup._expiration_datetime("t1", 2**62) + assert result is None + + +# --------------------------------------------------------------------------- +# _is_within_grace_period +# --------------------------------------------------------------------------- + + +class TestIsWithinGracePeriod: + def test_zero_grace_period_returns_false(self, cleanup): + cleanup.free_plan_grace_period_days = 0 + assert cleanup._is_within_grace_period("t1", {"expiration_date": 9999999999}) is False + + def test_within_grace_period(self, cleanup): + cleanup.free_plan_grace_period_days = 30 + # expired just 1 day ago + expired = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=1) + ts = int(expired.timestamp()) + assert cleanup._is_within_grace_period("t1", {"expiration_date": ts}) is True + + def test_outside_grace_period(self, cleanup): + cleanup.free_plan_grace_period_days = 5 + # expired 100 days ago + expired = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=100) + ts = int(expired.timestamp()) + assert cleanup._is_within_grace_period("t1", {"expiration_date": ts}) is False + + def test_missing_expiration_date_returns_false(self, cleanup): + cleanup.free_plan_grace_period_days = 30 + assert cleanup._is_within_grace_period("t1", {"expiration_date": -1}) is False + + +# --------------------------------------------------------------------------- +# _get_cleanup_whitelist +# --------------------------------------------------------------------------- + + +class TestGetCleanupWhitelist: + def test_billing_disabled_returns_empty(self, cleanup): + cleanup._cleanup_whitelist = None + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = False + result = cleanup._get_cleanup_whitelist() + assert result == set() + + def test_billing_enabled_fetches_whitelist(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = True + c = WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.BillingService" + ) as bs: + bs.get_expired_subscription_cleanup_whitelist.return_value = ["t1", "t2"] + result = c._get_cleanup_whitelist() + assert result == {"t1", "t2"} + + def test_cached_whitelist_returned(self, cleanup): + cleanup._cleanup_whitelist = {"cached"} + result = cleanup._get_cleanup_whitelist() + assert result == {"cached"} + + def test_billing_service_error_returns_empty(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = True + c = WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.BillingService" + ) as bs: + bs.get_expired_subscription_cleanup_whitelist.side_effect = Exception("error") + result = c._get_cleanup_whitelist() + assert result == set() + + +# --------------------------------------------------------------------------- +# _filter_free_tenants +# --------------------------------------------------------------------------- + + +class TestFilterFreeTenants: + def test_billing_disabled_all_tenants_free(self, cleanup): + result = cleanup._filter_free_tenants(["t1", "t2"]) + assert result == {"t1", "t2"} + + def test_empty_tenants_returns_empty(self, cleanup): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = True + result = cleanup._filter_free_tenants([]) + assert result == set() + + def test_whitelisted_tenant_excluded(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = True + c = WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + c._cleanup_whitelist = {"t1"} + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.BillingService" + ) as bs: + from enums.cloud_plan import CloudPlan + + bs.get_plan_bulk_with_cache.return_value = { + "t1": {"plan": CloudPlan.SANDBOX, "expiration_date": -1}, + "t2": {"plan": CloudPlan.SANDBOX, "expiration_date": -1}, + } + result = c._filter_free_tenants(["t1", "t2"]) + assert "t1" not in result + assert "t2" in result + + def test_paid_tenant_excluded(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = True + c = WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + c._cleanup_whitelist = set() + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.BillingService" + ) as bs: + bs.get_plan_bulk_with_cache.return_value = { + "t1": {"plan": "professional", "expiration_date": -1}, + } + result = c._filter_free_tenants(["t1"]) + assert result == set() + + def test_missing_billing_info_treats_as_non_free(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = True + c = WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + c._cleanup_whitelist = set() + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.BillingService" + ) as bs: + bs.get_plan_bulk_with_cache.return_value = {} + result = c._filter_free_tenants(["t1"]) + assert result == set() + + def test_billing_bulk_error_treats_as_non_free(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = True + c = WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + c._cleanup_whitelist = set() + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.BillingService" + ) as bs: + bs.get_plan_bulk_with_cache.side_effect = Exception("fail") + result = c._filter_free_tenants(["t1"]) + assert result == set() + + +# --------------------------------------------------------------------------- +# run() — delete mode +# --------------------------------------------------------------------------- + + +class TestRunDeleteMode: + def _make_cleanup(self, mock_repo, billing_enabled=False): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = billing_enabled + return WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo) + + def test_no_rows_stops_immediately(self, mock_repo): + mock_repo.get_runs_batch_by_time_range.return_value = [] + c = self._make_cleanup(mock_repo) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = False + c.run() + mock_repo.delete_runs_with_related.assert_not_called() + + def test_all_paid_skips_delete(self, mock_repo): + run = make_run("t1") + mock_repo.get_runs_batch_by_time_range.side_effect = [[run], []] + c = self._make_cleanup(mock_repo) + # billing disabled -> all free; but let's override _filter_free_tenants to return empty + c._filter_free_tenants = MagicMock(return_value=set()) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = False + c.run() + mock_repo.delete_runs_with_related.assert_not_called() + + def test_runs_deleted_successfully(self, mock_repo): + run = make_run("t1") + mock_repo.get_runs_batch_by_time_range.side_effect = [[run], []] + mock_repo.delete_runs_with_related.return_value = { + "runs": 1, + "node_executions": 0, + "offloads": 0, + "app_logs": 0, + "trigger_logs": 0, + "pauses": 0, + "pause_reasons": 0, + } + c = self._make_cleanup(mock_repo) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = False + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.time.sleep"): + c.run() + mock_repo.delete_runs_with_related.assert_called_once() + + def test_delete_exception_reraises(self, mock_repo): + run = make_run("t1") + mock_repo.get_runs_batch_by_time_range.side_effect = [[run], []] + mock_repo.delete_runs_with_related.side_effect = RuntimeError("db error") + c = self._make_cleanup(mock_repo) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = False + with pytest.raises(RuntimeError): + c.run() + + def test_summary_with_window_start(self, mock_repo): + mock_repo.get_runs_batch_by_time_range.return_value = [] + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + c = WorkflowRunCleanup( + days=30, + batch_size=10, + start_from=datetime.datetime(2024, 1, 1), + end_before=datetime.datetime(2024, 6, 1), + workflow_run_repo=mock_repo, + ) + c.run() + + +# --------------------------------------------------------------------------- +# run() — dry run mode +# --------------------------------------------------------------------------- + + +class TestRunDryRunMode: + def _make_dry_cleanup(self, mock_repo): + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + return WorkflowRunCleanup(days=30, batch_size=10, workflow_run_repo=mock_repo, dry_run=True) + + def test_dry_run_no_delete_called(self, mock_repo): + run = make_run("t1") + mock_repo.get_runs_batch_by_time_range.side_effect = [[run], []] + mock_repo.count_runs_with_related.return_value = { + "node_executions": 2, + "offloads": 0, + "app_logs": 0, + "trigger_logs": 1, + "pauses": 0, + "pause_reasons": 0, + } + c = self._make_dry_cleanup(mock_repo) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = False + c.run() + mock_repo.delete_runs_with_related.assert_not_called() + mock_repo.count_runs_with_related.assert_called_once() + + def test_dry_run_summary_with_window_start(self, mock_repo): + mock_repo.get_runs_batch_by_time_range.return_value = [] + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD = 0 + cfg.BILLING_ENABLED = False + c = WorkflowRunCleanup( + days=30, + batch_size=10, + start_from=datetime.datetime(2024, 1, 1), + end_before=datetime.datetime(2024, 6, 1), + workflow_run_repo=mock_repo, + dry_run=True, + ) + c.run() + + def test_dry_run_all_paid_skips_count(self, mock_repo): + run = make_run("t1") + mock_repo.get_runs_batch_by_time_range.side_effect = [[run], []] + c = self._make_dry_cleanup(mock_repo) + c._filter_free_tenants = MagicMock(return_value=set()) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.dify_config") as cfg: + cfg.BILLING_ENABLED = False + c.run() + mock_repo.count_runs_with_related.assert_not_called() + + +# --------------------------------------------------------------------------- +# _delete_trigger_logs / _count_trigger_logs +# --------------------------------------------------------------------------- + + +class TestTriggerLogMethods: + def test_delete_trigger_logs(self, cleanup): + session = MagicMock() + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.SQLAlchemyWorkflowTriggerLogRepository" + ) as RepoClass: + instance = RepoClass.return_value + instance.delete_by_run_ids.return_value = 5 + result = cleanup._delete_trigger_logs(session, ["r1", "r2"]) + assert result == 5 + + def test_count_trigger_logs(self, cleanup): + session = MagicMock() + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.SQLAlchemyWorkflowTriggerLogRepository" + ) as RepoClass: + instance = RepoClass.return_value + instance.count_by_run_ids.return_value = 3 + result = cleanup._count_trigger_logs(session, ["r1"]) + assert result == 3 + + +# --------------------------------------------------------------------------- +# _count_node_executions / _delete_node_executions +# --------------------------------------------------------------------------- + + +class TestNodeExecutionMethods: + def test_count_node_executions(self, cleanup): + session = MagicMock() + session.get_bind.return_value = MagicMock() + runs = [make_run("t1", "r1")] + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.DifyAPIRepositoryFactory" + ) as factory: + repo = factory.create_api_workflow_node_execution_repository.return_value + repo.count_by_runs.return_value = (10, 2) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.sessionmaker"): + result = cleanup._count_node_executions(session, runs) + assert result == (10, 2) + + def test_delete_node_executions(self, cleanup): + session = MagicMock() + session.get_bind.return_value = MagicMock() + runs = [make_run("t1", "r1")] + with patch( + "services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.DifyAPIRepositoryFactory" + ) as factory: + repo = factory.create_api_workflow_node_execution_repository.return_value + repo.delete_by_runs.return_value = (5, 1) + with patch("services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs.sessionmaker"): + result = cleanup._delete_node_executions(session, runs) + assert result == (5, 1) diff --git a/api/tests/unit_tests/services/retention/workflow_run/test_delete_archived_workflow_run.py b/api/tests/unit_tests/services/retention/workflow_run/test_delete_archived_workflow_run.py new file mode 100644 index 0000000000..9fe153c153 --- /dev/null +++ b/api/tests/unit_tests/services/retention/workflow_run/test_delete_archived_workflow_run.py @@ -0,0 +1,216 @@ +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +from sqlalchemy.orm import Session + +from models.workflow import WorkflowRun +from services.retention.workflow_run.delete_archived_workflow_run import ArchivedWorkflowRunDeletion, DeleteResult + + +class TestArchivedWorkflowRunDeletion: + @pytest.fixture + def mock_db(self): + with patch("services.retention.workflow_run.delete_archived_workflow_run.db") as mock_db: + mock_db.engine = MagicMock() + yield mock_db + + @pytest.fixture + def mock_sessionmaker(self): + with patch("services.retention.workflow_run.delete_archived_workflow_run.sessionmaker") as mock_sm: + mock_session = MagicMock(spec=Session) + mock_sm.return_value.return_value.__enter__.return_value = mock_session + yield mock_sm, mock_session + + @pytest.fixture + def mock_workflow_run_repo(self): + with patch( + "services.retention.workflow_run.delete_archived_workflow_run.APIWorkflowRunRepository" + ) as mock_repo_cls: + mock_repo = MagicMock() + yield mock_repo + + def test_delete_by_run_id_success(self, mock_db, mock_sessionmaker): + mock_sm, mock_session = mock_sessionmaker + run_id = "run-123" + tenant_id = "tenant-456" + + mock_run = MagicMock(spec=WorkflowRun) + mock_run.id = run_id + mock_run.tenant_id = tenant_id + mock_session.get.return_value = mock_run + + deletion = ArchivedWorkflowRunDeletion() + + with patch.object(deletion, "_get_workflow_run_repo") as mock_get_repo: + mock_repo = MagicMock() + mock_get_repo.return_value = mock_repo + mock_repo.get_archived_run_ids.return_value = [run_id] + + with patch.object(deletion, "_delete_run") as mock_delete_run: + expected_result = DeleteResult(run_id=run_id, tenant_id=tenant_id, success=True) + mock_delete_run.return_value = expected_result + + result = deletion.delete_by_run_id(run_id) + + assert result == expected_result + mock_session.get.assert_called_once_with(WorkflowRun, run_id) + mock_repo.get_archived_run_ids.assert_called_once() + mock_delete_run.assert_called_once_with(mock_run) + + def test_delete_by_run_id_not_found(self, mock_db, mock_sessionmaker): + mock_sm, mock_session = mock_sessionmaker + run_id = "run-123" + mock_session.get.return_value = None + + deletion = ArchivedWorkflowRunDeletion() + with patch.object(deletion, "_get_workflow_run_repo"): + result = deletion.delete_by_run_id(run_id) + + assert result.success is False + assert "not found" in result.error + assert result.run_id == run_id + + def test_delete_by_run_id_not_archived(self, mock_db, mock_sessionmaker): + mock_sm, mock_session = mock_sessionmaker + run_id = "run-123" + + mock_run = MagicMock(spec=WorkflowRun) + mock_run.id = run_id + mock_session.get.return_value = mock_run + + deletion = ArchivedWorkflowRunDeletion() + with patch.object(deletion, "_get_workflow_run_repo") as mock_get_repo: + mock_repo = MagicMock() + mock_get_repo.return_value = mock_repo + mock_repo.get_archived_run_ids.return_value = [] + + result = deletion.delete_by_run_id(run_id) + + assert result.success is False + assert "is not archived" in result.error + + def test_delete_batch(self, mock_db, mock_sessionmaker): + mock_sm, mock_session = mock_sessionmaker + deletion = ArchivedWorkflowRunDeletion() + + mock_run1 = MagicMock(spec=WorkflowRun) + mock_run1.id = "run-1" + mock_run2 = MagicMock(spec=WorkflowRun) + mock_run2.id = "run-2" + + with patch.object(deletion, "_get_workflow_run_repo") as mock_get_repo: + mock_repo = MagicMock() + mock_get_repo.return_value = mock_repo + mock_repo.get_archived_runs_by_time_range.return_value = [mock_run1, mock_run2] + + with patch.object(deletion, "_delete_run") as mock_delete_run: + mock_delete_run.side_effect = [ + DeleteResult(run_id="run-1", tenant_id="t1", success=True), + DeleteResult(run_id="run-2", tenant_id="t1", success=True), + ] + + results = deletion.delete_batch(tenant_ids=["t1"], start_date=datetime.now(), end_date=datetime.now()) + + assert len(results) == 2 + assert results[0].run_id == "run-1" + assert results[1].run_id == "run-2" + assert mock_delete_run.call_count == 2 + + def test_delete_run_dry_run(self): + deletion = ArchivedWorkflowRunDeletion(dry_run=True) + mock_run = MagicMock(spec=WorkflowRun) + mock_run.id = "run-123" + mock_run.tenant_id = "tenant-456" + + result = deletion._delete_run(mock_run) + + assert result.success is True + assert result.run_id == "run-123" + + def test_delete_run_success(self): + deletion = ArchivedWorkflowRunDeletion(dry_run=False) + mock_run = MagicMock(spec=WorkflowRun) + mock_run.id = "run-123" + mock_run.tenant_id = "tenant-456" + + with patch.object(deletion, "_get_workflow_run_repo") as mock_get_repo: + mock_repo = MagicMock() + mock_get_repo.return_value = mock_repo + mock_repo.delete_runs_with_related.return_value = {"workflow_runs": 1} + + result = deletion._delete_run(mock_run) + + assert result.success is True + assert result.deleted_counts == {"workflow_runs": 1} + + def test_delete_run_exception(self): + deletion = ArchivedWorkflowRunDeletion(dry_run=False) + mock_run = MagicMock(spec=WorkflowRun) + mock_run.id = "run-123" + + with patch.object(deletion, "_get_workflow_run_repo") as mock_get_repo: + mock_repo = MagicMock() + mock_get_repo.return_value = mock_repo + mock_repo.delete_runs_with_related.side_effect = Exception("Database error") + + result = deletion._delete_run(mock_run) + + assert result.success is False + assert result.error == "Database error" + + def test_delete_trigger_logs(self): + mock_session = MagicMock(spec=Session) + run_ids = ["run-1", "run-2"] + + with patch( + "services.retention.workflow_run.delete_archived_workflow_run.SQLAlchemyWorkflowTriggerLogRepository" + ) as mock_repo_cls: + mock_repo = MagicMock() + mock_repo_cls.return_value = mock_repo + mock_repo.delete_by_run_ids.return_value = 5 + + count = ArchivedWorkflowRunDeletion._delete_trigger_logs(mock_session, run_ids) + + assert count == 5 + mock_repo_cls.assert_called_once_with(mock_session) + mock_repo.delete_by_run_ids.assert_called_once_with(run_ids) + + def test_delete_node_executions(self): + mock_session = MagicMock(spec=Session) + mock_run = MagicMock(spec=WorkflowRun) + mock_run.id = "run-1" + runs = [mock_run] + + with patch( + "repositories.factory.DifyAPIRepositoryFactory.create_api_workflow_node_execution_repository" + ) as mock_create_repo: + mock_repo = MagicMock() + mock_create_repo.return_value = mock_repo + mock_repo.delete_by_runs.return_value = (1, 2) + + with patch("services.retention.workflow_run.delete_archived_workflow_run.sessionmaker") as mock_sm: + result = ArchivedWorkflowRunDeletion._delete_node_executions(mock_session, runs) + + assert result == (1, 2) + mock_create_repo.assert_called_once() + mock_repo.delete_by_runs.assert_called_once_with(mock_session, ["run-1"]) + + def test_get_workflow_run_repo(self, mock_db): + deletion = ArchivedWorkflowRunDeletion() + + with patch( + "repositories.factory.DifyAPIRepositoryFactory.create_api_workflow_run_repository" + ) as mock_create_repo: + mock_repo = MagicMock() + mock_create_repo.return_value = mock_repo + + # First call + repo1 = deletion._get_workflow_run_repo() + assert repo1 == mock_repo + assert deletion.workflow_run_repo == mock_repo + + # Second call (should return cached) + repo2 = deletion._get_workflow_run_repo() + assert repo2 == mock_repo + mock_create_repo.assert_called_once() diff --git a/api/tests/unit_tests/services/retention/workflow_run/test_restore_archived_workflow_run.py b/api/tests/unit_tests/services/retention/workflow_run/test_restore_archived_workflow_run.py new file mode 100644 index 0000000000..4bfdba87a0 --- /dev/null +++ b/api/tests/unit_tests/services/retention/workflow_run/test_restore_archived_workflow_run.py @@ -0,0 +1,1117 @@ +""" +Comprehensive unit tests for WorkflowRunRestore service. + +This file provides complete test coverage for all WorkflowRunRestore methods. +Tests are organized by functionality and include edge cases, error handling, +and both positive and negative test scenarios. +""" + +import io +import json +import zipfile +from datetime import datetime +from unittest.mock import Mock, create_autospec, patch + +import pytest +from sqlalchemy import Column, Integer, MetaData, String, Table + +from libs.archive_storage import ArchiveStorageNotConfiguredError +from models.trigger import WorkflowTriggerLog +from models.workflow import ( + WorkflowAppLog, + WorkflowArchiveLog, + WorkflowNodeExecutionModel, + WorkflowNodeExecutionOffload, + WorkflowPause, + WorkflowPauseReason, + WorkflowRun, +) +from services.retention.workflow_run.restore_archived_workflow_run import ( + SCHEMA_MAPPERS, + TABLE_MODELS, + RestoreResult, + WorkflowRunRestore, +) + + +class WorkflowRunRestoreTestDataFactory: + """ + Factory for creating test data and mock objects. + + Provides reusable methods to create consistent mock objects for testing + workflow run restore operations. + """ + + @staticmethod + def create_workflow_run_mock( + run_id: str = "run-123", + tenant_id: str = "tenant-123", + app_id: str = "app-123", + created_at: datetime | None = None, + **kwargs, + ) -> Mock: + """ + Create a mock WorkflowRun object. + + Args: + run_id: Unique identifier for the workflow run + tenant_id: Tenant/workspace identifier + app_id: Application identifier + created_at: Creation timestamp + **kwargs: Additional attributes to set on the mock + + Returns: + Mock WorkflowRun object with specified attributes + """ + run = create_autospec(WorkflowRun, instance=True) + run.id = run_id + run.tenant_id = tenant_id + run.app_id = app_id + run.created_at = created_at or datetime(2024, 1, 1, 12, 0, 0) + for key, value in kwargs.items(): + setattr(run, key, value) + return run + + @staticmethod + def create_workflow_archive_log_mock( + run_id: str = "run-123", + tenant_id: str = "tenant-123", + app_id: str = "app-123", + created_at: datetime | None = None, + **kwargs, + ) -> Mock: + """ + Create a mock WorkflowArchiveLog object. + + Args: + run_id: Unique identifier for the workflow run + tenant_id: Tenant/workspace identifier + app_id: Application identifier + created_at: Creation timestamp + **kwargs: Additional attributes to set on the mock + + Returns: + Mock WorkflowArchiveLog object with specified attributes + """ + archive_log = create_autospec(WorkflowArchiveLog, instance=True) + archive_log.workflow_run_id = run_id + archive_log.tenant_id = tenant_id + archive_log.app_id = app_id + archive_log.run_created_at = created_at or datetime(2024, 1, 1, 12, 0, 0) + for key, value in kwargs.items(): + setattr(archive_log, key, value) + return archive_log + + @staticmethod + def create_archive_zip_mock( + manifest: dict | None = None, + tables_data: dict[str, list[dict]] | None = None, + ) -> bytes: + """ + Create a mock archive zip file in memory. + + Args: + manifest: Archive manifest data + tables_data: Dictionary mapping table names to list of records + + Returns: + Bytes representing the zip file + """ + if manifest is None: + manifest = { + "schema_version": "1.0", + "tables": { + "workflow_runs": {"row_count": 1}, + "workflow_app_logs": {"row_count": 2}, + }, + } + + if tables_data is None: + tables_data = { + "workflow_runs": [ + { + "id": "run-123", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "type": "workflow", + "triggered_from": "app", + "version": "1", + "status": "succeeded", + "created_by_role": "account", + "created_by": "user-123", + } + ], + "workflow_app_logs": [ + { + "id": "log-1", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "workflow_run_id": "run-123", + "created_from": "app", + "created_by_role": "account", + "created_by": "user-123", + }, + { + "id": "log-2", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "workflow_run_id": "run-123", + "created_from": "app", + "created_by_role": "account", + "created_by": "user-123", + }, + ], + } + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: + zip_file.writestr("manifest.json", json.dumps(manifest)) + for table_name, records in tables_data.items(): + jsonl_data = "\n".join(json.dumps(record) for record in records) + zip_file.writestr(f"{table_name}.jsonl", jsonl_data) + + zip_buffer.seek(0) + return zip_buffer.getvalue() + + +# --------------------------------------------------------------------------- +# Test WorkflowRunRestore Initialization +# --------------------------------------------------------------------------- + + +class TestWorkflowRunRestoreInit: + """Tests for WorkflowRunRestore.__init__ method.""" + + def test_default_initialization(self): + """Service should initialize with default values.""" + restore = WorkflowRunRestore() + assert restore.dry_run is False + assert restore.workers == 1 + assert restore.workflow_run_repo is None + + def test_dry_run_initialization(self): + """Service should respect dry_run flag.""" + restore = WorkflowRunRestore(dry_run=True) + assert restore.dry_run is True + assert restore.workers == 1 + + def test_custom_workers_initialization(self): + """Service should accept custom workers count.""" + restore = WorkflowRunRestore(workers=5) + assert restore.workers == 5 + + def test_invalid_workers_raises_error(self): + """Service should raise ValueError for workers less than 1.""" + with pytest.raises(ValueError, match="workers must be at least 1"): + WorkflowRunRestore(workers=0) + + def test_negative_workers_raises_error(self): + """Service should raise ValueError for negative workers.""" + with pytest.raises(ValueError, match="workers must be at least 1"): + WorkflowRunRestore(workers=-1) + + +# --------------------------------------------------------------------------- +# Test _get_workflow_run_repo Method +# --------------------------------------------------------------------------- + + +class TestGetWorkflowRunRepo: + """Tests for WorkflowRunRestore._get_workflow_run_repo method.""" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.DifyAPIRepositoryFactory") + @patch("services.retention.workflow_run.restore_archived_workflow_run.sessionmaker") + @patch("services.retention.workflow_run.restore_archived_workflow_run.db") + def test_first_call_creates_repo(self, mock_db, mock_sessionmaker, mock_factory): + """First call should create and cache repository.""" + restore = WorkflowRunRestore() + + mock_session = Mock() + mock_sessionmaker.return_value = mock_session + mock_repo = Mock() + mock_factory.create_api_workflow_run_repository.return_value = mock_repo + + result = restore._get_workflow_run_repo() + + assert result is mock_repo + assert restore.workflow_run_repo is mock_repo + mock_sessionmaker.assert_called_once_with(bind=mock_db.engine, expire_on_commit=False) + mock_factory.create_api_workflow_run_repository.assert_called_once_with(mock_session) + + def test_cached_repo_returned(self): + """Subsequent calls should return cached repository.""" + restore = WorkflowRunRestore() + mock_repo = Mock() + restore.workflow_run_repo = mock_repo + + result = restore._get_workflow_run_repo() + + assert result is mock_repo + + +# --------------------------------------------------------------------------- +# Test _load_manifest_from_zip Method +# --------------------------------------------------------------------------- + + +class TestLoadManifestFromZip: + """Tests for WorkflowRunRestore._load_manifest_from_zip method.""" + + def test_load_valid_manifest(self): + """Should load manifest from valid zip.""" + manifest_data = {"schema_version": "1.0", "tables": {}} + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w") as zip_file: + zip_file.writestr("manifest.json", json.dumps(manifest_data)) + zip_buffer.seek(0) + + with zipfile.ZipFile(zip_buffer, "r") as archive: + result = WorkflowRunRestore._load_manifest_from_zip(archive) + + assert result == manifest_data + + def test_missing_manifest_raises_error(self): + """Should raise ValueError when manifest.json is missing.""" + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w") as zip_file: + zip_file.writestr("other.txt", "data") + zip_buffer.seek(0) + + with zipfile.ZipFile(zip_buffer, "r") as archive: + with pytest.raises(ValueError, match="manifest.json missing from archive bundle"): + WorkflowRunRestore._load_manifest_from_zip(archive) + + def test_invalid_json_raises_error(self): + """Should raise ValueError when manifest contains invalid JSON.""" + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w") as zip_file: + zip_file.writestr("manifest.json", "invalid json") + zip_buffer.seek(0) + + with zipfile.ZipFile(zip_buffer, "r") as archive: + with pytest.raises(json.JSONDecodeError): + WorkflowRunRestore._load_manifest_from_zip(archive) + + +# --------------------------------------------------------------------------- +# Test _get_schema_version Method +# --------------------------------------------------------------------------- + + +class TestGetSchemaVersion: + """Tests for WorkflowRunRestore._get_schema_version method.""" + + def test_valid_schema_version(self): + """Should return valid schema version from manifest.""" + restore = WorkflowRunRestore() + manifest = {"schema_version": "1.0"} + result = restore._get_schema_version(manifest) + assert result == "1.0" + + def test_missing_schema_version_defaults_to_1_0(self): + """Should default to 1.0 when schema_version is missing.""" + restore = WorkflowRunRestore() + manifest = {"tables": {}} + + with patch("services.retention.workflow_run.restore_archived_workflow_run.logger") as mock_logger: + result = restore._get_schema_version(manifest) + + assert result == "1.0" + mock_logger.warning.assert_called_once_with("Manifest missing schema_version; defaulting to 1.0") + + def test_unsupported_schema_version_raises_error(self): + """Should raise ValueError for unsupported schema version.""" + restore = WorkflowRunRestore() + manifest = {"schema_version": "2.0"} + + with pytest.raises(ValueError, match="Unsupported schema_version 2.0"): + restore._get_schema_version(manifest) + + def test_numeric_schema_version_converted_to_string(self): + """Should convert numeric schema version to string.""" + restore = WorkflowRunRestore() + manifest = {"schema_version": 1} + + # This should raise ValueError because "1" is not in SCHEMA_MAPPERS (only "1.0" is) + with pytest.raises(ValueError, match="Unsupported schema_version 1"): + restore._get_schema_version(manifest) + + +# --------------------------------------------------------------------------- +# Test _apply_schema_mapping Method +# --------------------------------------------------------------------------- + + +class TestApplySchemaMapping: + """Tests for WorkflowRunRestore._apply_schema_mapping method.""" + + def test_no_mapping_returns_original(self): + """Should return original record when no mapping exists.""" + restore = WorkflowRunRestore() + record = {"id": "test", "name": "test"} + result = restore._apply_schema_mapping("workflow_runs", "1.0", record) + assert result == record + + def test_mapping_applied(self): + """Should apply mapping when it exists.""" + restore = WorkflowRunRestore() + + def test_mapper(record): + return {**record, "mapped": True} + + # Add test mapper to SCHEMA_MAPPERS + original_mappers = SCHEMA_MAPPERS.copy() + SCHEMA_MAPPERS["1.0"]["test_table"] = test_mapper + + try: + record = {"id": "test"} + result = restore._apply_schema_mapping("test_table", "1.0", record) + assert result == {"id": "test", "mapped": True} + finally: + # Restore original mappers + SCHEMA_MAPPERS.clear() + SCHEMA_MAPPERS.update(original_mappers) + + +# --------------------------------------------------------------------------- +# Test _convert_datetime_fields Method +# --------------------------------------------------------------------------- + + +class TestConvertDatetimeFields: + """Tests for WorkflowRunRestore._convert_datetime_fields method.""" + + def test_iso_datetime_conversion(self): + """Should convert ISO datetime strings to datetime objects.""" + restore = WorkflowRunRestore() + + record = {"created_at": "2024-01-01T12:00:00", "name": "test"} + result = restore._convert_datetime_fields(record, WorkflowRun) + + assert isinstance(result["created_at"], datetime) + assert result["created_at"].year == 2024 + assert result["name"] == "test" + + def test_invalid_datetime_ignored(self): + """Should ignore invalid datetime strings.""" + restore = WorkflowRunRestore() + + record = {"created_at": "invalid-date", "name": "test"} + result = restore._convert_datetime_fields(record, WorkflowRun) + + assert result["created_at"] == "invalid-date" + assert result["name"] == "test" + + def test_non_datetime_columns_unchanged(self): + """Should leave non-datetime columns unchanged.""" + restore = WorkflowRunRestore() + + record = {"id": "test", "tenant_id": "tenant-123"} + result = restore._convert_datetime_fields(record, WorkflowRun) + + assert result["id"] == "test" + assert result["tenant_id"] == "tenant-123" + + +# --------------------------------------------------------------------------- +# Test _get_model_column_info Method +# --------------------------------------------------------------------------- + + +class TestGetModelColumnInfo: + """Tests for WorkflowRunRestore._get_model_column_info method.""" + + def test_column_info_extraction(self): + """Should extract column information correctly.""" + restore = WorkflowRunRestore() + + column_names, required_columns, non_nullable_with_default = restore._get_model_column_info(WorkflowRun) + + # Check that we get some expected columns + assert "id" in column_names + assert "tenant_id" in column_names + assert "app_id" in column_names + assert "created_at" in column_names + assert "created_by" in column_names + assert "status" in column_names + + # Columns without defaults should be required for restore inserts. + assert { + "tenant_id", + "app_id", + "workflow_id", + "type", + "triggered_from", + "version", + "status", + "created_by_role", + "created_by", + }.issubset(required_columns) + assert "id" not in required_columns + assert "created_at" not in required_columns + + # Check columns with defaults or server defaults + assert "id" in non_nullable_with_default + assert "created_at" in non_nullable_with_default + assert "elapsed_time" in non_nullable_with_default + assert "total_tokens" in non_nullable_with_default + assert "tenant_id" not in non_nullable_with_default + + def test_non_pk_auto_autoincrement_column_is_still_required(self): + """`autoincrement='auto'` should not mark non-PK columns as defaulted.""" + restore = WorkflowRunRestore() + + test_table = Table( + "test_autoincrement", + MetaData(), + Column("id", Integer, primary_key=True, autoincrement=True), + Column("required_field", String(255), nullable=False), + Column("defaulted_field", String(255), nullable=False, default="x"), + ) + + class MockModel: + __table__ = test_table + + _, required_columns, non_nullable_with_default = restore._get_model_column_info(MockModel) + + assert required_columns == {"required_field"} + assert "id" in non_nullable_with_default + assert "defaulted_field" in non_nullable_with_default + + +# --------------------------------------------------------------------------- +# Test _restore_table_records Method +# --------------------------------------------------------------------------- + + +class TestRestoreTableRecords: + """Tests for WorkflowRunRestore._restore_table_records method.""" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.TABLE_MODELS") + def test_unknown_table_returns_zero(self, mock_table_models): + """Should return 0 for unknown table.""" + restore = WorkflowRunRestore() + mock_table_models.get.return_value = None + + mock_session = Mock() + records = [{"id": "test"}] + + with patch("services.retention.workflow_run.restore_archived_workflow_run.logger") as mock_logger: + result = restore._restore_table_records(mock_session, "unknown_table", records, schema_version="1.0") + + assert result == 0 + mock_logger.warning.assert_called_once_with("Unknown table: %s", "unknown_table") + + def test_empty_records_returns_zero(self): + """Should return 0 for empty records list.""" + restore = WorkflowRunRestore() + mock_session = Mock() + + result = restore._restore_table_records(mock_session, "workflow_runs", [], schema_version="1.0") + assert result == 0 + + @patch("services.retention.workflow_run.restore_archived_workflow_run.pg_insert") + @patch("services.retention.workflow_run.restore_archived_workflow_run.cast") + def test_successful_restore(self, mock_cast, mock_pg_insert): + """Should successfully restore records.""" + restore = WorkflowRunRestore() + + # Mock session and execution + mock_session = Mock() + mock_result = Mock() + mock_result.rowcount = 2 + mock_session.execute.return_value = mock_result + mock_cast.return_value = mock_result + + # Mock insert statement + mock_stmt = Mock() + mock_stmt.on_conflict_do_nothing.return_value = mock_stmt + mock_pg_insert.return_value = mock_stmt + + records = [ + { + "id": "test1", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "type": "workflow", + "triggered_from": "app", + "version": "1", + "status": "succeeded", + "created_by_role": "account", + "created_by": "user-123", + }, + { + "id": "test2", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "type": "workflow", + "triggered_from": "app", + "version": "1", + "status": "succeeded", + "created_by_role": "account", + "created_by": "user-123", + }, + ] + + result = restore._restore_table_records(mock_session, "workflow_runs", records, schema_version="1.0") + + assert result == 2 + mock_session.execute.assert_called_once() + + def test_missing_required_columns_raises_error(self): + """Should raise ValueError for missing required columns.""" + restore = WorkflowRunRestore() + + mock_session = Mock() + # Use a dedicated mock model to isolate required-column validation behavior. + mock_model = Mock() + + # Mock a required column + required_column = Mock() + required_column.key = "required_field" + required_column.nullable = False + required_column.default = None + required_column.server_default = None + required_column.autoincrement = False + required_column.type = Mock() + + # Mock the __table__ attribute properly + mock_table = Mock() + mock_table.columns = [required_column] + mock_model.__table__ = mock_table + + records = [{"name": "test"}] # Missing required 'required_field' + + with patch.dict(TABLE_MODELS, {"test_table": mock_model}): + with pytest.raises(ValueError, match="Missing required columns for test_table"): + restore._restore_table_records(mock_session, "test_table", records, schema_version="1.0") + + +# --------------------------------------------------------------------------- +# Test _restore_from_run Method +# --------------------------------------------------------------------------- + + +class TestRestoreFromRun: + """Tests for WorkflowRunRestore._restore_from_run method.""" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.get_archive_storage") + def test_archive_storage_not_configured(self, mock_get_storage): + """Should handle ArchiveStorageNotConfiguredError.""" + restore = WorkflowRunRestore() + mock_get_storage.side_effect = ArchiveStorageNotConfiguredError("Storage not configured") + + run = WorkflowRunRestoreTestDataFactory.create_workflow_run_mock() + + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + result = restore._restore_from_run(run, session_maker=lambda: Mock()) + + assert result.success is False + assert "Storage not configured" in result.error + assert result.elapsed_time > 0 + + @patch("services.retention.workflow_run.restore_archived_workflow_run.get_archive_storage") + def test_archive_bundle_not_found(self, mock_get_storage): + """Should handle FileNotFoundError when archive bundle is missing.""" + restore = WorkflowRunRestore() + mock_storage = Mock() + mock_storage.get_object.side_effect = FileNotFoundError("Bundle not found") + mock_get_storage.return_value = mock_storage + + run = WorkflowRunRestoreTestDataFactory.create_workflow_run_mock() + + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + result = restore._restore_from_run(run, session_maker=lambda: Mock()) + + assert result.success is False + assert "Archive bundle not found" in result.error + + @patch("services.retention.workflow_run.restore_archived_workflow_run.get_archive_storage") + def test_dry_run_mode(self, mock_get_storage): + """Should handle dry run mode correctly.""" + restore = WorkflowRunRestore(dry_run=True) + + # Mock storage and archive data + mock_storage = Mock() + archive_data = WorkflowRunRestoreTestDataFactory.create_archive_zip_mock() + mock_storage.get_object.return_value = archive_data + mock_get_storage.return_value = mock_storage + + run = WorkflowRunRestoreTestDataFactory.create_workflow_run_mock() + + # Create a proper mock session with context manager support + mock_session = Mock() + mock_session.__enter__ = Mock(return_value=mock_session) + mock_session.__exit__ = Mock(return_value=None) + + result = restore._restore_from_run(run, session_maker=lambda: mock_session) + + assert result.success is True + assert result.restored_counts["workflow_runs"] == 1 + assert result.restored_counts["workflow_app_logs"] == 2 + + @patch("services.retention.workflow_run.restore_archived_workflow_run.get_archive_storage") + @patch("services.retention.workflow_run.restore_archived_workflow_run.pg_insert") + @patch("services.retention.workflow_run.restore_archived_workflow_run.cast") + def test_successful_restore(self, mock_cast, mock_pg_insert, mock_get_storage): + """Should successfully restore from archive.""" + restore = WorkflowRunRestore() + + # Mock storage and archive data + mock_storage = Mock() + archive_data = WorkflowRunRestoreTestDataFactory.create_archive_zip_mock() + mock_storage.get_object.return_value = archive_data + mock_get_storage.return_value = mock_storage + + # Mock session with context manager support + mock_session = Mock() + mock_session.__enter__ = Mock(return_value=mock_session) + mock_session.__exit__ = Mock(return_value=None) + + def session_maker(): + return mock_session + + # Mock database execution to return integer counts + mock_result_workflow_runs = Mock() + mock_result_workflow_runs.rowcount = 1 + mock_result_app_logs = Mock() + mock_result_app_logs.rowcount = 2 + + # Configure session.execute to return different results based on the table + def mock_execute(stmt): + if "workflow_runs" in str(stmt): + return mock_result_workflow_runs + else: + return mock_result_app_logs + + mock_session.execute.side_effect = mock_execute + mock_cast.return_value = mock_result_workflow_runs + + # Mock insert statement + mock_stmt = Mock() + mock_stmt.on_conflict_do_nothing.return_value = mock_stmt + mock_pg_insert.return_value = mock_stmt + + run = WorkflowRunRestoreTestDataFactory.create_workflow_run_mock() + + # Mock repository methods + with patch.object(restore, "_get_workflow_run_repo") as mock_get_repo: + mock_repo = Mock() + mock_get_repo.return_value = mock_repo + + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + result = restore._restore_from_run(run, session_maker=session_maker) + + assert result.success is True + assert result.restored_counts["workflow_runs"] == 1 + assert result.restored_counts["workflow_app_logs"] >= 1 # Just check it's restored + mock_session.commit.assert_called_once() + mock_repo.delete_archive_log_by_run_id.assert_called_once_with(mock_session, run.id) + + @patch("services.retention.workflow_run.restore_archived_workflow_run.get_archive_storage") + def test_invalid_archive_bundle(self, mock_get_storage): + """Should handle invalid archive bundle.""" + restore = WorkflowRunRestore() + + # Mock storage with invalid zip data + mock_storage = Mock() + mock_storage.get_object.return_value = b"invalid zip data" + mock_get_storage.return_value = mock_storage + + run = WorkflowRunRestoreTestDataFactory.create_workflow_run_mock() + + # Create proper mock session + mock_session = Mock() + mock_session.__enter__ = Mock(return_value=mock_session) + mock_session.__exit__ = Mock(return_value=None) + + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + result = restore._restore_from_run(run, session_maker=lambda: mock_session) + + assert result.success is False + # The error message comes from zipfile.BadZipFile which says "File is not a zip file" + assert "File is not a zip file" in result.error + + @patch("services.retention.workflow_run.restore_archived_workflow_run.get_archive_storage") + def test_workflow_archive_log_input(self, mock_get_storage): + """Should handle WorkflowArchiveLog input correctly.""" + restore = WorkflowRunRestore(dry_run=True) + + # Mock storage and archive data + mock_storage = Mock() + archive_data = WorkflowRunRestoreTestDataFactory.create_archive_zip_mock() + mock_storage.get_object.return_value = archive_data + mock_get_storage.return_value = mock_storage + + archive_log = WorkflowRunRestoreTestDataFactory.create_workflow_archive_log_mock() + + # Create proper mock session + mock_session = Mock() + mock_session.__enter__ = Mock(return_value=mock_session) + mock_session.__exit__ = Mock(return_value=None) + + result = restore._restore_from_run(archive_log, session_maker=lambda: mock_session) + + assert result.success is True + assert result.run_id == archive_log.workflow_run_id + assert result.tenant_id == archive_log.tenant_id + + +# --------------------------------------------------------------------------- +# Test restore_batch Method +# --------------------------------------------------------------------------- + + +class TestRestoreBatch: + """Tests for WorkflowRunRestore.restore_batch method.""" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.sessionmaker") + def test_empty_tenant_ids_returns_empty(self, mock_sessionmaker): + """Should return empty list when tenant_ids is empty list.""" + restore = WorkflowRunRestore() + + # Mock db.engine to avoid SQLAlchemy issues + with patch("services.retention.workflow_run.restore_archived_workflow_run.db") as mock_db: + mock_db.engine = Mock() + result = restore.restore_batch( + tenant_ids=[], + start_date=datetime(2024, 1, 1), + end_date=datetime(2024, 1, 2), + ) + + assert result == [] + + @patch("services.retention.workflow_run.restore_archived_workflow_run.ThreadPoolExecutor") + def test_successful_batch_restore(self, mock_executor): + """Should successfully restore batch of workflow runs.""" + restore = WorkflowRunRestore(workers=2) + + # Mock session that supports context manager protocol + mock_session = Mock() + mock_session.__enter__ = Mock(return_value=mock_session) + mock_session.__exit__ = Mock(return_value=None) + + # Mock session factory that returns context manager sessions + mock_session_factory = Mock(return_value=mock_session) + + # Mock repository and archive logs + mock_repo = Mock() + archive_log1 = WorkflowRunRestoreTestDataFactory.create_workflow_archive_log_mock("run-1") + archive_log2 = WorkflowRunRestoreTestDataFactory.create_workflow_archive_log_mock("run-2") + mock_repo.get_archived_logs_by_time_range.return_value = [archive_log1, archive_log2] + + # Mock restore results + result1 = RestoreResult(run_id="run-1", tenant_id="tenant-1", success=True, restored_counts={}) + result2 = RestoreResult(run_id="run-2", tenant_id="tenant-1", success=True, restored_counts={}) + + # Mock ThreadPoolExecutor with context manager support + mock_executor_instance = Mock() + mock_executor_instance.__enter__ = Mock(return_value=mock_executor_instance) + mock_executor_instance.__exit__ = Mock(return_value=None) + mock_executor_instance.map = Mock(return_value=[result1, result2]) + mock_executor.return_value = mock_executor_instance + + with patch.object(restore, "_get_workflow_run_repo", return_value=mock_repo): + with patch.object(restore, "_restore_from_run", side_effect=[result1, result2]): + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + # Mock sessionmaker and db.engine to avoid SQLAlchemy issues + with patch( + "services.retention.workflow_run.restore_archived_workflow_run.sessionmaker" + ) as mock_sessionmaker: + mock_sessionmaker.return_value = mock_session_factory + with patch("services.retention.workflow_run.restore_archived_workflow_run.db") as mock_db: + mock_db.engine = Mock() + results = restore.restore_batch( + tenant_ids=["tenant-1"], + start_date=datetime(2024, 1, 1), + end_date=datetime(2024, 1, 2), + ) + + assert len(results) == 2 + assert results[0].run_id == "run-1" + assert results[1].run_id == "run-2" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.ThreadPoolExecutor") + def test_dry_run_batch_restore(self, mock_executor): + """Should handle dry run mode for batch restore.""" + restore = WorkflowRunRestore(dry_run=True) + + # Mock session that supports context manager protocol + mock_session = Mock() + mock_session.__enter__ = Mock(return_value=mock_session) + mock_session.__exit__ = Mock(return_value=None) + + # Mock session factory that returns context manager sessions + mock_session_factory = Mock(return_value=mock_session) + + mock_repo = Mock() + archive_log = WorkflowRunRestoreTestDataFactory.create_workflow_archive_log_mock() + mock_repo.get_archived_logs_by_time_range.return_value = [archive_log] + + result = RestoreResult(run_id="run-1", tenant_id="tenant-1", success=True, restored_counts={"workflow_runs": 1}) + + # Mock ThreadPoolExecutor with context manager support + mock_executor_instance = Mock() + mock_executor_instance.__enter__ = Mock(return_value=mock_executor_instance) + mock_executor_instance.__exit__ = Mock(return_value=None) + mock_executor_instance.map = Mock(return_value=[result]) + mock_executor.return_value = mock_executor_instance + + with patch.object(restore, "_get_workflow_run_repo", return_value=mock_repo): + with patch.object(restore, "_restore_from_run", return_value=result): + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + # Mock sessionmaker and db.engine to avoid SQLAlchemy issues + with patch( + "services.retention.workflow_run.restore_archived_workflow_run.sessionmaker" + ) as mock_sessionmaker: + mock_sessionmaker.return_value = mock_session_factory + with patch("services.retention.workflow_run.restore_archived_workflow_run.db") as mock_db: + mock_db.engine = Mock() + results = restore.restore_batch( + tenant_ids=["tenant-1"], + start_date=datetime(2024, 1, 1), + end_date=datetime(2024, 1, 2), + ) + + assert len(results) == 1 + assert results[0].success is True + + +# --------------------------------------------------------------------------- +# Test restore_by_run_id Method +# --------------------------------------------------------------------------- + + +class TestRestoreByRunId: + """Tests for WorkflowRunRestore.restore_by_run_id method.""" + + def test_archive_log_not_found(self): + """Should handle case when archive log is not found.""" + restore = WorkflowRunRestore() + + mock_repo = Mock() + mock_repo.get_archived_log_by_run_id.return_value = None + + with patch.object(restore, "_get_workflow_run_repo", return_value=mock_repo): + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + result = restore.restore_by_run_id("nonexistent-run") + + assert result.success is False + assert "not found" in result.error + assert result.run_id == "nonexistent-run" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.sessionmaker") + def test_successful_restore_by_id(self, mock_sessionmaker): + """Should successfully restore by run ID.""" + restore = WorkflowRunRestore() + + mock_session = Mock() + mock_sessionmaker.return_value = mock_session + + mock_repo = Mock() + archive_log = WorkflowRunRestoreTestDataFactory.create_workflow_archive_log_mock() + mock_repo.get_archived_log_by_run_id.return_value = archive_log + + result = RestoreResult(run_id="run-1", tenant_id="tenant-1", success=True, restored_counts={}) + + with patch.object(restore, "_get_workflow_run_repo", return_value=mock_repo): + with patch.object(restore, "_restore_from_run", return_value=result): + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + # Mock db.engine to avoid SQLAlchemy issues + with patch("services.retention.workflow_run.restore_archived_workflow_run.db") as mock_db: + mock_db.engine = Mock() + actual_result = restore.restore_by_run_id("run-1") + + assert actual_result.success is True + assert actual_result.run_id == "run-1" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.sessionmaker") + def test_dry_run_restore_by_id(self, mock_sessionmaker): + """Should handle dry run mode for restore by ID.""" + restore = WorkflowRunRestore(dry_run=True) + + mock_session = Mock() + mock_sessionmaker.return_value = mock_session + + mock_repo = Mock() + archive_log = WorkflowRunRestoreTestDataFactory.create_workflow_archive_log_mock() + mock_repo.get_archived_log_by_run_id.return_value = archive_log + + result = RestoreResult(run_id="run-1", tenant_id="tenant-1", success=True, restored_counts={"workflow_runs": 1}) + + with patch.object(restore, "_get_workflow_run_repo", return_value=mock_repo): + with patch.object(restore, "_restore_from_run", return_value=result): + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + # Mock db.engine to avoid SQLAlchemy issues + with patch("services.retention.workflow_run.restore_archived_workflow_run.db") as mock_db: + mock_db.engine = Mock() + actual_result = restore.restore_by_run_id("run-1") + + assert actual_result.success is True + assert actual_result.run_id == "run-1" + + +# --------------------------------------------------------------------------- +# Test RestoreResult Dataclass +# --------------------------------------------------------------------------- + + +class TestRestoreResult: + """Tests for RestoreResult dataclass.""" + + def test_restore_result_creation(self): + """Should create RestoreResult with all fields.""" + result = RestoreResult( + run_id="run-123", + tenant_id="tenant-123", + success=True, + restored_counts={"workflow_runs": 1, "workflow_app_logs": 2}, + error=None, + elapsed_time=5.5, + ) + + assert result.run_id == "run-123" + assert result.tenant_id == "tenant-123" + assert result.success is True + assert result.restored_counts == {"workflow_runs": 1, "workflow_app_logs": 2} + assert result.error is None + assert result.elapsed_time == 5.5 + + def test_restore_result_with_error(self): + """Should create RestoreResult with error.""" + result = RestoreResult( + run_id="run-123", + tenant_id="tenant-123", + success=False, + restored_counts={}, + error="Something went wrong", + ) + + assert result.success is False + assert result.error == "Something went wrong" + assert result.restored_counts == {} + assert result.elapsed_time == 0.0 # Default value + + +# --------------------------------------------------------------------------- +# Test Constants and Mappings +# --------------------------------------------------------------------------- + + +class TestConstantsAndMappings: + """Tests for module constants and mappings.""" + + def test_table_models_mapping(self): + """TABLE_MODELS should contain expected table mappings.""" + expected_tables = { + "workflow_runs": WorkflowRun, + "workflow_app_logs": WorkflowAppLog, + "workflow_node_executions": WorkflowNodeExecutionModel, + "workflow_node_execution_offload": WorkflowNodeExecutionOffload, + "workflow_pauses": WorkflowPause, + "workflow_pause_reasons": WorkflowPauseReason, + "workflow_trigger_logs": WorkflowTriggerLog, + } + + assert expected_tables == TABLE_MODELS + + def test_schema_mappers_structure(self): + """SCHEMA_MAPPERS should have correct structure.""" + assert isinstance(SCHEMA_MAPPERS, dict) + assert "1.0" in SCHEMA_MAPPERS + assert isinstance(SCHEMA_MAPPERS["1.0"], dict) + + +# --------------------------------------------------------------------------- +# Integration Tests +# --------------------------------------------------------------------------- + + +class TestIntegration: + """Integration tests combining multiple components.""" + + @patch("services.retention.workflow_run.restore_archived_workflow_run.get_archive_storage") + @patch("services.retention.workflow_run.restore_archived_workflow_run.ThreadPoolExecutor") + def test_full_restore_flow(self, mock_executor, mock_get_storage): + """Test complete restore flow with all components.""" + restore = WorkflowRunRestore(workers=1) + + # Mock storage + mock_storage = Mock() + manifest = { + "schema_version": "1.0", + "tables": { + "workflow_runs": {"row_count": 1}, + }, + } + tables_data = { + "workflow_runs": [ + { + "id": "run-123", + "tenant_id": "tenant-123", + "app_id": "app-123", + "workflow_id": "workflow-123", + "type": "workflow", + "triggered_from": "app", + "version": "1", + "status": "succeeded", + "created_by_role": "account", + "created_by": "user-123", + "created_at": "2024-01-01T12:00:00", + } + ], + } + archive_data = WorkflowRunRestoreTestDataFactory.create_archive_zip_mock(manifest, tables_data) + mock_storage.get_object.return_value = archive_data + mock_get_storage.return_value = mock_storage + + # Mock session that supports context manager protocol + mock_session = Mock() + mock_session.__enter__ = Mock(return_value=mock_session) + mock_session.__exit__ = Mock(return_value=None) + + # Mock session factory that returns context manager sessions + mock_session_factory = Mock(return_value=mock_session) + + mock_result = Mock() + mock_result.rowcount = 1 + mock_session.execute.return_value = mock_result + + # Mock repository + mock_repo = Mock() + archive_log = WorkflowRunRestoreTestDataFactory.create_workflow_archive_log_mock() + mock_repo.get_archived_log_by_run_id.return_value = archive_log + + # Mock ThreadPoolExecutor (not actually used in restore_by_run_id but needed for patch) + mock_executor_instance = Mock() + mock_executor_instance.__enter__ = Mock(return_value=mock_executor_instance) + mock_executor_instance.__exit__ = Mock(return_value=None) + mock_executor_instance.map = Mock(return_value=[]) + mock_executor.return_value = mock_executor_instance + + with patch.object(restore, "_get_workflow_run_repo", return_value=mock_repo): + with patch("services.retention.workflow_run.restore_archived_workflow_run.pg_insert") as mock_insert: + mock_stmt = Mock() + mock_stmt.on_conflict_do_nothing.return_value = mock_stmt + mock_insert.return_value = mock_stmt + + with patch("services.retention.workflow_run.restore_archived_workflow_run.cast") as mock_cast: + mock_cast.return_value = mock_result + + with patch("services.retention.workflow_run.restore_archived_workflow_run.click") as mock_click: + # Mock sessionmaker and db.engine to avoid SQLAlchemy issues + with patch( + "services.retention.workflow_run.restore_archived_workflow_run.sessionmaker" + ) as mock_sessionmaker: + mock_sessionmaker.return_value = mock_session_factory + with patch("services.retention.workflow_run.restore_archived_workflow_run.db") as mock_db: + mock_db.engine = Mock() + result = restore.restore_by_run_id("run-123") + + assert result.success is True + assert result.restored_counts.get("workflow_runs") == 1 diff --git a/api/tests/unit_tests/services/test_advanced_prompt_template_service.py b/api/tests/unit_tests/services/test_advanced_prompt_template_service.py new file mode 100644 index 0000000000..a6bc79e82b --- /dev/null +++ b/api/tests/unit_tests/services/test_advanced_prompt_template_service.py @@ -0,0 +1,214 @@ +""" +Unit tests for services.advanced_prompt_template_service +""" + +import copy + +from core.prompt.prompt_templates.advanced_prompt_templates import ( + BAICHUAN_CHAT_APP_CHAT_PROMPT_CONFIG, + BAICHUAN_CHAT_APP_COMPLETION_PROMPT_CONFIG, + BAICHUAN_COMPLETION_APP_CHAT_PROMPT_CONFIG, + BAICHUAN_COMPLETION_APP_COMPLETION_PROMPT_CONFIG, + BAICHUAN_CONTEXT, + CHAT_APP_CHAT_PROMPT_CONFIG, + CHAT_APP_COMPLETION_PROMPT_CONFIG, + COMPLETION_APP_CHAT_PROMPT_CONFIG, + COMPLETION_APP_COMPLETION_PROMPT_CONFIG, + CONTEXT, +) +from models.model import AppMode +from services.advanced_prompt_template_service import AdvancedPromptTemplateService + + +class TestAdvancedPromptTemplateService: + """Test suite for AdvancedPromptTemplateService.""" + + def test_get_prompt_should_use_baichuan_prompt_when_model_name_contains_baichuan(self) -> None: + """Test baichuan model names use baichuan context prompt.""" + # Arrange + args = { + "app_mode": AppMode.CHAT, + "model_mode": "chat", + "model_name": "Baichuan2-13B", + "has_context": "true", + } + + # Act + result = AdvancedPromptTemplateService.get_prompt(args) + + # Assert + assert result["chat_prompt_config"]["prompt"][0]["text"].startswith(BAICHUAN_CONTEXT) + + def test_get_prompt_should_use_common_prompt_when_model_name_not_baichuan(self) -> None: + """Test non-baichuan model names use common prompt.""" + # Arrange + args = { + "app_mode": AppMode.CHAT, + "model_mode": "completion", + "model_name": "gpt-4", + "has_context": "false", + } + original_config = copy.deepcopy(CHAT_APP_COMPLETION_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_prompt(args) + + # Assert + assert result == original_config + assert original_config == CHAT_APP_COMPLETION_PROMPT_CONFIG + + def test_get_common_prompt_should_return_empty_dict_when_app_mode_invalid(self) -> None: + """Test invalid app mode returns empty dict.""" + # Arrange + app_mode = "invalid" + model_mode = "chat" + + # Act + result = AdvancedPromptTemplateService.get_common_prompt(app_mode, model_mode, "true") + + # Assert + assert result == {} + + def test_get_common_prompt_should_prepend_context_for_completion_prompt(self) -> None: + """Test context is prepended for completion prompt when has_context is true.""" + # Arrange + original_config = copy.deepcopy(CHAT_APP_COMPLETION_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_common_prompt(AppMode.CHAT, "completion", "true") + + # Assert + assert result["completion_prompt_config"]["prompt"]["text"].startswith(CONTEXT) + assert original_config == CHAT_APP_COMPLETION_PROMPT_CONFIG + + def test_get_common_prompt_should_prepend_context_for_chat_prompt(self) -> None: + """Test context is prepended for chat prompt when has_context is true.""" + # Arrange + original_config = copy.deepcopy(COMPLETION_APP_CHAT_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_common_prompt(AppMode.COMPLETION, "chat", "true") + + # Assert + assert result["chat_prompt_config"]["prompt"][0]["text"].startswith(CONTEXT) + assert original_config == COMPLETION_APP_CHAT_PROMPT_CONFIG + + def test_get_common_prompt_should_return_chat_prompt_without_context_when_has_context_false(self) -> None: + """Test chat prompt remains unchanged when has_context is false.""" + # Arrange + original_config = copy.deepcopy(CHAT_APP_CHAT_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_common_prompt(AppMode.CHAT, "chat", "false") + + # Assert + assert result == original_config + assert original_config == CHAT_APP_CHAT_PROMPT_CONFIG + + def test_get_common_prompt_should_return_completion_prompt_for_completion_app_mode(self) -> None: + """Test completion app mode with completion model returns completion prompt.""" + # Arrange + original_config = copy.deepcopy(COMPLETION_APP_COMPLETION_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_common_prompt(AppMode.COMPLETION, "completion", "false") + + # Assert + assert result == original_config + assert original_config == COMPLETION_APP_COMPLETION_PROMPT_CONFIG + + def test_get_common_prompt_should_return_empty_dict_when_model_mode_invalid(self) -> None: + """Test invalid model mode returns empty dict.""" + # Arrange + app_mode = AppMode.CHAT + model_mode = "invalid" + + # Act + result = AdvancedPromptTemplateService.get_common_prompt(app_mode, model_mode, "false") + + # Assert + assert result == {} + + def test_get_completion_prompt_should_not_prepend_context_when_has_context_false(self) -> None: + """Test helper keeps completion prompt unchanged when context is disabled.""" + # Arrange + prompt_template = copy.deepcopy(CHAT_APP_COMPLETION_PROMPT_CONFIG) + original_text = prompt_template["completion_prompt_config"]["prompt"]["text"] + + # Act + result = AdvancedPromptTemplateService.get_completion_prompt(prompt_template, "false", CONTEXT) + + # Assert + assert result["completion_prompt_config"]["prompt"]["text"] == original_text + + def test_get_chat_prompt_should_not_prepend_context_when_has_context_false(self) -> None: + """Test helper keeps chat prompt unchanged when context is disabled.""" + # Arrange + prompt_template = copy.deepcopy(CHAT_APP_CHAT_PROMPT_CONFIG) + original_text = prompt_template["chat_prompt_config"]["prompt"][0]["text"] + + # Act + result = AdvancedPromptTemplateService.get_chat_prompt(prompt_template, "false", CONTEXT) + + # Assert + assert result["chat_prompt_config"]["prompt"][0]["text"] == original_text + + def test_get_baichuan_prompt_should_return_chat_completion_config_when_chat_completion(self) -> None: + """Test baichuan chat/completion returns the expected config.""" + # Arrange + original_config = copy.deepcopy(BAICHUAN_CHAT_APP_COMPLETION_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_baichuan_prompt(AppMode.CHAT, "completion", "false") + + # Assert + assert result == original_config + assert original_config == BAICHUAN_CHAT_APP_COMPLETION_PROMPT_CONFIG + + def test_get_baichuan_prompt_should_return_completion_chat_config_when_completion_chat(self) -> None: + """Test baichuan completion/chat returns the expected config.""" + # Arrange + original_config = copy.deepcopy(BAICHUAN_COMPLETION_APP_CHAT_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_baichuan_prompt(AppMode.COMPLETION, "chat", "false") + + # Assert + assert result == original_config + assert original_config == BAICHUAN_COMPLETION_APP_CHAT_PROMPT_CONFIG + + def test_get_baichuan_prompt_should_return_completion_completion_config_when_enabled_context(self) -> None: + """Test baichuan completion/completion prepends baichuan context when enabled.""" + # Arrange + original_config = copy.deepcopy(BAICHUAN_COMPLETION_APP_COMPLETION_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_baichuan_prompt(AppMode.COMPLETION, "completion", "true") + + # Assert + assert result["completion_prompt_config"]["prompt"]["text"].startswith(BAICHUAN_CONTEXT) + assert original_config == BAICHUAN_COMPLETION_APP_COMPLETION_PROMPT_CONFIG + + def test_get_baichuan_prompt_should_return_chat_chat_config_when_enabled_context(self) -> None: + """Test baichuan chat/chat prepends baichuan context when enabled.""" + # Arrange + original_config = copy.deepcopy(BAICHUAN_CHAT_APP_CHAT_PROMPT_CONFIG) + + # Act + result = AdvancedPromptTemplateService.get_baichuan_prompt(AppMode.CHAT, "chat", "true") + + # Assert + assert result["chat_prompt_config"]["prompt"][0]["text"].startswith(BAICHUAN_CONTEXT) + assert original_config == BAICHUAN_CHAT_APP_CHAT_PROMPT_CONFIG + + def test_get_baichuan_prompt_should_return_empty_dict_when_invalid_inputs(self) -> None: + """Test invalid baichuan mode combinations return empty dict.""" + # Arrange + app_mode = "invalid" + model_mode = "invalid" + + # Act + result = AdvancedPromptTemplateService.get_baichuan_prompt(app_mode, model_mode, "true") + + # Assert + assert result == {} diff --git a/api/tests/unit_tests/services/test_agent_service.py b/api/tests/unit_tests/services/test_agent_service.py new file mode 100644 index 0000000000..7ce3d7ef7b --- /dev/null +++ b/api/tests/unit_tests/services/test_agent_service.py @@ -0,0 +1,346 @@ +""" +Unit tests for services.agent_service +""" + +from collections.abc import Callable +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest +import pytz + +from core.plugin.impl.exc import PluginDaemonClientSideError +from models import Account +from models.model import App, Conversation, EndUser, Message, MessageAgentThought +from services.agent_service import AgentService + + +def _make_current_user_account(timezone: str = "UTC") -> Account: + account = Account(name="Test User", email="test@example.com") + account.timezone = timezone + return account + + +def _make_app_model(app_model_config: MagicMock | None) -> MagicMock: + app_model = MagicMock(spec=App) + app_model.id = "app-123" + app_model.tenant_id = "tenant-123" + app_model.app_model_config = app_model_config + return app_model + + +def _make_conversation(from_end_user_id: str | None, from_account_id: str | None) -> MagicMock: + conversation = MagicMock(spec=Conversation) + conversation.id = "conv-123" + conversation.app_id = "app-123" + conversation.from_end_user_id = from_end_user_id + conversation.from_account_id = from_account_id + return conversation + + +def _make_message(agent_thoughts: list[MessageAgentThought]) -> MagicMock: + message = MagicMock(spec=Message) + message.id = "msg-123" + message.conversation_id = "conv-123" + message.created_at = datetime(2024, 1, 1, tzinfo=pytz.UTC) + message.provider_response_latency = 1.23 + message.answer_tokens = 4 + message.message_tokens = 6 + message.agent_thoughts = agent_thoughts + message.message_files = ["file-a.txt"] + return message + + +def _make_agent_thought() -> MagicMock: + agent_thought = MagicMock(spec=MessageAgentThought) + agent_thought.tokens = 3 + agent_thought.tool_input = "raw-input" + agent_thought.observation = "raw-output" + agent_thought.thought = "thinking" + agent_thought.created_at = datetime(2024, 1, 1, tzinfo=pytz.UTC) + agent_thought.files = [] + agent_thought.tools = ["tool_a", "dataset_tool"] + agent_thought.tool_labels = {"tool_a": "Tool A"} + agent_thought.tool_meta = { + "tool_a": { + "tool_config": { + "tool_provider_type": "custom", + "tool_provider": "provider-1", + }, + "tool_parameters": {"param": "value"}, + "time_cost": 2.5, + }, + "dataset_tool": { + "tool_config": { + "tool_provider_type": "dataset-retrieval", + "tool_provider": "dataset-provider", + } + }, + } + agent_thought.tool_inputs_dict = {"tool_a": {"q": "hello"}, "dataset_tool": {"k": "v"}} + agent_thought.tool_outputs_dict = {"tool_a": {"result": "ok"}} + return agent_thought + + +def _build_query_side_effect( + conversation: Conversation | None, + message: Message | None, + executor: EndUser | Account | None, +) -> Callable[..., MagicMock]: + def _query_side_effect(*args: object, **kwargs: object) -> MagicMock: + query = MagicMock() + query.where.return_value = query + if any(arg is Conversation for arg in args): + query.first.return_value = conversation + elif any(arg is Message for arg in args): + query.first.return_value = message + elif any(arg is EndUser for arg in args) or any(arg is Account for arg in args): + query.first.return_value = executor + return query + + return _query_side_effect + + +class TestAgentServiceGetAgentLogs: + """Test suite for AgentService.get_agent_logs.""" + + def test_get_agent_logs_should_raise_when_conversation_missing(self) -> None: + """Test missing conversation raises ValueError.""" + # Arrange + app_model = _make_app_model(MagicMock()) + with patch("services.agent_service.db") as mock_db: + query = MagicMock() + query.where.return_value = query + query.first.return_value = None + mock_db.session.query.return_value = query + + # Act & Assert + with pytest.raises(ValueError): + AgentService.get_agent_logs(app_model, "missing-conv", "msg-1") + + def test_get_agent_logs_should_raise_when_message_missing(self) -> None: + """Test missing message raises ValueError.""" + # Arrange + app_model = _make_app_model(MagicMock()) + conversation = _make_conversation(from_end_user_id="end-user-1", from_account_id=None) + with patch("services.agent_service.db") as mock_db: + conversation_query = MagicMock() + conversation_query.where.return_value = conversation_query + conversation_query.first.return_value = conversation + + message_query = MagicMock() + message_query.where.return_value = message_query + message_query.first.return_value = None + + mock_db.session.query.side_effect = [conversation_query, message_query] + + # Act & Assert + with pytest.raises(ValueError): + AgentService.get_agent_logs(app_model, conversation.id, "missing-msg") + + def test_get_agent_logs_should_raise_when_app_model_config_missing(self) -> None: + """Test missing app model config raises ValueError.""" + # Arrange + app_model = _make_app_model(None) + conversation = _make_conversation(from_end_user_id="end-user-1", from_account_id=None) + message = _make_message([]) + current_user = _make_current_user_account() + + with patch("services.agent_service.db") as mock_db, patch("services.agent_service.current_user", current_user): + mock_db.session.query.side_effect = _build_query_side_effect(conversation, message, MagicMock()) + + # Act & Assert + with pytest.raises(ValueError): + AgentService.get_agent_logs(app_model, conversation.id, message.id) + + def test_get_agent_logs_should_raise_when_agent_config_missing(self) -> None: + """Test missing agent config raises ValueError.""" + # Arrange + app_model_config = MagicMock() + app_model_config.agent_mode_dict = {"strategy": "react"} + app_model_config.to_dict.return_value = {"tools": []} + app_model = _make_app_model(app_model_config) + conversation = _make_conversation(from_end_user_id="end-user-1", from_account_id=None) + message = _make_message([]) + current_user = _make_current_user_account() + + with ( + patch("services.agent_service.db") as mock_db, + patch("services.agent_service.AgentConfigManager.convert", return_value=None), + patch("services.agent_service.current_user", current_user), + ): + mock_db.session.query.side_effect = _build_query_side_effect(conversation, message, MagicMock()) + + # Act & Assert + with pytest.raises(ValueError): + AgentService.get_agent_logs(app_model, conversation.id, message.id) + + def test_get_agent_logs_should_return_logs_for_end_user_executor(self) -> None: + """Test agent logs returned for end-user executor with tool icons.""" + # Arrange + agent_thought = _make_agent_thought() + message = _make_message([agent_thought]) + conversation = _make_conversation(from_end_user_id="end-user-1", from_account_id=None) + executor = MagicMock(spec=EndUser) + executor.name = "End User" + app_model_config = MagicMock() + app_model_config.agent_mode_dict = {"strategy": "react"} + app_model_config.to_dict.return_value = {"tools": []} + app_model = _make_app_model(app_model_config) + current_user = _make_current_user_account() + agent_tool = MagicMock() + agent_tool.tool_name = "tool_a" + agent_tool.provider_type = "custom" + agent_tool.provider_id = "provider-2" + agent_config = MagicMock() + agent_config.tools = [agent_tool] + + with ( + patch("services.agent_service.db") as mock_db, + patch("services.agent_service.AgentConfigManager.convert", return_value=agent_config) as mock_convert, + patch("services.agent_service.ToolManager.get_tool_icon") as mock_get_icon, + patch("services.agent_service.current_user", current_user), + ): + mock_db.session.query.side_effect = _build_query_side_effect(conversation, message, executor) + mock_get_icon.side_effect = [None, "icon-a"] + + # Act + result = AgentService.get_agent_logs(app_model, conversation.id, message.id) + + # Assert + assert result["meta"]["status"] == "success" + assert result["meta"]["executor"] == "End User" + assert result["meta"]["total_tokens"] == 10 + assert result["meta"]["agent_mode"] == "react" + assert result["meta"]["iterations"] == 1 + assert result["files"] == ["file-a.txt"] + assert len(result["iterations"]) == 1 + tool_calls = result["iterations"][0]["tool_calls"] + assert tool_calls[0]["tool_name"] == "tool_a" + assert tool_calls[0]["tool_icon"] == "icon-a" + assert tool_calls[1]["tool_name"] == "dataset_tool" + assert tool_calls[1]["tool_icon"] == "" + mock_convert.assert_called_once() + + def test_get_agent_logs_should_return_account_executor_when_no_end_user(self) -> None: + """Test agent logs fall back to account executor when end user is missing.""" + # Arrange + agent_thought = _make_agent_thought() + message = _make_message([agent_thought]) + conversation = _make_conversation(from_end_user_id=None, from_account_id="account-1") + executor = MagicMock(spec=Account) + executor.name = "Account User" + app_model_config = MagicMock() + app_model_config.agent_mode_dict = {"strategy": "react"} + app_model_config.to_dict.return_value = {"tools": []} + app_model = _make_app_model(app_model_config) + current_user = _make_current_user_account() + agent_config = MagicMock() + agent_config.tools = [] + + with ( + patch("services.agent_service.db") as mock_db, + patch("services.agent_service.AgentConfigManager.convert", return_value=agent_config), + patch("services.agent_service.ToolManager.get_tool_icon", return_value=""), + patch("services.agent_service.current_user", current_user), + ): + mock_db.session.query.side_effect = _build_query_side_effect(conversation, message, executor) + + # Act + result = AgentService.get_agent_logs(app_model, conversation.id, message.id) + + # Assert + assert result["meta"]["executor"] == "Account User" + + def test_get_agent_logs_should_use_defaults_when_executor_and_tool_data_missing(self) -> None: + """Test unknown executor and missing tool details fall back to defaults.""" + # Arrange + agent_thought = _make_agent_thought() + agent_thought.tool_labels = {} + agent_thought.tool_inputs_dict = {} + agent_thought.tool_outputs_dict = None + agent_thought.tool_meta = {"tool_a": {"error": "failed"}} + agent_thought.tools = ["tool_a"] + + message = _make_message([agent_thought]) + conversation = _make_conversation(from_end_user_id="end-user-1", from_account_id=None) + app_model_config = MagicMock() + app_model_config.agent_mode_dict = {} + app_model_config.to_dict.return_value = {"tools": []} + app_model = _make_app_model(app_model_config) + current_user = _make_current_user_account() + agent_config = MagicMock() + agent_config.tools = [] + + with ( + patch("services.agent_service.db") as mock_db, + patch("services.agent_service.AgentConfigManager.convert", return_value=agent_config), + patch("services.agent_service.ToolManager.get_tool_icon", return_value=None), + patch("services.agent_service.current_user", current_user), + ): + mock_db.session.query.side_effect = _build_query_side_effect(conversation, message, None) + + # Act + result = AgentService.get_agent_logs(app_model, conversation.id, message.id) + + # Assert + assert result["meta"]["executor"] == "Unknown" + assert result["meta"]["agent_mode"] == "react" + tool_call = result["iterations"][0]["tool_calls"][0] + assert tool_call["status"] == "error" + assert tool_call["error"] == "failed" + assert tool_call["tool_label"] == "tool_a" + assert tool_call["tool_input"] == {} + assert tool_call["tool_output"] == {} + assert tool_call["time_cost"] == 0 + assert tool_call["tool_parameters"] == {} + assert tool_call["tool_icon"] is None + + +class TestAgentServiceProviders: + """Test suite for AgentService provider methods.""" + + def test_list_agent_providers_should_delegate_to_plugin_client(self) -> None: + """Test list_agent_providers delegates to PluginAgentClient.""" + # Arrange + tenant_id = "tenant-1" + expected = [{"name": "provider"}] + with patch("services.agent_service.PluginAgentClient") as mock_client: + mock_client.return_value.fetch_agent_strategy_providers.return_value = expected + + # Act + result = AgentService.list_agent_providers("user-1", tenant_id) + + # Assert + assert result == expected + mock_client.return_value.fetch_agent_strategy_providers.assert_called_once_with(tenant_id) + + def test_get_agent_provider_should_return_provider_when_successful(self) -> None: + """Test get_agent_provider returns provider when successful.""" + # Arrange + tenant_id = "tenant-1" + provider_name = "provider-a" + expected = {"name": provider_name} + with patch("services.agent_service.PluginAgentClient") as mock_client: + mock_client.return_value.fetch_agent_strategy_provider.return_value = expected + + # Act + result = AgentService.get_agent_provider("user-1", tenant_id, provider_name) + + # Assert + assert result == expected + mock_client.return_value.fetch_agent_strategy_provider.assert_called_once_with(tenant_id, provider_name) + + def test_get_agent_provider_should_raise_value_error_on_plugin_error(self) -> None: + """Test get_agent_provider wraps PluginDaemonClientSideError into ValueError.""" + # Arrange + tenant_id = "tenant-1" + provider_name = "provider-a" + with patch("services.agent_service.PluginAgentClient") as mock_client: + mock_client.return_value.fetch_agent_strategy_provider.side_effect = PluginDaemonClientSideError( + "plugin error" + ) + + # Act & Assert + with pytest.raises(ValueError): + AgentService.get_agent_provider("user-1", tenant_id, provider_name) diff --git a/api/tests/unit_tests/services/test_annotation_service.py b/api/tests/unit_tests/services/test_annotation_service.py new file mode 100644 index 0000000000..0aacfc7f13 --- /dev/null +++ b/api/tests/unit_tests/services/test_annotation_service.py @@ -0,0 +1,1685 @@ +""" +Unit tests for services.annotation_service +""" + +from io import BytesIO +from types import SimpleNamespace +from typing import Any, cast +from unittest.mock import MagicMock, patch + +import pandas as pd +import pytest +from werkzeug.datastructures import FileStorage +from werkzeug.exceptions import NotFound + +from models.model import App, AppAnnotationHitHistory, AppAnnotationSetting, Message, MessageAnnotation +from services.annotation_service import AppAnnotationService + + +def _make_app(app_id: str = "app-1", tenant_id: str = "tenant-1") -> MagicMock: + app = MagicMock(spec=App) + app.id = app_id + app.tenant_id = tenant_id + app.status = "normal" + return app + + +def _make_user(user_id: str = "user-1") -> MagicMock: + user = MagicMock() + user.id = user_id + return user + + +def _make_message(message_id: str = "msg-1", app_id: str = "app-1") -> MagicMock: + message = MagicMock(spec=Message) + message.id = message_id + message.app_id = app_id + message.conversation_id = "conv-1" + message.query = "default-question" + message.annotation = None + return message + + +def _make_annotation(annotation_id: str = "ann-1") -> MagicMock: + annotation = MagicMock(spec=MessageAnnotation) + annotation.id = annotation_id + annotation.content = "" + annotation.question = "" + annotation.question_text = "" + return annotation + + +def _make_setting(setting_id: str = "setting-1", with_detail: bool = True) -> MagicMock: + setting = MagicMock(spec=AppAnnotationSetting) + setting.id = setting_id + setting.score_threshold = 0.5 + setting.collection_binding_id = "collection-1" + if with_detail: + setting.collection_binding_detail = SimpleNamespace(provider_name="provider-a", model_name="model-a") + else: + setting.collection_binding_detail = None + return setting + + +def _make_file(content: bytes) -> FileStorage: + return FileStorage(stream=BytesIO(content)) + + +class TestAppAnnotationServiceUpInsert: + """Test suite for up_insert_app_annotation_from_message.""" + + def test_up_insert_app_annotation_from_message_should_raise_not_found_when_app_missing(self) -> None: + """Test missing app raises NotFound.""" + # Arrange + args = {"answer": "hello", "message_id": "msg-1"} + current_user = _make_user() + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.up_insert_app_annotation_from_message(args, "app-1") + + def test_up_insert_app_annotation_from_message_should_raise_value_error_when_answer_missing(self) -> None: + """Test missing answer and content raises ValueError.""" + # Arrange + args = {"message_id": "msg-1"} + current_user = _make_user() + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(ValueError): + AppAnnotationService.up_insert_app_annotation_from_message(args, app.id) + + def test_up_insert_app_annotation_from_message_should_raise_not_found_when_message_missing(self) -> None: + """Test missing message raises NotFound.""" + # Arrange + args = {"answer": "hello", "message_id": "msg-1"} + current_user = _make_user() + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + message_query = MagicMock() + message_query.where.return_value = message_query + message_query.first.return_value = None + + mock_db.session.query.side_effect = [app_query, message_query] + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.up_insert_app_annotation_from_message(args, app.id) + + def test_up_insert_app_annotation_from_message_should_update_existing_annotation_when_found(self) -> None: + """Test existing annotation is updated and indexed.""" + # Arrange + args = {"answer": "updated", "message_id": "msg-1"} + current_user = _make_user() + tenant_id = "tenant-1" + app = _make_app() + annotation = _make_annotation("ann-1") + message = _make_message(message_id="msg-1", app_id=app.id) + message.annotation = annotation + setting = _make_setting() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.add_annotation_to_index_task") as mock_task, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + message_query = MagicMock() + message_query.where.return_value = message_query + message_query.first.return_value = message + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, message_query, setting_query] + + # Act + result = AppAnnotationService.up_insert_app_annotation_from_message(args, app.id) + + # Assert + assert result == annotation + assert annotation.content == "updated" + assert annotation.question == message.query + mock_db.session.add.assert_called_once_with(annotation) + mock_db.session.commit.assert_called_once() + mock_task.delay.assert_called_once_with( + annotation.id, + message.query, + tenant_id, + app.id, + setting.collection_binding_id, + ) + + def test_up_insert_app_annotation_from_message_should_create_annotation_when_message_has_no_annotation( + self, + ) -> None: + """Test new annotation is created when message has no annotation.""" + # Arrange + args = {"answer": "hello", "message_id": "msg-1", "question": "q1"} + current_user = _make_user() + tenant_id = "tenant-1" + app = _make_app() + message = _make_message(message_id="msg-1", app_id=app.id) + message.annotation = None + annotation_instance = _make_annotation("ann-1") + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.MessageAnnotation", return_value=annotation_instance) as mock_cls, + patch("services.annotation_service.add_annotation_to_index_task") as mock_task, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + message_query = MagicMock() + message_query.where.return_value = message_query + message_query.first.return_value = message + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = None + + mock_db.session.query.side_effect = [app_query, message_query, setting_query] + + # Act + result = AppAnnotationService.up_insert_app_annotation_from_message(args, app.id) + + # Assert + assert result == annotation_instance + mock_cls.assert_called_once_with( + app_id=app.id, + conversation_id=message.conversation_id, + message_id=message.id, + content="hello", + question="q1", + account_id=current_user.id, + ) + mock_db.session.add.assert_called_once_with(annotation_instance) + mock_db.session.commit.assert_called_once() + mock_task.delay.assert_not_called() + + def test_up_insert_app_annotation_from_message_should_raise_value_error_when_question_missing(self) -> None: + """Test missing question without message_id raises ValueError.""" + # Arrange + args = {"answer": "hello"} + current_user = _make_user() + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(ValueError): + AppAnnotationService.up_insert_app_annotation_from_message(args, app.id) + + def test_up_insert_app_annotation_from_message_should_create_annotation_when_message_missing(self) -> None: + """Test annotation is created when message_id is not provided.""" + # Arrange + args = {"answer": "hello", "question": "q1"} + current_user = _make_user() + tenant_id = "tenant-1" + app = _make_app() + annotation_instance = _make_annotation("ann-1") + setting = _make_setting() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.MessageAnnotation", return_value=annotation_instance) as mock_cls, + patch("services.annotation_service.add_annotation_to_index_task") as mock_task, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act + result = AppAnnotationService.up_insert_app_annotation_from_message(args, app.id) + + # Assert + assert result == annotation_instance + mock_cls.assert_called_once_with( + app_id=app.id, + content="hello", + question="q1", + account_id=current_user.id, + ) + mock_db.session.add.assert_called_once_with(annotation_instance) + mock_db.session.commit.assert_called_once() + mock_task.delay.assert_called_once_with( + annotation_instance.id, + "q1", + tenant_id, + app.id, + setting.collection_binding_id, + ) + + +class TestAppAnnotationServiceEnableDisable: + """Test suite for enable/disable app annotation.""" + + def test_enable_app_annotation_should_return_processing_when_cache_hit(self) -> None: + """Test cache hit returns processing status.""" + # Arrange + args = {"score_threshold": 0.5, "embedding_provider_name": "p", "embedding_model_name": "m"} + + with ( + patch("services.annotation_service.redis_client") as mock_redis, + patch("services.annotation_service.enable_annotation_reply_task") as mock_task, + ): + mock_redis.get.return_value = "job-1" + + # Act + result = AppAnnotationService.enable_app_annotation(args, "app-1") + + # Assert + assert result == {"job_id": "job-1", "job_status": "processing"} + mock_task.delay.assert_not_called() + + def test_enable_app_annotation_should_enqueue_job_when_cache_miss(self) -> None: + """Test cache miss enqueues enable task.""" + # Arrange + args = {"score_threshold": 0.5, "embedding_provider_name": "p", "embedding_model_name": "m"} + current_user = _make_user("user-1") + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.redis_client") as mock_redis, + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.uuid.uuid4", return_value="uuid-1"), + patch("services.annotation_service.enable_annotation_reply_task") as mock_task, + ): + mock_redis.get.return_value = None + + # Act + result = AppAnnotationService.enable_app_annotation(args, "app-1") + + # Assert + assert result == {"job_id": "uuid-1", "job_status": "waiting"} + mock_redis.setnx.assert_called_once_with("enable_app_annotation_job_uuid-1", "waiting") + mock_task.delay.assert_called_once_with( + "uuid-1", + "app-1", + current_user.id, + tenant_id, + 0.5, + "p", + "m", + ) + + def test_disable_app_annotation_should_return_processing_when_cache_hit(self) -> None: + """Test disable cache hit returns processing status.""" + # Arrange + tenant_id = "tenant-1" + with ( + patch("services.annotation_service.redis_client") as mock_redis, + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.disable_annotation_reply_task") as mock_task, + ): + mock_redis.get.return_value = "job-2" + + # Act + result = AppAnnotationService.disable_app_annotation("app-1") + + # Assert + assert result == {"job_id": "job-2", "job_status": "processing"} + mock_task.delay.assert_not_called() + + def test_disable_app_annotation_should_enqueue_job_when_cache_miss(self) -> None: + """Test disable cache miss enqueues disable task.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.redis_client") as mock_redis, + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.uuid.uuid4", return_value="uuid-2"), + patch("services.annotation_service.disable_annotation_reply_task") as mock_task, + ): + mock_redis.get.return_value = None + + # Act + result = AppAnnotationService.disable_app_annotation("app-1") + + # Assert + assert result == {"job_id": "uuid-2", "job_status": "waiting"} + mock_redis.setnx.assert_called_once_with("disable_app_annotation_job_uuid-2", "waiting") + mock_task.delay.assert_called_once_with("uuid-2", "app-1", tenant_id) + + +class TestAppAnnotationServiceListAndExport: + """Test suite for list and export methods.""" + + def test_get_annotation_list_by_app_id_should_raise_not_found_when_app_missing(self) -> None: + """Test missing app raises NotFound.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.get_annotation_list_by_app_id("app-1", 1, 10, "") + + def test_get_annotation_list_by_app_id_should_return_items_with_keyword(self) -> None: + """Test keyword search returns items and total.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + pagination = SimpleNamespace(items=["a1"], total=1) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("libs.helper.escape_like_pattern", return_value="safe"), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + mock_db.paginate.return_value = pagination + + # Act + items, total = AppAnnotationService.get_annotation_list_by_app_id(app.id, 1, 10, "keyword") + + # Assert + assert items == ["a1"] + assert total == 1 + + def test_get_annotation_list_by_app_id_should_return_items_without_keyword(self) -> None: + """Test list query without keyword returns paginated items.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + pagination = SimpleNamespace(items=["a1", "a2"], total=2) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + mock_db.paginate.return_value = pagination + + # Act + items, total = AppAnnotationService.get_annotation_list_by_app_id(app.id, 1, 10, "") + + # Assert + assert items == ["a1", "a2"] + assert total == 2 + + def test_export_annotation_list_by_app_id_should_sanitize_fields(self) -> None: + """Test export sanitizes question and content fields.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + annotation1 = _make_annotation("ann-1") + annotation1.question = "=cmd" + annotation1.content = "+1" + annotation2 = _make_annotation("ann-2") + annotation2.question = "@bad" + annotation2.content = "-2" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.CSVSanitizer.sanitize_value", side_effect=lambda v: f"safe:{v}"), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.order_by.return_value = annotation_query + annotation_query.all.return_value = [annotation1, annotation2] + + mock_db.session.query.side_effect = [app_query, annotation_query] + + # Act + result = AppAnnotationService.export_annotation_list_by_app_id(app.id) + + # Assert + assert result == [annotation1, annotation2] + assert annotation1.question == "safe:=cmd" + assert annotation1.content == "safe:+1" + assert annotation2.question == "safe:@bad" + assert annotation2.content == "safe:-2" + + def test_export_annotation_list_by_app_id_should_raise_not_found_when_app_missing(self) -> None: + """Test export raises NotFound when app is missing.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.export_annotation_list_by_app_id("app-1") + + +class TestAppAnnotationServiceDirectManipulation: + """Test suite for direct insert/update/delete methods.""" + + def test_insert_app_annotation_directly_should_raise_not_found_when_app_missing(self) -> None: + """Test insert raises NotFound when app is missing.""" + # Arrange + args = {"answer": "hello", "question": "q1"} + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.insert_app_annotation_directly(args, "app-1") + + def test_insert_app_annotation_directly_should_raise_value_error_when_question_missing(self) -> None: + """Test missing question raises ValueError.""" + # Arrange + args = {"answer": "hello"} + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(ValueError): + AppAnnotationService.insert_app_annotation_directly(args, app.id) + + def test_insert_app_annotation_directly_should_create_annotation_and_index(self) -> None: + """Test insert creates annotation and triggers index task.""" + # Arrange + args = {"answer": "hello", "question": "q1"} + current_user = _make_user("user-1") + tenant_id = "tenant-1" + app = _make_app() + annotation_instance = _make_annotation("ann-1") + setting = _make_setting() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.MessageAnnotation", return_value=annotation_instance) as mock_cls, + patch("services.annotation_service.add_annotation_to_index_task") as mock_task, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act + result = AppAnnotationService.insert_app_annotation_directly(args, app.id) + + # Assert + assert result == annotation_instance + mock_cls.assert_called_once_with( + app_id=app.id, + content="hello", + question="q1", + account_id=current_user.id, + ) + mock_db.session.add.assert_called_once_with(annotation_instance) + mock_db.session.commit.assert_called_once() + mock_task.delay.assert_called_once_with( + annotation_instance.id, + "q1", + tenant_id, + app.id, + setting.collection_binding_id, + ) + + def test_update_app_annotation_directly_should_raise_not_found_when_annotation_missing(self) -> None: + """Test missing annotation raises NotFound.""" + # Arrange + args = {"answer": "hello", "question": "q1"} + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.first.return_value = None + + mock_db.session.query.side_effect = [app_query, annotation_query] + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.update_app_annotation_directly(args, app.id, "ann-1") + + def test_update_app_annotation_directly_should_raise_not_found_when_app_missing(self) -> None: + """Test missing app raises NotFound in update path.""" + # Arrange + args = {"answer": "hello", "question": "q1"} + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.update_app_annotation_directly(args, "app-1", "ann-1") + + def test_update_app_annotation_directly_should_raise_value_error_when_question_missing(self) -> None: + """Test missing question raises ValueError.""" + # Arrange + args = {"answer": "hello"} + tenant_id = "tenant-1" + app = _make_app() + annotation = _make_annotation("ann-1") + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.first.return_value = annotation + + mock_db.session.query.side_effect = [app_query, annotation_query] + + # Act & Assert + with pytest.raises(ValueError): + AppAnnotationService.update_app_annotation_directly(args, app.id, annotation.id) + + def test_update_app_annotation_directly_should_update_annotation_and_index(self) -> None: + """Test update changes fields and triggers index update.""" + # Arrange + args = {"answer": "hello", "question": "q1"} + tenant_id = "tenant-1" + app = _make_app() + annotation = _make_annotation("ann-1") + annotation.question_text = "q1" + setting = _make_setting() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.update_annotation_to_index_task") as mock_task, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.first.return_value = annotation + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, annotation_query, setting_query] + + # Act + result = AppAnnotationService.update_app_annotation_directly(args, app.id, annotation.id) + + # Assert + assert result == annotation + assert annotation.content == "hello" + assert annotation.question == "q1" + mock_db.session.commit.assert_called_once() + mock_task.delay.assert_called_once_with( + annotation.id, + annotation.question_text, + tenant_id, + app.id, + setting.collection_binding_id, + ) + + def test_delete_app_annotation_should_delete_annotation_and_histories(self) -> None: + """Test delete removes annotation and hit histories.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + annotation = _make_annotation("ann-1") + history1 = MagicMock(spec=AppAnnotationHitHistory) + history2 = MagicMock(spec=AppAnnotationHitHistory) + setting = _make_setting() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.delete_annotation_index_task") as mock_task, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.first.return_value = annotation + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + scalars_result = MagicMock() + scalars_result.all.return_value = [history1, history2] + + mock_db.session.query.side_effect = [app_query, annotation_query, setting_query] + mock_db.session.scalars.return_value = scalars_result + + # Act + AppAnnotationService.delete_app_annotation(app.id, annotation.id) + + # Assert + mock_db.session.delete.assert_any_call(annotation) + mock_db.session.delete.assert_any_call(history1) + mock_db.session.delete.assert_any_call(history2) + mock_db.session.commit.assert_called_once() + mock_task.delay.assert_called_once_with( + annotation.id, + app.id, + tenant_id, + setting.collection_binding_id, + ) + + def test_delete_app_annotation_should_raise_not_found_when_app_missing(self) -> None: + """Test delete raises NotFound when app is missing.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.delete_app_annotation("app-1", "ann-1") + + def test_delete_app_annotation_should_raise_not_found_when_annotation_missing(self) -> None: + """Test delete raises NotFound when annotation is missing.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.first.return_value = None + + mock_db.session.query.side_effect = [app_query, annotation_query] + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.delete_app_annotation(app.id, "ann-1") + + def test_delete_app_annotations_in_batch_should_return_zero_when_none_found(self) -> None: + """Test batch delete returns zero when no annotations found.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotations_query = MagicMock() + annotations_query.outerjoin.return_value = annotations_query + annotations_query.where.return_value = annotations_query + annotations_query.all.return_value = [] + + mock_db.session.query.side_effect = [app_query, annotations_query] + + # Act + result = AppAnnotationService.delete_app_annotations_in_batch(app.id, ["ann-1"]) + + # Assert + assert result == {"deleted_count": 0} + + def test_delete_app_annotations_in_batch_should_raise_not_found_when_app_missing(self) -> None: + """Test batch delete raises NotFound when app is missing.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.delete_app_annotations_in_batch("app-1", ["ann-1"]) + + def test_delete_app_annotations_in_batch_should_delete_annotations_and_histories(self) -> None: + """Test batch delete removes annotations and triggers index deletion.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + annotation1 = _make_annotation("ann-1") + annotation2 = _make_annotation("ann-2") + setting = _make_setting() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.delete_annotation_index_task") as mock_task, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotations_query = MagicMock() + annotations_query.outerjoin.return_value = annotations_query + annotations_query.where.return_value = annotations_query + annotations_query.all.return_value = [(annotation1, setting), (annotation2, None)] + + hit_history_query = MagicMock() + hit_history_query.where.return_value = hit_history_query + hit_history_query.delete.return_value = None + + delete_query = MagicMock() + delete_query.where.return_value = delete_query + delete_query.delete.return_value = 2 + + mock_db.session.query.side_effect = [app_query, annotations_query, hit_history_query, delete_query] + + # Act + result = AppAnnotationService.delete_app_annotations_in_batch(app.id, ["ann-1", "ann-2"]) + + # Assert + assert result == {"deleted_count": 2} + mock_task.delay.assert_called_once_with(annotation1.id, app.id, tenant_id, setting.collection_binding_id) + mock_db.session.commit.assert_called_once() + + +class TestAppAnnotationServiceBatchImport: + """Test suite for batch import.""" + + def test_batch_import_app_annotations_should_raise_not_found_when_app_missing(self) -> None: + """Test missing app raises NotFound.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.batch_import_app_annotations("app-1", file) + + def test_batch_import_app_annotations_should_return_error_when_columns_invalid(self) -> None: + """Test invalid column count returns error message.""" + # Arrange + file = _make_file(b"question\nq\n") + tenant_id = "tenant-1" + app = _make_app() + df = pd.DataFrame({"q": ["only"]}) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "Invalid CSV format" in error_msg + + def test_batch_import_app_annotations_should_return_error_when_file_empty(self) -> None: + """Test empty file returns validation error before CSV parsing.""" + # Arrange + file = _make_file(b"") + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "empty or invalid" in error_msg + + def test_batch_import_app_annotations_should_return_error_when_min_records_not_met(self) -> None: + """Test min records validation returns error message.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + app = _make_app() + df = pd.DataFrame({"q": ["q1"], "a": ["a1"]}) + features = SimpleNamespace(billing=SimpleNamespace(enabled=False), annotation_quota_limit=None) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch("services.annotation_service.FeatureService.get_features", return_value=features), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=2), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "at least" in error_msg + + def test_batch_import_app_annotations_should_return_error_when_row_limit_exceeded(self) -> None: + """Test row count over max limit returns explicit error.""" + # Arrange + file = _make_file(b"question,answer\nq1,a1\nq2,a2\n") + tenant_id = "tenant-1" + app = _make_app() + df = pd.DataFrame({"q": ["q1", "q2"], "a": ["a1", "a2"]}) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=1, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "too many records" in error_msg + + def test_batch_import_app_annotations_should_skip_malformed_rows_and_fail_min_records(self) -> None: + """Test malformed row extraction is skipped and can fail min record validation.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + app = _make_app() + malformed_row = MagicMock() + malformed_row.iloc.__getitem__.side_effect = IndexError() + df = MagicMock() + df.columns = ["q", "a"] + df.iterrows.return_value = [(0, malformed_row)] + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "at least" in error_msg + + def test_batch_import_app_annotations_should_skip_nan_rows_and_fail_min_records(self) -> None: + """Test NaN rows are skipped by validation and reported via min record check.""" + # Arrange + file = _make_file(b"question,answer\nnan,nan\n") + tenant_id = "tenant-1" + app = _make_app() + df = pd.DataFrame({"q": ["nan"], "a": ["nan"]}) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "at least" in error_msg + + def test_batch_import_app_annotations_should_return_error_when_question_too_long(self) -> None: + """Test oversized question is rejected with row context.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + app = _make_app() + df = pd.DataFrame({"q": ["q" * 2001], "a": ["a"]}) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "Question at row" in error_msg + + def test_batch_import_app_annotations_should_return_error_when_answer_too_long(self) -> None: + """Test oversized answer is rejected with row context.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + app = _make_app() + df = pd.DataFrame({"q": ["q"], "a": ["a" * 10001]}) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "Answer at row" in error_msg + + def test_batch_import_app_annotations_should_return_error_when_quota_exceeded(self) -> None: + """Test quota validation returns error message.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + app = _make_app() + df = pd.DataFrame({"q": ["q1"], "a": ["a1"]}) + features = SimpleNamespace( + billing=SimpleNamespace(enabled=True), + annotation_quota_limit=SimpleNamespace(limit=1, size=1), + ) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch("services.annotation_service.FeatureService.get_features", return_value=features), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + error_msg = cast(str, result["error_msg"]) + assert "exceeds the limit" in error_msg + + def test_batch_import_app_annotations_should_enqueue_job_when_valid(self) -> None: + """Test successful batch import enqueues job and returns status.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + current_user = _make_user("user-1") + app = _make_app() + df = pd.DataFrame({"q": ["q1"], "a": ["a1"]}) + features = SimpleNamespace(billing=SimpleNamespace(enabled=False), annotation_quota_limit=None) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch("services.annotation_service.FeatureService.get_features", return_value=features), + patch("services.annotation_service.batch_import_annotations_task") as mock_task, + patch("services.annotation_service.redis_client") as mock_redis, + patch("services.annotation_service.uuid.uuid4", return_value="uuid-3"), + patch("services.annotation_service.naive_utc_now", return_value=SimpleNamespace(timestamp=lambda: 1)), + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + assert result == {"job_id": "uuid-3", "job_status": "waiting", "record_count": 1} + mock_redis.zadd.assert_called_once() + mock_redis.expire.assert_called_once() + mock_redis.setnx.assert_called_once_with("app_annotation_batch_import_uuid-3", "waiting") + mock_task.delay.assert_called_once() + + def test_batch_import_app_annotations_should_cleanup_active_job_on_unexpected_exception(self) -> None: + """Test unexpected runtime errors trigger cleanup and return wrapped error.""" + # Arrange + file = _make_file(b"question,answer\nq,a\n") + tenant_id = "tenant-1" + current_user = _make_user("user-1") + app = _make_app() + df = pd.DataFrame({"q": ["q1"], "a": ["a1"]}) + features = SimpleNamespace(billing=SimpleNamespace(enabled=False), annotation_quota_limit=None) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.pd.read_csv", return_value=df), + patch("services.annotation_service.FeatureService.get_features", return_value=features), + patch("services.annotation_service.redis_client") as mock_redis, + patch("services.annotation_service.uuid.uuid4", return_value="uuid-4"), + patch("services.annotation_service.naive_utc_now", return_value=SimpleNamespace(timestamp=lambda: 1)), + patch("services.annotation_service.logger") as mock_logger, + patch( + "configs.dify_config", + new=SimpleNamespace(ANNOTATION_IMPORT_MAX_RECORDS=5, ANNOTATION_IMPORT_MIN_RECORDS=1), + ), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + mock_db.session.query.return_value = app_query + mock_redis.zadd.side_effect = RuntimeError("boom") + mock_redis.zrem.side_effect = RuntimeError("cleanup-failed") + + # Act + result = AppAnnotationService.batch_import_app_annotations(app.id, file) + + # Assert + assert result["error_msg"] == "An error occurred while processing the file: boom" + mock_redis.zrem.assert_called_once_with(f"annotation_import_active:{tenant_id}", "uuid-4") + mock_logger.debug.assert_called_once() + + +class TestAppAnnotationServiceHitHistoryAndSettings: + """Test suite for hit history and settings methods.""" + + def test_get_annotation_hit_histories_should_raise_not_found_when_app_missing(self) -> None: + """Test missing app raises NotFound.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.get_annotation_hit_histories("app-1", "ann-1", 1, 10) + + def test_get_annotation_hit_histories_should_return_items_and_total(self) -> None: + """Test hit histories pagination returns items and total.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + annotation = _make_annotation("ann-1") + pagination = SimpleNamespace(items=["h1"], total=2) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.first.return_value = annotation + + mock_db.session.query.side_effect = [app_query, annotation_query] + mock_db.paginate.return_value = pagination + + # Act + items, total = AppAnnotationService.get_annotation_hit_histories(app.id, annotation.id, 1, 10) + + # Assert + assert items == ["h1"] + assert total == 2 + + def test_get_annotation_hit_histories_should_raise_not_found_when_annotation_missing(self) -> None: + """Test missing annotation raises NotFound.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + annotation_query = MagicMock() + annotation_query.where.return_value = annotation_query + annotation_query.first.return_value = None + + mock_db.session.query.side_effect = [app_query, annotation_query] + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.get_annotation_hit_histories(app.id, "ann-1", 1, 10) + + def test_get_annotation_by_id_should_return_none_when_missing(self) -> None: + """Test get_annotation_by_id returns None when not found.""" + # Arrange + with patch("services.annotation_service.db") as mock_db: + query = MagicMock() + query.where.return_value = query + query.first.return_value = None + mock_db.session.query.return_value = query + + # Act + result = AppAnnotationService.get_annotation_by_id("ann-1") + + # Assert + assert result is None + + def test_get_annotation_by_id_should_return_annotation_when_exists(self) -> None: + """Test get_annotation_by_id returns annotation when found.""" + # Arrange + annotation = _make_annotation("ann-1") + with patch("services.annotation_service.db") as mock_db: + query = MagicMock() + query.where.return_value = query + query.first.return_value = annotation + mock_db.session.query.return_value = query + + # Act + result = AppAnnotationService.get_annotation_by_id("ann-1") + + # Assert + assert result == annotation + + def test_add_annotation_history_should_update_hit_count_and_store_history(self) -> None: + """Test add_annotation_history updates hit count and creates history.""" + # Arrange + with ( + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.AppAnnotationHitHistory") as mock_history_cls, + ): + query = MagicMock() + query.where.return_value = query + mock_db.session.query.return_value = query + + # Act + AppAnnotationService.add_annotation_history( + annotation_id="ann-1", + app_id="app-1", + annotation_question="q", + annotation_content="a", + query="q", + user_id="user-1", + message_id="msg-1", + from_source="chat", + score=0.8, + ) + + # Assert + query.update.assert_called_once() + mock_history_cls.assert_called_once() + mock_db.session.add.assert_called_once() + mock_db.session.commit.assert_called_once() + + def test_get_app_annotation_setting_by_app_id_should_return_embedding_model_when_detail_exists(self) -> None: + """Test setting detail returns embedding model info.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + setting = _make_setting(with_detail=True) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act + result = AppAnnotationService.get_app_annotation_setting_by_app_id(app.id) + + # Assert + assert result["enabled"] is True + embedding_model = cast(dict[str, Any], result["embedding_model"]) + assert embedding_model["embedding_provider_name"] == "provider-a" + assert embedding_model["embedding_model_name"] == "model-a" + + def test_get_app_annotation_setting_by_app_id_should_raise_not_found_when_app_missing(self) -> None: + """Test missing app raises NotFound.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.get_app_annotation_setting_by_app_id("app-1") + + def test_get_app_annotation_setting_by_app_id_should_return_empty_embedding_model_when_no_detail(self) -> None: + """Test setting without detail returns empty embedding model.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + setting = _make_setting(with_detail=False) + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act + result = AppAnnotationService.get_app_annotation_setting_by_app_id(app.id) + + # Assert + assert result["enabled"] is True + assert result["embedding_model"] == {} + + def test_get_app_annotation_setting_by_app_id_should_return_disabled_when_setting_missing(self) -> None: + """Test missing setting returns disabled payload.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = None + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act + result = AppAnnotationService.get_app_annotation_setting_by_app_id(app.id) + + # Assert + assert result == {"enabled": False} + + def test_update_app_annotation_setting_should_update_and_return_detail(self) -> None: + """Test update_app_annotation_setting updates fields and returns detail.""" + # Arrange + tenant_id = "tenant-1" + current_user = _make_user("user-1") + app = _make_app() + setting = _make_setting(with_detail=True) + args = {"score_threshold": 0.8} + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.naive_utc_now", return_value="now"), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act + result = AppAnnotationService.update_app_annotation_setting(app.id, setting.id, args) + + # Assert + assert result["enabled"] is True + assert result["score_threshold"] == 0.8 + embedding_model = cast(dict[str, Any], result["embedding_model"]) + assert embedding_model["embedding_provider_name"] == "provider-a" + mock_db.session.add.assert_called_once_with(setting) + mock_db.session.commit.assert_called_once() + + def test_update_app_annotation_setting_should_return_empty_embedding_model_when_detail_missing(self) -> None: + """Test update returns empty embedding_model when collection detail is absent.""" + # Arrange + tenant_id = "tenant-1" + current_user = _make_user("user-1") + app = _make_app() + setting = _make_setting(with_detail=False) + args = {"score_threshold": 0.7} + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(current_user, tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.naive_utc_now", return_value="now"), + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = setting + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act + result = AppAnnotationService.update_app_annotation_setting(app.id, setting.id, args) + + # Assert + assert result["enabled"] is True + assert result["score_threshold"] == 0.7 + assert result["embedding_model"] == {} + + def test_update_app_annotation_setting_should_raise_not_found_when_app_missing(self) -> None: + """Test update raises NotFound when app is missing.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = None + mock_db.session.query.return_value = app_query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.update_app_annotation_setting("app-1", "setting-1", {"score_threshold": 0.5}) + + def test_update_app_annotation_setting_should_raise_not_found_when_setting_missing(self) -> None: + """Test update raises NotFound when setting is missing.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + app_query = MagicMock() + app_query.where.return_value = app_query + app_query.first.return_value = app + + setting_query = MagicMock() + setting_query.where.return_value = setting_query + setting_query.first.return_value = None + + mock_db.session.query.side_effect = [app_query, setting_query] + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.update_app_annotation_setting(app.id, "setting-1", {"score_threshold": 0.5}) + + +class TestAppAnnotationServiceClearAll: + """Test suite for clear_all_annotations.""" + + def test_clear_all_annotations_should_delete_annotations_and_histories(self) -> None: + """Test clear_all_annotations deletes all data and triggers index removal.""" + # Arrange + tenant_id = "tenant-1" + app = _make_app() + setting = _make_setting() + annotation1 = _make_annotation("ann-1") + annotation2 = _make_annotation("ann-2") + history = MagicMock(spec=AppAnnotationHitHistory) + + def query_side_effect(*args: object, **kwargs: object) -> MagicMock: + query = MagicMock() + query.where.return_value = query + if App in args: + query.first.return_value = app + elif AppAnnotationSetting in args: + query.first.return_value = setting + elif MessageAnnotation in args: + query.yield_per.return_value = [annotation1, annotation2] + elif AppAnnotationHitHistory in args: + query.yield_per.return_value = [history] + return query + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + patch("services.annotation_service.delete_annotation_index_task") as mock_task, + ): + mock_db.session.query.side_effect = query_side_effect + + # Act + result = AppAnnotationService.clear_all_annotations(app.id) + + # Assert + assert result == {"result": "success"} + mock_db.session.delete.assert_any_call(annotation1) + mock_db.session.delete.assert_any_call(annotation2) + mock_db.session.delete.assert_any_call(history) + mock_task.delay.assert_any_call(annotation1.id, app.id, tenant_id, setting.collection_binding_id) + mock_task.delay.assert_any_call(annotation2.id, app.id, tenant_id, setting.collection_binding_id) + mock_db.session.commit.assert_called_once() + + def test_clear_all_annotations_should_raise_not_found_when_app_missing(self) -> None: + """Test missing app raises NotFound.""" + # Arrange + tenant_id = "tenant-1" + + with ( + patch("services.annotation_service.current_account_with_tenant", return_value=(_make_user(), tenant_id)), + patch("services.annotation_service.db") as mock_db, + ): + query = MagicMock() + query.where.return_value = query + query.first.return_value = None + mock_db.session.query.return_value = query + + # Act & Assert + with pytest.raises(NotFound): + AppAnnotationService.clear_all_annotations("app-1") diff --git a/api/tests/unit_tests/services/test_api_based_extension_service.py b/api/tests/unit_tests/services/test_api_based_extension_service.py new file mode 100644 index 0000000000..7f4b5fdaa3 --- /dev/null +++ b/api/tests/unit_tests/services/test_api_based_extension_service.py @@ -0,0 +1,421 @@ +""" +Comprehensive unit tests for services/api_based_extension_service.py + +Covers: +- APIBasedExtensionService.get_all_by_tenant_id +- APIBasedExtensionService.save +- APIBasedExtensionService.delete +- APIBasedExtensionService.get_with_tenant_id +- APIBasedExtensionService._validation (new record & existing record branches) +- APIBasedExtensionService._ping_connection (pong success, wrong response, exception) +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from services.api_based_extension_service import APIBasedExtensionService + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_extension( + *, + id_: str | None = None, + tenant_id: str = "tenant-001", + name: str = "my-ext", + api_endpoint: str = "https://example.com/hook", + api_key: str = "secret-key-123", +) -> MagicMock: + """Return a lightweight mock that mimics APIBasedExtension.""" + ext = MagicMock() + ext.id = id_ + ext.tenant_id = tenant_id + ext.name = name + ext.api_endpoint = api_endpoint + ext.api_key = api_key + return ext + + +# --------------------------------------------------------------------------- +# Tests: get_all_by_tenant_id +# --------------------------------------------------------------------------- + + +class TestGetAllByTenantId: + """Tests for APIBasedExtensionService.get_all_by_tenant_id.""" + + @patch("services.api_based_extension_service.decrypt_token", return_value="decrypted-key") + @patch("services.api_based_extension_service.db") + def test_returns_extensions_with_decrypted_keys(self, mock_db, mock_decrypt): + """Each api_key is decrypted and the list is returned.""" + ext1 = _make_extension(id_="id-1", api_key="enc-key-1") + ext2 = _make_extension(id_="id-2", api_key="enc-key-2") + + mock_db.session.query.return_value.filter_by.return_value.order_by.return_value.all.return_value = [ + ext1, + ext2, + ] + + result = APIBasedExtensionService.get_all_by_tenant_id("tenant-001") + + assert result == [ext1, ext2] + assert ext1.api_key == "decrypted-key" + assert ext2.api_key == "decrypted-key" + assert mock_decrypt.call_count == 2 + + @patch("services.api_based_extension_service.decrypt_token", return_value="decrypted-key") + @patch("services.api_based_extension_service.db") + def test_returns_empty_list_when_no_extensions(self, mock_db, mock_decrypt): + """Returns an empty list gracefully when no records exist.""" + mock_db.session.query.return_value.filter_by.return_value.order_by.return_value.all.return_value = [] + + result = APIBasedExtensionService.get_all_by_tenant_id("tenant-001") + + assert result == [] + mock_decrypt.assert_not_called() + + @patch("services.api_based_extension_service.decrypt_token", return_value="decrypted-key") + @patch("services.api_based_extension_service.db") + def test_calls_query_with_correct_tenant_id(self, mock_db, mock_decrypt): + """Verifies the DB is queried with the supplied tenant_id.""" + mock_db.session.query.return_value.filter_by.return_value.order_by.return_value.all.return_value = [] + + APIBasedExtensionService.get_all_by_tenant_id("tenant-xyz") + + mock_db.session.query.return_value.filter_by.assert_called_once_with(tenant_id="tenant-xyz") + + +# --------------------------------------------------------------------------- +# Tests: save +# --------------------------------------------------------------------------- + + +class TestSave: + """Tests for APIBasedExtensionService.save.""" + + @patch("services.api_based_extension_service.encrypt_token", return_value="encrypted-key") + @patch("services.api_based_extension_service.db") + @patch.object(APIBasedExtensionService, "_validation") + def test_save_new_record_encrypts_key_and_commits(self, mock_validation, mock_db, mock_encrypt): + """Happy path: validation passes, key is encrypted, record is added and committed.""" + ext = _make_extension(id_=None, api_key="plain-key-123") + + result = APIBasedExtensionService.save(ext) + + mock_validation.assert_called_once_with(ext) + mock_encrypt.assert_called_once_with(ext.tenant_id, "plain-key-123") + assert ext.api_key == "encrypted-key" + mock_db.session.add.assert_called_once_with(ext) + mock_db.session.commit.assert_called_once() + assert result is ext + + @patch("services.api_based_extension_service.encrypt_token", return_value="encrypted-key") + @patch("services.api_based_extension_service.db") + @patch.object(APIBasedExtensionService, "_validation", side_effect=ValueError("name must not be empty")) + def test_save_raises_when_validation_fails(self, mock_validation, mock_db, mock_encrypt): + """If _validation raises, save should propagate the error without touching the DB.""" + ext = _make_extension(name="") + + with pytest.raises(ValueError, match="name must not be empty"): + APIBasedExtensionService.save(ext) + + mock_db.session.add.assert_not_called() + mock_db.session.commit.assert_not_called() + + +# --------------------------------------------------------------------------- +# Tests: delete +# --------------------------------------------------------------------------- + + +class TestDelete: + """Tests for APIBasedExtensionService.delete.""" + + @patch("services.api_based_extension_service.db") + def test_delete_removes_record_and_commits(self, mock_db): + """delete() must call session.delete with the extension and then commit.""" + ext = _make_extension(id_="delete-me") + + APIBasedExtensionService.delete(ext) + + mock_db.session.delete.assert_called_once_with(ext) + mock_db.session.commit.assert_called_once() + + +# --------------------------------------------------------------------------- +# Tests: get_with_tenant_id +# --------------------------------------------------------------------------- + + +class TestGetWithTenantId: + """Tests for APIBasedExtensionService.get_with_tenant_id.""" + + @patch("services.api_based_extension_service.decrypt_token", return_value="decrypted-key") + @patch("services.api_based_extension_service.db") + def test_returns_extension_with_decrypted_key(self, mock_db, mock_decrypt): + """Found extension has its api_key decrypted before being returned.""" + ext = _make_extension(id_="ext-123", api_key="enc-key") + + (mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value) = ext + + result = APIBasedExtensionService.get_with_tenant_id("tenant-001", "ext-123") + + assert result is ext + assert ext.api_key == "decrypted-key" + mock_decrypt.assert_called_once_with(ext.tenant_id, "enc-key") + + @patch("services.api_based_extension_service.db") + def test_raises_value_error_when_not_found(self, mock_db): + """Raises ValueError when no matching extension exists.""" + (mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value) = None + + with pytest.raises(ValueError, match="API based extension is not found"): + APIBasedExtensionService.get_with_tenant_id("tenant-001", "non-existent") + + @patch("services.api_based_extension_service.decrypt_token", return_value="decrypted-key") + @patch("services.api_based_extension_service.db") + def test_queries_with_correct_tenant_and_extension_id(self, mock_db, mock_decrypt): + """Verifies both tenant_id and extension id are used in the query.""" + ext = _make_extension(id_="ext-abc") + chain = mock_db.session.query.return_value + chain.filter_by.return_value.filter_by.return_value.first.return_value = ext + + APIBasedExtensionService.get_with_tenant_id("tenant-002", "ext-abc") + + # First filter_by call uses tenant_id + chain.filter_by.assert_called_once_with(tenant_id="tenant-002") + # Second filter_by call uses id + chain.filter_by.return_value.filter_by.assert_called_once_with(id="ext-abc") + + +# --------------------------------------------------------------------------- +# Tests: _validation (new record — id is falsy) +# --------------------------------------------------------------------------- + + +class TestValidationNewRecord: + """Tests for _validation() with a brand-new record (no id).""" + + def _build_mock_db(self, name_exists: bool = False): + mock_db = MagicMock() + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = ( + MagicMock() if name_exists else None + ) + return mock_db + + @patch.object(APIBasedExtensionService, "_ping_connection") + @patch("services.api_based_extension_service.db") + def test_valid_new_extension_passes(self, mock_db, mock_ping): + """A new record with all valid fields should pass without exceptions.""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, name="valid-ext", api_key="longenoughkey") + + # Should not raise + APIBasedExtensionService._validation(ext) + mock_ping.assert_called_once_with(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_name_is_empty(self, mock_db): + """Empty name raises ValueError.""" + ext = _make_extension(id_=None, name="") + with pytest.raises(ValueError, match="name must not be empty"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_name_is_none(self, mock_db): + """None name raises ValueError.""" + ext = _make_extension(id_=None, name=None) + with pytest.raises(ValueError, match="name must not be empty"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_name_already_exists_for_new_record(self, mock_db): + """A new record whose name already exists raises ValueError.""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = ( + MagicMock() + ) + ext = _make_extension(id_=None, name="duplicate-name") + + with pytest.raises(ValueError, match="name must be unique, it is already existed"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_api_endpoint_is_empty(self, mock_db): + """Empty api_endpoint raises ValueError.""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, api_endpoint="") + + with pytest.raises(ValueError, match="api_endpoint must not be empty"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_api_endpoint_is_none(self, mock_db): + """None api_endpoint raises ValueError.""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, api_endpoint=None) + + with pytest.raises(ValueError, match="api_endpoint must not be empty"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_api_key_is_empty(self, mock_db): + """Empty api_key raises ValueError.""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, api_key="") + + with pytest.raises(ValueError, match="api_key must not be empty"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_api_key_is_none(self, mock_db): + """None api_key raises ValueError.""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, api_key=None) + + with pytest.raises(ValueError, match="api_key must not be empty"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_api_key_too_short(self, mock_db): + """api_key shorter than 5 characters raises ValueError.""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, api_key="abc") + + with pytest.raises(ValueError, match="api_key must be at least 5 characters"): + APIBasedExtensionService._validation(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_api_key_exactly_four_chars(self, mock_db): + """api_key with exactly 4 characters raises ValueError (boundary condition).""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, api_key="1234") + + with pytest.raises(ValueError, match="api_key must be at least 5 characters"): + APIBasedExtensionService._validation(ext) + + @patch.object(APIBasedExtensionService, "_ping_connection") + @patch("services.api_based_extension_service.db") + def test_api_key_exactly_five_chars_is_accepted(self, mock_db, mock_ping): + """api_key with exactly 5 characters should pass (boundary condition).""" + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.first.return_value = None + ext = _make_extension(id_=None, api_key="12345") + + # Should not raise + APIBasedExtensionService._validation(ext) + + +# --------------------------------------------------------------------------- +# Tests: _validation (existing record — id is truthy) +# --------------------------------------------------------------------------- + + +class TestValidationExistingRecord: + """Tests for _validation() with an existing record (id is set).""" + + @patch.object(APIBasedExtensionService, "_ping_connection") + @patch("services.api_based_extension_service.db") + def test_valid_existing_extension_passes(self, mock_db, mock_ping): + """An existing record whose name is unique (excluding self) should pass.""" + # .where(...).first() → None means no *other* record has that name + ( + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.where.return_value.first.return_value + ) = None + ext = _make_extension(id_="existing-id", name="unique-name", api_key="longenoughkey") + + # Should not raise + APIBasedExtensionService._validation(ext) + mock_ping.assert_called_once_with(ext) + + @patch("services.api_based_extension_service.db") + def test_raises_if_existing_record_name_conflicts_with_another(self, mock_db): + """Existing record cannot use a name already owned by a different record.""" + ( + mock_db.session.query.return_value.filter_by.return_value.filter_by.return_value.where.return_value.first.return_value + ) = MagicMock() + ext = _make_extension(id_="existing-id", name="taken-name") + + with pytest.raises(ValueError, match="name must be unique, it is already existed"): + APIBasedExtensionService._validation(ext) + + +# --------------------------------------------------------------------------- +# Tests: _ping_connection +# --------------------------------------------------------------------------- + + +class TestPingConnection: + """Tests for APIBasedExtensionService._ping_connection.""" + + @patch("services.api_based_extension_service.APIBasedExtensionRequestor") + def test_successful_ping_returns_pong(self, mock_requestor_class): + """When the endpoint returns {"result": "pong"}, no exception is raised.""" + mock_client = MagicMock() + mock_client.request.return_value = {"result": "pong"} + mock_requestor_class.return_value = mock_client + + ext = _make_extension(api_endpoint="https://ok.example.com", api_key="secret-key") + # Should not raise + APIBasedExtensionService._ping_connection(ext) + + mock_requestor_class.assert_called_once_with(ext.api_endpoint, ext.api_key) + + @patch("services.api_based_extension_service.APIBasedExtensionRequestor") + def test_wrong_ping_response_raises_value_error(self, mock_requestor_class): + """When the response is not {"result": "pong"}, a ValueError is raised.""" + mock_client = MagicMock() + mock_client.request.return_value = {"result": "error"} + mock_requestor_class.return_value = mock_client + + ext = _make_extension() + with pytest.raises(ValueError, match="connection error"): + APIBasedExtensionService._ping_connection(ext) + + @patch("services.api_based_extension_service.APIBasedExtensionRequestor") + def test_network_exception_wraps_in_value_error(self, mock_requestor_class): + """Any exception raised during request is wrapped in a ValueError.""" + mock_client = MagicMock() + mock_client.request.side_effect = ConnectionError("network failure") + mock_requestor_class.return_value = mock_client + + ext = _make_extension() + with pytest.raises(ValueError, match="connection error: network failure"): + APIBasedExtensionService._ping_connection(ext) + + @patch("services.api_based_extension_service.APIBasedExtensionRequestor") + def test_requestor_constructor_exception_wraps_in_value_error(self, mock_requestor_class): + """Exception raised by the requestor constructor itself is wrapped.""" + mock_requestor_class.side_effect = RuntimeError("bad config") + + ext = _make_extension() + with pytest.raises(ValueError, match="connection error: bad config"): + APIBasedExtensionService._ping_connection(ext) + + @patch("services.api_based_extension_service.APIBasedExtensionRequestor") + def test_missing_result_key_raises_value_error(self, mock_requestor_class): + """A response dict without a 'result' key does not equal 'pong' → raises.""" + mock_client = MagicMock() + mock_client.request.return_value = {} # no 'result' key + mock_requestor_class.return_value = mock_client + + ext = _make_extension() + with pytest.raises(ValueError, match="connection error"): + APIBasedExtensionService._ping_connection(ext) + + @patch("services.api_based_extension_service.APIBasedExtensionRequestor") + def test_uses_ping_extension_point(self, mock_requestor_class): + """The PING extension point is passed to the client.request call.""" + from models.api_based_extension import APIBasedExtensionPoint + + mock_client = MagicMock() + mock_client.request.return_value = {"result": "pong"} + mock_requestor_class.return_value = mock_client + + ext = _make_extension() + APIBasedExtensionService._ping_connection(ext) + + call_kwargs = mock_client.request.call_args + assert call_kwargs.kwargs["point"] == APIBasedExtensionPoint.PING + assert call_kwargs.kwargs["params"] == {} diff --git a/api/tests/unit_tests/services/test_api_token_service.py b/api/tests/unit_tests/services/test_api_token_service.py new file mode 100644 index 0000000000..ad4de93b25 --- /dev/null +++ b/api/tests/unit_tests/services/test_api_token_service.py @@ -0,0 +1,466 @@ +from datetime import datetime +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest +from werkzeug.exceptions import Unauthorized + +import services.api_token_service as api_token_service_module +from services.api_token_service import ApiTokenCache, CachedApiToken + + +@pytest.fixture +def mock_db_session(): + """Fixture providing common DB session mocking for query_token_from_db tests.""" + fake_engine = MagicMock() + + session = MagicMock() + session_context = MagicMock() + session_context.__enter__.return_value = session + session_context.__exit__.return_value = None + + with ( + patch.object(api_token_service_module, "db", new=SimpleNamespace(engine=fake_engine)), + patch.object(api_token_service_module, "Session", return_value=session_context) as mock_session_class, + patch.object(api_token_service_module.ApiTokenCache, "set") as mock_cache_set, + patch.object(api_token_service_module, "record_token_usage") as mock_record_usage, + ): + yield { + "session": session, + "mock_session_class": mock_session_class, + "mock_cache_set": mock_cache_set, + "mock_record_usage": mock_record_usage, + "fake_engine": fake_engine, + } + + +class TestQueryTokenFromDb: + def test_should_return_api_token_and_cache_when_token_exists(self, mock_db_session): + """Test DB lookup success path caches token and records usage.""" + # Arrange + auth_token = "token-123" + scope = "app" + api_token = MagicMock() + + mock_db_session["session"].scalar.return_value = api_token + + # Act + result = api_token_service_module.query_token_from_db(auth_token, scope) + + # Assert + assert result == api_token + mock_db_session["mock_session_class"].assert_called_once_with( + mock_db_session["fake_engine"], expire_on_commit=False + ) + mock_db_session["mock_cache_set"].assert_called_once_with(auth_token, scope, api_token) + mock_db_session["mock_record_usage"].assert_called_once_with(auth_token, scope) + + def test_should_cache_null_and_raise_unauthorized_when_token_not_found(self, mock_db_session): + """Test DB lookup miss path caches null marker and raises Unauthorized.""" + # Arrange + auth_token = "missing-token" + scope = "app" + + mock_db_session["session"].scalar.return_value = None + + # Act / Assert + with pytest.raises(Unauthorized, match="Access token is invalid"): + api_token_service_module.query_token_from_db(auth_token, scope) + + mock_db_session["mock_cache_set"].assert_called_once_with(auth_token, scope, None) + mock_db_session["mock_record_usage"].assert_not_called() + + +class TestRecordTokenUsage: + def test_should_write_active_key_with_iso_timestamp_and_ttl(self): + """Test record_token_usage writes usage timestamp with one-hour TTL.""" + # Arrange + auth_token = "token-123" + scope = "dataset" + fixed_time = datetime(2026, 2, 24, 12, 0, 0) + expected_key = ApiTokenCache.make_active_key(auth_token, scope) + + with ( + patch.object(api_token_service_module, "naive_utc_now", return_value=fixed_time), + patch.object(api_token_service_module, "redis_client") as mock_redis, + ): + # Act + api_token_service_module.record_token_usage(auth_token, scope) + + # Assert + mock_redis.set.assert_called_once_with(expected_key, fixed_time.isoformat(), ex=3600) + + def test_should_not_raise_when_redis_write_fails(self): + """Test record_token_usage swallows Redis errors.""" + # Arrange + with patch.object(api_token_service_module, "redis_client") as mock_redis: + mock_redis.set.side_effect = Exception("redis unavailable") + + # Act / Assert + api_token_service_module.record_token_usage("token-123", "app") + + +class TestFetchTokenWithSingleFlight: + def test_should_return_cached_token_when_lock_acquired_and_cache_filled(self): + """Test single-flight returns cache when another request already populated it.""" + # Arrange + auth_token = "token-123" + scope = "app" + cached_token = CachedApiToken( + id="id-1", + app_id="app-1", + tenant_id="tenant-1", + type="app", + token=auth_token, + last_used_at=None, + created_at=None, + ) + + lock = MagicMock() + lock.acquire.return_value = True + + with ( + patch.object(api_token_service_module, "redis_client") as mock_redis, + patch.object(api_token_service_module.ApiTokenCache, "get", return_value=cached_token) as mock_cache_get, + patch.object(api_token_service_module, "query_token_from_db") as mock_query_db, + ): + mock_redis.lock.return_value = lock + + # Act + result = api_token_service_module.fetch_token_with_single_flight(auth_token, scope) + + # Assert + assert result == cached_token + mock_redis.lock.assert_called_once_with( + f"api_token_query_lock:{scope}:{auth_token}", + timeout=10, + blocking_timeout=5, + ) + lock.acquire.assert_called_once_with(blocking=True) + lock.release.assert_called_once() + mock_cache_get.assert_called_once_with(auth_token, scope) + mock_query_db.assert_not_called() + + def test_should_query_db_when_lock_acquired_and_cache_missed(self): + """Test single-flight queries DB when cache remains empty after lock acquisition.""" + # Arrange + auth_token = "token-123" + scope = "app" + db_token = MagicMock() + + lock = MagicMock() + lock.acquire.return_value = True + + with ( + patch.object(api_token_service_module, "redis_client") as mock_redis, + patch.object(api_token_service_module.ApiTokenCache, "get", return_value=None), + patch.object(api_token_service_module, "query_token_from_db", return_value=db_token) as mock_query_db, + ): + mock_redis.lock.return_value = lock + + # Act + result = api_token_service_module.fetch_token_with_single_flight(auth_token, scope) + + # Assert + assert result == db_token + mock_query_db.assert_called_once_with(auth_token, scope) + lock.release.assert_called_once() + + def test_should_query_db_directly_when_lock_not_acquired(self): + """Test lock timeout branch falls back to direct DB query.""" + # Arrange + auth_token = "token-123" + scope = "app" + db_token = MagicMock() + + lock = MagicMock() + lock.acquire.return_value = False + + with ( + patch.object(api_token_service_module, "redis_client") as mock_redis, + patch.object(api_token_service_module.ApiTokenCache, "get") as mock_cache_get, + patch.object(api_token_service_module, "query_token_from_db", return_value=db_token) as mock_query_db, + ): + mock_redis.lock.return_value = lock + + # Act + result = api_token_service_module.fetch_token_with_single_flight(auth_token, scope) + + # Assert + assert result == db_token + mock_cache_get.assert_not_called() + mock_query_db.assert_called_once_with(auth_token, scope) + lock.release.assert_not_called() + + def test_should_reraise_unauthorized_from_db_query(self): + """Test Unauthorized from DB query is propagated unchanged.""" + # Arrange + auth_token = "token-123" + scope = "app" + lock = MagicMock() + lock.acquire.return_value = True + + with ( + patch.object(api_token_service_module, "redis_client") as mock_redis, + patch.object(api_token_service_module.ApiTokenCache, "get", return_value=None), + patch.object( + api_token_service_module, + "query_token_from_db", + side_effect=Unauthorized("Access token is invalid"), + ), + ): + mock_redis.lock.return_value = lock + + # Act / Assert + with pytest.raises(Unauthorized, match="Access token is invalid"): + api_token_service_module.fetch_token_with_single_flight(auth_token, scope) + + lock.release.assert_called_once() + + def test_should_fallback_to_db_query_when_lock_raises_exception(self): + """Test Redis lock errors fall back to direct DB query.""" + # Arrange + auth_token = "token-123" + scope = "app" + db_token = MagicMock() + + lock = MagicMock() + lock.acquire.side_effect = RuntimeError("redis lock error") + + with ( + patch.object(api_token_service_module, "redis_client") as mock_redis, + patch.object(api_token_service_module, "query_token_from_db", return_value=db_token) as mock_query_db, + ): + mock_redis.lock.return_value = lock + + # Act + result = api_token_service_module.fetch_token_with_single_flight(auth_token, scope) + + # Assert + assert result == db_token + mock_query_db.assert_called_once_with(auth_token, scope) + + +class TestApiTokenCacheTenantBranches: + @patch("services.api_token_service.redis_client") + def test_delete_with_scope_should_remove_from_tenant_index_when_tenant_found(self, mock_redis): + """Test scoped delete removes cache key and tenant index membership.""" + # Arrange + token = "token-123" + scope = "app" + cache_key = ApiTokenCache._make_cache_key(token, scope) + cached_token = CachedApiToken( + id="id-1", + app_id="app-1", + tenant_id="tenant-1", + type="app", + token=token, + last_used_at=None, + created_at=None, + ) + mock_redis.get.return_value = cached_token.model_dump_json().encode("utf-8") + + with patch.object(ApiTokenCache, "_remove_from_tenant_index") as mock_remove_index: + # Act + result = ApiTokenCache.delete(token, scope) + + # Assert + assert result is True + mock_redis.delete.assert_called_once_with(cache_key) + mock_remove_index.assert_called_once_with("tenant-1", cache_key) + + @patch("services.api_token_service.redis_client") + def test_invalidate_by_tenant_should_delete_all_indexed_cache_keys(self, mock_redis): + """Test tenant invalidation deletes indexed cache entries and index key.""" + # Arrange + tenant_id = "tenant-1" + index_key = ApiTokenCache._make_tenant_index_key(tenant_id) + mock_redis.smembers.return_value = { + b"api_token:app:token-1", + b"api_token:any:token-2", + } + + # Act + result = ApiTokenCache.invalidate_by_tenant(tenant_id) + + # Assert + assert result is True + mock_redis.smembers.assert_called_once_with(index_key) + mock_redis.delete.assert_any_call("api_token:app:token-1") + mock_redis.delete.assert_any_call("api_token:any:token-2") + mock_redis.delete.assert_any_call(index_key) + + +class TestApiTokenCacheCoreBranches: + def test_cached_api_token_repr_should_include_id_and_type(self): + """Test CachedApiToken __repr__ includes key identity fields.""" + token = CachedApiToken( + id="id-123", + app_id="app-123", + tenant_id="tenant-123", + type="app", + token="token-123", + last_used_at=None, + created_at=None, + ) + + assert repr(token) == "" + + def test_serialize_token_should_handle_cached_api_token_instances(self): + """Test serialization path when input is already a CachedApiToken.""" + token = CachedApiToken( + id="id-123", + app_id="app-123", + tenant_id="tenant-123", + type="app", + token="token-123", + last_used_at=None, + created_at=None, + ) + + serialized = ApiTokenCache._serialize_token(token) + + assert isinstance(serialized, bytes) + assert b'"id":"id-123"' in serialized + assert b'"token":"token-123"' in serialized + + def test_deserialize_token_should_return_none_for_null_markers(self): + """Test null cache marker deserializes to None.""" + assert ApiTokenCache._deserialize_token("null") is None + assert ApiTokenCache._deserialize_token(b"null") is None + + def test_deserialize_token_should_return_none_for_invalid_payload(self): + """Test invalid serialized payload returns None.""" + assert ApiTokenCache._deserialize_token("not-json") is None + + @patch("services.api_token_service.redis_client") + def test_get_should_return_none_on_cache_miss(self, mock_redis): + """Test cache miss branch in ApiTokenCache.get.""" + mock_redis.get.return_value = None + + result = ApiTokenCache.get("token-123", "app") + + assert result is None + mock_redis.get.assert_called_once_with("api_token:app:token-123") + + @patch("services.api_token_service.redis_client") + def test_get_should_deserialize_cached_payload_on_cache_hit(self, mock_redis): + """Test cache hit branch in ApiTokenCache.get.""" + token = CachedApiToken( + id="id-123", + app_id="app-123", + tenant_id="tenant-123", + type="app", + token="token-123", + last_used_at=None, + created_at=None, + ) + mock_redis.get.return_value = token.model_dump_json().encode("utf-8") + + result = ApiTokenCache.get("token-123", "app") + + assert isinstance(result, CachedApiToken) + assert result.id == "id-123" + + @patch("services.api_token_service.redis_client") + def test_add_to_tenant_index_should_skip_when_tenant_id_missing(self, mock_redis): + """Test tenant index update exits early for missing tenant id.""" + ApiTokenCache._add_to_tenant_index(None, "api_token:app:token-123") + + mock_redis.sadd.assert_not_called() + mock_redis.expire.assert_not_called() + + @patch("services.api_token_service.redis_client") + def test_add_to_tenant_index_should_swallow_index_update_errors(self, mock_redis): + """Test tenant index update handles Redis write errors gracefully.""" + mock_redis.sadd.side_effect = Exception("redis down") + + ApiTokenCache._add_to_tenant_index("tenant-123", "api_token:app:token-123") + + mock_redis.sadd.assert_called_once() + + @patch("services.api_token_service.redis_client") + def test_remove_from_tenant_index_should_skip_when_tenant_id_missing(self, mock_redis): + """Test tenant index removal exits early for missing tenant id.""" + ApiTokenCache._remove_from_tenant_index(None, "api_token:app:token-123") + + mock_redis.srem.assert_not_called() + + @patch("services.api_token_service.redis_client") + def test_remove_from_tenant_index_should_swallow_redis_errors(self, mock_redis): + """Test tenant index removal handles Redis errors gracefully.""" + mock_redis.srem.side_effect = Exception("redis down") + + ApiTokenCache._remove_from_tenant_index("tenant-123", "api_token:app:token-123") + + mock_redis.srem.assert_called_once() + + @patch("services.api_token_service.redis_client") + def test_set_should_return_false_when_cache_write_raises_exception(self, mock_redis): + """Test set returns False when Redis setex fails.""" + mock_redis.setex.side_effect = Exception("redis write failed") + api_token = MagicMock() + api_token.id = "id-123" + api_token.app_id = "app-123" + api_token.tenant_id = "tenant-123" + api_token.type = "app" + api_token.token = "token-123" + api_token.last_used_at = None + api_token.created_at = None + + result = ApiTokenCache.set("token-123", "app", api_token) + + assert result is False + + @patch("services.api_token_service.redis_client") + def test_delete_without_scope_should_return_false_when_scan_fails(self, mock_redis): + """Test delete(scope=None) returns False when scan_iter raises.""" + mock_redis.scan_iter.side_effect = Exception("scan failed") + + result = ApiTokenCache.delete("token-123", None) + + assert result is False + + @patch("services.api_token_service.redis_client") + def test_delete_with_scope_should_continue_when_tenant_lookup_raises(self, mock_redis): + """Test scoped delete still succeeds when tenant lookup from cache fails.""" + token = "token-123" + scope = "app" + cache_key = ApiTokenCache._make_cache_key(token, scope) + mock_redis.get.side_effect = Exception("get failed") + + result = ApiTokenCache.delete(token, scope) + + assert result is True + mock_redis.delete.assert_called_once_with(cache_key) + + @patch("services.api_token_service.redis_client") + def test_delete_with_scope_should_return_false_when_delete_raises(self, mock_redis): + """Test scoped delete returns False when delete operation fails.""" + token = "token-123" + scope = "app" + mock_redis.get.return_value = None + mock_redis.delete.side_effect = Exception("delete failed") + + result = ApiTokenCache.delete(token, scope) + + assert result is False + + @patch("services.api_token_service.redis_client") + def test_invalidate_by_tenant_should_return_true_when_index_not_found(self, mock_redis): + """Test tenant invalidation returns True when tenant index is empty.""" + mock_redis.smembers.return_value = set() + + result = ApiTokenCache.invalidate_by_tenant("tenant-123") + + assert result is True + mock_redis.delete.assert_not_called() + + @patch("services.api_token_service.redis_client") + def test_invalidate_by_tenant_should_return_false_when_redis_raises(self, mock_redis): + """Test tenant invalidation returns False when Redis operation fails.""" + mock_redis.smembers.side_effect = Exception("redis failed") + + result = ApiTokenCache.invalidate_by_tenant("tenant-123") + + assert result is False diff --git a/api/tests/unit_tests/services/test_app_dsl_service.py b/api/tests/unit_tests/services/test_app_dsl_service.py new file mode 100644 index 0000000000..7e82f79860 --- /dev/null +++ b/api/tests/unit_tests/services/test_app_dsl_service.py @@ -0,0 +1,920 @@ +import base64 +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest +import yaml + +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) +from dify_graph.enums import BuiltinNodeTypes +from models import Account, AppMode +from models.model import IconType +from services import app_dsl_service +from services.app_dsl_service import ( + AppDslService, + CheckDependenciesPendingData, + ImportMode, + ImportStatus, + PendingData, + _check_version_compatibility, +) + + +class _FakeHttpResponse: + def __init__(self, content: bytes, *, raises: Exception | None = None): + self.content = content + self._raises = raises + + def raise_for_status(self) -> None: + if self._raises is not None: + raise self._raises + + +def _account_mock(*, tenant_id: str = "tenant-1", account_id: str = "account-1") -> MagicMock: + account = MagicMock(spec=Account) + account.current_tenant_id = tenant_id + account.id = account_id + return account + + +def _yaml_dump(data: dict) -> str: + return yaml.safe_dump(data, allow_unicode=True) + + +def _workflow_yaml(*, version: str = app_dsl_service.CURRENT_DSL_VERSION) -> str: + return _yaml_dump( + { + "version": version, + "kind": "app", + "app": {"name": "My App", "mode": AppMode.WORKFLOW.value}, + "workflow": {"graph": {"nodes": []}, "features": {}}, + } + ) + + +def test_check_version_compatibility_invalid_version_returns_failed(): + assert _check_version_compatibility("not-a-version") == ImportStatus.FAILED + + +def test_check_version_compatibility_newer_version_returns_pending(): + assert _check_version_compatibility("99.0.0") == ImportStatus.PENDING + + +def test_check_version_compatibility_major_older_returns_pending(monkeypatch): + monkeypatch.setattr(app_dsl_service, "CURRENT_DSL_VERSION", "1.0.0") + assert _check_version_compatibility("0.9.9") == ImportStatus.PENDING + + +def test_check_version_compatibility_minor_older_returns_completed_with_warnings(): + assert _check_version_compatibility("0.5.0") == ImportStatus.COMPLETED_WITH_WARNINGS + + +def test_check_version_compatibility_equal_returns_completed(): + assert _check_version_compatibility(app_dsl_service.CURRENT_DSL_VERSION) == ImportStatus.COMPLETED + + +def test_import_app_invalid_import_mode_raises_value_error(): + service = AppDslService(MagicMock()) + with pytest.raises(ValueError, match="Invalid import_mode"): + service.import_app(account=_account_mock(), import_mode="invalid-mode", yaml_content="version: '0.1.0'") + + +def test_import_app_yaml_url_requires_url(): + service = AppDslService(MagicMock()) + result = service.import_app(account=_account_mock(), import_mode=ImportMode.YAML_URL, yaml_url=None) + assert result.status == ImportStatus.FAILED + assert "yaml_url is required" in result.error + + +def test_import_app_yaml_content_requires_content(): + service = AppDslService(MagicMock()) + result = service.import_app(account=_account_mock(), import_mode=ImportMode.YAML_CONTENT, yaml_content=None) + assert result.status == ImportStatus.FAILED + assert "yaml_content is required" in result.error + + +def test_import_app_yaml_url_fetch_error_returns_failed(monkeypatch): + def fake_get(_url: str, **_kwargs): + raise RuntimeError("boom") + + monkeypatch.setattr(app_dsl_service.ssrf_proxy, "get", fake_get) + + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), import_mode=ImportMode.YAML_URL, yaml_url="https://example.com/a.yml" + ) + assert result.status == ImportStatus.FAILED + assert "Error fetching YAML from URL: boom" in result.error + + +def test_import_app_yaml_url_empty_content_returns_failed(monkeypatch): + def fake_get(_url: str, **_kwargs): + return _FakeHttpResponse(b"") + + monkeypatch.setattr(app_dsl_service.ssrf_proxy, "get", fake_get) + + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), import_mode=ImportMode.YAML_URL, yaml_url="https://example.com/a.yml" + ) + assert result.status == ImportStatus.FAILED + assert "Empty content" in result.error + + +def test_import_app_yaml_url_file_too_large_returns_failed(monkeypatch): + def fake_get(_url: str, **_kwargs): + return _FakeHttpResponse(b"x" * (app_dsl_service.DSL_MAX_SIZE + 1)) + + monkeypatch.setattr(app_dsl_service.ssrf_proxy, "get", fake_get) + + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), import_mode=ImportMode.YAML_URL, yaml_url="https://example.com/a.yml" + ) + assert result.status == ImportStatus.FAILED + assert "File size exceeds" in result.error + + +def test_import_app_yaml_not_mapping_returns_failed(): + service = AppDslService(MagicMock()) + result = service.import_app(account=_account_mock(), import_mode=ImportMode.YAML_CONTENT, yaml_content="[]") + assert result.status == ImportStatus.FAILED + assert "content must be a mapping" in result.error + + +def test_import_app_version_not_str_returns_failed(): + service = AppDslService(MagicMock()) + yaml_content = _yaml_dump({"version": 1, "kind": "app", "app": {"name": "x", "mode": "workflow"}}) + result = service.import_app(account=_account_mock(), import_mode=ImportMode.YAML_CONTENT, yaml_content=yaml_content) + assert result.status == ImportStatus.FAILED + assert "Invalid version type" in result.error + + +def test_import_app_missing_app_data_returns_failed(): + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), + import_mode=ImportMode.YAML_CONTENT, + yaml_content=_yaml_dump({"version": "0.6.0", "kind": "app"}), + ) + assert result.status == ImportStatus.FAILED + assert "Missing app data" in result.error + + +def test_import_app_app_id_not_found_returns_failed(monkeypatch): + def fake_select(_model): + stmt = MagicMock() + stmt.where.return_value = stmt + return stmt + + monkeypatch.setattr(app_dsl_service, "select", fake_select) + + session = MagicMock() + session.scalar.return_value = None + service = AppDslService(session) + result = service.import_app( + account=_account_mock(), + import_mode=ImportMode.YAML_CONTENT, + yaml_content=_workflow_yaml(), + app_id="missing-app", + ) + assert result.status == ImportStatus.FAILED + assert result.error == "App not found" + + +def test_import_app_overwrite_only_allows_workflow_and_advanced_chat(monkeypatch): + def fake_select(_model): + stmt = MagicMock() + stmt.where.return_value = stmt + return stmt + + monkeypatch.setattr(app_dsl_service, "select", fake_select) + + existing_app = SimpleNamespace(id="app-1", tenant_id="tenant-1", mode=AppMode.CHAT.value) + + session = MagicMock() + session.scalar.return_value = existing_app + service = AppDslService(session) + result = service.import_app( + account=_account_mock(), + import_mode=ImportMode.YAML_CONTENT, + yaml_content=_workflow_yaml(), + app_id="app-1", + ) + assert result.status == ImportStatus.FAILED + assert "Only workflow or advanced chat apps" in result.error + + +def test_import_app_pending_stores_import_info_in_redis(): + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), + import_mode=ImportMode.YAML_CONTENT, + yaml_content=_workflow_yaml(version="99.0.0"), + name="n", + description="d", + icon_type="emoji", + icon="i", + icon_background="#000000", + ) + assert result.status == ImportStatus.PENDING + assert result.imported_dsl_version == "99.0.0" + + app_dsl_service.redis_client.setex.assert_called_once() + call = app_dsl_service.redis_client.setex.call_args + redis_key = call.args[0] + assert redis_key.startswith(app_dsl_service.IMPORT_INFO_REDIS_KEY_PREFIX) + + +def test_import_app_completed_uses_declared_dependencies(monkeypatch): + dependencies_payload = [{"id": "langgenius/google", "version": "1.0.0"}] + + plugin_deps = [SimpleNamespace(model_dump=lambda: dependencies_payload[0])] + monkeypatch.setattr( + app_dsl_service.PluginDependency, + "model_validate", + lambda d: plugin_deps[0], + ) + + created_app = SimpleNamespace(id="app-new", mode=AppMode.WORKFLOW.value, tenant_id="tenant-1") + monkeypatch.setattr(AppDslService, "_create_or_update_app", lambda *_args, **_kwargs: created_app) + + draft_var_service = MagicMock() + monkeypatch.setattr(app_dsl_service, "WorkflowDraftVariableService", lambda *args, **kwargs: draft_var_service) + + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), + import_mode=ImportMode.YAML_CONTENT, + yaml_content=_yaml_dump( + { + "version": app_dsl_service.CURRENT_DSL_VERSION, + "kind": "app", + "app": {"name": "My App", "mode": AppMode.WORKFLOW.value}, + "workflow": {"graph": {"nodes": []}, "features": {}}, + "dependencies": dependencies_payload, + } + ), + ) + + assert result.status == ImportStatus.COMPLETED + assert result.app_id == "app-new" + draft_var_service.delete_workflow_variables.assert_called_once_with(app_id="app-new") + + +@pytest.mark.parametrize("has_workflow", [True, False]) +def test_import_app_legacy_versions_extract_dependencies(monkeypatch, has_workflow: bool): + monkeypatch.setattr( + AppDslService, + "_extract_dependencies_from_workflow_graph", + lambda *_args, **_kwargs: ["from-workflow"], + ) + monkeypatch.setattr( + AppDslService, + "_extract_dependencies_from_model_config", + lambda *_args, **_kwargs: ["from-model-config"], + ) + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "generate_latest_dependencies", + lambda deps: [SimpleNamespace(model_dump=lambda: {"dep": deps[0]})], + ) + + created_app = SimpleNamespace(id="app-legacy", mode=AppMode.WORKFLOW.value, tenant_id="tenant-1") + monkeypatch.setattr(AppDslService, "_create_or_update_app", lambda *_args, **_kwargs: created_app) + + draft_var_service = MagicMock() + monkeypatch.setattr(app_dsl_service, "WorkflowDraftVariableService", lambda *args, **kwargs: draft_var_service) + + data: dict = { + "version": "0.1.5", + "kind": "app", + "app": {"name": "Legacy", "mode": AppMode.WORKFLOW.value}, + } + if has_workflow: + data["workflow"] = {"graph": {"nodes": []}, "features": {}} + else: + data["model_config"] = {"model": {"provider": "openai"}} + + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), import_mode=ImportMode.YAML_CONTENT, yaml_content=_yaml_dump(data) + ) + assert result.status == ImportStatus.COMPLETED_WITH_WARNINGS + draft_var_service.delete_workflow_variables.assert_called_once_with(app_id="app-legacy") + + +def test_import_app_yaml_error_returns_failed(monkeypatch): + def bad_safe_load(_content: str): + raise yaml.YAMLError("bad") + + monkeypatch.setattr(app_dsl_service.yaml, "safe_load", bad_safe_load) + + service = AppDslService(MagicMock()) + result = service.import_app(account=_account_mock(), import_mode=ImportMode.YAML_CONTENT, yaml_content="x: y") + assert result.status == ImportStatus.FAILED + assert result.error.startswith("Invalid YAML format:") + + +def test_import_app_unexpected_error_returns_failed(monkeypatch): + monkeypatch.setattr( + AppDslService, "_create_or_update_app", lambda *_args, **_kwargs: (_ for _ in ()).throw(ValueError("oops")) + ) + + service = AppDslService(MagicMock()) + result = service.import_app( + account=_account_mock(), import_mode=ImportMode.YAML_CONTENT, yaml_content=_workflow_yaml() + ) + assert result.status == ImportStatus.FAILED + assert result.error == "oops" + + +def test_confirm_import_expired_returns_failed(): + service = AppDslService(MagicMock()) + result = service.confirm_import(import_id="import-1", account=_account_mock()) + assert result.status == ImportStatus.FAILED + assert "expired" in result.error + + +def test_confirm_import_invalid_pending_data_type_returns_failed(): + app_dsl_service.redis_client.get.return_value = 123 + service = AppDslService(MagicMock()) + result = service.confirm_import(import_id="import-1", account=_account_mock()) + assert result.status == ImportStatus.FAILED + assert "Invalid import information" in result.error + + +def test_confirm_import_success_deletes_redis_key(monkeypatch): + def fake_select(_model): + stmt = MagicMock() + stmt.where.return_value = stmt + return stmt + + monkeypatch.setattr(app_dsl_service, "select", fake_select) + + session = MagicMock() + session.scalar.return_value = None + service = AppDslService(session) + + pending = PendingData( + import_mode=ImportMode.YAML_CONTENT, + yaml_content=_workflow_yaml(), + name="name", + description="desc", + icon_type="emoji", + icon="🤖", + icon_background="#fff", + app_id=None, + ) + app_dsl_service.redis_client.get.return_value = pending.model_dump_json() + + created_app = SimpleNamespace(id="confirmed-app", mode=AppMode.WORKFLOW.value, tenant_id="tenant-1") + monkeypatch.setattr(AppDslService, "_create_or_update_app", lambda *_args, **_kwargs: created_app) + + result = service.confirm_import(import_id="import-1", account=_account_mock()) + assert result.status == ImportStatus.COMPLETED + assert result.app_id == "confirmed-app" + app_dsl_service.redis_client.delete.assert_called_once() + + +def test_confirm_import_exception_returns_failed(monkeypatch): + app_dsl_service.redis_client.get.return_value = "not-json" + monkeypatch.setattr( + PendingData, "model_validate_json", lambda *_args, **_kwargs: (_ for _ in ()).throw(ValueError("bad")) + ) + + service = AppDslService(MagicMock()) + result = service.confirm_import(import_id="import-1", account=_account_mock()) + assert result.status == ImportStatus.FAILED + assert result.error == "bad" + + +def test_check_dependencies_returns_empty_when_no_redis_data(): + service = AppDslService(MagicMock()) + result = service.check_dependencies(app_model=SimpleNamespace(id="app-1", tenant_id="tenant-1")) + assert result.leaked_dependencies == [] + + +def test_check_dependencies_calls_analysis_service(monkeypatch): + pending = CheckDependenciesPendingData(dependencies=[], app_id="app-1").model_dump_json() + app_dsl_service.redis_client.get.return_value = pending + dep = app_dsl_service.PluginDependency.model_validate( + {"type": "package", "value": {"plugin_unique_identifier": "acme/foo", "version": "1.0.0"}} + ) + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "get_leaked_dependencies", + lambda *, tenant_id, dependencies: [dep], + ) + + service = AppDslService(MagicMock()) + result = service.check_dependencies(app_model=SimpleNamespace(id="app-1", tenant_id="tenant-1")) + assert len(result.leaked_dependencies) == 1 + + +def test_create_or_update_app_missing_mode_raises(): + service = AppDslService(MagicMock()) + with pytest.raises(ValueError, match="loss app mode"): + service._create_or_update_app(app=None, data={"app": {}}, account=_account_mock()) + + +def test_create_or_update_app_existing_app_updates_fields(monkeypatch): + fixed_now = object() + monkeypatch.setattr(app_dsl_service, "naive_utc_now", lambda: fixed_now) + + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = None + monkeypatch.setattr(app_dsl_service, "WorkflowService", lambda: workflow_service) + monkeypatch.setattr( + app_dsl_service.variable_factory, + "build_environment_variable_from_mapping", + lambda _m: SimpleNamespace(kind="env"), + ) + monkeypatch.setattr( + app_dsl_service.variable_factory, + "build_conversation_variable_from_mapping", + lambda _m: SimpleNamespace(kind="conv"), + ) + + app = SimpleNamespace( + id="app-1", + tenant_id="tenant-1", + mode=AppMode.WORKFLOW.value, + name="old", + description="old-desc", + icon_type=IconType.EMOJI, + icon="old-icon", + icon_background="#111111", + updated_by=None, + updated_at=None, + app_model_config=None, + ) + service = AppDslService(MagicMock()) + updated = service._create_or_update_app( + app=app, + data={ + "app": {"mode": AppMode.WORKFLOW.value, "name": "yaml-name", "icon_type": IconType.IMAGE, "icon": "X"}, + "workflow": {"graph": {"nodes": []}, "features": {}}, + }, + account=_account_mock(), + name="override-name", + description=None, + icon_background="#222222", + ) + assert updated is app + assert app.name == "override-name" + assert app.icon_type == IconType.IMAGE + assert app.icon == "X" + assert app.icon_background == "#222222" + assert app.updated_at is fixed_now + + +def test_create_or_update_app_new_app_requires_tenant(): + account = _account_mock() + account.current_tenant_id = None + service = AppDslService(MagicMock()) + with pytest.raises(ValueError, match="Current tenant is not set"): + service._create_or_update_app( + app=None, + data={"app": {"mode": AppMode.WORKFLOW.value, "name": "n"}}, + account=account, + ) + + +def test_create_or_update_app_creates_workflow_app_and_saves_dependencies(monkeypatch): + class DummyApp(SimpleNamespace): + pass + + monkeypatch.setattr(app_dsl_service, "App", DummyApp) + + sent: list[tuple[str, object]] = [] + monkeypatch.setattr(app_dsl_service.app_was_created, "send", lambda app, account: sent.append((app.id, account.id))) + + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = SimpleNamespace(unique_hash="uh") + monkeypatch.setattr(app_dsl_service, "WorkflowService", lambda: workflow_service) + + monkeypatch.setattr( + app_dsl_service.variable_factory, + "build_environment_variable_from_mapping", + lambda _m: SimpleNamespace(kind="env"), + ) + monkeypatch.setattr( + app_dsl_service.variable_factory, + "build_conversation_variable_from_mapping", + lambda _m: SimpleNamespace(kind="conv"), + ) + + monkeypatch.setattr( + AppDslService, "decrypt_dataset_id", lambda *_args, **_kwargs: "00000000-0000-0000-0000-000000000000" + ) + + session = MagicMock() + service = AppDslService(session) + deps = [ + app_dsl_service.PluginDependency.model_validate( + {"type": "package", "value": {"plugin_unique_identifier": "acme/foo", "version": "1.0.0"}} + ) + ] + data = { + "app": {"mode": AppMode.WORKFLOW.value, "name": "n"}, + "workflow": { + "environment_variables": [{"x": 1}], + "conversation_variables": [{"y": 2}], + "graph": { + "nodes": [ + {"data": {"type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "dataset_ids": ["enc-1", "enc-2"]}}, + ] + }, + "features": {}, + }, + } + + app = service._create_or_update_app(app=None, data=data, account=_account_mock(), dependencies=deps) + + assert app.tenant_id == "tenant-1" + assert sent == [(app.id, "account-1")] + app_dsl_service.redis_client.setex.assert_called() + workflow_service.sync_draft_workflow.assert_called_once() + + passed_graph = workflow_service.sync_draft_workflow.call_args.kwargs["graph"] + dataset_ids = passed_graph["nodes"][0]["data"]["dataset_ids"] + assert dataset_ids == ["00000000-0000-0000-0000-000000000000", "00000000-0000-0000-0000-000000000000"] + + +def test_create_or_update_app_workflow_missing_workflow_data_raises(): + service = AppDslService(MagicMock()) + with pytest.raises(ValueError, match="Missing workflow data"): + service._create_or_update_app( + app=SimpleNamespace( + id="a", + tenant_id="t", + mode=AppMode.WORKFLOW.value, + name="n", + description="d", + icon_background="#fff", + app_model_config=None, + ), + data={"app": {"mode": AppMode.WORKFLOW.value}}, + account=_account_mock(), + ) + + +def test_create_or_update_app_chat_requires_model_config(): + service = AppDslService(MagicMock()) + with pytest.raises(ValueError, match="Missing model_config"): + service._create_or_update_app( + app=SimpleNamespace( + id="a", + tenant_id="t", + mode=AppMode.CHAT.value, + name="n", + description="d", + icon_background="#fff", + app_model_config=None, + ), + data={"app": {"mode": AppMode.CHAT.value}}, + account=_account_mock(), + ) + + +def test_create_or_update_app_chat_creates_model_config_and_sends_event(monkeypatch): + class DummyModelConfig(SimpleNamespace): + def from_model_config_dict(self, _cfg: dict): + return self + + monkeypatch.setattr(app_dsl_service, "AppModelConfig", DummyModelConfig) + + sent: list[str] = [] + monkeypatch.setattr( + app_dsl_service.app_model_config_was_updated, "send", lambda app, app_model_config: sent.append(app.id) + ) + + session = MagicMock() + service = AppDslService(session) + + app = SimpleNamespace( + id="app-1", + tenant_id="tenant-1", + mode=AppMode.CHAT.value, + name="n", + description="d", + icon_background="#fff", + app_model_config=None, + ) + service._create_or_update_app( + app=app, + data={"app": {"mode": AppMode.CHAT.value}, "model_config": {"model": {"provider": "openai"}}}, + account=_account_mock(), + ) + + assert app.app_model_config_id is not None + assert sent == ["app-1"] + session.add.assert_called() + + +def test_create_or_update_app_invalid_mode_raises(): + service = AppDslService(MagicMock()) + with pytest.raises(ValueError, match="Invalid app mode"): + service._create_or_update_app( + app=SimpleNamespace( + id="a", + tenant_id="t", + mode=AppMode.RAG_PIPELINE.value, + name="n", + description="d", + icon_background="#fff", + app_model_config=None, + ), + data={"app": {"mode": AppMode.RAG_PIPELINE.value}}, + account=_account_mock(), + ) + + +def test_export_dsl_delegates_by_mode(monkeypatch): + workflow_calls: list[bool] = [] + model_calls: list[bool] = [] + monkeypatch.setattr(AppDslService, "_append_workflow_export_data", lambda **_kwargs: workflow_calls.append(True)) + monkeypatch.setattr( + AppDslService, "_append_model_config_export_data", lambda *_args, **_kwargs: model_calls.append(True) + ) + + workflow_app = SimpleNamespace( + mode=AppMode.WORKFLOW.value, + tenant_id="tenant-1", + name="n", + icon="i", + icon_type="emoji", + icon_background="#fff", + description="d", + use_icon_as_answer_icon=False, + app_model_config=None, + ) + AppDslService.export_dsl(workflow_app) + assert workflow_calls == [True] + + chat_app = SimpleNamespace( + mode=AppMode.CHAT.value, + tenant_id="tenant-1", + name="n", + icon="i", + icon_type="emoji", + icon_background="#fff", + description="d", + use_icon_as_answer_icon=False, + app_model_config=SimpleNamespace(to_dict=lambda: {"agent_mode": {"tools": []}}), + ) + AppDslService.export_dsl(chat_app) + assert model_calls == [True] + + +def test_append_workflow_export_data_filters_and_overrides(monkeypatch): + workflow_dict = { + "graph": { + "nodes": [ + {"data": {"type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL, "dataset_ids": ["d1", "d2"]}}, + {"data": {"type": BuiltinNodeTypes.TOOL, "credential_id": "secret"}}, + { + "data": { + "type": BuiltinNodeTypes.AGENT, + "agent_parameters": {"tools": {"value": [{"credential_id": "secret"}]}}, + } + }, + {"data": {"type": TRIGGER_SCHEDULE_NODE_TYPE, "config": {"x": 1}}}, + {"data": {"type": TRIGGER_WEBHOOK_NODE_TYPE, "webhook_url": "x", "webhook_debug_url": "y"}}, + {"data": {"type": TRIGGER_PLUGIN_NODE_TYPE, "subscription_id": "s"}}, + ] + } + } + + workflow = SimpleNamespace(to_dict=lambda *, include_secret: workflow_dict) + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = workflow + monkeypatch.setattr(app_dsl_service, "WorkflowService", lambda: workflow_service) + + monkeypatch.setattr( + AppDslService, "encrypt_dataset_id", lambda *, dataset_id, tenant_id: f"enc:{tenant_id}:{dataset_id}" + ) + monkeypatch.setattr( + TriggerScheduleNode := app_dsl_service.TriggerScheduleNode, + "get_default_config", + lambda: {"config": {"default": True}}, + ) + monkeypatch.setattr(AppDslService, "_extract_dependencies_from_workflow", lambda *_args, **_kwargs: ["dep-1"]) + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "generate_dependencies", + lambda *, tenant_id, dependencies: [ + SimpleNamespace(model_dump=lambda: {"tenant": tenant_id, "dep": dependencies[0]}) + ], + ) + monkeypatch.setattr(app_dsl_service, "jsonable_encoder", lambda x: x) + + export_data: dict = {} + AppDslService._append_workflow_export_data( + export_data=export_data, + app_model=SimpleNamespace(tenant_id="tenant-1"), + include_secret=False, + workflow_id=None, + ) + + nodes = export_data["workflow"]["graph"]["nodes"] + assert nodes[0]["data"]["dataset_ids"] == ["enc:tenant-1:d1", "enc:tenant-1:d2"] + assert "credential_id" not in nodes[1]["data"] + assert "credential_id" not in nodes[2]["data"]["agent_parameters"]["tools"]["value"][0] + assert nodes[3]["data"]["config"] == {"default": True} + assert nodes[4]["data"]["webhook_url"] == "" + assert nodes[4]["data"]["webhook_debug_url"] == "" + assert nodes[5]["data"]["subscription_id"] == "" + assert export_data["dependencies"] == [{"tenant": "tenant-1", "dep": "dep-1"}] + + +def test_append_workflow_export_data_missing_workflow_raises(monkeypatch): + workflow_service = MagicMock() + workflow_service.get_draft_workflow.return_value = None + monkeypatch.setattr(app_dsl_service, "WorkflowService", lambda: workflow_service) + + with pytest.raises(ValueError, match="Missing draft workflow configuration"): + AppDslService._append_workflow_export_data( + export_data={}, + app_model=SimpleNamespace(tenant_id="tenant-1"), + include_secret=False, + workflow_id=None, + ) + + +def test_append_model_config_export_data_filters_credential_id(monkeypatch): + monkeypatch.setattr(AppDslService, "_extract_dependencies_from_model_config", lambda *_args, **_kwargs: ["dep-1"]) + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "generate_dependencies", + lambda *, tenant_id, dependencies: [ + SimpleNamespace(model_dump=lambda: {"tenant": tenant_id, "dep": dependencies[0]}) + ], + ) + monkeypatch.setattr(app_dsl_service, "jsonable_encoder", lambda x: x) + + app_model_config = SimpleNamespace(to_dict=lambda: {"agent_mode": {"tools": [{"credential_id": "secret"}]}}) + app_model = SimpleNamespace(tenant_id="tenant-1", app_model_config=app_model_config) + export_data: dict = {} + + AppDslService._append_model_config_export_data(export_data, app_model) + assert export_data["model_config"]["agent_mode"]["tools"] == [{}] + assert export_data["dependencies"] == [{"tenant": "tenant-1", "dep": "dep-1"}] + + +def test_append_model_config_export_data_requires_app_config(): + with pytest.raises(ValueError, match="Missing app configuration"): + AppDslService._append_model_config_export_data({}, SimpleNamespace(app_model_config=None)) + + +def test_extract_dependencies_from_workflow_graph_covers_all_node_types(monkeypatch): + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "analyze_tool_dependency", + lambda provider_id: f"tool:{provider_id}", + ) + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "analyze_model_provider_dependency", + lambda provider: f"model:{provider}", + ) + + monkeypatch.setattr(app_dsl_service.ToolNodeData, "model_validate", lambda _d: SimpleNamespace(provider_id="p1")) + monkeypatch.setattr( + app_dsl_service.LLMNodeData, "model_validate", lambda _d: SimpleNamespace(model=SimpleNamespace(provider="m1")) + ) + monkeypatch.setattr( + app_dsl_service.QuestionClassifierNodeData, + "model_validate", + lambda _d: SimpleNamespace(model=SimpleNamespace(provider="m2")), + ) + monkeypatch.setattr( + app_dsl_service.ParameterExtractorNodeData, + "model_validate", + lambda _d: SimpleNamespace(model=SimpleNamespace(provider="m3")), + ) + + def kr_validate(_d): + return SimpleNamespace( + retrieval_mode="multiple", + multiple_retrieval_config=SimpleNamespace( + reranking_mode="weighted_score", + weights=SimpleNamespace(vector_setting=SimpleNamespace(embedding_provider_name="m4")), + reranking_model=None, + ), + single_retrieval_config=None, + ) + + monkeypatch.setattr(app_dsl_service.KnowledgeRetrievalNodeData, "model_validate", kr_validate) + + graph = { + "nodes": [ + {"data": {"type": BuiltinNodeTypes.TOOL}}, + {"data": {"type": BuiltinNodeTypes.LLM}}, + {"data": {"type": BuiltinNodeTypes.QUESTION_CLASSIFIER}}, + {"data": {"type": BuiltinNodeTypes.PARAMETER_EXTRACTOR}}, + {"data": {"type": BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL}}, + {"data": {"type": "unknown"}}, + ] + } + + deps = AppDslService._extract_dependencies_from_workflow_graph(graph) + assert deps == ["tool:p1", "model:m1", "model:m2", "model:m3", "model:m4"] + + +def test_extract_dependencies_from_workflow_graph_handles_exceptions(monkeypatch): + monkeypatch.setattr( + app_dsl_service.ToolNodeData, "model_validate", lambda _d: (_ for _ in ()).throw(ValueError("bad")) + ) + deps = AppDslService._extract_dependencies_from_workflow_graph( + {"nodes": [{"data": {"type": BuiltinNodeTypes.TOOL}}]} + ) + assert deps == [] + + +def test_extract_dependencies_from_model_config_parses_providers(monkeypatch): + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "analyze_model_provider_dependency", + lambda provider: f"model:{provider}", + ) + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "analyze_tool_dependency", + lambda provider_id: f"tool:{provider_id}", + ) + + deps = AppDslService._extract_dependencies_from_model_config( + { + "model": {"provider": "p1"}, + "dataset_configs": { + "datasets": {"datasets": [{"reranking_model": {"reranking_provider_name": {"provider": "p2"}}}]} + }, + "agent_mode": {"tools": [{"provider_id": "t1"}]}, + } + ) + assert deps == ["model:p1", "model:p2", "tool:t1"] + + +def test_extract_dependencies_from_model_config_handles_exceptions(monkeypatch): + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "analyze_model_provider_dependency", + lambda _p: (_ for _ in ()).throw(ValueError("bad")), + ) + deps = AppDslService._extract_dependencies_from_model_config({"model": {"provider": "p1"}}) + assert deps == [] + + +def test_get_leaked_dependencies_empty_returns_empty(): + assert AppDslService.get_leaked_dependencies("tenant-1", []) == [] + + +def test_get_leaked_dependencies_delegates(monkeypatch): + monkeypatch.setattr( + app_dsl_service.DependenciesAnalysisService, + "get_leaked_dependencies", + lambda *, tenant_id, dependencies: [SimpleNamespace(tenant_id=tenant_id, deps=dependencies)], + ) + res = AppDslService.get_leaked_dependencies("tenant-1", [SimpleNamespace(id="x")]) + assert len(res) == 1 + + +def test_encrypt_decrypt_dataset_id_respects_config(monkeypatch): + tenant_id = "tenant-1" + dataset_uuid = "00000000-0000-0000-0000-000000000000" + + monkeypatch.setattr(app_dsl_service.dify_config, "DSL_EXPORT_ENCRYPT_DATASET_ID", False) + assert AppDslService.encrypt_dataset_id(dataset_id=dataset_uuid, tenant_id=tenant_id) == dataset_uuid + + monkeypatch.setattr(app_dsl_service.dify_config, "DSL_EXPORT_ENCRYPT_DATASET_ID", True) + encrypted = AppDslService.encrypt_dataset_id(dataset_id=dataset_uuid, tenant_id=tenant_id) + assert encrypted != dataset_uuid + assert base64.b64decode(encrypted.encode()) + assert AppDslService.decrypt_dataset_id(encrypted_data=encrypted, tenant_id=tenant_id) == dataset_uuid + + +def test_decrypt_dataset_id_returns_plain_uuid_unchanged(): + value = "00000000-0000-0000-0000-000000000000" + assert AppDslService.decrypt_dataset_id(encrypted_data=value, tenant_id="tenant-1") == value + + +def test_decrypt_dataset_id_returns_none_on_invalid_data(monkeypatch): + monkeypatch.setattr(app_dsl_service.dify_config, "DSL_EXPORT_ENCRYPT_DATASET_ID", True) + assert AppDslService.decrypt_dataset_id(encrypted_data="not-base64", tenant_id="tenant-1") is None + + +def test_decrypt_dataset_id_returns_none_when_decrypted_is_not_uuid(monkeypatch): + monkeypatch.setattr(app_dsl_service.dify_config, "DSL_EXPORT_ENCRYPT_DATASET_ID", True) + encrypted = AppDslService.encrypt_dataset_id(dataset_id="not-a-uuid", tenant_id="tenant-1") + assert AppDslService.decrypt_dataset_id(encrypted_data=encrypted, tenant_id="tenant-1") is None + + +def test_is_valid_uuid_handles_bad_inputs(): + assert AppDslService._is_valid_uuid("00000000-0000-0000-0000-000000000000") is True + assert AppDslService._is_valid_uuid("nope") is False diff --git a/api/tests/unit_tests/services/test_app_generate_service.py b/api/tests/unit_tests/services/test_app_generate_service.py index 47b759bc7d..c2b430c551 100644 --- a/api/tests/unit_tests/services/test_app_generate_service.py +++ b/api/tests/unit_tests/services/test_app_generate_service.py @@ -1,14 +1,50 @@ +""" +Comprehensive unit tests for services.app_generate_service.AppGenerateService. + +Covers: + - _build_streaming_task_on_subscribe (streams / pubsub / exception / idempotency) + - generate (COMPLETION / AGENT_CHAT / CHAT / ADVANCED_CHAT / WORKFLOW / invalid mode, + streaming & blocking, billing, quota-refund-on-error, rate_limit.exit) + - _get_max_active_requests (all limit combos) + - generate_single_iteration (ADVANCED_CHAT / WORKFLOW / invalid mode) + - generate_single_loop (ADVANCED_CHAT / WORKFLOW / invalid mode) + - generate_more_like_this + - _get_workflow (debugger / non-debugger / specific id / invalid format / not found) + - get_response_generator (ended / non-ended workflow run) +""" + +import threading +import time +import uuid +from contextlib import contextmanager from unittest.mock import MagicMock -import services.app_generate_service as app_generate_service_module +import pytest + +import services.app_generate_service as ags_module +from core.app.entities.app_invoke_entities import InvokeFrom from models.model import AppMode from services.app_generate_service import AppGenerateService +from services.errors.app import WorkflowIdFormatError, WorkflowNotFoundError +# --------------------------------------------------------------------------- +# Helpers / Fakes +# --------------------------------------------------------------------------- class _DummyRateLimit: + """Minimal stand-in for RateLimit that never touches Redis.""" + + _instance_dict: dict[str, "_DummyRateLimit"] = {} + + def __new__(cls, client_id: str, max_active_requests: int): + # avoid singleton caching across tests + instance = object.__new__(cls) + return instance + def __init__(self, client_id: str, max_active_requests: int) -> None: self.client_id = client_id self.max_active_requests = max_active_requests + self._exited: list[str] = [] @staticmethod def gen_request_key() -> str: @@ -18,101 +54,720 @@ class _DummyRateLimit: return request_id or "dummy-request-id" def exit(self, request_id: str) -> None: - return None + self._exited.append(request_id) def generate(self, generator, request_id: str): return generator -def test_workflow_blocking_injects_pause_state_config(mocker, monkeypatch): - monkeypatch.setattr(app_generate_service_module.dify_config, "BILLING_ENABLED", False) - mocker.patch("services.app_generate_service.RateLimit", _DummyRateLimit) +def _make_app(mode: AppMode | str, *, max_active_requests: int = 0, is_agent: bool = False) -> MagicMock: + app = MagicMock() + app.mode = mode + app.id = "app-id" + app.tenant_id = "tenant-id" + app.max_active_requests = max_active_requests + app.is_agent = is_agent + return app - workflow = MagicMock() - workflow.id = "workflow-id" - workflow.created_by = "owner-id" - - mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) - - generator_spy = mocker.patch( - "services.app_generate_service.WorkflowAppGenerator.generate", - return_value={"result": "ok"}, - ) - - app_model = MagicMock() - app_model.mode = AppMode.WORKFLOW - app_model.id = "app-id" - app_model.tenant_id = "tenant-id" - app_model.max_active_requests = 0 - app_model.is_agent = False +def _make_user() -> MagicMock: user = MagicMock() user.id = "user-id" - - result = AppGenerateService.generate( - app_model=app_model, - user=user, - args={"inputs": {"k": "v"}}, - invoke_from=MagicMock(), - streaming=False, - ) - - assert result == {"result": "ok"} - - call_kwargs = generator_spy.call_args.kwargs - pause_state_config = call_kwargs.get("pause_state_config") - assert pause_state_config is not None - assert pause_state_config.state_owner_user_id == "owner-id" + return user -def test_advanced_chat_blocking_returns_dict_and_does_not_use_event_retrieval(mocker, monkeypatch): - """ - Regression test: ADVANCED_CHAT in blocking mode should return a plain dict - (non-streaming), and must not go through the async retrieve_events path. - Keeps behavior consistent with WORKFLOW blocking branch. - """ - # Disable billing and stub RateLimit to a no-op that just passes values through - monkeypatch.setattr(app_generate_service_module.dify_config, "BILLING_ENABLED", False) - mocker.patch("services.app_generate_service.RateLimit", _DummyRateLimit) - - # Arrange a fake workflow and wire AppGenerateService._get_workflow to return it +def _make_workflow(*, workflow_id: str = "workflow-id", created_by: str = "owner-id") -> MagicMock: workflow = MagicMock() - workflow.id = "workflow-id" - mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + workflow.id = workflow_id + workflow.created_by = created_by + return workflow - # Spy on the streaming retrieval path to ensure it's NOT called - retrieve_spy = mocker.patch("services.app_generate_service.AdvancedChatAppGenerator.retrieve_events") - # Make AdvancedChatAppGenerator.generate return a plain dict when streaming=False - generate_spy = mocker.patch( - "services.app_generate_service.AdvancedChatAppGenerator.generate", - return_value={"result": "ok"}, - ) +@contextmanager +def _noop_rate_limit_context(rate_limit, request_id): + """Drop-in replacement for rate_limit_context that doesn't touch Redis.""" + yield - # Minimal app model for ADVANCED_CHAT - app_model = MagicMock() - app_model.mode = AppMode.ADVANCED_CHAT - app_model.id = "app-id" - app_model.tenant_id = "tenant-id" - app_model.max_active_requests = 0 - app_model.is_agent = False - user = MagicMock() - user.id = "user-id" +# --------------------------------------------------------------------------- +# _build_streaming_task_on_subscribe +# --------------------------------------------------------------------------- +class TestBuildStreamingTaskOnSubscribe: + """Tests for AppGenerateService._build_streaming_task_on_subscribe.""" - # Must include query and inputs for AdvancedChatAppGenerator - args = {"workflow_id": "wf-1", "query": "hello", "inputs": {}} + def test_streams_mode_starts_immediately(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "streams") + called = [] + cb = AppGenerateService._build_streaming_task_on_subscribe(lambda: called.append(1)) + # task started immediately during build + assert called == [1] + # calling the returned callback is idempotent + cb() + assert called == [1] # not called again - # Act: call service with streaming=False (blocking mode) - result = AppGenerateService.generate( - app_model=app_model, - user=user, - args=args, - invoke_from=MagicMock(), - streaming=False, - ) + def test_pubsub_mode_starts_on_subscribe(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "pubsub") + monkeypatch.setattr(ags_module, "SSE_TASK_START_FALLBACK_MS", 60_000) # large to prevent timer + called = [] + cb = AppGenerateService._build_streaming_task_on_subscribe(lambda: called.append(1)) + assert called == [] + cb() + assert called == [1] + # second call is idempotent + cb() + assert called == [1] - # Assert: returns the dict from generate(), and did not call retrieve_events() - assert result == {"result": "ok"} - assert generate_spy.call_args.kwargs.get("streaming") is False - retrieve_spy.assert_not_called() + def test_sharded_mode_starts_on_subscribe(self, monkeypatch): + """sharded is treated like pubsub (i.e. not 'streams').""" + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "sharded") + monkeypatch.setattr(ags_module, "SSE_TASK_START_FALLBACK_MS", 60_000) + called = [] + cb = AppGenerateService._build_streaming_task_on_subscribe(lambda: called.append(1)) + assert called == [] + cb() + assert called == [1] + + def test_pubsub_fallback_timer_fires(self, monkeypatch): + """When nobody subscribes fast enough the fallback timer fires.""" + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "pubsub") + monkeypatch.setattr(ags_module, "SSE_TASK_START_FALLBACK_MS", 50) # 50 ms + called = [] + _cb = AppGenerateService._build_streaming_task_on_subscribe(lambda: called.append(1)) + time.sleep(0.2) # give the timer time to fire + assert called == [1] + + def test_exception_in_start_task_returns_false(self, monkeypatch): + """When start_task raises, _try_start returns False and next call retries.""" + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "streams") + call_count = 0 + + def _bad(): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise RuntimeError("boom") + + cb = AppGenerateService._build_streaming_task_on_subscribe(_bad) + # first call inside build raised, but is caught; second call via cb succeeds + assert call_count == 1 + cb() + assert call_count == 2 + + def test_concurrent_subscribe_only_starts_once(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "pubsub") + monkeypatch.setattr(ags_module, "SSE_TASK_START_FALLBACK_MS", 60_000) + call_count = 0 + + def _inc(): + nonlocal call_count + call_count += 1 + + cb = AppGenerateService._build_streaming_task_on_subscribe(_inc) + threads = [threading.Thread(target=cb) for _ in range(10)] + for t in threads: + t.start() + for t in threads: + t.join() + assert call_count == 1 + + +# --------------------------------------------------------------------------- +# _get_max_active_requests +# --------------------------------------------------------------------------- +class TestGetMaxActiveRequests: + def test_both_zero_returns_zero(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "APP_MAX_ACTIVE_REQUESTS", 0) + monkeypatch.setattr(ags_module.dify_config, "APP_DEFAULT_ACTIVE_REQUESTS", 0) + app = _make_app(AppMode.CHAT, max_active_requests=0) + assert AppGenerateService._get_max_active_requests(app) == 0 + + def test_app_limit_only(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "APP_MAX_ACTIVE_REQUESTS", 0) + monkeypatch.setattr(ags_module.dify_config, "APP_DEFAULT_ACTIVE_REQUESTS", 0) + app = _make_app(AppMode.CHAT, max_active_requests=5) + assert AppGenerateService._get_max_active_requests(app) == 5 + + def test_config_limit_only(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "APP_MAX_ACTIVE_REQUESTS", 10) + monkeypatch.setattr(ags_module.dify_config, "APP_DEFAULT_ACTIVE_REQUESTS", 0) + app = _make_app(AppMode.CHAT, max_active_requests=0) + assert AppGenerateService._get_max_active_requests(app) == 10 + + def test_both_non_zero_returns_min(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "APP_MAX_ACTIVE_REQUESTS", 20) + monkeypatch.setattr(ags_module.dify_config, "APP_DEFAULT_ACTIVE_REQUESTS", 0) + app = _make_app(AppMode.CHAT, max_active_requests=5) + assert AppGenerateService._get_max_active_requests(app) == 5 + + def test_default_active_requests_used_when_app_has_none(self, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "APP_MAX_ACTIVE_REQUESTS", 0) + monkeypatch.setattr(ags_module.dify_config, "APP_DEFAULT_ACTIVE_REQUESTS", 15) + app = _make_app(AppMode.CHAT, max_active_requests=0) + assert AppGenerateService._get_max_active_requests(app) == 15 + + +# --------------------------------------------------------------------------- +# generate – every AppMode branch +# --------------------------------------------------------------------------- +class TestGenerate: + """Tests for AppGenerateService.generate covering each mode.""" + + @pytest.fixture(autouse=True) + def _common(self, mocker, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "BILLING_ENABLED", False) + mocker.patch("services.app_generate_service.RateLimit", _DummyRateLimit) + # Prevent AppExecutionParams.new from touching real models via isinstance + mocker.patch( + "services.app_generate_service.rate_limit_context", + _noop_rate_limit_context, + ) + + # -- COMPLETION --------------------------------------------------------- + def test_completion_mode(self, mocker): + gen_spy = mocker.patch( + "services.app_generate_service.CompletionAppGenerator.generate", + return_value={"result": "ok"}, + ) + mocker.patch( + "services.app_generate_service.CompletionAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + result = AppGenerateService.generate( + app_model=_make_app(AppMode.COMPLETION), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + assert result == {"result": "ok"} + gen_spy.assert_called_once() + + # -- AGENT_CHAT via mode ------------------------------------------------ + def test_agent_chat_mode(self, mocker): + gen_spy = mocker.patch( + "services.app_generate_service.AgentChatAppGenerator.generate", + return_value={"result": "agent"}, + ) + mocker.patch( + "services.app_generate_service.AgentChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + result = AppGenerateService.generate( + app_model=_make_app(AppMode.AGENT_CHAT), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + assert result == {"result": "agent"} + gen_spy.assert_called_once() + + # -- AGENT_CHAT via is_agent flag (non-AGENT_CHAT mode) ----------------- + def test_agent_via_is_agent_flag(self, mocker): + gen_spy = mocker.patch( + "services.app_generate_service.AgentChatAppGenerator.generate", + return_value={"result": "agent-via-flag"}, + ) + mocker.patch( + "services.app_generate_service.AgentChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + app = _make_app(AppMode.CHAT, is_agent=True) + result = AppGenerateService.generate( + app_model=app, + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + assert result == {"result": "agent-via-flag"} + gen_spy.assert_called_once() + + # -- CHAT --------------------------------------------------------------- + def test_chat_mode(self, mocker): + gen_spy = mocker.patch( + "services.app_generate_service.ChatAppGenerator.generate", + return_value={"result": "chat"}, + ) + mocker.patch( + "services.app_generate_service.ChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + app = _make_app(AppMode.CHAT, is_agent=False) + result = AppGenerateService.generate( + app_model=app, + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + assert result == {"result": "chat"} + gen_spy.assert_called_once() + + # -- ADVANCED_CHAT blocking --------------------------------------------- + def test_advanced_chat_blocking(self, mocker): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + + retrieve_spy = mocker.patch("services.app_generate_service.AdvancedChatAppGenerator.retrieve_events") + gen_spy = mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.generate", + return_value={"result": "advanced-blocking"}, + ) + mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + + result = AppGenerateService.generate( + app_model=_make_app(AppMode.ADVANCED_CHAT), + user=_make_user(), + args={"workflow_id": None, "query": "hi", "inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + assert result == {"result": "advanced-blocking"} + assert gen_spy.call_args.kwargs.get("streaming") is False + retrieve_spy.assert_not_called() + + # -- ADVANCED_CHAT streaming -------------------------------------------- + def test_advanced_chat_streaming(self, mocker, monkeypatch): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + mocker.patch( + "services.app_generate_service.AppExecutionParams.new", + return_value=MagicMock(workflow_run_id="wfr-1", model_dump_json=MagicMock(return_value="{}")), + ) + delay_spy = mocker.patch("services.app_generate_service.workflow_based_app_execution_task.delay") + # Let _build_streaming_task_on_subscribe call the real on_subscribe + # so the inner closure (line 165) actually executes. + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "streams") + gen_instance = MagicMock() + gen_instance.retrieve_events.return_value = iter([]) + gen_instance.convert_to_event_stream.side_effect = lambda x: x + mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator", + return_value=gen_instance, + ) + + result = AppGenerateService.generate( + app_model=_make_app(AppMode.ADVANCED_CHAT), + user=_make_user(), + args={"workflow_id": None, "query": "hi", "inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=True, + ) + # In streaming mode it should go through retrieve_events, not generate + gen_instance.retrieve_events.assert_called_once() + # The inner on_subscribe closure was invoked by _build_streaming_task_on_subscribe + delay_spy.assert_called_once() + + # -- WORKFLOW blocking -------------------------------------------------- + def test_workflow_blocking(self, mocker): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + gen_spy = mocker.patch( + "services.app_generate_service.WorkflowAppGenerator.generate", + return_value={"result": "workflow-blocking"}, + ) + mocker.patch( + "services.app_generate_service.WorkflowAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + + result = AppGenerateService.generate( + app_model=_make_app(AppMode.WORKFLOW), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + assert result == {"result": "workflow-blocking"} + call_kwargs = gen_spy.call_args.kwargs + assert call_kwargs.get("pause_state_config") is not None + assert call_kwargs["pause_state_config"].state_owner_user_id == "owner-id" + + # -- WORKFLOW streaming ------------------------------------------------- + def test_workflow_streaming(self, mocker, monkeypatch): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + mocker.patch( + "services.app_generate_service.AppExecutionParams.new", + return_value=MagicMock(workflow_run_id="wfr-2", model_dump_json=MagicMock(return_value="{}")), + ) + delay_spy = mocker.patch("services.app_generate_service.workflow_based_app_execution_task.delay") + # Let _build_streaming_task_on_subscribe invoke the real on_subscribe + # so the inner closure (line 216) actually executes. + monkeypatch.setattr(ags_module.dify_config, "PUBSUB_REDIS_CHANNEL_TYPE", "streams") + retrieve_spy = mocker.patch( + "services.app_generate_service.MessageBasedAppGenerator.retrieve_events", + return_value=iter([]), + ) + mocker.patch( + "services.app_generate_service.WorkflowAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + + result = AppGenerateService.generate( + app_model=_make_app(AppMode.WORKFLOW), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=True, + ) + retrieve_spy.assert_called_once() + # The inner on_subscribe closure was invoked by _build_streaming_task_on_subscribe + delay_spy.assert_called_once() + + # -- Invalid mode ------------------------------------------------------- + def test_invalid_mode_raises(self, mocker): + app = _make_app("invalid-mode", is_agent=False) + with pytest.raises(ValueError, match="Invalid app mode"): + AppGenerateService.generate( + app_model=app, + user=_make_user(), + args={}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + + +# --------------------------------------------------------------------------- +# generate – billing / quota +# --------------------------------------------------------------------------- +class TestGenerateBilling: + @pytest.fixture(autouse=True) + def _common(self, mocker, monkeypatch): + mocker.patch("services.app_generate_service.RateLimit", _DummyRateLimit) + mocker.patch( + "services.app_generate_service.rate_limit_context", + _noop_rate_limit_context, + ) + + def test_billing_enabled_consumes_quota(self, mocker, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "BILLING_ENABLED", True) + quota_charge = MagicMock() + consume_mock = mocker.patch( + "services.app_generate_service.QuotaType.WORKFLOW.consume", + return_value=quota_charge, + ) + mocker.patch( + "services.app_generate_service.CompletionAppGenerator.generate", + return_value={"ok": True}, + ) + mocker.patch( + "services.app_generate_service.CompletionAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + + AppGenerateService.generate( + app_model=_make_app(AppMode.COMPLETION), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + consume_mock.assert_called_once_with("tenant-id") + + def test_billing_quota_exceeded_raises_rate_limit_error(self, mocker, monkeypatch): + from services.errors.app import QuotaExceededError + from services.errors.llm import InvokeRateLimitError + + monkeypatch.setattr(ags_module.dify_config, "BILLING_ENABLED", True) + mocker.patch( + "services.app_generate_service.QuotaType.WORKFLOW.consume", + side_effect=QuotaExceededError(feature="workflow", tenant_id="t", required=1), + ) + + with pytest.raises(InvokeRateLimitError): + AppGenerateService.generate( + app_model=_make_app(AppMode.COMPLETION), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + + def test_exception_refunds_quota_and_exits_rate_limit(self, mocker, monkeypatch): + monkeypatch.setattr(ags_module.dify_config, "BILLING_ENABLED", True) + quota_charge = MagicMock() + mocker.patch( + "services.app_generate_service.QuotaType.WORKFLOW.consume", + return_value=quota_charge, + ) + mocker.patch( + "services.app_generate_service.CompletionAppGenerator.generate", + side_effect=RuntimeError("boom"), + ) + mocker.patch( + "services.app_generate_service.CompletionAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + + with pytest.raises(RuntimeError, match="boom"): + AppGenerateService.generate( + app_model=_make_app(AppMode.COMPLETION), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + quota_charge.refund.assert_called_once() + + def test_rate_limit_exit_called_in_finally_for_blocking(self, mocker, monkeypatch): + """For non-streaming (blocking) calls, rate_limit.exit should be called in finally.""" + monkeypatch.setattr(ags_module.dify_config, "BILLING_ENABLED", False) + + exit_calls: list[str] = [] + + class _TrackingRateLimit(_DummyRateLimit): + def exit(self, request_id: str) -> None: + exit_calls.append(request_id) + + mocker.patch("services.app_generate_service.RateLimit", _TrackingRateLimit) + mocker.patch( + "services.app_generate_service.CompletionAppGenerator.generate", + return_value={"ok": True}, + ) + mocker.patch( + "services.app_generate_service.CompletionAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + + AppGenerateService.generate( + app_model=_make_app(AppMode.COMPLETION), + user=_make_user(), + args={"inputs": {}}, + invoke_from=InvokeFrom.SERVICE_API, + streaming=False, + ) + # exit is called in finally block for non-streaming + assert len(exit_calls) >= 1 + + +# --------------------------------------------------------------------------- +# _get_workflow +# --------------------------------------------------------------------------- +class TestGetWorkflow: + def test_debugger_fetches_draft(self, mocker): + draft_wf = _make_workflow() + ws = MagicMock() + ws.get_draft_workflow.return_value = draft_wf + mocker.patch("services.app_generate_service.WorkflowService", return_value=ws) + + result = AppGenerateService._get_workflow(_make_app(AppMode.WORKFLOW), InvokeFrom.DEBUGGER) + assert result is draft_wf + ws.get_draft_workflow.assert_called_once() + + def test_debugger_raises_when_no_draft(self, mocker): + ws = MagicMock() + ws.get_draft_workflow.return_value = None + mocker.patch("services.app_generate_service.WorkflowService", return_value=ws) + + with pytest.raises(ValueError, match="Workflow not initialized"): + AppGenerateService._get_workflow(_make_app(AppMode.WORKFLOW), InvokeFrom.DEBUGGER) + + def test_non_debugger_fetches_published(self, mocker): + pub_wf = _make_workflow() + ws = MagicMock() + ws.get_published_workflow.return_value = pub_wf + mocker.patch("services.app_generate_service.WorkflowService", return_value=ws) + + result = AppGenerateService._get_workflow(_make_app(AppMode.WORKFLOW), InvokeFrom.SERVICE_API) + assert result is pub_wf + ws.get_published_workflow.assert_called_once() + + def test_non_debugger_raises_when_no_published(self, mocker): + ws = MagicMock() + ws.get_published_workflow.return_value = None + mocker.patch("services.app_generate_service.WorkflowService", return_value=ws) + + with pytest.raises(ValueError, match="Workflow not published"): + AppGenerateService._get_workflow(_make_app(AppMode.WORKFLOW), InvokeFrom.SERVICE_API) + + def test_specific_workflow_id_valid_uuid(self, mocker): + valid_uuid = str(uuid.uuid4()) + specific_wf = _make_workflow(workflow_id=valid_uuid) + ws = MagicMock() + ws.get_published_workflow_by_id.return_value = specific_wf + mocker.patch("services.app_generate_service.WorkflowService", return_value=ws) + + result = AppGenerateService._get_workflow( + _make_app(AppMode.WORKFLOW), InvokeFrom.SERVICE_API, workflow_id=valid_uuid + ) + assert result is specific_wf + ws.get_published_workflow_by_id.assert_called_once() + + def test_specific_workflow_id_invalid_uuid(self, mocker): + ws = MagicMock() + mocker.patch("services.app_generate_service.WorkflowService", return_value=ws) + + with pytest.raises(WorkflowIdFormatError): + AppGenerateService._get_workflow( + _make_app(AppMode.WORKFLOW), InvokeFrom.SERVICE_API, workflow_id="not-a-uuid" + ) + + def test_specific_workflow_id_not_found(self, mocker): + valid_uuid = str(uuid.uuid4()) + ws = MagicMock() + ws.get_published_workflow_by_id.return_value = None + mocker.patch("services.app_generate_service.WorkflowService", return_value=ws) + + with pytest.raises(WorkflowNotFoundError): + AppGenerateService._get_workflow( + _make_app(AppMode.WORKFLOW), InvokeFrom.SERVICE_API, workflow_id=valid_uuid + ) + + +# --------------------------------------------------------------------------- +# generate_single_iteration +# --------------------------------------------------------------------------- +class TestGenerateSingleIteration: + def test_advanced_chat_mode(self, mocker): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + gen_spy = mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + iter_spy = mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.single_iteration_generate", + return_value={"event": "iteration"}, + ) + app = _make_app(AppMode.ADVANCED_CHAT) + result = AppGenerateService.generate_single_iteration( + app_model=app, user=_make_user(), node_id="n1", args={"k": "v"} + ) + iter_spy.assert_called_once() + assert result == {"event": "iteration"} + + def test_workflow_mode(self, mocker): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + iter_spy = mocker.patch( + "services.app_generate_service.WorkflowAppGenerator.single_iteration_generate", + return_value={"event": "wf-iteration"}, + ) + app = _make_app(AppMode.WORKFLOW) + result = AppGenerateService.generate_single_iteration( + app_model=app, user=_make_user(), node_id="n1", args={"k": "v"} + ) + iter_spy.assert_called_once() + assert result == {"event": "wf-iteration"} + + def test_invalid_mode_raises(self, mocker): + app = _make_app(AppMode.CHAT) + with pytest.raises(ValueError, match="Invalid app mode"): + AppGenerateService.generate_single_iteration(app_model=app, user=_make_user(), node_id="n1", args={}) + + +# --------------------------------------------------------------------------- +# generate_single_loop +# --------------------------------------------------------------------------- +class TestGenerateSingleLoop: + def test_advanced_chat_mode(self, mocker): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + loop_spy = mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.single_loop_generate", + return_value={"event": "loop"}, + ) + app = _make_app(AppMode.ADVANCED_CHAT) + result = AppGenerateService.generate_single_loop( + app_model=app, user=_make_user(), node_id="n1", args=MagicMock() + ) + loop_spy.assert_called_once() + assert result == {"event": "loop"} + + def test_workflow_mode(self, mocker): + workflow = _make_workflow() + mocker.patch.object(AppGenerateService, "_get_workflow", return_value=workflow) + mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator.convert_to_event_stream", + side_effect=lambda x: x, + ) + loop_spy = mocker.patch( + "services.app_generate_service.WorkflowAppGenerator.single_loop_generate", + return_value={"event": "wf-loop"}, + ) + app = _make_app(AppMode.WORKFLOW) + result = AppGenerateService.generate_single_loop( + app_model=app, user=_make_user(), node_id="n1", args=MagicMock() + ) + loop_spy.assert_called_once() + assert result == {"event": "wf-loop"} + + def test_invalid_mode_raises(self, mocker): + app = _make_app(AppMode.COMPLETION) + with pytest.raises(ValueError, match="Invalid app mode"): + AppGenerateService.generate_single_loop(app_model=app, user=_make_user(), node_id="n1", args=MagicMock()) + + +# --------------------------------------------------------------------------- +# generate_more_like_this +# --------------------------------------------------------------------------- +class TestGenerateMoreLikeThis: + def test_delegates_to_completion_generator(self, mocker): + gen_spy = mocker.patch( + "services.app_generate_service.CompletionAppGenerator.generate_more_like_this", + return_value={"result": "similar"}, + ) + result = AppGenerateService.generate_more_like_this( + app_model=_make_app(AppMode.COMPLETION), + user=_make_user(), + message_id="msg-1", + invoke_from=InvokeFrom.SERVICE_API, + streaming=True, + ) + assert result == {"result": "similar"} + gen_spy.assert_called_once() + assert gen_spy.call_args.kwargs["stream"] is True + + +# --------------------------------------------------------------------------- +# get_response_generator +# --------------------------------------------------------------------------- +class TestGetResponseGenerator: + def test_non_ended_workflow_run(self, mocker): + app = _make_app(AppMode.ADVANCED_CHAT) + workflow_run = MagicMock() + workflow_run.id = "run-1" + workflow_run.status.is_ended.return_value = False + + gen_instance = MagicMock() + gen_instance.retrieve_events.return_value = iter([{"event": "started"}]) + gen_instance.convert_to_event_stream.side_effect = lambda x: x + mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator", + return_value=gen_instance, + ) + + result = AppGenerateService.get_response_generator(app_model=app, workflow_run=workflow_run) + gen_instance.retrieve_events.assert_called_once() + + def test_ended_workflow_run_still_returns_generator(self, mocker): + """Even when the run is ended, the current code still returns a generator (TODO branch).""" + app = _make_app(AppMode.WORKFLOW) + workflow_run = MagicMock() + workflow_run.id = "run-2" + workflow_run.status.is_ended.return_value = True + + gen_instance = MagicMock() + gen_instance.retrieve_events.return_value = iter([]) + gen_instance.convert_to_event_stream.side_effect = lambda x: x + mocker.patch( + "services.app_generate_service.AdvancedChatAppGenerator", + return_value=gen_instance, + ) + + result = AppGenerateService.get_response_generator(app_model=app, workflow_run=workflow_run) + # current impl falls through the TODO and still creates a generator + gen_instance.retrieve_events.assert_called_once() diff --git a/api/tests/unit_tests/services/test_app_model_config_service.py b/api/tests/unit_tests/services/test_app_model_config_service.py new file mode 100644 index 0000000000..d4b4bf14a3 --- /dev/null +++ b/api/tests/unit_tests/services/test_app_model_config_service.py @@ -0,0 +1,88 @@ +from unittest.mock import patch + +import pytest + +from models.model import AppMode +from services.app_model_config_service import AppModelConfigService + + +@pytest.fixture +def mock_config_managers(): + """Fixture that patches all app config manager validate methods. + + Returns a dictionary containing the mocked config_validate methods for each manager. + """ + with ( + patch("services.app_model_config_service.ChatAppConfigManager.config_validate") as mock_chat_validate, + patch("services.app_model_config_service.AgentChatAppConfigManager.config_validate") as mock_agent_validate, + patch( + "services.app_model_config_service.CompletionAppConfigManager.config_validate" + ) as mock_completion_validate, + ): + mock_chat_validate.return_value = {"manager": "chat"} + mock_agent_validate.return_value = {"manager": "agent"} + mock_completion_validate.return_value = {"manager": "completion"} + + yield { + "chat": mock_chat_validate, + "agent": mock_agent_validate, + "completion": mock_completion_validate, + } + + +class TestAppModelConfigService: + @pytest.mark.parametrize( + ("app_mode", "selected_manager"), + [ + (AppMode.CHAT, "chat"), + (AppMode.AGENT_CHAT, "agent"), + (AppMode.COMPLETION, "completion"), + ], + ) + def test_should_route_validation_to_correct_manager_based_on_app_mode( + self, app_mode, selected_manager, mock_config_managers + ): + """Test configuration validation is delegated to the expected manager for each supported app mode.""" + tenant_id = "tenant-123" + config = {"temperature": 0.5} + + mock_chat_validate = mock_config_managers["chat"] + mock_agent_validate = mock_config_managers["agent"] + mock_completion_validate = mock_config_managers["completion"] + + result = AppModelConfigService.validate_configuration(tenant_id=tenant_id, config=config, app_mode=app_mode) + + assert result == {"manager": selected_manager} + + if selected_manager == "chat": + mock_chat_validate.assert_called_once_with(tenant_id, config) + mock_agent_validate.assert_not_called() + mock_completion_validate.assert_not_called() + elif selected_manager == "agent": + mock_agent_validate.assert_called_once_with(tenant_id, config) + mock_chat_validate.assert_not_called() + mock_completion_validate.assert_not_called() + else: + mock_completion_validate.assert_called_once_with(tenant_id, config) + mock_chat_validate.assert_not_called() + mock_agent_validate.assert_not_called() + + def test_should_raise_value_error_when_app_mode_is_not_supported(self, mock_config_managers): + """Test unsupported app modes raise ValueError with the invalid mode in the message.""" + tenant_id = "tenant-123" + config = {"temperature": 0.5} + + mock_chat_validate = mock_config_managers["chat"] + mock_agent_validate = mock_config_managers["agent"] + mock_completion_validate = mock_config_managers["completion"] + + with pytest.raises(ValueError, match=f"Invalid app mode: {AppMode.WORKFLOW}"): + AppModelConfigService.validate_configuration( + tenant_id=tenant_id, + config=config, + app_mode=AppMode.WORKFLOW, + ) + + mock_chat_validate.assert_not_called() + mock_agent_validate.assert_not_called() + mock_completion_validate.assert_not_called() diff --git a/api/tests/unit_tests/services/test_app_service.py b/api/tests/unit_tests/services/test_app_service.py new file mode 100644 index 0000000000..bff8dc92c6 --- /dev/null +++ b/api/tests/unit_tests/services/test_app_service.py @@ -0,0 +1,609 @@ +"""Unit tests for services.app_service.""" + +import json +from types import SimpleNamespace +from typing import cast +from unittest.mock import MagicMock, patch + +import pytest + +from core.errors.error import ProviderTokenNotInitError +from models import Account, Tenant +from models.model import App, AppMode +from services.app_service import AppService + + +@pytest.fixture +def service() -> AppService: + """Provide AppService instance.""" + return AppService() + + +@pytest.fixture +def account() -> Account: + """Create account object for create_app tests.""" + tenant = Tenant(name="Tenant") + tenant.id = "tenant-1" + result = Account(name="Account User", email="account@example.com") + result.id = "acc-1" + result._current_tenant = tenant + return result + + +@pytest.fixture +def default_args() -> dict: + """Create default create_app args.""" + return { + "name": "Test App", + "mode": AppMode.CHAT.value, + "icon": "🤖", + "icon_background": "#FFFFFF", + } + + +@pytest.fixture +def app_template() -> dict: + """Create basic app template for create_app tests.""" + return { + AppMode.CHAT: { + "app": {}, + "model_config": { + "model": { + "provider": "provider-a", + "name": "model-a", + "mode": "chat", + "completion_params": {}, + } + }, + } + } + + +def _make_current_user() -> Account: + user = Account(name="Tester", email="tester@example.com") + user.id = "user-1" + tenant = Tenant(name="Tenant") + tenant.id = "tenant-1" + user._current_tenant = tenant + return user + + +class TestAppServicePagination: + """Test suite for get_paginate_apps.""" + + def test_get_paginate_apps_should_return_none_when_tag_filter_empty(self, service: AppService) -> None: + """Test pagination returns None when tag filter has no targets.""" + # Arrange + args = {"mode": "chat", "page": 1, "limit": 20, "tag_ids": ["tag-1"]} + + with patch("services.app_service.TagService.get_target_ids_by_tag_ids", return_value=[]): + # Act + result = service.get_paginate_apps("user-1", "tenant-1", args) + + # Assert + assert result is None + + def test_get_paginate_apps_should_delegate_to_db_paginate(self, service: AppService) -> None: + """Test pagination delegates to db.paginate when filters are valid.""" + # Arrange + args = { + "mode": "workflow", + "is_created_by_me": True, + "name": "My_App%", + "tag_ids": ["tag-1"], + "page": 2, + "limit": 10, + } + expected_pagination = MagicMock() + + with ( + patch("services.app_service.TagService.get_target_ids_by_tag_ids", return_value=["app-1"]), + patch("libs.helper.escape_like_pattern", return_value="escaped"), + patch("services.app_service.db") as mock_db, + ): + mock_db.paginate.return_value = expected_pagination + + # Act + result = service.get_paginate_apps("user-1", "tenant-1", args) + + # Assert + assert result is expected_pagination + mock_db.paginate.assert_called_once() + + +class TestAppServiceCreate: + """Test suite for create_app.""" + + def test_create_app_should_create_with_matching_default_model( + self, + service: AppService, + account: Account, + default_args: dict, + app_template: dict, + ) -> None: + """Test create_app uses matching default model and persists app config.""" + # Arrange + app_instance = SimpleNamespace(id="app-1", tenant_id="tenant-1") + app_model_config = SimpleNamespace(id="cfg-1") + model_instance = SimpleNamespace( + model_name="model-a", + provider="provider-a", + model_type_instance=MagicMock(), + credentials={"k": "v"}, + ) + + with ( + patch("services.app_service.default_app_templates", app_template), + patch("services.app_service.App", return_value=app_instance), + patch("services.app_service.AppModelConfig", return_value=app_model_config), + patch("services.app_service.ModelManager") as mock_model_manager, + patch("services.app_service.db") as mock_db, + patch("services.app_service.app_was_created") as mock_event, + patch("services.app_service.FeatureService.get_system_features") as mock_features, + patch("services.app_service.BillingService") as mock_billing, + patch("services.app_service.dify_config") as mock_config, + ): + manager = mock_model_manager.return_value + manager.get_default_model_instance.return_value = model_instance + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)) + mock_config.BILLING_ENABLED = True + + # Act + result = service.create_app("tenant-1", default_args, account) + + # Assert + assert result is app_instance + assert app_instance.app_model_config_id == "cfg-1" + mock_db.session.add.assert_any_call(app_instance) + mock_db.session.add.assert_any_call(app_model_config) + assert mock_db.session.flush.call_count == 2 + mock_db.session.commit.assert_called_once() + mock_event.send.assert_called_once_with(app_instance, account=account) + mock_billing.clean_billing_info_cache.assert_called_once_with("tenant-1") + + def test_create_app_should_raise_when_model_schema_missing( + self, + service: AppService, + account: Account, + default_args: dict, + app_template: dict, + ) -> None: + """Test create_app raises ValueError when non-matching model has no schema.""" + # Arrange + app_instance = SimpleNamespace(id="app-1") + model_instance = SimpleNamespace( + model_name="model-b", + provider="provider-b", + model_type_instance=MagicMock(), + credentials={"k": "v"}, + ) + model_instance.model_type_instance.get_model_schema.return_value = None + + with ( + patch("services.app_service.default_app_templates", app_template), + patch("services.app_service.App", return_value=app_instance), + patch("services.app_service.ModelManager") as mock_model_manager, + patch("services.app_service.db") as mock_db, + ): + manager = mock_model_manager.return_value + manager.get_default_model_instance.return_value = model_instance + + # Act & Assert + with pytest.raises(ValueError, match="model schema not found"): + service.create_app("tenant-1", default_args, account) + mock_db.session.commit.assert_not_called() + + def test_create_app_should_fallback_to_default_provider_when_model_missing( + self, + service: AppService, + account: Account, + default_args: dict, + app_template: dict, + ) -> None: + """Test create_app falls back to provider/model name when no default model instance is available.""" + # Arrange + app_instance = SimpleNamespace(id="app-1", tenant_id="tenant-1") + app_model_config = SimpleNamespace(id="cfg-1") + + with ( + patch("services.app_service.default_app_templates", app_template), + patch("services.app_service.App", return_value=app_instance), + patch("services.app_service.AppModelConfig", return_value=app_model_config), + patch("services.app_service.ModelManager") as mock_model_manager, + patch("services.app_service.db") as mock_db, + patch("services.app_service.app_was_created") as mock_event, + patch("services.app_service.FeatureService.get_system_features") as mock_features, + patch("services.app_service.EnterpriseService") as mock_enterprise, + patch("services.app_service.dify_config") as mock_config, + ): + manager = mock_model_manager.return_value + manager.get_default_model_instance.side_effect = ProviderTokenNotInitError("not ready") + manager.get_default_provider_model_name.return_value = ("fallback-provider", "fallback-model") + mock_features.return_value = SimpleNamespace(webapp_auth=SimpleNamespace(enabled=True)) + mock_config.BILLING_ENABLED = False + + # Act + result = service.create_app("tenant-1", default_args, account) + + # Assert + assert result is app_instance + mock_event.send.assert_called_once_with(app_instance, account=account) + mock_db.session.commit.assert_called_once() + mock_enterprise.WebAppAuth.update_app_access_mode.assert_called_once_with("app-1", "private") + + def test_create_app_should_log_and_fallback_on_unexpected_model_error( + self, + service: AppService, + account: Account, + default_args: dict, + app_template: dict, + ) -> None: + """Test unexpected model manager errors are logged and fallback provider is used.""" + # Arrange + app_instance = SimpleNamespace(id="app-1", tenant_id="tenant-1") + app_model_config = SimpleNamespace(id="cfg-1") + + with ( + patch("services.app_service.default_app_templates", app_template), + patch("services.app_service.App", return_value=app_instance), + patch("services.app_service.AppModelConfig", return_value=app_model_config), + patch("services.app_service.ModelManager") as mock_model_manager, + patch("services.app_service.db"), + patch("services.app_service.app_was_created"), + patch( + "services.app_service.FeatureService.get_system_features", + return_value=SimpleNamespace(webapp_auth=SimpleNamespace(enabled=False)), + ), + patch("services.app_service.dify_config", new=SimpleNamespace(BILLING_ENABLED=False)), + patch("services.app_service.logger") as mock_logger, + ): + manager = mock_model_manager.return_value + manager.get_default_model_instance.side_effect = RuntimeError("boom") + manager.get_default_provider_model_name.return_value = ("fallback-provider", "fallback-model") + + # Act + result = service.create_app("tenant-1", default_args, account) + + # Assert + assert result is app_instance + mock_logger.exception.assert_called_once() + + +class TestAppServiceGetAndUpdate: + """Test suite for app retrieval and update methods.""" + + def test_get_app_should_return_original_when_not_agent_app(self, service: AppService) -> None: + """Test get_app returns original app for non-agent modes.""" + # Arrange + app = MagicMock() + app.mode = AppMode.CHAT + app.is_agent = False + + with patch("services.app_service.current_user", _make_current_user()): + # Act + result = service.get_app(app) + + # Assert + assert result is app + + def test_get_app_should_return_original_when_model_config_missing(self, service: AppService) -> None: + """Test get_app returns app when agent mode has no model config.""" + # Arrange + app = MagicMock() + app.id = "app-1" + app.mode = AppMode.AGENT_CHAT + app.is_agent = False + app.app_model_config = None + + with patch("services.app_service.current_user", _make_current_user()): + # Act + result = service.get_app(app) + + # Assert + assert result is app + + def test_get_app_should_mask_tool_parameters_for_agent_tools(self, service: AppService) -> None: + """Test get_app decrypts and masks secret tool parameters.""" + # Arrange + tool = { + "provider_type": "builtin", + "provider_id": "provider-1", + "tool_name": "tool-a", + "tool_parameters": {"secret": "encrypted"}, + "extra": True, + } + model_config = MagicMock() + model_config.agent_mode_dict = {"tools": [tool, {"skip": True}]} + + app = MagicMock() + app.id = "app-1" + app.mode = AppMode.AGENT_CHAT + app.is_agent = False + app.app_model_config = model_config + + manager = MagicMock() + manager.decrypt_tool_parameters.return_value = {"secret": "decrypted"} + manager.mask_tool_parameters.return_value = {"secret": "***"} + + with ( + patch("services.app_service.current_user", _make_current_user()), + patch("services.app_service.ToolManager.get_agent_tool_runtime", return_value=MagicMock()), + patch("services.app_service.ToolParameterConfigurationManager", return_value=manager), + ): + # Act + result = service.get_app(app) + + # Assert + assert result.app_model_config is model_config + assert tool["tool_parameters"] == {"secret": "***"} + assert json.loads(model_config.agent_mode)["tools"][0]["tool_parameters"] == {"secret": "***"} + + def test_get_app_should_continue_when_tool_parameter_masking_fails(self, service: AppService) -> None: + """Test get_app logs and continues when masking fails.""" + # Arrange + tool = { + "provider_type": "builtin", + "provider_id": "provider-1", + "tool_name": "tool-a", + "tool_parameters": {"secret": "encrypted"}, + "extra": True, + } + model_config = MagicMock() + model_config.agent_mode_dict = {"tools": [tool]} + + app = MagicMock() + app.id = "app-1" + app.mode = AppMode.AGENT_CHAT + app.is_agent = False + app.app_model_config = model_config + + with ( + patch("services.app_service.current_user", _make_current_user()), + patch("services.app_service.ToolManager.get_agent_tool_runtime", side_effect=RuntimeError("mask-failed")), + patch("services.app_service.logger") as mock_logger, + ): + # Act + result = service.get_app(app) + + # Assert + assert result.app_model_config is model_config + mock_logger.exception.assert_called_once() + + def test_update_methods_should_mutate_app_and_commit(self, service: AppService) -> None: + """Test update methods set fields and commit changes.""" + # Arrange + app = cast( + App, + SimpleNamespace( + name="old", + description="old", + icon_type="emoji", + icon="a", + icon_background="#111", + enable_site=True, + enable_api=True, + ), + ) + args = { + "name": "new", + "description": "new-desc", + "icon_type": "image", + "icon": "new-icon", + "icon_background": "#222", + "use_icon_as_answer_icon": True, + "max_active_requests": 5, + } + user = SimpleNamespace(id="user-1") + + with ( + patch("services.app_service.current_user", user), + patch("services.app_service.db") as mock_db, + patch("services.app_service.naive_utc_now", return_value="now"), + ): + # Act + updated = service.update_app(app, args) + renamed = service.update_app_name(app, "rename") + iconed = service.update_app_icon(app, "icon-2", "#333") + site_same = service.update_app_site_status(app, app.enable_site) + api_same = service.update_app_api_status(app, app.enable_api) + site_changed = service.update_app_site_status(app, False) + api_changed = service.update_app_api_status(app, False) + + # Assert + assert updated is app + assert renamed is app + assert iconed is app + assert site_same is app + assert api_same is app + assert site_changed is app + assert api_changed is app + assert mock_db.session.commit.call_count >= 5 + + +class TestAppServiceDeleteAndMeta: + """Test suite for delete and metadata methods.""" + + def test_delete_app_should_cleanup_and_enqueue_task(self, service: AppService) -> None: + """Test delete_app removes app, runs cleanup, and triggers async deletion task.""" + # Arrange + app = cast(App, SimpleNamespace(id="app-1", tenant_id="tenant-1")) + + with ( + patch("services.app_service.db") as mock_db, + patch( + "services.app_service.FeatureService.get_system_features", + return_value=SimpleNamespace(webapp_auth=SimpleNamespace(enabled=True)), + ), + patch("services.app_service.EnterpriseService") as mock_enterprise, + patch( + "services.app_service.dify_config", + new=SimpleNamespace(BILLING_ENABLED=True, CONSOLE_API_URL="https://console.example"), + ), + patch("services.app_service.BillingService") as mock_billing, + patch("services.app_service.remove_app_and_related_data_task") as mock_task, + ): + # Act + service.delete_app(app) + + # Assert + mock_db.session.delete.assert_called_once_with(app) + mock_db.session.commit.assert_called_once() + mock_enterprise.WebAppAuth.cleanup_webapp.assert_called_once_with("app-1") + mock_billing.clean_billing_info_cache.assert_called_once_with("tenant-1") + mock_task.delay.assert_called_once_with(tenant_id="tenant-1", app_id="app-1") + + def test_get_app_meta_should_handle_workflow_and_tool_provider_icons(self, service: AppService) -> None: + """Test get_app_meta extracts builtin and API tool icons from workflow graph.""" + # Arrange + workflow = SimpleNamespace( + graph_dict={ + "nodes": [ + { + "data": { + "type": "tool", + "provider_type": "builtin", + "provider_id": "builtin-provider", + "tool_name": "tool_builtin", + } + }, + { + "data": { + "type": "tool", + "provider_type": "api", + "provider_id": "api-provider-id", + "tool_name": "tool_api", + } + }, + ] + } + ) + app = cast( + App, + SimpleNamespace( + mode=AppMode.WORKFLOW.value, + workflow=workflow, + app_model_config=None, + tenant_id="tenant-1", + icon_type="emoji", + icon_background="#fff", + ), + ) + + provider = SimpleNamespace(icon=json.dumps({"background": "#000", "content": "A"})) + + with ( + patch("services.app_service.dify_config", new=SimpleNamespace(CONSOLE_API_URL="https://console.example")), + patch("services.app_service.db") as mock_db, + ): + query = MagicMock() + query.where.return_value = query + query.first.return_value = provider + mock_db.session.query.return_value = query + + # Act + meta = service.get_app_meta(app) + + # Assert + assert meta["tool_icons"]["tool_builtin"].endswith("/builtin-provider/icon") + assert meta["tool_icons"]["tool_api"] == {"background": "#000", "content": "A"} + + def test_get_app_meta_should_use_default_api_icon_on_lookup_error(self, service: AppService) -> None: + """Test get_app_meta falls back to default icon when API provider lookup fails.""" + # Arrange + app_model_config = SimpleNamespace( + agent_mode_dict={ + "tools": [{"provider_type": "api", "provider_id": "x", "tool_name": "t", "tool_parameters": {}}] + } + ) + app = cast(App, SimpleNamespace(mode=AppMode.CHAT.value, app_model_config=app_model_config, workflow=None)) + + with ( + patch("services.app_service.dify_config", new=SimpleNamespace(CONSOLE_API_URL="https://console.example")), + patch("services.app_service.db") as mock_db, + ): + query = MagicMock() + query.where.return_value = query + query.first.return_value = None + mock_db.session.query.return_value = query + + # Act + meta = service.get_app_meta(app) + + # Assert + assert meta["tool_icons"]["t"] == {"background": "#252525", "content": "\ud83d\ude01"} + + def test_get_app_meta_should_return_empty_when_required_data_missing(self, service: AppService) -> None: + """Test get_app_meta returns empty metadata when workflow/model config is absent.""" + # Arrange + workflow_app = cast(App, SimpleNamespace(mode=AppMode.WORKFLOW.value, workflow=None)) + chat_app = cast(App, SimpleNamespace(mode=AppMode.CHAT.value, app_model_config=None)) + + # Act + workflow_meta = service.get_app_meta(workflow_app) + chat_meta = service.get_app_meta(chat_app) + + # Assert + assert workflow_meta == {"tool_icons": {}} + assert chat_meta == {"tool_icons": {}} + + +class TestAppServiceCodeLookup: + """Test suite for app code lookup methods.""" + + def test_get_app_code_by_id_should_raise_when_site_missing(self) -> None: + """Test get_app_code_by_id raises when site is missing.""" + # Arrange + with patch("services.app_service.db") as mock_db: + query = MagicMock() + query.where.return_value = query + query.first.return_value = None + mock_db.session.query.return_value = query + + # Act & Assert + with pytest.raises(ValueError, match="not found"): + AppService.get_app_code_by_id("app-1") + + def test_get_app_code_by_id_should_return_code(self) -> None: + """Test get_app_code_by_id returns site code.""" + # Arrange + site = SimpleNamespace(code="code-1") + with patch("services.app_service.db") as mock_db: + query = MagicMock() + query.where.return_value = query + query.first.return_value = site + mock_db.session.query.return_value = query + + # Act + result = AppService.get_app_code_by_id("app-1") + + # Assert + assert result == "code-1" + + def test_get_app_id_by_code_should_raise_when_site_missing(self) -> None: + """Test get_app_id_by_code raises when code does not exist.""" + # Arrange + with patch("services.app_service.db") as mock_db: + query = MagicMock() + query.where.return_value = query + query.first.return_value = None + mock_db.session.query.return_value = query + + # Act & Assert + with pytest.raises(ValueError, match="not found"): + AppService.get_app_id_by_code("missing") + + def test_get_app_id_by_code_should_return_app_id(self) -> None: + """Test get_app_id_by_code returns linked app id.""" + # Arrange + site = SimpleNamespace(app_id="app-1") + with patch("services.app_service.db") as mock_db: + query = MagicMock() + query.where.return_value = query + query.first.return_value = site + mock_db.session.query.return_value = query + + # Act + result = AppService.get_app_id_by_code("code-1") + + # Assert + assert result == "app-1" diff --git a/api/tests/unit_tests/services/test_async_workflow_service.py b/api/tests/unit_tests/services/test_async_workflow_service.py new file mode 100644 index 0000000000..639e091041 --- /dev/null +++ b/api/tests/unit_tests/services/test_async_workflow_service.py @@ -0,0 +1,507 @@ +import json +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +import services.async_workflow_service as async_workflow_service_module +from models.enums import AppTriggerType, CreatorUserRole, WorkflowRunTriggeredFrom, WorkflowTriggerStatus +from services.async_workflow_service import AsyncWorkflowService +from services.errors.app import QuotaExceededError, WorkflowNotFoundError, WorkflowQuotaLimitError +from services.workflow.entities import AsyncTriggerResponse, TriggerData +from services.workflow.queue_dispatcher import QueuePriority + + +class AsyncWorkflowServiceTestDataFactory: + """Factory helpers for async workflow service unit tests.""" + + @staticmethod + def create_trigger_data( + app_id: str = "app-123", + tenant_id: str = "tenant-123", + workflow_id: str | None = "workflow-123", + root_node_id: str = "root-node-123", + ) -> TriggerData: + """Create valid trigger data for async workflow execution tests.""" + return TriggerData( + app_id=app_id, + tenant_id=tenant_id, + workflow_id=workflow_id, + root_node_id=root_node_id, + inputs={"name": "dify"}, + files=[], + trigger_type=AppTriggerType.UNKNOWN, + trigger_from=WorkflowRunTriggeredFrom.APP_RUN, + trigger_metadata=None, + ) + + @staticmethod + def create_trigger_log_with_data(trigger_data: TriggerData, retry_count: int = 0) -> MagicMock: + """Create a mock trigger log with serialized trigger data.""" + trigger_log = MagicMock() + trigger_log.id = "trigger-log-123" + trigger_log.trigger_data = trigger_data.model_dump_json() + trigger_log.retry_count = retry_count + trigger_log.error = "previous-error" + trigger_log.status = WorkflowTriggerStatus.FAILED + trigger_log.to_dict.return_value = {"id": trigger_log.id} + return trigger_log + + +class TestAsyncWorkflowService: + @pytest.fixture + def async_workflow_trigger_mocks(self): + """Shared fixture for async workflow trigger tests. + + Yields mocks for: + - repo: SQLAlchemyWorkflowTriggerLogRepository + - dispatcher_manager_class: QueueDispatcherManager class + - dispatcher: dispatcher instance + - quota_workflow: QuotaType.WORKFLOW + - get_workflow: AsyncWorkflowService._get_workflow method + - professional_task: execute_workflow_professional + - team_task: execute_workflow_team + - sandbox_task: execute_workflow_sandbox + """ + mock_repo = MagicMock() + + def _create_side_effect(new_log): + new_log.id = "trigger-log-123" + return new_log + + mock_repo.create.side_effect = _create_side_effect + + mock_dispatcher = MagicMock() + quota_workflow = MagicMock() + mock_get_workflow = MagicMock() + + mock_professional_task = MagicMock() + mock_team_task = MagicMock() + mock_sandbox_task = MagicMock() + + with ( + patch.object( + async_workflow_service_module, + "SQLAlchemyWorkflowTriggerLogRepository", + return_value=mock_repo, + ), + patch.object(async_workflow_service_module, "QueueDispatcherManager") as mock_dispatcher_manager_class, + patch.object(async_workflow_service_module, "WorkflowService"), + patch.object( + async_workflow_service_module.AsyncWorkflowService, + "_get_workflow", + ) as mock_get_workflow, + patch.object( + async_workflow_service_module, + "QuotaType", + new=SimpleNamespace(WORKFLOW=quota_workflow), + ), + patch.object(async_workflow_service_module, "execute_workflow_professional") as mock_professional_task, + patch.object(async_workflow_service_module, "execute_workflow_team") as mock_team_task, + patch.object(async_workflow_service_module, "execute_workflow_sandbox") as mock_sandbox_task, + ): + # Configure dispatcher_manager to return our mock_dispatcher + mock_dispatcher_manager_class.return_value.get_dispatcher.return_value = mock_dispatcher + + yield { + "repo": mock_repo, + "dispatcher_manager_class": mock_dispatcher_manager_class, + "dispatcher": mock_dispatcher, + "quota_workflow": quota_workflow, + "get_workflow": mock_get_workflow, + "professional_task": mock_professional_task, + "team_task": mock_team_task, + "sandbox_task": mock_sandbox_task, + } + + @pytest.mark.parametrize( + ("queue_name", "selected_task_attr"), + [ + (QueuePriority.PROFESSIONAL, "execute_workflow_professional"), + (QueuePriority.TEAM, "execute_workflow_team"), + (QueuePriority.SANDBOX, "execute_workflow_sandbox"), + ], + ) + def test_should_dispatch_to_matching_celery_task_when_triggering_workflow( + self, queue_name, selected_task_attr, async_workflow_trigger_mocks + ): + """Test queue-based task routing and successful async trigger response.""" + # Arrange + session = MagicMock() + session.commit = MagicMock() + app_model = MagicMock() + app_model.id = "app-123" + session.scalar.return_value = app_model + trigger_data = AsyncWorkflowServiceTestDataFactory.create_trigger_data() + workflow = MagicMock() + workflow.id = "workflow-123" + + mocks = async_workflow_trigger_mocks + mocks["dispatcher"].get_queue_name.return_value = queue_name + mocks["get_workflow"].return_value = workflow + + task_result = MagicMock() + task_result.id = "task-123" + mocks["professional_task"].delay.return_value = task_result + mocks["team_task"].delay.return_value = task_result + mocks["sandbox_task"].delay.return_value = task_result + + class DummyAccount: + def __init__(self, user_id: str): + self.id = user_id + + with patch.object(async_workflow_service_module, "Account", DummyAccount): + user = DummyAccount("account-123") + + # Act + result = AsyncWorkflowService.trigger_workflow_async(session=session, user=user, trigger_data=trigger_data) + + # Assert + assert isinstance(result, AsyncTriggerResponse) + assert result.workflow_trigger_log_id == "trigger-log-123" + assert result.task_id == "task-123" + assert result.status == "queued" + assert result.queue == queue_name + + mocks["quota_workflow"].consume.assert_called_once_with("tenant-123") + assert session.commit.call_count == 2 + + created_log = mocks["repo"].create.call_args[0][0] + assert created_log.status == WorkflowTriggerStatus.QUEUED + assert created_log.queue_name == queue_name + assert created_log.created_by_role == CreatorUserRole.ACCOUNT + assert created_log.created_by == "account-123" + assert created_log.trigger_data == trigger_data.model_dump_json() + assert created_log.inputs == json.dumps(dict(trigger_data.inputs)) + assert created_log.celery_task_id == "task-123" + + task_mocks = { + "execute_workflow_professional": mocks["professional_task"], + "execute_workflow_team": mocks["team_task"], + "execute_workflow_sandbox": mocks["sandbox_task"], + } + for task_attr, task_mock in task_mocks.items(): + if task_attr == selected_task_attr: + task_mock.delay.assert_called_once_with({"workflow_trigger_log_id": "trigger-log-123"}) + else: + task_mock.delay.assert_not_called() + + def test_should_set_end_user_role_when_triggered_by_end_user(self, async_workflow_trigger_mocks): + """Test that non-account users are tracked as END_USER in trigger logs.""" + # Arrange + session = MagicMock() + session.commit = MagicMock() + app_model = MagicMock() + app_model.id = "app-123" + session.scalar.return_value = app_model + trigger_data = AsyncWorkflowServiceTestDataFactory.create_trigger_data() + workflow = MagicMock() + workflow.id = "workflow-123" + + mocks = async_workflow_trigger_mocks + mocks["dispatcher"].get_queue_name.return_value = QueuePriority.SANDBOX + mocks["get_workflow"].return_value = workflow + + task_result = MagicMock(id="task-123") + mocks["sandbox_task"].delay.return_value = task_result + + user = SimpleNamespace(id="end-user-123") + + # Act + AsyncWorkflowService.trigger_workflow_async(session=session, user=user, trigger_data=trigger_data) + + # Assert + created_log = mocks["repo"].create.call_args[0][0] + assert created_log.created_by_role == CreatorUserRole.END_USER + assert created_log.created_by == "end-user-123" + + def test_should_raise_workflow_not_found_when_app_does_not_exist(self): + """Test trigger failure when app lookup returns no result.""" + # Arrange + session = MagicMock() + session.scalar.return_value = None + trigger_data = AsyncWorkflowServiceTestDataFactory.create_trigger_data(app_id="missing-app") + + with ( + patch.object(async_workflow_service_module, "SQLAlchemyWorkflowTriggerLogRepository"), + patch.object(async_workflow_service_module, "QueueDispatcherManager"), + patch.object(async_workflow_service_module, "WorkflowService"), + ): + # Act / Assert + with pytest.raises(WorkflowNotFoundError, match="App not found: missing-app"): + AsyncWorkflowService.trigger_workflow_async( + session=session, + user=SimpleNamespace(id="user-123"), + trigger_data=trigger_data, + ) + + def test_should_mark_log_rate_limited_and_raise_when_quota_exceeded(self, async_workflow_trigger_mocks): + """Test quota-exceeded path updates trigger log and raises WorkflowQuotaLimitError.""" + # Arrange + session = MagicMock() + session.commit = MagicMock() + app_model = MagicMock() + app_model.id = "app-123" + session.scalar.return_value = app_model + trigger_data = AsyncWorkflowServiceTestDataFactory.create_trigger_data() + workflow = MagicMock() + workflow.id = "workflow-123" + + mocks = async_workflow_trigger_mocks + mocks["dispatcher"].get_queue_name.return_value = QueuePriority.TEAM + mocks["get_workflow"].return_value = workflow + mocks["quota_workflow"].consume.side_effect = QuotaExceededError( + feature="workflow", + tenant_id="tenant-123", + required=1, + ) + + # Act / Assert + with pytest.raises( + WorkflowQuotaLimitError, + match="Workflow execution quota limit reached for tenant tenant-123", + ): + AsyncWorkflowService.trigger_workflow_async( + session=session, + user=SimpleNamespace(id="user-123"), + trigger_data=trigger_data, + ) + + assert session.commit.call_count == 2 + updated_log = mocks["repo"].update.call_args[0][0] + assert updated_log.status == WorkflowTriggerStatus.RATE_LIMITED + assert "Quota limit reached" in updated_log.error + mocks["professional_task"].delay.assert_not_called() + mocks["team_task"].delay.assert_not_called() + mocks["sandbox_task"].delay.assert_not_called() + + def test_should_raise_when_reinvoke_target_log_does_not_exist(self): + """Test reinvoke_trigger error path when original trigger log is missing.""" + # Arrange + session = MagicMock() + repo = MagicMock() + repo.get_by_id.return_value = None + + with patch.object(async_workflow_service_module, "SQLAlchemyWorkflowTriggerLogRepository", return_value=repo): + # Act / Assert + with pytest.raises(ValueError, match="Trigger log not found: missing-log"): + AsyncWorkflowService.reinvoke_trigger( + session=session, + user=SimpleNamespace(id="user-123"), + workflow_trigger_log_id="missing-log", + ) + + def test_should_update_original_log_and_requeue_when_reinvoking(self): + """Test reinvoke flow updates original log state and triggers a new async run.""" + # Arrange + session = MagicMock() + trigger_data = AsyncWorkflowServiceTestDataFactory.create_trigger_data() + trigger_log = AsyncWorkflowServiceTestDataFactory.create_trigger_log_with_data(trigger_data, retry_count=1) + repo = MagicMock() + repo.get_by_id.return_value = trigger_log + + expected_response = AsyncTriggerResponse( + workflow_trigger_log_id="new-trigger-log-456", + task_id="task-456", + status="queued", + queue=QueuePriority.TEAM, + ) + + with ( + patch.object(async_workflow_service_module, "SQLAlchemyWorkflowTriggerLogRepository", return_value=repo), + patch.object( + async_workflow_service_module.AsyncWorkflowService, + "trigger_workflow_async", + return_value=expected_response, + ) as mock_trigger_workflow_async, + ): + user = SimpleNamespace(id="user-123") + + # Act + response = AsyncWorkflowService.reinvoke_trigger( + session=session, + user=user, + workflow_trigger_log_id="trigger-log-123", + ) + + # Assert + assert response == expected_response + assert trigger_log.status == WorkflowTriggerStatus.RETRYING + assert trigger_log.retry_count == 2 + assert trigger_log.error is None + assert trigger_log.triggered_at is not None + repo.update.assert_called_once_with(trigger_log) + session.commit.assert_called_once() + called_trigger_data = mock_trigger_workflow_async.call_args[0][2] + assert isinstance(called_trigger_data, TriggerData) + assert called_trigger_data.app_id == "app-123" + + @pytest.mark.parametrize( + ("repo_result", "expected"), + [ + (None, None), + (MagicMock(), {"id": "trigger-log-123"}), + ], + ) + def test_should_return_trigger_log_dict_or_none(self, repo_result, expected): + """Test get_trigger_log returns serialized log data or None.""" + # Arrange + mock_session = MagicMock() + mock_repo = MagicMock() + fake_engine = MagicMock() + mock_repo.get_by_id.return_value = repo_result + if repo_result: + repo_result.to_dict.return_value = expected + + mock_session_context = MagicMock() + mock_session_context.__enter__.return_value = mock_session + mock_session_context.__exit__.return_value = None + + with ( + patch.object(async_workflow_service_module, "db", new=SimpleNamespace(engine=fake_engine)), + patch.object( + async_workflow_service_module, "Session", return_value=mock_session_context + ) as mock_session_class, + patch.object( + async_workflow_service_module, + "SQLAlchemyWorkflowTriggerLogRepository", + return_value=mock_repo, + ), + ): + # Act + result = AsyncWorkflowService.get_trigger_log("trigger-log-123", tenant_id="tenant-123") + + # Assert + assert result == expected + mock_session_class.assert_called_once_with(fake_engine) + mock_repo.get_by_id.assert_called_once_with("trigger-log-123", "tenant-123") + + def test_should_return_recent_logs_as_dict_list(self): + """Test get_recent_logs converts repository models into dictionaries.""" + # Arrange + mock_session = MagicMock() + mock_repo = MagicMock() + log1 = MagicMock() + log1.to_dict.return_value = {"id": "log-1"} + log2 = MagicMock() + log2.to_dict.return_value = {"id": "log-2"} + mock_repo.get_recent_logs.return_value = [log1, log2] + + mock_session_context = MagicMock() + mock_session_context.__enter__.return_value = mock_session + mock_session_context.__exit__.return_value = None + + with ( + patch.object(async_workflow_service_module, "db", new=SimpleNamespace(engine=MagicMock())), + patch.object(async_workflow_service_module, "Session", return_value=mock_session_context), + patch.object( + async_workflow_service_module, + "SQLAlchemyWorkflowTriggerLogRepository", + return_value=mock_repo, + ), + ): + # Act + result = AsyncWorkflowService.get_recent_logs( + tenant_id="tenant-123", + app_id="app-123", + hours=12, + limit=50, + offset=10, + ) + + # Assert + assert result == [{"id": "log-1"}, {"id": "log-2"}] + mock_repo.get_recent_logs.assert_called_once_with( + tenant_id="tenant-123", + app_id="app-123", + hours=12, + limit=50, + offset=10, + ) + + def test_should_return_failed_logs_for_retry_as_dict_list(self): + """Test get_failed_logs_for_retry serializes repository logs into dicts.""" + # Arrange + mock_session = MagicMock() + mock_repo = MagicMock() + log = MagicMock() + log.to_dict.return_value = {"id": "failed-log-1"} + mock_repo.get_failed_for_retry.return_value = [log] + + mock_session_context = MagicMock() + mock_session_context.__enter__.return_value = mock_session + mock_session_context.__exit__.return_value = None + + with ( + patch.object(async_workflow_service_module, "db", new=SimpleNamespace(engine=MagicMock())), + patch.object(async_workflow_service_module, "Session", return_value=mock_session_context), + patch.object( + async_workflow_service_module, + "SQLAlchemyWorkflowTriggerLogRepository", + return_value=mock_repo, + ), + ): + # Act + result = AsyncWorkflowService.get_failed_logs_for_retry(tenant_id="tenant-123", max_retry_count=4, limit=20) + + # Assert + assert result == [{"id": "failed-log-1"}] + mock_repo.get_failed_for_retry.assert_called_once_with(tenant_id="tenant-123", max_retry_count=4, limit=20) + + +class TestAsyncWorkflowServiceGetWorkflow: + def test_should_return_specific_workflow_when_workflow_id_exists(self): + """Test _get_workflow returns published workflow by id when provided.""" + # Arrange + workflow_service = MagicMock() + app_model = MagicMock() + workflow = MagicMock() + workflow_service.get_published_workflow_by_id.return_value = workflow + + # Act + result = AsyncWorkflowService._get_workflow(workflow_service, app_model, workflow_id="workflow-123") + + # Assert + assert result == workflow + workflow_service.get_published_workflow_by_id.assert_called_once_with(app_model, "workflow-123") + workflow_service.get_published_workflow.assert_not_called() + + def test_should_raise_when_specific_workflow_id_not_found(self): + """Test _get_workflow raises WorkflowNotFoundError for unknown workflow id.""" + # Arrange + workflow_service = MagicMock() + app_model = MagicMock() + workflow_service.get_published_workflow_by_id.return_value = None + + # Act / Assert + with pytest.raises(WorkflowNotFoundError, match="Published workflow not found: workflow-404"): + AsyncWorkflowService._get_workflow(workflow_service, app_model, workflow_id="workflow-404") + + def test_should_return_default_published_workflow_when_workflow_id_not_provided(self): + """Test _get_workflow returns default published workflow when no id is provided.""" + # Arrange + workflow_service = MagicMock() + app_model = MagicMock() + app_model.id = "app-123" + workflow = MagicMock() + workflow_service.get_published_workflow.return_value = workflow + + # Act + result = AsyncWorkflowService._get_workflow(workflow_service, app_model) + + # Assert + assert result == workflow + workflow_service.get_published_workflow.assert_called_once_with(app_model) + workflow_service.get_published_workflow_by_id.assert_not_called() + + def test_should_raise_when_default_published_workflow_not_found(self): + """Test _get_workflow raises WorkflowNotFoundError when app has no published workflow.""" + # Arrange + workflow_service = MagicMock() + app_model = MagicMock() + app_model.id = "app-123" + workflow_service.get_published_workflow.return_value = None + + # Act / Assert + with pytest.raises(WorkflowNotFoundError, match="No published workflow found for app: app-123"): + AsyncWorkflowService._get_workflow(workflow_service, app_model) diff --git a/api/tests/unit_tests/services/test_attachment_service.py b/api/tests/unit_tests/services/test_attachment_service.py new file mode 100644 index 0000000000..88be20bc41 --- /dev/null +++ b/api/tests/unit_tests/services/test_attachment_service.py @@ -0,0 +1,73 @@ +import base64 +from unittest.mock import MagicMock, patch + +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from werkzeug.exceptions import NotFound + +import services.attachment_service as attachment_service_module +from models.model import UploadFile +from services.attachment_service import AttachmentService + + +class TestAttachmentService: + def test_should_initialize_with_sessionmaker_when_sessionmaker_is_provided(self): + """Test that AttachmentService keeps the provided sessionmaker instance.""" + session_factory = sessionmaker() + + service = AttachmentService(session_factory=session_factory) + + assert service._session_maker is session_factory + + def test_should_initialize_with_bound_sessionmaker_when_engine_is_provided(self): + """Test that AttachmentService builds a sessionmaker bound to the provided engine.""" + engine = create_engine("sqlite:///:memory:") + + service = AttachmentService(session_factory=engine) + session = service._session_maker() + try: + assert session.bind == engine + finally: + session.close() + engine.dispose() + + @pytest.mark.parametrize("invalid_session_factory", [None, "not-a-session-factory", 1]) + def test_should_raise_assertion_error_when_session_factory_type_is_invalid(self, invalid_session_factory): + """Test that invalid session_factory types are rejected.""" + with pytest.raises(AssertionError, match="must be a sessionmaker or an Engine."): + AttachmentService(session_factory=invalid_session_factory) + + def test_should_return_base64_encoded_blob_when_file_exists(self): + """Test that existing files are loaded from storage and returned as base64.""" + service = AttachmentService(session_factory=sessionmaker()) + upload_file = MagicMock(spec=UploadFile) + upload_file.key = "upload-file-key" + + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = upload_file + service._session_maker = MagicMock(return_value=session) + + with patch.object(attachment_service_module.storage, "load_once", return_value=b"binary-content") as mock_load: + result = service.get_file_base64("file-123") + + assert result == base64.b64encode(b"binary-content").decode() + service._session_maker.assert_called_once_with(expire_on_commit=False) + session.query.assert_called_once_with(UploadFile) + mock_load.assert_called_once_with("upload-file-key") + + def test_should_raise_not_found_when_file_does_not_exist(self): + """Test that missing files raise NotFound and never call storage.""" + service = AttachmentService(session_factory=sessionmaker()) + + session = MagicMock() + session.query.return_value.where.return_value.first.return_value = None + service._session_maker = MagicMock(return_value=session) + + with patch.object(attachment_service_module.storage, "load_once") as mock_load: + with pytest.raises(NotFound, match="File not found"): + service.get_file_base64("missing-file") + + service._session_maker.assert_called_once_with(expire_on_commit=False) + session.query.assert_called_once_with(UploadFile) + mock_load.assert_not_called() diff --git a/api/tests/unit_tests/services/test_batch_indexing_base.py b/api/tests/unit_tests/services/test_batch_indexing_base.py new file mode 100644 index 0000000000..bd68b67d89 --- /dev/null +++ b/api/tests/unit_tests/services/test_batch_indexing_base.py @@ -0,0 +1,387 @@ +from dataclasses import asdict +from typing import Any, ClassVar, cast +from unittest.mock import MagicMock, patch + +import pytest + +from core.entities.document_task import DocumentTask +from enums.cloud_plan import CloudPlan +from services.document_indexing_proxy.batch_indexing_base import BatchDocumentIndexingProxy + +# --------------------------------------------------------------------------- +# Concrete subclass for testing (the base class is abstract) +# --------------------------------------------------------------------------- + + +class ConcreteBatchProxy(BatchDocumentIndexingProxy): + """Minimal concrete implementation that provides the required class-level vars.""" + + QUEUE_NAME: ClassVar[str] = "test_queue" + NORMAL_TASK_FUNC: ClassVar[Any] = MagicMock(name="NORMAL_TASK_FUNC") + PRIORITY_TASK_FUNC: ClassVar[Any] = MagicMock(name="PRIORITY_TASK_FUNC") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +TENANT_ID = "tenant-abc" +DATASET_ID = "dataset-xyz" +DOC_IDS: list[str] = ["doc-1", "doc-2", "doc-3"] + + +def make_proxy(**kwargs: Any) -> ConcreteBatchProxy: + """Factory: returns a ConcreteBatchProxy with TenantIsolatedTaskQueue mocked out.""" + with patch("services.document_indexing_proxy.batch_indexing_base.TenantIsolatedTaskQueue") as MockQueue: + proxy = ConcreteBatchProxy( + tenant_id=kwargs.get("tenant_id", TENANT_ID), + dataset_id=kwargs.get("dataset_id", DATASET_ID), + document_ids=kwargs.get("document_ids", DOC_IDS), + ) + # Expose the mock queue on the proxy so tests can assert on it + proxy._tenant_isolated_task_queue = MockQueue.return_value + return proxy + + +# --------------------------------------------------------------------------- +# Test suite +# --------------------------------------------------------------------------- + + +class TestBatchDocumentIndexingProxyInit: + """Tests for __init__ of BatchDocumentIndexingProxy.""" + + def test_should_store_document_ids_when_initialized(self) -> None: + """Verify that document_ids are stored on the proxy instance.""" + # Arrange + doc_ids: list[str] = ["doc-a", "doc-b"] + + # Act + with patch("services.document_indexing_proxy.batch_indexing_base.TenantIsolatedTaskQueue"): + proxy = ConcreteBatchProxy(TENANT_ID, DATASET_ID, doc_ids) + + # Assert + assert proxy._document_ids == doc_ids + + def test_should_propagate_tenant_and_dataset_to_base_when_initialized(self) -> None: + """Verify that tenant_id and dataset_id are forwarded to the parent class.""" + # Arrange / Act + with patch("services.document_indexing_proxy.batch_indexing_base.TenantIsolatedTaskQueue"): + proxy = ConcreteBatchProxy(TENANT_ID, DATASET_ID, DOC_IDS) + + # Assert + assert proxy._tenant_id == TENANT_ID + assert proxy._dataset_id == DATASET_ID + + def test_should_create_tenant_isolated_queue_with_correct_args_when_initialized(self) -> None: + """Verify that TenantIsolatedTaskQueue is constructed with (tenant_id, QUEUE_NAME).""" + # Arrange / Act + with patch("services.document_indexing_proxy.batch_indexing_base.TenantIsolatedTaskQueue") as MockQueue: + ConcreteBatchProxy(TENANT_ID, DATASET_ID, DOC_IDS) + + # Assert + MockQueue.assert_called_once_with(TENANT_ID, ConcreteBatchProxy.QUEUE_NAME) + + @pytest.mark.parametrize("doc_ids", [[], ["single-doc"], ["d1", "d2", "d3", "d4"]]) + def test_should_accept_any_length_document_ids_when_initialized(self, doc_ids: list[str]) -> None: + """Verify that empty, single, and multiple document IDs are all accepted.""" + # Arrange / Act + with patch("services.document_indexing_proxy.batch_indexing_base.TenantIsolatedTaskQueue"): + proxy = ConcreteBatchProxy(TENANT_ID, DATASET_ID, doc_ids) + + # Assert + assert list(proxy._document_ids) == doc_ids + + +class TestSendToDirectQueue: + """Tests for _send_to_direct_queue.""" + + def test_should_call_task_func_delay_with_correct_args_when_sent_to_direct_queue( + self, + ) -> None: + """Verify that task_func.delay is called with the right kwargs.""" + # Arrange + proxy = make_proxy() + task_func = MagicMock() + + # Act + proxy._send_to_direct_queue(task_func) + + # Assert + task_func.delay.assert_called_once_with( + tenant_id=TENANT_ID, + dataset_id=DATASET_ID, + document_ids=DOC_IDS, + ) + + def test_should_not_interact_with_tenant_queue_when_sent_to_direct_queue(self) -> None: + """Direct queue path must never touch the tenant-isolated queue.""" + # Arrange + proxy = make_proxy() + task_func = MagicMock() + + # Act + proxy._send_to_direct_queue(task_func) + + # Assert + mock_queue = cast(MagicMock, proxy._tenant_isolated_task_queue) + mock_queue.push_tasks.assert_not_called() + mock_queue.set_task_waiting_time.assert_not_called() + + def test_should_forward_any_callable_when_sent_to_direct_queue(self) -> None: + """Verify that different task functions are each called correctly.""" + # Arrange + proxy = make_proxy() + task_a, task_b = MagicMock(), MagicMock() + + # Act + proxy._send_to_direct_queue(task_a) + proxy._send_to_direct_queue(task_b) + + # Assert + task_a.delay.assert_called_once() + task_b.delay.assert_called_once() + + +class TestSendToTenantQueue: + """Tests for _send_to_tenant_queue — both branches.""" + + # ------------------------------------------------------------------ + # Branch 1: get_task_key() is truthy → push to waiting queue + # ------------------------------------------------------------------ + + def test_should_push_task_to_queue_when_task_key_exists(self) -> None: + """When get_task_key() is truthy, tasks must be pushed via push_tasks().""" + # Arrange + proxy = make_proxy() + proxy._tenant_isolated_task_queue.get_task_key.return_value = "existing-key" + task_func = MagicMock() + + # Act + proxy._send_to_tenant_queue(task_func) + + # Assert + mock_queue = cast(MagicMock, proxy._tenant_isolated_task_queue) + expected_payload = [asdict(DocumentTask(tenant_id=TENANT_ID, dataset_id=DATASET_ID, document_ids=DOC_IDS))] + mock_queue.push_tasks.assert_called_once_with(expected_payload) + + def test_should_not_call_task_func_delay_when_task_key_exists(self) -> None: + """When a key already exists, task_func.delay must never be called.""" + # Arrange + proxy = make_proxy() + proxy._tenant_isolated_task_queue.get_task_key.return_value = "existing-key" + task_func = MagicMock() + + # Act + proxy._send_to_tenant_queue(task_func) + + # Assert + cast(MagicMock, task_func.delay).assert_not_called() + + def test_should_not_set_waiting_time_when_task_key_exists(self) -> None: + """When a key already exists, set_task_waiting_time must never be called.""" + # Arrange + proxy = make_proxy() + proxy._tenant_isolated_task_queue.get_task_key.return_value = "existing-key" + task_func = MagicMock() + + # Act + proxy._send_to_tenant_queue(task_func) + + # Assert + mock_queue = cast(MagicMock, proxy._tenant_isolated_task_queue) + mock_queue.set_task_waiting_time.assert_not_called() + + def test_should_serialize_document_task_correctly_when_pushing_to_queue(self) -> None: + """Verify the serialised payload matches asdict(DocumentTask(...)).""" + # Arrange + proxy = make_proxy(document_ids=["doc-x"]) + proxy._tenant_isolated_task_queue.get_task_key.return_value = "k" + task_func = MagicMock() + + # Act + proxy._send_to_tenant_queue(task_func) + + # Assert — inspect the payload passed to push_tasks + mock_queue = cast(MagicMock, proxy._tenant_isolated_task_queue) + call_args = mock_queue.push_tasks.call_args + pushed_list = call_args[0][0] # first positional arg + assert len(pushed_list) == 1 + assert pushed_list[0]["tenant_id"] == TENANT_ID + assert pushed_list[0]["dataset_id"] == DATASET_ID + assert pushed_list[0]["document_ids"] == ["doc-x"] + + # ------------------------------------------------------------------ + # Branch 2: get_task_key() is falsy → set flag + dispatch via delay + # ------------------------------------------------------------------ + + def test_should_set_waiting_time_and_call_delay_when_no_task_key(self) -> None: + """When get_task_key() is falsy, set_task_waiting_time and task_func.delay are invoked.""" + # Arrange + proxy = make_proxy() + proxy._tenant_isolated_task_queue.get_task_key.return_value = None + task_func = MagicMock() + + # Act + proxy._send_to_tenant_queue(task_func) + + # Assert + mock_queue = cast(MagicMock, proxy._tenant_isolated_task_queue) + mock_queue.set_task_waiting_time.assert_called_once() + cast(MagicMock, task_func.delay).assert_called_once_with( + tenant_id=TENANT_ID, + dataset_id=DATASET_ID, + document_ids=DOC_IDS, + ) + + def test_should_not_push_tasks_when_no_task_key(self) -> None: + """When get_task_key() is falsy, push_tasks must never be called.""" + # Arrange + proxy = make_proxy() + proxy._tenant_isolated_task_queue.get_task_key.return_value = None + task_func = MagicMock() + + # Act + proxy._send_to_tenant_queue(task_func) + + # Assert + mock_queue = cast(MagicMock, proxy._tenant_isolated_task_queue) + mock_queue.push_tasks.assert_not_called() + + @pytest.mark.parametrize("falsy_key", [None, "", 0, False]) + def test_should_init_task_when_key_is_any_falsy_value(self, falsy_key: Any) -> None: + """Verify that any falsy return from get_task_key() triggers the init branch.""" + # Arrange + proxy = make_proxy() + proxy._tenant_isolated_task_queue.get_task_key.return_value = falsy_key + task_func = MagicMock() + + # Act + proxy._send_to_tenant_queue(task_func) + + # Assert + mock_queue = cast(MagicMock, proxy._tenant_isolated_task_queue) + mock_queue.set_task_waiting_time.assert_called_once() + cast(MagicMock, task_func.delay).assert_called_once() + + +class TestDispatchRouting: + """Tests for the _dispatch / delay routing logic inherited from the base class.""" + + def _mock_features(self, enabled: bool, plan: CloudPlan) -> MagicMock: + features = MagicMock() + features.billing.enabled = enabled + features.billing.subscription.plan = plan + return features + + def test_should_send_to_normal_tenant_queue_when_billing_enabled_and_sandbox_plan(self) -> None: + """Sandbox plan routes to normal priority queue with tenant isolation.""" + # Arrange + proxy = make_proxy() + proxy._tenant_isolated_task_queue.get_task_key.return_value = None + + with patch("services.document_indexing_proxy.base.FeatureService.get_features") as mock_features: + mock_features.return_value = self._mock_features(enabled=True, plan=CloudPlan.SANDBOX) + + # Act + with patch.object(proxy, "_send_to_default_tenant_queue") as mock_method: + proxy._dispatch() + + # Assert + mock_method.assert_called_once() + + def test_should_send_to_priority_tenant_queue_when_billing_enabled_and_paid_plan(self) -> None: + """Non-sandbox paid plan routes to priority queue with tenant isolation.""" + # Arrange + proxy = make_proxy() + + with patch("services.document_indexing_proxy.base.FeatureService.get_features") as mock_features: + mock_features.return_value = self._mock_features(enabled=True, plan=CloudPlan.PROFESSIONAL) + + # Act + with patch.object(proxy, "_send_to_priority_tenant_queue") as mock_method: + proxy._dispatch() + + # Assert + mock_method.assert_called_once() + + def test_should_send_to_priority_direct_queue_when_billing_not_enabled(self) -> None: + """Self-hosted / no billing → priority direct queue (no tenant isolation).""" + # Arrange + proxy = make_proxy() + + with patch("services.document_indexing_proxy.base.FeatureService.get_features") as mock_features: + mock_features.return_value = self._mock_features(enabled=False, plan=CloudPlan.SANDBOX) + + # Act + with patch.object(proxy, "_send_to_priority_direct_queue") as mock_method: + proxy._dispatch() + + # Assert + mock_method.assert_called_once() + + def test_should_call_dispatch_when_delay_is_invoked(self) -> None: + """Calling delay() must invoke _dispatch() exactly once.""" + # Arrange + proxy = make_proxy() + + # Act + with patch.object(proxy, "_dispatch") as mock_dispatch: + proxy.delay() + + # Assert + mock_dispatch.assert_called_once() + + def test_should_use_feature_service_for_billing_info(self) -> None: + """Verify that FeatureService.get_features is consulted during dispatch.""" + # Arrange + proxy = make_proxy() + + with patch("services.document_indexing_proxy.base.FeatureService.get_features") as mock_features: + mock_features.return_value = self._mock_features(enabled=False, plan=CloudPlan.SANDBOX) + with patch.object(proxy, "_send_to_priority_direct_queue"): + # Act + proxy._dispatch() + + # Assert + mock_features.assert_called_once_with(TENANT_ID) + + +class TestBaseRouterHelpers: + """Tests for the three routing helper methods from the base class.""" + + def test_should_call_send_to_tenant_queue_with_normal_func_when_default_tenant_queue(self) -> None: + """_send_to_default_tenant_queue must forward NORMAL_TASK_FUNC.""" + # Arrange + proxy = make_proxy() + + # Act + with patch.object(proxy, "_send_to_tenant_queue") as mock_method: + proxy._send_to_default_tenant_queue() + + # Assert + mock_method.assert_called_once_with(ConcreteBatchProxy.NORMAL_TASK_FUNC) + + def test_should_call_send_to_tenant_queue_with_priority_func_when_priority_tenant_queue(self) -> None: + """_send_to_priority_tenant_queue must forward PRIORITY_TASK_FUNC.""" + # Arrange + proxy = make_proxy() + + # Act + with patch.object(proxy, "_send_to_tenant_queue") as mock_method: + proxy._send_to_priority_tenant_queue() + + # Assert + mock_method.assert_called_once_with(ConcreteBatchProxy.PRIORITY_TASK_FUNC) + + def test_should_call_send_to_direct_queue_with_priority_func_when_priority_direct_queue(self) -> None: + """_send_to_priority_direct_queue must forward PRIORITY_TASK_FUNC.""" + # Arrange + proxy = make_proxy() + + # Act + with patch.object(proxy, "_send_to_direct_queue") as mock_method: + proxy._send_to_priority_direct_queue() + + # Assert + mock_method.assert_called_once_with(ConcreteBatchProxy.PRIORITY_TASK_FUNC) diff --git a/api/tests/unit_tests/services/test_clear_free_plan_tenant_expired_logs.py b/api/tests/unit_tests/services/test_clear_free_plan_tenant_expired_logs.py index 5099362e00..1926cb133a 100644 --- a/api/tests/unit_tests/services/test_clear_free_plan_tenant_expired_logs.py +++ b/api/tests/unit_tests/services/test_clear_free_plan_tenant_expired_logs.py @@ -1,9 +1,12 @@ import datetime -from unittest.mock import Mock, patch +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch import pytest from sqlalchemy.orm import Session +from enums.cloud_plan import CloudPlan +from services import clear_free_plan_tenant_expired_logs as service_module from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpiredLogs @@ -156,13 +159,453 @@ class TestClearFreePlanTenantExpiredLogs: # Should call delete for each table that has records assert mock_session.query.return_value.where.return_value.delete.called - def test_clear_message_related_tables_logging_output( - self, mock_session, sample_message_ids, sample_records, capsys + def test_clear_message_related_tables_all_serialization_fails_skips_backup_but_deletes( + self, mock_session, sample_message_ids ): - """Test that logging output is generated.""" + record = Mock() + record.id = "record-1" + record.to_dict.side_effect = Exception("Serialization error") + with patch("services.clear_free_plan_tenant_expired_logs.storage") as mock_storage: - mock_session.query.return_value.where.return_value.all.return_value = sample_records + mock_session.query.return_value.where.return_value.all.return_value = [record] ClearFreePlanTenantExpiredLogs._clear_message_related_tables(mock_session, "tenant-123", sample_message_ids) - pass + mock_storage.save.assert_not_called() + assert mock_session.query.return_value.where.return_value.delete.called + + +class _ImmediateFuture: + def __init__(self, fn, args, kwargs): + self._fn = fn + self._args = args + self._kwargs = kwargs + + def result(self): + return self._fn(*self._args, **self._kwargs) + + +class _ImmediateExecutor: + def __init__(self, *args, **kwargs) -> None: + self.submitted: list[tuple[object, tuple[object, ...], dict[str, object]]] = [] + + def submit(self, fn, *args, **kwargs): + self.submitted.append((fn, args, kwargs)) + return _ImmediateFuture(fn, args, kwargs) + + +def _session_wrapper_for_no_autoflush(session: Mock) -> Mock: + """ + ClearFreePlanTenantExpiredLogs.process_tenant uses: + with Session(db.engine).no_autoflush as session: + so Session(db.engine) must return an object with a no_autoflush context manager. + """ + cm = MagicMock() + cm.__enter__.return_value = session + cm.__exit__.return_value = None + + wrapper = MagicMock() + wrapper.no_autoflush = cm + return wrapper + + +def _session_wrapper_for_direct(session: Mock) -> Mock: + """ClearFreePlanTenantExpiredLogs.process uses: with Session(db.engine) as session:""" + wrapper = MagicMock() + wrapper.__enter__.return_value = session + wrapper.__exit__.return_value = None + return wrapper + + +def test_process_tenant_processes_all_batches(monkeypatch: pytest.MonkeyPatch) -> None: + flask_app = service_module.Flask("test-app") + + monkeypatch.setattr( + service_module, + "db", + SimpleNamespace( + engine=object(), + session=SimpleNamespace( + scalars=lambda _stmt: SimpleNamespace( + all=lambda: [SimpleNamespace(id="app-1"), SimpleNamespace(id="app-2")] + ) + ), + ), + ) + + mock_storage = MagicMock() + monkeypatch.setattr(service_module, "storage", mock_storage) + monkeypatch.setattr(service_module.click, "echo", lambda *_args, **_kwargs: None) + monkeypatch.setattr(service_module.click, "style", lambda msg, **_kwargs: msg) + + clear_related = MagicMock() + monkeypatch.setattr(ClearFreePlanTenantExpiredLogs, "_clear_message_related_tables", clear_related) + + # Session sequence for messages, conversations, workflow_app_logs loops: + # - messages: one batch then empty + # - conversations: one batch then empty + # - workflow app logs: one batch then empty + msg1 = SimpleNamespace(id="m1", to_dict=lambda: {"id": "m1"}) + conv1 = SimpleNamespace(id="c1", to_dict=lambda: {"id": "c1"}) + log1 = SimpleNamespace(id="l1", to_dict=lambda: {"id": "l1"}) + + def make_query_with_batches(batches: list[list[object]]): + q = MagicMock() + q.where.return_value = q + q.limit.return_value = q + q.all.side_effect = batches + q.delete.return_value = 1 + return q + + msg_session_1 = MagicMock() + msg_session_1.query.side_effect = lambda model: ( + make_query_with_batches([[msg1], []]) if model == service_module.Message else MagicMock() + ) + msg_session_1.commit.return_value = None + + msg_session_2 = MagicMock() + msg_session_2.query.side_effect = lambda model: ( + make_query_with_batches([[]]) if model == service_module.Message else MagicMock() + ) + msg_session_2.commit.return_value = None + + conv_session_1 = MagicMock() + conv_session_1.query.side_effect = lambda model: ( + make_query_with_batches([[conv1], []]) if model == service_module.Conversation else MagicMock() + ) + conv_session_1.commit.return_value = None + + conv_session_2 = MagicMock() + conv_session_2.query.side_effect = lambda model: ( + make_query_with_batches([[]]) if model == service_module.Conversation else MagicMock() + ) + conv_session_2.commit.return_value = None + + wal_session_1 = MagicMock() + wal_session_1.query.side_effect = lambda model: ( + make_query_with_batches([[log1], []]) if model == service_module.WorkflowAppLog else MagicMock() + ) + wal_session_1.commit.return_value = None + + wal_session_2 = MagicMock() + wal_session_2.query.side_effect = lambda model: ( + make_query_with_batches([[]]) if model == service_module.WorkflowAppLog else MagicMock() + ) + wal_session_2.commit.return_value = None + + session_wrappers = [ + _session_wrapper_for_no_autoflush(msg_session_1), + _session_wrapper_for_no_autoflush(msg_session_2), + _session_wrapper_for_no_autoflush(conv_session_1), + _session_wrapper_for_no_autoflush(conv_session_2), + _session_wrapper_for_no_autoflush(wal_session_1), + _session_wrapper_for_no_autoflush(wal_session_2), + ] + + monkeypatch.setattr(service_module, "Session", lambda _engine: session_wrappers.pop(0)) + + def fake_select(*_args, **_kwargs): + stmt = MagicMock() + stmt.where.return_value = stmt + return stmt + + monkeypatch.setattr(service_module, "select", fake_select) + + # Repositories for workflow node executions and workflow runs + node_repo = MagicMock() + node_repo.get_expired_executions_batch.side_effect = [[SimpleNamespace(id="ne-1")], []] + node_repo.delete_executions_by_ids.return_value = 1 + + run_repo = MagicMock() + run_repo.get_expired_runs_batch.side_effect = [[SimpleNamespace(id="wr-1", to_dict=lambda: {"id": "wr-1"})], []] + run_repo.delete_runs_by_ids.return_value = 1 + + monkeypatch.setattr(service_module, "sessionmaker", lambda **_kwargs: object()) + monkeypatch.setattr( + service_module.DifyAPIRepositoryFactory, + "create_api_workflow_node_execution_repository", + lambda _sm: node_repo, + ) + monkeypatch.setattr( + service_module.DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda _sm: run_repo, + ) + + ClearFreePlanTenantExpiredLogs.process_tenant(flask_app, "tenant-1", days=7, batch=10) + + # messages backup, conversations backup, node executions backup, runs backup, workflow app logs backup + assert mock_storage.save.call_count >= 5 + clear_related.assert_called() + + +def test_process_with_tenant_ids_filters_by_plan_and_logs_errors(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(service_module, "db", SimpleNamespace(engine=object())) + + # Total tenant count query + count_session = MagicMock() + count_query = MagicMock() + count_query.count.return_value = 2 + count_session.query.return_value = count_query + + monkeypatch.setattr(service_module, "Session", lambda _engine: _session_wrapper_for_direct(count_session)) + + # Avoid LocalProxy usage + flask_app = service_module.Flask("test-app") + monkeypatch.setattr(service_module, "current_app", SimpleNamespace(_get_current_object=lambda: flask_app)) + + executor = _ImmediateExecutor() + monkeypatch.setattr(service_module, "ThreadPoolExecutor", lambda **_kwargs: executor) + + monkeypatch.setattr(service_module.click, "style", lambda msg, **_kwargs: msg) + echo_mock = MagicMock() + monkeypatch.setattr(service_module.click, "echo", echo_mock) + + monkeypatch.setattr(service_module.dify_config, "BILLING_ENABLED", True) + + def fake_get_info(tenant_id: str): + if tenant_id == "t_sandbox": + return {"subscription": {"plan": CloudPlan.SANDBOX}} + if tenant_id == "t_fail": + raise RuntimeError("boom") + return {"subscription": {"plan": "team"}} + + monkeypatch.setattr(service_module.BillingService, "get_info", staticmethod(fake_get_info)) + + process_tenant_mock = MagicMock(side_effect=lambda *_args, **_kwargs: (_ for _ in ()).throw(RuntimeError("err"))) + monkeypatch.setattr(ClearFreePlanTenantExpiredLogs, "process_tenant", process_tenant_mock) + + logger_exc = MagicMock() + monkeypatch.setattr(service_module.logger, "exception", logger_exc) + + ClearFreePlanTenantExpiredLogs.process(days=7, batch=10, tenant_ids=["t_sandbox", "t_paid", "t_fail"]) + + # Only sandbox tenant should attempt processing, and its failure should be swallowed + logged. + assert process_tenant_mock.call_count == 1 + assert logger_exc.call_count >= 1 + + +def test_process_without_tenant_ids_batches_and_scales_interval(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(service_module, "db", SimpleNamespace(engine=object())) + monkeypatch.setattr(service_module.dify_config, "BILLING_ENABLED", False) + + started_at = datetime.datetime(2023, 4, 3, 8, 59, 24) + fixed_now = started_at + datetime.timedelta(hours=2) + + class FixedDateTime(datetime.datetime): + @classmethod + def now(cls, tz=None): + return fixed_now + + monkeypatch.setattr(service_module.datetime, "datetime", FixedDateTime) + + # Avoid LocalProxy usage + flask_app = service_module.Flask("test-app") + monkeypatch.setattr(service_module, "current_app", SimpleNamespace(_get_current_object=lambda: flask_app)) + + executor = _ImmediateExecutor() + monkeypatch.setattr(service_module, "ThreadPoolExecutor", lambda **_kwargs: executor) + + monkeypatch.setattr(service_module.click, "style", lambda msg, **_kwargs: msg) + monkeypatch.setattr(service_module.click, "echo", lambda *_args, **_kwargs: None) + + # Sessions used: + # 1) total tenant count + # 2) per-batch tenant scan (count + tenant list) + total_session = MagicMock() + total_query = MagicMock() + total_query.count.return_value = 250 + total_session.query.return_value = total_query + + batch_session = MagicMock() + q1 = MagicMock() + q1.where.return_value = q1 + q1.count.return_value = 200 + q2 = MagicMock() + q2.where.return_value = q2 + q2.count.return_value = 200 + q3 = MagicMock() + q3.where.return_value = q3 + q3.count.return_value = 200 + q4 = MagicMock() + q4.where.return_value = q4 + q4.count.return_value = 50 # choose this interval, then scale it + + rows = [SimpleNamespace(id="tenant-a"), SimpleNamespace(id="tenant-b")] + q_rs = MagicMock() + q_rs.where.return_value = q_rs + q_rs.order_by.return_value = rows + + batch_session.query.side_effect = [q1, q2, q3, q4, q_rs] + + sessions = [_session_wrapper_for_direct(total_session), _session_wrapper_for_direct(batch_session)] + monkeypatch.setattr(service_module, "Session", lambda _engine: sessions.pop(0)) + + process_tenant_mock = MagicMock() + monkeypatch.setattr(ClearFreePlanTenantExpiredLogs, "process_tenant", process_tenant_mock) + + ClearFreePlanTenantExpiredLogs.process(days=7, batch=10, tenant_ids=[]) + + # Should submit/process tenants from the batch query + assert process_tenant_mock.call_count == 2 + + +def test_process_with_tenant_ids_emits_progress_every_100(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(service_module, "db", SimpleNamespace(engine=object())) + + count_session = MagicMock() + count_query = MagicMock() + count_query.count.return_value = 100 + count_session.query.return_value = count_query + monkeypatch.setattr(service_module, "Session", lambda _engine: _session_wrapper_for_direct(count_session)) + + flask_app = service_module.Flask("test-app") + monkeypatch.setattr(service_module, "current_app", SimpleNamespace(_get_current_object=lambda: flask_app)) + monkeypatch.setattr(service_module.dify_config, "BILLING_ENABLED", False) + + executor = _ImmediateExecutor() + monkeypatch.setattr(service_module, "ThreadPoolExecutor", lambda **_kwargs: executor) + + echo_mock = MagicMock() + monkeypatch.setattr(service_module.click, "style", lambda msg, **_kwargs: msg) + monkeypatch.setattr(service_module.click, "echo", echo_mock) + + monkeypatch.setattr(ClearFreePlanTenantExpiredLogs, "process_tenant", MagicMock()) + + tenant_ids = [f"t{i}" for i in range(100)] + ClearFreePlanTenantExpiredLogs.process(days=7, batch=10, tenant_ids=tenant_ids) + + assert any("Processed 100 tenants" in str(call.args[0]) for call in echo_mock.call_args_list) + + +def test_process_without_tenant_ids_all_intervals_too_many_uses_min_interval(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(service_module, "db", SimpleNamespace(engine=object())) + monkeypatch.setattr(service_module.dify_config, "BILLING_ENABLED", False) + + started_at = datetime.datetime(2023, 4, 3, 8, 59, 24) + # Keep the total range smaller than the minimum interval (1 hour) so the loop runs once. + fixed_now = started_at + datetime.timedelta(minutes=30) + + class FixedDateTime(datetime.datetime): + @classmethod + def now(cls, tz=None): + return fixed_now + + monkeypatch.setattr(service_module.datetime, "datetime", FixedDateTime) + + flask_app = service_module.Flask("test-app") + monkeypatch.setattr(service_module, "current_app", SimpleNamespace(_get_current_object=lambda: flask_app)) + + executor = _ImmediateExecutor() + monkeypatch.setattr(service_module, "ThreadPoolExecutor", lambda **_kwargs: executor) + + monkeypatch.setattr(service_module.click, "style", lambda msg, **_kwargs: msg) + monkeypatch.setattr(service_module.click, "echo", lambda *_args, **_kwargs: None) + + total_session = MagicMock() + total_query = MagicMock() + total_query.count.return_value = 250 + total_session.query.return_value = total_query + + batch_session = MagicMock() + # Count results for all 5 intervals, all > 100 => take the for-else path. + count_queries = [] + for _ in range(5): + q = MagicMock() + q.where.return_value = q + q.count.return_value = 200 + count_queries.append(q) + + rows = [SimpleNamespace(id="tenant-a")] + q_rs = MagicMock() + q_rs.where.return_value = q_rs + q_rs.order_by.return_value = rows + + batch_session.query.side_effect = [*count_queries, q_rs] + + sessions = [_session_wrapper_for_direct(total_session), _session_wrapper_for_direct(batch_session)] + monkeypatch.setattr(service_module, "Session", lambda _engine: sessions.pop(0)) + + process_tenant_mock = MagicMock() + monkeypatch.setattr(ClearFreePlanTenantExpiredLogs, "process_tenant", process_tenant_mock) + + ClearFreePlanTenantExpiredLogs.process(days=7, batch=10, tenant_ids=[]) + + assert process_tenant_mock.call_count == 1 + assert len(count_queries) == 5 + assert batch_session.query.call_count >= 6 + + +def test_process_tenant_repo_loops_break_on_empty_second_batch(monkeypatch: pytest.MonkeyPatch) -> None: + flask_app = service_module.Flask("test-app") + + monkeypatch.setattr( + service_module, + "db", + SimpleNamespace( + engine=object(), + session=SimpleNamespace(scalars=lambda _stmt: SimpleNamespace(all=lambda: [SimpleNamespace(id="app-1")])), + ), + ) + mock_storage = MagicMock() + monkeypatch.setattr(service_module, "storage", mock_storage) + monkeypatch.setattr(service_module.click, "echo", lambda *_args, **_kwargs: None) + monkeypatch.setattr(service_module.click, "style", lambda msg, **_kwargs: msg) + monkeypatch.setattr(ClearFreePlanTenantExpiredLogs, "_clear_message_related_tables", MagicMock()) + + # Make message/conversation/workflow_app_log loops no-op (empty immediately) + empty_session = MagicMock() + q_empty = MagicMock() + q_empty.where.return_value = q_empty + q_empty.limit.return_value = q_empty + q_empty.all.return_value = [] + empty_session.query.return_value = q_empty + empty_session.commit.return_value = None + session_wrappers = [ + _session_wrapper_for_no_autoflush(empty_session), + _session_wrapper_for_no_autoflush(empty_session), + _session_wrapper_for_no_autoflush(empty_session), + ] + monkeypatch.setattr(service_module, "Session", lambda _engine: session_wrappers.pop(0)) + + def fake_select(*_args, **_kwargs): + stmt = MagicMock() + stmt.where.return_value = stmt + return stmt + + monkeypatch.setattr(service_module, "select", fake_select) + + # Repos: first returns exactly batch items -> no "< batch" break, second returns [] -> hit the len==0 break. + node_repo = MagicMock() + node_repo.get_expired_executions_batch.side_effect = [ + [SimpleNamespace(id="ne-1"), SimpleNamespace(id="ne-2")], + [], + ] + node_repo.delete_executions_by_ids.return_value = 2 + + run_repo = MagicMock() + run_repo.get_expired_runs_batch.side_effect = [ + [ + SimpleNamespace(id="wr-1", to_dict=lambda: {"id": "wr-1"}), + SimpleNamespace(id="wr-2", to_dict=lambda: {"id": "wr-2"}), + ], + [], + ] + run_repo.delete_runs_by_ids.return_value = 2 + + monkeypatch.setattr(service_module, "sessionmaker", lambda **_kwargs: object()) + monkeypatch.setattr( + service_module.DifyAPIRepositoryFactory, + "create_api_workflow_node_execution_repository", + lambda _sm: node_repo, + ) + monkeypatch.setattr( + service_module.DifyAPIRepositoryFactory, + "create_api_workflow_run_repository", + lambda _sm: run_repo, + ) + + ClearFreePlanTenantExpiredLogs.process_tenant(flask_app, "tenant-1", days=7, batch=2) + + assert node_repo.get_expired_executions_batch.call_count == 2 + assert run_repo.get_expired_runs_batch.call_count == 2 diff --git a/api/tests/unit_tests/services/test_code_based_extension_service.py b/api/tests/unit_tests/services/test_code_based_extension_service.py new file mode 100644 index 0000000000..f6538a140a --- /dev/null +++ b/api/tests/unit_tests/services/test_code_based_extension_service.py @@ -0,0 +1,89 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from services.code_based_extension_service import CodeBasedExtensionService + + +class TestCodeBasedExtensionService: + def test_should_return_only_non_builtin_extensions_with_public_fields(self, monkeypatch: pytest.MonkeyPatch): + """Test service returns only non-builtin extensions with name/label/form_schema fields.""" + moderation_extension = SimpleNamespace( + name="custom-moderation", + label={"en-US": "Custom Moderation"}, + form_schema=[{"variable": "api_key"}], + builtin=False, + extension_class=object, + position=20, + ) + builtin_extension = SimpleNamespace( + name="builtin-moderation", + label={"en-US": "Builtin Moderation"}, + form_schema=[{"variable": "token"}], + builtin=True, + extension_class=object, + position=1, + ) + retrieval_extension = SimpleNamespace( + name="custom-retrieval", + label={"en-US": "Custom Retrieval"}, + form_schema=None, + builtin=False, + extension_class=object, + position=30, + ) + module_extensions_mock = MagicMock(return_value=[moderation_extension, builtin_extension, retrieval_extension]) + monkeypatch.setattr( + "services.code_based_extension_service.code_based_extension.module_extensions", + module_extensions_mock, + ) + + result = CodeBasedExtensionService.get_code_based_extension("external_data_tool") + + assert result == [ + { + "name": "custom-moderation", + "label": {"en-US": "Custom Moderation"}, + "form_schema": [{"variable": "api_key"}], + }, + { + "name": "custom-retrieval", + "label": {"en-US": "Custom Retrieval"}, + "form_schema": None, + }, + ] + assert set(result[0].keys()) == {"name", "label", "form_schema"} + module_extensions_mock.assert_called_once_with("external_data_tool") + + def test_should_return_empty_list_when_all_extensions_are_builtin(self, monkeypatch: pytest.MonkeyPatch): + """Test builtin extensions are filtered out completely.""" + builtin_extension = SimpleNamespace( + name="builtin-moderation", + label={"en-US": "Builtin Moderation"}, + form_schema=[{"variable": "token"}], + builtin=True, + ) + module_extensions_mock = MagicMock(return_value=[builtin_extension]) + monkeypatch.setattr( + "services.code_based_extension_service.code_based_extension.module_extensions", + module_extensions_mock, + ) + + result = CodeBasedExtensionService.get_code_based_extension("moderation") + + assert result == [] + module_extensions_mock.assert_called_once_with("moderation") + + def test_should_propagate_error_when_module_extensions_lookup_fails(self, monkeypatch: pytest.MonkeyPatch): + """Test ValueError from extension lookup bubbles up unchanged.""" + module_extensions_mock = MagicMock(side_effect=ValueError("Extension Module invalid-module not found")) + monkeypatch.setattr( + "services.code_based_extension_service.code_based_extension.module_extensions", + module_extensions_mock, + ) + + with pytest.raises(ValueError, match="Extension Module invalid-module not found"): + CodeBasedExtensionService.get_code_based_extension("invalid-module") + + module_extensions_mock.assert_called_once_with("invalid-module") diff --git a/api/tests/unit_tests/services/test_conversation_service.py b/api/tests/unit_tests/services/test_conversation_service.py index d8ecdf45fd..75551531a2 100644 --- a/api/tests/unit_tests/services/test_conversation_service.py +++ b/api/tests/unit_tests/services/test_conversation_service.py @@ -1,18 +1,29 @@ """ Comprehensive unit tests for ConversationService. -This file keeps non-SQL guard/unit tests. -SQL-related tests were migrated to testcontainers integration tests. +This file provides complete test coverage for all ConversationService methods. +Tests are organized by functionality and include edge cases, error handling, +and both positive and negative test scenarios. """ -from datetime import datetime +from datetime import datetime, timedelta from unittest.mock import MagicMock, Mock, create_autospec, patch +import pytest +from sqlalchemy import asc, desc + from core.app.entities.app_invoke_entities import InvokeFrom -from models import Account -from models.model import App, Conversation, EndUser +from libs.infinite_scroll_pagination import InfiniteScrollPagination +from models import Account, ConversationVariable +from models.model import App, Conversation, EndUser, Message from services.conversation_service import ConversationService -from services.message_service import MessageService +from services.errors.conversation import ( + ConversationNotExistsError, + ConversationVariableNotExistsError, + ConversationVariableTypeMismatchError, + LastConversationNotExistsError, +) +from services.errors.message import MessageNotExistsError class ConversationServiceTestDataFactory: @@ -116,6 +127,84 @@ class ConversationServiceTestDataFactory: setattr(conversation, key, value) return conversation + @staticmethod + def create_message_mock( + message_id: str = "msg-123", + conversation_id: str = "conv-123", + app_id: str = "app-123", + **kwargs, + ) -> Mock: + """ + Create a mock Message object. + + Args: + message_id: Unique identifier for the message + conversation_id: Associated conversation identifier + app_id: Associated app identifier + **kwargs: Additional attributes to set on the mock + + Returns: + Mock Message object with specified attributes + """ + message = create_autospec(Message, instance=True) + message.id = message_id + message.conversation_id = conversation_id + message.app_id = app_id + message.query = kwargs.get("query", "Test message content") + message.created_at = kwargs.get("created_at", datetime.utcnow()) + for key, value in kwargs.items(): + setattr(message, key, value) + return message + + @staticmethod + def create_conversation_variable_mock( + variable_id: str = "var-123", + conversation_id: str = "conv-123", + app_id: str = "app-123", + **kwargs, + ) -> Mock: + """ + Create a mock ConversationVariable object. + + Args: + variable_id: Unique identifier for the variable + conversation_id: Associated conversation identifier + app_id: Associated app identifier + **kwargs: Additional attributes to set on the mock + + Returns: + Mock ConversationVariable object with specified attributes + """ + variable = create_autospec(ConversationVariable, instance=True) + variable.id = variable_id + variable.conversation_id = conversation_id + variable.app_id = app_id + variable.data = {"name": kwargs.get("name", "test_var"), "value": kwargs.get("value", "test_value")} + variable.created_at = kwargs.get("created_at", datetime.utcnow()) + variable.updated_at = kwargs.get("updated_at", datetime.utcnow()) + + # Mock to_variable method + mock_variable = Mock() + mock_variable.id = variable_id + mock_variable.name = kwargs.get("name", "test_var") + mock_variable.value_type = kwargs.get("value_type", "string") + mock_variable.value = kwargs.get("value", "test_value") + mock_variable.description = kwargs.get("description", "") + mock_variable.selector = kwargs.get("selector", {}) + mock_variable.model_dump.return_value = { + "id": variable_id, + "name": kwargs.get("name", "test_var"), + "value_type": kwargs.get("value_type", "string"), + "value": kwargs.get("value", "test_value"), + "description": kwargs.get("description", ""), + "selector": kwargs.get("selector", {}), + } + variable.to_variable.return_value = mock_variable + + for key, value in kwargs.items(): + setattr(variable, key, value) + return variable + class TestConversationServicePagination: """Test conversation pagination operations.""" @@ -175,99 +264,958 @@ class TestConversationServicePagination: assert result.limit == 20 -class TestConversationServiceMessageCreation: - """ - Test message creation and pagination. +class TestConversationServiceHelpers: + """Test helper methods in ConversationService.""" - Tests MessageService operations for creating and retrieving messages - within conversations. - """ - - def test_pagination_returns_empty_when_no_user(self): + def test_get_sort_params_with_descending_sort(self): """ - Test that pagination returns empty result when user is None. + Test _get_sort_params with descending sort prefix. - This ensures proper handling of unauthenticated requests. + When sort_by starts with '-', should return field name and desc function. + """ + # Act + field, direction = ConversationService._get_sort_params("-updated_at") + + # Assert + assert field == "updated_at" + assert direction == desc + + def test_get_sort_params_with_ascending_sort(self): + """ + Test _get_sort_params with ascending sort. + + When sort_by doesn't start with '-', should return field name and asc function. + """ + # Act + field, direction = ConversationService._get_sort_params("created_at") + + # Assert + assert field == "created_at" + assert direction == asc + + def test_build_filter_condition_with_descending_sort(self): + """ + Test _build_filter_condition with descending sort direction. + + Should create a less-than filter condition. """ # Arrange - app_model = ConversationServiceTestDataFactory.create_app_mock() + mock_conversation = ConversationServiceTestDataFactory.create_conversation_mock() + mock_conversation.updated_at = datetime.utcnow() # Act - result = MessageService.pagination_by_first_id( - app_model=app_model, - user=None, - conversation_id="conv-123", - first_id=None, - limit=10, + condition = ConversationService._build_filter_condition( + sort_field="updated_at", + sort_direction=desc, + reference_conversation=mock_conversation, ) # Assert - assert result.data == [] - assert result.has_more is False + # The condition should be a comparison expression + assert condition is not None - def test_pagination_returns_empty_when_no_conversation_id(self): + def test_build_filter_condition_with_ascending_sort(self): """ - Test that pagination returns empty result when conversation_id is None. + Test _build_filter_condition with ascending sort direction. - This ensures proper handling of invalid requests. + Should create a greater-than filter condition. + """ + # Arrange + mock_conversation = ConversationServiceTestDataFactory.create_conversation_mock() + mock_conversation.created_at = datetime.utcnow() + + # Act + condition = ConversationService._build_filter_condition( + sort_field="created_at", + sort_direction=asc, + reference_conversation=mock_conversation, + ) + + # Assert + # The condition should be a comparison expression + assert condition is not None + + +class TestConversationServiceGetConversation: + """Test conversation retrieval operations.""" + + @patch("services.conversation_service.db.session") + def test_get_conversation_success_with_account(self, mock_db_session): + """ + Test successful conversation retrieval with account user. + + Should return conversation when found with proper filters. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock( + from_account_id=user.id, from_source="console" + ) + + mock_query = mock_db_session.query.return_value + mock_query.where.return_value.first.return_value = conversation + + # Act + result = ConversationService.get_conversation(app_model, "conv-123", user) + + # Assert + assert result == conversation + mock_db_session.query.assert_called_once_with(Conversation) + + @patch("services.conversation_service.db.session") + def test_get_conversation_success_with_end_user(self, mock_db_session): + """ + Test successful conversation retrieval with end user. + + Should return conversation when found with proper filters for API user. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_end_user_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock( + from_end_user_id=user.id, from_source="api" + ) + + mock_query = mock_db_session.query.return_value + mock_query.where.return_value.first.return_value = conversation + + # Act + result = ConversationService.get_conversation(app_model, "conv-123", user) + + # Assert + assert result == conversation + + @patch("services.conversation_service.db.session") + def test_get_conversation_not_found_raises_error(self, mock_db_session): + """ + Test that get_conversation raises error when conversation not found. + + Should raise ConversationNotExistsError when no matching conversation found. """ # Arrange app_model = ConversationServiceTestDataFactory.create_app_mock() user = ConversationServiceTestDataFactory.create_account_mock() - # Act - result = MessageService.pagination_by_first_id( - app_model=app_model, - user=user, - conversation_id="", - first_id=None, - limit=10, - ) + mock_query = mock_db_session.query.return_value + mock_query.where.return_value.first.return_value = None - # Assert - assert result.data == [] - assert result.has_more is False + # Act & Assert + with pytest.raises(ConversationNotExistsError): + ConversationService.get_conversation(app_model, "conv-123", user) -class TestConversationServiceSummarization: - """ - Test conversation summarization (auto-generated names). +class TestConversationServiceRename: + """Test conversation rename operations.""" - Tests the auto_generate_name functionality that creates conversation - titles based on the first message. - """ - - @patch("services.conversation_service.db.session", autospec=True) - @patch("services.conversation_service.ConversationService.get_conversation", autospec=True) - @patch("services.conversation_service.ConversationService.auto_generate_name", autospec=True) - def test_rename_with_auto_generate(self, mock_auto_generate, mock_get_conversation, mock_db_session): + @patch("services.conversation_service.db.session") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_rename_with_manual_name(self, mock_get_conversation, mock_db_session): """ - Test renaming conversation with auto-generation enabled. + Test renaming conversation with manual name. - When auto_generate is True, the service should call the auto_generate_name - method to generate a new name for the conversation. + Should update conversation name and timestamp when auto_generate is False. """ # Arrange app_model = ConversationServiceTestDataFactory.create_app_mock() user = ConversationServiceTestDataFactory.create_account_mock() conversation = ConversationServiceTestDataFactory.create_conversation_mock() - conversation.name = "Auto-generated Name" - # Mock the conversation lookup to return our test conversation mock_get_conversation.return_value = conversation - # Mock the auto_generate_name method to return the conversation + # Act + result = ConversationService.rename( + app_model=app_model, + conversation_id="conv-123", + user=user, + name="New Name", + auto_generate=False, + ) + + # Assert + assert result == conversation + assert conversation.name == "New Name" + mock_db_session.commit.assert_called_once() + + @patch("services.conversation_service.db.session") + @patch("services.conversation_service.ConversationService.get_conversation") + @patch("services.conversation_service.ConversationService.auto_generate_name") + def test_rename_with_auto_generate(self, mock_auto_generate, mock_get_conversation, mock_db_session): + """ + Test renaming conversation with auto-generation. + + Should call auto_generate_name when auto_generate is True. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation mock_auto_generate.return_value = conversation # Act result = ConversationService.rename( app_model=app_model, - conversation_id=conversation.id, + conversation_id="conv-123", user=user, - name="", + name=None, auto_generate=True, ) # Assert - mock_auto_generate.assert_called_once_with(app_model, conversation) assert result == conversation + mock_auto_generate.assert_called_once_with(app_model, conversation) + + +class TestConversationServiceAutoGenerateName: + """Test conversation auto-name generation operations.""" + + @patch("services.conversation_service.db.session") + @patch("services.conversation_service.LLMGenerator") + def test_auto_generate_name_success(self, mock_llm_generator, mock_db_session): + """ + Test successful auto-generation of conversation name. + + Should generate name using LLMGenerator and update conversation. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + message = ConversationServiceTestDataFactory.create_message_mock( + conversation_id=conversation.id, app_id=app_model.id + ) + + # Mock database query to return message + mock_query = mock_db_session.query.return_value + mock_query.where.return_value.order_by.return_value.first.return_value = message + + # Mock LLM generator + mock_llm_generator.generate_conversation_name.return_value = "Generated Name" + + # Act + result = ConversationService.auto_generate_name(app_model, conversation) + + # Assert + assert result == conversation + assert conversation.name == "Generated Name" + mock_llm_generator.generate_conversation_name.assert_called_once_with( + app_model.tenant_id, message.query, conversation.id, app_model.id + ) + mock_db_session.commit.assert_called_once() + + @patch("services.conversation_service.db.session") + def test_auto_generate_name_no_message_raises_error(self, mock_db_session): + """ + Test auto-generation fails when no message found. + + Should raise MessageNotExistsError when conversation has no messages. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + # Mock database query to return None + mock_query = mock_db_session.query.return_value + mock_query.where.return_value.order_by.return_value.first.return_value = None + + # Act & Assert + with pytest.raises(MessageNotExistsError): + ConversationService.auto_generate_name(app_model, conversation) + + @patch("services.conversation_service.db.session") + @patch("services.conversation_service.LLMGenerator") + def test_auto_generate_name_handles_llm_exception(self, mock_llm_generator, mock_db_session): + """ + Test auto-generation handles LLM generator exceptions gracefully. + + Should continue without name when LLMGenerator fails. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + message = ConversationServiceTestDataFactory.create_message_mock( + conversation_id=conversation.id, app_id=app_model.id + ) + + # Mock database query to return message + mock_query = mock_db_session.query.return_value + mock_query.where.return_value.order_by.return_value.first.return_value = message + + # Mock LLM generator to raise exception + mock_llm_generator.generate_conversation_name.side_effect = Exception("LLM Error") + + # Act + result = ConversationService.auto_generate_name(app_model, conversation) + + # Assert + assert result == conversation + # Name should remain unchanged due to exception + mock_db_session.commit.assert_called_once() + + +class TestConversationServiceDelete: + """Test conversation deletion operations.""" + + @patch("services.conversation_service.delete_conversation_related_data") + @patch("services.conversation_service.db.session") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_delete_success(self, mock_get_conversation, mock_db_session, mock_delete_task): + """ + Test successful conversation deletion. + + Should delete conversation and schedule cleanup task. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock(name="Test App") + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Act + ConversationService.delete(app_model, "conv-123", user) + + # Assert + mock_db_session.delete.assert_called_once_with(conversation) + mock_db_session.commit.assert_called_once() + mock_delete_task.delay.assert_called_once_with(conversation.id) + + @patch("services.conversation_service.db.session") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_delete_handles_exception_and_rollback(self, mock_get_conversation, mock_db_session): + """ + Test deletion handles exceptions and rolls back transaction. + + Should rollback database changes when deletion fails. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + mock_db_session.delete.side_effect = Exception("Database Error") + + # Act & Assert + with pytest.raises(Exception, match="Database Error"): + ConversationService.delete(app_model, "conv-123", user) + + # Assert rollback was called + mock_db_session.rollback.assert_called_once() + + +class TestConversationServiceConversationalVariable: + """Test conversational variable operations.""" + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_get_conversational_variable_success(self, mock_get_conversation, mock_session_factory): + """ + Test successful retrieval of conversational variables. + + Should return paginated list of variables for conversation. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Mock session and variables + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + variable1 = ConversationServiceTestDataFactory.create_conversation_variable_mock() + variable2 = ConversationServiceTestDataFactory.create_conversation_variable_mock(variable_id="var-456") + + mock_session.scalars.return_value.all.return_value = [variable1, variable2] + + # Act + result = ConversationService.get_conversational_variable( + app_model=app_model, + conversation_id="conv-123", + user=user, + limit=10, + last_id=None, + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + assert len(result.data) == 2 + assert result.limit == 10 + assert result.has_more is False + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_get_conversational_variable_with_last_id(self, mock_get_conversation, mock_session_factory): + """ + Test retrieval of variables with last_id pagination. + + Should filter variables created after last_id. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Mock session and variables + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + last_variable = ConversationServiceTestDataFactory.create_conversation_variable_mock( + created_at=datetime.utcnow() - timedelta(hours=1) + ) + variable = ConversationServiceTestDataFactory.create_conversation_variable_mock(created_at=datetime.utcnow()) + + mock_session.scalar.return_value = last_variable + mock_session.scalars.return_value.all.return_value = [variable] + + # Act + result = ConversationService.get_conversational_variable( + app_model=app_model, + conversation_id="conv-123", + user=user, + limit=10, + last_id="var-123", + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + assert len(result.data) == 1 + assert result.limit == 10 + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_get_conversational_variable_last_id_not_found_raises_error( + self, mock_get_conversation, mock_session_factory + ): + """ + Test that invalid last_id raises ConversationVariableNotExistsError. + + Should raise error when last_id doesn't exist. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Mock session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session.scalar.return_value = None + + # Act & Assert + with pytest.raises(ConversationVariableNotExistsError): + ConversationService.get_conversational_variable( + app_model=app_model, + conversation_id="conv-123", + user=user, + limit=10, + last_id="invalid-id", + ) + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + @patch("services.conversation_service.dify_config") + def test_get_conversational_variable_with_name_filter_mysql( + self, mock_config, mock_get_conversation, mock_session_factory + ): + """ + Test variable filtering by name for MySQL databases. + + Should apply JSON extraction filter for variable names. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + mock_config.DB_TYPE = "mysql" + + # Mock session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session.scalars.return_value.all.return_value = [] + + # Act + ConversationService.get_conversational_variable( + app_model=app_model, + conversation_id="conv-123", + user=user, + limit=10, + last_id=None, + variable_name="test_var", + ) + + # Assert - JSON filter should be applied + assert mock_session.scalars.called + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + @patch("services.conversation_service.dify_config") + def test_get_conversational_variable_with_name_filter_postgresql( + self, mock_config, mock_get_conversation, mock_session_factory + ): + """ + Test variable filtering by name for PostgreSQL databases. + + Should apply JSON extraction filter for variable names. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + mock_config.DB_TYPE = "postgresql" + + # Mock session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session.scalars.return_value.all.return_value = [] + + # Act + ConversationService.get_conversational_variable( + app_model=app_model, + conversation_id="conv-123", + user=user, + limit=10, + last_id=None, + variable_name="test_var", + ) + + # Assert - JSON filter should be applied + assert mock_session.scalars.called + + +class TestConversationServiceUpdateVariable: + """Test conversation variable update operations.""" + + @patch("services.conversation_service.variable_factory") + @patch("services.conversation_service.ConversationVariableUpdater") + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_update_conversation_variable_success( + self, mock_get_conversation, mock_session_factory, mock_updater_class, mock_variable_factory + ): + """ + Test successful update of conversation variable. + + Should update variable value and return updated data. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Mock session and existing variable + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + existing_variable = ConversationServiceTestDataFactory.create_conversation_variable_mock(value_type="string") + mock_session.scalar.return_value = existing_variable + + # Mock variable factory and updater + updated_variable = Mock() + updated_variable.model_dump.return_value = {"id": "var-123", "name": "test_var", "value": "new_value"} + mock_variable_factory.build_conversation_variable_from_mapping.return_value = updated_variable + + mock_updater = MagicMock() + mock_updater_class.return_value = mock_updater + + # Act + result = ConversationService.update_conversation_variable( + app_model=app_model, + conversation_id="conv-123", + variable_id="var-123", + user=user, + new_value="new_value", + ) + + # Assert + assert result["id"] == "var-123" + assert result["value"] == "new_value" + mock_updater.update.assert_called_once_with("conv-123", updated_variable) + mock_updater.flush.assert_called_once() + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_update_conversation_variable_not_found_raises_error(self, mock_get_conversation, mock_session_factory): + """ + Test update fails when variable doesn't exist. + + Should raise ConversationVariableNotExistsError. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Mock session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session.scalar.return_value = None + + # Act & Assert + with pytest.raises(ConversationVariableNotExistsError): + ConversationService.update_conversation_variable( + app_model=app_model, + conversation_id="conv-123", + variable_id="invalid-id", + user=user, + new_value="new_value", + ) + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_update_conversation_variable_type_mismatch_raises_error(self, mock_get_conversation, mock_session_factory): + """ + Test update fails when value type doesn't match expected type. + + Should raise ConversationVariableTypeMismatchError. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Mock session and existing variable + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + existing_variable = ConversationServiceTestDataFactory.create_conversation_variable_mock(value_type="number") + mock_session.scalar.return_value = existing_variable + + # Act & Assert - Try to set string value for number variable + with pytest.raises(ConversationVariableTypeMismatchError): + ConversationService.update_conversation_variable( + app_model=app_model, + conversation_id="conv-123", + variable_id="var-123", + user=user, + new_value="string_value", # Wrong type + ) + + @patch("services.conversation_service.session_factory") + @patch("services.conversation_service.ConversationService.get_conversation") + def test_update_conversation_variable_integer_number_compatibility( + self, mock_get_conversation, mock_session_factory + ): + """ + Test that integer type accepts number values. + + Should allow number values for integer type variables. + """ + # Arrange + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + + mock_get_conversation.return_value = conversation + + # Mock session and existing variable + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + existing_variable = ConversationServiceTestDataFactory.create_conversation_variable_mock(value_type="integer") + mock_session.scalar.return_value = existing_variable + + # Mock variable factory and updater + updated_variable = Mock() + updated_variable.model_dump.return_value = {"id": "var-123", "name": "test_var", "value": 42} + + with ( + patch("services.conversation_service.variable_factory") as mock_variable_factory, + patch("services.conversation_service.ConversationVariableUpdater") as mock_updater_class, + ): + mock_variable_factory.build_conversation_variable_from_mapping.return_value = updated_variable + mock_updater = MagicMock() + mock_updater_class.return_value = mock_updater + + # Act + result = ConversationService.update_conversation_variable( + app_model=app_model, + conversation_id="conv-123", + variable_id="var-123", + user=user, + new_value=42, # Number value for integer type + ) + + # Assert + assert result["value"] == 42 + mock_updater.update.assert_called_once() + + +class TestConversationServicePaginationAdvanced: + """Advanced pagination tests for ConversationService.""" + + @patch("services.conversation_service.session_factory") + def test_pagination_by_last_id_with_last_id_not_found(self, mock_session_factory): + """ + Test pagination with invalid last_id raises error. + + Should raise LastConversationNotExistsError when last_id doesn't exist. + """ + # Arrange + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session.scalar.return_value = None + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + + # Act & Assert + with pytest.raises(LastConversationNotExistsError): + ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id="invalid-id", + limit=20, + invoke_from=InvokeFrom.WEB_APP, + ) + + @patch("services.conversation_service.session_factory") + def test_pagination_by_last_id_with_exclude_ids(self, mock_session_factory): + """ + Test pagination with exclude_ids filter. + + Should exclude specified conversation IDs from results. + """ + # Arrange + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + mock_session.scalars.return_value.all.return_value = [conversation] + mock_session.scalar.return_value = conversation + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + + # Act + result = ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id=None, + limit=20, + invoke_from=InvokeFrom.WEB_APP, + exclude_ids=["excluded-123"], + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + assert len(result.data) == 1 + + @patch("services.conversation_service.session_factory") + def test_pagination_by_last_id_has_more_detection(self, mock_session_factory): + """ + Test pagination has_more detection logic. + + Should set has_more=True when there are more results beyond limit. + """ + # Arrange + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + # Return exactly limit items to trigger has_more check + conversations = [ + ConversationServiceTestDataFactory.create_conversation_mock(conversation_id=f"conv-{i}") for i in range(20) + ] + mock_session.scalars.return_value.all.return_value = conversations + mock_session.scalar.return_value = conversations[-1] + + # Mock count query to return > 0 + mock_session.scalar.return_value = 5 # Additional items exist + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + + # Act + result = ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id=None, + limit=20, + invoke_from=InvokeFrom.WEB_APP, + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + assert result.has_more is True + + @patch("services.conversation_service.session_factory") + def test_pagination_by_last_id_with_different_sort_by(self, mock_session_factory): + """ + Test pagination with different sort fields. + + Should handle various sort_by parameters correctly. + """ + # Arrange + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + conversation = ConversationServiceTestDataFactory.create_conversation_mock() + mock_session.scalars.return_value.all.return_value = [conversation] + mock_session.scalar.return_value = conversation + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + + # Test different sort fields + sort_fields = ["created_at", "-updated_at", "name", "-status"] + + for sort_by in sort_fields: + # Act + result = ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id=None, + limit=20, + invoke_from=InvokeFrom.WEB_APP, + sort_by=sort_by, + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + + +class TestConversationServiceEdgeCases: + """Test edge cases and error scenarios.""" + + @patch("services.conversation_service.session_factory") + def test_pagination_with_end_user_api_source(self, mock_session_factory): + """ + Test pagination correctly handles EndUser with API source. + + Should use 'api' as from_source for EndUser instances. + """ + # Arrange + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + conversation = ConversationServiceTestDataFactory.create_conversation_mock( + from_source="api", from_end_user_id="user-123" + ) + mock_session.scalars.return_value.all.return_value = [conversation] + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_end_user_mock() + + # Act + result = ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id=None, + limit=20, + invoke_from=InvokeFrom.WEB_APP, + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + + @patch("services.conversation_service.session_factory") + def test_pagination_with_account_console_source(self, mock_session_factory): + """ + Test pagination correctly handles Account with console source. + + Should use 'console' as from_source for Account instances. + """ + # Arrange + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + + conversation = ConversationServiceTestDataFactory.create_conversation_mock( + from_source="console", from_account_id="account-123" + ) + mock_session.scalars.return_value.all.return_value = [conversation] + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + + # Act + result = ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id=None, + limit=20, + invoke_from=InvokeFrom.WEB_APP, + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + + def test_pagination_with_include_ids_filter(self): + """ + Test pagination with include_ids filter. + + Should only return conversations with IDs in include_ids list. + """ + # Arrange + mock_session = MagicMock() + mock_session.scalars.return_value.all.return_value = [] + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + + # Act + result = ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id=None, + limit=20, + invoke_from=InvokeFrom.WEB_APP, + include_ids=["conv-123", "conv-456"], + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + # Verify that include_ids filter was applied + assert mock_session.scalars.called + + def test_pagination_with_empty_exclude_ids(self): + """ + Test pagination with empty exclude_ids list. + + Should handle empty exclude_ids gracefully. + """ + # Arrange + mock_session = MagicMock() + mock_session.scalars.return_value.all.return_value = [] + + app_model = ConversationServiceTestDataFactory.create_app_mock() + user = ConversationServiceTestDataFactory.create_account_mock() + + # Act + result = ConversationService.pagination_by_last_id( + session=mock_session, + app_model=app_model, + user=user, + last_id=None, + limit=20, + invoke_from=InvokeFrom.WEB_APP, + exclude_ids=[], + ) + + # Assert + assert isinstance(result, InfiniteScrollPagination) + assert result.has_more is False diff --git a/api/tests/unit_tests/services/test_conversation_variable_updater.py b/api/tests/unit_tests/services/test_conversation_variable_updater.py new file mode 100644 index 0000000000..20f7caa78e --- /dev/null +++ b/api/tests/unit_tests/services/test_conversation_variable_updater.py @@ -0,0 +1,75 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +from dify_graph.variables import StringVariable +from services.conversation_variable_updater import ConversationVariableNotFoundError, ConversationVariableUpdater + + +class TestConversationVariableUpdater: + def test_should_update_conversation_variable_data_and_commit(self): + """Test update persists serialized variable data when the row exists.""" + conversation_id = "conv-123" + variable = StringVariable( + id="var-123", + name="topic", + value="new value", + ) + expected_json = variable.model_dump_json() + + row = SimpleNamespace(data="old value") + session = MagicMock() + session.scalar.return_value = row + + session_context = MagicMock() + session_context.__enter__.return_value = session + session_context.__exit__.return_value = None + + session_maker = MagicMock(return_value=session_context) + updater = ConversationVariableUpdater(session_maker) + + updater.update(conversation_id=conversation_id, variable=variable) + + session_maker.assert_called_once_with() + session.scalar.assert_called_once() + stmt = session.scalar.call_args.args[0] + compiled_params = stmt.compile().params + assert variable.id in compiled_params.values() + assert conversation_id in compiled_params.values() + assert row.data == expected_json + session.commit.assert_called_once() + + def test_should_raise_not_found_error_when_conversation_variable_missing(self): + """Test update raises ConversationVariableNotFoundError when no matching row exists.""" + conversation_id = "conv-404" + variable = StringVariable( + id="var-404", + name="topic", + value="value", + ) + + session = MagicMock() + session.scalar.return_value = None + + session_context = MagicMock() + session_context.__enter__.return_value = session + session_context.__exit__.return_value = None + + session_maker = MagicMock(return_value=session_context) + updater = ConversationVariableUpdater(session_maker) + + with pytest.raises(ConversationVariableNotFoundError, match="conversation variable not found in the database"): + updater.update(conversation_id=conversation_id, variable=variable) + + session.commit.assert_not_called() + + def test_should_do_nothing_when_flush_is_called(self): + """Test flush currently behaves as a no-op and returns None.""" + session_maker = MagicMock() + updater = ConversationVariableUpdater(session_maker) + + result = updater.flush() + + assert result is None + session_maker.assert_not_called() diff --git a/api/tests/unit_tests/services/test_credit_pool_service.py b/api/tests/unit_tests/services/test_credit_pool_service.py new file mode 100644 index 0000000000..9ef314cb9e --- /dev/null +++ b/api/tests/unit_tests/services/test_credit_pool_service.py @@ -0,0 +1,157 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +import services.credit_pool_service as credit_pool_service_module +from core.errors.error import QuotaExceededError +from models import TenantCreditPool +from services.credit_pool_service import CreditPoolService + + +@pytest.fixture +def mock_credit_deduction_setup(): + """Fixture providing common setup for credit deduction tests.""" + pool = SimpleNamespace(remaining_credits=50) + fake_engine = MagicMock() + session = MagicMock() + session_context = MagicMock() + session_context.__enter__.return_value = session + session_context.__exit__.return_value = None + + mock_get_pool = patch.object(CreditPoolService, "get_pool", return_value=pool) + mock_db = patch.object(credit_pool_service_module, "db", new=SimpleNamespace(engine=fake_engine)) + mock_session = patch.object(credit_pool_service_module, "Session", return_value=session_context) + + return { + "pool": pool, + "fake_engine": fake_engine, + "session": session, + "session_context": session_context, + "patches": (mock_get_pool, mock_db, mock_session), + } + + +class TestCreditPoolService: + def test_should_create_default_pool_with_trial_type_and_configured_quota(self): + """Test create_default_pool persists a trial pool using configured hosted credits.""" + tenant_id = "tenant-123" + hosted_pool_credits = 5000 + + with ( + patch.object(credit_pool_service_module.dify_config, "HOSTED_POOL_CREDITS", hosted_pool_credits), + patch.object(credit_pool_service_module, "db") as mock_db, + ): + pool = CreditPoolService.create_default_pool(tenant_id) + + assert isinstance(pool, TenantCreditPool) + assert pool.tenant_id == tenant_id + assert pool.pool_type == "trial" + assert pool.quota_limit == hosted_pool_credits + assert pool.quota_used == 0 + mock_db.session.add.assert_called_once_with(pool) + mock_db.session.commit.assert_called_once() + + def test_should_return_first_pool_from_query_when_get_pool_called(self): + """Test get_pool queries by tenant and pool_type and returns first result.""" + tenant_id = "tenant-123" + pool_type = "enterprise" + expected_pool = MagicMock(spec=TenantCreditPool) + + with patch.object(credit_pool_service_module, "db") as mock_db: + query = mock_db.session.query.return_value + filtered_query = query.filter_by.return_value + filtered_query.first.return_value = expected_pool + + result = CreditPoolService.get_pool(tenant_id=tenant_id, pool_type=pool_type) + + assert result == expected_pool + mock_db.session.query.assert_called_once_with(TenantCreditPool) + query.filter_by.assert_called_once_with(tenant_id=tenant_id, pool_type=pool_type) + filtered_query.first.assert_called_once() + + def test_should_return_false_when_pool_not_found_in_check_credits_available(self): + """Test check_credits_available returns False when tenant has no pool.""" + with patch.object(CreditPoolService, "get_pool", return_value=None) as mock_get_pool: + result = CreditPoolService.check_credits_available(tenant_id="tenant-123", credits_required=10) + + assert result is False + mock_get_pool.assert_called_once_with("tenant-123", "trial") + + def test_should_return_true_when_remaining_credits_cover_required_amount(self): + """Test check_credits_available returns True when remaining credits are sufficient.""" + pool = SimpleNamespace(remaining_credits=100) + + with patch.object(CreditPoolService, "get_pool", return_value=pool) as mock_get_pool: + result = CreditPoolService.check_credits_available(tenant_id="tenant-123", credits_required=60) + + assert result is True + mock_get_pool.assert_called_once_with("tenant-123", "trial") + + def test_should_return_false_when_remaining_credits_are_insufficient(self): + """Test check_credits_available returns False when required credits exceed remaining credits.""" + pool = SimpleNamespace(remaining_credits=30) + + with patch.object(CreditPoolService, "get_pool", return_value=pool): + result = CreditPoolService.check_credits_available(tenant_id="tenant-123", credits_required=60) + + assert result is False + + def test_should_raise_quota_exceeded_when_pool_not_found_in_check_and_deduct(self): + """Test check_and_deduct_credits raises when tenant credit pool does not exist.""" + with patch.object(CreditPoolService, "get_pool", return_value=None): + with pytest.raises(QuotaExceededError, match="Credit pool not found"): + CreditPoolService.check_and_deduct_credits(tenant_id="tenant-123", credits_required=10) + + def test_should_raise_quota_exceeded_when_pool_has_no_remaining_credits(self): + """Test check_and_deduct_credits raises when remaining credits are zero or negative.""" + pool = SimpleNamespace(remaining_credits=0) + + with patch.object(CreditPoolService, "get_pool", return_value=pool): + with pytest.raises(QuotaExceededError, match="No credits remaining"): + CreditPoolService.check_and_deduct_credits(tenant_id="tenant-123", credits_required=10) + + def test_should_deduct_minimum_of_required_and_remaining_credits(self, mock_credit_deduction_setup): + """Test check_and_deduct_credits updates quota_used by the actual deducted amount.""" + tenant_id = "tenant-123" + pool_type = "trial" + credits_required = 200 + remaining_credits = 120 + expected_deducted_credits = 120 + + mock_credit_deduction_setup["pool"].remaining_credits = remaining_credits + patches = mock_credit_deduction_setup["patches"] + session = mock_credit_deduction_setup["session"] + + with patches[0], patches[1], patches[2]: + result = CreditPoolService.check_and_deduct_credits( + tenant_id=tenant_id, + credits_required=credits_required, + pool_type=pool_type, + ) + + assert result == expected_deducted_credits + session.execute.assert_called_once() + session.commit.assert_called_once() + + stmt = session.execute.call_args.args[0] + compiled_params = stmt.compile().params + assert tenant_id in compiled_params.values() + assert pool_type in compiled_params.values() + assert expected_deducted_credits in compiled_params.values() + + def test_should_raise_quota_exceeded_when_deduction_update_fails(self, mock_credit_deduction_setup): + """Test check_and_deduct_credits translates DB update failures to QuotaExceededError.""" + mock_credit_deduction_setup["pool"].remaining_credits = 50 + mock_credit_deduction_setup["session"].execute.side_effect = Exception("db failure") + session = mock_credit_deduction_setup["session"] + + patches = mock_credit_deduction_setup["patches"] + mock_logger = patch.object(credit_pool_service_module, "logger") + + with patches[0], patches[1], patches[2], mock_logger as mock_logger_obj: + with pytest.raises(QuotaExceededError, match="Failed to deduct credits"): + CreditPoolService.check_and_deduct_credits(tenant_id="tenant-123", credits_required=10) + + session.commit.assert_not_called() + mock_logger_obj.exception.assert_called_once() diff --git a/api/tests/unit_tests/services/test_dataset_service_delete_dataset.py b/api/tests/unit_tests/services/test_dataset_service_delete_dataset.py deleted file mode 100644 index cc718c9997..0000000000 --- a/api/tests/unit_tests/services/test_dataset_service_delete_dataset.py +++ /dev/null @@ -1,216 +0,0 @@ -from unittest.mock import Mock, patch - -import pytest - -from models.account import Account, TenantAccountRole -from models.dataset import Dataset -from services.dataset_service import DatasetService - - -class DatasetDeleteTestDataFactory: - """Factory class for creating test data and mock objects for dataset delete tests.""" - - @staticmethod - def create_dataset_mock( - dataset_id: str = "dataset-123", - tenant_id: str = "test-tenant-123", - created_by: str = "creator-456", - doc_form: str | None = None, - indexing_technique: str | None = "high_quality", - **kwargs, - ) -> Mock: - """Create a mock dataset with specified attributes.""" - dataset = Mock(spec=Dataset) - dataset.id = dataset_id - dataset.tenant_id = tenant_id - dataset.created_by = created_by - dataset.doc_form = doc_form - dataset.indexing_technique = indexing_technique - for key, value in kwargs.items(): - setattr(dataset, key, value) - return dataset - - @staticmethod - def create_user_mock( - user_id: str = "user-789", - tenant_id: str = "test-tenant-123", - role: TenantAccountRole = TenantAccountRole.ADMIN, - **kwargs, - ) -> Mock: - """Create a mock user with specified attributes.""" - user = Mock(spec=Account) - user.id = user_id - user.current_tenant_id = tenant_id - user.current_role = role - for key, value in kwargs.items(): - setattr(user, key, value) - return user - - -class TestDatasetServiceDeleteDataset: - """ - Comprehensive unit tests for DatasetService.delete_dataset method. - - This test suite covers all deletion scenarios including: - - Normal dataset deletion with documents - - Empty dataset deletion (no documents, doc_form is None) - - Dataset deletion with missing indexing_technique - - Permission checks - - Event handling - - This test suite provides regression protection for issue #27073. - """ - - @pytest.fixture - def mock_dataset_service_dependencies(self): - """Common mock setup for dataset service dependencies.""" - with ( - patch("services.dataset_service.DatasetService.get_dataset") as mock_get_dataset, - patch("services.dataset_service.DatasetService.check_dataset_permission") as mock_check_perm, - patch("extensions.ext_database.db.session") as mock_db, - patch("services.dataset_service.dataset_was_deleted") as mock_dataset_was_deleted, - ): - yield { - "get_dataset": mock_get_dataset, - "check_permission": mock_check_perm, - "db_session": mock_db, - "dataset_was_deleted": mock_dataset_was_deleted, - } - - def test_delete_dataset_with_documents_success(self, mock_dataset_service_dependencies): - """ - Test successful deletion of a dataset with documents. - - This test verifies: - - Dataset is retrieved correctly - - Permission check is performed - - dataset_was_deleted event is sent - - Dataset is deleted from database - - Method returns True - """ - # Arrange - dataset = DatasetDeleteTestDataFactory.create_dataset_mock( - doc_form="text_model", indexing_technique="high_quality" - ) - user = DatasetDeleteTestDataFactory.create_user_mock() - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - - # Act - result = DatasetService.delete_dataset(dataset.id, user) - - # Assert - assert result is True - mock_dataset_service_dependencies["get_dataset"].assert_called_once_with(dataset.id) - mock_dataset_service_dependencies["check_permission"].assert_called_once_with(dataset, user) - mock_dataset_service_dependencies["dataset_was_deleted"].send.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].delete.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].commit.assert_called_once() - - def test_delete_empty_dataset_success(self, mock_dataset_service_dependencies): - """ - Test successful deletion of an empty dataset (no documents, doc_form is None). - - This test verifies that: - - Empty datasets can be deleted without errors - - dataset_was_deleted event is sent (event handler will skip cleanup if doc_form is None) - - Dataset is deleted from database - - Method returns True - - This is the primary test for issue #27073 where deleting an empty dataset - caused internal server error due to assertion failure in event handlers. - """ - # Arrange - dataset = DatasetDeleteTestDataFactory.create_dataset_mock(doc_form=None, indexing_technique=None) - user = DatasetDeleteTestDataFactory.create_user_mock() - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - - # Act - result = DatasetService.delete_dataset(dataset.id, user) - - # Assert - Verify complete deletion flow - assert result is True - mock_dataset_service_dependencies["get_dataset"].assert_called_once_with(dataset.id) - mock_dataset_service_dependencies["check_permission"].assert_called_once_with(dataset, user) - mock_dataset_service_dependencies["dataset_was_deleted"].send.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].delete.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].commit.assert_called_once() - - def test_delete_dataset_with_partial_none_values(self, mock_dataset_service_dependencies): - """ - Test deletion of dataset with partial None values. - - This test verifies that datasets with partial None values (e.g., doc_form exists - but indexing_technique is None) can be deleted successfully. The event handler - will skip cleanup if any required field is None. - - Improvement based on Gemini Code Assist suggestion: Added comprehensive assertions - to verify all core deletion operations are performed, not just event sending. - """ - # Arrange - dataset = DatasetDeleteTestDataFactory.create_dataset_mock(doc_form="text_model", indexing_technique=None) - user = DatasetDeleteTestDataFactory.create_user_mock() - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - - # Act - result = DatasetService.delete_dataset(dataset.id, user) - - # Assert - Verify complete deletion flow (Gemini suggestion implemented) - assert result is True - mock_dataset_service_dependencies["get_dataset"].assert_called_once_with(dataset.id) - mock_dataset_service_dependencies["check_permission"].assert_called_once_with(dataset, user) - mock_dataset_service_dependencies["dataset_was_deleted"].send.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].delete.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].commit.assert_called_once() - - def test_delete_dataset_with_doc_form_none_indexing_technique_exists(self, mock_dataset_service_dependencies): - """ - Test deletion of dataset where doc_form is None but indexing_technique exists. - - This edge case can occur in certain dataset configurations and should be handled - gracefully by the event handler's conditional check. - """ - # Arrange - dataset = DatasetDeleteTestDataFactory.create_dataset_mock(doc_form=None, indexing_technique="high_quality") - user = DatasetDeleteTestDataFactory.create_user_mock() - - mock_dataset_service_dependencies["get_dataset"].return_value = dataset - - # Act - result = DatasetService.delete_dataset(dataset.id, user) - - # Assert - Verify complete deletion flow - assert result is True - mock_dataset_service_dependencies["get_dataset"].assert_called_once_with(dataset.id) - mock_dataset_service_dependencies["check_permission"].assert_called_once_with(dataset, user) - mock_dataset_service_dependencies["dataset_was_deleted"].send.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].delete.assert_called_once_with(dataset) - mock_dataset_service_dependencies["db_session"].commit.assert_called_once() - - def test_delete_dataset_not_found(self, mock_dataset_service_dependencies): - """ - Test deletion attempt when dataset doesn't exist. - - This test verifies that: - - Method returns False when dataset is not found - - No deletion operations are performed - - No events are sent - """ - # Arrange - dataset_id = "non-existent-dataset" - user = DatasetDeleteTestDataFactory.create_user_mock() - - mock_dataset_service_dependencies["get_dataset"].return_value = None - - # Act - result = DatasetService.delete_dataset(dataset_id, user) - - # Assert - assert result is False - mock_dataset_service_dependencies["get_dataset"].assert_called_once_with(dataset_id) - mock_dataset_service_dependencies["check_permission"].assert_not_called() - mock_dataset_service_dependencies["dataset_was_deleted"].send.assert_not_called() - mock_dataset_service_dependencies["db_session"].delete.assert_not_called() - mock_dataset_service_dependencies["db_session"].commit.assert_not_called() diff --git a/api/tests/unit_tests/services/test_datasource_provider_service.py b/api/tests/unit_tests/services/test_datasource_provider_service.py new file mode 100644 index 0000000000..105ef7ba48 --- /dev/null +++ b/api/tests/unit_tests/services/test_datasource_provider_service.py @@ -0,0 +1,760 @@ +from unittest.mock import MagicMock, patch + +import pytest +from sqlalchemy.orm import Session + +from core.plugin.entities.plugin_daemon import CredentialType +from dify_graph.model_runtime.entities.provider_entities import FormType +from models.account import Account +from models.model import EndUser +from models.oauth import DatasourceProvider +from models.provider_ids import DatasourceProviderID +from services.datasource_provider_service import DatasourceProviderService, get_current_user + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def make_id(s: str = "org/plugin/provider") -> DatasourceProviderID: + return DatasourceProviderID(s) + + +# --------------------------------------------------------------------------- +# Test class +# --------------------------------------------------------------------------- + + +class TestDatasourceProviderService: + """Comprehensive tests for DatasourceProviderService targeting >95% coverage.""" + + @pytest.fixture + def service(self): + return DatasourceProviderService() + + @pytest.fixture + def mock_db_session(self): + """ + Robust, chainable query mock. + q returns itself for .filter_by(), .order_by(), .where() so any + SQLAlchemy chaining pattern works without multiple brittle sub-mocks. + """ + with patch("services.datasource_provider_service.Session") as mock_cls: + sess = MagicMock(spec=Session) + + q = MagicMock() + sess.query.return_value = q + + # Self-returning chain — any method called on q returns q + q.filter_by.return_value = q + q.order_by.return_value = q + q.where.return_value = q + + # Default terminal values (tests override per-case) + q.first.return_value = None + q.all.return_value = [] + q.count.return_value = 0 + q.delete.return_value = 1 + + mock_cls.return_value.__enter__.return_value = sess + mock_cls.return_value.no_autoflush.__enter__.return_value = sess + + yield sess + + @pytest.fixture(autouse=True) + def patch_db(self, mock_db_session): + with patch("services.datasource_provider_service.db") as mock_db: + mock_db.session = mock_db_session + mock_db.engine = MagicMock() + yield mock_db + + @pytest.fixture(autouse=True) + def patch_externals(self): + with ( + patch("httpx.request") as mock_httpx, + patch("services.datasource_provider_service.dify_config") as mock_cfg, + patch("services.datasource_provider_service.encrypter") as mock_enc, + patch("services.datasource_provider_service.redis_client") as mock_redis, + patch("services.datasource_provider_service.generate_incremental_name") as mock_genname, + patch("services.datasource_provider_service.OAuthHandler") as mock_oauth, + ): + mock_cfg.CONSOLE_API_URL = "http://localhost" + mock_enc.encrypt_token.return_value = "enc_tok" + mock_enc.decrypt_token.return_value = "dec_tok" + mock_enc.decrypt.return_value = {"k": "dec"} + mock_enc.encrypt.return_value = {"k": "enc"} + mock_enc.obfuscated_token.return_value = "obf" + mock_enc.mask_plugin_credentials.return_value = {"k": "mask"} + + mock_redis.lock.return_value.__enter__.return_value = MagicMock() + mock_genname.return_value = "gen_name" + + mock_oauth.return_value.refresh_credentials.return_value = MagicMock( + credentials={"k": "v"}, expires_at=9999 + ) + + resp = MagicMock() + resp.status_code = 200 + resp.json.return_value = { + "code": 0, + "message": "ok", + "data": { + "provider": "prov", + "plugin_unique_identifier": "pui", + "plugin_id": "org/plug", + "is_authorized": False, + "declaration": { + "identity": { + "author": "a", + "name": "n", + "description": {"en_US": "d"}, + "icon": "i", + "label": {"en_US": "l"}, + }, + "credentials_schema": [], + "oauth_schema": {"credentials_schema": [], "client_schema": []}, + "provider_type": "local_file", + "datasources": [], + }, + }, + } + mock_httpx.return_value = resp + + # Store handles for assertions + self._enc = mock_enc + self._redis = mock_redis + yield + + @pytest.fixture + def mock_user(self): + u = MagicMock() + u.id = "uid-1" + return u + + # ----------------------------------------------------------------------- + # get_current_user (lines 27-40) + # ----------------------------------------------------------------------- + + def test_should_return_proxy_when_current_object_is_account(self): + with patch("libs.login.current_user", new_callable=MagicMock) as proxy: + user_obj = MagicMock() + user_obj.__class__ = Account + proxy._get_current_object.return_value = user_obj + assert get_current_user() is proxy + + def test_should_return_proxy_when_current_object_is_enduser(self): + with patch("libs.login.current_user", new_callable=MagicMock) as proxy: + user_obj = MagicMock() + user_obj.__class__ = EndUser + proxy._get_current_object.return_value = user_obj + assert get_current_user() is proxy + + def test_should_return_proxy_when_get_current_object_raises_attribute_error(self): + """AttributeError from LocalProxy falls back to the proxy itself.""" + with patch("libs.login.current_user", new_callable=MagicMock) as proxy: + proxy._get_current_object.side_effect = AttributeError("no attr") + proxy.__class__ = Account # make the proxy itself satisfy isinstance + assert get_current_user() is proxy + + def test_should_raise_type_error_when_user_is_not_account_or_enduser(self): + with patch("libs.login.current_user", new_callable=MagicMock) as proxy: + proxy._get_current_object.return_value = "plain_string" + with pytest.raises(TypeError, match="current_user must be Account or EndUser"): + get_current_user() + + # ----------------------------------------------------------------------- + # is_system_oauth_params_exist (line 357-363) + # ----------------------------------------------------------------------- + + def test_should_return_true_when_system_oauth_params_exist(self, service, mock_db_session): + mock_db_session.query().first.return_value = MagicMock() + assert service.is_system_oauth_params_exist(make_id()) is True + + def test_should_return_false_when_system_oauth_params_missing(self, service, mock_db_session): + mock_db_session.query().first.return_value = None + assert service.is_system_oauth_params_exist(make_id()) is False + + # ----------------------------------------------------------------------- + # is_tenant_oauth_params_enabled (lines 365-379) + # NOTE: uses .count() not .first() + # ----------------------------------------------------------------------- + + def test_should_return_true_when_tenant_oauth_params_enabled(self, service, mock_db_session): + mock_db_session.query().count.return_value = 1 + assert service.is_tenant_oauth_params_enabled("t1", make_id()) is True + + def test_should_return_false_when_tenant_oauth_params_disabled(self, service, mock_db_session): + mock_db_session.query().count.return_value = 0 + assert service.is_tenant_oauth_params_enabled("t1", make_id()) is False + + # ----------------------------------------------------------------------- + # remove_oauth_custom_client_params (lines 55-61) + # ----------------------------------------------------------------------- + + def test_should_delete_tenant_config_when_removing_oauth_params(self, service, mock_db_session): + service.remove_oauth_custom_client_params("t1", make_id()) + mock_db_session.query().delete.assert_called_once() + + # ----------------------------------------------------------------------- + # setup_oauth_custom_client_params (315-351) + # ----------------------------------------------------------------------- + + def test_should_skip_db_write_when_credentials_are_none(self, service, mock_db_session): + """When credentials=None, should return immediately without any DB write.""" + service.setup_oauth_custom_client_params("t1", make_id(), None, None) + mock_db_session.add.assert_not_called() + + def test_should_create_new_config_when_none_exists(self, service, mock_db_session): + mock_db_session.query().first.return_value = None + with patch.object(service, "get_oauth_encrypter", return_value=(self._enc, None)): + service.setup_oauth_custom_client_params("t1", make_id(), {"k": "v"}, True) + mock_db_session.add.assert_called_once() + + def test_should_update_existing_config_when_record_found(self, service, mock_db_session): + existing = MagicMock() + mock_db_session.query().first.return_value = existing + with patch.object(service, "get_oauth_encrypter", return_value=(self._enc, None)): + service.setup_oauth_custom_client_params("t1", make_id(), {"k": "v"}, False) + mock_db_session.add.assert_not_called() # update in place, no add + + # ----------------------------------------------------------------------- + # decrypt / encrypt credentials (lines 70-98) + # ----------------------------------------------------------------------- + + def test_should_decrypt_secret_fields_when_decrypting_api_key_credentials(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "api_key" + p.encrypted_credentials = {"sk": "enc_val"} + with patch.object(service, "extract_secret_variables", return_value=["sk"]): + result = service.decrypt_datasource_provider_credentials("t1", p, "org/plug", "prov") + assert result["sk"] == "dec_tok" + + def test_should_encrypt_secret_fields_when_encrypting_api_key_credentials(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "api_key" + with patch.object(service, "extract_secret_variables", return_value=["sk"]): + result = service.encrypt_datasource_provider_credentials("t1", "prov", "org/plug", {"sk": "plain"}, p) + assert result["sk"] == "enc_tok" + self._enc.encrypt_token.assert_called() + + # ----------------------------------------------------------------------- + # get_datasource_credentials (lines 113-165) + # ----------------------------------------------------------------------- + + def test_should_return_empty_dict_when_credential_not_found(self, service, mock_db_session, mock_user): + with patch("services.datasource_provider_service.get_current_user", return_value=mock_user): + mock_db_session.query().first.return_value = None + assert service.get_datasource_credentials("t1", "prov", "org/plug") == {} + + def test_should_refresh_oauth_tokens_when_expired(self, service, mock_db_session, mock_user): + """Expired OAuth credential (expires_at near zero) triggers a silent refresh.""" + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "oauth2" + p.expires_at = 0 # expired + p.encrypted_credentials = {"tok": "x"} + mock_db_session.query().first.return_value = p + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service, "get_oauth_client", return_value={"oc": "v"}), + patch.object(service, "decrypt_datasource_provider_credentials", return_value={"tok": "plain"}), + ): + service.get_datasource_credentials("t1", "prov", "org/plug") + mock_db_session.commit.assert_called_once() + + def test_should_return_decrypted_credentials_when_api_key_not_expired(self, service, mock_db_session, mock_user): + """API key credentials with expires_at=-1 skip refresh and return directly.""" + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "api_key" + p.expires_at = -1 # sentinel: never expires + p.encrypted_credentials = {"k": "v"} + mock_db_session.query().first.return_value = p + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service, "decrypt_datasource_provider_credentials", return_value={"k": "plain"}), + ): + result = service.get_datasource_credentials("t1", "prov", "org/plug") + assert result == {"k": "plain"} + + def test_should_fetch_by_credential_id_when_provided(self, service, mock_db_session, mock_user): + """When credential_id is passed, the credential_id filter path (line 113) is taken.""" + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "api_key" + p.expires_at = -1 + p.encrypted_credentials = {} + mock_db_session.query().first.return_value = p + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service, "decrypt_datasource_provider_credentials", return_value={"k": "v"}), + ): + result = service.get_datasource_credentials("t1", "prov", "org/plug", credential_id="cred-id") + assert result == {"k": "v"} + + # ----------------------------------------------------------------------- + # get_all_datasource_credentials_by_provider (lines 176-228) + # ----------------------------------------------------------------------- + + def test_should_return_empty_list_when_no_provider_credentials_exist(self, service, mock_db_session, mock_user): + with patch("services.datasource_provider_service.get_current_user", return_value=mock_user): + mock_db_session.query().all.return_value = [] + assert service.get_all_datasource_credentials_by_provider("t1", "prov", "org/plug") == [] + + def test_should_refresh_and_return_credentials_when_oauth_expired(self, service, mock_db_session, mock_user): + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "oauth2" + p.expires_at = 0 + p.encrypted_credentials = {"t": "x"} + mock_db_session.query().all.return_value = [p] + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service, "get_oauth_client", return_value={"oc": "v"}), + patch.object(service, "decrypt_datasource_provider_credentials", return_value={"t": "plain"}), + ): + result = service.get_all_datasource_credentials_by_provider("t1", "prov", "org/plug") + assert len(result) == 1 + + # ----------------------------------------------------------------------- + # update_datasource_provider_name (lines 236-303) + # ----------------------------------------------------------------------- + + def test_should_raise_value_error_when_provider_not_found_on_name_update(self, service, mock_db_session): + mock_db_session.query().first.return_value = None + with pytest.raises(ValueError, match="not found"): + service.update_datasource_provider_name("t1", make_id(), "new", "cred-id") + + def test_should_return_early_when_new_name_matches_current(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + p.name = "same" + mock_db_session.query().first.return_value = p + service.update_datasource_provider_name("t1", make_id(), "same", "cred-id") + mock_db_session.commit.assert_not_called() + + def test_should_raise_value_error_when_name_already_exists(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + p.name = "old_name" + p.is_default = False + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 1 # conflict + with pytest.raises(ValueError, match="already exists"): + service.update_datasource_provider_name("t1", make_id(), "new_name", "some-id") + + def test_should_update_name_and_commit_when_no_conflict(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + p.name = "old_name" + p.is_default = False + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 0 + service.update_datasource_provider_name("t1", make_id(), "new_name", "some-id") + assert p.name == "new_name" + mock_db_session.commit.assert_called_once() + + # ----------------------------------------------------------------------- + # set_default_datasource_provider (lines 277-303) + # ----------------------------------------------------------------------- + + def test_should_raise_value_error_when_target_provider_not_found(self, service, mock_db_session): + mock_db_session.query().first.return_value = None + with pytest.raises(ValueError, match="not found"): + service.set_default_datasource_provider("t1", make_id(), "bad-id") + + def test_should_mark_target_as_default_and_commit(self, service, mock_db_session): + target = MagicMock(spec=DatasourceProvider) + target.provider = "provider" + target.plugin_id = "org/plug" + mock_db_session.query().first.return_value = target + service.set_default_datasource_provider("t1", make_id(), "new-id") + assert target.is_default is True + mock_db_session.commit.assert_called_once() + + # ----------------------------------------------------------------------- + # get_oauth_encrypter (lines 404-420) + # ----------------------------------------------------------------------- + + def test_should_raise_value_error_when_oauth_schema_missing(self, service): + pm = MagicMock() + pm.declaration.oauth_schema = None + with patch.object(service.provider_manager, "fetch_datasource_provider", return_value=pm): + with pytest.raises(ValueError, match="oauth schema not found"): + service.get_oauth_encrypter("t1", make_id()) + + def test_should_return_encrypter_when_oauth_schema_exists(self, service): + schema_item = MagicMock() + schema_item.to_basic_provider_config.return_value = MagicMock() + pm = MagicMock() + pm.declaration.oauth_schema.client_schema = [schema_item] + with ( + patch.object(service.provider_manager, "fetch_datasource_provider", return_value=pm), + patch( + "services.datasource_provider_service.create_provider_encrypter", + return_value=(MagicMock(), MagicMock()), + ), + ): + result = service.get_oauth_encrypter("t1", make_id()) + assert result is not None + + # ----------------------------------------------------------------------- + # get_tenant_oauth_client (lines 381-402) + # ----------------------------------------------------------------------- + + def test_should_return_masked_credentials_when_mask_is_true(self, service, mock_db_session): + tenant_params = MagicMock() + tenant_params.client_params = {"k": "v"} + mock_db_session.query().first.return_value = tenant_params + with patch.object(service, "get_oauth_encrypter", return_value=(self._enc, None)): + result = service.get_tenant_oauth_client("t1", make_id(), mask=True) + assert result == {"k": "mask"} + + def test_should_return_decrypted_credentials_when_mask_is_false(self, service, mock_db_session): + tenant_params = MagicMock() + tenant_params.client_params = {"k": "v"} + mock_db_session.query().first.return_value = tenant_params + with patch.object(service, "get_oauth_encrypter", return_value=(self._enc, None)): + result = service.get_tenant_oauth_client("t1", make_id(), mask=False) + assert result == {"k": "dec"} + + def test_should_return_none_when_no_tenant_oauth_config_exists(self, service, mock_db_session): + mock_db_session.query().first.return_value = None + assert service.get_tenant_oauth_client("t1", make_id()) is None + + # ----------------------------------------------------------------------- + # get_oauth_client (lines 423-457) + # ----------------------------------------------------------------------- + + def test_should_use_tenant_config_when_available(self, service, mock_db_session): + mock_db_session.query().first.return_value = MagicMock(client_params={"k": "v"}) + with patch.object(service, "get_oauth_encrypter", return_value=(self._enc, None)): + result = service.get_oauth_client("t1", make_id()) + assert result == {"k": "dec"} + + def test_should_fallback_to_system_credentials_when_tenant_config_missing(self, service, mock_db_session): + mock_db_session.query().first.side_effect = [None, MagicMock(system_credentials={"k": "sys"})] + with ( + patch.object(service.provider_manager, "fetch_datasource_provider"), + patch("services.datasource_provider_service.PluginService.is_plugin_verified", return_value=True), + ): + result = service.get_oauth_client("t1", make_id()) + assert result == {"k": "sys"} + + def test_should_raise_value_error_when_no_oauth_config_available(self, service, mock_db_session): + """Neither tenant nor system credentials → raises ValueError.""" + mock_db_session.query().first.side_effect = [None, None] + with ( + patch.object(service.provider_manager, "fetch_datasource_provider"), + patch("services.datasource_provider_service.PluginService.is_plugin_verified", return_value=False), + ): + with pytest.raises(ValueError, match="Please configure oauth client params"): + service.get_oauth_client("t1", make_id()) + + # ----------------------------------------------------------------------- + # add_datasource_oauth_provider (lines 539-607) + # ----------------------------------------------------------------------- + + def test_should_add_oauth_provider_successfully_when_name_is_unique(self, service, mock_db_session): + mock_db_session.query().count.return_value = 0 + with patch.object(service, "extract_secret_variables", return_value=[]): + service.add_datasource_oauth_provider("new", "t1", make_id(), "http://cb", 9999, {}) + mock_db_session.add.assert_called_once() + mock_db_session.commit.assert_called_once() + + def test_should_auto_rename_when_oauth_provider_name_conflicts(self, service, mock_db_session): + """Conflict on name results in auto-incremented name, not an error.""" + mock_db_session.query().count.return_value = 1 # conflict first, then auto-named + mock_db_session.query().all.return_value = [] + with ( + patch.object(service, "extract_secret_variables", return_value=[]), + patch.object(service, "generate_next_datasource_provider_name", return_value="new_gen"), + ): + service.add_datasource_oauth_provider("conflict", "t1", make_id(), "http://cb", 9999, {}) + mock_db_session.add.assert_called_once() + + def test_should_auto_generate_name_when_none_provided_for_oauth(self, service, mock_db_session): + """name=None causes auto-generation via generate_next_datasource_provider_name.""" + mock_db_session.query().count.return_value = 0 + mock_db_session.query().all.return_value = [] + with ( + patch.object(service, "extract_secret_variables", return_value=[]), + patch.object(service, "generate_next_datasource_provider_name", return_value="auto"), + ): + service.add_datasource_oauth_provider(None, "t1", make_id(), "http://cb", 9999, {}) + mock_db_session.add.assert_called_once() + + def test_should_encrypt_secret_fields_when_adding_oauth_provider(self, service, mock_db_session): + mock_db_session.query().count.return_value = 0 + with patch.object(service, "extract_secret_variables", return_value=["secret_key"]): + service.add_datasource_oauth_provider("nm", "t1", make_id(), "http://cb", 9999, {"secret_key": "value"}) + self._enc.encrypt_token.assert_called() + + def test_should_acquire_redis_lock_when_adding_oauth_provider(self, service, mock_db_session): + mock_db_session.query().count.return_value = 0 + with patch.object(service, "extract_secret_variables", return_value=[]): + service.add_datasource_oauth_provider("nm", "t1", make_id(), "http://cb", 9999, {}) + self._redis.lock.assert_called() + + # ----------------------------------------------------------------------- + # reauthorize_datasource_oauth_provider (lines 477-537) + # ----------------------------------------------------------------------- + + def test_should_raise_value_error_when_credential_id_not_found_on_reauth(self, service, mock_db_session): + mock_db_session.query().first.return_value = None + with patch.object(service, "extract_secret_variables", return_value=[]): + with pytest.raises(ValueError, match="not found"): + service.reauthorize_datasource_oauth_provider("n", "t1", make_id(), "u", 1, {}, "bad-id") + + def test_should_reauthorize_and_commit_when_credential_found(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 0 + with patch.object(service, "extract_secret_variables", return_value=[]): + service.reauthorize_datasource_oauth_provider("n", "t1", make_id(), "u", 1, {}, "oid") + mock_db_session.commit.assert_called_once() + + def test_should_auto_rename_when_reauth_name_conflicts(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 1 # conflict + mock_db_session.query().all.return_value = [] + with patch.object(service, "extract_secret_variables", return_value=["tok"]): + service.reauthorize_datasource_oauth_provider( + "conflict_name", "t1", make_id(), "u", 9999, {"tok": "v"}, "cred-id" + ) + mock_db_session.commit.assert_called_once() + + def test_should_encrypt_secret_fields_when_reauthorizing(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 0 + with patch.object(service, "extract_secret_variables", return_value=["tok"]): + service.reauthorize_datasource_oauth_provider(None, "t1", make_id(), "u", 9999, {"tok": "val"}, "cred-id") + self._enc.encrypt_token.assert_called() + + def test_should_acquire_redis_lock_when_reauthorizing(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 0 + with patch.object(service, "extract_secret_variables", return_value=[]): + service.reauthorize_datasource_oauth_provider("n", "t1", make_id(), "u", 1, {}, "oid") + self._redis.lock.assert_called() + + # ----------------------------------------------------------------------- + # add_datasource_api_key_provider (lines 608-675) + # ----------------------------------------------------------------------- + + def test_should_raise_value_error_when_api_key_name_already_exists(self, service, mock_db_session, mock_user): + """explicit name supplied + conflict → raises ValueError immediately.""" + mock_db_session.query().count.return_value = 1 + with patch("services.datasource_provider_service.get_current_user", return_value=mock_user): + with pytest.raises(ValueError, match="already exists"): + service.add_datasource_api_key_provider("clash", "t1", make_id(), {"sk": "v"}) + + def test_should_raise_value_error_when_credentials_validation_fails(self, service, mock_db_session, mock_user): + mock_db_session.query().count.return_value = 0 + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service.provider_manager, "validate_provider_credentials", side_effect=Exception("bad cred")), + patch.object(service, "extract_secret_variables", return_value=[]), + ): + with pytest.raises(ValueError, match="Failed to validate"): + service.add_datasource_api_key_provider("nm", "t1", make_id(), {"k": "v"}) + + def test_should_add_api_key_provider_and_commit_when_valid(self, service, mock_db_session, mock_user): + mock_db_session.query().count.return_value = 0 + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service.provider_manager, "validate_provider_credentials"), + patch.object(service, "extract_secret_variables", return_value=["sk"]), + ): + service.add_datasource_api_key_provider(None, "t1", make_id(), {"sk": "v"}) + mock_db_session.add.assert_called_once() + mock_db_session.commit.assert_called_once() + + def test_should_acquire_redis_lock_when_adding_api_key_provider(self, service, mock_db_session, mock_user): + mock_db_session.query().count.return_value = 0 + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service.provider_manager, "validate_provider_credentials"), + patch.object(service, "extract_secret_variables", return_value=[]), + ): + service.add_datasource_api_key_provider(None, "t1", make_id(), {}) + self._redis.lock.assert_called() + + # ----------------------------------------------------------------------- + # extract_secret_variables (lines 666-699) + # ----------------------------------------------------------------------- + + def test_should_extract_secret_variable_names_for_api_key_schema(self, service): + schema = MagicMock() + schema.name = "my_secret" + schema.type = MagicMock() + schema.type.value = FormType.SECRET_INPUT # "secret-input" + pm = MagicMock() + pm.declaration.credentials_schema = [schema] + with patch.object(service.provider_manager, "fetch_datasource_provider", return_value=pm): + result = service.extract_secret_variables("t1", "org/plug/prov", CredentialType.API_KEY) + assert "my_secret" in result + + def test_should_extract_secret_variable_names_for_oauth2_schema(self, service): + schema = MagicMock() + schema.name = "oauth_secret" + schema.type = MagicMock() + schema.type.value = FormType.SECRET_INPUT + pm = MagicMock() + pm.declaration.oauth_schema.credentials_schema = [schema] + with patch.object(service.provider_manager, "fetch_datasource_provider", return_value=pm): + result = service.extract_secret_variables("t1", "org/plug/prov", CredentialType.OAUTH2) + assert "oauth_secret" in result + + def test_should_raise_value_error_when_credential_type_is_invalid(self, service): + pm = MagicMock() + with patch.object(service.provider_manager, "fetch_datasource_provider", return_value=pm): + with pytest.raises(ValueError, match="Invalid credential type"): + service.extract_secret_variables("t1", "org/plug/prov", CredentialType.UNAUTHORIZED) + + # ----------------------------------------------------------------------- + # list_datasource_credentials (lines 721-754) + # ----------------------------------------------------------------------- + + def test_should_return_empty_list_when_no_credentials_stored(self, service, mock_db_session): + mock_db_session.query().all.return_value = [] + assert service.list_datasource_credentials("t1", "prov", "org/plug") == [] + + def test_should_return_masked_credentials_list_when_credentials_exist(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "api_key" + p.encrypted_credentials = {"sk": "v"} + p.is_default = False + mock_db_session.query().all.return_value = [p] + with patch.object(service, "extract_secret_variables", return_value=["sk"]): + result = service.list_datasource_credentials("t1", "prov", "org/plug") + assert len(result) == 1 + + # ----------------------------------------------------------------------- + # get_all_datasource_credentials (lines 808-871) + # ----------------------------------------------------------------------- + + def test_should_aggregate_credentials_for_non_hardcoded_plugin(self, service): + with patch("services.datasource_provider_service.PluginDatasourceManager") as mock_mgr: + ds = MagicMock() + ds.provider = "prov" + ds.plugin_id = "org/plug" + ds.declaration.identity.label.model_dump.return_value = {"en_US": "Label"} + mock_mgr.return_value.fetch_installed_datasource_providers.return_value = [ds] + cred = {"credential": {"k": "v"}, "is_default": True} + with patch.object(service, "list_datasource_credentials", return_value=[cred]): + results = service.get_all_datasource_credentials("t1") + assert len(results) == 1 + + def test_should_include_oauth_schema_for_hardcoded_plugin_ids(self, service, mock_db_session): + """Lines 819-871: get_all_datasource_credentials covers hardcoded langgenius plugin IDs.""" + with patch("services.datasource_provider_service.PluginDatasourceManager") as mock_mgr: + ds = MagicMock() + ds.plugin_id = "langgenius/firecrawl_datasource" + ds.provider = "firecrawl" + ds.plugin_unique_identifier = "pui" + ds.declaration.identity.icon = "icon" + ds.declaration.identity.name = "langgenius/firecrawl_datasource" + ds.declaration.identity.label.model_dump.return_value = {"en_US": "Firecrawl"} + ds.declaration.identity.description.model_dump.return_value = {"en_US": "desc"} + ds.declaration.identity.author = "langgenius" + ds.declaration.credentials_schema = [] + ds.declaration.oauth_schema.client_schema = [] + ds.declaration.oauth_schema.credentials_schema = [] + mock_mgr.return_value.fetch_installed_datasource_providers.return_value = [ds] + with ( + patch.object(service, "list_datasource_credentials", return_value=[]), + patch.object(service, "get_tenant_oauth_client", return_value=None), + patch.object(service, "is_tenant_oauth_params_enabled", return_value=False), + patch.object(service, "is_system_oauth_params_exist", return_value=False), + ): + results = service.get_all_datasource_credentials("t1") + assert len(results) == 1 + assert results[0]["oauth_schema"] is not None + + # ----------------------------------------------------------------------- + # get_real_datasource_credentials (lines 873-915) + # ----------------------------------------------------------------------- + + def test_should_return_empty_list_when_no_real_credentials_exist(self, service, mock_db_session): + mock_db_session.query().all.return_value = [] + assert service.get_real_datasource_credentials("t1", "prov", "org/plug") == [] + + def test_should_return_decrypted_credential_list_when_credentials_exist(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + p.auth_type = "api_key" + p.encrypted_credentials = {"sk": "v"} + mock_db_session.query().all.return_value = [p] + with patch.object(service, "extract_secret_variables", return_value=["sk"]): + result = service.get_real_datasource_credentials("t1", "prov", "org/plug") + assert len(result) == 1 + + # ----------------------------------------------------------------------- + # update_datasource_credentials (lines 917-978) + # ----------------------------------------------------------------------- + + def test_should_raise_value_error_when_credential_not_found_on_update(self, service, mock_db_session, mock_user): + mock_db_session.query().first.return_value = None + with patch("services.datasource_provider_service.get_current_user", return_value=mock_user): + with pytest.raises(ValueError, match="not found"): + service.update_datasource_credentials("t1", "id", "prov", "org/plug", {}, "name") + + def test_should_raise_value_error_when_new_name_already_used_on_update(self, service, mock_db_session, mock_user): + p = MagicMock(spec=DatasourceProvider) + p.name = "old_name" + p.auth_type = "api_key" + p.encrypted_credentials = {"sk": "e"} + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 1 + with patch("services.datasource_provider_service.get_current_user", return_value=mock_user): + with pytest.raises(ValueError, match="already exists"): + service.update_datasource_credentials("t1", "id", "prov", "org/plug", {}, "new_name") + + def test_should_raise_value_error_when_credential_validation_fails_on_update( + self, service, mock_db_session, mock_user + ): + p = MagicMock(spec=DatasourceProvider) + p.name = "old_name" + p.auth_type = "api_key" + p.encrypted_credentials = {"sk": "e"} + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 0 + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service, "extract_secret_variables", return_value=["sk"]), + patch.object(service.provider_manager, "validate_provider_credentials", side_effect=Exception("bad")), + ): + with pytest.raises(ValueError, match="Failed to validate"): + service.update_datasource_credentials("t1", "id", "prov", "org/plug", {"sk": "v"}, "name") + + def test_should_encrypt_credentials_and_commit_when_update_succeeds(self, service, mock_db_session, mock_user): + """Verifies that encrypted_credentials is reassigned with encrypted value and commit is called.""" + p = MagicMock(spec=DatasourceProvider) + p.name = "old_name" + p.auth_type = "api_key" + p.encrypted_credentials = {"sk": "old_enc"} + mock_db_session.query().first.return_value = p + mock_db_session.query().count.return_value = 0 + with ( + patch("services.datasource_provider_service.get_current_user", return_value=mock_user), + patch.object(service, "extract_secret_variables", return_value=["sk"]), + patch.object(service.provider_manager, "validate_provider_credentials"), + ): + service.update_datasource_credentials("t1", "id", "prov", "org/plug", {"sk": "new_val"}, "name") + # encrypter must have been called with the new secret value + self._enc.encrypt_token.assert_called() + # commit must be called exactly once + mock_db_session.commit.assert_called_once() + + # ----------------------------------------------------------------------- + # remove_datasource_credentials (lines 980-997) + # ----------------------------------------------------------------------- + + def test_should_delete_provider_and_commit_when_found(self, service, mock_db_session): + p = MagicMock(spec=DatasourceProvider) + mock_db_session.query().first.return_value = p + service.remove_datasource_credentials("t1", "id", "prov", "org/plug") + mock_db_session.delete.assert_called_once_with(p) + mock_db_session.commit.assert_called_once() + + def test_should_do_nothing_when_credential_not_found_on_remove(self, service, mock_db_session): + """No error raised; no delete called when record doesn't exist (lines 994 branch).""" + mock_db_session.query().first.return_value = None + service.remove_datasource_credentials("t1", "id", "prov", "org/plug") + mock_db_session.delete.assert_not_called() diff --git a/api/tests/unit_tests/services/test_end_user_service.py b/api/tests/unit_tests/services/test_end_user_service.py index 7f087a17d8..a3b1f46436 100644 --- a/api/tests/unit_tests/services/test_end_user_service.py +++ b/api/tests/unit_tests/services/test_end_user_service.py @@ -3,7 +3,7 @@ from unittest.mock import MagicMock, patch import pytest from core.app.entities.app_invoke_entities import InvokeFrom -from models.model import App, EndUser +from models.model import App, DefaultEndUserSessionID, EndUser from services.end_user_service import EndUserService @@ -44,6 +44,145 @@ class TestEndUserServiceFactory: return end_user +class TestEndUserServiceGetEndUserById: + """Unit tests for EndUserService.get_end_user_by_id method.""" + + @pytest.fixture + def factory(self): + """Provide test data factory.""" + return TestEndUserServiceFactory() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_get_end_user_by_id_success(self, mock_db, mock_session_class, factory): + """Test successful retrieval of end user by ID.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + end_user_id = "user-789" + + mock_end_user = factory.create_end_user_mock(user_id=end_user_id, tenant_id=tenant_id, app_id=app_id) + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.first.return_value = mock_end_user + + # Act + result = EndUserService.get_end_user_by_id(tenant_id=tenant_id, app_id=app_id, end_user_id=end_user_id) + + # Assert + assert result == mock_end_user + mock_session.query.assert_called_once_with(EndUser) + mock_query.where.assert_called_once() + mock_query.first.assert_called_once() + mock_context.__enter__.assert_called_once() + mock_context.__exit__.assert_called_once() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_get_end_user_by_id_not_found(self, mock_db, mock_session_class): + """Test retrieval of non-existent end user returns None.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + end_user_id = "user-789" + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.first.return_value = None + + # Act + result = EndUserService.get_end_user_by_id(tenant_id=tenant_id, app_id=app_id, end_user_id=end_user_id) + + # Assert + assert result is None + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_get_end_user_by_id_query_parameters(self, mock_db, mock_session_class): + """Test that query parameters are correctly applied.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + end_user_id = "user-789" + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.first.return_value = None + + # Act + EndUserService.get_end_user_by_id(tenant_id=tenant_id, app_id=app_id, end_user_id=end_user_id) + + # Assert + # Verify the where clause was called with the correct conditions + call_args = mock_query.where.call_args[0] + assert len(call_args) == 3 + # Check that the conditions match the expected filters + # (We can't easily test the exact conditions without importing SQLAlchemy) + + +class TestEndUserServiceGetOrCreateEndUser: + """Unit tests for EndUserService.get_or_create_end_user method.""" + + @pytest.fixture + def factory(self): + """Provide test data factory.""" + return TestEndUserServiceFactory() + + @patch("services.end_user_service.EndUserService.get_or_create_end_user_by_type") + def test_get_or_create_end_user_with_user_id(self, mock_get_or_create_by_type, factory): + """Test get_or_create_end_user with specific user_id.""" + # Arrange + app_mock = factory.create_app_mock() + user_id = "user-123" + expected_end_user = factory.create_end_user_mock() + mock_get_or_create_by_type.return_value = expected_end_user + + # Act + result = EndUserService.get_or_create_end_user(app_mock, user_id) + + # Assert + assert result == expected_end_user + mock_get_or_create_by_type.assert_called_once_with( + InvokeFrom.SERVICE_API, app_mock.tenant_id, app_mock.id, user_id + ) + + @patch("services.end_user_service.EndUserService.get_or_create_end_user_by_type") + def test_get_or_create_end_user_without_user_id(self, mock_get_or_create_by_type, factory): + """Test get_or_create_end_user without user_id (None).""" + # Arrange + app_mock = factory.create_app_mock() + expected_end_user = factory.create_end_user_mock() + mock_get_or_create_by_type.return_value = expected_end_user + + # Act + result = EndUserService.get_or_create_end_user(app_mock, None) + + # Assert + assert result == expected_end_user + mock_get_or_create_by_type.assert_called_once_with( + InvokeFrom.SERVICE_API, app_mock.tenant_id, app_mock.id, None + ) + + class TestEndUserServiceGetOrCreateEndUserByType: """ Unit tests for EndUserService.get_or_create_end_user_by_type method. @@ -60,6 +199,191 @@ class TestEndUserServiceGetOrCreateEndUserByType: """Provide test data factory.""" return TestEndUserServiceFactory() + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_new_end_user_with_user_id(self, mock_db, mock_session_class, factory): + """Test creating a new end user with specific user_id.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.order_by.return_value = mock_query + mock_query.first.return_value = None # No existing user + + # Act + result = EndUserService.get_or_create_end_user_by_type( + type=type_enum, tenant_id=tenant_id, app_id=app_id, user_id=user_id + ) + + # Assert + # Verify new EndUser was created with correct parameters + mock_session.add.assert_called_once() + mock_session.commit.assert_called_once() + added_user = mock_session.add.call_args[0][0] + assert added_user.tenant_id == tenant_id + assert added_user.app_id == app_id + assert added_user.type == type_enum + assert added_user.session_id == user_id + assert added_user.external_user_id == user_id + assert added_user._is_anonymous is False + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_new_end_user_default_session(self, mock_db, mock_session_class, factory): + """Test creating a new end user with default session ID.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + user_id = None + type_enum = InvokeFrom.WEB_APP + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.order_by.return_value = mock_query + mock_query.first.return_value = None # No existing user + + # Act + result = EndUserService.get_or_create_end_user_by_type( + type=type_enum, tenant_id=tenant_id, app_id=app_id, user_id=user_id + ) + + # Assert + added_user = mock_session.add.call_args[0][0] + assert added_user.session_id == DefaultEndUserSessionID.DEFAULT_SESSION_ID + assert added_user.external_user_id == DefaultEndUserSessionID.DEFAULT_SESSION_ID + assert added_user._is_anonymous is True + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + @patch("services.end_user_service.logger") + def test_existing_user_same_type(self, mock_logger, mock_db, mock_session_class, factory): + """Test retrieving existing user with same type.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + existing_user = factory.create_end_user_mock( + tenant_id=tenant_id, app_id=app_id, session_id=user_id, type=type_enum + ) + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.order_by.return_value = mock_query + mock_query.first.return_value = existing_user + + # Act + result = EndUserService.get_or_create_end_user_by_type( + type=type_enum, tenant_id=tenant_id, app_id=app_id, user_id=user_id + ) + + # Assert + assert result == existing_user + mock_session.add.assert_not_called() + mock_session.commit.assert_not_called() + mock_logger.info.assert_not_called() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + @patch("services.end_user_service.logger") + def test_existing_user_different_type_upgrade(self, mock_logger, mock_db, mock_session_class, factory): + """Test upgrading existing user with different type.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + user_id = "user-789" + old_type = InvokeFrom.WEB_APP + new_type = InvokeFrom.SERVICE_API + + existing_user = factory.create_end_user_mock( + tenant_id=tenant_id, app_id=app_id, session_id=user_id, type=old_type + ) + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.order_by.return_value = mock_query + mock_query.first.return_value = existing_user + + # Act + result = EndUserService.get_or_create_end_user_by_type( + type=new_type, tenant_id=tenant_id, app_id=app_id, user_id=user_id + ) + + # Assert + assert result == existing_user + assert existing_user.type == new_type + mock_session.commit.assert_called_once() + mock_logger.info.assert_called_once() + logger_call_args = mock_logger.info.call_args[0] + assert "Upgrading legacy EndUser" in logger_call_args[0] + # The old and new types are passed as separate arguments + assert mock_logger.info.call_args[0][1] == existing_user.id + assert mock_logger.info.call_args[0][2] == old_type + assert mock_logger.info.call_args[0][3] == new_type + assert mock_logger.info.call_args[0][4] == user_id + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_query_ordering_prioritizes_exact_type_match(self, mock_db, mock_session_class, factory): + """Test that query ordering prioritizes exact type matches.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + user_id = "user-789" + target_type = InvokeFrom.SERVICE_API + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.order_by.return_value = mock_query + mock_query.first.return_value = None + + # Act + EndUserService.get_or_create_end_user_by_type( + type=target_type, tenant_id=tenant_id, app_id=app_id, user_id=user_id + ) + + # Assert + mock_query.order_by.assert_called_once() + # Verify that case statement is used for ordering + order_by_call = mock_query.order_by.call_args[0][0] + # The exact structure depends on SQLAlchemy's case implementation + # but we can verify it was called + # Test 10: Session context manager properly closes @patch("services.end_user_service.Session") @patch("services.end_user_service.db") @@ -93,3 +417,425 @@ class TestEndUserServiceGetOrCreateEndUserByType: # Verify context manager was entered and exited mock_context.__enter__.assert_called_once() mock_context.__exit__.assert_called_once() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_all_invokefrom_types_supported(self, mock_db, mock_session_class): + """Test that all InvokeFrom enum values are supported.""" + # Arrange + tenant_id = "tenant-123" + app_id = "app-456" + user_id = "user-789" + + for invoke_type in InvokeFrom: + with patch("services.end_user_service.Session") as mock_session_class: + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.order_by.return_value = mock_query + mock_query.first.return_value = None + + # Act + result = EndUserService.get_or_create_end_user_by_type( + type=invoke_type, tenant_id=tenant_id, app_id=app_id, user_id=user_id + ) + + # Assert + added_user = mock_session.add.call_args[0][0] + assert added_user.type == invoke_type + + +class TestEndUserServiceCreateEndUserBatch: + """Unit tests for EndUserService.create_end_user_batch method.""" + + @pytest.fixture + def factory(self): + """Provide test data factory.""" + return TestEndUserServiceFactory() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_empty_app_ids(self, mock_db, mock_session_class): + """Test batch creation with empty app_ids list.""" + # Arrange + tenant_id = "tenant-123" + app_ids: list[str] = [] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + # Act + result = EndUserService.create_end_user_batch( + type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + assert result == {} + mock_session_class.assert_not_called() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_default_session_id(self, mock_db, mock_session_class): + """Test batch creation with empty user_id (uses default session).""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456", "app-789"] + user_id = "" + type_enum = InvokeFrom.SERVICE_API + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [] # No existing users + + # Act + result = EndUserService.create_end_user_batch( + type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + assert len(result) == 2 + for app_id, end_user in result.items(): + assert end_user.session_id == DefaultEndUserSessionID.DEFAULT_SESSION_ID + assert end_user.external_user_id == DefaultEndUserSessionID.DEFAULT_SESSION_ID + assert end_user._is_anonymous is True + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_deduplicate_app_ids(self, mock_db, mock_session_class): + """Test that duplicate app_ids are deduplicated while preserving order.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456", "app-789", "app-456", "app-123", "app-789"] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [] # No existing users + + # Act + result = EndUserService.create_end_user_batch( + type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + # Should have 3 unique app_ids in original order + assert len(result) == 3 + assert "app-456" in result + assert "app-789" in result + assert "app-123" in result + + # Verify the order is preserved + added_users = mock_session.add_all.call_args[0][0] + assert len(added_users) == 3 + assert added_users[0].app_id == "app-456" + assert added_users[1].app_id == "app-789" + assert added_users[2].app_id == "app-123" + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_all_existing_users(self, mock_db, mock_session_class, factory): + """Test batch creation when all users already exist.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456", "app-789"] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + existing_user1 = factory.create_end_user_mock( + tenant_id=tenant_id, app_id="app-456", session_id=user_id, type=type_enum + ) + existing_user2 = factory.create_end_user_mock( + tenant_id=tenant_id, app_id="app-789", session_id=user_id, type=type_enum + ) + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [existing_user1, existing_user2] + + # Act + result = EndUserService.create_end_user_batch( + type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + assert len(result) == 2 + assert result["app-456"] == existing_user1 + assert result["app-789"] == existing_user2 + mock_session.add_all.assert_not_called() + mock_session.commit.assert_not_called() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_partial_existing_users(self, mock_db, mock_session_class, factory): + """Test batch creation with some existing and some new users.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456", "app-789", "app-123"] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + existing_user1 = factory.create_end_user_mock( + tenant_id=tenant_id, app_id="app-456", session_id=user_id, type=type_enum + ) + # app-789 and app-123 don't exist + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [existing_user1] + + # Act + result = EndUserService.create_end_user_batch( + type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + assert len(result) == 3 + assert result["app-456"] == existing_user1 + assert "app-789" in result + assert "app-123" in result + + # Should create 2 new users + mock_session.add_all.assert_called_once() + added_users = mock_session.add_all.call_args[0][0] + assert len(added_users) == 2 + + mock_session.commit.assert_called_once() + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_handles_duplicates_in_existing(self, mock_db, mock_session_class, factory): + """Test batch creation handles duplicates in existing users gracefully.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456"] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + # Simulate duplicate records in database + existing_user1 = factory.create_end_user_mock( + user_id="user-1", tenant_id=tenant_id, app_id="app-456", session_id=user_id, type=type_enum + ) + existing_user2 = factory.create_end_user_mock( + user_id="user-2", tenant_id=tenant_id, app_id="app-456", session_id=user_id, type=type_enum + ) + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [existing_user1, existing_user2] + + # Act + result = EndUserService.create_end_user_batch( + type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + assert len(result) == 1 + # Should prefer the first one found + assert result["app-456"] == existing_user1 + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_all_invokefrom_types(self, mock_db, mock_session_class): + """Test batch creation with all InvokeFrom types.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456"] + user_id = "user-789" + + for invoke_type in InvokeFrom: + with patch("services.end_user_service.Session") as mock_session_class: + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [] # No existing users + + # Act + result = EndUserService.create_end_user_batch( + type=invoke_type, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + added_user = mock_session.add_all.call_args[0][0][0] + assert added_user.type == invoke_type + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_single_app_id(self, mock_db, mock_session_class, factory): + """Test batch creation with single app_id.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456"] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [] # No existing users + + # Act + result = EndUserService.create_end_user_batch( + type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id + ) + + # Assert + assert len(result) == 1 + assert "app-456" in result + mock_session.add_all.assert_called_once() + added_users = mock_session.add_all.call_args[0][0] + assert len(added_users) == 1 + assert added_users[0].app_id == "app-456" + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_anonymous_vs_authenticated(self, mock_db, mock_session_class): + """Test batch creation correctly sets anonymous flag.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456", "app-789"] + + # Test with regular user ID + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [] # No existing users + + # Act - authenticated user + result = EndUserService.create_end_user_batch( + type=InvokeFrom.SERVICE_API, tenant_id=tenant_id, app_ids=app_ids, user_id="user-789" + ) + + # Assert + added_users = mock_session.add_all.call_args[0][0] + for user in added_users: + assert user._is_anonymous is False + + # Test with default session ID + mock_session.reset_mock() + mock_query.reset_mock() + mock_query.all.return_value = [] + + # Act - anonymous user + result = EndUserService.create_end_user_batch( + type=InvokeFrom.SERVICE_API, + tenant_id=tenant_id, + app_ids=app_ids, + user_id=DefaultEndUserSessionID.DEFAULT_SESSION_ID, + ) + + # Assert + added_users = mock_session.add_all.call_args[0][0] + for user in added_users: + assert user._is_anonymous is True + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_efficient_single_query(self, mock_db, mock_session_class): + """Test that batch creation uses efficient single query for existing users.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456", "app-789", "app-123"] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [] # No existing users + + # Act + EndUserService.create_end_user_batch(type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id) + + # Assert + # Should make exactly one query to check for existing users + mock_session.query.assert_called_once_with(EndUser) + mock_query.where.assert_called_once() + mock_query.all.assert_called_once() + + # Verify the where clause uses .in_() for app_ids + where_call = mock_query.where.call_args[0] + # The exact structure depends on SQLAlchemy implementation + # but we can verify it was called with the right parameters + + @patch("services.end_user_service.Session") + @patch("services.end_user_service.db") + def test_create_batch_session_context_manager(self, mock_db, mock_session_class): + """Test that batch creation properly uses session context manager.""" + # Arrange + tenant_id = "tenant-123" + app_ids = ["app-456"] + user_id = "user-789" + type_enum = InvokeFrom.SERVICE_API + + mock_session = MagicMock() + mock_context = MagicMock() + mock_context.__enter__.return_value = mock_session + mock_session_class.return_value = mock_context + + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.all.return_value = [] # No existing users + + # Act + EndUserService.create_end_user_batch(type=type_enum, tenant_id=tenant_id, app_ids=app_ids, user_id=user_id) + + # Assert + mock_context.__enter__.assert_called_once() + mock_context.__exit__.assert_called_once() + mock_session.commit.assert_called_once() diff --git a/api/tests/unit_tests/services/test_export_app_messages.py b/api/tests/unit_tests/services/test_export_app_messages.py new file mode 100644 index 0000000000..5f2d3f21c0 --- /dev/null +++ b/api/tests/unit_tests/services/test_export_app_messages.py @@ -0,0 +1,43 @@ +import datetime + +import pytest + +from services.retention.conversation.message_export_service import AppMessageExportService + + +def test_validate_export_filename_accepts_relative_path(): + assert AppMessageExportService.validate_export_filename("exports/2026/test01") == "exports/2026/test01" + + +@pytest.mark.parametrize( + "filename", + [ + "test01.jsonl.gz", + "test01.jsonl", + "test01.gz", + "/tmp/test01", + "exports/../test01", + "bad\x00name", + "bad\tname", + "a" * 1025, + ], +) +def test_validate_export_filename_rejects_invalid_values(filename: str): + with pytest.raises(ValueError): + AppMessageExportService.validate_export_filename(filename) + + +def test_service_derives_output_names_from_filename_base(): + service = AppMessageExportService( + app_id="736b9b03-20f2-4697-91da-8d00f6325900", + start_from=None, + end_before=datetime.datetime(2026, 3, 1), + filename="exports/2026/test01", + batch_size=1000, + use_cloud_storage=True, + dry_run=True, + ) + + assert service._filename_base == "exports/2026/test01" + assert service.output_gz_name == "exports/2026/test01.jsonl.gz" + assert service.output_jsonl_name == "exports/2026/test01.jsonl" diff --git a/api/tests/unit_tests/services/test_file_service.py b/api/tests/unit_tests/services/test_file_service.py new file mode 100644 index 0000000000..b7259c3e82 --- /dev/null +++ b/api/tests/unit_tests/services/test_file_service.py @@ -0,0 +1,420 @@ +import base64 +import hashlib +import os +from unittest.mock import MagicMock, patch + +import pytest +from sqlalchemy import Engine +from sqlalchemy.orm import Session, sessionmaker +from werkzeug.exceptions import NotFound + +from configs import dify_config +from models.enums import CreatorUserRole +from models.model import Account, EndUser, UploadFile +from services.errors.file import BlockedFileExtensionError, FileTooLargeError, UnsupportedFileTypeError +from services.file_service import FileService + + +class TestFileService: + @pytest.fixture + def mock_db_session(self): + session = MagicMock(spec=Session) + # Mock context manager behavior + session.__enter__.return_value = session + return session + + @pytest.fixture + def mock_session_maker(self, mock_db_session): + maker = MagicMock(spec=sessionmaker) + maker.return_value = mock_db_session + return maker + + @pytest.fixture + def file_service(self, mock_session_maker): + return FileService(session_factory=mock_session_maker) + + def test_init_with_engine(self): + engine = MagicMock(spec=Engine) + service = FileService(session_factory=engine) + assert isinstance(service._session_maker, sessionmaker) + + def test_init_with_sessionmaker(self): + maker = MagicMock(spec=sessionmaker) + service = FileService(session_factory=maker) + assert service._session_maker == maker + + def test_init_invalid_factory(self): + with pytest.raises(AssertionError, match="must be a sessionmaker or an Engine."): + FileService(session_factory="invalid") + + @patch("services.file_service.storage") + @patch("services.file_service.naive_utc_now") + @patch("services.file_service.extract_tenant_id") + @patch("services.file_service.file_helpers.get_signed_file_url") + def test_upload_file_success( + self, mock_get_url, mock_tenant_id, mock_now, mock_storage, file_service, mock_db_session + ): + # Setup + mock_tenant_id.return_value = "tenant_id" + mock_now.return_value = "2024-01-01" + mock_get_url.return_value = "http://signed-url" + + user = MagicMock(spec=Account) + user.id = "user_id" + content = b"file content" + filename = "test.jpg" + mimetype = "image/jpeg" + + # Execute + result = file_service.upload_file(filename=filename, content=content, mimetype=mimetype, user=user) + + # Assert + assert isinstance(result, UploadFile) + assert result.name == filename + assert result.tenant_id == "tenant_id" + assert result.size == len(content) + assert result.extension == "jpg" + assert result.mime_type == mimetype + assert result.created_by_role == CreatorUserRole.ACCOUNT + assert result.created_by == "user_id" + assert result.hash == hashlib.sha3_256(content).hexdigest() + assert result.source_url == "http://signed-url" + + mock_storage.save.assert_called_once() + mock_db_session.add.assert_called_once_with(result) + mock_db_session.commit.assert_called_once() + + def test_upload_file_invalid_characters(self, file_service): + with pytest.raises(ValueError, match="Filename contains invalid characters"): + file_service.upload_file(filename="invalid/file.txt", content=b"", mimetype="text/plain", user=MagicMock()) + + def test_upload_file_long_filename(self, file_service, mock_db_session): + # Setup + long_name = "a" * 210 + ".txt" + user = MagicMock(spec=Account) + user.id = "user_id" + + with ( + patch("services.file_service.storage"), + patch("services.file_service.extract_tenant_id") as mock_tenant, + patch("services.file_service.file_helpers.get_signed_file_url"), + ): + mock_tenant.return_value = "tenant" + result = file_service.upload_file(filename=long_name, content=b"test", mimetype="text/plain", user=user) + assert len(result.name) <= 205 # 200 + . + extension + assert result.name.endswith(".txt") + + def test_upload_file_blocked_extension(self, file_service): + with patch.object(dify_config, "inner_UPLOAD_FILE_EXTENSION_BLACKLIST", "exe"): + with pytest.raises(BlockedFileExtensionError): + file_service.upload_file( + filename="test.exe", content=b"", mimetype="application/octet-stream", user=MagicMock() + ) + + def test_upload_file_unsupported_type_for_datasets(self, file_service): + with pytest.raises(UnsupportedFileTypeError): + file_service.upload_file( + filename="test.jpg", content=b"", mimetype="image/jpeg", user=MagicMock(), source="datasets" + ) + + def test_upload_file_too_large(self, file_service): + # 16MB file for an image with 15MB limit + content = b"a" * (16 * 1024 * 1024) + with patch.object(dify_config, "UPLOAD_IMAGE_FILE_SIZE_LIMIT", 15): + with pytest.raises(FileTooLargeError): + file_service.upload_file(filename="test.jpg", content=content, mimetype="image/jpeg", user=MagicMock()) + + def test_upload_file_end_user(self, file_service, mock_db_session): + user = MagicMock(spec=EndUser) + user.id = "end_user_id" + + with ( + patch("services.file_service.storage"), + patch("services.file_service.extract_tenant_id") as mock_tenant, + patch("services.file_service.file_helpers.get_signed_file_url"), + ): + mock_tenant.return_value = "tenant" + result = file_service.upload_file(filename="test.txt", content=b"test", mimetype="text/plain", user=user) + assert result.created_by_role == CreatorUserRole.END_USER + + def test_is_file_size_within_limit(self): + with ( + patch.object(dify_config, "UPLOAD_IMAGE_FILE_SIZE_LIMIT", 10), + patch.object(dify_config, "UPLOAD_VIDEO_FILE_SIZE_LIMIT", 20), + patch.object(dify_config, "UPLOAD_AUDIO_FILE_SIZE_LIMIT", 30), + patch.object(dify_config, "UPLOAD_FILE_SIZE_LIMIT", 5), + ): + # Image + assert FileService.is_file_size_within_limit(extension="jpg", file_size=10 * 1024 * 1024) is True + assert FileService.is_file_size_within_limit(extension="png", file_size=11 * 1024 * 1024) is False + + # Video + assert FileService.is_file_size_within_limit(extension="mp4", file_size=20 * 1024 * 1024) is True + assert FileService.is_file_size_within_limit(extension="avi", file_size=21 * 1024 * 1024) is False + + # Audio + assert FileService.is_file_size_within_limit(extension="mp3", file_size=30 * 1024 * 1024) is True + assert FileService.is_file_size_within_limit(extension="wav", file_size=31 * 1024 * 1024) is False + + # Default + assert FileService.is_file_size_within_limit(extension="txt", file_size=5 * 1024 * 1024) is True + assert FileService.is_file_size_within_limit(extension="pdf", file_size=6 * 1024 * 1024) is False + + def test_get_file_base64_success(self, file_service, mock_db_session): + # Setup + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.key = "test_key" + mock_db_session.query().where().first.return_value = upload_file + + with patch("services.file_service.storage") as mock_storage: + mock_storage.load_once.return_value = b"test content" + + # Execute + result = file_service.get_file_base64("file_id") + + # Assert + assert result == base64.b64encode(b"test content").decode() + mock_storage.load_once.assert_called_once_with("test_key") + + def test_get_file_base64_not_found(self, file_service, mock_db_session): + mock_db_session.query().where().first.return_value = None + with pytest.raises(NotFound, match="File not found"): + file_service.get_file_base64("non_existent") + + def test_upload_text_success(self, file_service, mock_db_session): + # Setup + text = "sample text" + text_name = "test.txt" + user_id = "user_id" + tenant_id = "tenant_id" + + with patch("services.file_service.storage") as mock_storage: + # Execute + result = file_service.upload_text(text, text_name, user_id, tenant_id) + + # Assert + assert result.name == text_name + assert result.size == len(text) + assert result.tenant_id == tenant_id + assert result.created_by == user_id + assert result.used is True + assert result.extension == "txt" + mock_storage.save.assert_called_once() + mock_db_session.add.assert_called_once() + mock_db_session.commit.assert_called_once() + + def test_upload_text_long_name(self, file_service, mock_db_session): + long_name = "a" * 210 + with patch("services.file_service.storage"): + result = file_service.upload_text("text", long_name, "user", "tenant") + assert len(result.name) == 200 + + def test_get_file_preview_success(self, file_service, mock_db_session): + # Setup + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "pdf" + mock_db_session.query().where().first.return_value = upload_file + + with patch("services.file_service.ExtractProcessor.load_from_upload_file") as mock_extract: + mock_extract.return_value = "Extracted text content" + + # Execute + result = file_service.get_file_preview("file_id") + + # Assert + assert result == "Extracted text content" + + def test_get_file_preview_not_found(self, file_service, mock_db_session): + mock_db_session.query().where().first.return_value = None + with pytest.raises(NotFound, match="File not found"): + file_service.get_file_preview("non_existent") + + def test_get_file_preview_unsupported_type(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "exe" + mock_db_session.query().where().first.return_value = upload_file + with pytest.raises(UnsupportedFileTypeError): + file_service.get_file_preview("file_id") + + def test_get_image_preview_success(self, file_service, mock_db_session): + # Setup + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "jpg" + upload_file.mime_type = "image/jpeg" + upload_file.key = "key" + mock_db_session.query().where().first.return_value = upload_file + + with ( + patch("services.file_service.file_helpers.verify_image_signature") as mock_verify, + patch("services.file_service.storage") as mock_storage, + ): + mock_verify.return_value = True + mock_storage.load.return_value = iter([b"chunk1"]) + + # Execute + gen, mime = file_service.get_image_preview("file_id", "ts", "nonce", "sign") + + # Assert + assert list(gen) == [b"chunk1"] + assert mime == "image/jpeg" + + def test_get_image_preview_invalid_sig(self, file_service): + with patch("services.file_service.file_helpers.verify_image_signature") as mock_verify: + mock_verify.return_value = False + with pytest.raises(NotFound, match="File not found or signature is invalid"): + file_service.get_image_preview("file_id", "ts", "nonce", "sign") + + def test_get_image_preview_not_found(self, file_service, mock_db_session): + mock_db_session.query().where().first.return_value = None + with patch("services.file_service.file_helpers.verify_image_signature") as mock_verify: + mock_verify.return_value = True + with pytest.raises(NotFound, match="File not found or signature is invalid"): + file_service.get_image_preview("file_id", "ts", "nonce", "sign") + + def test_get_image_preview_unsupported_type(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "txt" + mock_db_session.query().where().first.return_value = upload_file + with patch("services.file_service.file_helpers.verify_image_signature") as mock_verify: + mock_verify.return_value = True + with pytest.raises(UnsupportedFileTypeError): + file_service.get_image_preview("file_id", "ts", "nonce", "sign") + + def test_get_file_generator_by_file_id_success(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.key = "key" + mock_db_session.query().where().first.return_value = upload_file + + with ( + patch("services.file_service.file_helpers.verify_file_signature") as mock_verify, + patch("services.file_service.storage") as mock_storage, + ): + mock_verify.return_value = True + mock_storage.load.return_value = iter([b"chunk"]) + + gen, file = file_service.get_file_generator_by_file_id("file_id", "ts", "nonce", "sign") + assert list(gen) == [b"chunk"] + assert file == upload_file + + def test_get_file_generator_by_file_id_invalid_sig(self, file_service): + with patch("services.file_service.file_helpers.verify_file_signature") as mock_verify: + mock_verify.return_value = False + with pytest.raises(NotFound, match="File not found or signature is invalid"): + file_service.get_file_generator_by_file_id("file_id", "ts", "nonce", "sign") + + def test_get_file_generator_by_file_id_not_found(self, file_service, mock_db_session): + mock_db_session.query().where().first.return_value = None + with patch("services.file_service.file_helpers.verify_file_signature") as mock_verify: + mock_verify.return_value = True + with pytest.raises(NotFound, match="File not found or signature is invalid"): + file_service.get_file_generator_by_file_id("file_id", "ts", "nonce", "sign") + + def test_get_public_image_preview_success(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "png" + upload_file.mime_type = "image/png" + upload_file.key = "key" + mock_db_session.query().where().first.return_value = upload_file + + with patch("services.file_service.storage") as mock_storage: + mock_storage.load.return_value = b"image content" + gen, mime = file_service.get_public_image_preview("file_id") + assert gen == b"image content" + assert mime == "image/png" + + def test_get_public_image_preview_not_found(self, file_service, mock_db_session): + mock_db_session.query().where().first.return_value = None + with pytest.raises(NotFound, match="File not found or signature is invalid"): + file_service.get_public_image_preview("file_id") + + def test_get_public_image_preview_unsupported_type(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.extension = "txt" + mock_db_session.query().where().first.return_value = upload_file + with pytest.raises(UnsupportedFileTypeError): + file_service.get_public_image_preview("file_id") + + def test_get_file_content_success(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.key = "key" + mock_db_session.query().where().first.return_value = upload_file + + with patch("services.file_service.storage") as mock_storage: + mock_storage.load.return_value = b"hello world" + result = file_service.get_file_content("file_id") + assert result == "hello world" + + def test_get_file_content_not_found(self, file_service, mock_db_session): + mock_db_session.query().where().first.return_value = None + with pytest.raises(NotFound, match="File not found"): + file_service.get_file_content("file_id") + + def test_delete_file_success(self, file_service, mock_db_session): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "file_id" + upload_file.key = "key" + # For session.scalar(select(...)) + mock_db_session.scalar.return_value = upload_file + + with patch("services.file_service.storage") as mock_storage: + file_service.delete_file("file_id") + mock_storage.delete.assert_called_once_with("key") + mock_db_session.delete.assert_called_once_with(upload_file) + + def test_delete_file_not_found(self, file_service, mock_db_session): + mock_db_session.scalar.return_value = None + file_service.delete_file("file_id") + # Should return without doing anything + + @patch("services.file_service.db") + def test_get_upload_files_by_ids_empty(self, mock_db): + result = FileService.get_upload_files_by_ids("tenant_id", []) + assert result == {} + + @patch("services.file_service.db") + def test_get_upload_files_by_ids(self, mock_db): + upload_file = MagicMock(spec=UploadFile) + upload_file.id = "550e8400-e29b-41d4-a716-446655440000" + upload_file.tenant_id = "tenant_id" + mock_db.session.scalars().all.return_value = [upload_file] + + result = FileService.get_upload_files_by_ids("tenant_id", ["550e8400-e29b-41d4-a716-446655440000"]) + assert result["550e8400-e29b-41d4-a716-446655440000"] == upload_file + + def test_sanitize_zip_entry_name(self): + assert FileService._sanitize_zip_entry_name("path/to/file.txt") == "file.txt" + assert FileService._sanitize_zip_entry_name("../../../etc/passwd") == "passwd" + assert FileService._sanitize_zip_entry_name(" ") == "file" + assert FileService._sanitize_zip_entry_name("a\\b") == "a_b" + + def test_dedupe_zip_entry_name(self): + used = {"a.txt"} + assert FileService._dedupe_zip_entry_name("b.txt", used) == "b.txt" + assert FileService._dedupe_zip_entry_name("a.txt", used) == "a (1).txt" + used.add("a (1).txt") + assert FileService._dedupe_zip_entry_name("a.txt", used) == "a (2).txt" + + def test_build_upload_files_zip_tempfile(self): + upload_file = MagicMock(spec=UploadFile) + upload_file.name = "test.txt" + upload_file.key = "key" + + with ( + patch("services.file_service.storage") as mock_storage, + patch("services.file_service.os.remove") as mock_remove, + ): + mock_storage.load.return_value = [b"chunk1", b"chunk2"] + + with FileService.build_upload_files_zip_tempfile(upload_files=[upload_file]) as tmp_path: + assert os.path.exists(tmp_path) + + mock_remove.assert_called_once() diff --git a/api/tests/unit_tests/services/test_hit_testing_service.py b/api/tests/unit_tests/services/test_hit_testing_service.py new file mode 100644 index 0000000000..80e9729f5b --- /dev/null +++ b/api/tests/unit_tests/services/test_hit_testing_service.py @@ -0,0 +1,385 @@ +import json +from typing import Any, cast +from unittest.mock import ANY, MagicMock, patch + +import pytest + +from core.rag.models.document import Document +from models.dataset import Dataset +from services.hit_testing_service import HitTestingService + + +class TestHitTestingService: + """Test suite for HitTestingService""" + + # ===== Utility Method Tests ===== + + def test_escape_query_for_search_should_escape_double_quotes(self): + """Test that escape_query_for_search escapes double quotes correctly""" + # Arrange + query = 'test "query" with quotes' + expected = 'test \\"query\\" with quotes' + + # Act + result = HitTestingService.escape_query_for_search(query) + + # Assert + assert result == expected + + def test_hit_testing_args_check_should_pass_with_valid_query(self): + """Test that hit_testing_args_check passes with a valid query""" + # Arrange + args = {"query": "valid query"} + + # Act & Assert (should not raise) + HitTestingService.hit_testing_args_check(args) + + def test_hit_testing_args_check_should_pass_with_valid_attachments(self): + """Test that hit_testing_args_check passes with valid attachment_ids""" + # Arrange + args = {"attachment_ids": ["id1", "id2"]} + + # Act & Assert (should not raise) + HitTestingService.hit_testing_args_check(args) + + def test_hit_testing_args_check_should_raise_error_when_no_query_or_attachments(self): + """Test that hit_testing_args_check raises ValueError if both query and attachment_ids are missing""" + # Arrange + args = {} + + # Act & Assert + with pytest.raises(ValueError) as exc_info: + HitTestingService.hit_testing_args_check(args) + assert "Query or attachment_ids is required" in str(exc_info.value) + + def test_hit_testing_args_check_should_raise_error_when_query_too_long(self): + """Test that hit_testing_args_check raises ValueError if query exceeds 250 characters""" + # Arrange + args = {"query": "a" * 251} + + # Act & Assert + with pytest.raises(ValueError) as exc_info: + HitTestingService.hit_testing_args_check(args) + assert "Query cannot exceed 250 characters" in str(exc_info.value) + + def test_hit_testing_args_check_should_raise_error_when_attachments_not_list(self): + """Test that hit_testing_args_check raises ValueError if attachment_ids is not a list""" + # Arrange + args = {"attachment_ids": "not a list"} + + # Act & Assert + with pytest.raises(ValueError) as exc_info: + HitTestingService.hit_testing_args_check(args) + assert "Attachment_ids must be a list" in str(exc_info.value) + + # ===== Response Formatting Tests ===== + + @patch("core.rag.datasource.retrieval_service.RetrievalService.format_retrieval_documents") + def test_compact_retrieve_response_should_format_correctly(self, mock_format): + """Test that compact_retrieve_response formats the response correctly""" + # Arrange + query = "test query" + mock_doc = MagicMock(spec=Document) + documents = [mock_doc] + + mock_record = MagicMock() + mock_record.model_dump.return_value = {"content": "formatted content"} + mock_format.return_value = [mock_record] + + # Act + result = cast(dict[str, Any], HitTestingService.compact_retrieve_response(query, documents)) + + # Assert + assert cast(dict[str, Any], result["query"])["content"] == query + assert len(result["records"]) == 1 + assert cast(dict[str, Any], result["records"][0])["content"] == "formatted content" + mock_format.assert_called_once_with(documents) + + def test_compact_external_retrieve_response_should_return_records_for_external_provider(self): + """Test that compact_external_retrieve_response returns records when dataset provider is external""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.provider = "external" + query = "test query" + documents = [ + {"content": "c1", "title": "t1", "score": 0.9, "metadata": {"m1": "v1"}}, + {"content": "c2", "title": "t2", "score": 0.8, "metadata": {"m2": "v2"}}, + ] + + # Act + result = cast(dict[str, Any], HitTestingService.compact_external_retrieve_response(dataset, query, documents)) + + # Assert + assert cast(dict[str, Any], result["query"])["content"] == query + assert len(result["records"]) == 2 + assert cast(dict[str, Any], result["records"][0])["content"] == "c1" + assert cast(dict[str, Any], result["records"][1])["title"] == "t2" + + def test_compact_external_retrieve_response_should_return_empty_for_non_external_provider(self): + """Test that compact_external_retrieve_response returns empty records for non-external provider""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.provider = "not_external" + query = "test query" + documents = [{"content": "c1"}] + + # Act + result = cast(dict[str, Any], HitTestingService.compact_external_retrieve_response(dataset, query, documents)) + + # Assert + assert cast(dict[str, Any], result["query"])["content"] == query + assert result["records"] == [] + + # ===== External Retrieve Tests ===== + + @patch("core.rag.datasource.retrieval_service.RetrievalService.external_retrieve") + @patch("extensions.ext_database.db.session.add") + @patch("extensions.ext_database.db.session.commit") + def test_external_retrieve_should_succeed_for_external_provider(self, mock_commit, mock_add, mock_ext_retrieve): + """Test that external_retrieve successfully retrieves from external provider and commits query""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.id = "dataset_id" + dataset.provider = "external" + query = 'test "query"' + account = MagicMock() + account.id = "account_id" + + mock_ext_retrieve.return_value = [{"content": "ext content", "score": 1.0}] + + # Act + result = cast( + dict[str, Any], + HitTestingService.external_retrieve( + dataset=dataset, + query=query, + account=account, + external_retrieval_model={"model": "test"}, + metadata_filtering_conditions={"key": "val"}, + ), + ) + + # Assert + assert cast(dict[str, Any], result["query"])["content"] == query + assert cast(dict[str, Any], result["records"][0])["content"] == "ext content" + + # Verify call to RetrievalService.external_retrieve with escaped query + mock_ext_retrieve.assert_called_once_with( + dataset_id="dataset_id", + query='test \\"query\\"', + external_retrieval_model={"model": "test"}, + metadata_filtering_conditions={"key": "val"}, + ) + + # Verify DatasetQuery record was added and committed + mock_add.assert_called_once() + mock_commit.assert_called_once() + + def test_external_retrieve_should_return_empty_for_non_external_provider(self): + """Test that external_retrieve returns empty results immediately if provider is not external""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.provider = "not_external" + query = "test query" + account = MagicMock() + + # Act + result = cast(dict[str, Any], HitTestingService.external_retrieve(dataset, query, account)) + + # Assert + assert cast(dict[str, Any], result["query"])["content"] == query + assert result["records"] == [] + + # ===== Retrieve Tests ===== + + @patch("core.rag.datasource.retrieval_service.RetrievalService.retrieve") + @patch("extensions.ext_database.db.session.add") + @patch("extensions.ext_database.db.session.commit") + def test_retrieve_should_use_default_model_when_none_provided(self, mock_commit, mock_add, mock_retrieve): + """Test that retrieve uses default model when retrieval_model is not provided""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.id = "dataset_id" + dataset.retrieval_model = None + query = "test query" + account = MagicMock() + account.id = "account_id" + + mock_retrieve.return_value = [] + + # Act + result = cast( + dict[str, Any], + HitTestingService.retrieve( + dataset=dataset, query=query, account=account, retrieval_model=None, external_retrieval_model={} + ), + ) + + # Assert + assert cast(dict[str, Any], result["query"])["content"] == query + mock_retrieve.assert_called_once() + # Verify top_k from default_retrieval_model (4) + assert mock_retrieve.call_args.kwargs["top_k"] == 4 + mock_commit.assert_called_once() + + @patch("core.rag.datasource.retrieval_service.RetrievalService.retrieve") + @patch("core.rag.retrieval.dataset_retrieval.DatasetRetrieval.get_metadata_filter_condition") + @patch("extensions.ext_database.db.session.add") + @patch("extensions.ext_database.db.session.commit") + def test_retrieve_should_handle_metadata_filtering(self, mock_commit, mock_add, mock_get_meta, mock_retrieve): + """Test that retrieve correctly calls metadata filtering when conditions are present""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.id = "dataset_id" + query = "test query" + account = MagicMock() + account.id = "account_id" + + retrieval_model = { + "search_method": "semantic_search", + "metadata_filtering_conditions": {"some": "condition"}, + "top_k": 5, + "reranking_enable": False, + "score_threshold_enabled": False, + } + + # Mock metadata filtering response + mock_get_meta.return_value = ({"dataset_id": ["doc_id1"]}, "condition_string") + mock_retrieve.return_value = [] + + # Act + HitTestingService.retrieve( + dataset=dataset, query=query, account=account, retrieval_model=retrieval_model, external_retrieval_model={} + ) + + # Assert + mock_get_meta.assert_called_once() + mock_retrieve.assert_called_once() + assert mock_retrieve.call_args.kwargs["document_ids_filter"] == ["doc_id1"] + + @patch("core.rag.datasource.retrieval_service.RetrievalService.retrieve") + @patch("core.rag.retrieval.dataset_retrieval.DatasetRetrieval.get_metadata_filter_condition") + def test_retrieve_should_return_empty_if_metadata_filtering_fails(self, mock_get_meta, mock_retrieve): + """Test that retrieve returns empty response if metadata filtering returns condition but no document IDs""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.id = "dataset_id" + query = "test query" + account = MagicMock() + + retrieval_model = { + "search_method": "semantic_search", + "metadata_filtering_conditions": {"some": "condition"}, + "top_k": 5, + "reranking_enable": False, + "score_threshold_enabled": False, + } + + # Mock metadata filtering response: condition returned but no IDs + mock_get_meta.return_value = ({}, "condition_string") + + # Act + result = cast( + dict[str, Any], + HitTestingService.retrieve( + dataset=dataset, + query=query, + account=account, + retrieval_model=retrieval_model, + external_retrieval_model={}, + ), + ) + + # Assert + assert result["records"] == [] + mock_retrieve.assert_not_called() + + @patch("core.rag.datasource.retrieval_service.RetrievalService.retrieve") + @patch("extensions.ext_database.db.session.add") + @patch("extensions.ext_database.db.session.commit") + def test_retrieve_should_handle_attachments(self, mock_commit, mock_add, mock_retrieve): + """Test that retrieve handles attachment_ids and adds them to DatasetQuery""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.id = "dataset_id" + query = "test query" + account = MagicMock() + account.id = "account_id" + attachment_ids = ["att1", "att2"] + + retrieval_model = { + "search_method": "semantic_search", + "top_k": 4, + "reranking_enable": False, + "score_threshold_enabled": False, + } + mock_retrieve.return_value = [] + + # Act + HitTestingService.retrieve( + dataset=dataset, + query=query, + account=account, + retrieval_model=retrieval_model, + external_retrieval_model={}, + attachment_ids=attachment_ids, + ) + + # Assert + mock_retrieve.assert_called_once_with( + retrieval_method=ANY, + dataset_id="dataset_id", + query=query, + attachment_ids=attachment_ids, + top_k=4, + score_threshold=0.0, + reranking_model=None, + reranking_mode="reranking_model", + weights=None, + document_ids_filter=None, + ) + # Verify DatasetQuery record (there should be 2 queries: 1 text, 2 images) + # The content is json.dumps([{"content_type": "text_query", ...}, {"content_type": "image_query", ...}]) + called_query = mock_add.call_args[0][0] + query_content = json.loads(called_query.content) + assert len(query_content) == 3 # 1 text + 2 images + assert query_content[0]["content_type"] == "text_query" + assert query_content[1]["content_type"] == "image_query" + assert query_content[1]["content"] == "att1" + + @patch("core.rag.datasource.retrieval_service.RetrievalService.retrieve") + @patch("extensions.ext_database.db.session.add") + @patch("extensions.ext_database.db.session.commit") + def test_retrieve_should_handle_reranking_and_threshold(self, mock_commit, mock_add, mock_retrieve): + """Test that retrieve passes reranking and threshold parameters correctly""" + # Arrange + dataset = MagicMock(spec=Dataset) + dataset.id = "dataset_id" + query = "test query" + account = MagicMock() + account.id = "account_id" + + retrieval_model = { + "search_method": "hybrid_search", + "top_k": 10, + "reranking_enable": True, + "reranking_model": {"provider": "test"}, + "reranking_mode": "weighted_sum", + "score_threshold_enabled": True, + "score_threshold": 0.5, + "weights": {"vector": 0.5, "keyword": 0.5}, + } + mock_retrieve.return_value = [] + + # Act + HitTestingService.retrieve( + dataset=dataset, query=query, account=account, retrieval_model=retrieval_model, external_retrieval_model={} + ) + + # Assert + mock_retrieve.assert_called_once() + kwargs = mock_retrieve.call_args.kwargs + assert kwargs["score_threshold"] == 0.5 + assert kwargs["reranking_model"] == {"provider": "test"} + assert kwargs["reranking_mode"] == "weighted_sum" + assert kwargs["weights"] == {"vector": 0.5, "keyword": 0.5} diff --git a/api/tests/unit_tests/services/test_human_input_delivery_test_service.py b/api/tests/unit_tests/services/test_human_input_delivery_test_service.py index e64d3c5406..74139fd12d 100644 --- a/api/tests/unit_tests/services/test_human_input_delivery_test_service.py +++ b/api/tests/unit_tests/services/test_human_input_delivery_test_service.py @@ -1,97 +1,291 @@ from types import SimpleNamespace +from unittest.mock import MagicMock, patch import pytest +from sqlalchemy.engine import Engine +from configs import dify_config from dify_graph.nodes.human_input.entities import ( EmailDeliveryConfig, EmailDeliveryMethod, EmailRecipients, ExternalRecipient, + MemberRecipient, ) from dify_graph.runtime import VariablePool from services import human_input_delivery_test_service as service_module from services.human_input_delivery_test_service import ( DeliveryTestContext, + DeliveryTestEmailRecipient, DeliveryTestError, + DeliveryTestRegistry, + DeliveryTestResult, + DeliveryTestStatus, + DeliveryTestUnsupportedError, EmailDeliveryTestHandler, + HumanInputDeliveryTestService, + _build_form_link, ) -def _make_email_method() -> EmailDeliveryMethod: - return EmailDeliveryMethod( - config=EmailDeliveryConfig( - recipients=EmailRecipients( - whole_workspace=False, - items=[ExternalRecipient(email="tester@example.com")], - ), - subject="Test subject", - body="Test body", +@pytest.fixture +def mock_db(monkeypatch): + mock_db = MagicMock() + monkeypatch.setattr(service_module, "db", mock_db) + return mock_db + + +def _make_valid_email_config(): + return EmailDeliveryConfig(recipients=EmailRecipients(whole_workspace=False, items=[]), subject="Subj", body="Body") + + +def test_build_form_link(): + with patch.object(dify_config, "APP_WEB_URL", "http://example.com/"): + assert _build_form_link("token123") == "http://example.com/form/token123" + + with patch.object(dify_config, "APP_WEB_URL", "http://example.com"): + assert _build_form_link("token123") == "http://example.com/form/token123" + + assert _build_form_link(None) is None + + with patch.object(dify_config, "APP_WEB_URL", None): + assert _build_form_link("token123") is None + + +class TestDeliveryTestRegistry: + def test_register(self): + registry = DeliveryTestRegistry() + assert len(registry._handlers) == 0 + handler = MagicMock() + registry.register(handler) + assert len(registry._handlers) == 1 + assert registry._handlers[0] == handler + + def test_register_and_dispatch(self): + handler = MagicMock() + handler.supports.return_value = True + handler.send_test.return_value = DeliveryTestResult(status=DeliveryTestStatus.OK) + + registry = DeliveryTestRegistry([handler]) + context = MagicMock(spec=DeliveryTestContext) + method = MagicMock() + + result = registry.dispatch(context=context, method=method) + + assert result.status == DeliveryTestStatus.OK + handler.supports.assert_called_once_with(method) + handler.send_test.assert_called_once_with(context=context, method=method) + + def test_dispatch_unsupported(self): + handler = MagicMock() + handler.supports.return_value = False + + registry = DeliveryTestRegistry([handler]) + context = MagicMock(spec=DeliveryTestContext) + method = MagicMock() + + with pytest.raises(DeliveryTestUnsupportedError, match="Delivery method does not support test send."): + registry.dispatch(context=context, method=method) + + def test_default(self, mock_db): + registry = DeliveryTestRegistry.default() + assert len(registry._handlers) == 1 + assert isinstance(registry._handlers[0], EmailDeliveryTestHandler) + + +def test_human_input_delivery_test_service(): + registry = MagicMock(spec=DeliveryTestRegistry) + service = HumanInputDeliveryTestService(registry=registry) + context = MagicMock(spec=DeliveryTestContext) + method = MagicMock() + + service.send_test(context=context, method=method) + registry.dispatch.assert_called_once_with(context=context, method=method) + + +class TestEmailDeliveryTestHandler: + def test_init_with_engine(self): + engine = MagicMock(spec=Engine) + handler = EmailDeliveryTestHandler(session_factory=engine) + assert handler._session_factory.kw["bind"] == engine + + def test_supports(self): + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + method = EmailDeliveryMethod(config=_make_valid_email_config()) + assert handler.supports(method) is True + assert handler.supports(MagicMock()) is False + + def test_send_test_unsupported_method(self): + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + with pytest.raises(DeliveryTestUnsupportedError): + handler.send_test(context=MagicMock(), method=MagicMock()) + + def test_send_test_feature_disabled(self, monkeypatch): + monkeypatch.setattr( + service_module.FeatureService, + "get_features", + lambda _tenant_id: SimpleNamespace(human_input_email_delivery_enabled=False), ) - ) - - -def test_email_delivery_test_handler_rejects_when_feature_disabled(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr( - service_module.FeatureService, - "get_features", - lambda _tenant_id: SimpleNamespace(human_input_email_delivery_enabled=False), - ) - - handler = EmailDeliveryTestHandler(session_factory=object()) - context = DeliveryTestContext( - tenant_id="tenant-1", - app_id="app-1", - node_id="node-1", - node_title="Human Input", - rendered_content="content", - ) - method = _make_email_method() - - with pytest.raises(DeliveryTestError, match="Email delivery is not available"): - handler.send_test(context=context, method=method) - - -def test_email_delivery_test_handler_replaces_body_variables(monkeypatch: pytest.MonkeyPatch): - class DummyMail: - def __init__(self): - self.sent: list[dict[str, str]] = [] - - def is_inited(self) -> bool: - return True - - def send(self, *, to: str, subject: str, html: str): - self.sent.append({"to": to, "subject": subject, "html": html}) - - mail = DummyMail() - monkeypatch.setattr(service_module, "mail", mail) - monkeypatch.setattr(service_module, "render_email_template", lambda template, _substitutions: template) - monkeypatch.setattr( - service_module.FeatureService, - "get_features", - lambda _tenant_id: SimpleNamespace(human_input_email_delivery_enabled=True), - ) - - handler = EmailDeliveryTestHandler(session_factory=object()) - handler._resolve_recipients = lambda **_kwargs: ["tester@example.com"] # type: ignore[assignment] - - method = EmailDeliveryMethod( - config=EmailDeliveryConfig( - recipients=EmailRecipients(whole_workspace=False, items=[ExternalRecipient(email="tester@example.com")]), - subject="Subject", - body="Value {{#node1.value#}}", + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + context = DeliveryTestContext( + tenant_id="t1", app_id="a1", node_id="n1", node_title="title", rendered_content="content" ) - ) - variable_pool = VariablePool() - variable_pool.add(["node1", "value"], "OK") - context = DeliveryTestContext( - tenant_id="tenant-1", - app_id="app-1", - node_id="node-1", - node_title="Human Input", - rendered_content="content", - variable_pool=variable_pool, - ) + method = EmailDeliveryMethod(config=_make_valid_email_config()) - handler.send_test(context=context, method=method) + with pytest.raises(DeliveryTestError, match="Email delivery is not available"): + handler.send_test(context=context, method=method) - assert mail.sent[0]["html"] == "Value OK" + def test_send_test_mail_not_inited(self, monkeypatch): + monkeypatch.setattr( + service_module.FeatureService, + "get_features", + lambda _id: SimpleNamespace(human_input_email_delivery_enabled=True), + ) + monkeypatch.setattr(service_module.mail, "is_inited", lambda: False) + + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + context = DeliveryTestContext( + tenant_id="t1", app_id="a1", node_id="n1", node_title="title", rendered_content="content" + ) + method = EmailDeliveryMethod(config=_make_valid_email_config()) + + with pytest.raises(DeliveryTestError, match="Mail client is not initialized."): + handler.send_test(context=context, method=method) + + def test_send_test_no_recipients(self, monkeypatch): + monkeypatch.setattr( + service_module.FeatureService, + "get_features", + lambda _id: SimpleNamespace(human_input_email_delivery_enabled=True), + ) + monkeypatch.setattr(service_module.mail, "is_inited", lambda: True) + + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + handler._resolve_recipients = MagicMock(return_value=[]) + + context = DeliveryTestContext( + tenant_id="t1", app_id="a1", node_id="n1", node_title="title", rendered_content="content" + ) + method = EmailDeliveryMethod(config=_make_valid_email_config()) + + with pytest.raises(DeliveryTestError, match="No recipients configured"): + handler.send_test(context=context, method=method) + + def test_send_test_success(self, monkeypatch): + monkeypatch.setattr( + service_module.FeatureService, + "get_features", + lambda _id: SimpleNamespace(human_input_email_delivery_enabled=True), + ) + monkeypatch.setattr(service_module.mail, "is_inited", lambda: True) + mock_mail_send = MagicMock() + monkeypatch.setattr(service_module.mail, "send", mock_mail_send) + monkeypatch.setattr(service_module, "render_email_template", lambda t, s: f"RENDERED_{t}") + + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + handler._resolve_recipients = MagicMock(return_value=["test@example.com"]) + + variable_pool = VariablePool() + context = DeliveryTestContext( + tenant_id="t1", + app_id="a1", + node_id="n1", + node_title="title", + rendered_content="content", + variable_pool=variable_pool, + recipients=[DeliveryTestEmailRecipient(email="test@example.com", form_token="token123")], + ) + + method = EmailDeliveryMethod(config=_make_valid_email_config()) + + result = handler.send_test(context=context, method=method) + + assert result.status == DeliveryTestStatus.OK + assert result.delivered_to == ["test@example.com"] + mock_mail_send.assert_called_once() + args, kwargs = mock_mail_send.call_args + assert kwargs["to"] == "test@example.com" + assert "RENDERED_Subj" in kwargs["subject"] + + def test_resolve_recipients(self): + handler = EmailDeliveryTestHandler(session_factory=MagicMock()) + + # Test Case 1: External Recipient + method = EmailDeliveryMethod( + config=EmailDeliveryConfig( + recipients=EmailRecipients(items=[ExternalRecipient(email="ext@example.com")], whole_workspace=False), + subject="", + body="", + ) + ) + assert handler._resolve_recipients(tenant_id="t1", method=method) == ["ext@example.com"] + + # Test Case 2: Member Recipient + method = EmailDeliveryMethod( + config=EmailDeliveryConfig( + recipients=EmailRecipients(items=[MemberRecipient(user_id="u1")], whole_workspace=False), + subject="", + body="", + ) + ) + handler._query_workspace_member_emails = MagicMock(return_value={"u1": "u1@example.com"}) + assert handler._resolve_recipients(tenant_id="t1", method=method) == ["u1@example.com"] + + # Test Case 3: Whole Workspace + method = EmailDeliveryMethod( + config=EmailDeliveryConfig(recipients=EmailRecipients(items=[], whole_workspace=True), subject="", body="") + ) + handler._query_workspace_member_emails = MagicMock( + return_value={"u1": "u1@example.com", "u2": "u2@example.com"} + ) + recipients = handler._resolve_recipients(tenant_id="t1", method=method) + assert set(recipients) == {"u1@example.com", "u2@example.com"} + + def test_query_workspace_member_emails(self): + mock_session = MagicMock() + mock_session_factory = MagicMock(return_value=mock_session) + mock_session.__enter__.return_value = mock_session + + handler = EmailDeliveryTestHandler(session_factory=mock_session_factory) + + # Empty user_ids + assert handler._query_workspace_member_emails(tenant_id="t1", user_ids=[]) == {} + + # user_ids is None (all) + mock_execute = MagicMock() + mock_session.execute.return_value = mock_execute + mock_execute.all.return_value = [("u1", "u1@example.com")] + + result = handler._query_workspace_member_emails(tenant_id="t1", user_ids=None) + assert result == {"u1": "u1@example.com"} + + # user_ids with values + result = handler._query_workspace_member_emails(tenant_id="t1", user_ids=["u1"]) + assert result == {"u1": "u1@example.com"} + + def test_build_substitutions(self): + context = DeliveryTestContext( + tenant_id="t1", + app_id="a1", + node_id="n1", + node_title="title", + rendered_content="content", + template_vars={"custom": "var"}, + recipients=[DeliveryTestEmailRecipient(email="test@example.com", form_token="token123")], + ) + + subs = EmailDeliveryTestHandler._build_substitutions(context=context, recipient_email="test@example.com") + + assert subs["node_title"] == "title" + assert subs["form_content"] == "content" + assert subs["recipient_email"] == "test@example.com" + assert subs["custom"] == "var" + assert subs["form_token"] == "token123" + assert "form/token123" in subs["form_link"] + + # Without matching recipient + subs_no_match = EmailDeliveryTestHandler._build_substitutions( + context=context, recipient_email="other@example.com" + ) + assert subs_no_match["form_token"] == "" + assert subs_no_match["form_link"] == "" diff --git a/api/tests/unit_tests/services/test_human_input_service.py b/api/tests/unit_tests/services/test_human_input_service.py index a4c6c50593..375e47d7fc 100644 --- a/api/tests/unit_tests/services/test_human_input_service.py +++ b/api/tests/unit_tests/services/test_human_input_service.py @@ -16,7 +16,13 @@ from dify_graph.nodes.human_input.entities import ( ) from dify_graph.nodes.human_input.enums import FormInputType, HumanInputFormKind, HumanInputFormStatus from models.human_input import RecipientType -from services.human_input_service import Form, FormExpiredError, HumanInputService, InvalidFormDataError +from services.human_input_service import ( + Form, + FormExpiredError, + FormSubmittedError, + HumanInputService, + InvalidFormDataError, +) @pytest.fixture @@ -285,3 +291,172 @@ def test_submit_form_by_token_missing_inputs(sample_form_record, mock_session_fa assert "Missing required inputs" in str(exc_info.value) repo.mark_submitted.assert_not_called() + + +def test_form_properties(sample_form_record): + form = Form(sample_form_record) + assert form.id == "form-id" + assert form.workflow_run_id == "workflow-run-id" + assert form.tenant_id == "tenant-id" + assert form.app_id == "app-id" + assert form.recipient_id == "recipient-id" + assert form.recipient_type == RecipientType.STANDALONE_WEB_APP + assert form.status == HumanInputFormStatus.WAITING + assert form.form_kind == HumanInputFormKind.RUNTIME + assert isinstance(form.created_at, datetime) + assert isinstance(form.expiration_time, datetime) + + +def test_form_submitted_error_init(): + error = FormSubmittedError(form_id="test-form") + assert "form_id=test-form" in error.description + assert error.code == 412 + + +def test_human_input_service_init_with_engine(mocker): + engine = MagicMock(spec=human_input_service_module.Engine) + sessionmaker_mock = mocker.patch("services.human_input_service.sessionmaker") + + HumanInputService(session_factory=engine) + sessionmaker_mock.assert_called_once_with(bind=engine) + + +def test_get_form_by_token_none(mock_session_factory): + session_factory, _ = mock_session_factory + repo = MagicMock(spec=HumanInputFormSubmissionRepository) + repo.get_by_token.return_value = None + + service = HumanInputService(session_factory, form_repository=repo) + assert service.get_form_by_token("invalid") is None + + +def test_get_form_definition_by_token_mismatch(sample_form_record, mock_session_factory): + session_factory, _ = mock_session_factory + repo = MagicMock(spec=HumanInputFormSubmissionRepository) + repo.get_by_token.return_value = sample_form_record + + service = HumanInputService(session_factory, form_repository=repo) + # RecipientType mismatch + assert service.get_form_definition_by_token(RecipientType.CONSOLE, "token") is None + + +def test_get_form_definition_by_token_success(sample_form_record, mock_session_factory): + session_factory, _ = mock_session_factory + repo = MagicMock(spec=HumanInputFormSubmissionRepository) + repo.get_by_token.return_value = sample_form_record + + service = HumanInputService(session_factory, form_repository=repo) + form = service.get_form_definition_by_token(RecipientType.STANDALONE_WEB_APP, "token") + assert form is not None + assert form.id == sample_form_record.form_id + + +def test_get_form_definition_by_token_for_console_mismatch(sample_form_record, mock_session_factory): + session_factory, _ = mock_session_factory + repo = MagicMock(spec=HumanInputFormSubmissionRepository) + repo.get_by_token.return_value = sample_form_record # is STANDALONE_WEB_APP + + service = HumanInputService(session_factory, form_repository=repo) + assert service.get_form_definition_by_token_for_console("token") is None + + +def test_submit_form_by_token_delivery_not_enabled(mock_session_factory): + session_factory, _ = mock_session_factory + repo = MagicMock(spec=HumanInputFormSubmissionRepository) + repo.get_by_token.return_value = None + + service = HumanInputService(session_factory, form_repository=repo) + with pytest.raises(human_input_service_module.WebAppDeliveryNotEnabledError): + service.submit_form_by_token(RecipientType.STANDALONE_WEB_APP, "token", "action", {}) + + +def test_submit_form_by_token_no_workflow_run_id(sample_form_record, mock_session_factory, mocker): + session_factory, _ = mock_session_factory + repo = MagicMock(spec=HumanInputFormSubmissionRepository) + repo.get_by_token.return_value = sample_form_record + + # Return record with no workflow_run_id + result_record = dataclasses.replace(sample_form_record, workflow_run_id=None) + repo.mark_submitted.return_value = result_record + + service = HumanInputService(session_factory, form_repository=repo) + enqueue_spy = mocker.patch.object(service, "enqueue_resume") + + service.submit_form_by_token(RecipientType.STANDALONE_WEB_APP, "token", "submit", {}) + enqueue_spy.assert_not_called() + + +def test_ensure_form_active_errors(sample_form_record, mock_session_factory): + session_factory, _ = mock_session_factory + service = HumanInputService(session_factory) + + # Submitted + submitted_record = dataclasses.replace(sample_form_record, submitted_at=datetime.utcnow()) + with pytest.raises(human_input_service_module.FormSubmittedError): + service.ensure_form_active(Form(submitted_record)) + + # Timeout status + timeout_record = dataclasses.replace(sample_form_record, status=HumanInputFormStatus.TIMEOUT) + with pytest.raises(FormExpiredError): + service.ensure_form_active(Form(timeout_record)) + + # Expired time + expired_time_record = dataclasses.replace( + sample_form_record, expiration_time=datetime.utcnow() - timedelta(minutes=1) + ) + with pytest.raises(FormExpiredError): + service.ensure_form_active(Form(expired_time_record)) + + +def test_ensure_not_submitted_raises(sample_form_record, mock_session_factory): + session_factory, _ = mock_session_factory + service = HumanInputService(session_factory) + submitted_record = dataclasses.replace(sample_form_record, submitted_at=datetime.utcnow()) + + with pytest.raises(human_input_service_module.FormSubmittedError): + service._ensure_not_submitted(Form(submitted_record)) + + +def test_enqueue_resume_workflow_not_found(mocker, mock_session_factory): + session_factory, _ = mock_session_factory + service = HumanInputService(session_factory) + + workflow_run_repo = MagicMock() + workflow_run_repo.get_workflow_run_by_id_without_tenant.return_value = None + mocker.patch( + "services.human_input_service.DifyAPIRepositoryFactory.create_api_workflow_run_repository", + return_value=workflow_run_repo, + ) + + with pytest.raises(AssertionError) as excinfo: + service.enqueue_resume("workflow-run-id") + assert "WorkflowRun not found" in str(excinfo.value) + + +def test_enqueue_resume_app_not_found(mocker, mock_session_factory): + session_factory, session = mock_session_factory + service = HumanInputService(session_factory) + + workflow_run = MagicMock() + workflow_run.app_id = "app-id" + + workflow_run_repo = MagicMock() + workflow_run_repo.get_workflow_run_by_id_without_tenant.return_value = workflow_run + mocker.patch( + "services.human_input_service.DifyAPIRepositoryFactory.create_api_workflow_run_repository", + return_value=workflow_run_repo, + ) + + session.execute.return_value.scalar_one_or_none.return_value = None + logger_spy = mocker.patch("services.human_input_service.logger") + + service.enqueue_resume("workflow-run-id") + logger_spy.error.assert_called_once() + + +def test_is_globally_expired_zero_timeout(monkeypatch, sample_form_record, mock_session_factory): + session_factory, _ = mock_session_factory + service = HumanInputService(session_factory) + + monkeypatch.setattr(human_input_service_module.dify_config, "HUMAN_INPUT_GLOBAL_TIMEOUT_SECONDS", 0) + assert service._is_globally_expired(Form(sample_form_record)) is False diff --git a/api/tests/unit_tests/services/test_knowledge_service.py b/api/tests/unit_tests/services/test_knowledge_service.py new file mode 100644 index 0000000000..bc0caee071 --- /dev/null +++ b/api/tests/unit_tests/services/test_knowledge_service.py @@ -0,0 +1,146 @@ +from typing import Any, cast +from unittest.mock import MagicMock, patch + +import pytest + +from services.knowledge_service import ExternalDatasetTestService + + +class TestKnowledgeService: + """Test suite for ExternalDatasetTestService""" + + # ===== Happy Path Tests ===== + + @patch("services.knowledge_service.boto3.client") + @patch("services.knowledge_service.dify_config") + def test_knowledge_retrieval_should_succeed_with_valid_results( + self, mock_dify_config: MagicMock, mock_boto_client: MagicMock + ): + """Test that knowledge_retrieval successfully parses results from Bedrock""" + # Arrange + mock_dify_config.AWS_SECRET_ACCESS_KEY = "dummy_secret" + mock_dify_config.AWS_ACCESS_KEY_ID = "dummy_id" + + mock_client = MagicMock() + mock_boto_client.return_value = mock_client + + retrieval_setting = {"top_k": 4, "score_threshold": 0.5} + query = "test query" + knowledge_id = "kb-123" + + # Mock successful response + mock_client.retrieve.return_value = { + "ResponseMetadata": {"HTTPStatusCode": 200}, + "retrievalResults": [ + { + "score": 0.9, + "metadata": {"x-amz-bedrock-kb-source-uri": "s3://bucket/doc1.pdf"}, + "content": {"text": "content from doc1"}, + }, + { + "score": 0.4, # Below threshold + "metadata": {"x-amz-bedrock-kb-source-uri": "s3://bucket/doc2.pdf"}, + "content": {"text": "content from doc2"}, + }, + ], + } + + # Act + result = cast( + dict[str, Any], ExternalDatasetTestService.knowledge_retrieval(retrieval_setting, query, knowledge_id) + ) + + # Assert + assert len(result["records"]) == 1 + record = result["records"][0] + assert record["score"] == 0.9 + assert record["title"] == "s3://bucket/doc1.pdf" + assert record["content"] == "content from doc1" + + # verify retrieve called correctly + mock_client.retrieve.assert_called_once_with( + knowledgeBaseId=knowledge_id, + retrievalConfiguration={ + "vectorSearchConfiguration": { + "numberOfResults": 4, + "overrideSearchType": "HYBRID", + } + }, + retrievalQuery={"text": query}, + ) + + # NEW: verify boto3.client created with proper service name and config values + mock_boto_client.assert_called_once_with( + "bedrock-agent-runtime", + aws_secret_access_key="dummy_secret", + aws_access_key_id="dummy_id", + region_name="us-east-1", + ) + + @patch("services.knowledge_service.boto3.client") + def test_knowledge_retrieval_should_return_empty_when_no_results(self, mock_boto: MagicMock): + """Test that knowledge_retrieval returns empty records when Bedrock returns nothing""" + # Arrange + mock_client = MagicMock() + mock_boto.return_value = mock_client + + mock_client.retrieve.return_value = {"ResponseMetadata": {"HTTPStatusCode": 200}, "retrievalResults": []} + + # Act + result = cast(dict[str, Any], ExternalDatasetTestService.knowledge_retrieval({"top_k": 1}, "query", "kb")) + + # Assert + assert result["records"] == [] + + # ===== Error Handling Tests ===== + + @patch("services.knowledge_service.boto3.client") + def test_knowledge_retrieval_should_return_empty_on_http_error(self, mock_boto: MagicMock): + """Test that knowledge_retrieval returns empty records if Bedrock returns non-200 status""" + # Arrange + mock_client = MagicMock() + mock_boto.return_value = mock_client + + mock_client.retrieve.return_value = {"ResponseMetadata": {"HTTPStatusCode": 500}} + + # Act + result = cast(dict[str, Any], ExternalDatasetTestService.knowledge_retrieval({"top_k": 1}, "query", "kb")) + + # Assert + assert result["records"] == [] + + def test_knowledge_retrieval_should_raise_when_boto_client_creation_fails(self): + """Test that exceptions from boto3.client propagate (e.g., network/credentials issues)""" + with patch("services.knowledge_service.boto3.client") as mock_boto: + mock_boto.side_effect = Exception("client init failed") + with pytest.raises(Exception) as exc_info: + ExternalDatasetTestService.knowledge_retrieval({"top_k": 1}, "query", "kb") + assert "client init failed" in str(exc_info.value) + + # ===== Edge Cases ===== + + @patch("services.knowledge_service.boto3.client") + def test_knowledge_retrieval_should_handle_missing_threshold_in_settings(self, mock_boto: MagicMock): + """Test that knowledge_retrieval uses 0.0 as default threshold if not provided""" + # Arrange + mock_client = MagicMock() + mock_boto.return_value = mock_client + + mock_client.retrieve.return_value = { + "ResponseMetadata": {"HTTPStatusCode": 200}, + "retrievalResults": [ + { + "score": 0.1, + "metadata": {"x-amz-bedrock-kb-source-uri": "uri"}, + "content": {"text": "text"}, + } + ], + } + + # Act + # retrieval_setting missing "score_threshold" + result = cast(dict[str, Any], ExternalDatasetTestService.knowledge_retrieval({"top_k": 1}, "query", "kb")) + + # Assert + assert len(result["records"]) == 1 + assert result["records"][0]["score"] == 0.1 diff --git a/api/tests/unit_tests/services/test_message_service.py b/api/tests/unit_tests/services/test_message_service.py index 3c38888753..4b8bdde46b 100644 --- a/api/tests/unit_tests/services/test_message_service.py +++ b/api/tests/unit_tests/services/test_message_service.py @@ -5,8 +5,13 @@ import pytest from libs.infinite_scroll_pagination import InfiniteScrollPagination from models.model import App, AppMode, EndUser, Message -from services.errors.message import FirstMessageNotExistsError, LastMessageNotExistsError -from services.message_service import MessageService +from services.errors.message import ( + FirstMessageNotExistsError, + LastMessageNotExistsError, + MessageNotExistsError, + SuggestedQuestionsAfterAnswerDisabledError, +) +from services.message_service import MessageService, attach_message_extra_contents class TestMessageServiceFactory: @@ -244,14 +249,12 @@ class TestMessageServicePaginationByFirstId: mock_query_first = MagicMock() mock_query_history = MagicMock() + query_calls = [] + def query_side_effect(*args): if args[0] == Message: - # First call returns mock for first_message query - if not hasattr(query_side_effect, "call_count"): - query_side_effect.call_count = 0 - query_side_effect.call_count += 1 - - if query_side_effect.call_count == 1: + query_calls.append(args) + if len(query_calls) == 1: return mock_query_first else: return mock_query_history @@ -647,3 +650,410 @@ class TestMessageServicePaginationByLastId: assert len(result.data) == 10 # Last message trimmed assert result.has_more is True assert result.limit == 10 + + +class TestMessageServiceUtilities: + """Unit tests for MessageService module-level utility functions.""" + + @pytest.fixture + def factory(self): + """Provide test data factory.""" + return TestMessageServiceFactory() + + # Test 16: attach_message_extra_contents with empty list + def test_attach_message_extra_contents_empty(self): + """Test attach_message_extra_contents with empty list does nothing.""" + # Act & Assert (should not raise error) + attach_message_extra_contents([]) + + # Test 17: attach_message_extra_contents with messages + @patch("services.message_service._create_execution_extra_content_repository") + def test_attach_message_extra_contents_with_messages(self, mock_create_repo, factory): + """Test attach_message_extra_contents correctly attaches content.""" + # Arrange + messages = [factory.create_message_mock(message_id="msg-1"), factory.create_message_mock(message_id="msg-2")] + + mock_repo = MagicMock() + mock_create_repo.return_value = mock_repo + + # Mock extra content models + mock_content1 = MagicMock() + mock_content1.model_dump.return_value = {"key": "value1"} + mock_content2 = MagicMock() + mock_content2.model_dump.return_value = {"key": "value2"} + + mock_repo.get_by_message_ids.return_value = [[mock_content1], [mock_content2]] + + # Act + attach_message_extra_contents(messages) + + # Assert + mock_repo.get_by_message_ids.assert_called_once_with(["msg-1", "msg-2"]) + messages[0].set_extra_contents.assert_called_once_with([{"key": "value1"}]) + messages[1].set_extra_contents.assert_called_once_with([{"key": "value2"}]) + + # Test 18: attach_message_extra_contents with index out of bounds + @patch("services.message_service._create_execution_extra_content_repository") + def test_attach_message_extra_contents_index_out_of_bounds(self, mock_create_repo, factory): + """Test attach_message_extra_contents handles missing content lists.""" + # Arrange + messages = [factory.create_message_mock(message_id="msg-1")] + + mock_repo = MagicMock() + mock_create_repo.return_value = mock_repo + mock_repo.get_by_message_ids.return_value = [] # Empty returned list + + # Act + attach_message_extra_contents(messages) + + # Assert + messages[0].set_extra_contents.assert_called_once_with([]) + + # Test 19: _create_execution_extra_content_repository + @patch("services.message_service.db") + @patch("services.message_service.sessionmaker") + @patch("services.message_service.SQLAlchemyExecutionExtraContentRepository") + def test_create_execution_extra_content_repository(self, mock_repo_class, mock_sessionmaker, mock_db): + """Test _create_execution_extra_content_repository creates expected repository.""" + from services.message_service import _create_execution_extra_content_repository + + # Act + _create_execution_extra_content_repository() + + # Assert + mock_sessionmaker.assert_called_once() + mock_repo_class.assert_called_once() + + +class TestMessageServiceGetMessage: + """Unit tests for MessageService.get_message method.""" + + @pytest.fixture + def factory(self): + """Provide test data factory.""" + return TestMessageServiceFactory() + + # Test 20: get_message success for EndUser + @patch("services.message_service.db") + def test_get_message_end_user_success(self, mock_db, factory): + """Test get_message returns message for EndUser.""" + # Arrange + app = factory.create_app_mock() + user = factory.create_end_user_mock(user_id="end-user-123") + message = factory.create_message_mock() + + mock_query = MagicMock() + mock_db.session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.first.return_value = message + + # Act + result = MessageService.get_message(app_model=app, user=user, message_id="msg-123") + + # Assert + assert result == message + mock_query.where.assert_called_once() + + # Test 21: get_message success for Account (Admin) + @patch("services.message_service.db") + def test_get_message_account_success(self, mock_db, factory): + """Test get_message returns message for Account.""" + # Arrange + from models import Account + + app = factory.create_app_mock() + user = MagicMock(spec=Account) + user.id = "account-123" + message = factory.create_message_mock() + + mock_query = MagicMock() + mock_db.session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.first.return_value = message + + # Act + result = MessageService.get_message(app_model=app, user=user, message_id="msg-123") + + # Assert + assert result == message + + # Test 22: get_message not found + @patch("services.message_service.db") + def test_get_message_not_found(self, mock_db, factory): + """Test get_message raises MessageNotExistsError when not found.""" + # Arrange + app = factory.create_app_mock() + user = factory.create_end_user_mock() + + mock_query = MagicMock() + mock_db.session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.first.return_value = None + + # Act & Assert + with pytest.raises(MessageNotExistsError): + MessageService.get_message(app_model=app, user=user, message_id="msg-123") + + +class TestMessageServiceFeedback: + """Unit tests for MessageService feedback-related methods.""" + + @pytest.fixture + def factory(self): + """Provide test data factory.""" + return TestMessageServiceFactory() + + # Test 23: create_feedback - new feedback for EndUser + @patch("services.message_service.db") + @patch.object(MessageService, "get_message") + def test_create_feedback_new_end_user(self, mock_get_message, mock_db, factory): + """Test creating new feedback for an end user.""" + # Arrange + app = factory.create_app_mock() + user = factory.create_end_user_mock() + message = factory.create_message_mock() + message.user_feedback = None + mock_get_message.return_value = message + + # Act + result = MessageService.create_feedback( + app_model=app, + message_id="msg-123", + user=user, + rating="like", + content="Good answer", + ) + + # Assert + assert result.rating == "like" + assert result.content == "Good answer" + assert result.from_source == "user" + mock_db.session.add.assert_called_once() + mock_db.session.commit.assert_called_once() + + # Test 24: create_feedback - update feedback for Account + @patch("services.message_service.db") + @patch.object(MessageService, "get_message") + def test_create_feedback_update_account(self, mock_get_message, mock_db, factory): + """Test updating existing feedback for an account.""" + # Arrange + from models import Account, MessageFeedback + + app = factory.create_app_mock() + user = MagicMock(spec=Account) + user.id = "account-123" + message = factory.create_message_mock() + feedback = MagicMock(spec=MessageFeedback) + message.admin_feedback = feedback + mock_get_message.return_value = message + + # Act + result = MessageService.create_feedback( + app_model=app, + message_id="msg-123", + user=user, + rating="dislike", + content="Bad answer", + ) + + # Assert + assert result == feedback + assert feedback.rating == "dislike" + assert feedback.content == "Bad answer" + mock_db.session.commit.assert_called_once() + + # Test 25: create_feedback - delete feedback (rating is None) + @patch("services.message_service.db") + @patch.object(MessageService, "get_message") + def test_create_feedback_delete(self, mock_get_message, mock_db, factory): + """Test deleting feedback by passing rating=None.""" + # Arrange + app = factory.create_app_mock() + user = factory.create_end_user_mock() + message = factory.create_message_mock() + feedback = MagicMock() + message.user_feedback = feedback + mock_get_message.return_value = message + + # Act + result = MessageService.create_feedback( + app_model=app, + message_id="msg-123", + user=user, + rating=None, + content=None, + ) + + # Assert + assert result == feedback + mock_db.session.delete.assert_called_once_with(feedback) + mock_db.session.commit.assert_called_once() + + # Test 26: get_all_messages_feedbacks + @patch("services.message_service.db") + def test_get_all_messages_feedbacks(self, mock_db, factory): + """Test get_all_messages_feedbacks returns list of dicts.""" + # Arrange + app = factory.create_app_mock() + feedback = MagicMock() + feedback.to_dict.return_value = {"id": "fb-1"} + + mock_query = MagicMock() + mock_db.session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.order_by.return_value = mock_query + mock_query.limit.return_value = mock_query + mock_query.offset.return_value = mock_query + mock_query.all.return_value = [feedback] + + # Act + result = MessageService.get_all_messages_feedbacks(app_model=app, page=1, limit=10) + + # Assert + assert result == [{"id": "fb-1"}] + mock_query.limit.assert_called_with(10) + mock_query.offset.assert_called_with(0) + + +class TestMessageServiceSuggestedQuestions: + """Unit tests for MessageService.get_suggested_questions_after_answer method.""" + + @pytest.fixture + def factory(self): + """Provide test data factory.""" + return TestMessageServiceFactory() + + # Test 27: get_suggested_questions_after_answer - user is None + def test_get_suggested_questions_user_none(self, factory): + app = factory.create_app_mock() + with pytest.raises(ValueError, match="user cannot be None"): + MessageService.get_suggested_questions_after_answer( + app_model=app, user=None, message_id="msg-123", invoke_from=MagicMock() + ) + + # Test 28: get_suggested_questions_after_answer - Advanced Chat success + @patch("services.message_service.ModelManager") + @patch("services.message_service.WorkflowService") + @patch("services.message_service.AdvancedChatAppConfigManager") + @patch("services.message_service.TokenBufferMemory") + @patch("services.message_service.LLMGenerator") + @patch("services.message_service.TraceQueueManager") + @patch.object(MessageService, "get_message") + @patch("services.message_service.ConversationService") + def test_get_suggested_questions_advanced_chat_success( + self, + mock_conversation_service, + mock_get_message, + mock_trace_manager, + mock_llm_gen, + mock_memory, + mock_config_manager, + mock_workflow_service, + mock_model_manager, + factory, + ): + """Test successful suggested questions generation in Advanced Chat mode.""" + from core.app.entities.app_invoke_entities import InvokeFrom + + # Arrange + app = factory.create_app_mock(mode=AppMode.ADVANCED_CHAT.value) + user = factory.create_end_user_mock() + message = factory.create_message_mock() + mock_get_message.return_value = message + + workflow = MagicMock() + mock_workflow_service.return_value.get_published_workflow.return_value = workflow + + app_config = MagicMock() + app_config.additional_features.suggested_questions_after_answer = True + mock_config_manager.get_app_config.return_value = app_config + + mock_llm_gen.generate_suggested_questions_after_answer.return_value = ["Q1?"] + + # Act + result = MessageService.get_suggested_questions_after_answer( + app_model=app, user=user, message_id="msg-123", invoke_from=InvokeFrom.WEB_APP + ) + + # Assert + assert result == ["Q1?"] + mock_workflow_service.return_value.get_published_workflow.assert_called_once() + mock_llm_gen.generate_suggested_questions_after_answer.assert_called_once() + + # Test 29: get_suggested_questions_after_answer - Chat app success (no override) + @patch("services.message_service.db") + @patch("services.message_service.ModelManager") + @patch("services.message_service.TokenBufferMemory") + @patch("services.message_service.LLMGenerator") + @patch("services.message_service.TraceQueueManager") + @patch.object(MessageService, "get_message") + @patch("services.message_service.ConversationService") + def test_get_suggested_questions_chat_app_success( + self, + mock_conversation_service, + mock_get_message, + mock_trace_manager, + mock_llm_gen, + mock_memory, + mock_model_manager, + mock_db, + factory, + ): + """Test successful suggested questions generation in basic Chat mode.""" + # Arrange + app = factory.create_app_mock(mode=AppMode.CHAT.value) + user = factory.create_end_user_mock() + message = factory.create_message_mock() + mock_get_message.return_value = message + + conversation = MagicMock() + conversation.override_model_configs = None + mock_conversation_service.get_conversation.return_value = conversation + + app_model_config = MagicMock() + app_model_config.suggested_questions_after_answer_dict = {"enabled": True} + app_model_config.model_dict = {"provider": "openai", "name": "gpt-4"} + + mock_query = MagicMock() + mock_db.session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_query.first.return_value = app_model_config + + mock_llm_gen.generate_suggested_questions_after_answer.return_value = ["Q1?"] + + # Act + result = MessageService.get_suggested_questions_after_answer( + app_model=app, user=user, message_id="msg-123", invoke_from=MagicMock() + ) + + # Assert + assert result == ["Q1?"] + mock_query.first.assert_called_once() + mock_llm_gen.generate_suggested_questions_after_answer.assert_called_once() + + # Test 30: get_suggested_questions_after_answer - Disabled Error + @patch("services.message_service.WorkflowService") + @patch("services.message_service.AdvancedChatAppConfigManager") + @patch.object(MessageService, "get_message") + @patch("services.message_service.ConversationService") + def test_get_suggested_questions_disabled_error( + self, mock_conversation_service, mock_get_message, mock_config_manager, mock_workflow_service, factory + ): + """Test SuggestedQuestionsAfterAnswerDisabledError is raised when feature is disabled.""" + # Arrange + app = factory.create_app_mock(mode=AppMode.ADVANCED_CHAT.value) + user = factory.create_end_user_mock() + mock_get_message.return_value = factory.create_message_mock() + + workflow = MagicMock() + mock_workflow_service.return_value.get_published_workflow.return_value = workflow + + app_config = MagicMock() + app_config.additional_features.suggested_questions_after_answer = False + mock_config_manager.get_app_config.return_value = app_config + + # Act & Assert + with pytest.raises(SuggestedQuestionsAfterAnswerDisabledError): + MessageService.get_suggested_questions_after_answer( + app_model=app, user=user, message_id="msg-123", invoke_from=MagicMock() + ) diff --git a/api/tests/unit_tests/services/test_messages_clean_service.py b/api/tests/unit_tests/services/test_messages_clean_service.py index 67ae2c9142..4449b442d6 100644 --- a/api/tests/unit_tests/services/test_messages_clean_service.py +++ b/api/tests/unit_tests/services/test_messages_clean_service.py @@ -554,11 +554,9 @@ class TestMessagesCleanServiceFromDays: MessagesCleanService.from_days(policy=policy, days=-1) # Act - with patch("services.retention.conversation.messages_clean_service.datetime", autospec=True) as mock_datetime: + with patch("services.retention.conversation.messages_clean_service.naive_utc_now") as mock_now: fixed_now = datetime.datetime(2024, 6, 15, 14, 0, 0) - mock_datetime.datetime.now.return_value = fixed_now - mock_datetime.timedelta = datetime.timedelta - + mock_now.return_value = fixed_now service = MessagesCleanService.from_days(policy=policy, days=0) # Assert @@ -586,11 +584,9 @@ class TestMessagesCleanServiceFromDays: dry_run = True # Act - with patch("services.retention.conversation.messages_clean_service.datetime", autospec=True) as mock_datetime: + with patch("services.retention.conversation.messages_clean_service.naive_utc_now") as mock_now: fixed_now = datetime.datetime(2024, 6, 15, 10, 30, 0) - mock_datetime.datetime.now.return_value = fixed_now - mock_datetime.timedelta = datetime.timedelta - + mock_now.return_value = fixed_now service = MessagesCleanService.from_days( policy=policy, days=days, @@ -613,11 +609,9 @@ class TestMessagesCleanServiceFromDays: policy = BillingDisabledPolicy() # Act - with patch("services.retention.conversation.messages_clean_service.datetime", autospec=True) as mock_datetime: + with patch("services.retention.conversation.messages_clean_service.naive_utc_now") as mock_now: fixed_now = datetime.datetime(2024, 6, 15, 10, 30, 0) - mock_datetime.datetime.now.return_value = fixed_now - mock_datetime.timedelta = datetime.timedelta - + mock_now.return_value = fixed_now service = MessagesCleanService.from_days(policy=policy) # Assert diff --git a/api/tests/unit_tests/services/test_operation_service.py b/api/tests/unit_tests/services/test_operation_service.py new file mode 100644 index 0000000000..a4c69b23ac --- /dev/null +++ b/api/tests/unit_tests/services/test_operation_service.py @@ -0,0 +1,120 @@ +from unittest.mock import MagicMock, patch + +import httpx +import pytest + +from services.operation_service import OperationService + + +class TestOperationService: + """Test suite for OperationService""" + + # ===== Internal Method Tests ===== + + @patch("httpx.request") + def test_should_call_with_correct_parameters_when__send_request_invoked( + self, mock_request: MagicMock, monkeypatch: pytest.MonkeyPatch + ): + """Test that _send_request calls httpx.request with the correct URL, headers, and data""" + # Arrange + monkeypatch.setattr(OperationService, "base_url", "https://billing.example") + monkeypatch.setattr(OperationService, "secret_key", "s3cr3t") + + mock_response = MagicMock() + mock_response.json.return_value = {"status": "success"} + mock_request.return_value = mock_response + + method = "POST" + endpoint = "/test_endpoint" + json_data = {"key": "value"} + + # Act + result = OperationService._send_request(method, endpoint, json=json_data) + + # Assert + assert result == {"status": "success"} + + # Verify call parameters + expected_url = "https://billing.example/test_endpoint" + mock_request.assert_called_once() + args, kwargs = mock_request.call_args + assert args[0] == method + assert args[1] == expected_url + assert kwargs["json"] == json_data + assert kwargs["headers"]["Billing-Api-Secret-Key"] == "s3cr3t" + assert kwargs["headers"]["Content-Type"] == "application/json" + + @patch("httpx.request") + def test_should_propagate_httpx_error_when__send_request_raises( + self, mock_request: MagicMock, monkeypatch: pytest.MonkeyPatch + ): + """Test that _send_request handles httpx raising an error""" + # Arrange + monkeypatch.setattr(OperationService, "base_url", "https://billing.example") + mock_request.side_effect = httpx.RequestError("network error") + + # Act & Assert + with pytest.raises(httpx.RequestError): + OperationService._send_request("POST", "/test") + + # ===== Public Method Tests ===== + + @pytest.mark.parametrize( + ("utm_info", "expected_params"), + [ + ( + { + "utm_source": "google", + "utm_medium": "cpc", + "utm_campaign": "spring_sale", + "utm_content": "ad_1", + "utm_term": "ai_agent", + }, + { + "tenant_id": "tenant-123", + "utm_source": "google", + "utm_medium": "cpc", + "utm_campaign": "spring_sale", + "utm_content": "ad_1", + "utm_term": "ai_agent", + }, + ), + ( + {}, # Empty utm_info + { + "tenant_id": "tenant-123", + "utm_source": "", + "utm_medium": "", + "utm_campaign": "", + "utm_content": "", + "utm_term": "", + }, + ), + ( + {"utm_source": "newsletter"}, # Partial utm_info + { + "tenant_id": "tenant-123", + "utm_source": "newsletter", + "utm_medium": "", + "utm_campaign": "", + "utm_content": "", + "utm_term": "", + }, + ), + ], + ) + @patch.object(OperationService, "_send_request") + def test_should_map_parameters_correctly_when_record_utm_called( + self, mock_send: MagicMock, utm_info: dict, expected_params: dict + ): + """Test that record_utm correctly maps utm_info to parameters and calls _send_request""" + # Arrange + tenant_id = "tenant-123" + mock_send.return_value = {"status": "recorded"} + + # Act + result = OperationService.record_utm(tenant_id, utm_info) + + # Assert + assert result == {"status": "recorded"} + mock_send.assert_called_once_with("POST", "/tenant_utms", params=expected_params) diff --git a/api/tests/unit_tests/services/test_ops_service.py b/api/tests/unit_tests/services/test_ops_service.py new file mode 100644 index 0000000000..ab7b473790 --- /dev/null +++ b/api/tests/unit_tests/services/test_ops_service.py @@ -0,0 +1,381 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from core.ops.entities.config_entity import TracingProviderEnum +from models.model import App, TraceAppConfig +from services.ops_service import OpsService + + +class TestOpsService: + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_get_tracing_app_config_no_config(self, mock_ops_trace_manager, mock_db): + # Arrange + mock_db.session.query.return_value.where.return_value.first.return_value = None + + # Act + result = OpsService.get_tracing_app_config("app_id", "arize") + + # Assert + assert result is None + mock_db.session.query.assert_called_with(TraceAppConfig) + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_get_tracing_app_config_no_app(self, mock_ops_trace_manager, mock_db): + # Arrange + trace_config = MagicMock(spec=TraceAppConfig) + mock_db.session.query.return_value.where.return_value.first.side_effect = [trace_config, None] + + # Act + result = OpsService.get_tracing_app_config("app_id", "arize") + + # Assert + assert result is None + assert mock_db.session.query.call_count == 2 + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_get_tracing_app_config_none_config(self, mock_ops_trace_manager, mock_db): + # Arrange + trace_config = MagicMock(spec=TraceAppConfig) + trace_config.tracing_config = None + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [trace_config, app] + + # Act & Assert + with pytest.raises(ValueError, match="Tracing config cannot be None."): + OpsService.get_tracing_app_config("app_id", "arize") + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + @pytest.mark.parametrize( + ("provider", "default_url"), + [ + ("arize", "https://app.arize.com/"), + ("phoenix", "https://app.phoenix.arize.com/projects/"), + ("langsmith", "https://smith.langchain.com/"), + ("opik", "https://www.comet.com/opik/"), + ("weave", "https://wandb.ai/"), + ("aliyun", "https://arms.console.aliyun.com/"), + ("tencent", "https://console.cloud.tencent.com/apm"), + ("mlflow", "http://localhost:5000/"), + ("databricks", "https://www.databricks.com/"), + ], + ) + def test_get_tracing_app_config_providers_exception(self, mock_ops_trace_manager, mock_db, provider, default_url): + # Arrange + trace_config = MagicMock(spec=TraceAppConfig) + trace_config.tracing_config = {"some": "config"} + trace_config.to_dict.return_value = {"tracing_config": {"project_url": default_url}} + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [trace_config, app] + + mock_ops_trace_manager.decrypt_tracing_config.return_value = {} + mock_ops_trace_manager.obfuscated_decrypt_token.return_value = {} + mock_ops_trace_manager.get_trace_config_project_url.side_effect = Exception("error") + mock_ops_trace_manager.get_trace_config_project_key.side_effect = Exception("error") + + # Act + result = OpsService.get_tracing_app_config("app_id", provider) + + # Assert + assert result["tracing_config"]["project_url"] == default_url + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + @pytest.mark.parametrize( + "provider", ["arize", "phoenix", "langsmith", "opik", "weave", "aliyun", "tencent", "mlflow", "databricks"] + ) + def test_get_tracing_app_config_providers_success(self, mock_ops_trace_manager, mock_db, provider): + # Arrange + trace_config = MagicMock(spec=TraceAppConfig) + trace_config.tracing_config = {"some": "config"} + trace_config.to_dict.return_value = {"tracing_config": {"project_url": "success_url"}} + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [trace_config, app] + + mock_ops_trace_manager.decrypt_tracing_config.return_value = {} + mock_ops_trace_manager.obfuscated_decrypt_token.return_value = {} + mock_ops_trace_manager.get_trace_config_project_url.return_value = "success_url" + + # Act + result = OpsService.get_tracing_app_config("app_id", provider) + + # Assert + assert result["tracing_config"]["project_url"] == "success_url" + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_get_tracing_app_config_langfuse_success(self, mock_ops_trace_manager, mock_db): + # Arrange + trace_config = MagicMock(spec=TraceAppConfig) + trace_config.tracing_config = {"some": "config"} + trace_config.to_dict.return_value = {"tracing_config": {"project_url": "https://api.langfuse.com/project/key"}} + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [trace_config, app] + + mock_ops_trace_manager.decrypt_tracing_config.return_value = {"host": "https://api.langfuse.com"} + mock_ops_trace_manager.obfuscated_decrypt_token.return_value = {"host": "https://api.langfuse.com"} + mock_ops_trace_manager.get_trace_config_project_key.return_value = "key" + + # Act + result = OpsService.get_tracing_app_config("app_id", "langfuse") + + # Assert + assert result["tracing_config"]["project_url"] == "https://api.langfuse.com/project/key" + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_get_tracing_app_config_langfuse_exception(self, mock_ops_trace_manager, mock_db): + # Arrange + trace_config = MagicMock(spec=TraceAppConfig) + trace_config.tracing_config = {"some": "config"} + trace_config.to_dict.return_value = {"tracing_config": {"project_url": "https://api.langfuse.com/"}} + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [trace_config, app] + + mock_ops_trace_manager.decrypt_tracing_config.return_value = {"host": "https://api.langfuse.com"} + mock_ops_trace_manager.obfuscated_decrypt_token.return_value = {"host": "https://api.langfuse.com"} + mock_ops_trace_manager.get_trace_config_project_key.side_effect = Exception("error") + + # Act + result = OpsService.get_tracing_app_config("app_id", "langfuse") + + # Assert + assert result["tracing_config"]["project_url"] == "https://api.langfuse.com/" + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_create_tracing_app_config_invalid_provider(self, mock_ops_trace_manager, mock_db): + # Act + result = OpsService.create_tracing_app_config("app_id", "invalid_provider", {}) + + # Assert + assert result == {"error": "Invalid tracing provider: invalid_provider"} + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_create_tracing_app_config_invalid_credentials(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.LANGFUSE + mock_ops_trace_manager.check_trace_config_is_effective.return_value = False + + # Act + result = OpsService.create_tracing_app_config("app_id", provider, {"public_key": "p", "secret_key": "s"}) + + # Assert + assert result == {"error": "Invalid Credentials"} + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + @pytest.mark.parametrize( + ("provider", "config"), + [ + (TracingProviderEnum.ARIZE, {}), + (TracingProviderEnum.LANGFUSE, {"public_key": "p", "secret_key": "s"}), + (TracingProviderEnum.LANGSMITH, {"api_key": "k", "project": "p"}), + (TracingProviderEnum.ALIYUN, {"license_key": "k", "endpoint": "https://aliyun.com"}), + ], + ) + def test_create_tracing_app_config_project_url_exception(self, mock_ops_trace_manager, mock_db, provider, config): + # Arrange + mock_ops_trace_manager.check_trace_config_is_effective.return_value = True + mock_ops_trace_manager.get_trace_config_project_url.side_effect = Exception("error") + mock_ops_trace_manager.get_trace_config_project_key.side_effect = Exception("error") + mock_db.session.query.return_value.where.return_value.first.return_value = MagicMock(spec=TraceAppConfig) + + # Act + result = OpsService.create_tracing_app_config("app_id", provider, config) + + # Assert + assert result is None + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_create_tracing_app_config_langfuse_success(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.LANGFUSE + mock_ops_trace_manager.check_trace_config_is_effective.return_value = True + mock_ops_trace_manager.get_trace_config_project_key.return_value = "key" + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [None, app] + mock_ops_trace_manager.encrypt_tracing_config.return_value = {} + + # Act + result = OpsService.create_tracing_app_config( + "app_id", provider, {"public_key": "p", "secret_key": "s", "host": "https://api.langfuse.com"} + ) + + # Assert + assert result == {"result": "success"} + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_create_tracing_app_config_already_exists(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + mock_ops_trace_manager.check_trace_config_is_effective.return_value = True + mock_db.session.query.return_value.where.return_value.first.return_value = MagicMock(spec=TraceAppConfig) + + # Act + result = OpsService.create_tracing_app_config("app_id", provider, {}) + + # Assert + assert result is None + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_create_tracing_app_config_no_app(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + mock_ops_trace_manager.check_trace_config_is_effective.return_value = True + mock_db.session.query.return_value.where.return_value.first.side_effect = [None, None] + + # Act + result = OpsService.create_tracing_app_config("app_id", provider, {}) + + # Assert + assert result is None + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_create_tracing_app_config_with_empty_other_keys(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + mock_ops_trace_manager.check_trace_config_is_effective.return_value = True + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [None, app] + mock_ops_trace_manager.encrypt_tracing_config.return_value = {} + + # Act + # 'project' is in other_keys for Arize + # provide an empty string for the project in the tracing_config + # create_tracing_app_config will replace it with the default from the model + result = OpsService.create_tracing_app_config("app_id", provider, {"project": ""}) + + # Assert + assert result == {"result": "success"} + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_create_tracing_app_config_success(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + mock_ops_trace_manager.check_trace_config_is_effective.return_value = True + mock_ops_trace_manager.get_trace_config_project_url.return_value = "http://project_url" + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [None, app] + mock_ops_trace_manager.encrypt_tracing_config.return_value = {"encrypted": "config"} + + # Act + result = OpsService.create_tracing_app_config("app_id", provider, {}) + + # Assert + assert result == {"result": "success"} + mock_db.session.add.assert_called() + mock_db.session.commit.assert_called() + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_update_tracing_app_config_invalid_provider(self, mock_ops_trace_manager, mock_db): + # Act & Assert + with pytest.raises(ValueError, match="Invalid tracing provider: invalid_provider"): + OpsService.update_tracing_app_config("app_id", "invalid_provider", {}) + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_update_tracing_app_config_no_config(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + mock_db.session.query.return_value.where.return_value.first.return_value = None + + # Act + result = OpsService.update_tracing_app_config("app_id", provider, {}) + + # Assert + assert result is None + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_update_tracing_app_config_no_app(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + current_config = MagicMock(spec=TraceAppConfig) + mock_db.session.query.return_value.where.return_value.first.side_effect = [current_config, None] + + # Act + result = OpsService.update_tracing_app_config("app_id", provider, {}) + + # Assert + assert result is None + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_update_tracing_app_config_invalid_credentials(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + current_config = MagicMock(spec=TraceAppConfig) + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [current_config, app] + mock_ops_trace_manager.decrypt_tracing_config.return_value = {} + mock_ops_trace_manager.check_trace_config_is_effective.return_value = False + + # Act & Assert + with pytest.raises(ValueError, match="Invalid Credentials"): + OpsService.update_tracing_app_config("app_id", provider, {}) + + @patch("services.ops_service.db") + @patch("services.ops_service.OpsTraceManager") + def test_update_tracing_app_config_success(self, mock_ops_trace_manager, mock_db): + # Arrange + provider = TracingProviderEnum.ARIZE + current_config = MagicMock(spec=TraceAppConfig) + current_config.to_dict.return_value = {"some": "data"} + app = MagicMock(spec=App) + app.tenant_id = "tenant_id" + mock_db.session.query.return_value.where.return_value.first.side_effect = [current_config, app] + mock_ops_trace_manager.decrypt_tracing_config.return_value = {} + mock_ops_trace_manager.check_trace_config_is_effective.return_value = True + + # Act + result = OpsService.update_tracing_app_config("app_id", provider, {}) + + # Assert + assert result == {"some": "data"} + mock_db.session.commit.assert_called_once() + + @patch("services.ops_service.db") + def test_delete_tracing_app_config_no_config(self, mock_db): + # Arrange + mock_db.session.query.return_value.where.return_value.first.return_value = None + + # Act + result = OpsService.delete_tracing_app_config("app_id", "arize") + + # Assert + assert result is None + + @patch("services.ops_service.db") + def test_delete_tracing_app_config_success(self, mock_db): + # Arrange + trace_config = MagicMock(spec=TraceAppConfig) + mock_db.session.query.return_value.where.return_value.first.return_value = trace_config + + # Act + result = OpsService.delete_tracing_app_config("app_id", "arize") + + # Assert + assert result is True + mock_db.session.delete.assert_called_with(trace_config) + mock_db.session.commit.assert_called_once() diff --git a/api/tests/unit_tests/services/test_schedule_service.py b/api/tests/unit_tests/services/test_schedule_service.py index 5e3dd157e6..e28965ea2c 100644 --- a/api/tests/unit_tests/services/test_schedule_service.py +++ b/api/tests/unit_tests/services/test_schedule_service.py @@ -5,8 +5,8 @@ from unittest.mock import MagicMock, Mock, patch import pytest from sqlalchemy.orm import Session -from dify_graph.nodes.trigger_schedule.entities import ScheduleConfig, SchedulePlanUpdate, VisualConfig -from dify_graph.nodes.trigger_schedule.exc import ScheduleConfigError +from core.workflow.nodes.trigger_schedule.entities import ScheduleConfig, SchedulePlanUpdate, VisualConfig +from core.workflow.nodes.trigger_schedule.exc import ScheduleConfigError from events.event_handlers.sync_workflow_schedule_when_app_published import ( sync_schedule_from_workflow, ) @@ -136,7 +136,7 @@ class TestScheduleService(unittest.TestCase): def test_update_schedule_not_found(self): """Test updating a non-existent schedule raises exception.""" - from dify_graph.nodes.trigger_schedule.exc import ScheduleNotFoundError + from core.workflow.nodes.trigger_schedule.exc import ScheduleNotFoundError mock_session = MagicMock(spec=Session) mock_session.get.return_value = None @@ -172,7 +172,7 @@ class TestScheduleService(unittest.TestCase): def test_delete_schedule_not_found(self): """Test deleting a non-existent schedule raises exception.""" - from dify_graph.nodes.trigger_schedule.exc import ScheduleNotFoundError + from core.workflow.nodes.trigger_schedule.exc import ScheduleNotFoundError mock_session = MagicMock(spec=Session) mock_session.get.return_value = None diff --git a/api/tests/unit_tests/services/test_summary_index_service.py b/api/tests/unit_tests/services/test_summary_index_service.py new file mode 100644 index 0000000000..c7e1fed21f --- /dev/null +++ b/api/tests/unit_tests/services/test_summary_index_service.py @@ -0,0 +1,1329 @@ +"""Unit tests for services.summary_index_service.""" + +from __future__ import annotations + +import sys +from dataclasses import dataclass +from datetime import UTC, datetime +from types import SimpleNamespace +from unittest.mock import MagicMock + +import pytest + +import services.summary_index_service as summary_module +from services.summary_index_service import SummaryIndexService + + +@dataclass(frozen=True) +class _SessionContext: + session: MagicMock + + def __enter__(self) -> MagicMock: + return self.session + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + +def _dataset(*, indexing_technique: str = "high_quality") -> MagicMock: + dataset = MagicMock(name="dataset") + dataset.id = "dataset-1" + dataset.tenant_id = "tenant-1" + dataset.indexing_technique = indexing_technique + dataset.embedding_model_provider = "openai" + dataset.embedding_model = "text-embedding" + return dataset + + +def _segment(*, has_document: bool = True) -> MagicMock: + segment = MagicMock(name="segment") + segment.id = "seg-1" + segment.document_id = "doc-1" + segment.dataset_id = "dataset-1" + segment.content = "hello world" + segment.enabled = True + segment.status = "completed" + segment.position = 1 + if has_document: + doc = MagicMock(name="document") + doc.doc_language = "en" + doc.doc_form = "text_model" + segment.document = doc + else: + segment.document = None + return segment + + +def _summary_record(*, summary_content: str = "summary", node_id: str | None = None) -> MagicMock: + record = MagicMock(spec=summary_module.DocumentSegmentSummary, name="summary_record") + record.id = "sum-1" + record.dataset_id = "dataset-1" + record.document_id = "doc-1" + record.chunk_id = "seg-1" + record.summary_content = summary_content + record.summary_index_node_id = node_id + record.summary_index_node_hash = None + record.tokens = None + record.status = "generating" + record.error = None + record.enabled = True + record.created_at = datetime(2024, 1, 1, tzinfo=UTC) + record.updated_at = datetime(2024, 1, 1, tzinfo=UTC) + record.disabled_at = None + record.disabled_by = None + return record + + +def test_generate_summary_for_segment_passes_document_language(monkeypatch: pytest.MonkeyPatch) -> None: + usage = MagicMock() + usage.total_tokens = 10 + usage.prompt_tokens = 3 + usage.completion_tokens = 7 + + paragraph_module = SimpleNamespace( + ParagraphIndexProcessor=SimpleNamespace(generate_summary=MagicMock(return_value=("sum", usage))) + ) + monkeypatch.setitem( + sys.modules, + "core.rag.index_processor.processor.paragraph_index_processor", + paragraph_module, + ) + + segment = _segment(has_document=True) + dataset = _dataset() + + content, got_usage = SummaryIndexService.generate_summary_for_segment(segment, dataset, {"a": 1}) + assert content == "sum" + assert got_usage is usage + + paragraph_module.ParagraphIndexProcessor.generate_summary.assert_called_once() + _, kwargs = paragraph_module.ParagraphIndexProcessor.generate_summary.call_args + assert kwargs["document_language"] == "en" + + +def test_generate_summary_for_segment_raises_when_empty(monkeypatch: pytest.MonkeyPatch) -> None: + paragraph_module = SimpleNamespace( + ParagraphIndexProcessor=SimpleNamespace(generate_summary=MagicMock(return_value=("", MagicMock()))) + ) + monkeypatch.setitem( + sys.modules, + "core.rag.index_processor.processor.paragraph_index_processor", + paragraph_module, + ) + + with pytest.raises(ValueError, match="Generated summary is empty"): + SummaryIndexService.generate_summary_for_segment(_segment(), _dataset(), {"a": 1}) + + +def test_create_summary_record_updates_existing_and_reenables(monkeypatch: pytest.MonkeyPatch) -> None: + existing = _summary_record(summary_content="old", node_id="n1") + existing.enabled = False + existing.disabled_at = datetime(2024, 1, 1) + existing.disabled_by = "u" + + session = MagicMock(name="session") + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = existing + session.query.return_value = query + + create_session_mock = MagicMock(return_value=_SessionContext(session)) + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + + segment = _segment() + dataset = _dataset() + + result = SummaryIndexService.create_summary_record(segment, dataset, "new", status="generating") + assert result is existing + assert existing.summary_content == "new" + assert existing.status == "generating" + assert existing.enabled is True + assert existing.disabled_at is None + assert existing.disabled_by is None + assert existing.error is None + session.add.assert_called_once_with(existing) + session.flush.assert_called_once() + + +def test_create_summary_record_creates_new(monkeypatch: pytest.MonkeyPatch) -> None: + session = MagicMock(name="session") + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = None + session.query.return_value = query + + create_session_mock = MagicMock(return_value=_SessionContext(session)) + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + + record = SummaryIndexService.create_summary_record(_segment(), _dataset(), "new", status="generating") + assert record.dataset_id == "dataset-1" + assert record.chunk_id == "seg-1" + assert record.summary_content == "new" + assert record.enabled is True + session.add.assert_called_once() + session.flush.assert_called_once() + + +def test_vectorize_summary_skips_non_high_quality(monkeypatch: pytest.MonkeyPatch) -> None: + vector_cls = MagicMock() + monkeypatch.setattr(summary_module, "Vector", vector_cls) + SummaryIndexService.vectorize_summary(_summary_record(), _segment(), _dataset(indexing_technique="economy")) + vector_cls.assert_not_called() + + +def test_vectorize_summary_raises_for_blank_content() -> None: + with pytest.raises(ValueError, match="Summary content is empty"): + SummaryIndexService.vectorize_summary(_summary_record(summary_content=" "), _segment(), _dataset()) + + +def test_vectorize_summary_retries_connection_errors_then_succeeds(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id=None) + + monkeypatch.setattr(summary_module.uuid, "uuid4", MagicMock(return_value="uuid-1")) + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + + embedding_model = MagicMock() + embedding_model.get_text_embedding_num_tokens.return_value = [5] + model_manager = MagicMock() + model_manager.get_model_instance.return_value = embedding_model + monkeypatch.setattr(summary_module, "ModelManager", MagicMock(return_value=model_manager)) + + vector_instance = MagicMock() + vector_instance.add_texts.side_effect = [RuntimeError("connection timeout"), None] + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + + session = MagicMock(name="provided_session") + merged = _summary_record(summary_content="sum") + session.merge.return_value = merged + monkeypatch.setattr(summary_module.time, "sleep", MagicMock()) + + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=session) + + assert vector_instance.add_texts.call_count == 2 + summary_module.time.sleep.assert_called_once() # type: ignore[attr-defined] + session.flush.assert_called_once() + assert summary.status == "completed" + assert summary.summary_index_node_id == "uuid-1" + assert summary.summary_index_node_hash == "hash-1" + assert summary.tokens == 5 + + +def test_vectorize_summary_without_session_creates_record_when_missing(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id="old-node") + + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + + # Force deletion branch to run and swallow delete failures. + vector_for_delete = MagicMock() + vector_for_delete.delete_by_ids.side_effect = RuntimeError("delete failed") + vector_for_add = MagicMock() + vector_for_add.add_texts.return_value = None + vector_cls = MagicMock(side_effect=[vector_for_delete, vector_for_add]) + monkeypatch.setattr(summary_module, "Vector", vector_cls) + + model_manager = MagicMock() + model_manager.get_model_instance.side_effect = RuntimeError("no model") + monkeypatch.setattr(summary_module, "ModelManager", MagicMock(return_value=model_manager)) + + # New session used after vectorization succeeds (record not found by id nor chunk_id). + session = MagicMock(name="session") + q1 = MagicMock() + q1.filter_by.return_value = q1 + q1.first.side_effect = [None, None] + session.query.return_value = q1 + + create_session_mock = MagicMock(return_value=_SessionContext(session)) + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=None) + + # One context for success path, no error handler session. + create_session_mock.assert_called() + session.add.assert_called() + session.commit.assert_called_once() + assert summary.status == "completed" + assert summary.summary_index_node_id == "old-node" # reused + + +def test_vectorize_summary_final_failure_updates_error_status(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id=None) + + monkeypatch.setattr(summary_module.uuid, "uuid4", MagicMock(return_value="uuid-1")) + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + monkeypatch.setattr(summary_module.time, "sleep", MagicMock()) + + vector_instance = MagicMock() + vector_instance.add_texts.side_effect = RuntimeError("boom") + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + + # error_session should find record and commit status update + error_session = MagicMock(name="error_session") + q = MagicMock() + q.filter_by.return_value = q + q.first.return_value = summary + error_session.query.return_value = q + + create_session_mock = MagicMock(return_value=_SessionContext(error_session)) + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + + with pytest.raises(RuntimeError, match="boom"): + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=None) + + assert summary.status == "error" + assert "Vectorization failed" in (summary.error or "") + error_session.commit.assert_called_once() + + +def test_batch_create_summary_records_no_segments_noop(monkeypatch: pytest.MonkeyPatch) -> None: + create_session_mock = MagicMock() + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + SummaryIndexService.batch_create_summary_records([], _dataset()) + create_session_mock.assert_not_called() + + +def test_batch_create_summary_records_creates_and_updates(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + s1 = _segment() + s2 = _segment() + s2.id = "seg-2" + s2.document_id = "doc-2" + + existing = _summary_record() + existing.chunk_id = "seg-2" + existing.enabled = False + + session = MagicMock() + query = MagicMock() + query.filter.return_value = query + query.all.return_value = [existing] + session.query.return_value = query + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + SummaryIndexService.batch_create_summary_records([s1, s2], dataset, status="not_started") + session.commit.assert_called_once() + assert existing.enabled is True + + +def test_update_summary_record_error_updates_when_exists(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record() + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + SummaryIndexService.update_summary_record_error(segment, dataset, "err") + assert record.status == "error" + assert record.error == "err" + session.commit.assert_called_once() + + +def test_generate_and_vectorize_summary_success(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + monkeypatch.setattr( + SummaryIndexService, "generate_summary_for_segment", MagicMock(return_value=("sum", MagicMock(total_tokens=0))) + ) + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", MagicMock(return_value=None)) + + out = SummaryIndexService.generate_and_vectorize_summary(segment, dataset, {"enable": True}) + assert out is record + session.refresh.assert_called_once_with(record) + session.commit.assert_called() + + +def test_generate_and_vectorize_summary_vectorize_failure_sets_error(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + monkeypatch.setattr( + SummaryIndexService, "generate_summary_for_segment", MagicMock(return_value=("sum", MagicMock(total_tokens=0))) + ) + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", MagicMock(side_effect=RuntimeError("boom"))) + + with pytest.raises(RuntimeError, match="boom"): + SummaryIndexService.generate_and_vectorize_summary(segment, dataset, {"enable": True}) + assert record.status == "error" + # Outer exception handler overwrites the error with the raw exception message. + assert record.error == "boom" + + +def test_vectorize_summary_updates_existing_record_found_by_chunk_id(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id=None) + + monkeypatch.setattr(summary_module.uuid, "uuid4", MagicMock(return_value="uuid-1")) + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + + vector_instance = MagicMock() + vector_instance.add_texts.return_value = None + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + monkeypatch.setattr( + summary_module, + "ModelManager", + MagicMock(return_value=MagicMock(get_model_instance=MagicMock(return_value=None))), + ) + + existing = _summary_record(summary_content="old", node_id="old-node") + existing.id = "other-id" + session = MagicMock(name="session") + q = MagicMock() + q.filter_by.return_value = q + q.first.side_effect = [None, existing] # miss by id, hit by chunk_id + session.query.return_value = q + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=None) + session.commit.assert_called_once() + assert existing.summary_index_node_id == "uuid-1" + + +def test_vectorize_summary_updates_existing_record_found_by_id(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id=None) + + monkeypatch.setattr(summary_module.uuid, "uuid4", MagicMock(return_value="uuid-1")) + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + monkeypatch.setattr( + summary_module, "Vector", MagicMock(return_value=MagicMock(add_texts=MagicMock(return_value=None))) + ) + monkeypatch.setattr( + summary_module, + "ModelManager", + MagicMock(return_value=MagicMock(get_model_instance=MagicMock(return_value=None))), + ) + + existing = _summary_record(summary_content="old", node_id="old-node") + session = MagicMock(name="session") + q = MagicMock() + q.filter_by.return_value = q + q.first.return_value = existing # hit by id + session.query.return_value = q + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=None) + session.commit.assert_called_once() + assert existing.summary_index_node_hash == "hash-1" + + +def test_vectorize_summary_session_enter_returns_none_triggers_runtime_error(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id=None) + + monkeypatch.setattr(summary_module.uuid, "uuid4", MagicMock(return_value="uuid-1")) + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + monkeypatch.setattr( + summary_module, "Vector", MagicMock(return_value=MagicMock(add_texts=MagicMock(return_value=None))) + ) + monkeypatch.setattr( + summary_module, + "ModelManager", + MagicMock(return_value=MagicMock(get_model_instance=MagicMock(return_value=None))), + ) + + class _BadContext: + def __enter__(self): + return None + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + error_session = MagicMock() + q = MagicMock() + q.filter_by.return_value = q + q.first.return_value = summary + error_session.query.return_value = q + + create_session_mock = MagicMock(side_effect=[_BadContext(), _SessionContext(error_session)]) + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + + with pytest.raises(RuntimeError, match="Session should not be None"): + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=None) + + +def test_vectorize_summary_created_record_becomes_none_triggers_guard(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id=None) + + monkeypatch.setattr(summary_module.uuid, "uuid4", MagicMock(return_value="uuid-1")) + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + monkeypatch.setattr( + summary_module, "Vector", MagicMock(return_value=MagicMock(add_texts=MagicMock(return_value=None))) + ) + monkeypatch.setattr( + summary_module, + "ModelManager", + MagicMock(return_value=MagicMock(get_model_instance=MagicMock(return_value=None))), + ) + + session = MagicMock() + q = MagicMock() + q.filter_by.return_value = q + q.first.side_effect = [None, None] # miss by id and chunk_id + session.query.return_value = q + + error_session = MagicMock() + eq = MagicMock() + eq.filter_by.return_value = eq + eq.first.return_value = summary + error_session.query.return_value = eq + + create_session_mock = MagicMock(side_effect=[_SessionContext(session), _SessionContext(error_session)]) + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + + # Force the created record to be None so the "should not be None" guard triggers. + monkeypatch.setattr(summary_module, "DocumentSegmentSummary", MagicMock(return_value=None)) + + with pytest.raises(RuntimeError, match="summary_record_in_session should not be None"): + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=None) + + +def test_vectorize_summary_error_handler_tries_chunk_id_lookup_and_can_warn_not_found( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _dataset() + segment = _segment() + summary = _summary_record(summary_content="sum", node_id=None) + + monkeypatch.setattr(summary_module.uuid, "uuid4", MagicMock(return_value="uuid-1")) + monkeypatch.setattr(summary_module.helper, "generate_text_hash", MagicMock(return_value="hash-1")) + monkeypatch.setattr(summary_module.time, "sleep", MagicMock()) + monkeypatch.setattr( + summary_module, + "Vector", + MagicMock(return_value=MagicMock(add_texts=MagicMock(side_effect=RuntimeError("boom")))), + ) + + error_session = MagicMock(name="error_session") + q = MagicMock() + q.filter_by.return_value = q + q.first.side_effect = [None, None] # not found by id, not found by chunk_id + error_session.query.return_value = q + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(error_session))), + ) + + with pytest.raises(RuntimeError, match="boom"): + SummaryIndexService.vectorize_summary(summary, segment, dataset, session=None) + + # No record -> no commit in error session. + error_session.commit.assert_not_called() + + +def test_update_summary_record_error_warns_when_missing(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = None + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + logger_mock = MagicMock() + monkeypatch.setattr(summary_module, "logger", logger_mock) + + SummaryIndexService.update_summary_record_error(segment, dataset, "err") + logger_mock.warning.assert_called_once() + + +def test_generate_and_vectorize_summary_creates_missing_record_and_logs_usage(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = None + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + usage = MagicMock(total_tokens=4, prompt_tokens=1, completion_tokens=3) + monkeypatch.setattr(SummaryIndexService, "generate_summary_for_segment", MagicMock(return_value=("sum", usage))) + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", MagicMock(return_value=None)) + + logger_mock = MagicMock() + monkeypatch.setattr(summary_module, "logger", logger_mock) + + result = SummaryIndexService.generate_and_vectorize_summary(segment, dataset, {"enable": True}) + assert result.status in {"generating", "completed"} + logger_mock.info.assert_called() + + +def test_generate_summaries_for_document_skip_conditions(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset(indexing_technique="economy") + document = MagicMock(spec=summary_module.DatasetDocument) + document.id = "doc-1" + document.doc_form = "text_model" + assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == [] + + dataset = _dataset() + assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": False}) == [] + + document.doc_form = "qa_model" + assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == [] + + +def test_generate_summaries_for_document_runs_and_handles_errors(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + document = MagicMock(spec=summary_module.DatasetDocument) + document.id = "doc-1" + document.doc_form = "text_model" + + seg1 = _segment() + seg2 = _segment() + seg2.id = "seg-2" + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [seg1, seg2] + session.query.return_value = query + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + monkeypatch.setattr(SummaryIndexService, "batch_create_summary_records", MagicMock()) + monkeypatch.setattr( + SummaryIndexService, + "generate_and_vectorize_summary", + MagicMock(side_effect=[MagicMock(), RuntimeError("boom")]), + ) + update_err_mock = MagicMock() + monkeypatch.setattr(SummaryIndexService, "update_summary_record_error", update_err_mock) + + records = SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) + assert len(records) == 1 + update_err_mock.assert_called_once() + + +def test_generate_summaries_for_document_no_segments_returns_empty(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + document = MagicMock(spec=summary_module.DatasetDocument) + document.id = "doc-1" + document.doc_form = "text_model" + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [] + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + assert SummaryIndexService.generate_summaries_for_document(dataset, document, {"enable": True}) == [] + + +def test_generate_summaries_for_document_applies_segment_ids_and_only_parent_chunks( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _dataset() + document = MagicMock(spec=summary_module.DatasetDocument) + document.id = "doc-1" + document.doc_form = "text_model" + seg = _segment() + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [seg] + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + monkeypatch.setattr(SummaryIndexService, "batch_create_summary_records", MagicMock()) + monkeypatch.setattr(SummaryIndexService, "generate_and_vectorize_summary", MagicMock(return_value=MagicMock())) + + SummaryIndexService.generate_summaries_for_document( + dataset, + document, + {"enable": True}, + segment_ids=[seg.id], + only_parent_chunks=True, + ) + query.filter.assert_called() + + +def test_disable_summaries_for_segments_handles_vector_delete_error(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + summary1 = _summary_record(summary_content="s", node_id="n1") + summary2 = _summary_record(summary_content="s", node_id=None) + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [summary1, summary2] + session.query.return_value = query + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + monkeypatch.setattr( + summary_module, + "Vector", + MagicMock(return_value=MagicMock(delete_by_ids=MagicMock(side_effect=RuntimeError("boom")))), + ) + monkeypatch.setitem( + sys.modules, "libs.datetime_utils", SimpleNamespace(naive_utc_now=MagicMock(return_value=datetime(2024, 1, 1))) + ) + + SummaryIndexService.disable_summaries_for_segments(dataset, segment_ids=["seg-1"], disabled_by="u") + assert summary1.enabled is False + assert summary1.disabled_by == "u" + session.commit.assert_called_once() + + +def test_disable_summaries_for_segments_no_summaries_noop(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [] + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + monkeypatch.setitem( + sys.modules, "libs.datetime_utils", SimpleNamespace(naive_utc_now=MagicMock(return_value=datetime(2024, 1, 1))) + ) + SummaryIndexService.disable_summaries_for_segments(dataset) + session.commit.assert_not_called() + + +def test_enable_summaries_for_segments_skips_non_high_quality() -> None: + SummaryIndexService.enable_summaries_for_segments(_dataset(indexing_technique="economy")) + + +def test_enable_summaries_for_segments_revectorizes_and_enables(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + summary = _summary_record(summary_content="sum", node_id="n1") + summary.enabled = False + + segment = _segment() + segment.id = summary.chunk_id + segment.enabled = True + segment.status = "completed" + + session = MagicMock() + summary_query = MagicMock() + summary_query.filter_by.return_value = summary_query + summary_query.filter.return_value = summary_query + summary_query.all.return_value = [summary] + + seg_query = MagicMock() + seg_query.filter_by.return_value = seg_query + seg_query.first.return_value = segment + + def query_side_effect(model: object) -> MagicMock: + if model is summary_module.DocumentSegmentSummary: + return summary_query + return seg_query + + session.query.side_effect = query_side_effect + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + vec_mock = MagicMock() + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", vec_mock) + + SummaryIndexService.enable_summaries_for_segments(dataset, segment_ids=[summary.chunk_id]) + vec_mock.assert_called_once() + assert summary.enabled is True + session.commit.assert_called_once() + + +def test_enable_summaries_for_segments_no_summaries_noop(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [] + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + SummaryIndexService.enable_summaries_for_segments(dataset) + session.commit.assert_not_called() + + +def test_enable_summaries_for_segments_skips_segment_or_content_and_handles_vectorize_error( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _dataset() + summary1 = _summary_record(summary_content="sum", node_id="n1") + summary1.enabled = False + summary2 = _summary_record(summary_content="", node_id="n2") + summary2.enabled = False + summary3 = _summary_record(summary_content="sum3", node_id="n3") + summary3.enabled = False + + bad_segment = _segment() + bad_segment.enabled = False + bad_segment.status = "completed" + + good_segment = _segment() + good_segment.enabled = True + good_segment.status = "completed" + + session = MagicMock() + summary_query = MagicMock() + summary_query.filter_by.return_value = summary_query + summary_query.filter.return_value = summary_query + summary_query.all.return_value = [summary1, summary2, summary3] + + seg_query = MagicMock() + seg_query.filter_by.return_value = seg_query + seg_query.first.side_effect = [bad_segment, good_segment, good_segment] + + def query_side_effect(model: object) -> MagicMock: + if model is summary_module.DocumentSegmentSummary: + return summary_query + return seg_query + + session.query.side_effect = query_side_effect + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + logger_mock = MagicMock() + monkeypatch.setattr(summary_module, "logger", logger_mock) + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", MagicMock(side_effect=RuntimeError("boom"))) + + SummaryIndexService.enable_summaries_for_segments(dataset) + logger_mock.exception.assert_called_once() + session.commit.assert_called_once() + + +def test_delete_summaries_for_segments_deletes_vectors_and_records(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + summary = _summary_record(summary_content="sum", node_id="n1") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [summary] + session.query.return_value = query + + vector_instance = MagicMock() + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + SummaryIndexService.delete_summaries_for_segments(dataset, segment_ids=[summary.chunk_id]) + vector_instance.delete_by_ids.assert_called_once_with(["n1"]) + session.delete.assert_called_once_with(summary) + session.commit.assert_called_once() + + +def test_delete_summaries_for_segments_no_summaries_noop(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.filter.return_value = query + query.all.return_value = [] + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + SummaryIndexService.delete_summaries_for_segments(dataset) + session.commit.assert_not_called() + + +def test_update_summary_for_segment_skip_conditions() -> None: + assert ( + SummaryIndexService.update_summary_for_segment(_segment(), _dataset(indexing_technique="economy"), "x") is None + ) + seg = _segment(has_document=True) + seg.document.doc_form = "qa_model" + assert SummaryIndexService.update_summary_for_segment(seg, _dataset(), "x") is None + + +def test_update_summary_for_segment_empty_content_deletes_existing(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="old", node_id="n1") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + + vector_instance = MagicMock() + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + assert SummaryIndexService.update_summary_for_segment(segment, dataset, " ") is None + vector_instance.delete_by_ids.assert_called_once_with(["n1"]) + session.delete.assert_called_once_with(record) + session.commit.assert_called_once() + + +def test_update_summary_for_segment_empty_content_delete_vector_warns(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="old", node_id="n1") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + vector_instance = MagicMock() + vector_instance.delete_by_ids.side_effect = RuntimeError("boom") + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + logger_mock = MagicMock() + monkeypatch.setattr(summary_module, "logger", logger_mock) + + assert SummaryIndexService.update_summary_for_segment(segment, dataset, "") is None + logger_mock.warning.assert_called() + + +def test_update_summary_for_segment_empty_content_no_record_noop(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = None + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + assert SummaryIndexService.update_summary_for_segment(segment, dataset, " ") is None + + +def test_update_summary_for_segment_updates_existing_and_vectorizes(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="old", node_id="n1") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + + vector_instance = MagicMock() + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + vectorize_mock = MagicMock() + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", vectorize_mock) + + out = SummaryIndexService.update_summary_for_segment(segment, dataset, "new summary") + assert out is record + vectorize_mock.assert_called_once() + session.refresh.assert_called_once_with(record) + session.commit.assert_called() + + +def test_update_summary_for_segment_existing_vector_delete_warns(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="old", node_id="n1") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + vector_instance = MagicMock() + vector_instance.delete_by_ids.side_effect = RuntimeError("boom") + monkeypatch.setattr(summary_module, "Vector", MagicMock(return_value=vector_instance)) + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", MagicMock(return_value=None)) + logger_mock = MagicMock() + monkeypatch.setattr(summary_module, "logger", logger_mock) + + SummaryIndexService.update_summary_for_segment(segment, dataset, "new") + logger_mock.warning.assert_called() + + +def test_update_summary_for_segment_existing_vectorize_failure_returns_error_record( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="old", node_id="n1") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", MagicMock(side_effect=RuntimeError("boom"))) + + out = SummaryIndexService.update_summary_for_segment(segment, dataset, "new") + assert out is record + assert out.status == "error" + assert "Vectorization failed" in (out.error or "") + + +def test_update_summary_for_segment_new_record_success(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = None + session.query.return_value = query + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + created = _summary_record(summary_content="new", node_id=None) + monkeypatch.setattr(SummaryIndexService, "create_summary_record", MagicMock(return_value=created)) + session.merge.return_value = created + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", MagicMock(return_value=None)) + + out = SummaryIndexService.update_summary_for_segment(segment, dataset, "new") + assert out is created + session.refresh.assert_called() + session.commit.assert_called() + + +def test_update_summary_for_segment_outer_exception_sets_error_and_reraises(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _dataset() + segment = _segment() + record = _summary_record(summary_content="old", node_id="n1") + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = record + session.query.return_value = query + session.flush.side_effect = RuntimeError("flush boom") + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + with pytest.raises(RuntimeError, match="flush boom"): + SummaryIndexService.update_summary_for_segment(segment, dataset, "new") + assert record.status == "error" + assert record.error == "flush boom" + session.commit.assert_called() + + +def test_get_segment_summary_and_document_summaries(monkeypatch: pytest.MonkeyPatch) -> None: + record = _summary_record(summary_content="sum", node_id="n1") + session = MagicMock() + + q1 = MagicMock() + q1.where.return_value = q1 + q1.first.return_value = record + + q2 = MagicMock() + q2.filter.return_value = q2 + q2.all.return_value = [record] + + def query_side_effect(model: object) -> MagicMock: + if model is summary_module.DocumentSegmentSummary: + # first call used by get_segment_summary, second by get_document_summaries + if not hasattr(query_side_effect, "_called"): + query_side_effect._called = True # type: ignore[attr-defined] + return q1 + return q2 + return MagicMock() + + session.query.side_effect = query_side_effect + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + assert SummaryIndexService.get_segment_summary("seg-1", "dataset-1") is record + assert SummaryIndexService.get_document_summaries("doc-1", "dataset-1", segment_ids=["seg-1"]) == [record] + + +def test_get_segments_summaries_non_empty(monkeypatch: pytest.MonkeyPatch) -> None: + record1 = _summary_record() + record1.chunk_id = "seg-1" + record2 = _summary_record() + record2.chunk_id = "seg-2" + session = MagicMock() + q = MagicMock() + q.where.return_value = q + q.all.return_value = [record1, record2] + session.query.return_value = q + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + out = SummaryIndexService.get_segments_summaries(["seg-1", "seg-2"], "dataset-1") + assert set(out.keys()) == {"seg-1", "seg-2"} + + +def test_get_document_summary_index_status_no_segments_returns_none(monkeypatch: pytest.MonkeyPatch) -> None: + session = MagicMock() + q = MagicMock() + q.where.return_value = q + q.all.return_value = [] + session.query.return_value = q + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + assert SummaryIndexService.get_document_summary_index_status("doc-1", "dataset-1", "tenant-1") is None + + +def test_get_documents_summary_index_status_empty_input(monkeypatch: pytest.MonkeyPatch) -> None: + assert SummaryIndexService.get_documents_summary_index_status([], "dataset-1", "tenant-1") == {} + + +def test_get_documents_summary_index_status_no_pending_sets_none(monkeypatch: pytest.MonkeyPatch) -> None: + session = MagicMock() + q = MagicMock() + q.where.return_value = q + q.all.return_value = [SimpleNamespace(id="seg-1", document_id="doc-1")] + session.query.return_value = q + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + monkeypatch.setattr( + SummaryIndexService, + "get_segments_summaries", + MagicMock(return_value={"seg-1": SimpleNamespace(status="completed")}), + ) + result = SummaryIndexService.get_documents_summary_index_status(["doc-1"], "dataset-1", "tenant-1") + assert result["doc-1"] is None + + +def test_update_summary_for_segment_creates_new_and_vectorize_fails_returns_error_record( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _dataset() + segment = _segment() + + session = MagicMock() + query = MagicMock() + query.filter_by.return_value = query + query.first.return_value = None + session.query.return_value = query + + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session))), + ) + + created = _summary_record(summary_content="new", node_id=None) + monkeypatch.setattr(SummaryIndexService, "create_summary_record", MagicMock(return_value=created)) + session.merge.return_value = created + + vectorize_mock = MagicMock(side_effect=RuntimeError("boom")) + monkeypatch.setattr(SummaryIndexService, "vectorize_summary", vectorize_mock) + + out = SummaryIndexService.update_summary_for_segment(segment, dataset, "new") + assert out.status == "error" + assert "Vectorization failed" in (out.error or "") + + +def test_get_segments_summaries_empty_list() -> None: + assert SummaryIndexService.get_segments_summaries([], "dataset-1") == {} + + +def test_get_document_summary_index_status_and_documents_status(monkeypatch: pytest.MonkeyPatch) -> None: + seg_row = SimpleNamespace(id="seg-1", document_id="doc-1") + session = MagicMock() + query = MagicMock() + query.where.return_value = query + query.all.return_value = [SimpleNamespace(id="seg-1")] + session.query.return_value = query + + create_session_mock = MagicMock(return_value=_SessionContext(session)) + monkeypatch.setattr(summary_module, "session_factory", SimpleNamespace(create_session=create_session_mock)) + + monkeypatch.setattr( + SummaryIndexService, + "get_segments_summaries", + MagicMock(return_value={"seg-1": SimpleNamespace(status="generating")}), + ) + assert SummaryIndexService.get_document_summary_index_status("doc-1", "dataset-1", "tenant-1") == "SUMMARIZING" + + # Multiple docs + query2 = MagicMock() + query2.where.return_value = query2 + query2.all.return_value = [seg_row] + session2 = MagicMock() + session2.query.return_value = query2 + monkeypatch.setattr( + summary_module, + "session_factory", + SimpleNamespace(create_session=MagicMock(return_value=_SessionContext(session2))), + ) + monkeypatch.setattr( + SummaryIndexService, + "get_segments_summaries", + MagicMock(return_value={"seg-1": SimpleNamespace(status="not_started")}), + ) + result = SummaryIndexService.get_documents_summary_index_status(["doc-1", "doc-2"], "dataset-1", "tenant-1") + assert result["doc-1"] == "SUMMARIZING" + assert result["doc-2"] is None + + +def test_get_document_summary_status_detail_counts_and_previews(monkeypatch: pytest.MonkeyPatch) -> None: + segment1 = _segment() + segment1.id = "seg-1" + segment1.position = 1 + segment2 = _segment() + segment2.id = "seg-2" + segment2.position = 2 + + summary1 = _summary_record(summary_content="x" * 150, node_id="n1") + summary1.chunk_id = "seg-1" + summary1.status = "completed" + summary1.error = None + summary1.created_at = datetime(2024, 1, 1, tzinfo=UTC) + summary1.updated_at = datetime(2024, 1, 2, tzinfo=UTC) + + segment_service = SimpleNamespace(get_segments_by_document_and_dataset=MagicMock(return_value=[segment1, segment2])) + monkeypatch.setitem(sys.modules, "services.dataset_service", SimpleNamespace(SegmentService=segment_service)) + + monkeypatch.setattr(SummaryIndexService, "get_document_summaries", MagicMock(return_value=[summary1])) + + detail = SummaryIndexService.get_document_summary_status_detail("doc-1", "dataset-1") + assert detail["total_segments"] == 2 + assert detail["summary_status"]["completed"] == 1 + assert detail["summary_status"]["not_started"] == 1 + assert detail["summaries"][0]["summary_preview"].endswith("...") + assert detail["summaries"][1]["status"] == "not_started" diff --git a/api/tests/unit_tests/services/test_vector_service.py b/api/tests/unit_tests/services/test_vector_service.py new file mode 100644 index 0000000000..7b0103a2a1 --- /dev/null +++ b/api/tests/unit_tests/services/test_vector_service.py @@ -0,0 +1,704 @@ +"""Unit tests for `api/services/vector_service.py`.""" + +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any +from unittest.mock import MagicMock + +import pytest + +import services.vector_service as vector_service_module +from services.vector_service import VectorService + + +@dataclass(frozen=True) +class _UploadFileStub: + id: str + name: str + + +@dataclass(frozen=True) +class _ChildDocStub: + page_content: str + metadata: dict[str, Any] + + +@dataclass +class _ParentDocStub: + children: list[_ChildDocStub] + + +def _make_dataset( + *, + indexing_technique: str = "high_quality", + doc_form: str = "text_model", + tenant_id: str = "tenant-1", + dataset_id: str = "dataset-1", + is_multimodal: bool = False, + embedding_model_provider: str | None = "openai", + embedding_model: str = "text-embedding", +) -> MagicMock: + dataset = MagicMock(name="dataset") + dataset.id = dataset_id + dataset.tenant_id = tenant_id + dataset.doc_form = doc_form + dataset.indexing_technique = indexing_technique + dataset.is_multimodal = is_multimodal + dataset.embedding_model_provider = embedding_model_provider + dataset.embedding_model = embedding_model + return dataset + + +def _make_segment( + *, + segment_id: str = "seg-1", + tenant_id: str = "tenant-1", + dataset_id: str = "dataset-1", + document_id: str = "doc-1", + content: str = "hello", + index_node_id: str = "node-1", + index_node_hash: str = "hash-1", + attachments: list[dict[str, str]] | None = None, +) -> MagicMock: + segment = MagicMock(name="segment") + segment.id = segment_id + segment.tenant_id = tenant_id + segment.dataset_id = dataset_id + segment.document_id = document_id + segment.content = content + segment.index_node_id = index_node_id + segment.index_node_hash = index_node_hash + segment.attachments = attachments or [] + return segment + + +def _mock_db_session_for_update_multimodel(*, upload_files: list[_UploadFileStub] | None) -> MagicMock: + session = MagicMock(name="session") + + binding_query = MagicMock(name="binding_query") + binding_query.where.return_value = binding_query + binding_query.delete.return_value = 1 + + upload_query = MagicMock(name="upload_query") + upload_query.where.return_value = upload_query + upload_query.all.return_value = upload_files or [] + + def query_side_effect(model: object) -> MagicMock: + if model is vector_service_module.SegmentAttachmentBinding: + return binding_query + if model is vector_service_module.UploadFile: + return upload_query + return MagicMock(name=f"query({model})") + + session.query.side_effect = query_side_effect + db_mock = MagicMock(name="db") + db_mock.session = session + return db_mock + + +def test_create_segments_vector_regular_indexing_loads_documents_and_keywords(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(is_multimodal=False) + segment = _make_segment() + + index_processor = MagicMock(name="index_processor") + factory_instance = MagicMock(name="IndexProcessorFactory-instance") + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model") + + index_processor.load.assert_called_once() + args, kwargs = index_processor.load.call_args + assert args[0] == dataset + assert len(args[1]) == 1 + assert args[2] is None + assert kwargs["with_keywords"] is True + assert kwargs["keywords_list"] == [["k1"]] + + +def test_create_segments_vector_regular_indexing_loads_multimodal_documents(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(is_multimodal=True) + segment = _make_segment( + attachments=[ + {"id": "img-1", "name": "a.png"}, + {"id": "img-2", "name": "b.png"}, + ] + ) + + index_processor = MagicMock(name="index_processor") + factory_instance = MagicMock(name="IndexProcessorFactory-instance") + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + VectorService.create_segments_vector([["k1"]], [segment], dataset, "text_model") + + assert index_processor.load.call_count == 2 + first_args, first_kwargs = index_processor.load.call_args_list[0] + assert first_args[0] == dataset + assert len(first_args[1]) == 1 + assert first_kwargs["with_keywords"] is True + + second_args, second_kwargs = index_processor.load.call_args_list[1] + assert second_args[0] == dataset + assert second_args[1] == [] + assert len(second_args[2]) == 2 + assert second_kwargs["with_keywords"] is False + + +def test_create_segments_vector_with_no_segments_does_not_load(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset() + index_processor = MagicMock(name="index_processor") + factory_instance = MagicMock() + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + VectorService.create_segments_vector(None, [], dataset, "text_model") + index_processor.load.assert_not_called() + + +def _mock_parent_child_queries( + *, + dataset_document: object | None, + processing_rule: object | None, +) -> MagicMock: + session = MagicMock(name="session") + + doc_query = MagicMock(name="doc_query") + doc_query.filter_by.return_value = doc_query + doc_query.first.return_value = dataset_document + + rule_query = MagicMock(name="rule_query") + rule_query.where.return_value = rule_query + rule_query.first.return_value = processing_rule + + def query_side_effect(model: object) -> MagicMock: + if model is vector_service_module.DatasetDocument: + return doc_query + if model is vector_service_module.DatasetProcessRule: + return rule_query + return MagicMock(name=f"query({model})") + + session.query.side_effect = query_side_effect + db_mock = MagicMock(name="db") + db_mock.session = session + return db_mock + + +def test_create_segments_vector_parent_child_calls_generate_child_chunks_with_explicit_model( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _make_dataset( + doc_form=vector_service_module.IndexStructureType.PARENT_CHILD_INDEX, + embedding_model_provider="openai", + indexing_technique="high_quality", + ) + segment = _make_segment() + + dataset_document = MagicMock(name="dataset_document") + dataset_document.id = segment.document_id + dataset_document.dataset_process_rule_id = "rule-1" + dataset_document.doc_language = "en" + dataset_document.created_by = "user-1" + + processing_rule = MagicMock(name="processing_rule") + processing_rule.to_dict.return_value = {"rules": {}} + + monkeypatch.setattr( + vector_service_module, + "db", + _mock_parent_child_queries(dataset_document=dataset_document, processing_rule=processing_rule), + ) + + embedding_model_instance = MagicMock(name="embedding_model_instance") + model_manager_instance = MagicMock(name="model_manager_instance") + model_manager_instance.get_model_instance.return_value = embedding_model_instance + monkeypatch.setattr(vector_service_module, "ModelManager", MagicMock(return_value=model_manager_instance)) + + generate_child_chunks_mock = MagicMock() + monkeypatch.setattr(VectorService, "generate_child_chunks", generate_child_chunks_mock) + + index_processor = MagicMock() + factory_instance = MagicMock() + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + VectorService.create_segments_vector( + None, [segment], dataset, vector_service_module.IndexStructureType.PARENT_CHILD_INDEX + ) + + model_manager_instance.get_model_instance.assert_called_once() + generate_child_chunks_mock.assert_called_once_with( + segment, dataset_document, dataset, embedding_model_instance, processing_rule, False + ) + index_processor.load.assert_not_called() + + +def test_create_segments_vector_parent_child_uses_default_embedding_model_when_provider_missing( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _make_dataset( + doc_form=vector_service_module.IndexStructureType.PARENT_CHILD_INDEX, + embedding_model_provider=None, + indexing_technique="high_quality", + ) + segment = _make_segment() + + dataset_document = MagicMock() + dataset_document.dataset_process_rule_id = "rule-1" + dataset_document.doc_language = "en" + dataset_document.created_by = "user-1" + + processing_rule = MagicMock() + processing_rule.to_dict.return_value = {"rules": {}} + + monkeypatch.setattr( + vector_service_module, + "db", + _mock_parent_child_queries(dataset_document=dataset_document, processing_rule=processing_rule), + ) + + embedding_model_instance = MagicMock() + model_manager_instance = MagicMock() + model_manager_instance.get_default_model_instance.return_value = embedding_model_instance + monkeypatch.setattr(vector_service_module, "ModelManager", MagicMock(return_value=model_manager_instance)) + + generate_child_chunks_mock = MagicMock() + monkeypatch.setattr(VectorService, "generate_child_chunks", generate_child_chunks_mock) + + index_processor = MagicMock() + factory_instance = MagicMock() + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + VectorService.create_segments_vector( + None, [segment], dataset, vector_service_module.IndexStructureType.PARENT_CHILD_INDEX + ) + + model_manager_instance.get_default_model_instance.assert_called_once() + generate_child_chunks_mock.assert_called_once() + + +def test_create_segments_vector_parent_child_missing_document_logs_warning_and_continues( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _make_dataset(doc_form=vector_service_module.IndexStructureType.PARENT_CHILD_INDEX) + segment = _make_segment() + + processing_rule = MagicMock() + monkeypatch.setattr( + vector_service_module, + "db", + _mock_parent_child_queries(dataset_document=None, processing_rule=processing_rule), + ) + + logger_mock = MagicMock() + monkeypatch.setattr(vector_service_module, "logger", logger_mock) + + index_processor = MagicMock() + factory_instance = MagicMock() + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + VectorService.create_segments_vector( + None, [segment], dataset, vector_service_module.IndexStructureType.PARENT_CHILD_INDEX + ) + logger_mock.warning.assert_called_once() + index_processor.load.assert_not_called() + + +def test_create_segments_vector_parent_child_missing_processing_rule_raises(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(doc_form=vector_service_module.IndexStructureType.PARENT_CHILD_INDEX) + segment = _make_segment() + + dataset_document = MagicMock() + dataset_document.dataset_process_rule_id = "rule-1" + monkeypatch.setattr( + vector_service_module, + "db", + _mock_parent_child_queries(dataset_document=dataset_document, processing_rule=None), + ) + + with pytest.raises(ValueError, match="No processing rule found"): + VectorService.create_segments_vector( + None, [segment], dataset, vector_service_module.IndexStructureType.PARENT_CHILD_INDEX + ) + + +def test_create_segments_vector_parent_child_non_high_quality_raises(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset( + doc_form=vector_service_module.IndexStructureType.PARENT_CHILD_INDEX, + indexing_technique="economy", + ) + segment = _make_segment() + dataset_document = MagicMock() + dataset_document.dataset_process_rule_id = "rule-1" + processing_rule = MagicMock() + monkeypatch.setattr( + vector_service_module, + "db", + _mock_parent_child_queries(dataset_document=dataset_document, processing_rule=processing_rule), + ) + + with pytest.raises(ValueError, match="not high quality"): + VectorService.create_segments_vector( + None, [segment], dataset, vector_service_module.IndexStructureType.PARENT_CHILD_INDEX + ) + + +def test_update_segment_vector_high_quality_uses_vector(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="high_quality") + segment = _make_segment() + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + + VectorService.update_segment_vector(["k"], segment, dataset) + + vector_instance.delete_by_ids.assert_called_once_with([segment.index_node_id]) + vector_instance.add_texts.assert_called_once() + add_args, add_kwargs = vector_instance.add_texts.call_args + assert len(add_args[0]) == 1 + assert add_kwargs["duplicate_check"] is True + + +def test_update_segment_vector_economy_uses_keyword_with_keywords_list(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="economy") + segment = _make_segment() + + keyword_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Keyword", MagicMock(return_value=keyword_instance)) + + VectorService.update_segment_vector(["a", "b"], segment, dataset) + + keyword_instance.delete_by_ids.assert_called_once_with([segment.index_node_id]) + keyword_instance.add_texts.assert_called_once() + args, kwargs = keyword_instance.add_texts.call_args + assert len(args[0]) == 1 + assert kwargs["keywords_list"] == [["a", "b"]] + + +def test_update_segment_vector_economy_uses_keyword_without_keywords_list(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="economy") + segment = _make_segment() + + keyword_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Keyword", MagicMock(return_value=keyword_instance)) + + VectorService.update_segment_vector(None, segment, dataset) + keyword_instance.add_texts.assert_called_once() + _, kwargs = keyword_instance.add_texts.call_args + assert "keywords_list" not in kwargs + + +def test_generate_child_chunks_regenerate_cleans_then_saves_children(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(doc_form="text_model", tenant_id="tenant-1", dataset_id="dataset-1") + segment = _make_segment(segment_id="seg-1") + + dataset_document = MagicMock() + dataset_document.id = segment.document_id + dataset_document.doc_language = "en" + dataset_document.created_by = "user-1" + + processing_rule = MagicMock() + processing_rule.to_dict.return_value = {"rules": {}} + + child1 = _ChildDocStub(page_content="c1", metadata={"doc_id": "c1-id", "doc_hash": "c1-h"}) + child2 = _ChildDocStub(page_content="c2", metadata={"doc_id": "c2-id", "doc_hash": "c2-h"}) + transformed = [_ParentDocStub(children=[child1, child2])] + + index_processor = MagicMock() + index_processor.transform.return_value = transformed + factory_instance = MagicMock() + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + child_chunk_ctor = MagicMock(side_effect=lambda **kwargs: kwargs) + monkeypatch.setattr(vector_service_module, "ChildChunk", child_chunk_ctor) + + db_mock = MagicMock() + db_mock.session.add = MagicMock() + db_mock.session.commit = MagicMock() + monkeypatch.setattr(vector_service_module, "db", db_mock) + + VectorService.generate_child_chunks( + segment=segment, + dataset_document=dataset_document, + dataset=dataset, + embedding_model_instance=MagicMock(), + processing_rule=processing_rule, + regenerate=True, + ) + + index_processor.clean.assert_called_once() + _, transform_kwargs = index_processor.transform.call_args + assert transform_kwargs["process_rule"]["rules"]["parent_mode"] == vector_service_module.ParentMode.FULL_DOC + index_processor.load.assert_called_once() + assert db_mock.session.add.call_count == 2 + db_mock.session.commit.assert_called_once() + + +def test_generate_child_chunks_commits_even_when_no_children(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(doc_form="text_model") + segment = _make_segment() + dataset_document = MagicMock() + dataset_document.doc_language = "en" + dataset_document.created_by = "user-1" + + processing_rule = MagicMock() + processing_rule.to_dict.return_value = {"rules": {}} + + index_processor = MagicMock() + index_processor.transform.return_value = [_ParentDocStub(children=[])] + factory_instance = MagicMock() + factory_instance.init_index_processor.return_value = index_processor + monkeypatch.setattr(vector_service_module, "IndexProcessorFactory", MagicMock(return_value=factory_instance)) + + db_mock = MagicMock() + monkeypatch.setattr(vector_service_module, "db", db_mock) + + VectorService.generate_child_chunks( + segment=segment, + dataset_document=dataset_document, + dataset=dataset, + embedding_model_instance=MagicMock(), + processing_rule=processing_rule, + regenerate=False, + ) + + index_processor.load.assert_not_called() + db_mock.session.add.assert_not_called() + db_mock.session.commit.assert_called_once() + + +def test_create_child_chunk_vector_high_quality_adds_texts(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="high_quality") + child_chunk = MagicMock() + child_chunk.content = "child" + child_chunk.index_node_id = "id" + child_chunk.index_node_hash = "h" + child_chunk.document_id = "doc-1" + child_chunk.dataset_id = "dataset-1" + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + + VectorService.create_child_chunk_vector(child_chunk, dataset) + vector_instance.add_texts.assert_called_once() + + +def test_create_child_chunk_vector_economy_noop(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="economy") + vector_cls = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", vector_cls) + + child_chunk = MagicMock() + child_chunk.content = "child" + child_chunk.index_node_id = "id" + child_chunk.index_node_hash = "h" + child_chunk.document_id = "doc-1" + child_chunk.dataset_id = "dataset-1" + + VectorService.create_child_chunk_vector(child_chunk, dataset) + vector_cls.assert_not_called() + + +def test_update_child_chunk_vector_high_quality_updates_vector(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="high_quality") + + new_chunk = MagicMock() + new_chunk.content = "n" + new_chunk.index_node_id = "nid" + new_chunk.index_node_hash = "nh" + new_chunk.document_id = "d" + new_chunk.dataset_id = "ds" + + upd_chunk = MagicMock() + upd_chunk.content = "u" + upd_chunk.index_node_id = "uid" + upd_chunk.index_node_hash = "uh" + upd_chunk.document_id = "d" + upd_chunk.dataset_id = "ds" + + del_chunk = MagicMock() + del_chunk.index_node_id = "did" + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + + VectorService.update_child_chunk_vector([new_chunk], [upd_chunk], [del_chunk], dataset) + + vector_instance.delete_by_ids.assert_called_once_with(["uid", "did"]) + vector_instance.add_texts.assert_called_once() + docs = vector_instance.add_texts.call_args.args[0] + assert len(docs) == 2 + + +def test_update_child_chunk_vector_economy_noop(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="economy") + vector_cls = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", vector_cls) + VectorService.update_child_chunk_vector([], [], [], dataset) + vector_cls.assert_not_called() + + +def test_delete_child_chunk_vector_deletes_by_id(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset() + child_chunk = MagicMock() + child_chunk.index_node_id = "cid" + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + + VectorService.delete_child_chunk_vector(child_chunk, dataset) + vector_instance.delete_by_ids.assert_called_once_with(["cid"]) + + +# --------------------------------------------------------------------------- +# update_multimodel_vector (missing coverage in previous suites) +# --------------------------------------------------------------------------- + + +def test_update_multimodel_vector_returns_when_not_high_quality(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="economy", is_multimodal=True) + segment = _make_segment(tenant_id="t", attachments=[{"id": "a"}]) + + vector_cls = MagicMock() + db_mock = _mock_db_session_for_update_multimodel(upload_files=[]) + monkeypatch.setattr(vector_service_module, "Vector", vector_cls) + monkeypatch.setattr(vector_service_module, "db", db_mock) + + VectorService.update_multimodel_vector(segment=segment, attachment_ids=["a"], dataset=dataset) + vector_cls.assert_not_called() + db_mock.session.query.assert_not_called() + + +def test_update_multimodel_vector_returns_when_no_actual_change(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="high_quality", is_multimodal=True) + segment = _make_segment(tenant_id="t", attachments=[{"id": "a"}, {"id": "b"}]) + + vector_cls = MagicMock() + db_mock = _mock_db_session_for_update_multimodel(upload_files=[]) + monkeypatch.setattr(vector_service_module, "Vector", vector_cls) + monkeypatch.setattr(vector_service_module, "db", db_mock) + + VectorService.update_multimodel_vector(segment=segment, attachment_ids=["b", "a"], dataset=dataset) + vector_cls.assert_not_called() + db_mock.session.query.assert_not_called() + + +def test_update_multimodel_vector_deletes_bindings_and_commits_on_empty_new_ids( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _make_dataset(indexing_technique="high_quality", is_multimodal=True) + segment = _make_segment(tenant_id="tenant-1", attachments=[{"id": "old-1"}, {"id": "old-2"}]) + + vector_instance = MagicMock(name="vector_instance") + vector_cls = MagicMock(return_value=vector_instance) + db_mock = _mock_db_session_for_update_multimodel(upload_files=[]) + + monkeypatch.setattr(vector_service_module, "Vector", vector_cls) + monkeypatch.setattr(vector_service_module, "db", db_mock) + + VectorService.update_multimodel_vector(segment=segment, attachment_ids=[], dataset=dataset) + + vector_cls.assert_called_once_with(dataset=dataset) + vector_instance.delete_by_ids.assert_called_once_with(["old-1", "old-2"]) + db_mock.session.query.assert_called_once_with(vector_service_module.SegmentAttachmentBinding) + db_mock.session.commit.assert_called_once() + db_mock.session.add_all.assert_not_called() + vector_instance.add_texts.assert_not_called() + + +def test_update_multimodel_vector_commits_when_no_upload_files_found(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="high_quality", is_multimodal=True) + segment = _make_segment(tenant_id="tenant-1", attachments=[{"id": "old-1"}]) + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + db_mock = _mock_db_session_for_update_multimodel(upload_files=[]) + monkeypatch.setattr(vector_service_module, "db", db_mock) + + VectorService.update_multimodel_vector(segment=segment, attachment_ids=["new-1"], dataset=dataset) + + db_mock.session.commit.assert_called_once() + db_mock.session.add_all.assert_not_called() + vector_instance.add_texts.assert_not_called() + + +def test_update_multimodel_vector_adds_bindings_and_vectors_and_skips_missing_upload_files( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _make_dataset(indexing_technique="high_quality", is_multimodal=True) + segment = _make_segment(segment_id="seg-1", tenant_id="tenant-1", attachments=[{"id": "old-1"}]) + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + db_mock = _mock_db_session_for_update_multimodel(upload_files=[_UploadFileStub(id="file-1", name="img.png")]) + monkeypatch.setattr(vector_service_module, "db", db_mock) + + binding_ctor = MagicMock(side_effect=lambda **kwargs: kwargs) + monkeypatch.setattr(vector_service_module, "SegmentAttachmentBinding", binding_ctor) + + logger_mock = MagicMock() + monkeypatch.setattr(vector_service_module, "logger", logger_mock) + + VectorService.update_multimodel_vector(segment=segment, attachment_ids=["file-1", "missing"], dataset=dataset) + + logger_mock.warning.assert_called_once() + db_mock.session.add_all.assert_called_once() + bindings = db_mock.session.add_all.call_args.args[0] + assert len(bindings) == 1 + assert bindings[0]["attachment_id"] == "file-1" + + vector_instance.add_texts.assert_called_once() + documents = vector_instance.add_texts.call_args.args[0] + assert len(documents) == 1 + assert documents[0].page_content == "img.png" + assert documents[0].metadata["doc_id"] == "file-1" + db_mock.session.commit.assert_called_once() + + +def test_update_multimodel_vector_updates_bindings_without_multimodal_vector_ops( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset = _make_dataset(indexing_technique="high_quality", is_multimodal=False) + segment = _make_segment(tenant_id="tenant-1", attachments=[{"id": "old-1"}]) + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + db_mock = _mock_db_session_for_update_multimodel(upload_files=[_UploadFileStub(id="file-1", name="img.png")]) + monkeypatch.setattr(vector_service_module, "db", db_mock) + monkeypatch.setattr( + vector_service_module, "SegmentAttachmentBinding", MagicMock(side_effect=lambda **kwargs: kwargs) + ) + + VectorService.update_multimodel_vector(segment=segment, attachment_ids=["file-1"], dataset=dataset) + + vector_instance.delete_by_ids.assert_not_called() + vector_instance.add_texts.assert_not_called() + db_mock.session.add_all.assert_called_once() + db_mock.session.commit.assert_called_once() + + +def test_update_multimodel_vector_rolls_back_and_reraises_on_error(monkeypatch: pytest.MonkeyPatch) -> None: + dataset = _make_dataset(indexing_technique="high_quality", is_multimodal=True) + segment = _make_segment(segment_id="seg-1", tenant_id="tenant-1", attachments=[{"id": "old-1"}]) + + vector_instance = MagicMock() + monkeypatch.setattr(vector_service_module, "Vector", MagicMock(return_value=vector_instance)) + db_mock = _mock_db_session_for_update_multimodel(upload_files=[_UploadFileStub(id="file-1", name="img.png")]) + db_mock.session.commit.side_effect = RuntimeError("boom") + monkeypatch.setattr(vector_service_module, "db", db_mock) + monkeypatch.setattr( + vector_service_module, "SegmentAttachmentBinding", MagicMock(side_effect=lambda **kwargs: kwargs) + ) + + logger_mock = MagicMock() + monkeypatch.setattr(vector_service_module, "logger", logger_mock) + + with pytest.raises(RuntimeError, match="boom"): + VectorService.update_multimodel_vector(segment=segment, attachment_ids=["file-1"], dataset=dataset) + + logger_mock.exception.assert_called_once() + db_mock.session.rollback.assert_called_once() diff --git a/api/tests/unit_tests/services/test_website_service.py b/api/tests/unit_tests/services/test_website_service.py new file mode 100644 index 0000000000..e2775ce90c --- /dev/null +++ b/api/tests/unit_tests/services/test_website_service.py @@ -0,0 +1,718 @@ +"""Unit tests for services.website_service. + +Focuses on provider dispatching, argument validation, and provider-specific branches +without making any real network/storage/redis calls. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from datetime import UTC, datetime +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +import services.website_service as website_service_module +from services.website_service import ( + CrawlOptions, + WebsiteCrawlApiRequest, + WebsiteCrawlStatusApiRequest, + WebsiteService, +) + + +@dataclass(frozen=True) +class _DummyHttpxResponse: + payload: dict[str, Any] + + def json(self) -> dict[str, Any]: + return self.payload + + +@pytest.fixture(autouse=True) +def stub_current_user(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + website_service_module, + "current_user", + type("User", (), {"current_tenant_id": "tenant-1"})(), + ) + + +def test_crawl_options_include_exclude_paths() -> None: + options = CrawlOptions(includes="a,b", excludes="x,y") + assert options.get_include_paths() == ["a", "b"] + assert options.get_exclude_paths() == ["x", "y"] + + empty = CrawlOptions(includes=None, excludes=None) + assert empty.get_include_paths() == [] + assert empty.get_exclude_paths() == [] + + +def test_website_crawl_api_request_from_args_valid_and_to_crawl_request() -> None: + args = { + "provider": "firecrawl", + "url": "https://example.com", + "options": { + "limit": 2, + "crawl_sub_pages": True, + "only_main_content": True, + "includes": "a,b", + "excludes": "x", + "prompt": "hi", + "max_depth": 3, + "use_sitemap": False, + }, + } + + api_req = WebsiteCrawlApiRequest.from_args(args) + crawl_req = api_req.to_crawl_request() + + assert crawl_req.provider == "firecrawl" + assert crawl_req.url == "https://example.com" + assert crawl_req.options.limit == 2 + assert crawl_req.options.crawl_sub_pages is True + assert crawl_req.options.only_main_content is True + assert crawl_req.options.get_include_paths() == ["a", "b"] + assert crawl_req.options.get_exclude_paths() == ["x"] + assert crawl_req.options.prompt == "hi" + assert crawl_req.options.max_depth == 3 + assert crawl_req.options.use_sitemap is False + + +@pytest.mark.parametrize( + ("args", "missing_msg"), + [ + ({}, "Provider is required"), + ({"provider": "firecrawl"}, "URL is required"), + ({"provider": "firecrawl", "url": "https://example.com"}, "Options are required"), + ], +) +def test_website_crawl_api_request_from_args_requires_fields(args: dict, missing_msg: str) -> None: + with pytest.raises(ValueError, match=missing_msg): + WebsiteCrawlApiRequest.from_args(args) + + +def test_website_crawl_status_api_request_from_args_requires_fields() -> None: + with pytest.raises(ValueError, match="Provider is required"): + WebsiteCrawlStatusApiRequest.from_args({}, job_id="job-1") + + with pytest.raises(ValueError, match="Job ID is required"): + WebsiteCrawlStatusApiRequest.from_args({"provider": "firecrawl"}, job_id="") + + req = WebsiteCrawlStatusApiRequest.from_args({"provider": "firecrawl"}, job_id="job-1") + assert req.provider == "firecrawl" + assert req.job_id == "job-1" + + +def test_get_credentials_and_config_selects_plugin_id_and_key_firecrawl(monkeypatch: pytest.MonkeyPatch) -> None: + service_instance = MagicMock(name="DatasourceProviderService-instance") + service_instance.get_datasource_credentials.return_value = {"firecrawl_api_key": "k", "base_url": "b"} + monkeypatch.setattr(website_service_module, "DatasourceProviderService", MagicMock(return_value=service_instance)) + + api_key, config = WebsiteService._get_credentials_and_config("tenant-1", "firecrawl") + assert api_key == "k" + assert config["base_url"] == "b" + + service_instance.get_datasource_credentials.assert_called_once_with( + tenant_id="tenant-1", + provider="firecrawl", + plugin_id="langgenius/firecrawl_datasource", + ) + + +@pytest.mark.parametrize( + ("provider", "plugin_id"), + [ + ("watercrawl", "watercrawl/watercrawl_datasource"), + ("jinareader", "langgenius/jina_datasource"), + ], +) +def test_get_credentials_and_config_selects_plugin_id_and_key_api_key( + monkeypatch: pytest.MonkeyPatch, provider: str, plugin_id: str +) -> None: + service_instance = MagicMock(name="DatasourceProviderService-instance") + service_instance.get_datasource_credentials.return_value = {"api_key": "enc-key", "base_url": "b"} + monkeypatch.setattr(website_service_module, "DatasourceProviderService", MagicMock(return_value=service_instance)) + + api_key, config = WebsiteService._get_credentials_and_config("tenant-1", provider) + assert api_key == "enc-key" + assert config["base_url"] == "b" + + service_instance.get_datasource_credentials.assert_called_once_with( + tenant_id="tenant-1", + provider=provider, + plugin_id=plugin_id, + ) + + +def test_get_credentials_and_config_rejects_invalid_provider() -> None: + with pytest.raises(ValueError, match="Invalid provider"): + WebsiteService._get_credentials_and_config("tenant-1", "unknown") + + +def test_get_credentials_and_config_hits_unreachable_guard_branch(monkeypatch: pytest.MonkeyPatch) -> None: + class FlakyProvider: + def __init__(self) -> None: + self._eq_calls = 0 + + def __hash__(self) -> int: + return 1 + + def __eq__(self, other: object) -> bool: + if other == "firecrawl": + self._eq_calls += 1 + return self._eq_calls == 1 + return False + + def __repr__(self) -> str: + return "FlakyProvider()" + + service_instance = MagicMock(name="DatasourceProviderService-instance") + service_instance.get_datasource_credentials.return_value = {"firecrawl_api_key": "k"} + monkeypatch.setattr(website_service_module, "DatasourceProviderService", MagicMock(return_value=service_instance)) + + with pytest.raises(ValueError, match="Invalid provider"): + WebsiteService._get_credentials_and_config("tenant-1", FlakyProvider()) # type: ignore[arg-type] + + +def test_get_decrypted_api_key_requires_api_key(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(website_service_module.encrypter, "decrypt_token", MagicMock()) + with pytest.raises(ValueError, match="API key not found in configuration"): + WebsiteService._get_decrypted_api_key("tenant-1", {}) + + +def test_get_decrypted_api_key_decrypts(monkeypatch: pytest.MonkeyPatch) -> None: + decrypt_mock = MagicMock(return_value="plain") + monkeypatch.setattr(website_service_module.encrypter, "decrypt_token", decrypt_mock) + + assert WebsiteService._get_decrypted_api_key("tenant-1", {"api_key": "enc"}) == "plain" + decrypt_mock.assert_called_once_with(tenant_id="tenant-1", token="enc") + + +def test_document_create_args_validate_wraps_error_message() -> None: + with pytest.raises(ValueError, match=r"^Invalid arguments: Provider is required$"): + WebsiteService.document_create_args_validate({}) + + +def test_crawl_url_dispatches_by_provider(monkeypatch: pytest.MonkeyPatch) -> None: + api_request = WebsiteCrawlApiRequest(provider="firecrawl", url="https://example.com", options={"limit": 1}) + crawl_request = api_request.to_crawl_request() + + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {"base_url": "b"}))) + firecrawl_mock = MagicMock(return_value={"status": "active", "job_id": "j1"}) + monkeypatch.setattr(WebsiteService, "_crawl_with_firecrawl", firecrawl_mock) + + result = WebsiteService.crawl_url(api_request) + + assert result == {"status": "active", "job_id": "j1"} + firecrawl_mock.assert_called_once() + assert firecrawl_mock.call_args.kwargs["request"] == crawl_request + + +@pytest.mark.parametrize( + ("provider", "method_name"), + [ + ("watercrawl", "_crawl_with_watercrawl"), + ("jinareader", "_crawl_with_jinareader"), + ], +) +def test_crawl_url_dispatches_other_providers(monkeypatch: pytest.MonkeyPatch, provider: str, method_name: str) -> None: + api_request = WebsiteCrawlApiRequest(provider=provider, url="https://example.com", options={"limit": 1}) + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {"base_url": "b"}))) + + impl_mock = MagicMock(return_value={"status": "active"}) + monkeypatch.setattr(WebsiteService, method_name, impl_mock) + + assert WebsiteService.crawl_url(api_request) == {"status": "active"} + impl_mock.assert_called_once() + + +def test_crawl_url_rejects_invalid_provider(monkeypatch: pytest.MonkeyPatch) -> None: + api_request = WebsiteCrawlApiRequest(provider="bad", url="https://example.com", options={"limit": 1}) + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {}))) + + with pytest.raises(ValueError, match="Invalid provider"): + WebsiteService.crawl_url(api_request) + + +def test_crawl_with_firecrawl_builds_params_single_page_and_sets_redis(monkeypatch: pytest.MonkeyPatch) -> None: + firecrawl_instance = MagicMock(name="FirecrawlApp-instance") + firecrawl_instance.crawl_url.return_value = "job-1" + firecrawl_cls = MagicMock(return_value=firecrawl_instance) + monkeypatch.setattr(website_service_module, "FirecrawlApp", firecrawl_cls) + + redis_mock = MagicMock() + monkeypatch.setattr(website_service_module, "redis_client", redis_mock) + + fixed_now = datetime(2024, 1, 1, tzinfo=UTC) + with patch.object(website_service_module.datetime, "datetime") as datetime_mock: + datetime_mock.now.return_value = fixed_now + + req = WebsiteCrawlApiRequest( + provider="firecrawl", url="https://example.com", options={"limit": 5} + ).to_crawl_request() + req.options.crawl_sub_pages = False + req.options.only_main_content = True + + result = WebsiteService._crawl_with_firecrawl(request=req, api_key="k", config={"base_url": "b"}) + + assert result == {"status": "active", "job_id": "job-1"} + + firecrawl_cls.assert_called_once_with(api_key="k", base_url="b") + firecrawl_instance.crawl_url.assert_called_once() + _, params = firecrawl_instance.crawl_url.call_args.args + assert params["limit"] == 1 + assert params["includePaths"] == [] + assert params["excludePaths"] == [] + assert params["scrapeOptions"] == {"onlyMainContent": True} + + redis_mock.setex.assert_called_once() + key, ttl, value = redis_mock.setex.call_args.args + assert key == "website_crawl_job-1" + assert ttl == 3600 + assert float(value) == pytest.approx(fixed_now.timestamp(), rel=0, abs=1e-6) + + +def test_crawl_with_firecrawl_builds_params_multi_page_including_prompt(monkeypatch: pytest.MonkeyPatch) -> None: + firecrawl_instance = MagicMock(name="FirecrawlApp-instance") + firecrawl_instance.crawl_url.return_value = "job-2" + monkeypatch.setattr(website_service_module, "FirecrawlApp", MagicMock(return_value=firecrawl_instance)) + monkeypatch.setattr(website_service_module, "redis_client", MagicMock()) + + req = WebsiteCrawlApiRequest( + provider="firecrawl", + url="https://example.com", + options={ + "crawl_sub_pages": True, + "limit": 3, + "only_main_content": False, + "includes": "a,b", + "excludes": "x", + "prompt": "use this", + }, + ).to_crawl_request() + + WebsiteService._crawl_with_firecrawl(request=req, api_key="k", config={"base_url": None}) + _, params = firecrawl_instance.crawl_url.call_args.args + assert params["includePaths"] == ["a", "b"] + assert params["excludePaths"] == ["x"] + assert params["limit"] == 3 + assert params["scrapeOptions"] == {"onlyMainContent": False} + assert params["prompt"] == "use this" + + +def test_crawl_with_watercrawl_passes_options_dict(monkeypatch: pytest.MonkeyPatch) -> None: + provider_instance = MagicMock() + provider_instance.crawl_url.return_value = {"status": "active", "job_id": "w1"} + provider_cls = MagicMock(return_value=provider_instance) + monkeypatch.setattr(website_service_module, "WaterCrawlProvider", provider_cls) + + req = WebsiteCrawlApiRequest( + provider="watercrawl", + url="https://example.com", + options={ + "limit": 2, + "crawl_sub_pages": True, + "only_main_content": True, + "includes": "a", + "excludes": None, + "max_depth": 5, + "use_sitemap": False, + }, + ).to_crawl_request() + + result = WebsiteService._crawl_with_watercrawl(request=req, api_key="k", config={"base_url": "b"}) + assert result == {"status": "active", "job_id": "w1"} + + provider_cls.assert_called_once_with(api_key="k", base_url="b") + provider_instance.crawl_url.assert_called_once_with( + url="https://example.com", + options={ + "limit": 2, + "crawl_sub_pages": True, + "only_main_content": True, + "includes": "a", + "excludes": None, + "max_depth": 5, + "use_sitemap": False, + }, + ) + + +def test_crawl_with_jinareader_single_page_success(monkeypatch: pytest.MonkeyPatch) -> None: + get_mock = MagicMock(return_value=_DummyHttpxResponse({"code": 200, "data": {"title": "t"}})) + monkeypatch.setattr(website_service_module.httpx, "get", get_mock) + + req = WebsiteCrawlApiRequest( + provider="jinareader", url="https://example.com", options={"crawl_sub_pages": False} + ).to_crawl_request() + req.options.crawl_sub_pages = False + + result = WebsiteService._crawl_with_jinareader(request=req, api_key="k") + assert result == {"status": "active", "data": {"title": "t"}} + get_mock.assert_called_once() + + +def test_crawl_with_jinareader_single_page_failure(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(website_service_module.httpx, "get", MagicMock(return_value=_DummyHttpxResponse({"code": 500}))) + req = WebsiteCrawlApiRequest( + provider="jinareader", url="https://example.com", options={"crawl_sub_pages": False} + ).to_crawl_request() + req.options.crawl_sub_pages = False + + with pytest.raises(ValueError, match="Failed to crawl:"): + WebsiteService._crawl_with_jinareader(request=req, api_key="k") + + +def test_crawl_with_jinareader_multi_page_success(monkeypatch: pytest.MonkeyPatch) -> None: + post_mock = MagicMock(return_value=_DummyHttpxResponse({"code": 200, "data": {"taskId": "t1"}})) + monkeypatch.setattr(website_service_module.httpx, "post", post_mock) + + req = WebsiteCrawlApiRequest( + provider="jinareader", + url="https://example.com", + options={"crawl_sub_pages": True, "limit": 5, "use_sitemap": True}, + ).to_crawl_request() + req.options.crawl_sub_pages = True + + result = WebsiteService._crawl_with_jinareader(request=req, api_key="k") + assert result == {"status": "active", "job_id": "t1"} + post_mock.assert_called_once() + + +def test_crawl_with_jinareader_multi_page_failure(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + website_service_module.httpx, "post", MagicMock(return_value=_DummyHttpxResponse({"code": 400})) + ) + req = WebsiteCrawlApiRequest( + provider="jinareader", + url="https://example.com", + options={"crawl_sub_pages": True, "limit": 2, "use_sitemap": False}, + ).to_crawl_request() + req.options.crawl_sub_pages = True + + with pytest.raises(ValueError, match="Failed to crawl$"): + WebsiteService._crawl_with_jinareader(request=req, api_key="k") + + +def test_get_crawl_status_dispatches(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {"base_url": "b"}))) + firecrawl_status = MagicMock(return_value={"status": "active"}) + monkeypatch.setattr(WebsiteService, "_get_firecrawl_status", firecrawl_status) + + result = WebsiteService.get_crawl_status("job-1", "firecrawl") + assert result == {"status": "active"} + firecrawl_status.assert_called_once_with("job-1", "k", {"base_url": "b"}) + + watercrawl_status = MagicMock(return_value={"status": "active", "job_id": "w"}) + monkeypatch.setattr(WebsiteService, "_get_watercrawl_status", watercrawl_status) + assert WebsiteService.get_crawl_status("job-2", "watercrawl") == {"status": "active", "job_id": "w"} + watercrawl_status.assert_called_once_with("job-2", "k", {"base_url": "b"}) + + jinareader_status = MagicMock(return_value={"status": "active", "job_id": "j"}) + monkeypatch.setattr(WebsiteService, "_get_jinareader_status", jinareader_status) + assert WebsiteService.get_crawl_status("job-3", "jinareader") == {"status": "active", "job_id": "j"} + jinareader_status.assert_called_once_with("job-3", "k") + + +def test_get_crawl_status_typed_rejects_invalid_provider(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {}))) + with pytest.raises(ValueError, match="Invalid provider"): + WebsiteService.get_crawl_status_typed(WebsiteCrawlStatusApiRequest(provider="bad", job_id="j")) + + +def test_get_firecrawl_status_adds_time_consuming_when_completed_and_cached(monkeypatch: pytest.MonkeyPatch) -> None: + firecrawl_instance = MagicMock() + firecrawl_instance.check_crawl_status.return_value = {"status": "completed", "total": 2, "current": 2, "data": []} + monkeypatch.setattr(website_service_module, "FirecrawlApp", MagicMock(return_value=firecrawl_instance)) + + redis_mock = MagicMock() + redis_mock.get.return_value = b"100.0" + monkeypatch.setattr(website_service_module, "redis_client", redis_mock) + + with patch.object(website_service_module.datetime, "datetime") as datetime_mock: + datetime_mock.now.return_value = datetime.fromtimestamp(105.0, tz=UTC) + result = WebsiteService._get_firecrawl_status(job_id="job-1", api_key="k", config={"base_url": "b"}) + + assert result["status"] == "completed" + assert result["time_consuming"] == "5.00" + redis_mock.delete.assert_called_once_with("website_crawl_job-1") + + +def test_get_firecrawl_status_completed_without_cache_does_not_add_time(monkeypatch: pytest.MonkeyPatch) -> None: + firecrawl_instance = MagicMock() + firecrawl_instance.check_crawl_status.return_value = {"status": "completed"} + monkeypatch.setattr(website_service_module, "FirecrawlApp", MagicMock(return_value=firecrawl_instance)) + + redis_mock = MagicMock() + redis_mock.get.return_value = None + monkeypatch.setattr(website_service_module, "redis_client", redis_mock) + + result = WebsiteService._get_firecrawl_status(job_id="job-1", api_key="k", config={"base_url": None}) + assert result["status"] == "completed" + assert "time_consuming" not in result + redis_mock.delete.assert_not_called() + + +def test_get_watercrawl_status_delegates(monkeypatch: pytest.MonkeyPatch) -> None: + provider_instance = MagicMock() + provider_instance.get_crawl_status.return_value = {"status": "active", "job_id": "w1"} + monkeypatch.setattr(website_service_module, "WaterCrawlProvider", MagicMock(return_value=provider_instance)) + + assert WebsiteService._get_watercrawl_status("job-1", "k", {"base_url": "b"}) == { + "status": "active", + "job_id": "w1", + } + provider_instance.get_crawl_status.assert_called_once_with("job-1") + + +def test_get_jinareader_status_active(monkeypatch: pytest.MonkeyPatch) -> None: + post_mock = MagicMock( + return_value=_DummyHttpxResponse( + { + "data": { + "status": "active", + "urls": ["a", "b"], + "processed": {"a": {}}, + "failed": {"b": {}}, + "duration": 3000, + } + } + ) + ) + monkeypatch.setattr(website_service_module.httpx, "post", post_mock) + + result = WebsiteService._get_jinareader_status("job-1", "k") + assert result["status"] == "active" + assert result["total"] == 2 + assert result["current"] == 2 + assert result["time_consuming"] == 3.0 + assert result["data"] == [] + post_mock.assert_called_once() + + +def test_get_jinareader_status_completed_formats_processed_items(monkeypatch: pytest.MonkeyPatch) -> None: + status_payload = { + "data": { + "status": "completed", + "urls": ["u1"], + "processed": {"u1": {}}, + "failed": {}, + "duration": 1000, + } + } + processed_payload = { + "data": { + "processed": { + "u1": { + "data": { + "title": "t", + "url": "u1", + "description": "d", + "content": "md", + } + } + } + } + } + post_mock = MagicMock(side_effect=[_DummyHttpxResponse(status_payload), _DummyHttpxResponse(processed_payload)]) + monkeypatch.setattr(website_service_module.httpx, "post", post_mock) + + result = WebsiteService._get_jinareader_status("job-1", "k") + assert result["status"] == "completed" + assert result["data"] == [{"title": "t", "source_url": "u1", "description": "d", "markdown": "md"}] + assert post_mock.call_count == 2 + + +def test_get_crawl_url_data_dispatches_invalid_provider() -> None: + with pytest.raises(ValueError, match="Invalid provider"): + WebsiteService.get_crawl_url_data("job-1", "bad", "https://example.com", "tenant-1") + + +def test_get_crawl_url_data_hits_invalid_provider_branch_when_credentials_stubbed( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {}))) + with pytest.raises(ValueError, match="Invalid provider"): + WebsiteService.get_crawl_url_data("job-1", object(), "u", "tenant-1") # type: ignore[arg-type] + + +@pytest.mark.parametrize( + ("provider", "method_name"), + [ + ("firecrawl", "_get_firecrawl_url_data"), + ("watercrawl", "_get_watercrawl_url_data"), + ("jinareader", "_get_jinareader_url_data"), + ], +) +def test_get_crawl_url_data_dispatches(monkeypatch: pytest.MonkeyPatch, provider: str, method_name: str) -> None: + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {"base_url": "b"}))) + impl_mock = MagicMock(return_value={"ok": True}) + monkeypatch.setattr(WebsiteService, method_name, impl_mock) + + result = WebsiteService.get_crawl_url_data("job-1", provider, "u", "tenant-1") + assert result == {"ok": True} + impl_mock.assert_called_once() + + +def test_get_firecrawl_url_data_reads_from_storage_when_present(monkeypatch: pytest.MonkeyPatch) -> None: + stored_list = [{"source_url": "https://example.com", "title": "t"}] + stored = json.dumps(stored_list).encode("utf-8") + + storage_mock = MagicMock() + storage_mock.exists.return_value = True + storage_mock.load_once.return_value = stored + monkeypatch.setattr(website_service_module, "storage", storage_mock) + + monkeypatch.setattr(website_service_module, "FirecrawlApp", MagicMock()) + + result = WebsiteService._get_firecrawl_url_data("job-1", "https://example.com", "k", {"base_url": "b"}) + assert result == {"source_url": "https://example.com", "title": "t"} + assert result is not stored_list[0] + + +def test_get_firecrawl_url_data_returns_none_when_storage_empty(monkeypatch: pytest.MonkeyPatch) -> None: + storage_mock = MagicMock() + storage_mock.exists.return_value = True + storage_mock.load_once.return_value = b"" + monkeypatch.setattr(website_service_module, "storage", storage_mock) + + assert WebsiteService._get_firecrawl_url_data("job-1", "https://example.com", "k", {}) is None + + +def test_get_firecrawl_url_data_raises_when_job_not_completed(monkeypatch: pytest.MonkeyPatch) -> None: + storage_mock = MagicMock() + storage_mock.exists.return_value = False + monkeypatch.setattr(website_service_module, "storage", storage_mock) + + firecrawl_instance = MagicMock() + firecrawl_instance.check_crawl_status.return_value = {"status": "active"} + monkeypatch.setattr(website_service_module, "FirecrawlApp", MagicMock(return_value=firecrawl_instance)) + + with pytest.raises(ValueError, match="Crawl job is not completed"): + WebsiteService._get_firecrawl_url_data("job-1", "https://example.com", "k", {"base_url": None}) + + +def test_get_firecrawl_url_data_returns_none_when_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + storage_mock = MagicMock() + storage_mock.exists.return_value = False + monkeypatch.setattr(website_service_module, "storage", storage_mock) + + firecrawl_instance = MagicMock() + firecrawl_instance.check_crawl_status.return_value = {"status": "completed", "data": [{"source_url": "x"}]} + monkeypatch.setattr(website_service_module, "FirecrawlApp", MagicMock(return_value=firecrawl_instance)) + + assert WebsiteService._get_firecrawl_url_data("job-1", "https://example.com", "k", {"base_url": "b"}) is None + + +def test_get_watercrawl_url_data_delegates(monkeypatch: pytest.MonkeyPatch) -> None: + provider_instance = MagicMock() + provider_instance.get_crawl_url_data.return_value = {"source_url": "u"} + monkeypatch.setattr(website_service_module, "WaterCrawlProvider", MagicMock(return_value=provider_instance)) + + result = WebsiteService._get_watercrawl_url_data("job-1", "u", "k", {"base_url": "b"}) + assert result == {"source_url": "u"} + provider_instance.get_crawl_url_data.assert_called_once_with("job-1", "u") + + +def test_get_jinareader_url_data_without_job_id_success(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + website_service_module.httpx, + "get", + MagicMock(return_value=_DummyHttpxResponse({"code": 200, "data": {"url": "u"}})), + ) + assert WebsiteService._get_jinareader_url_data("", "u", "k") == {"url": "u"} + + +def test_get_jinareader_url_data_without_job_id_failure(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(website_service_module.httpx, "get", MagicMock(return_value=_DummyHttpxResponse({"code": 500}))) + with pytest.raises(ValueError, match="Failed to crawl$"): + WebsiteService._get_jinareader_url_data("", "u", "k") + + +def test_get_jinareader_url_data_with_job_id_completed_returns_matching_item(monkeypatch: pytest.MonkeyPatch) -> None: + status_payload = {"data": {"status": "completed", "processed": {"u1": {}}}} + processed_payload = {"data": {"processed": {"u1": {"data": {"url": "u", "title": "t"}}}}} + + post_mock = MagicMock(side_effect=[_DummyHttpxResponse(status_payload), _DummyHttpxResponse(processed_payload)]) + monkeypatch.setattr(website_service_module.httpx, "post", post_mock) + + assert WebsiteService._get_jinareader_url_data("job-1", "u", "k") == {"url": "u", "title": "t"} + assert post_mock.call_count == 2 + + +def test_get_jinareader_url_data_with_job_id_not_completed_raises(monkeypatch: pytest.MonkeyPatch) -> None: + post_mock = MagicMock(return_value=_DummyHttpxResponse({"data": {"status": "active"}})) + monkeypatch.setattr(website_service_module.httpx, "post", post_mock) + + with pytest.raises(ValueError, match=r"Crawl job is no\s*t completed"): + WebsiteService._get_jinareader_url_data("job-1", "u", "k") + + +def test_get_jinareader_url_data_with_job_id_completed_but_not_found_returns_none( + monkeypatch: pytest.MonkeyPatch, +) -> None: + status_payload = {"data": {"status": "completed", "processed": {"u1": {}}}} + processed_payload = {"data": {"processed": {"u1": {"data": {"url": "other"}}}}} + + post_mock = MagicMock(side_effect=[_DummyHttpxResponse(status_payload), _DummyHttpxResponse(processed_payload)]) + monkeypatch.setattr(website_service_module.httpx, "post", post_mock) + + assert WebsiteService._get_jinareader_url_data("job-1", "u", "k") is None + + +def test_get_scrape_url_data_dispatches_and_rejects_invalid_provider(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(WebsiteService, "_get_credentials_and_config", MagicMock(return_value=("k", {"base_url": "b"}))) + + scrape_mock = MagicMock(return_value={"data": "x"}) + monkeypatch.setattr(WebsiteService, "_scrape_with_firecrawl", scrape_mock) + assert WebsiteService.get_scrape_url_data("firecrawl", "u", "tenant-1", True) == {"data": "x"} + scrape_mock.assert_called_once() + + watercrawl_mock = MagicMock(return_value={"data": "y"}) + monkeypatch.setattr(WebsiteService, "_scrape_with_watercrawl", watercrawl_mock) + assert WebsiteService.get_scrape_url_data("watercrawl", "u", "tenant-1", False) == {"data": "y"} + watercrawl_mock.assert_called_once() + + with pytest.raises(ValueError, match="Invalid provider"): + WebsiteService.get_scrape_url_data("jinareader", "u", "tenant-1", True) + + +def test_scrape_with_firecrawl_calls_app(monkeypatch: pytest.MonkeyPatch) -> None: + firecrawl_instance = MagicMock() + firecrawl_instance.scrape_url.return_value = {"markdown": "m"} + monkeypatch.setattr(website_service_module, "FirecrawlApp", MagicMock(return_value=firecrawl_instance)) + + result = WebsiteService._scrape_with_firecrawl( + request=website_service_module.ScrapeRequest( + provider="firecrawl", + url="u", + tenant_id="tenant-1", + only_main_content=True, + ), + api_key="k", + config={"base_url": "b"}, + ) + assert result == {"markdown": "m"} + firecrawl_instance.scrape_url.assert_called_once_with(url="u", params={"onlyMainContent": True}) + + +def test_scrape_with_watercrawl_calls_provider(monkeypatch: pytest.MonkeyPatch) -> None: + provider_instance = MagicMock() + provider_instance.scrape_url.return_value = {"markdown": "m"} + monkeypatch.setattr(website_service_module, "WaterCrawlProvider", MagicMock(return_value=provider_instance)) + + result = WebsiteService._scrape_with_watercrawl( + request=website_service_module.ScrapeRequest( + provider="watercrawl", + url="u", + tenant_id="tenant-1", + only_main_content=False, + ), + api_key="k", + config={"base_url": "b"}, + ) + assert result == {"markdown": "m"} + provider_instance.scrape_url.assert_called_once_with("u") diff --git a/api/tests/unit_tests/services/test_workflow_service.py b/api/tests/unit_tests/services/test_workflow_service.py index 6b36592c41..8d50b61d63 100644 --- a/api/tests/unit_tests/services/test_workflow_service.py +++ b/api/tests/unit_tests/services/test_workflow_service.py @@ -14,7 +14,7 @@ from unittest.mock import MagicMock, Mock, patch import pytest -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, HttpRequestNode, HttpRequestNodeConfig from libs.datetime_utils import naive_utc_now from models.model import App, AppMode @@ -134,7 +134,7 @@ class TestWorkflowAssociatedDataFactory: return ( (node["id"], node["data"]) for node in nodes - if node.get("data", {}).get("type") == specific_node_type.value + if node.get("data", {}).get("type") == str(specific_node_type) ) # Return all nodes if no filter specified return ((node["id"], node["data"]) for node in nodes) @@ -179,7 +179,7 @@ class TestWorkflowAssociatedDataFactory: { "id": "start", "data": { - "type": NodeType.START.value, + "type": BuiltinNodeTypes.START, "title": "START", "variables": [], }, @@ -204,7 +204,7 @@ class TestWorkflowAssociatedDataFactory: { "id": "llm-1", "data": { - "type": NodeType.LLM.value, + "type": BuiltinNodeTypes.LLM, "title": "LLM", "model": { "provider": "openai", @@ -1074,12 +1074,12 @@ class TestWorkflowService: Used by the UI to populate the node palette and provide sensible defaults when users add new nodes to their workflow. """ - with patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping: + with patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping: # Mock node class with default config mock_node_class = MagicMock() mock_node_class.get_default_config.return_value = {"type": "llm", "config": {}} - mock_mapping.items.return_value = [(NodeType.LLM, {"latest": mock_node_class})] + mock_mapping.return_value = {BuiltinNodeTypes.LLM: {"latest": mock_node_class}} with patch("services.workflow_service.LATEST_VERSION", "latest"): result = workflow_service.get_default_block_configs() @@ -1098,7 +1098,7 @@ class TestWorkflowService: ) with ( - patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping, + patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping, patch("services.workflow_service.LATEST_VERSION", "latest"), patch( "services.workflow_service.build_http_request_config", @@ -1109,10 +1109,10 @@ class TestWorkflowService: mock_http_node_class.get_default_config.return_value = {"type": "http-request", "config": {}} mock_llm_node_class = MagicMock() mock_llm_node_class.get_default_config.return_value = {"type": "llm", "config": {}} - mock_mapping.items.return_value = [ - (NodeType.HTTP_REQUEST, {"latest": mock_http_node_class}), - (NodeType.LLM, {"latest": mock_llm_node_class}), - ] + mock_mapping.return_value = { + BuiltinNodeTypes.HTTP_REQUEST: {"latest": mock_http_node_class}, + BuiltinNodeTypes.LLM: {"latest": mock_llm_node_class}, + } result = workflow_service.get_default_block_configs() @@ -1133,7 +1133,7 @@ class TestWorkflowService: This includes default values for all required and optional parameters. """ with ( - patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping, + patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping, patch("services.workflow_service.LATEST_VERSION", "latest"), ): # Mock node class with default config @@ -1141,23 +1141,21 @@ class TestWorkflowService: mock_config = {"type": "llm", "config": {"provider": "openai"}} mock_node_class.get_default_config.return_value = mock_config - # Create a mock mapping that includes NodeType.LLM - mock_mapping.__contains__.return_value = True - mock_mapping.__getitem__.return_value = {"latest": mock_node_class} + # Create a mock mapping that includes BuiltinNodeTypes.LLM + mock_mapping.return_value = {BuiltinNodeTypes.LLM: {"latest": mock_node_class}} - result = workflow_service.get_default_block_config(NodeType.LLM.value) + result = workflow_service.get_default_block_config(BuiltinNodeTypes.LLM) assert result == mock_config mock_node_class.get_default_config.assert_called_once() def test_get_default_block_config_invalid_node_type(self, workflow_service): """Test get_default_block_config returns empty dict for invalid node type.""" - with patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping: - # Mock mapping to not contain the node type - mock_mapping.__contains__.return_value = False + with patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping: + mock_mapping.return_value = {} # Use a valid NodeType but one that's not in the mapping - result = workflow_service.get_default_block_config(NodeType.LLM.value) + result = workflow_service.get_default_block_config(BuiltinNodeTypes.LLM) assert result == {} @@ -1173,7 +1171,7 @@ class TestWorkflowService: ) with ( - patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping, + patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping, patch("services.workflow_service.LATEST_VERSION", "latest"), patch( "services.workflow_service.build_http_request_config", @@ -1183,10 +1181,9 @@ class TestWorkflowService: mock_node_class = MagicMock() expected = {"type": "http-request", "config": {}} mock_node_class.get_default_config.return_value = expected - mock_mapping.__contains__.return_value = True - mock_mapping.__getitem__.return_value = {"latest": mock_node_class} + mock_mapping.return_value = {BuiltinNodeTypes.HTTP_REQUEST: {"latest": mock_node_class}} - result = workflow_service.get_default_block_config(NodeType.HTTP_REQUEST.value) + result = workflow_service.get_default_block_config(BuiltinNodeTypes.HTTP_REQUEST) assert result == expected mock_build_config.assert_called_once() @@ -1205,18 +1202,17 @@ class TestWorkflowService: ) with ( - patch("services.workflow_service.NODE_TYPE_CLASSES_MAPPING") as mock_mapping, + patch("services.workflow_service.get_node_type_classes_mapping") as mock_mapping, patch("services.workflow_service.LATEST_VERSION", "latest"), patch("services.workflow_service.build_http_request_config") as mock_build_config, ): mock_node_class = MagicMock() expected = {"type": "http-request", "config": {}} mock_node_class.get_default_config.return_value = expected - mock_mapping.__contains__.return_value = True - mock_mapping.__getitem__.return_value = {"latest": mock_node_class} + mock_mapping.return_value = {BuiltinNodeTypes.HTTP_REQUEST: {"latest": mock_node_class}} result = workflow_service.get_default_block_config( - NodeType.HTTP_REQUEST.value, + BuiltinNodeTypes.HTTP_REQUEST, filters={HTTP_REQUEST_CONFIG_FILTER_KEY: provided_config}, ) @@ -1228,14 +1224,14 @@ class TestWorkflowService: def test_get_default_block_config_http_request_malformed_config_raises_value_error(self, workflow_service): with ( patch( - "services.workflow_service.NODE_TYPE_CLASSES_MAPPING", - {NodeType.HTTP_REQUEST: {"latest": HttpRequestNode}}, + "services.workflow_service.get_node_type_classes_mapping", + return_value={BuiltinNodeTypes.HTTP_REQUEST: {"latest": HttpRequestNode}}, ), patch("services.workflow_service.LATEST_VERSION", "latest"), ): with pytest.raises(ValueError, match="http_request_config must be an HttpRequestNodeConfig instance"): workflow_service.get_default_block_config( - NodeType.HTTP_REQUEST.value, + BuiltinNodeTypes.HTTP_REQUEST, filters={HTTP_REQUEST_CONFIG_FILTER_KEY: "invalid"}, ) diff --git a/api/tests/unit_tests/services/tools/test_builtin_tools_manage_service.py b/api/tests/unit_tests/services/tools/test_builtin_tools_manage_service.py new file mode 100644 index 0000000000..439d203c58 --- /dev/null +++ b/api/tests/unit_tests/services/tools/test_builtin_tools_manage_service.py @@ -0,0 +1,455 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from services.tools.builtin_tools_manage_service import BuiltinToolManageService + +MODULE = "services.tools.builtin_tools_manage_service" + + +def _mock_session(mock_session_cls): + """Helper: set up a Session context manager mock and return the inner session.""" + session = MagicMock() + mock_session_cls.return_value.__enter__ = MagicMock(return_value=session) + mock_session_cls.return_value.__exit__ = MagicMock(return_value=False) + return session + + +class TestDeleteCustomOauthClientParams: + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_deletes_and_returns_success(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + + result = BuiltinToolManageService.delete_custom_oauth_client_params("tenant-1", "google") + + assert result == {"result": "success"} + session.query.return_value.filter_by.return_value.delete.assert_called_once() + session.commit.assert_called_once() + + +class TestListBuiltinToolProviderTools: + @patch(f"{MODULE}.ToolLabelManager") + @patch(f"{MODULE}.ToolTransformService") + @patch(f"{MODULE}.ToolManager") + def test_transforms_each_tool(self, mock_manager, mock_transform, mock_labels): + mock_controller = MagicMock() + mock_controller.get_tools.return_value = [MagicMock(), MagicMock()] + mock_manager.get_builtin_provider.return_value = mock_controller + mock_transform.convert_tool_entity_to_api_entity.return_value = MagicMock() + + result = BuiltinToolManageService.list_builtin_tool_provider_tools("tenant-1", "google") + + assert len(result) == 2 + + @patch(f"{MODULE}.ToolLabelManager") + @patch(f"{MODULE}.ToolTransformService") + @patch(f"{MODULE}.ToolManager") + def test_empty_tools(self, mock_manager, mock_transform, mock_labels): + mock_controller = MagicMock() + mock_controller.get_tools.return_value = [] + mock_manager.get_builtin_provider.return_value = mock_controller + + assert BuiltinToolManageService.list_builtin_tool_provider_tools("t", "p") == [] + + +class TestGetBuiltinToolProviderInfo: + @patch(f"{MODULE}.ToolTransformService") + @patch(f"{MODULE}.BuiltinToolManageService.get_builtin_provider") + @patch(f"{MODULE}.ToolManager") + def test_raises_when_not_found(self, mock_manager, mock_get, mock_transform): + mock_get.return_value = None + + with pytest.raises(ValueError, match="you have not added provider"): + BuiltinToolManageService.get_builtin_tool_provider_info("t", "no") + + @patch(f"{MODULE}.ToolTransformService") + @patch(f"{MODULE}.BuiltinToolManageService.get_builtin_provider") + @patch(f"{MODULE}.ToolManager") + def test_clears_original_credentials(self, mock_manager, mock_get, mock_transform): + mock_get.return_value = MagicMock() + entity = MagicMock() + mock_transform.builtin_provider_to_user_provider.return_value = entity + + result = BuiltinToolManageService.get_builtin_tool_provider_info("t", "google") + + assert result.original_credentials == {} + + +class TestListBuiltinProviderCredentialsSchema: + @patch(f"{MODULE}.ToolManager") + def test_returns_schema(self, mock_manager): + mock_manager.get_builtin_provider.return_value.get_credentials_schema_by_type.return_value = [{"f": "k"}] + + result = BuiltinToolManageService.list_builtin_provider_credentials_schema("g", "api_key", "t") + + assert result == [{"f": "k"}] + + +class TestGetBuiltinToolProviderIcon: + @patch(f"{MODULE}.Path") + @patch(f"{MODULE}.ToolManager") + def test_returns_bytes_and_mime(self, mock_manager, mock_path): + mock_manager.get_hardcoded_provider_icon.return_value = ("/icon.svg", "image/svg+xml") + mock_path.return_value.read_bytes.return_value = b"" + + icon, mime = BuiltinToolManageService.get_builtin_tool_provider_icon("google") + + assert icon == b"" + assert mime == "image/svg+xml" + + +class TestIsOauthSystemClientExists: + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_true_when_exists(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + session.query.return_value.filter_by.return_value.first.return_value = MagicMock() + + assert BuiltinToolManageService.is_oauth_system_client_exists("google") is True + + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_false_when_missing(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + session.query.return_value.filter_by.return_value.first.return_value = None + + assert BuiltinToolManageService.is_oauth_system_client_exists("google") is False + + +class TestIsOauthCustomClientEnabled: + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_true_when_enabled(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + session.query.return_value.filter_by.return_value.first.return_value = MagicMock(enabled=True) + + assert BuiltinToolManageService.is_oauth_custom_client_enabled("t", "g") is True + + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_false_when_none(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + session.query.return_value.filter_by.return_value.first.return_value = None + + assert BuiltinToolManageService.is_oauth_custom_client_enabled("t", "g") is False + + +class TestDeleteBuiltinToolProvider: + @patch(f"{MODULE}.BuiltinToolManageService.create_tool_encrypter") + @patch(f"{MODULE}.ToolManager") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_raises_when_not_found(self, mock_db, mock_session_cls, mock_tm, mock_enc): + session = _mock_session(mock_session_cls) + session.query.return_value.where.return_value.first.return_value = None + + with pytest.raises(ValueError, match="you have not added provider"): + BuiltinToolManageService.delete_builtin_tool_provider("t", "p", "id") + + @patch(f"{MODULE}.BuiltinToolManageService.create_tool_encrypter") + @patch(f"{MODULE}.ToolManager") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_deletes_provider_and_clears_cache(self, mock_db, mock_session_cls, mock_tm, mock_enc): + session = _mock_session(mock_session_cls) + db_provider = MagicMock() + session.query.return_value.where.return_value.first.return_value = db_provider + mock_cache = MagicMock() + mock_enc.return_value = (MagicMock(), mock_cache) + + result = BuiltinToolManageService.delete_builtin_tool_provider("t", "p", "c") + + assert result == {"result": "success"} + session.delete.assert_called_once_with(db_provider) + session.commit.assert_called_once() + mock_cache.delete.assert_called_once() + + +class TestSetDefaultProvider: + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_raises_when_not_found(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + session.query.return_value.filter_by.return_value.first.return_value = None + + with pytest.raises(ValueError, match="provider not found"): + BuiltinToolManageService.set_default_provider("t", "u", "p", "id") + + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_sets_default_and_clears_old(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + target = MagicMock() + session.query.return_value.filter_by.return_value.first.return_value = target + + result = BuiltinToolManageService.set_default_provider("t", "u", "p", "id") + + assert result == {"result": "success"} + assert target.is_default is True + session.commit.assert_called_once() + + +class TestUpdateBuiltinToolProvider: + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_raises_when_provider_not_exists(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + session.query.return_value.where.return_value.first.return_value = None + + with pytest.raises(ValueError, match="you have not added provider"): + BuiltinToolManageService.update_builtin_tool_provider("u", "t", "p", "c") + + @patch(f"{MODULE}.BuiltinToolManageService.create_tool_encrypter") + @patch(f"{MODULE}.CredentialType") + @patch(f"{MODULE}.ToolManager") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_updates_credentials_and_commits(self, mock_db, mock_session_cls, mock_tm, mock_cred_type, mock_enc): + session = _mock_session(mock_session_cls) + db_provider = MagicMock(credential_type="api_key", credentials="{}") + session.query.return_value.where.return_value.first.return_value = db_provider + + mock_cred_instance = MagicMock() + mock_cred_instance.is_editable.return_value = True + mock_cred_instance.is_validate_allowed.return_value = False + mock_cred_type.of.return_value = mock_cred_instance + + mock_controller = MagicMock(need_credentials=True) + mock_tm.get_builtin_provider.return_value = mock_controller + + mock_encrypter = MagicMock() + mock_encrypter.decrypt.return_value = {"key": "old"} + mock_encrypter.encrypt.return_value = {"key": "new"} + mock_cache = MagicMock() + mock_enc.return_value = (mock_encrypter, mock_cache) + + result = BuiltinToolManageService.update_builtin_tool_provider("u", "t", "p", "c", credentials={"key": "val"}) + + assert result == {"result": "success"} + session.commit.assert_called_once() + mock_cache.delete.assert_called_once() + + +class TestGetOauthClientSchema: + @patch(f"{MODULE}.BuiltinToolManageService.get_custom_oauth_client_params", return_value={}) + @patch(f"{MODULE}.BuiltinToolManageService.is_oauth_system_client_exists", return_value=False) + @patch(f"{MODULE}.BuiltinToolManageService.is_oauth_custom_client_enabled", return_value=True) + @patch(f"{MODULE}.dify_config") + @patch(f"{MODULE}.PluginService") + @patch(f"{MODULE}.ToolManager") + def test_returns_schema_dict(self, mock_tm, mock_plugin, mock_config, mock_enabled, mock_sys, mock_params): + mock_config.CONSOLE_API_URL = "https://api.example.com" + mock_controller = MagicMock() + mock_controller.get_oauth_client_schema.return_value = [] + mock_tm.get_builtin_provider.return_value = mock_controller + + result = BuiltinToolManageService.get_builtin_tool_provider_oauth_client_schema("t", "google") + + assert "schema" in result + assert result["is_oauth_custom_client_enabled"] is True + assert "redirect_uri" in result + + +class TestGetOauthClient: + @patch(f"{MODULE}.PluginService") + @patch(f"{MODULE}.create_provider_encrypter") + @patch(f"{MODULE}.ToolManager") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_returns_user_client_params_when_exists( + self, mock_db, mock_session_cls, mock_tm, mock_create_enc, mock_plugin + ): + session = _mock_session(mock_session_cls) + mock_controller = MagicMock() + mock_controller.get_oauth_client_schema.return_value = [] + mock_tm.get_builtin_provider.return_value = mock_controller + + mock_encrypter = MagicMock() + mock_encrypter.decrypt.return_value = {"client_id": "id", "client_secret": "secret"} + mock_create_enc.return_value = (mock_encrypter, MagicMock()) + + user_client = MagicMock(oauth_params='{"encrypted": "data"}') + session.query.return_value.filter_by.return_value.first.return_value = user_client + + result = BuiltinToolManageService.get_oauth_client("t", "google") + + assert result == {"client_id": "id", "client_secret": "secret"} + + @patch(f"{MODULE}.decrypt_system_oauth_params", return_value={"sys_key": "sys_val"}) + @patch(f"{MODULE}.PluginService") + @patch(f"{MODULE}.create_provider_encrypter") + @patch(f"{MODULE}.ToolManager") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_falls_back_to_system_client( + self, mock_db, mock_session_cls, mock_tm, mock_create_enc, mock_plugin, mock_decrypt + ): + session = _mock_session(mock_session_cls) + mock_controller = MagicMock() + mock_controller.get_oauth_client_schema.return_value = [] + mock_tm.get_builtin_provider.return_value = mock_controller + + mock_create_enc.return_value = (MagicMock(), MagicMock()) + + system_client = MagicMock(encrypted_oauth_params="enc") + session.query.return_value.filter_by.return_value.first.side_effect = [ + None, # user client + system_client, # system client + ] + + result = BuiltinToolManageService.get_oauth_client("t", "google") + + assert result == {"sys_key": "sys_val"} + + +class TestSaveCustomOauthClientParams: + def test_returns_early_when_no_params(self): + result = BuiltinToolManageService.save_custom_oauth_client_params("t", "p") + assert result == {"result": "success"} + + @patch(f"{MODULE}.ToolManager") + def test_raises_when_provider_not_found(self, mock_tm): + mock_tm.get_builtin_provider.return_value = None + + with pytest.raises((ValueError, Exception), match="not found|Provider"): + BuiltinToolManageService.save_custom_oauth_client_params("t", "p", enable_oauth_custom_client=True) + + +class TestGetCustomOauthClientParams: + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_returns_empty_when_none(self, mock_db, mock_session_cls): + session = _mock_session(mock_session_cls) + session.query.return_value.filter_by.return_value.first.return_value = None + + result = BuiltinToolManageService.get_custom_oauth_client_params("t", "p") + + assert result == {} + + +class TestGetBuiltinToolProviderCredentialInfo: + @patch(f"{MODULE}.BuiltinToolManageService.is_oauth_custom_client_enabled", return_value=False) + @patch(f"{MODULE}.BuiltinToolManageService.get_builtin_tool_provider_credentials", return_value=[]) + @patch(f"{MODULE}.ToolManager") + def test_returns_credential_info(self, mock_tm, mock_creds, mock_oauth): + mock_tm.get_builtin_provider.return_value.get_supported_credential_types.return_value = ["api-key"] + + result = BuiltinToolManageService.get_builtin_tool_provider_credential_info("t", "google") + + assert result.credentials == [] + assert result.supported_credential_types == ["api-key"] + assert result.is_oauth_custom_client_enabled is False + + +class TestGetBuiltinToolProviderCredentials: + @patch(f"{MODULE}.db") + def test_returns_empty_when_no_providers(self, mock_db): + mock_db.session.no_autoflush.__enter__ = MagicMock(return_value=None) + mock_db.session.no_autoflush.__exit__ = MagicMock(return_value=False) + mock_db.session.query.return_value.filter_by.return_value.order_by.return_value.all.return_value = [] + + result = BuiltinToolManageService.get_builtin_tool_provider_credentials("t", "google") + + assert result == [] + + @patch(f"{MODULE}.ToolTransformService") + @patch(f"{MODULE}.BuiltinToolManageService.create_tool_encrypter") + @patch(f"{MODULE}.ToolManager") + @patch(f"{MODULE}.db") + def test_returns_credential_entities(self, mock_db, mock_tm, mock_enc, mock_transform): + mock_db.session.no_autoflush.__enter__ = MagicMock(return_value=None) + mock_db.session.no_autoflush.__exit__ = MagicMock(return_value=False) + + provider = MagicMock(provider="google", is_default=False) + mock_db.session.query.return_value.filter_by.return_value.order_by.return_value.all.return_value = [provider] + + mock_encrypter = MagicMock() + mock_encrypter.decrypt.return_value = {"key": "decrypted"} + mock_encrypter.mask_plugin_credentials.return_value = {"key": "***"} + mock_enc.return_value = (mock_encrypter, MagicMock()) + + credential_entity = MagicMock() + mock_transform.convert_builtin_provider_to_credential_entity.return_value = credential_entity + + result = BuiltinToolManageService.get_builtin_tool_provider_credentials("t", "google") + + assert len(result) == 1 + assert result[0] is credential_entity + assert provider.is_default is True + + +class TestGetBuiltinProvider: + @patch(f"{MODULE}.ToolProviderID") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_returns_none_when_not_found(self, mock_db, mock_session_cls, mock_prov_id): + session = _mock_session(mock_session_cls) + mock_prov_id.return_value.provider_name = "google" + mock_prov_id.return_value.organization = "langgenius" + session.query.return_value.where.return_value.order_by.return_value.first.return_value = None + + result = BuiltinToolManageService.get_builtin_provider("google", "t") + + assert result is None + + @patch(f"{MODULE}.ToolProviderID") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_returns_provider_for_langgenius_org(self, mock_db, mock_session_cls, mock_prov_id): + session = _mock_session(mock_session_cls) + mock_prov_id.return_value.provider_name = "google" + mock_prov_id.return_value.organization = "langgenius" + db_provider = MagicMock(provider="google") + mock_prov_id_result = MagicMock() + mock_prov_id_result.to_string.return_value = "langgenius/google/google" + + def prov_id_side_effect(name): + m = MagicMock() + m.provider_name = "google" + m.organization = "langgenius" + m.to_string.return_value = "langgenius/google/google" + m.plugin_id = "langgenius/google" + return m + + mock_prov_id.side_effect = prov_id_side_effect + session.query.return_value.where.return_value.order_by.return_value.first.return_value = db_provider + + result = BuiltinToolManageService.get_builtin_provider("google", "t") + + assert result is db_provider + + @patch(f"{MODULE}.ToolProviderID") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_returns_provider_for_non_langgenius_org(self, mock_db, mock_session_cls, mock_prov_id): + session = _mock_session(mock_session_cls) + + def prov_id_side_effect(name): + m = MagicMock() + m.provider_name = "custom-tool" + m.organization = "third-party" + m.to_string.return_value = "third-party/custom/custom-tool" + m.plugin_id = "third-party/custom" + return m + + mock_prov_id.side_effect = prov_id_side_effect + db_provider = MagicMock(provider="third-party/custom/custom-tool") + session.query.return_value.where.return_value.order_by.return_value.first.return_value = db_provider + + result = BuiltinToolManageService.get_builtin_provider("third-party/custom/custom-tool", "t") + + assert result is db_provider + + @patch(f"{MODULE}.ToolProviderID") + @patch(f"{MODULE}.Session") + @patch(f"{MODULE}.db") + def test_falls_back_on_exception(self, mock_db, mock_session_cls, mock_prov_id): + session = _mock_session(mock_session_cls) + mock_prov_id.side_effect = Exception("parse error") + fallback = MagicMock() + session.query.return_value.where.return_value.order_by.return_value.first.return_value = fallback + + result = BuiltinToolManageService.get_builtin_provider("old-provider", "t") + + assert result is fallback diff --git a/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py b/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py index 7511fd6f0c..9537d207f0 100644 --- a/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py +++ b/api/tests/unit_tests/services/tools/test_mcp_tools_transform.py @@ -7,7 +7,7 @@ import pytest from core.mcp.types import Tool as MCPTool from core.tools.entities.api_entities import ToolApiEntity, ToolProviderApiEntity from core.tools.entities.common_entities import I18nObject -from core.tools.entities.tool_entities import ToolProviderType +from core.tools.entities.tool_entities import ToolParameter, ToolProviderType from models.tools import MCPToolProvider from services.tools.tools_transform_service import ToolTransformService @@ -175,6 +175,137 @@ class TestMCPToolTransform: # The actual parameter conversion is handled by convert_mcp_schema_to_parameter # which should be tested separately + def test_convert_mcp_schema_to_parameter_preserves_anyof_object_type(self): + """Nullable object schemas should keep the object parameter type.""" + schema = { + "type": "object", + "properties": { + "retrieval_model": { + "anyOf": [{"type": "object"}, {"type": "null"}], + "description": "检索模型配置", + } + }, + } + + result = ToolTransformService.convert_mcp_schema_to_parameter(schema) + + assert len(result) == 1 + assert result[0].name == "retrieval_model" + assert result[0].type == ToolParameter.ToolParameterType.OBJECT + assert result[0].input_schema == schema["properties"]["retrieval_model"] + + def test_convert_mcp_schema_to_parameter_preserves_oneof_object_type(self): + """Nullable oneOf object schemas should keep the object parameter type.""" + schema = { + "type": "object", + "properties": { + "retrieval_model": { + "oneOf": [{"type": "object"}, {"type": "null"}], + "description": "检索模型配置", + } + }, + } + + result = ToolTransformService.convert_mcp_schema_to_parameter(schema) + + assert len(result) == 1 + assert result[0].name == "retrieval_model" + assert result[0].type == ToolParameter.ToolParameterType.OBJECT + assert result[0].input_schema == schema["properties"]["retrieval_model"] + + def test_convert_mcp_schema_to_parameter_handles_null_type(self): + """Schemas with only a null type should fall back to string.""" + schema = { + "type": "object", + "properties": { + "null_prop_str": {"type": "null"}, + "null_prop_list": {"type": ["null"]}, + }, + } + + result = ToolTransformService.convert_mcp_schema_to_parameter(schema) + + assert len(result) == 2 + param_map = {parameter.name: parameter for parameter in result} + assert "null_prop_str" in param_map + assert param_map["null_prop_str"].type == ToolParameter.ToolParameterType.STRING + assert "null_prop_list" in param_map + assert param_map["null_prop_list"].type == ToolParameter.ToolParameterType.STRING + + def test_convert_mcp_schema_to_parameter_preserves_allof_object_type_with_multiple_object_items(self): + """Property-level allOf with multiple object items should still resolve to object.""" + schema = { + "type": "object", + "properties": { + "config": { + "allOf": [ + { + "type": "object", + "properties": { + "enabled": {"type": "boolean"}, + }, + "required": ["enabled"], + }, + { + "type": "object", + "properties": { + "priority": {"type": "integer", "minimum": 1, "maximum": 10}, + }, + "required": ["priority"], + }, + ], + "description": "Config must match all schemas (allOf)", + } + }, + } + + result = ToolTransformService.convert_mcp_schema_to_parameter(schema) + + assert len(result) == 1 + assert result[0].name == "config" + assert result[0].type == ToolParameter.ToolParameterType.OBJECT + assert result[0].input_schema == schema["properties"]["config"] + + def test_convert_mcp_schema_to_parameter_preserves_allof_object_type(self): + """Composed property schemas should keep the object parameter type.""" + schema = { + "type": "object", + "properties": { + "retrieval_model": { + "allOf": [ + {"type": "object"}, + {"properties": {"top_k": {"type": "integer"}}}, + ], + "description": "检索模型配置", + } + }, + } + + result = ToolTransformService.convert_mcp_schema_to_parameter(schema) + + assert len(result) == 1 + assert result[0].name == "retrieval_model" + assert result[0].type == ToolParameter.ToolParameterType.OBJECT + assert result[0].input_schema == schema["properties"]["retrieval_model"] + + def test_convert_mcp_schema_to_parameter_limits_recursive_schema_depth(self): + """Self-referential composed schemas should stop resolving after the configured max depth.""" + recursive_property: dict[str, object] = {"description": "Recursive schema"} + recursive_property["anyOf"] = [recursive_property] + schema = { + "type": "object", + "properties": { + "recursive_config": recursive_property, + }, + } + + result = ToolTransformService.convert_mcp_schema_to_parameter(schema) + + assert len(result) == 1 + assert result[0].name == "recursive_config" + assert result[0].type == ToolParameter.ToolParameterType.STRING + assert result[0].input_schema is None + def test_mcp_provider_to_user_provider_for_list(self, mock_provider_full): """Test mcp_provider_to_user_provider with for_list=True.""" # Set tools data with null description diff --git a/api/tests/unit_tests/services/tools/test_tool_labels_service.py b/api/tests/unit_tests/services/tools/test_tool_labels_service.py new file mode 100644 index 0000000000..6acdbb7901 --- /dev/null +++ b/api/tests/unit_tests/services/tools/test_tool_labels_service.py @@ -0,0 +1,21 @@ +from services.tools.tool_labels_service import ToolLabelsService + + +def test_list_tool_labels_returns_default_labels(): + result = ToolLabelsService.list_tool_labels() + assert isinstance(result, list) + assert len(result) > 0 + + +def test_list_tool_labels_items_are_tool_labels(): + from core.tools.entities.tool_entities import ToolLabel + + result = ToolLabelsService.list_tool_labels() + for label in result: + assert isinstance(label, ToolLabel) + + +def test_list_tool_labels_matches_default_values(): + from core.tools.entities.values import default_tool_labels + + assert ToolLabelsService.list_tool_labels() is default_tool_labels diff --git a/api/tests/unit_tests/services/tools/test_tools_manage_service.py b/api/tests/unit_tests/services/tools/test_tools_manage_service.py new file mode 100644 index 0000000000..73ac9a10c6 --- /dev/null +++ b/api/tests/unit_tests/services/tools/test_tools_manage_service.py @@ -0,0 +1,40 @@ +from unittest.mock import MagicMock, patch + +from services.tools.tools_manage_service import ToolCommonService + + +class TestToolCommonService: + @patch("services.tools.tools_manage_service.ToolTransformService") + @patch("services.tools.tools_manage_service.ToolManager") + def test_list_tool_providers_transforms_and_returns(self, mock_manager, mock_transform): + mock_provider1 = MagicMock() + mock_provider1.to_dict.return_value = {"name": "provider1"} + mock_provider2 = MagicMock() + mock_provider2.to_dict.return_value = {"name": "provider2"} + mock_manager.list_providers_from_api.return_value = [mock_provider1, mock_provider2] + + result = ToolCommonService.list_tool_providers("user-1", "tenant-1") + + mock_manager.list_providers_from_api.assert_called_once_with("user-1", "tenant-1", None) + assert mock_transform.repack_provider.call_count == 2 + assert result == [{"name": "provider1"}, {"name": "provider2"}] + + @patch("services.tools.tools_manage_service.ToolTransformService") + @patch("services.tools.tools_manage_service.ToolManager") + def test_list_tool_providers_with_type_filter(self, mock_manager, mock_transform): + mock_manager.list_providers_from_api.return_value = [] + + result = ToolCommonService.list_tool_providers("user-1", "tenant-1", typ="builtin") + + mock_manager.list_providers_from_api.assert_called_once_with("user-1", "tenant-1", "builtin") + assert result == [] + + @patch("services.tools.tools_manage_service.ToolTransformService") + @patch("services.tools.tools_manage_service.ToolManager") + def test_list_tool_providers_empty(self, mock_manager, mock_transform): + mock_manager.list_providers_from_api.return_value = [] + + result = ToolCommonService.list_tool_providers("u", "t") + + assert result == [] + mock_transform.repack_provider.assert_not_called() diff --git a/api/tests/unit_tests/services/workflow/test_queue_dispatcher.py b/api/tests/unit_tests/services/workflow/test_queue_dispatcher.py new file mode 100644 index 0000000000..bbfc1cc294 --- /dev/null +++ b/api/tests/unit_tests/services/workflow/test_queue_dispatcher.py @@ -0,0 +1,110 @@ +from unittest.mock import patch + +import pytest + +from services.workflow.queue_dispatcher import ( + BaseQueueDispatcher, + ProfessionalQueueDispatcher, + QueueDispatcherManager, + QueuePriority, + SandboxQueueDispatcher, + TeamQueueDispatcher, +) + + +class TestQueuePriority: + def test_priority_values(self): + assert QueuePriority.PROFESSIONAL == "workflow_professional" + assert QueuePriority.TEAM == "workflow_team" + assert QueuePriority.SANDBOX == "workflow_sandbox" + + +class TestDispatchers: + def test_professional_dispatcher(self): + d = ProfessionalQueueDispatcher() + assert d.get_queue_name() == QueuePriority.PROFESSIONAL + assert d.get_priority() == 100 + + def test_team_dispatcher(self): + d = TeamQueueDispatcher() + assert d.get_queue_name() == QueuePriority.TEAM + assert d.get_priority() == 50 + + def test_sandbox_dispatcher(self): + d = SandboxQueueDispatcher() + assert d.get_queue_name() == QueuePriority.SANDBOX + assert d.get_priority() == 10 + + def test_base_dispatcher_is_abstract(self): + with pytest.raises(TypeError): + BaseQueueDispatcher() + + +class TestQueueDispatcherManager: + @patch("services.workflow.queue_dispatcher.BillingService") + @patch("services.workflow.queue_dispatcher.dify_config") + def test_billing_enabled_professional_plan(self, mock_config, mock_billing): + mock_config.BILLING_ENABLED = True + mock_billing.get_info.return_value = {"subscription": {"plan": "professional"}} + + dispatcher = QueueDispatcherManager.get_dispatcher("tenant-1") + + assert isinstance(dispatcher, ProfessionalQueueDispatcher) + + @patch("services.workflow.queue_dispatcher.BillingService") + @patch("services.workflow.queue_dispatcher.dify_config") + def test_billing_enabled_team_plan(self, mock_config, mock_billing): + mock_config.BILLING_ENABLED = True + mock_billing.get_info.return_value = {"subscription": {"plan": "team"}} + + dispatcher = QueueDispatcherManager.get_dispatcher("tenant-1") + + assert isinstance(dispatcher, TeamQueueDispatcher) + + @patch("services.workflow.queue_dispatcher.BillingService") + @patch("services.workflow.queue_dispatcher.dify_config") + def test_billing_enabled_sandbox_plan(self, mock_config, mock_billing): + mock_config.BILLING_ENABLED = True + mock_billing.get_info.return_value = {"subscription": {"plan": "sandbox"}} + + dispatcher = QueueDispatcherManager.get_dispatcher("tenant-1") + + assert isinstance(dispatcher, SandboxQueueDispatcher) + + @patch("services.workflow.queue_dispatcher.BillingService") + @patch("services.workflow.queue_dispatcher.dify_config") + def test_billing_enabled_unknown_plan_defaults_to_sandbox(self, mock_config, mock_billing): + mock_config.BILLING_ENABLED = True + mock_billing.get_info.return_value = {"subscription": {"plan": "enterprise"}} + + dispatcher = QueueDispatcherManager.get_dispatcher("tenant-1") + + assert isinstance(dispatcher, SandboxQueueDispatcher) + + @patch("services.workflow.queue_dispatcher.BillingService") + @patch("services.workflow.queue_dispatcher.dify_config") + def test_billing_enabled_service_failure_defaults_to_sandbox(self, mock_config, mock_billing): + mock_config.BILLING_ENABLED = True + mock_billing.get_info.side_effect = Exception("billing unavailable") + + dispatcher = QueueDispatcherManager.get_dispatcher("tenant-1") + + assert isinstance(dispatcher, SandboxQueueDispatcher) + + @patch("services.workflow.queue_dispatcher.dify_config") + def test_billing_disabled_defaults_to_team(self, mock_config): + mock_config.BILLING_ENABLED = False + + dispatcher = QueueDispatcherManager.get_dispatcher("tenant-1") + + assert isinstance(dispatcher, TeamQueueDispatcher) + + @patch("services.workflow.queue_dispatcher.BillingService") + @patch("services.workflow.queue_dispatcher.dify_config") + def test_missing_subscription_key_defaults_to_sandbox(self, mock_config, mock_billing): + mock_config.BILLING_ENABLED = True + mock_billing.get_info.return_value = {} + + dispatcher = QueueDispatcherManager.get_dispatcher("tenant-1") + + assert isinstance(dispatcher, SandboxQueueDispatcher) diff --git a/api/tests/unit_tests/services/workflow/test_scheduler.py b/api/tests/unit_tests/services/workflow/test_scheduler.py new file mode 100644 index 0000000000..90b6cb2d8b --- /dev/null +++ b/api/tests/unit_tests/services/workflow/test_scheduler.py @@ -0,0 +1,89 @@ +import pytest + +from services.workflow.entities import WorkflowScheduleCFSPlanEntity +from services.workflow.scheduler import CFSPlanScheduler, SchedulerCommand + + +class TestSchedulerCommand: + def test_enum_values(self): + assert SchedulerCommand.RESOURCE_LIMIT_REACHED == "resource_limit_reached" + assert SchedulerCommand.NONE == "none" + + def test_enum_is_str(self): + for member in SchedulerCommand: + assert isinstance(member, str) + + +class TestCFSPlanScheduler: + def test_stores_plan(self): + plan = WorkflowScheduleCFSPlanEntity( + schedule_strategy=WorkflowScheduleCFSPlanEntity.Strategy.Nop, + granularity=-1, + ) + + class ConcretePlanScheduler(CFSPlanScheduler): + def can_schedule(self): + return SchedulerCommand.NONE + + scheduler = ConcretePlanScheduler(plan) + + assert scheduler.plan is plan + assert scheduler.plan.schedule_strategy == WorkflowScheduleCFSPlanEntity.Strategy.Nop + assert scheduler.plan.granularity == -1 + + def test_cannot_instantiate_abstract(self): + plan = WorkflowScheduleCFSPlanEntity( + schedule_strategy=WorkflowScheduleCFSPlanEntity.Strategy.TimeSlice, + granularity=10, + ) + with pytest.raises(TypeError): + CFSPlanScheduler(plan) + + def test_concrete_subclass_can_schedule(self): + plan = WorkflowScheduleCFSPlanEntity( + schedule_strategy=WorkflowScheduleCFSPlanEntity.Strategy.TimeSlice, + granularity=5, + ) + + class TimedScheduler(CFSPlanScheduler): + def can_schedule(self): + if self.plan.granularity > 0: + return SchedulerCommand.NONE + return SchedulerCommand.RESOURCE_LIMIT_REACHED + + scheduler = TimedScheduler(plan) + assert scheduler.can_schedule() == SchedulerCommand.NONE + + def test_concrete_subclass_resource_limit(self): + plan = WorkflowScheduleCFSPlanEntity( + schedule_strategy=WorkflowScheduleCFSPlanEntity.Strategy.TimeSlice, + granularity=-1, + ) + + class TimedScheduler(CFSPlanScheduler): + def can_schedule(self): + if self.plan.granularity > 0: + return SchedulerCommand.NONE + return SchedulerCommand.RESOURCE_LIMIT_REACHED + + scheduler = TimedScheduler(plan) + assert scheduler.can_schedule() == SchedulerCommand.RESOURCE_LIMIT_REACHED + + +class TestWorkflowScheduleCFSPlanEntity: + def test_strategy_values(self): + assert WorkflowScheduleCFSPlanEntity.Strategy.TimeSlice == "time-slice" + assert WorkflowScheduleCFSPlanEntity.Strategy.Nop == "nop" + + def test_default_granularity(self): + plan = WorkflowScheduleCFSPlanEntity( + schedule_strategy=WorkflowScheduleCFSPlanEntity.Strategy.Nop, + ) + assert plan.granularity == -1 + + def test_explicit_granularity(self): + plan = WorkflowScheduleCFSPlanEntity( + schedule_strategy=WorkflowScheduleCFSPlanEntity.Strategy.TimeSlice, + granularity=100, + ) + assert plan.granularity == 100 diff --git a/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py b/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py index 4042e05565..9f3874b8f1 100644 --- a/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py +++ b/api/tests/unit_tests/services/workflow/test_workflow_draft_variable_service.py @@ -8,7 +8,7 @@ from sqlalchemy import Engine from sqlalchemy.orm import Session from dify_graph.constants import SYSTEM_VARIABLE_NODE_ID -from dify_graph.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from dify_graph.variables.segments import StringSegment from dify_graph.variables.types import SegmentType from libs.uuid_utils import uuidv7 @@ -54,12 +54,12 @@ class TestDraftVariableSaver: session=mock_session, app_id=test_app_id, node_id="test_node_id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, node_execution_id="test_execution_id", user=mock_user, ) - assert saver._should_variable_be_visible("123_456", NodeType.IF_ELSE, "output") == False - assert saver._should_variable_be_visible("123", NodeType.START, "output") == True + assert saver._should_variable_be_visible("123_456", BuiltinNodeTypes.IF_ELSE, "output") == False + assert saver._should_variable_be_visible("123", BuiltinNodeTypes.START, "output") == True def test__normalize_variable_for_start_node(self): @dataclasses.dataclass(frozen=True) @@ -102,7 +102,7 @@ class TestDraftVariableSaver: session=mock_session, app_id=test_app_id, node_id=_NODE_ID, - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, node_execution_id="test_execution_id", user=mock_user, ) @@ -134,7 +134,7 @@ class TestDraftVariableSaver: session=mock_session, app_id="test-app-id", node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, node_execution_id="test-execution-id", user=mock_user, ) @@ -331,7 +331,7 @@ class TestWorkflowDraftVariableService: mock_node_config = {"type": "test_node"} with ( patch.object(workflow, "get_node_config_by_id", return_value=mock_node_config, autospec=True), - patch.object(workflow, "get_node_type_from_node_config", return_value=NodeType.LLM, autospec=True), + patch.object(workflow, "get_node_type_from_node_config", return_value=BuiltinNodeTypes.LLM, autospec=True), ): result = service._reset_node_var_or_sys_var(workflow, variable) diff --git a/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py b/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py index 5d6fa4c137..c890ab6a65 100644 --- a/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py +++ b/api/tests/unit_tests/services/workflow/test_workflow_human_input_delivery.py @@ -5,7 +5,8 @@ from unittest.mock import MagicMock import pytest from sqlalchemy.orm import sessionmaker -from dify_graph.enums import NodeType +from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.human_input.entities import ( EmailDeliveryConfig, EmailDeliveryMethod, @@ -22,7 +23,7 @@ def _make_service() -> WorkflowService: return WorkflowService(session_maker=sessionmaker()) -def _build_node_config(delivery_methods): +def _build_node_config(delivery_methods: list[EmailDeliveryMethod]) -> NodeConfigDict: node_data = HumanInputNodeData( title="Human Input", delivery_methods=delivery_methods, @@ -30,8 +31,8 @@ def _build_node_config(delivery_methods): inputs=[], user_actions=[], ).model_dump(mode="json") - node_data["type"] = NodeType.HUMAN_INPUT.value - return {"id": "node-1", "data": node_data} + node_data["type"] = BuiltinNodeTypes.HUMAN_INPUT + return NodeConfigDictAdapter.validate_python({"id": "node-1", "data": node_data}) def _make_email_method(enabled: bool = True, debug_mode: bool = False) -> EmailDeliveryMethod: diff --git a/api/tests/unit_tests/services/workflow/test_workflow_service.py b/api/tests/unit_tests/services/workflow/test_workflow_service.py index 83c1f8d9da..ed26bcec01 100644 --- a/api/tests/unit_tests/services/workflow/test_workflow_service.py +++ b/api/tests/unit_tests/services/workflow/test_workflow_service.py @@ -4,7 +4,8 @@ from unittest.mock import MagicMock import pytest -from dify_graph.enums import NodeType +from dify_graph.entities.graph_config import NodeConfigDictAdapter +from dify_graph.enums import BuiltinNodeTypes from dify_graph.nodes.human_input.entities import FormInput, HumanInputNodeData, UserAction from dify_graph.nodes.human_input.enums import FormInputType from models.model import App @@ -40,6 +41,23 @@ class TestWorkflowService: workflows.append(workflow) return workflows + @pytest.fixture + def dummy_session_cls(self): + class DummySession: + def __init__(self, *args, **kwargs): + self.commit = MagicMock() + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def begin(self): + return nullcontext() + + return DummySession + def test_get_all_published_workflow_no_workflow_id(self, workflow_service, mock_app): mock_app.workflow_id = None mock_session = MagicMock() @@ -169,7 +187,10 @@ class TestWorkflowService: mock_session.scalars.assert_called_once() def test_submit_human_input_form_preview_uses_rendered_content( - self, workflow_service: WorkflowService, monkeypatch: pytest.MonkeyPatch + self, + workflow_service: WorkflowService, + monkeypatch: pytest.MonkeyPatch, + dummy_session_cls, ) -> None: service = workflow_service node_data = HumanInputNodeData( @@ -187,25 +208,15 @@ class TestWorkflowService: service._build_human_input_node = MagicMock(return_value=node) # type: ignore[method-assign] workflow = MagicMock() - workflow.get_node_config_by_id.return_value = {"id": "node-1", "data": {"type": NodeType.HUMAN_INPUT.value}} + node_config = NodeConfigDictAdapter.validate_python( + {"id": "node-1", "data": {"type": BuiltinNodeTypes.HUMAN_INPUT}} + ) + workflow.get_node_config_by_id.return_value = node_config workflow.get_enclosing_node_type_and_id.return_value = None service.get_draft_workflow = MagicMock(return_value=workflow) # type: ignore[method-assign] saved_outputs: dict[str, object] = {} - class DummySession: - def __init__(self, *args, **kwargs): - self.commit = MagicMock() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - return False - - def begin(self): - return nullcontext() - class DummySaver: def __init__(self, *args, **kwargs): pass @@ -213,7 +224,7 @@ class TestWorkflowService: def save(self, outputs, process_data): saved_outputs.update(outputs) - monkeypatch.setattr(workflow_service_module, "Session", DummySession) + monkeypatch.setattr(workflow_service_module, "Session", dummy_session_cls) monkeypatch.setattr(workflow_service_module, "DraftVariableSaver", DummySaver) monkeypatch.setattr(workflow_service_module, "db", SimpleNamespace(engine=MagicMock())) @@ -232,7 +243,7 @@ class TestWorkflowService: service._build_human_input_variable_pool.assert_called_once_with( app_model=app_model, workflow=workflow, - node_config={"id": "node-1", "data": {"type": NodeType.HUMAN_INPUT.value}}, + node_config=node_config, manual_inputs={"#node-0.result#": "LLM output"}, ) @@ -267,12 +278,13 @@ class TestWorkflowService: service._build_human_input_node = MagicMock(return_value=node) # type: ignore[method-assign] workflow = MagicMock() - workflow.get_node_config_by_id.return_value = {"id": "node-1", "data": {"type": NodeType.HUMAN_INPUT.value}} + workflow.get_node_config_by_id.return_value = NodeConfigDictAdapter.validate_python( + {"id": "node-1", "data": {"type": BuiltinNodeTypes.HUMAN_INPUT}} + ) service.get_draft_workflow = MagicMock(return_value=workflow) # type: ignore[method-assign] app_model = SimpleNamespace(id="app-1", tenant_id="tenant-1") account = SimpleNamespace(id="account-1") - with pytest.raises(ValueError) as exc_info: service.submit_human_input_form_preview( app_model=app_model, @@ -284,3 +296,119 @@ class TestWorkflowService: ) assert "Missing required inputs" in str(exc_info.value) + + def test_run_draft_workflow_node_successful_behavior( + self, workflow_service, mock_app, monkeypatch, dummy_session_cls + ): + """Behavior: When a basic workflow node runs, it correctly sets up context, + executes the node, and saves outputs.""" + service = workflow_service + account = SimpleNamespace(id="account-1") + mock_workflow = MagicMock() + mock_workflow.id = "wf-1" + mock_workflow.tenant_id = "tenant-1" + mock_workflow.environment_variables = [] + mock_workflow.conversation_variables = [] + + # Mock node config + mock_workflow.get_node_config_by_id.return_value = NodeConfigDictAdapter.validate_python( + {"id": "node-1", "data": {"type": BuiltinNodeTypes.LLM}} + ) + mock_workflow.get_enclosing_node_type_and_id.return_value = None + + # Mock class methods + monkeypatch.setattr(workflow_service_module, "WorkflowDraftVariableService", MagicMock()) + monkeypatch.setattr(workflow_service_module, "DraftVarLoader", MagicMock()) + + # Mock workflow entry execution + mock_node_exec = MagicMock() + mock_node_exec.id = "exec-1" + mock_node_exec.process_data = {} + mock_run = MagicMock() + monkeypatch.setattr(workflow_service_module.WorkflowEntry, "single_step_run", mock_run) + + # Mock execution handling + service._handle_single_step_result = MagicMock(return_value=mock_node_exec) + + # Mock repository + mock_repo = MagicMock() + mock_repo.get_execution_by_id.return_value = mock_node_exec + mock_repo_factory = MagicMock(return_value=mock_repo) + monkeypatch.setattr( + workflow_service_module.DifyCoreRepositoryFactory, + "create_workflow_node_execution_repository", + mock_repo_factory, + ) + service._node_execution_service_repo = mock_repo + + # Set up node execution service repo mock to return our exec node + mock_node_exec.load_full_outputs.return_value = {"output_var": "result_value"} + mock_node_exec.node_id = "node-1" + mock_node_exec.node_type = "llm" + + # Mock draft variable saver + mock_saver = MagicMock() + monkeypatch.setattr(workflow_service_module, "DraftVariableSaver", MagicMock(return_value=mock_saver)) + + # Mock DB + monkeypatch.setattr(workflow_service_module, "db", SimpleNamespace(engine=MagicMock())) + + monkeypatch.setattr(workflow_service_module, "Session", dummy_session_cls) + + # Act + result = service.run_draft_workflow_node( + app_model=mock_app, + draft_workflow=mock_workflow, + node_id="node-1", + user_inputs={"input_val": "test"}, + account=account, + ) + + # Assert + assert result == mock_node_exec + service._handle_single_step_result.assert_called_once() + mock_repo.save.assert_called_once_with(mock_node_exec) + mock_saver.save.assert_called_once_with(process_data={}, outputs={"output_var": "result_value"}) + + def test_run_draft_workflow_node_failure_behavior(self, workflow_service, mock_app, monkeypatch, dummy_session_cls): + """Behavior: If retrieving the saved execution fails, an appropriate error bubble matches expectations.""" + service = workflow_service + account = SimpleNamespace(id="account-1") + mock_workflow = MagicMock() + mock_workflow.tenant_id = "tenant-1" + mock_workflow.environment_variables = [] + mock_workflow.conversation_variables = [] + mock_workflow.get_node_config_by_id.return_value = NodeConfigDictAdapter.validate_python( + {"id": "node-1", "data": {"type": BuiltinNodeTypes.LLM}} + ) + mock_workflow.get_enclosing_node_type_and_id.return_value = None + + monkeypatch.setattr(workflow_service_module, "WorkflowDraftVariableService", MagicMock()) + monkeypatch.setattr(workflow_service_module, "DraftVarLoader", MagicMock()) + monkeypatch.setattr(workflow_service_module.WorkflowEntry, "single_step_run", MagicMock()) + + mock_node_exec = MagicMock() + mock_node_exec.id = "exec-invalid" + service._handle_single_step_result = MagicMock(return_value=mock_node_exec) + + mock_repo = MagicMock() + mock_repo_factory = MagicMock(return_value=mock_repo) + monkeypatch.setattr( + workflow_service_module.DifyCoreRepositoryFactory, + "create_workflow_node_execution_repository", + mock_repo_factory, + ) + service._node_execution_service_repo = mock_repo + + # Simulate failure to retrieve the saved execution + mock_repo.get_execution_by_id.return_value = None + + monkeypatch.setattr(workflow_service_module, "db", SimpleNamespace(engine=MagicMock())) + + monkeypatch.setattr(workflow_service_module, "Session", dummy_session_cls) + + # Act & Assert + with pytest.raises(ValueError, match="WorkflowNodeExecution with id exec-invalid not found after saving"): + service.run_draft_workflow_node( + app_model=mock_app, draft_workflow=mock_workflow, node_id="node-1", user_inputs={}, account=account + ) diff --git a/api/tests/unit_tests/tasks/test_dataset_indexing_task.py b/api/tests/unit_tests/tasks/test_dataset_indexing_task.py index 11b4663187..67e0a8efaf 100644 --- a/api/tests/unit_tests/tasks/test_dataset_indexing_task.py +++ b/api/tests/unit_tests/tasks/test_dataset_indexing_task.py @@ -10,14 +10,23 @@ This module tests the document indexing task functionality including: """ import uuid -from unittest.mock import Mock, patch +from unittest.mock import MagicMock, Mock, patch import pytest +from core.indexing_runner import DocumentIsPausedError from core.rag.pipeline.queue import TenantIsolatedTaskQueue from enums.cloud_plan import CloudPlan from extensions.ext_redis import redis_client +from models.dataset import Dataset, Document from services.document_indexing_proxy.document_indexing_task_proxy import DocumentIndexingTaskProxy +from tasks.document_indexing_task import ( + _document_indexing, + _document_indexing_with_tenant_queue, + document_indexing_task, + normal_document_indexing_task, + priority_document_indexing_task, +) # ============================================================================ # Fixtures @@ -56,6 +65,190 @@ def mock_redis(): return redis_client +# Additional fixtures required by tests in this module + + +@pytest.fixture +def mock_db_session(): + """Mock session_factory.create_session() to return a session whose queries use shared test data. + + Tests set session._shared_data = {"dataset": , "documents": [, ...]} + This fixture makes session.query(Dataset).first() return the shared dataset, + and session.query(Document).all()/first() return from the shared documents. + """ + with patch("tasks.document_indexing_task.session_factory") as mock_sf: + session = MagicMock() + session._shared_data = {"dataset": None, "documents": []} + + # Keep a pointer so repeated Document.first() calls iterate across provided docs + session._doc_first_idx = 0 + + def _query_side_effect(model): + q = MagicMock() + + # Capture filters passed via where(...) so first()/all() can honor them. + q._filters = {} + + def _extract_filters(*conds, **kw): + # Support both SQLAlchemy expressions (BinaryExpression) and kwargs + # We only need the simple fields used by production code: id, dataset_id, and id.in_(...) + for cond in conds: + left = getattr(cond, "left", None) + right = getattr(cond, "right", None) + key = None + if left is not None: + key = getattr(left, "key", None) or getattr(left, "name", None) + if not key: + continue + # Right side might be a BindParameter with .value, or a raw value/sequence + val = getattr(right, "value", right) + q._filters[key] = val + # Also accept kwargs (e.g., where(id=...)) just in case + for k, v in kw.items(): + q._filters[k] = v + + def _where_side_effect(*conds, **kw): + _extract_filters(*conds, **kw) + return q + + q.where.side_effect = _where_side_effect + + # Dataset queries + if model.__name__ == "Dataset": + + def _dataset_first(): + ds = session._shared_data.get("dataset") + if not ds: + return None + if "id" in q._filters: + val = q._filters["id"] + if isinstance(val, (list, tuple, set)): + return ds if ds.id in val else None + return ds if ds.id == val else None + return ds + + def _dataset_all(): + ds = session._shared_data.get("dataset") + if not ds: + return [] + first = _dataset_first() + return [first] if first else [] + + q.first.side_effect = _dataset_first + q.all.side_effect = _dataset_all + return q + + # Document queries + if model.__name__ == "Document": + + def _apply_doc_filters(docs): + result = list(docs) + for key in ("id", "dataset_id"): + if key in q._filters: + val = q._filters[key] + if isinstance(val, (list, tuple, set)): + result = [d for d in result if getattr(d, key, None) in val] + else: + result = [d for d in result if getattr(d, key, None) == val] + return result + + def _docs_all(): + docs = session._shared_data.get("documents", []) + return _apply_doc_filters(docs) + + def _docs_first(): + docs = _docs_all() + return docs[0] if docs else None + + q.all.side_effect = _docs_all + q.first.side_effect = _docs_first + return q + + # Default fallback + q.first.return_value = None + q.all.return_value = [] + return q + + session.query.side_effect = _query_side_effect + + # Implement session.begin() context manager that commits on exit + session.commit = MagicMock() + bm = MagicMock() + bm.__enter__.return_value = session + + def _bm_exit_side_effect(*args, **kwargs): + session.commit() + + bm.__exit__.side_effect = _bm_exit_side_effect + session.begin.return_value = bm + + # Context manager behavior for create_session(): ensure close() is called on exit + session.close = MagicMock() + cm = MagicMock() + cm.__enter__.return_value = session + + def _exit_side_effect(*args, **kwargs): + session.close() + + cm.__exit__.side_effect = _exit_side_effect + mock_sf.create_session.return_value = cm + + yield session + + +@pytest.fixture +def mock_dataset(dataset_id, tenant_id): + """Create a mock Dataset object.""" + dataset = Mock(spec=Dataset) + dataset.id = dataset_id + dataset.tenant_id = tenant_id + dataset.indexing_technique = "high_quality" + dataset.embedding_model_provider = "openai" + dataset.embedding_model = "text-embedding-ada-002" + return dataset + + +@pytest.fixture +def mock_documents(document_ids, dataset_id): + """Create mock Document objects.""" + documents = [] + for doc_id in document_ids: + doc = Mock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.error = None + doc.stopped_at = None + doc.processing_started_at = None + # optional attribute used in some code paths + doc.doc_form = "text_model" + documents.append(doc) + return documents + + +@pytest.fixture +def mock_indexing_runner(): + """Mock IndexingRunner for document_indexing_task module.""" + with patch("tasks.document_indexing_task.IndexingRunner") as mock_runner_class: + mock_runner = MagicMock() + mock_runner_class.return_value = mock_runner + yield mock_runner + + +@pytest.fixture +def mock_feature_service(): + """Mock FeatureService for document_indexing_task module.""" + with patch("tasks.document_indexing_task.FeatureService") as mock_service: + mock_features = Mock() + mock_features.billing = Mock() + mock_features.billing.enabled = False + mock_features.vector_space = Mock() + mock_features.vector_space.size = 0 + mock_features.vector_space.limit = 1000 + mock_service.get_features.return_value = mock_features + yield mock_service + + # ============================================================================ # Test Task Enqueuing # ============================================================================ @@ -166,6 +359,492 @@ class TestTaskEnqueuing: assert mock_redis.lpush.called mock_task.delay.assert_not_called() + def test_legacy_document_indexing_task_still_works( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_documents, mock_indexing_runner + ): + """ + Test that the legacy document_indexing_task function still works. + + This ensures backward compatibility for existing code that may still + use the deprecated function. + """ + # Arrange + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + document_indexing_task(dataset_id, document_ids) + + # Assert + mock_indexing_runner.run.assert_called_once() + + +# ============================================================================ +# Test Batch Processing +# ============================================================================ + + +class TestBatchProcessing: + """Test cases for batch processing of multiple documents.""" + + def test_batch_processing_multiple_documents( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test batch processing of multiple documents. + + All documents in the batch should be processed together and their + status should be updated to 'parsing'. + """ + # Arrange - Create actual document objects that can be modified + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.error = None + doc.stopped_at = None + doc.processing_started_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - All documents should be set to 'parsing' status + for doc in mock_documents: + assert doc.indexing_status == "parsing" + assert doc.processing_started_at is not None + + # IndexingRunner should be called with all documents + mock_indexing_runner.run.assert_called_once() + call_args = mock_indexing_runner.run.call_args[0][0] + assert len(call_args) == len(document_ids) + + def test_batch_processing_with_limit_check(self, dataset_id, mock_db_session, mock_dataset, mock_feature_service): + """ + Test batch processing respects upload limits. + + When the number of documents exceeds the batch upload limit, + an error should be raised and all documents should be marked as error. + """ + # Arrange + batch_limit = 10 + document_ids = [str(uuid.uuid4()) for _ in range(batch_limit + 1)] + + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.error = None + doc.stopped_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + mock_feature_service.get_features.return_value.billing.enabled = True + mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL + mock_feature_service.get_features.return_value.vector_space.limit = 1000 + mock_feature_service.get_features.return_value.vector_space.size = 0 + + with patch("tasks.document_indexing_task.dify_config.BATCH_UPLOAD_LIMIT", str(batch_limit)): + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - All documents should have error status + for doc in mock_documents: + assert doc.indexing_status == "error" + assert doc.error is not None + assert "batch upload limit" in doc.error + + def test_batch_processing_sandbox_plan_single_document_only( + self, dataset_id, mock_db_session, mock_dataset, mock_feature_service + ): + """ + Test that sandbox plan only allows single document upload. + + Sandbox plan should reject batch uploads (more than 1 document). + """ + # Arrange + document_ids = [str(uuid.uuid4()) for _ in range(2)] + + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.error = None + doc.stopped_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + mock_feature_service.get_features.return_value.billing.enabled = True + mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.SANDBOX + mock_feature_service.get_features.return_value.vector_space.limit = 1000 + mock_feature_service.get_features.return_value.vector_space.size = 0 + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - All documents should have error status + for doc in mock_documents: + assert doc.indexing_status == "error" + assert "does not support batch upload" in doc.error + + def test_batch_processing_empty_document_list( + self, dataset_id, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test batch processing with empty document list. + + Should handle empty list gracefully without errors. + """ + # Arrange + document_ids = [] + + # Set shared mock data with empty documents list + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = [] + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - IndexingRunner should still be called with empty list + mock_indexing_runner.run.assert_called_once_with([]) + + +# ============================================================================ +# Test Progress Tracking +# ============================================================================ + + +class TestProgressTracking: + """Test cases for progress tracking through task lifecycle.""" + + def test_document_status_progression( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test document status progresses correctly through lifecycle. + + Documents should transition from 'waiting' -> 'parsing' -> processed. + """ + # Arrange - Create actual document objects + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.processing_started_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - Status should be 'parsing' + for doc in mock_documents: + assert doc.indexing_status == "parsing" + assert doc.processing_started_at is not None + + # Verify commit was called to persist status + assert mock_db_session.commit.called + + def test_processing_started_timestamp_set( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test that processing_started_at timestamp is set correctly. + + When documents start processing, the timestamp should be recorded. + """ + # Arrange - Create actual document objects + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.processing_started_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert + for doc in mock_documents: + assert doc.processing_started_at is not None + + def test_tenant_queue_processes_next_task_after_completion( + self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test that tenant queue processes next waiting task after completion. + + After a task completes, the system should check for waiting tasks + and process the next one. + """ + # Arrange + next_task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["next_doc_id"]} + + # Simulate next task in queue + from core.rag.pipeline.queue import TaskWrapper + + wrapper = TaskWrapper(data=next_task_data) + mock_redis.rpop.return_value = wrapper.serialize() + + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert - Next task should be enqueued + mock_task.apply_async.assert_called() + # Task key should be set for next task + assert mock_redis.setex.called + + def test_tenant_queue_clears_flag_when_no_more_tasks( + self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test that tenant queue clears flag when no more tasks are waiting. + + When there are no more tasks in the queue, the task key should be deleted. + """ + # Arrange + mock_redis.rpop.return_value = None # No more tasks + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert - Task key should be deleted + assert mock_redis.delete.called + + +# ============================================================================ +# Test Error Handling and Retries +# ============================================================================ + + +class TestErrorHandling: + """Test cases for error handling and retry mechanisms.""" + + def test_error_handling_sets_document_error_status( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_feature_service + ): + """ + Test that errors during validation set document error status. + + When validation fails (e.g., limit exceeded), documents should be + marked with error status and error message. + """ + # Arrange - Create actual document objects + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.error = None + doc.stopped_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + # Set up to trigger vector space limit error + mock_feature_service.get_features.return_value.billing.enabled = True + mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL + mock_feature_service.get_features.return_value.vector_space.limit = 100 + mock_feature_service.get_features.return_value.vector_space.size = 100 # At limit + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert + for doc in mock_documents: + assert doc.indexing_status == "error" + assert doc.error is not None + assert "over the limit" in doc.error + assert doc.stopped_at is not None + + def test_error_handling_during_indexing_runner( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_documents, mock_indexing_runner + ): + """ + Test error handling when IndexingRunner raises an exception. + + Errors during indexing should be caught and logged, but not crash the task. + """ + # Arrange + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + # Make IndexingRunner raise an exception + mock_indexing_runner.run.side_effect = Exception("Indexing failed") + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act - Should not raise exception + _document_indexing(dataset_id, document_ids) + + # Assert - Session should be closed even after error + assert mock_db_session.close.called + + def test_document_paused_error_handling( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_documents, mock_indexing_runner + ): + """ + Test handling of DocumentIsPausedError. + + When a document is paused, the error should be caught and logged + but not treated as a failure. + """ + # Arrange + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + # Make IndexingRunner raise DocumentIsPausedError + mock_indexing_runner.run.side_effect = DocumentIsPausedError("Document is paused") + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act - Should not raise exception + _document_indexing(dataset_id, document_ids) + + # Assert - Session should be closed + assert mock_db_session.close.called + + def test_dataset_not_found_error_handling(self, dataset_id, document_ids, mock_db_session): + """ + Test handling when dataset is not found. + + If the dataset doesn't exist, the task should exit gracefully. + """ + # Arrange + mock_db_session.query.return_value.where.return_value.first.return_value = None + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - Session should be closed + assert mock_db_session.close.called + + def test_tenant_queue_error_handling_still_processes_next_task( + self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test that errors don't prevent processing next task in tenant queue. + + Even if the current task fails, the next task should still be processed. + """ + # Arrange + next_task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["next_doc_id"]} + + from core.rag.pipeline.queue import TaskWrapper + + wrapper = TaskWrapper(data=next_task_data) + # Set up rpop to return task once for concurrency check + mock_redis.rpop.side_effect = [wrapper.serialize(), None] + + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + # Make _document_indexing raise an error + with patch("tasks.document_indexing_task._document_indexing") as mock_indexing: + mock_indexing.side_effect = Exception("Processing failed") + + # Patch logger to avoid format string issue in actual code + with patch("tasks.document_indexing_task.logger"): + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert - Next task should still be enqueued despite error + mock_task.apply_async.assert_called() + + def test_concurrent_task_limit_respected( + self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset + ): + """ + Test that tenant isolated task concurrency limit is respected. + + Should pull only TENANT_ISOLATED_TASK_CONCURRENCY tasks at a time. + """ + # Arrange + concurrency_limit = 2 + + # Create multiple tasks in queue + tasks = [] + for i in range(5): + task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": [f"doc_{i}"]} + from core.rag.pipeline.queue import TaskWrapper + + wrapper = TaskWrapper(data=task_data) + tasks.append(wrapper.serialize()) + + # Mock rpop to return tasks one by one + mock_redis.rpop.side_effect = tasks[:concurrency_limit] + [None] + + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", concurrency_limit): + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert - Should enqueue exactly concurrency_limit tasks + assert mock_task.apply_async.call_count == concurrency_limit + # ============================================================================ # Test Task Cancellation @@ -198,6 +877,407 @@ class TestTaskCancellation: assert tenant_2 in queue_2._queue +# ============================================================================ +# Integration Tests +# ============================================================================ + + +class TestAdvancedScenarios: + """Advanced test scenarios for edge cases and complex workflows.""" + + def test_multiple_documents_with_mixed_success_and_failure( + self, dataset_id, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test handling of mixed success and failure scenarios in batch processing. + + When processing multiple documents, some may succeed while others fail. + This tests that the system handles partial failures gracefully. + + Scenario: + - Process 3 documents in a batch + - First document succeeds + - Second document is not found (skipped) + - Third document succeeds + + Expected behavior: + - Only found documents are processed + - Missing documents are skipped without crashing + - IndexingRunner receives only valid documents + """ + # Arrange - Create document IDs with one missing + document_ids = [str(uuid.uuid4()) for _ in range(3)] + + # Create only 2 documents (simulate one missing) + # The new code uses .all() which will only return existing documents + mock_documents = [] + for i, doc_id in enumerate([document_ids[0], document_ids[2]]): # Skip middle one + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.processing_started_at = None + mock_documents.append(doc) + + # Set shared mock data - .all() will only return existing documents + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - Only 2 documents should be processed (missing one skipped) + mock_indexing_runner.run.assert_called_once() + call_args = mock_indexing_runner.run.call_args[0][0] + assert len(call_args) == 2 # Only found documents + + def test_tenant_queue_with_multiple_concurrent_tasks( + self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset + ): + """ + Test concurrent task processing with tenant isolation. + + This tests the scenario where multiple tasks are queued for the same tenant + and need to be processed respecting the concurrency limit. + + Scenario: + - 5 tasks are waiting in the queue + - Concurrency limit is 2 + - After current task completes, pull and enqueue next 2 tasks + + Expected behavior: + - Exactly 2 tasks are pulled from queue (respecting concurrency) + - Each task is enqueued with correct parameters + - Task waiting time is set for each new task + """ + # Arrange + concurrency_limit = 2 + document_ids = [str(uuid.uuid4())] + + # Create multiple waiting tasks + waiting_tasks = [] + for i in range(5): + task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": [f"doc_{i}"]} + from core.rag.pipeline.queue import TaskWrapper + + wrapper = TaskWrapper(data=task_data) + waiting_tasks.append(wrapper.serialize()) + + # Mock rpop to return tasks up to concurrency limit + mock_redis.rpop.side_effect = waiting_tasks[:concurrency_limit] + [None] + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", concurrency_limit): + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert + # Should enqueue exactly concurrency_limit tasks + assert mock_task.apply_async.call_count == concurrency_limit + + # Verify task waiting time was set for each task + assert mock_redis.setex.call_count >= concurrency_limit + + def test_vector_space_limit_edge_case_at_exact_limit( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_feature_service + ): + """ + Test vector space limit validation at exact boundary. + + Edge case: When vector space is exactly at the limit (not over), + the upload should still be rejected. + + Scenario: + - Vector space limit: 100 + - Current size: 100 (exactly at limit) + - Try to upload 3 documents + + Expected behavior: + - Upload is rejected with appropriate error message + - All documents are marked with error status + """ + # Arrange + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.error = None + doc.stopped_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + # Set vector space exactly at limit + mock_feature_service.get_features.return_value.billing.enabled = True + mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL + mock_feature_service.get_features.return_value.vector_space.limit = 100 + mock_feature_service.get_features.return_value.vector_space.size = 100 # Exactly at limit + + # Act + _document_indexing(dataset_id, document_ids) + + # Assert - All documents should have error status + for doc in mock_documents: + assert doc.indexing_status == "error" + assert "over the limit" in doc.error + + def test_task_queue_fifo_ordering(self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset): + """ + Test that tasks are processed in FIFO (First-In-First-Out) order. + + The tenant isolated queue should maintain task order, ensuring + that tasks are processed in the sequence they were added. + + Scenario: + - Task A added first + - Task B added second + - Task C added third + - When pulling tasks, should get A, then B, then C + + Expected behavior: + - Tasks are retrieved in the order they were added + - FIFO ordering is maintained throughout processing + """ + # Arrange + document_ids = [str(uuid.uuid4())] + + # Create tasks with identifiable document IDs to track order + task_order = ["task_A", "task_B", "task_C"] + tasks = [] + for task_name in task_order: + task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": [task_name]} + from core.rag.pipeline.queue import TaskWrapper + + wrapper = TaskWrapper(data=task_data) + tasks.append(wrapper.serialize()) + + # Mock rpop to return tasks in FIFO order + mock_redis.rpop.side_effect = tasks + [None] + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", 3): + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert - Verify tasks were enqueued in correct order + assert mock_task.apply_async.call_count == 3 + + # Check that document_ids in calls match expected order + for i, call_obj in enumerate(mock_task.apply_async.call_args_list): + called_doc_ids = call_obj[1]["kwargs"]["document_ids"] + assert called_doc_ids == [task_order[i]] + + def test_empty_queue_after_task_completion_cleans_up( + self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset + ): + """ + Test cleanup behavior when queue becomes empty after task completion. + + After processing the last task in the queue, the system should: + 1. Detect that no more tasks are waiting + 2. Delete the task key to indicate tenant is idle + 3. Allow new tasks to start fresh processing + + Scenario: + - Process a task + - Check queue for next tasks + - Queue is empty + - Task key should be deleted + + Expected behavior: + - Task key is deleted when queue is empty + - Tenant is marked as idle (no active tasks) + """ + # Arrange + mock_redis.rpop.return_value = None # Empty queue + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert + # Verify delete was called to clean up task key + mock_redis.delete.assert_called_once() + + # Verify the correct key was deleted (contains tenant_id and "document_indexing") + delete_call_args = mock_redis.delete.call_args[0][0] + assert tenant_id in delete_call_args + assert "document_indexing" in delete_call_args + + def test_billing_disabled_skips_limit_checks( + self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner, mock_feature_service + ): + """ + Test that billing limit checks are skipped when billing is disabled. + + For self-hosted or enterprise deployments where billing is disabled, + the system should not enforce vector space or batch upload limits. + + Scenario: + - Billing is disabled + - Upload 100 documents (would normally exceed limits) + - No limit checks should be performed + + Expected behavior: + - Documents are processed without limit validation + - No errors related to limits + - All documents proceed to indexing + """ + # Arrange - Create many documents + large_batch_ids = [str(uuid.uuid4()) for _ in range(100)] + + mock_documents = [] + for doc_id in large_batch_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.processing_started_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + # Billing disabled - limits should not be checked + mock_feature_service.get_features.return_value.billing.enabled = False + + # Act + _document_indexing(dataset_id, large_batch_ids) + + # Assert + # All documents should be set to parsing (no limit errors) + for doc in mock_documents: + assert doc.indexing_status == "parsing" + + # IndexingRunner should be called with all documents + mock_indexing_runner.run.assert_called_once() + call_args = mock_indexing_runner.run.call_args[0][0] + assert len(call_args) == 100 + + +class TestIntegration: + """Integration tests for complete task workflows.""" + + def test_complete_workflow_normal_task( + self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test complete workflow for normal document indexing task. + + This tests the full flow from task receipt to completion. + """ + # Arrange - Create actual document objects + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.processing_started_at = None + mock_documents.append(doc) + + # Set up rpop to return None for concurrency check (no more tasks) + mock_redis.rpop.side_effect = [None] + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + normal_document_indexing_task(tenant_id, dataset_id, document_ids) + + # Assert + # Documents should be processed + mock_indexing_runner.run.assert_called_once() + # Session should be closed + assert mock_db_session.close.called + # Task key should be deleted (no more tasks) + assert mock_redis.delete.called + + def test_complete_workflow_priority_task( + self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test complete workflow for priority document indexing task. + + Priority tasks should follow the same flow as normal tasks. + """ + # Arrange - Create actual document objects + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.processing_started_at = None + mock_documents.append(doc) + + # Set up rpop to return None for concurrency check (no more tasks) + mock_redis.rpop.side_effect = [None] + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + # Act + priority_document_indexing_task(tenant_id, dataset_id, document_ids) + + # Assert + mock_indexing_runner.run.assert_called_once() + assert mock_db_session.close.called + assert mock_redis.delete.called + + def test_queue_chain_processing( + self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner + ): + """ + Test that multiple tasks in queue are processed in sequence. + + When tasks are queued, they should be processed one after another. + """ + # Arrange + task_1_docs = [str(uuid.uuid4())] + task_2_docs = [str(uuid.uuid4())] + + task_2_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": task_2_docs} + + from core.rag.pipeline.queue import TaskWrapper + + wrapper = TaskWrapper(data=task_2_data) + + # First call returns task 2, second call returns None + mock_redis.rpop.side_effect = [wrapper.serialize(), None] + + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features: + mock_features.return_value.billing.enabled = False + + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act - Process first task + _document_indexing_with_tenant_queue(tenant_id, dataset_id, task_1_docs, mock_task) + + # Assert - Second task should be enqueued + assert mock_task.apply_async.called + call_args = mock_task.apply_async.call_args + assert call_args[1]["kwargs"]["document_ids"] == task_2_docs + + # ============================================================================ # Additional Edge Case Tests # ============================================================================ @@ -249,6 +1329,107 @@ class TestEdgeCases: class TestPerformanceScenarios: """Test performance-related scenarios and optimizations.""" + def test_large_document_batch_processing( + self, dataset_id, mock_db_session, mock_dataset, mock_indexing_runner, mock_feature_service + ): + """ + Test processing a large batch of documents at batch limit. + + When processing the maximum allowed batch size, the system + should handle it efficiently without errors. + + Scenario: + - Process exactly batch_upload_limit documents (e.g., 50) + - All documents are valid + - Billing is enabled + + Expected behavior: + - All documents are processed successfully + - No timeout or memory issues + - Batch limit is not exceeded + """ + # Arrange + batch_limit = 50 + document_ids = [str(uuid.uuid4()) for _ in range(batch_limit)] + + mock_documents = [] + for doc_id in document_ids: + doc = MagicMock(spec=Document) + doc.id = doc_id + doc.dataset_id = dataset_id + doc.indexing_status = "waiting" + doc.processing_started_at = None + mock_documents.append(doc) + + # Set shared mock data so all sessions can access it + mock_db_session._shared_data["dataset"] = mock_dataset + mock_db_session._shared_data["documents"] = mock_documents + + # Configure billing with sufficient limits + mock_feature_service.get_features.return_value.billing.enabled = True + mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL + mock_feature_service.get_features.return_value.vector_space.limit = 10000 + mock_feature_service.get_features.return_value.vector_space.size = 0 + + with patch("tasks.document_indexing_task.dify_config.BATCH_UPLOAD_LIMIT", str(batch_limit)): + # Act + _document_indexing(dataset_id, document_ids) + + # Assert + for doc in mock_documents: + assert doc.indexing_status == "parsing" + + mock_indexing_runner.run.assert_called_once() + call_args = mock_indexing_runner.run.call_args[0][0] + assert len(call_args) == batch_limit + + def test_tenant_queue_handles_burst_traffic(self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset): + """ + Test tenant queue handling burst traffic scenarios. + + When many tasks arrive in a burst for the same tenant, + the queue should handle them efficiently without dropping tasks. + + Scenario: + - 20 tasks arrive rapidly + - Concurrency limit is 3 + - Tasks should be queued and processed in batches + + Expected behavior: + - First 3 tasks are processed immediately + - Remaining tasks wait in queue + - No tasks are lost + """ + # Arrange + num_tasks = 20 + concurrency_limit = 3 + document_ids = [str(uuid.uuid4())] + + # Create waiting tasks + waiting_tasks = [] + for i in range(num_tasks): + task_data = { + "tenant_id": tenant_id, + "dataset_id": dataset_id, + "document_ids": [f"doc_{i}"], + } + from core.rag.pipeline.queue import TaskWrapper + + wrapper = TaskWrapper(data=task_data) + waiting_tasks.append(wrapper.serialize()) + + # Mock rpop to return tasks up to concurrency limit + mock_redis.rpop.side_effect = waiting_tasks[:concurrency_limit] + [None] + mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset + + with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", concurrency_limit): + with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task: + # Act + _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task) + + # Assert - Should process exactly concurrency_limit tasks + assert mock_task.apply_async.call_count == concurrency_limit + def test_multiple_tenants_isolated_processing(self, mock_redis): """ Test that multiple tenants process tasks in isolation. diff --git a/api/tests/unit_tests/tasks/test_summary_queue_isolation.py b/api/tests/unit_tests/tasks/test_summary_queue_isolation.py new file mode 100644 index 0000000000..f6632e0a8a --- /dev/null +++ b/api/tests/unit_tests/tasks/test_summary_queue_isolation.py @@ -0,0 +1,40 @@ +""" +Unit tests for summary index task queue isolation. + +These tasks must NOT run on the shared 'dataset' queue because they invoke LLMs +for each document segment and can occupy all worker slots for hours, blocking +document indexing tasks. +""" + +import pytest + +from tasks.generate_summary_index_task import generate_summary_index_task +from tasks.regenerate_summary_index_task import regenerate_summary_index_task + +SUMMARY_QUEUE = "dataset_summary" +INDEXING_QUEUE = "dataset" + + +def _task_queue(task) -> str | None: + # Celery's @shared_task(queue=...) stores the routing key on the task instance + # at runtime, but type stubs don't declare it; use getattr to stay type-clean. + return getattr(task, "queue", None) + + +@pytest.mark.parametrize( + ("task", "task_name"), + [ + (generate_summary_index_task, "generate_summary_index_task"), + (regenerate_summary_index_task, "regenerate_summary_index_task"), + ], +) +def test_summary_task_uses_dedicated_queue(task, task_name): + """Summary tasks must use the dataset_summary queue, not the shared dataset queue. + + Summary generation is LLM-heavy and will block document indexing if placed + on the shared queue. + """ + assert _task_queue(task) == SUMMARY_QUEUE, ( + f"{task_name} must run on '{SUMMARY_QUEUE}' queue (not '{INDEXING_QUEUE}'). " + "Summary generation is LLM-heavy and will block document indexing if placed on the shared queue." + ) diff --git a/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py b/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py index 54be8379d5..a223f0119e 100644 --- a/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py +++ b/api/tests/unit_tests/tasks/test_workflow_node_execution_tasks.py @@ -15,7 +15,7 @@ # WorkflowNodeExecution, # WorkflowNodeExecutionStatus, # ) -# from dify_graph.enums import NodeType +# from dify_graph.enums import BuiltinNodeTypes # from libs.datetime_utils import naive_utc_now # from models import WorkflowNodeExecutionModel # from models.enums import ExecutionOffLoadType @@ -41,7 +41,7 @@ # workflow_execution_id=str(uuid4()), # index=1, # node_id="test_node", -# node_type=NodeType.LLM, +# node_type=BuiltinNodeTypes.LLM, # title="Test Node", # inputs={"input_key": "input_value"}, # outputs={"output_key": "output_value"}, @@ -134,7 +134,7 @@ # workflow_execution_id=str(uuid4()), # index=1, # node_id="test_node", -# node_type=NodeType.LLM, +# node_type=BuiltinNodeTypes.LLM, # title="Test Node", # inputs=large_data, # outputs=large_data, diff --git a/api/tests/unit_tests/tools/__init__.py b/api/tests/unit_tests/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/uv.lock b/api/uv.lock index 223f473f9e..8ce6bd104a 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -97,18 +97,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload-time = "2026-01-03T17:30:44.195Z" }, ] -[[package]] -name = "aiohttp-retry" -version = "2.9.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9d/61/ebda4d8e3d8cfa1fd3db0fb428db2dd7461d5742cea35178277ad180b033/aiohttp_retry-2.9.1.tar.gz", hash = "sha256:8eb75e904ed4ee5c2ec242fefe85bf04240f685391c4879d8f541d6028ff01f1", size = 13608, upload-time = "2024-11-06T10:44:54.574Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/99/84ba7273339d0f3dfa57901b846489d2e5c2cd731470167757f1935fffbd/aiohttp_retry-2.9.1-py3-none-any.whl", hash = "sha256:66d2759d1921838256a05a3f80ad7e724936f083e35be5abb5e16eed6be6dc54", size = 9981, upload-time = "2024-11-06T10:44:52.917Z" }, -] - [[package]] name = "aiomysql" version = "0.3.2" @@ -410,7 +398,7 @@ wheels = [ [[package]] name = "arize-phoenix-otel" -version = "0.9.2" +version = "0.15.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "openinference-instrumentation" }, @@ -420,10 +408,11 @@ dependencies = [ { name = "opentelemetry-sdk" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, + { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/27/b9/8c89191eb46915e9ba7bdb473e2fb1c510b7db3635ae5ede5e65b2176b9d/arize_phoenix_otel-0.9.2.tar.gz", hash = "sha256:a48c7d41f3ac60dc75b037f036bf3306d2af4af371cdb55e247e67957749bc31", size = 11599, upload-time = "2025-04-14T22:05:28.637Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/f0/b254118db28a2a202573472be67cf61f09cb37912bfde45b27ddc1c5b71f/arize_phoenix_otel-0.15.0.tar.gz", hash = "sha256:56c7dae09aaaa80df9e9595b7384c1bd4054b69b6032ab18e3a110a59b488388", size = 20254, upload-time = "2026-03-02T20:19:04.112Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/3d/f64136a758c649e883315939f30fe51ad0747024b0db05fd78450801a78d/arize_phoenix_otel-0.9.2-py3-none-any.whl", hash = "sha256:5286b33c58b596ef8edd9a4255ee00fd74f774b1e5dbd9393e77e87870a14d76", size = 12560, upload-time = "2025-04-14T22:05:27.162Z" }, + { url = "https://files.pythonhosted.org/packages/e4/4d/70d9c9d7137cc2e2aad819932172ef13ce21b4e60bf258910b9f15e426af/arize_phoenix_otel-0.15.0-py3-none-any.whl", hash = "sha256:5ff4d03b52d2dbd9c2a234417848f6b171cd220dc3c4020cf3568be84b89b88b", size = 17697, upload-time = "2026-03-02T20:19:03.242Z" }, ] [[package]] @@ -480,22 +469,23 @@ wheels = [ [[package]] name = "azure-identity" -version = "1.16.1" +version = "1.25.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, { name = "cryptography" }, { name = "msal" }, { name = "msal-extensions" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/1c/bd704075e555046e24b069157ca25c81aedb4199c3e0b35acba9243a6ca6/azure-identity-1.16.1.tar.gz", hash = "sha256:6d93f04468f240d59246d8afde3091494a5040d4f141cad0f49fc0c399d0d91e", size = 236726, upload-time = "2024-06-10T22:23:27.46Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/3a/439a32a5e23e45f6a91f0405949dc66cfe6834aba15a430aebfc063a81e7/azure_identity-1.25.2.tar.gz", hash = "sha256:030dbaa720266c796221c6cdbd1999b408c079032c919fef725fcc348a540fe9", size = 284709, upload-time = "2026-02-11T01:55:42.323Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/c5/ca55106564d2044ab90614381368b3756690fb7e3ab04552e17f308e4e4f/azure_identity-1.16.1-py3-none-any.whl", hash = "sha256:8fb07c25642cd4ac422559a8b50d3e77f73dcc2bbfaba419d06d6c9d7cff6726", size = 166741, upload-time = "2024-06-10T22:23:30.906Z" }, + { url = "https://files.pythonhosted.org/packages/9b/77/f658c76f9e9a52c784bd836aaca6fd5b9aae176f1f53273e758a2bcda695/azure_identity-1.25.2-py3-none-any.whl", hash = "sha256:1b40060553d01a72ba0d708b9a46d0f61f56312e215d8896d836653ffdc6753d", size = 191423, upload-time = "2026-02-11T01:55:44.245Z" }, ] [[package]] name = "azure-storage-blob" -version = "12.26.0" +version = "12.28.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "azure-core" }, @@ -503,9 +493,9 @@ dependencies = [ { name = "isodate" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/96/95/3e3414491ce45025a1cde107b6ae72bf72049e6021597c201cd6a3029b9a/azure_storage_blob-12.26.0.tar.gz", hash = "sha256:5dd7d7824224f7de00bfeb032753601c982655173061e242f13be6e26d78d71f", size = 583332, upload-time = "2025-07-16T21:34:07.644Z" } +sdist = { url = "https://files.pythonhosted.org/packages/71/24/072ba8e27b0e2d8fec401e9969b429d4f5fc4c8d4f0f05f4661e11f7234a/azure_storage_blob-12.28.0.tar.gz", hash = "sha256:e7d98ea108258d29aa0efbfd591b2e2075fa1722a2fae8699f0b3c9de11eff41", size = 604225, upload-time = "2026-01-06T23:48:57.282Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5b/64/63dbfdd83b31200ac58820a7951ddfdeed1fbee9285b0f3eae12d1357155/azure_storage_blob-12.26.0-py3-none-any.whl", hash = "sha256:8c5631b8b22b4f53ec5fff2f3bededf34cfef111e2af613ad42c9e6de00a77fe", size = 412907, upload-time = "2025-07-16T21:34:09.367Z" }, + { url = "https://files.pythonhosted.org/packages/d8/3a/6ef2047a072e54e1142718d433d50e9514c999a58f51abfff7902f3a72f8/azure_storage_blob-12.28.0-py3-none-any.whl", hash = "sha256:00fb1db28bf6a7b7ecaa48e3b1d5c83bfadacc5a678b77826081304bd87d6461", size = 431499, upload-time = "2026-01-06T23:48:58.995Z" }, ] [[package]] @@ -519,14 +509,14 @@ wheels = [ [[package]] name = "basedpyright" -version = "1.31.7" +version = "1.38.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "nodejs-wheel-binaries" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c6/ba/ed69e8df732a09c8ca469f592c8e08707fe29149735b834c276d94d4a3da/basedpyright-1.31.7.tar.gz", hash = "sha256:394f334c742a19bcc5905b2455c9f5858182866b7679a6f057a70b44b049bceb", size = 22710948, upload-time = "2025-10-11T05:12:48.3Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/58/7abba2c743571a42b2548f07aee556ebc1e4d0bc2b277aeba1ee6c83b0af/basedpyright-1.38.3.tar.gz", hash = "sha256:9725419786afbfad8a9539527f162da02d462afad440b0412fdb3f3cdf179b90", size = 25277430, upload-time = "2026-03-17T13:10:41.526Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/90/ce01ad2d0afdc1b82b8b5aaba27e60d2e138e39d887e71c35c55d8f1bfcd/basedpyright-1.31.7-py3-none-any.whl", hash = "sha256:7c54beb7828c9ed0028630aaa6904f395c27e5a9f5a313aa9e91fc1d11170831", size = 11817571, upload-time = "2025-10-11T05:12:45.432Z" }, + { url = "https://files.pythonhosted.org/packages/2c/e3/3ebb5c23bd3abb5fc2053b8a06a889aa5c1cf8cff738c78cb6c1957e90cd/basedpyright-1.38.3-py3-none-any.whl", hash = "sha256:1f15c2e489c67d6c5e896c24b6a63251195c04223a55e4568b8f8e8ed49ca830", size = 12313363, upload-time = "2026-03-17T13:10:47.344Z" }, ] [[package]] @@ -597,15 +587,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/f4/a69c20ee4f660081a7dedb1ac57f29be9378e04edfcb90c526b923d4bebc/beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a", size = 142979, upload-time = "2023-04-07T15:02:50.77Z" }, ] -[[package]] -name = "bidict" -version = "0.23.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9a/6e/026678aa5a830e07cd9498a05d3e7e650a4f56a42f267a53d22bcda1bdc9/bidict-0.23.1.tar.gz", hash = "sha256:03069d763bc387bbd20e7d49914e75fc4132a41937fa3405417e1a5a2d006d71", size = 29093, upload-time = "2024-02-18T19:09:05.748Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/99/37/e8730c3587a65eb5645d4aba2d27aae48e8003614d6aaf15dda67f702f1f/bidict-0.23.1-py3-none-any.whl", hash = "sha256:5dae8d4d79b552a71cbabc7deb25dfe8ce710b17ff41711e13010ead2abfc3e5", size = 32764, upload-time = "2024-02-18T19:09:04.156Z" }, -] - [[package]] name = "billiard" version = "4.2.4" @@ -626,16 +607,16 @@ wheels = [ [[package]] name = "boto3" -version = "1.35.99" +version = "1.42.65" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, { name = "jmespath" }, { name = "s3transfer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f7/99/3e8b48f15580672eda20f33439fc1622bd611f6238b6d05407320e1fb98c/boto3-1.35.99.tar.gz", hash = "sha256:e0abd794a7a591d90558e92e29a9f8837d25ece8e3c120e530526fe27eba5fca", size = 111028, upload-time = "2025-01-14T20:20:28.636Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/c9/8ff8a901cf62374f1289cf36391f855e1702c70f545c28d1b57608a84ff2/boto3-1.42.65.tar.gz", hash = "sha256:c740af6bdaebcc1a00f3827a5729050bf6fc820ee148bf7d06f28db11c80e2a1", size = 112805, upload-time = "2026-03-10T19:44:58.255Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/65/77/8bbca82f70b062181cf0ae53fd43f1ac6556f3078884bfef9da2269c06a3/boto3-1.35.99-py3-none-any.whl", hash = "sha256:83e560faaec38a956dfb3d62e05e1703ee50432b45b788c09e25107c5058bd71", size = 139178, upload-time = "2025-01-14T20:20:25.48Z" }, + { url = "https://files.pythonhosted.org/packages/46/bb/ace5921655df51e3c9b787b3f0bd6aa25548e5cf1dabae02e53fa88f2d98/boto3-1.42.65-py3-none-any.whl", hash = "sha256:cc7f2e0aec6c68ee5b10232cf3e01326acf6100bc785a770385b61a0474b31f4", size = 140556, upload-time = "2026-03-10T19:44:55.433Z" }, ] [[package]] @@ -659,16 +640,16 @@ bedrock-runtime = [ [[package]] name = "botocore" -version = "1.35.99" +version = "1.42.73" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7c/9c/1df6deceee17c88f7170bad8325aa91452529d683486273928eecfd946d8/botocore-1.35.99.tar.gz", hash = "sha256:1eab44e969c39c5f3d9a3104a0836c24715579a455f12b3979a31d7cde51b3c3", size = 13490969, upload-time = "2025-01-14T20:20:11.419Z" } +sdist = { url = "https://files.pythonhosted.org/packages/28/23/0c88ca116ef63b1ae77c901cd5d2095d22a8dbde9e80df74545db4a061b4/botocore-1.42.73.tar.gz", hash = "sha256:575858641e4949aaf2af1ced145b8524529edf006d075877af6b82ff96ad854c", size = 15008008, upload-time = "2026-03-20T19:39:40.082Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/dd/d87e2a145fad9e08d0ec6edcf9d71f838ccc7acdd919acc4c0d4a93515f8/botocore-1.35.99-py3-none-any.whl", hash = "sha256:b22d27b6b617fc2d7342090d6129000af2efd20174215948c0d7ae2da0fab445", size = 13293216, upload-time = "2025-01-14T20:20:06.427Z" }, + { url = "https://files.pythonhosted.org/packages/8e/65/971f3d55015f4d133a6ff3ad74cd39f4b8dd8f53f7775a3c2ad378ea5145/botocore-1.42.73-py3-none-any.whl", hash = "sha256:7b62e2a12f7a1b08eb7360eecd23bb16fe3b7ab7f5617cf91b25476c6f86a0fe", size = 14681861, upload-time = "2026-03-20T19:39:35.341Z" }, ] [[package]] @@ -708,15 +689,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/ad/d71da675eef85ac153eef5111ca0caa924548c9591da00939bcabba8de8e/bottleneck-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:81e3822499f057a917b7d3972ebc631ac63c6bbcc79ad3542a66c4c40634e3a6", size = 113493, upload-time = "2025-09-08T16:30:02.872Z" }, ] -[[package]] -name = "bracex" -version = "2.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/63/9a/fec38644694abfaaeca2798b58e276a8e61de49e2e37494ace423395febc/bracex-2.6.tar.gz", hash = "sha256:98f1347cd77e22ee8d967a30ad4e310b233f7754dbf31ff3fceb76145ba47dc7", size = 26642, upload-time = "2025-06-22T19:12:31.254Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/2a/9186535ce58db529927f6cf5990a849aa9e052eea3e2cfefe20b9e1802da/bracex-2.6-py3-none-any.whl", hash = "sha256:0b0049264e7340b3ec782b5cb99beb325f36c3782a32e36e876452fd49a09952", size = 11508, upload-time = "2025-06-22T19:12:29.781Z" }, -] - [[package]] name = "brotli" version = "1.2.0" @@ -779,16 +751,16 @@ wheels = [ [[package]] name = "build" -version = "1.4.0" +version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "os_name == 'nt' and sys_platform != 'linux'" }, { name = "packaging" }, { name = "pyproject-hooks" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/42/18/94eaffda7b329535d91f00fe605ab1f1e5cd68b2074d03f255c7d250687d/build-1.4.0.tar.gz", hash = "sha256:f1b91b925aa322be454f8330c6fb48b465da993d1e7e7e6fa35027ec49f3c936", size = 50054, upload-time = "2026-01-08T16:41:47.696Z" } +sdist = { url = "https://files.pythonhosted.org/packages/25/1c/23e33405a7c9eac261dff640926b8b5adaed6a6eb3e1767d441ed611d0c0/build-1.3.0.tar.gz", hash = "sha256:698edd0ea270bde950f53aed21f3a0135672206f3911e0176261a31e0e07b397", size = 48544, upload-time = "2025-08-01T21:27:09.268Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/0d/84a4380f930db0010168e0aa7b7a8fed9ba1835a8fbb1472bc6d0201d529/build-1.4.0-py3-none-any.whl", hash = "sha256:6a07c1b8eb6f2b311b96fcbdbce5dab5fe637ffda0fd83c9cac622e927501596", size = 24141, upload-time = "2026-01-08T16:41:46.453Z" }, + { url = "https://files.pythonhosted.org/packages/cb/8c/2b30c12155ad8de0cf641d76a8b396a16d2c36bc6d50b621a62b7c4567c1/build-1.3.0-py3-none-any.whl", hash = "sha256:7145f0b5061ba90a1500d60bd1b13ca0a8a4cebdd0cc16ed8adf1c0e739f43b4", size = 23382, upload-time = "2025-08-01T21:27:07.844Z" }, ] [[package]] @@ -1139,7 +1111,7 @@ wheels = [ [[package]] name = "cos-python-sdk-v5" -version = "1.9.38" +version = "1.9.41" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "crcmod" }, @@ -1148,9 +1120,9 @@ dependencies = [ { name = "six" }, { name = "xmltodict" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/24/3c/d208266fec7cc3221b449e236b87c3fc1999d5ac4379d4578480321cfecc/cos_python_sdk_v5-1.9.38.tar.gz", hash = "sha256:491a8689ae2f1a6f04dacba66a877b2c8d361456f9cfd788ed42170a1cbf7a9f", size = 98092, upload-time = "2025-07-22T07:56:20.34Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0e/38/c0029f413f51238aa2319715f45d74bcae931768e36c7e4604b02f407c6c/cos_python_sdk_v5-1.9.41.tar.gz", hash = "sha256:68f4be7d8fe27a1d186b3159b93c622816e398effdc236eddd442b86db592b82", size = 102625, upload-time = "2026-01-06T07:00:11.692Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ab/c8/c9c156aa3bc7caba9b4f8a2b6abec3da6263215988f3fec0ea843f137a10/cos_python_sdk_v5-1.9.38-py3-none-any.whl", hash = "sha256:1d3dd3be2bd992b2e9c2dcd018e2596aa38eab022dbc86b4a5d14c8fc88370e6", size = 92601, upload-time = "2025-08-17T05:12:30.867Z" }, + { url = "https://files.pythonhosted.org/packages/aa/2f/ead3fb551509fdc94e4a42093b770e3de2827ff7227570165df5e35c2a3e/cos_python_sdk_v5-1.9.41-py3-none-any.whl", hash = "sha256:f465aae43a4ba3f1caa8caeaca838d0395932f6848e89d6dde2807725e3c88a0", size = 98285, upload-time = "2026-01-06T06:43:02.754Z" }, ] [[package]] @@ -1175,29 +1147,41 @@ wheels = [ [[package]] name = "coverage" -version = "7.2.7" +version = "7.13.5" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/45/8b/421f30467e69ac0e414214856798d4bc32da1336df745e49e49ae5c1e2a8/coverage-7.2.7.tar.gz", hash = "sha256:924d94291ca674905fe9481f12294eb11f2d3d3fd1adb20314ba89e94f44ed59", size = 762575, upload-time = "2023-05-29T20:08:50.273Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9d/e0/70553e3000e345daff267cec284ce4cbf3fc141b6da229ac52775b5428f1/coverage-7.13.5.tar.gz", hash = "sha256:c81f6515c4c40141f83f502b07bbfa5c240ba25bbe73da7b33f1e5b6120ff179", size = 915967, upload-time = "2026-03-17T10:33:18.341Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/fa/529f55c9a1029c840bcc9109d5a15ff00478b7ff550a1ae361f8745f8ad5/coverage-7.2.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:06a9a2be0b5b576c3f18f1a241f0473575c4a26021b52b2a85263a00f034d51f", size = 200895, upload-time = "2023-05-29T20:07:21.963Z" }, - { url = "https://files.pythonhosted.org/packages/67/d7/cd8fe689b5743fffac516597a1222834c42b80686b99f5b44ef43ccc2a43/coverage-7.2.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5baa06420f837184130752b7c5ea0808762083bf3487b5038d68b012e5937dbe", size = 201120, upload-time = "2023-05-29T20:07:23.765Z" }, - { url = "https://files.pythonhosted.org/packages/8c/95/16eed713202406ca0a37f8ac259bbf144c9d24f9b8097a8e6ead61da2dbb/coverage-7.2.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdec9e8cbf13a5bf63290fc6013d216a4c7232efb51548594ca3631a7f13c3a3", size = 233178, upload-time = "2023-05-29T20:07:25.281Z" }, - { url = "https://files.pythonhosted.org/packages/c1/49/4d487e2ad5d54ed82ac1101e467e8994c09d6123c91b2a962145f3d262c2/coverage-7.2.7-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:52edc1a60c0d34afa421c9c37078817b2e67a392cab17d97283b64c5833f427f", size = 230754, upload-time = "2023-05-29T20:07:27.044Z" }, - { url = "https://files.pythonhosted.org/packages/a7/cd/3ce94ad9d407a052dc2a74fbeb1c7947f442155b28264eb467ee78dea812/coverage-7.2.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb", size = 232558, upload-time = "2023-05-29T20:07:28.743Z" }, - { url = "https://files.pythonhosted.org/packages/8f/a8/12cc7b261f3082cc299ab61f677f7e48d93e35ca5c3c2f7241ed5525ccea/coverage-7.2.7-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:afb17f84d56068a7c29f5fa37bfd38d5aba69e3304af08ee94da8ed5b0865833", size = 241509, upload-time = "2023-05-29T20:07:30.434Z" }, - { url = "https://files.pythonhosted.org/packages/04/fa/43b55101f75a5e9115259e8be70ff9279921cb6b17f04c34a5702ff9b1f7/coverage-7.2.7-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:48c19d2159d433ccc99e729ceae7d5293fbffa0bdb94952d3579983d1c8c9d97", size = 239924, upload-time = "2023-05-29T20:07:32.065Z" }, - { url = "https://files.pythonhosted.org/packages/68/5f/d2bd0f02aa3c3e0311986e625ccf97fdc511b52f4f1a063e4f37b624772f/coverage-7.2.7-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0e1f928eaf5469c11e886fe0885ad2bf1ec606434e79842a879277895a50942a", size = 240977, upload-time = "2023-05-29T20:07:34.184Z" }, - { url = "https://files.pythonhosted.org/packages/ba/92/69c0722882643df4257ecc5437b83f4c17ba9e67f15dc6b77bad89b6982e/coverage-7.2.7-cp311-cp311-win32.whl", hash = "sha256:33d6d3ea29d5b3a1a632b3c4e4f4ecae24ef170b0b9ee493883f2df10039959a", size = 203168, upload-time = "2023-05-29T20:07:35.869Z" }, - { url = "https://files.pythonhosted.org/packages/b1/96/c12ed0dfd4ec587f3739f53eb677b9007853fd486ccb0e7d5512a27bab2e/coverage-7.2.7-cp311-cp311-win_amd64.whl", hash = "sha256:5b7540161790b2f28143191f5f8ec02fb132660ff175b7747b95dcb77ac26562", size = 204185, upload-time = "2023-05-29T20:07:37.39Z" }, - { url = "https://files.pythonhosted.org/packages/ff/d5/52fa1891d1802ab2e1b346d37d349cb41cdd4fd03f724ebbf94e80577687/coverage-7.2.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f2f67fe12b22cd130d34d0ef79206061bfb5eda52feb6ce0dba0644e20a03cf4", size = 201020, upload-time = "2023-05-29T20:07:38.724Z" }, - { url = "https://files.pythonhosted.org/packages/24/df/6765898d54ea20e3197a26d26bb65b084deefadd77ce7de946b9c96dfdc5/coverage-7.2.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a342242fe22407f3c17f4b499276a02b01e80f861f1682ad1d95b04018e0c0d4", size = 233994, upload-time = "2023-05-29T20:07:40.274Z" }, - { url = "https://files.pythonhosted.org/packages/15/81/b108a60bc758b448c151e5abceed027ed77a9523ecbc6b8a390938301841/coverage-7.2.7-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:171717c7cb6b453aebac9a2ef603699da237f341b38eebfee9be75d27dc38e01", size = 231358, upload-time = "2023-05-29T20:07:41.998Z" }, - { url = "https://files.pythonhosted.org/packages/61/90/c76b9462f39897ebd8714faf21bc985b65c4e1ea6dff428ea9dc711ed0dd/coverage-7.2.7-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49969a9f7ffa086d973d91cec8d2e31080436ef0fb4a359cae927e742abfaaa6", size = 233316, upload-time = "2023-05-29T20:07:43.539Z" }, - { url = "https://files.pythonhosted.org/packages/04/d6/8cba3bf346e8b1a4fb3f084df7d8cea25a6b6c56aaca1f2e53829be17e9e/coverage-7.2.7-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b46517c02ccd08092f4fa99f24c3b83d8f92f739b4657b0f146246a0ca6a831d", size = 240159, upload-time = "2023-05-29T20:07:44.982Z" }, - { url = "https://files.pythonhosted.org/packages/6e/ea/4a252dc77ca0605b23d477729d139915e753ee89e4c9507630e12ad64a80/coverage-7.2.7-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a3d33a6b3eae87ceaefa91ffdc130b5e8536182cd6dfdbfc1aa56b46ff8c86de", size = 238127, upload-time = "2023-05-29T20:07:46.522Z" }, - { url = "https://files.pythonhosted.org/packages/9f/5c/d9760ac497c41f9c4841f5972d0edf05d50cad7814e86ee7d133ec4a0ac8/coverage-7.2.7-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:976b9c42fb2a43ebf304fa7d4a310e5f16cc99992f33eced91ef6f908bd8f33d", size = 239833, upload-time = "2023-05-29T20:07:47.992Z" }, - { url = "https://files.pythonhosted.org/packages/69/8c/26a95b08059db1cbb01e4b0e6d40f2e9debb628c6ca86b78f625ceaf9bab/coverage-7.2.7-cp312-cp312-win32.whl", hash = "sha256:8de8bb0e5ad103888d65abef8bca41ab93721647590a3f740100cd65c3b00511", size = 203463, upload-time = "2023-05-29T20:07:49.939Z" }, - { url = "https://files.pythonhosted.org/packages/b7/00/14b00a0748e9eda26e97be07a63cc911108844004687321ddcc213be956c/coverage-7.2.7-cp312-cp312-win_amd64.whl", hash = "sha256:9e31cb64d7de6b6f09702bb27c02d1904b3aebfca610c12772452c4e6c21a0d3", size = 204347, upload-time = "2023-05-29T20:07:51.909Z" }, + { url = "https://files.pythonhosted.org/packages/4b/37/d24c8f8220ff07b839b2c043ea4903a33b0f455abe673ae3c03bbdb7f212/coverage-7.13.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66a80c616f80181f4d643b0f9e709d97bcea413ecd9631e1dedc7401c8e6695d", size = 219381, upload-time = "2026-03-17T10:30:14.68Z" }, + { url = "https://files.pythonhosted.org/packages/35/8b/cd129b0ca4afe886a6ce9d183c44d8301acbd4ef248622e7c49a23145605/coverage-7.13.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:145ede53ccbafb297c1c9287f788d1bc3efd6c900da23bf6931b09eafc931587", size = 219880, upload-time = "2026-03-17T10:30:16.231Z" }, + { url = "https://files.pythonhosted.org/packages/55/2f/e0e5b237bffdb5d6c530ce87cc1d413a5b7d7dfd60fb067ad6d254c35c76/coverage-7.13.5-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0672854dc733c342fa3e957e0605256d2bf5934feeac328da9e0b5449634a642", size = 250303, upload-time = "2026-03-17T10:30:17.748Z" }, + { url = "https://files.pythonhosted.org/packages/92/be/b1afb692be85b947f3401375851484496134c5554e67e822c35f28bf2fbc/coverage-7.13.5-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ec10e2a42b41c923c2209b846126c6582db5e43a33157e9870ba9fb70dc7854b", size = 252218, upload-time = "2026-03-17T10:30:19.804Z" }, + { url = "https://files.pythonhosted.org/packages/da/69/2f47bb6fa1b8d1e3e5d0c4be8ccb4313c63d742476a619418f85740d597b/coverage-7.13.5-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:be3d4bbad9d4b037791794ddeedd7d64a56f5933a2c1373e18e9e568b9141686", size = 254326, upload-time = "2026-03-17T10:30:21.321Z" }, + { url = "https://files.pythonhosted.org/packages/d5/d0/79db81da58965bd29dabc8f4ad2a2af70611a57cba9d1ec006f072f30a54/coverage-7.13.5-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d2afbc5cc54d286bfb54541aa50b64cdb07a718227168c87b9e2fb8f25e1743", size = 256267, upload-time = "2026-03-17T10:30:23.094Z" }, + { url = "https://files.pythonhosted.org/packages/e5/32/d0d7cc8168f91ddab44c0ce4806b969df5f5fdfdbb568eaca2dbc2a04936/coverage-7.13.5-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3ad050321264c49c2fa67bb599100456fc51d004b82534f379d16445da40fb75", size = 250430, upload-time = "2026-03-17T10:30:25.311Z" }, + { url = "https://files.pythonhosted.org/packages/4d/06/a055311d891ddbe231cd69fdd20ea4be6e3603ffebddf8704b8ca8e10a3c/coverage-7.13.5-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7300c8a6d13335b29bb76d7651c66af6bd8658517c43499f110ddc6717bfc209", size = 252017, upload-time = "2026-03-17T10:30:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/d6/f6/d0fd2d21e29a657b5f77a2fe7082e1568158340dceb941954f776dce1b7b/coverage-7.13.5-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:eb07647a5738b89baab047f14edd18ded523de60f3b30e75c2acc826f79c839a", size = 250080, upload-time = "2026-03-17T10:30:29.481Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ab/0d7fb2efc2e9a5eb7ddcc6e722f834a69b454b7e6e5888c3a8567ecffb31/coverage-7.13.5-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9adb6688e3b53adffefd4a52d72cbd8b02602bfb8f74dcd862337182fd4d1a4e", size = 253843, upload-time = "2026-03-17T10:30:31.301Z" }, + { url = "https://files.pythonhosted.org/packages/ba/6f/7467b917bbf5408610178f62a49c0ed4377bb16c1657f689cc61470da8ce/coverage-7.13.5-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:7c8d4bc913dd70b93488d6c496c77f3aff5ea99a07e36a18f865bca55adef8bd", size = 249802, upload-time = "2026-03-17T10:30:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/75/2c/1172fb689df92135f5bfbbd69fc83017a76d24ea2e2f3a1154007e2fb9f8/coverage-7.13.5-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0e3c426ffc4cd952f54ee9ffbdd10345709ecc78a3ecfd796a57236bfad0b9b8", size = 250707, upload-time = "2026-03-17T10:30:35.2Z" }, + { url = "https://files.pythonhosted.org/packages/67/21/9ac389377380a07884e3b48ba7a620fcd9dbfaf1d40565facdc6b36ec9ef/coverage-7.13.5-cp311-cp311-win32.whl", hash = "sha256:259b69bb83ad9894c4b25be2528139eecba9a82646ebdda2d9db1ba28424a6bf", size = 221880, upload-time = "2026-03-17T10:30:36.775Z" }, + { url = "https://files.pythonhosted.org/packages/af/7f/4cd8a92531253f9d7c1bbecd9fa1b472907fb54446ca768c59b531248dc5/coverage-7.13.5-cp311-cp311-win_amd64.whl", hash = "sha256:258354455f4e86e3e9d0d17571d522e13b4e1e19bf0f8596bcf9476d61e7d8a9", size = 222816, upload-time = "2026-03-17T10:30:38.891Z" }, + { url = "https://files.pythonhosted.org/packages/12/a6/1d3f6155fb0010ca68eba7fe48ca6c9da7385058b77a95848710ecf189b1/coverage-7.13.5-cp311-cp311-win_arm64.whl", hash = "sha256:bff95879c33ec8da99fc9b6fe345ddb5be6414b41d6d1ad1c8f188d26f36e028", size = 221483, upload-time = "2026-03-17T10:30:40.463Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c3/a396306ba7db865bf96fc1fb3b7fd29bcbf3d829df642e77b13555163cd6/coverage-7.13.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:460cf0114c5016fa841214ff5564aa4864f11948da9440bc97e21ad1f4ba1e01", size = 219554, upload-time = "2026-03-17T10:30:42.208Z" }, + { url = "https://files.pythonhosted.org/packages/a6/16/a68a19e5384e93f811dccc51034b1fd0b865841c390e3c931dcc4699e035/coverage-7.13.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0e223ce4b4ed47f065bfb123687686512e37629be25cc63728557ae7db261422", size = 219908, upload-time = "2026-03-17T10:30:43.906Z" }, + { url = "https://files.pythonhosted.org/packages/29/72/20b917c6793af3a5ceb7fb9c50033f3ec7865f2911a1416b34a7cfa0813b/coverage-7.13.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:6e3370441f4513c6252bf042b9c36d22491142385049243253c7e48398a15a9f", size = 251419, upload-time = "2026-03-17T10:30:45.545Z" }, + { url = "https://files.pythonhosted.org/packages/8c/49/cd14b789536ac6a4778c453c6a2338bc0a2fb60c5a5a41b4008328b9acc1/coverage-7.13.5-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:03ccc709a17a1de074fb1d11f217342fb0d2b1582ed544f554fc9fc3f07e95f5", size = 254159, upload-time = "2026-03-17T10:30:47.204Z" }, + { url = "https://files.pythonhosted.org/packages/9d/00/7b0edcfe64e2ed4c0340dac14a52ad0f4c9bd0b8b5e531af7d55b703db7c/coverage-7.13.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3f4818d065964db3c1c66dc0fbdac5ac692ecbc875555e13374fdbe7eedb4376", size = 255270, upload-time = "2026-03-17T10:30:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/93/89/7ffc4ba0f5d0a55c1e84ea7cee39c9fc06af7b170513d83fbf3bbefce280/coverage-7.13.5-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:012d5319e66e9d5a218834642d6c35d265515a62f01157a45bcc036ecf947256", size = 257538, upload-time = "2026-03-17T10:30:50.77Z" }, + { url = "https://files.pythonhosted.org/packages/81/bd/73ddf85f93f7e6fa83e77ccecb6162d9415c79007b4bc124008a4995e4a7/coverage-7.13.5-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8dd02af98971bdb956363e4827d34425cb3df19ee550ef92855b0acb9c7ce51c", size = 251821, upload-time = "2026-03-17T10:30:52.5Z" }, + { url = "https://files.pythonhosted.org/packages/a0/81/278aff4e8dec4926a0bcb9486320752811f543a3ce5b602cc7a29978d073/coverage-7.13.5-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f08fd75c50a760c7eb068ae823777268daaf16a80b918fa58eea888f8e3919f5", size = 253191, upload-time = "2026-03-17T10:30:54.543Z" }, + { url = "https://files.pythonhosted.org/packages/70/ee/fe1621488e2e0a58d7e94c4800f0d96f79671553488d401a612bebae324b/coverage-7.13.5-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:843ea8643cf967d1ac7e8ecd4bb00c99135adf4816c0c0593fdcc47b597fcf09", size = 251337, upload-time = "2026-03-17T10:30:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/37/a6/f79fb37aa104b562207cc23cb5711ab6793608e246cae1e93f26b2236ed9/coverage-7.13.5-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:9d44d7aa963820b1b971dbecd90bfe5fe8f81cff79787eb6cca15750bd2f79b9", size = 255404, upload-time = "2026-03-17T10:30:58.427Z" }, + { url = "https://files.pythonhosted.org/packages/75/f0/ed15262a58ec81ce457ceb717b7f78752a1713556b19081b76e90896e8d4/coverage-7.13.5-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:7132bed4bd7b836200c591410ae7d97bf7ae8be6fc87d160b2bd881df929e7bf", size = 250903, upload-time = "2026-03-17T10:31:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e9/9129958f20e7e9d4d56d51d42ccf708d15cac355ff4ac6e736e97a9393d2/coverage-7.13.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a698e363641b98843c517817db75373c83254781426e94ada3197cabbc2c919c", size = 252780, upload-time = "2026-03-17T10:31:01.916Z" }, + { url = "https://files.pythonhosted.org/packages/a4/d7/0ad9b15812d81272db94379fe4c6df8fd17781cc7671fdfa30c76ba5ff7b/coverage-7.13.5-cp312-cp312-win32.whl", hash = "sha256:bdba0a6b8812e8c7df002d908a9a2ea3c36e92611b5708633c50869e6d922fdf", size = 222093, upload-time = "2026-03-17T10:31:03.642Z" }, + { url = "https://files.pythonhosted.org/packages/29/3d/821a9a5799fac2556bcf0bd37a70d1d11fa9e49784b6d22e92e8b2f85f18/coverage-7.13.5-cp312-cp312-win_amd64.whl", hash = "sha256:d2c87e0c473a10bffe991502eac389220533024c8082ec1ce849f4218dded810", size = 222900, upload-time = "2026-03-17T10:31:05.651Z" }, + { url = "https://files.pythonhosted.org/packages/d4/fa/2238c2ad08e35cf4f020ea721f717e09ec3152aea75d191a7faf3ef009a8/coverage-7.13.5-cp312-cp312-win_arm64.whl", hash = "sha256:bf69236a9a81bdca3bff53796237aab096cdbf8d78a66ad61e992d9dac7eb2de", size = 221515, upload-time = "2026-03-17T10:31:07.293Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ee/a4cf96b8ce1e566ed238f0659ac2d3f007ed1d14b181bcb684e19561a69a/coverage-7.13.5-py3-none-any.whl", hash = "sha256:34b02417cf070e173989b3db962f7ed56d2f644307b2cf9d5a0f258e13084a61", size = 211346, upload-time = "2026-03-17T10:33:15.691Z" }, ] [package.optional-dependencies] @@ -1342,94 +1326,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/c7/95349670e193b2891176e1b8e5f43e12b31bff6d9994f70e74ab385047f6/dateparser-1.3.0-py3-none-any.whl", hash = "sha256:8dc678b0a526e103379f02ae44337d424bd366aac727d3c6cf52ce1b01efbb5a", size = 318688, upload-time = "2026-02-04T16:00:04.652Z" }, ] -[[package]] -name = "daytona" -version = "0.128.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiofiles" }, - { name = "daytona-api-client" }, - { name = "daytona-api-client-async" }, - { name = "daytona-toolbox-api-client" }, - { name = "daytona-toolbox-api-client-async" }, - { name = "deprecated" }, - { name = "environs" }, - { name = "httpx" }, - { name = "multipart" }, - { name = "obstore" }, - { name = "pydantic" }, - { name = "toml" }, - { name = "websockets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/96/f1/c6af7843f1405ed53cd45946658f3bfa623d042f75e8cdd8924b5bef414a/daytona-0.128.1.tar.gz", hash = "sha256:4d9ab0cc2504b48223dbd2f4429b304b484292cdff3d942af8080bb9f359d6b1", size = 116493, upload-time = "2025-12-23T17:04:46.629Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c0/94/9027e12d3394a3b0c7705f738d78bcf5babd3e75a54a439e8130dfe6b943/daytona-0.128.1-py3-none-any.whl", hash = "sha256:d00536a899d5bab299837d0ad7d59a8c9cb1e76d5618f01fd0106fc59fbeac69", size = 144677, upload-time = "2025-12-23T17:04:45.585Z" }, -] - -[[package]] -name = "daytona-api-client" -version = "0.128.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e1/2f/df4138f03bdcc17fcc4737504df9176d9db8d40a91abc1e91d79c99436d3/daytona_api_client-0.128.1.tar.gz", hash = "sha256:e9db105bf5ea7ad4b55431e3bb7db1e3a8937557ffbca7dba6167bc5a6a63c96", size = 125691, upload-time = "2025-12-23T17:03:36.391Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/0a/a3c0213d5af8700e799e94802ecac4cb2926e9c64813ecb8f4f68c212d8d/daytona_api_client-0.128.1-py3-none-any.whl", hash = "sha256:eede1d93ec8995d7280e077127018508b94a488c6288363f50794128af56d339", size = 371877, upload-time = "2025-12-23T17:03:35.024Z" }, -] - -[[package]] -name = "daytona-api-client-async" -version = "0.128.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "aiohttp-retry" }, - { name = "pydantic" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/94/a3/64a11a4e18bea571249615dc7cefb390abf7704569138d12a2b40d841c7d/daytona_api_client_async-0.128.1.tar.gz", hash = "sha256:2fb7507cb4122ae2011aa1f52a38556c1ce9c137173648aa96ca227ef072eadd", size = 126674, upload-time = "2025-12-23T17:03:48.261Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/61/69efbd19194e55a96156911dda579b149afb160473ff1f6f151481d75b00/daytona_api_client_async-0.128.1-py3-none-any.whl", hash = "sha256:c0fb378e0df95fe0ae125d9f60cebfd4badc08e089d18584bfa6158a0002893b", size = 376906, upload-time = "2025-12-23T17:03:47.283Z" }, -] - -[[package]] -name = "daytona-toolbox-api-client" -version = "0.128.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/62/32/e3b205b79341caccaccdd09f4bf0650dbc5346b45a6459bb00e63fb6a6dd/daytona_toolbox_api_client-0.128.1.tar.gz", hash = "sha256:869ee431f485ed535868a93154e29c10e46fb2c36a0a7af79020385830e23c8f", size = 61374, upload-time = "2025-12-23T17:04:03.418Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/22/7a/eb16474ee0548a31ee9bddebc650d83c32ece388a968b491f8c5e0818df1/daytona_toolbox_api_client-0.128.1-py3-none-any.whl", hash = "sha256:dd22da7e7fc823802c657a35996f1f4824deeda1b248192504c67e6936a1a120", size = 164297, upload-time = "2025-12-23T17:04:02.195Z" }, -] - -[[package]] -name = "daytona-toolbox-api-client-async" -version = "0.128.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "aiohttp-retry" }, - { name = "pydantic" }, - { name = "python-dateutil" }, - { name = "typing-extensions" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/3b/c0/ee0534242826c8f4c09f9a422e77d676d8ce5fa37599d421fa596d5bef35/daytona_toolbox_api_client_async-0.128.1.tar.gz", hash = "sha256:d9ef0ec4d17fcc611e5c8d17ae300afb825b32bf8346fa6a2a8576d760ef0304", size = 58335, upload-time = "2025-12-23T17:04:01.946Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/d5/dac14a5226122df7f4bf54beecbc793493151668d0338c67c87e98f20cd1/daytona_toolbox_api_client_async-0.128.1-py3-none-any.whl", hash = "sha256:2ae4afb86f2d2568bd1df031764df0d41837e8ccab2200f2ed70fd61d18231d3", size = 165556, upload-time = "2025-12-23T17:04:00.979Z" }, -] - [[package]] name = "decorator" version = "5.2.1" @@ -1474,7 +1370,7 @@ wheels = [ [[package]] name = "dify-api" -version = "1.14.0rc1" +version = "1.13.0" source = { virtual = "." } dependencies = [ { name = "aliyun-log-python-sdk" }, @@ -1488,9 +1384,6 @@ dependencies = [ { name = "celery" }, { name = "charset-normalizer" }, { name = "croniter" }, - { name = "daytona" }, - { name = "docker" }, - { name = "e2b-code-interpreter" }, { name = "fastopenapi", extra = ["flask"] }, { name = "flask" }, { name = "flask-compress" }, @@ -1501,7 +1394,6 @@ dependencies = [ { name = "flask-restx" }, { name = "flask-sqlalchemy" }, { name = "gevent" }, - { name = "gevent-websocket" }, { name = "gmpy2" }, { name = "google-api-core" }, { name = "google-api-python-client" }, @@ -1542,7 +1434,6 @@ dependencies = [ { name = "opik" }, { name = "packaging" }, { name = "pandas", extra = ["excel", "output-formatting", "performance"] }, - { name = "paramiko" }, { name = "psycogreen" }, { name = "psycopg2-binary" }, { name = "pycryptodome" }, @@ -1553,14 +1444,12 @@ dependencies = [ { name = "pypdfium2" }, { name = "python-docx" }, { name = "python-dotenv" }, - { name = "python-socketio" }, { name = "pyyaml" }, { name = "readabilipy" }, { name = "redis", extra = ["hiredis"] }, { name = "resend" }, { name = "sendgrid" }, { name = "sentry-sdk", extra = ["flask"] }, - { name = "setuptools" }, { name = "sqlalchemy" }, { name = "sseclient-py" }, { name = "starlette" }, @@ -1686,46 +1575,42 @@ vdb = [ requires-dist = [ { name = "aliyun-log-python-sdk", specifier = "~=0.9.37" }, { name = "apscheduler", specifier = ">=3.11.0" }, - { name = "arize-phoenix-otel", specifier = "~=0.9.2" }, - { name = "azure-identity", specifier = "==1.16.1" }, + { name = "arize-phoenix-otel", specifier = "~=0.15.0" }, + { name = "azure-identity", specifier = "==1.25.2" }, { name = "beautifulsoup4", specifier = "==4.12.2" }, - { name = "boto3", specifier = "==1.35.99" }, + { name = "boto3", specifier = "==1.42.65" }, { name = "bs4", specifier = "~=0.0.1" }, { name = "cachetools", specifier = "~=5.3.0" }, { name = "celery", specifier = "~=5.5.2" }, { name = "charset-normalizer", specifier = ">=3.4.4" }, { name = "croniter", specifier = ">=6.0.0" }, - { name = "daytona", specifier = "==0.128.1" }, - { name = "docker", specifier = ">=7.1.0" }, - { name = "e2b-code-interpreter", specifier = ">=2.4.1" }, { name = "fastopenapi", extras = ["flask"], specifier = ">=0.7.0" }, { name = "flask", specifier = "~=3.1.2" }, - { name = "flask-compress", specifier = ">=1.17,<1.18" }, + { name = "flask-compress", specifier = ">=1.17,<1.24" }, { name = "flask-cors", specifier = "~=6.0.0" }, { name = "flask-login", specifier = "~=0.6.3" }, - { name = "flask-migrate", specifier = "~=4.0.7" }, + { name = "flask-migrate", specifier = "~=4.1.0" }, { name = "flask-orjson", specifier = "~=2.0.0" }, { name = "flask-restx", specifier = "~=1.3.2" }, { name = "flask-sqlalchemy", specifier = "~=3.1.1" }, { name = "gevent", specifier = "~=25.9.1" }, - { name = "gevent-websocket", specifier = "~=0.10.1" }, { name = "gmpy2", specifier = "~=2.3.0" }, { name = "google-api-core", specifier = ">=2.19.1" }, - { name = "google-api-python-client", specifier = "==2.189.0" }, + { name = "google-api-python-client", specifier = "==2.192.0" }, { name = "google-auth", specifier = ">=2.47.0" }, - { name = "google-auth-httplib2", specifier = "==0.2.0" }, + { name = "google-auth-httplib2", specifier = "==0.3.0" }, { name = "google-cloud-aiplatform", specifier = ">=1.123.0" }, { name = "googleapis-common-protos", specifier = ">=1.65.0" }, - { name = "gunicorn", specifier = "~=23.0.0" }, - { name = "httpx", extras = ["socks"], specifier = "~=0.28.1" }, + { name = "gunicorn", specifier = "~=25.1.0" }, + { name = "httpx", extras = ["socks"], specifier = "~=0.28.0" }, { name = "httpx-sse", specifier = "~=0.4.0" }, { name = "jieba", specifier = "==0.42.1" }, { name = "json-repair", specifier = ">=0.55.1" }, { name = "jsonschema", specifier = ">=4.25.1" }, { name = "langfuse", specifier = "~=2.51.3" }, - { name = "langsmith", specifier = "~=0.1.77" }, - { name = "litellm", specifier = "==1.77.1" }, - { name = "markdown", specifier = "~=3.5.1" }, + { name = "langsmith", specifier = "~=0.7.16" }, + { name = "litellm", specifier = "==1.82.1" }, + { name = "markdown", specifier = "~=3.8.1" }, { name = "mlflow-skinny", specifier = ">=3.0.0" }, { name = "numpy", specifier = "~=1.26.4" }, { name = "openpyxl", specifier = "~=3.1.5" }, @@ -1741,38 +1626,35 @@ requires-dist = [ { name = "opentelemetry-instrumentation-httpx", specifier = "==0.49b0" }, { name = "opentelemetry-instrumentation-redis", specifier = "==0.49b0" }, { name = "opentelemetry-instrumentation-sqlalchemy", specifier = "==0.49b0" }, - { name = "opentelemetry-propagator-b3", specifier = "==1.28.0" }, + { name = "opentelemetry-propagator-b3", specifier = "==1.40.0" }, { name = "opentelemetry-proto", specifier = "==1.28.0" }, { name = "opentelemetry-sdk", specifier = "==1.28.0" }, { name = "opentelemetry-semantic-conventions", specifier = "==0.49b0" }, { name = "opentelemetry-util-http", specifier = "==0.49b0" }, - { name = "opik", specifier = "~=1.8.72" }, - { name = "packaging", specifier = "==24.1" }, + { name = "opik", specifier = "~=1.10.37" }, + { name = "packaging", specifier = "~=23.2" }, { name = "pandas", extras = ["excel", "output-formatting", "performance"], specifier = "~=2.2.2" }, - { name = "paramiko", specifier = ">=3.5.1" }, { name = "psycogreen", specifier = "~=1.0.2" }, { name = "psycopg2-binary", specifier = "~=2.9.6" }, { name = "pycryptodome", specifier = "==3.23.0" }, { name = "pydantic", specifier = "~=2.12.5" }, - { name = "pydantic-extra-types", specifier = "~=2.10.3" }, - { name = "pydantic-settings", specifier = "~=2.12.0" }, - { name = "pyjwt", specifier = "~=2.11.0" }, + { name = "pydantic-extra-types", specifier = "~=2.11.0" }, + { name = "pydantic-settings", specifier = "~=2.13.1" }, + { name = "pyjwt", specifier = "~=2.12.0" }, { name = "pypdfium2", specifier = "==5.2.0" }, { name = "python-docx", specifier = "~=1.2.0" }, { name = "python-dotenv", specifier = "==1.0.1" }, - { name = "python-socketio", specifier = "~=5.13.0" }, { name = "pyyaml", specifier = "~=6.0.1" }, { name = "readabilipy", specifier = "~=0.3.0" }, - { name = "redis", extras = ["hiredis"], specifier = "~=7.2.0" }, + { name = "redis", extras = ["hiredis"], specifier = "~=7.3.0" }, { name = "resend", specifier = "~=2.9.0" }, { name = "sendgrid", specifier = "~=6.12.3" }, { name = "sentry-sdk", extras = ["flask"], specifier = "~=2.28.0" }, - { name = "setuptools", specifier = "<81" }, { name = "sqlalchemy", specifier = "~=2.0.29" }, { name = "sseclient-py", specifier = "~=1.8.0" }, { name = "starlette", specifier = "==0.49.1" }, - { name = "tiktoken", specifier = "~=0.9.0" }, - { name = "transformers", specifier = "~=4.56.1" }, + { name = "tiktoken", specifier = "~=0.12.0" }, + { name = "transformers", specifier = "~=5.3.0" }, { name = "unstructured", extras = ["docx", "epub", "md", "ppt", "pptx"], specifier = "~=0.18.18" }, { name = "weave", specifier = ">=0.52.16" }, { name = "weaviate-client", specifier = "==4.17.0" }, @@ -1782,51 +1664,51 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ - { name = "basedpyright", specifier = "~=1.31.0" }, + { name = "basedpyright", specifier = "~=1.38.2" }, { name = "boto3-stubs", specifier = ">=1.38.20" }, { name = "celery-types", specifier = ">=0.23.0" }, - { name = "coverage", specifier = "~=7.2.4" }, - { name = "dotenv-linter", specifier = "~=0.5.0" }, - { name = "faker", specifier = "~=38.2.0" }, + { name = "coverage", specifier = "~=7.13.4" }, + { name = "dotenv-linter", specifier = "~=0.7.0" }, + { name = "faker", specifier = "~=40.8.0" }, { name = "hypothesis", specifier = ">=6.131.15" }, { name = "import-linter", specifier = ">=2.3" }, { name = "lxml-stubs", specifier = "~=0.5.1" }, - { name = "mypy", specifier = "~=1.17.1" }, - { name = "pandas-stubs", specifier = "~=2.2.3" }, - { name = "pyrefly", specifier = ">=0.54.0" }, - { name = "pytest", specifier = "~=8.3.2" }, - { name = "pytest-benchmark", specifier = "~=4.0.0" }, - { name = "pytest-cov", specifier = "~=4.1.0" }, + { name = "mypy", specifier = "~=1.19.1" }, + { name = "pandas-stubs", specifier = "~=3.0.0" }, + { name = "pyrefly", specifier = ">=0.55.0" }, + { name = "pytest", specifier = "~=9.0.2" }, + { name = "pytest-benchmark", specifier = "~=5.2.3" }, + { name = "pytest-cov", specifier = "~=7.0.0" }, { name = "pytest-env", specifier = "~=1.1.3" }, - { name = "pytest-mock", specifier = "~=3.14.0" }, + { name = "pytest-mock", specifier = "~=3.15.1" }, { name = "pytest-timeout", specifier = ">=2.4.0" }, { name = "pytest-xdist", specifier = ">=3.8.0" }, - { name = "ruff", specifier = "~=0.14.0" }, + { name = "ruff", specifier = "~=0.15.5" }, { name = "scipy-stubs", specifier = ">=1.15.3.0" }, { name = "sseclient-py", specifier = ">=1.8.0" }, { name = "testcontainers", specifier = "~=4.13.2" }, { name = "types-aiofiles", specifier = "~=25.1.0" }, { name = "types-beautifulsoup4", specifier = "~=4.12.0" }, - { name = "types-cachetools", specifier = "~=5.5.0" }, + { name = "types-cachetools", specifier = "~=6.2.0" }, { name = "types-cffi", specifier = ">=1.17.0" }, { name = "types-colorama", specifier = "~=0.4.15" }, { name = "types-defusedxml", specifier = "~=0.7.0" }, - { name = "types-deprecated", specifier = "~=1.2.15" }, - { name = "types-docutils", specifier = "~=0.21.0" }, - { name = "types-flask-cors", specifier = "~=5.0.0" }, + { name = "types-deprecated", specifier = "~=1.3.1" }, + { name = "types-docutils", specifier = "~=0.22.3" }, + { name = "types-flask-cors", specifier = "~=6.0.0" }, { name = "types-flask-migrate", specifier = "~=4.1.0" }, { name = "types-gevent", specifier = "~=25.9.0" }, { name = "types-greenlet", specifier = "~=3.3.0" }, { name = "types-html5lib", specifier = "~=1.1.11" }, { name = "types-jmespath", specifier = ">=1.0.2.20240106" }, - { name = "types-jsonschema", specifier = "~=4.23.0" }, + { name = "types-jsonschema", specifier = "~=4.26.0" }, { name = "types-markdown", specifier = "~=3.10.2" }, - { name = "types-oauthlib", specifier = "~=3.2.0" }, + { name = "types-oauthlib", specifier = "~=3.3.0" }, { name = "types-objgraph", specifier = "~=3.6.0" }, { name = "types-olefile", specifier = "~=0.47.0" }, { name = "types-openpyxl", specifier = "~=3.1.5" }, { name = "types-pexpect", specifier = "~=4.9.0" }, - { name = "types-protobuf", specifier = "~=5.29.1" }, + { name = "types-protobuf", specifier = "~=6.32.1" }, { name = "types-psutil", specifier = "~=7.2.2" }, { name = "types-psycopg2", specifier = "~=2.9.21" }, { name = "types-pygments", specifier = "~=2.19.0" }, @@ -1834,10 +1716,10 @@ dev = [ { name = "types-pyopenssl", specifier = ">=24.1.0" }, { name = "types-python-dateutil", specifier = "~=2.9.0" }, { name = "types-python-http-client", specifier = ">=3.3.7.20240910" }, - { name = "types-pywin32", specifier = "~=310.0.0" }, + { name = "types-pywin32", specifier = "~=311.0.0" }, { name = "types-pyyaml", specifier = "~=6.0.12" }, { name = "types-redis", specifier = ">=4.6.0.20241004" }, - { name = "types-regex", specifier = "~=2024.11.6" }, + { name = "types-regex", specifier = "~=2026.2.28" }, { name = "types-setuptools", specifier = ">=80.9.0" }, { name = "types-shapely", specifier = "~=2.1.0" }, { name = "types-simplejson", specifier = ">=3.20.0" }, @@ -1847,13 +1729,13 @@ dev = [ { name = "types-ujson", specifier = ">=5.10.0" }, ] storage = [ - { name = "azure-storage-blob", specifier = "==12.26.0" }, + { name = "azure-storage-blob", specifier = "==12.28.0" }, { name = "bce-python-sdk", specifier = "~=0.9.23" }, - { name = "cos-python-sdk-v5", specifier = "==1.9.38" }, - { name = "esdk-obs-python", specifier = "==3.25.8" }, + { name = "cos-python-sdk-v5", specifier = "==1.9.41" }, + { name = "esdk-obs-python", specifier = "==3.26.2" }, { name = "google-cloud-storage", specifier = ">=3.0.0" }, { name = "opendal", specifier = "~=0.46.0" }, - { name = "oss2", specifier = "==2.18.5" }, + { name = "oss2", specifier = "==2.19.1" }, { name = "supabase", specifier = "~=2.18.1" }, { name = "tos", specifier = "~=2.9.0" }, ] @@ -1871,7 +1753,7 @@ vdb = [ { name = "elasticsearch", specifier = "==8.14.0" }, { name = "intersystems-irispython", specifier = ">=5.1.0" }, { name = "mo-vector", specifier = "~=0.1.13" }, - { name = "mysql-connector-python", specifier = "==9.5.0" }, + { name = "mysql-connector-python", specifier = ">=9.3.0" }, { name = "opensearch-py", specifier = "==3.1.0" }, { name = "oracledb", specifier = "==3.3.0" }, { name = "pgvecto-rs", extras = ["sqlalchemy"], specifier = "~=0.2.1" }, @@ -1921,15 +1803,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, ] -[[package]] -name = "dockerfile-parse" -version = "2.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/92/df/929ee0b5d2c8bd8d713c45e71b94ab57c7e11e322130724d54f469b2cd48/dockerfile-parse-2.0.1.tar.gz", hash = "sha256:3184ccdc513221983e503ac00e1aa504a2aa8f84e5de673c46b0b6eee99ec7bc", size = 24556, upload-time = "2023-07-18T13:36:07.897Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/6c/79cd5bc1b880d8c1a9a5550aa8dacd57353fa3bb2457227e1fb47383eb49/dockerfile_parse-2.0.1-py2.py3-none-any.whl", hash = "sha256:bdffd126d2eb26acf1066acb54cb2e336682e1d72b974a40894fac76a4df17f6", size = 14845, upload-time = "2023-07-18T13:36:06.052Z" }, -] - [[package]] name = "docstring-parser" version = "0.17.0" @@ -1941,18 +1814,18 @@ wheels = [ [[package]] name = "dotenv-linter" -version = "0.5.0" +version = "0.7.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "click" }, { name = "click-default-group" }, - { name = "ply" }, + { name = "lark" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ef/fe/77e184ccc312f6263cbcc48a9579eec99f5c7ff72a9b1bd7812cafc22bbb/dotenv_linter-0.5.0.tar.gz", hash = "sha256:4862a8393e5ecdfb32982f1b32dbc006fff969a7b3c8608ba7db536108beeaea", size = 15346, upload-time = "2024-03-13T11:52:10.52Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/e5/515ca4e069b70ba0be477ab0a193855c08066f9ef1a9350dcfbdc8f12f87/dotenv_linter-0.7.0.tar.gz", hash = "sha256:24ed93c1028d6305d6787e51773badf3346e53012ad4f5ada9cf747d2da6de13", size = 14033, upload-time = "2025-04-28T17:40:00.771Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f0/01/62ed4374340e6cf17c5084828974d96db8085e4018439ac41dc3cbbbcab3/dotenv_linter-0.5.0-py3-none-any.whl", hash = "sha256:fd01cca7f2140cb1710f49cbc1bf0e62397a75a6f0522d26a8b9b2331143c8bd", size = 21770, upload-time = "2024-03-13T11:52:08.607Z" }, + { url = "https://files.pythonhosted.org/packages/6e/5e/e26881b8d6bd6498c1a7225fba8ead3626a9f4b2d7d29dd272a875753d0d/dotenv_linter-0.7.0-py3-none-any.whl", hash = "sha256:0ffdf0c7435bd638aba5ff6cc9ea53bf093488bf1c722e363e902008659bb1fb", size = 19806, upload-time = "2025-04-28T17:39:58.395Z" }, ] [[package]] @@ -1964,41 +1837,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" }, ] -[[package]] -name = "e2b" -version = "2.13.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "dockerfile-parse" }, - { name = "httpcore" }, - { name = "httpx" }, - { name = "packaging" }, - { name = "protobuf" }, - { name = "python-dateutil" }, - { name = "rich" }, - { name = "typing-extensions" }, - { name = "wcmatch" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/37/d0/745fe80a0bcc3b61eb81ab4b7640a10245625dc71479ce7ce9da9d9cd896/e2b-2.13.2.tar.gz", hash = "sha256:c0e81a3920091874fdf73c0b8f376b28766212db9f1cea5d8bd56a2e95d2436c", size = 133429, upload-time = "2026-02-09T19:27:58.531Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/5b/f83b0397406bb07b9572fc32ecd98502b104a3cfaba85ba4536e77146ccd/e2b-2.13.2-py3-none-any.whl", hash = "sha256:d91d5293bc0dd1917c72a6e6b35e86513607be2666a14ae18c57b921e7864de4", size = 240668, upload-time = "2026-02-09T19:27:57.126Z" }, -] - -[[package]] -name = "e2b-code-interpreter" -version = "2.4.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "attrs" }, - { name = "e2b" }, - { name = "httpx" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1e/eb/db6e51edd9f3402fd68d026572579b9b1bd833b10d990376a1e4c05d5b8d/e2b_code_interpreter-2.4.1.tar.gz", hash = "sha256:4b15014ee0d0dfcdc3072e1f409cbb87ca48f48d53d75629b7257e5513b9e7dd", size = 10700, upload-time = "2025-11-26T18:12:38.086Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1b/e7/09b9106ead227f7be14bd97c3181391ee498bb38933b1a9c566b72c8567a/e2b_code_interpreter-2.4.1-py3-none-any.whl", hash = "sha256:15d35f025b4a15033e119f2e12e7ac65657ad2b5a013fa9149e74581fbee778a", size = 13719, upload-time = "2025-11-26T18:12:36.7Z" }, -] - [[package]] name = "elastic-transport" version = "8.17.1" @@ -2033,29 +1871,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e1/5e/4b5aaaabddfacfe36ba7768817bd1f71a7a810a43705e531f3ae4c690767/emoji-2.15.0-py3-none-any.whl", hash = "sha256:205296793d66a89d88af4688fa57fd6496732eb48917a87175a023c8138995eb", size = 608433, upload-time = "2025-09-21T12:13:01.197Z" }, ] -[[package]] -name = "environs" -version = "14.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "marshmallow" }, - { name = "python-dotenv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/aa/75/06801d5beeb398ed3903167af9376bb81c4ac41c44a53d45193065ebb1a8/environs-14.5.0.tar.gz", hash = "sha256:f7b8f6fcf3301bc674bc9c03e39b5986d116126ffb96764efd34c339ed9464ee", size = 35426, upload-time = "2025-11-02T21:30:36.78Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/f3/6961beb9a1e77d01dee1dd48f00fb3064429c8abcfa26aa863eb7cb2b6dd/environs-14.5.0-py3-none-any.whl", hash = "sha256:1abd3e3a5721fb09797438d6c902bc2f35d4580dfaffe68b8ee588b67b504e13", size = 17202, upload-time = "2025-11-02T21:30:35.186Z" }, -] - [[package]] name = "esdk-obs-python" -version = "3.25.8" +version = "3.26.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "crcmod" }, { name = "pycryptodome" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/40/99/52362d6e081a642d6de78f6ab53baa5e3f82f2386c48954e18ee7b4ab22b/esdk-obs-python-3.25.8.tar.gz", hash = "sha256:aeded00b27ecd5a25ffaec38a2cc9416b51923d48db96c663f1a735f859b5273", size = 96302, upload-time = "2025-09-01T11:35:20.432Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/9a/090f718114eec808c04762d9ea64f9e6f170ee419a673beba8b7810ec758/esdk_obs_python-3.26.2.tar.gz", hash = "sha256:dc865356bb4be474e5eaa557ff226f0f89ac8f5afff61a1cc85143079bf6e223", size = 95922, upload-time = "2026-03-07T10:38:16.732Z" } [[package]] name = "et-xmlfile" @@ -2085,14 +1910,14 @@ wheels = [ [[package]] name = "faker" -version = "38.2.0" +version = "40.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "tzdata" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/27/022d4dbd4c20567b4c294f79a133cc2f05240ea61e0d515ead18c995c249/faker-38.2.0.tar.gz", hash = "sha256:20672803db9c7cb97f9b56c18c54b915b6f1d8991f63d1d673642dc43f5ce7ab", size = 1941469, upload-time = "2025-11-19T16:37:31.892Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/15/88f23ff43dc1587797cfefe2d4e21cb2ec714248603ef2bd8605a5ebe8cc/faker-40.8.1.tar.gz", hash = "sha256:4e5dc0932fb156381ffa33202c512d96300e844767de32144619215ac76f3db4", size = 1956832, upload-time = "2026-03-13T14:11:53.923Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/17/93/00c94d45f55c336434a15f98d906387e87ce28f9918e4444829a8fda432d/faker-38.2.0-py3-none-any.whl", hash = "sha256:35fe4a0a79dee0dc4103a6083ee9224941e7d3594811a50e3969e547b0d2ee65", size = 1980505, upload-time = "2025-11-19T16:37:30.208Z" }, + { url = "https://files.pythonhosted.org/packages/17/43/a5f53028896e557051f0e1ff18e093f3dff731a73c2df7703c86bcb4af8e/faker-40.8.1-py3-none-any.whl", hash = "sha256:1db29cf8ad2ba34aaceeb6ce3a084f1c6eaeb8b8325638da6cbf3d3e934ea40d", size = 1989127, upload-time = "2026-03-13T14:11:51.641Z" }, ] [[package]] @@ -2249,16 +2074,16 @@ wheels = [ [[package]] name = "flask-migrate" -version = "4.0.7" +version = "4.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "alembic" }, { name = "flask" }, { name = "flask-sqlalchemy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3b/e2/4008fc0d298d7ce797021b194bbe151d4d12db670691648a226d4fc8aefc/Flask-Migrate-4.0.7.tar.gz", hash = "sha256:dff7dd25113c210b069af280ea713b883f3840c1e3455274745d7355778c8622", size = 21770, upload-time = "2024-03-11T18:43:01.498Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/8e/47c7b3c93855ceffc2eabfa271782332942443321a07de193e4198f920cf/flask_migrate-4.1.0.tar.gz", hash = "sha256:1a336b06eb2c3ace005f5f2ded8641d534c18798d64061f6ff11f79e1434126d", size = 21965, upload-time = "2025-01-10T18:51:11.848Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/01/587023575286236f95d2ab8a826c320375ed5ea2102bb103ed89704ffa6b/Flask_Migrate-4.0.7-py3-none-any.whl", hash = "sha256:5c532be17e7b43a223b7500d620edae33795df27c75811ddf32560f7d48ec617", size = 21127, upload-time = "2024-03-11T18:42:59.462Z" }, + { url = "https://files.pythonhosted.org/packages/d2/c4/3f329b23d769fe7628a5fc57ad36956f1fb7132cf8837be6da762b197327/Flask_Migrate-4.1.0-py3-none-any.whl", hash = "sha256:24d8051af161782e0743af1b04a152d007bad9772b2bca67b7ec1e8ceeb3910d", size = 21237, upload-time = "2025-01-10T18:51:09.527Z" }, ] [[package]] @@ -2400,18 +2225,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/98/caf06d5d22a7c129c1fb2fc1477306902a2c8ddfd399cd26bbbd4caf2141/gevent-25.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:4acd6bcd5feabf22c7c5174bd3b9535ee9f088d2bbce789f740ad8d6554b18f3", size = 1682837, upload-time = "2025-09-17T19:48:47.318Z" }, ] -[[package]] -name = "gevent-websocket" -version = "0.10.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "gevent" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/d2/6fa19239ff1ab072af40ebf339acd91fb97f34617c2ee625b8e34bf42393/gevent-websocket-0.10.1.tar.gz", hash = "sha256:7eaef32968290c9121f7c35b973e2cc302ffb076d018c9068d2f5ca8b2d85fb0", size = 18366, upload-time = "2017-03-12T22:46:05.68Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/84/2dc373eb6493e00c884cc11e6c059ec97abae2678d42f06bf780570b0193/gevent_websocket-0.10.1-py3-none-any.whl", hash = "sha256:17b67d91282f8f4c973eba0551183fc84f56f1c90c8f6b6b30256f31f66f5242", size = 22987, upload-time = "2017-03-12T22:46:03.611Z" }, -] - [[package]] name = "gitdb" version = "4.0.12" @@ -2501,7 +2314,7 @@ grpc = [ [[package]] name = "google-api-python-client" -version = "2.189.0" +version = "2.192.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-api-core" }, @@ -2510,9 +2323,9 @@ dependencies = [ { name = "httplib2" }, { name = "uritemplate" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6f/f8/0783aeca3410ee053d4dd1fccafd85197847b8f84dd038e036634605d083/google_api_python_client-2.189.0.tar.gz", hash = "sha256:45f2d8559b5c895dde6ad3fb33de025f5cb2c197fa5862f18df7f5295a172741", size = 13979470, upload-time = "2026-02-03T19:24:55.432Z" } +sdist = { url = "https://files.pythonhosted.org/packages/85/d8/489052a40935e45b9b5b3d6accc14b041360c1507bdc659c2e1a19aaa3ff/google_api_python_client-2.192.0.tar.gz", hash = "sha256:d48cfa6078fadea788425481b007af33fe0ab6537b78f37da914fb6fc112eb27", size = 14209505, upload-time = "2026-03-05T15:17:01.598Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/44/3677ff27998214f2fa7957359da48da378a0ffff1bd0bdaba42e752bc13e/google_api_python_client-2.189.0-py3-none-any.whl", hash = "sha256:a258c09660a49c6159173f8bbece171278e917e104a11f0640b34751b79c8a1a", size = 14547633, upload-time = "2026-02-03T19:24:52.845Z" }, + { url = "https://files.pythonhosted.org/packages/e0/76/ec4128f00fefb9011635ae2abc67d7dacd05c8559378f8f05f0c907c38d8/google_api_python_client-2.192.0-py3-none-any.whl", hash = "sha256:63a57d4457cd97df1d63eb89c5fda03c5a50588dcbc32c0115dd1433c08f4b62", size = 14783267, upload-time = "2026-03-05T15:16:58.804Z" }, ] [[package]] @@ -2535,15 +2348,15 @@ requests = [ [[package]] name = "google-auth-httplib2" -version = "0.2.0" +version = "0.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-auth" }, { name = "httplib2" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/56/be/217a598a818567b28e859ff087f347475c807a5649296fb5a817c58dacef/google-auth-httplib2-0.2.0.tar.gz", hash = "sha256:38aa7badf48f974f1eb9861794e9c0cb2a0511a4ec0679b1f886d108f5640e05", size = 10842, upload-time = "2023-12-12T17:40:30.722Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d5/ad/c1f2b1175096a8d04cf202ad5ea6065f108d26be6fc7215876bde4a7981d/google_auth_httplib2-0.3.0.tar.gz", hash = "sha256:177898a0175252480d5ed916aeea183c2df87c1f9c26705d74ae6b951c268b0b", size = 11134, upload-time = "2025-12-15T22:13:51.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl", hash = "sha256:b65a0a2123300dd71281a7bf6e64d65a0759287df52729bdd1ae2e47dc311a3d", size = 9253, upload-time = "2023-12-12T17:40:13.055Z" }, + { url = "https://files.pythonhosted.org/packages/99/d5/3c97526c8796d3caf5f4b3bed2b05e8a7102326f00a334e7a438237f3b22/google_auth_httplib2-0.3.0-py3-none-any.whl", hash = "sha256:426167e5df066e3f5a0fc7ea18768c08e7296046594ce4c8c409c2457dd1f776", size = 9529, upload-time = "2025-12-15T22:13:51.048Z" }, ] [[package]] @@ -2911,14 +2724,14 @@ wheels = [ [[package]] name = "gunicorn" -version = "23.0.0" +version = "25.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" } +sdist = { url = "https://files.pythonhosted.org/packages/66/13/ef67f59f6a7896fdc2c1d62b5665c5219d6b0a9a1784938eb9a28e55e128/gunicorn-25.1.0.tar.gz", hash = "sha256:1426611d959fa77e7de89f8c0f32eed6aa03ee735f98c01efba3e281b1c47616", size = 594377, upload-time = "2026-02-13T11:09:58.989Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/7d/6dac2a6e1eba33ee43f318edbed4ff29151a49b5d37f080aad1e6469bca4/gunicorn-23.0.0-py3-none-any.whl", hash = "sha256:ec400d38950de4dfd418cff8328b2c8faed0edb0d517d3394e457c317908ca4d", size = 85029, upload-time = "2024-08-10T20:25:24.996Z" }, + { url = "https://files.pythonhosted.org/packages/da/73/4ad5b1f6a2e21cf1e85afdaad2b7b1a933985e2f5d679147a1953aaa192c/gunicorn-25.1.0-py3-none-any.whl", hash = "sha256:d0b1236ccf27f72cfe14bce7caadf467186f19e865094ca84221424e839b8b8b", size = 197067, upload-time = "2026-02-13T11:09:57.146Z" }, ] [[package]] @@ -2945,17 +2758,18 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.2.0" +version = "1.4.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/08/23c84a26716382c89151b5b447b4beb19e3345f3a93d3b73009a71a57ad3/hf_xet-1.4.2.tar.gz", hash = "sha256:b7457b6b482d9e0743bd116363239b1fa904a5e65deede350fbc0c4ea67c71ea", size = 672357, upload-time = "2026-03-13T06:58:51.077Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, - { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, - { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" }, - { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, - { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, - { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, - { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, + { url = "https://files.pythonhosted.org/packages/b4/86/b40b83a2ff03ef05c4478d2672b1fc2b9683ff870e2b25f4f3af240f2e7b/hf_xet-1.4.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:71f02d6e4cdd07f344f6844845d78518cc7186bd2bc52d37c3b73dc26a3b0bc5", size = 3800339, upload-time = "2026-03-13T06:58:36.245Z" }, + { url = "https://files.pythonhosted.org/packages/64/2e/af4475c32b4378b0e92a587adb1aa3ec53e3450fd3e5fe0372a874531c00/hf_xet-1.4.2-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e9b38d876e94d4bdcf650778d6ebbaa791dd28de08db9736c43faff06ede1b5a", size = 3559664, upload-time = "2026-03-13T06:58:34.787Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4c/781267da3188db679e601de18112021a5cb16506fe86b246e22c5401a9c4/hf_xet-1.4.2-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:77e8c180b7ef12d8a96739a4e1e558847002afe9ea63b6f6358b2271a8bdda1c", size = 4217422, upload-time = "2026-03-13T06:58:27.472Z" }, + { url = "https://files.pythonhosted.org/packages/68/47/d6cf4a39ecf6c7705f887a46f6ef5c8455b44ad9eb0d391aa7e8a2ff7fea/hf_xet-1.4.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c3b3c6a882016b94b6c210957502ff7877802d0dbda8ad142c8595db8b944271", size = 3992847, upload-time = "2026-03-13T06:58:25.989Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ef/e80815061abff54697239803948abc665c6b1d237102c174f4f7a9a5ffc5/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9d9a634cc929cfbaf2e1a50c0e532ae8c78fa98618426769480c58501e8c8ac2", size = 4193843, upload-time = "2026-03-13T06:58:44.59Z" }, + { url = "https://files.pythonhosted.org/packages/54/75/07f6aa680575d9646c4167db6407c41340cbe2357f5654c4e72a1b01ca14/hf_xet-1.4.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6b0932eb8b10317ea78b7da6bab172b17be03bbcd7809383d8d5abd6a2233e04", size = 4432751, upload-time = "2026-03-13T06:58:46.533Z" }, + { url = "https://files.pythonhosted.org/packages/cd/71/193eabd7e7d4b903c4aa983a215509c6114915a5a237525ec562baddb868/hf_xet-1.4.2-cp37-abi3-win_amd64.whl", hash = "sha256:ad185719fb2e8ac26f88c8100562dbf9dbdcc3d9d2add00faa94b5f106aea53f", size = 3671149, upload-time = "2026-03-13T06:58:57.07Z" }, + { url = "https://files.pythonhosted.org/packages/b4/7e/ccf239da366b37ba7f0b36095450efae4a64980bdc7ec2f51354205fdf39/hf_xet-1.4.2-cp37-abi3-win_arm64.whl", hash = "sha256:32c012286b581f783653e718c1862aea5b9eb140631685bb0c5e7012c8719a87", size = 3533426, upload-time = "2026-03-13T06:58:55.46Z" }, ] [[package]] @@ -3095,21 +2909,22 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "0.36.2" +version = "1.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, { name = "packaging" }, { name = "pyyaml" }, - { name = "requests" }, { name = "tqdm" }, + { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" } +sdist = { url = "https://files.pythonhosted.org/packages/19/15/eafc1c57bf0f8afffb243dcd4c0cceb785e956acc17bba4d9bf2ae21fc9c/huggingface_hub-1.7.2.tar.gz", hash = "sha256:7f7e294e9bbb822e025bdb2ada025fa4344d978175a7f78e824d86e35f7ab43b", size = 724684, upload-time = "2026-03-20T10:36:08.767Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" }, + { url = "https://files.pythonhosted.org/packages/08/de/3ad061a05f74728927ded48c90b73521b9a9328c85d841bdefb30e01fb85/huggingface_hub-1.7.2-py3-none-any.whl", hash = "sha256:288f33a0a17b2a73a1359e2a5fd28d1becb2c121748c6173ab8643fb342c850e", size = 618036, upload-time = "2026-03-20T10:36:06.824Z" }, ] [[package]] @@ -3213,15 +3028,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/7f/8a80a1c7c2ed05822b5a2b312d2995f30c533641f8198366ba2e26a7bb03/intervaltree-3.2.1-py2.py3-none-any.whl", hash = "sha256:a8a8381bbd35d48ceebee932c77ffc988492d22fb1d27d0ba1d74a7694eb8f0b", size = 25929, upload-time = "2025-12-24T04:25:05.298Z" }, ] -[[package]] -name = "invoke" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/de/bd/b461d3424a24c80490313fd77feeb666ca4f6a28c7e72713e3d9095719b4/invoke-2.2.1.tar.gz", hash = "sha256:515bf49b4a48932b79b024590348da22f39c4942dff991ad1fb8b8baea1be707", size = 304762, upload-time = "2025-10-11T00:36:35.172Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/4b/b99e37f88336009971405cbb7630610322ed6fbfa31e1d7ab3fbf3049a2d/invoke-2.2.1-py3-none-any.whl", hash = "sha256:2413bc441b376e5cd3f55bb5d364f973ad8bdd7bf87e53c79de3c11bf3feecc8", size = 160287, upload-time = "2025-10-11T00:36:33.703Z" }, -] - [[package]] name = "isodate" version = "0.7.2" @@ -3427,23 +3233,70 @@ wheels = [ [[package]] name = "langsmith" -version = "0.1.147" +version = "0.7.22" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "httpx" }, { name = "orjson", marker = "platform_python_implementation != 'PyPy'" }, + { name = "packaging" }, { name = "pydantic" }, { name = "requests" }, { name = "requests-toolbelt" }, + { name = "uuid-utils" }, + { name = "xxhash" }, + { name = "zstandard" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6c/56/201dd94d492ae47c1bf9b50cacc1985113dc2288d8f15857e1f4a6818376/langsmith-0.1.147.tar.gz", hash = "sha256:2e933220318a4e73034657103b3b1a3a6109cc5db3566a7e8e03be8d6d7def7a", size = 300453, upload-time = "2024-11-27T17:32:41.297Z" } +sdist = { url = "https://files.pythonhosted.org/packages/be/2a/2d5e6c67396fd228670af278c4da7bd6db2b8d11deaf6f108490b6d3f561/langsmith-0.7.22.tar.gz", hash = "sha256:35bfe795d648b069958280760564632fd28ebc9921c04f3e209c0db6a6c7dc04", size = 1134923, upload-time = "2026-03-19T22:45:23.492Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/f0/63b06b99b730b9954f8709f6f7d9b8d076fa0a973e472efe278089bde42b/langsmith-0.1.147-py3-none-any.whl", hash = "sha256:7166fc23b965ccf839d64945a78e9f1157757add228b086141eb03a60d699a15", size = 311812, upload-time = "2024-11-27T17:32:39.569Z" }, + { url = "https://files.pythonhosted.org/packages/1a/94/1f5d72655ab6534129540843776c40eff757387b88e798d8b3bf7e313fd4/langsmith-0.7.22-py3-none-any.whl", hash = "sha256:6e9d5148314d74e86748cb9d3898632cad0320c9323d95f70f969e5bc078eee4", size = 359927, upload-time = "2026-03-19T22:45:21.603Z" }, +] + +[[package]] +name = "lark" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" }, +] + +[[package]] +name = "librt" +version = "0.8.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/9c/b4b0c54d84da4a94b37bd44151e46d5e583c9534c7e02250b961b1b6d8a8/librt-0.8.1.tar.gz", hash = "sha256:be46a14693955b3bd96014ccbdb8339ee8c9346fbe11c1b78901b55125f14c73", size = 177471, upload-time = "2026-02-17T16:13:06.101Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/01/0e748af5e4fee180cf7cd12bd12b0513ad23b045dccb2a83191bde82d168/librt-0.8.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:681dc2451d6d846794a828c16c22dc452d924e9f700a485b7ecb887a30aad1fd", size = 65315, upload-time = "2026-02-17T16:11:25.152Z" }, + { url = "https://files.pythonhosted.org/packages/9d/4d/7184806efda571887c798d573ca4134c80ac8642dcdd32f12c31b939c595/librt-0.8.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3b4350b13cc0e6f5bec8fa7caf29a8fb8cdc051a3bae45cfbfd7ce64f009965", size = 68021, upload-time = "2026-02-17T16:11:26.129Z" }, + { url = "https://files.pythonhosted.org/packages/ae/88/c3c52d2a5d5101f28d3dc89298444626e7874aa904eed498464c2af17627/librt-0.8.1-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:ac1e7817fd0ed3d14fd7c5df91daed84c48e4c2a11ee99c0547f9f62fdae13da", size = 194500, upload-time = "2026-02-17T16:11:27.177Z" }, + { url = "https://files.pythonhosted.org/packages/d6/5d/6fb0a25b6a8906e85b2c3b87bee1d6ed31510be7605b06772f9374ca5cb3/librt-0.8.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:747328be0c5b7075cde86a0e09d7a9196029800ba75a1689332348e998fb85c0", size = 205622, upload-time = "2026-02-17T16:11:28.242Z" }, + { url = "https://files.pythonhosted.org/packages/b2/a6/8006ae81227105476a45691f5831499e4d936b1c049b0c1feb17c11b02d1/librt-0.8.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f0af2bd2bc204fa27f3d6711d0f360e6b8c684a035206257a81673ab924aa11e", size = 218304, upload-time = "2026-02-17T16:11:29.344Z" }, + { url = "https://files.pythonhosted.org/packages/ee/19/60e07886ad16670aae57ef44dada41912c90906a6fe9f2b9abac21374748/librt-0.8.1-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d480de377f5b687b6b1bc0c0407426da556e2a757633cc7e4d2e1a057aa688f3", size = 211493, upload-time = "2026-02-17T16:11:30.445Z" }, + { url = "https://files.pythonhosted.org/packages/9c/cf/f666c89d0e861d05600438213feeb818c7514d3315bae3648b1fc145d2b6/librt-0.8.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d0ee06b5b5291f609ddb37b9750985b27bc567791bc87c76a569b3feed8481ac", size = 219129, upload-time = "2026-02-17T16:11:32.021Z" }, + { url = "https://files.pythonhosted.org/packages/8f/ef/f1bea01e40b4a879364c031476c82a0dc69ce068daad67ab96302fed2d45/librt-0.8.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9e2c6f77b9ad48ce5603b83b7da9ee3e36b3ab425353f695cba13200c5d96596", size = 213113, upload-time = "2026-02-17T16:11:33.192Z" }, + { url = "https://files.pythonhosted.org/packages/9b/80/cdab544370cc6bc1b72ea369525f547a59e6938ef6863a11ab3cd24759af/librt-0.8.1-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:439352ba9373f11cb8e1933da194dcc6206daf779ff8df0ed69c5e39113e6a99", size = 212269, upload-time = "2026-02-17T16:11:34.373Z" }, + { url = "https://files.pythonhosted.org/packages/9d/9c/48d6ed8dac595654f15eceab2035131c136d1ae9a1e3548e777bb6dbb95d/librt-0.8.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:82210adabbc331dbb65d7868b105185464ef13f56f7f76688565ad79f648b0fe", size = 234673, upload-time = "2026-02-17T16:11:36.063Z" }, + { url = "https://files.pythonhosted.org/packages/16/01/35b68b1db517f27a01be4467593292eb5315def8900afad29fabf56304ba/librt-0.8.1-cp311-cp311-win32.whl", hash = "sha256:52c224e14614b750c0a6d97368e16804a98c684657c7518752c356834fff83bb", size = 54597, upload-time = "2026-02-17T16:11:37.544Z" }, + { url = "https://files.pythonhosted.org/packages/71/02/796fe8f02822235966693f257bf2c79f40e11337337a657a8cfebba5febc/librt-0.8.1-cp311-cp311-win_amd64.whl", hash = "sha256:c00e5c884f528c9932d278d5c9cbbea38a6b81eb62c02e06ae53751a83a4d52b", size = 61733, upload-time = "2026-02-17T16:11:38.691Z" }, + { url = "https://files.pythonhosted.org/packages/28/ad/232e13d61f879a42a4e7117d65e4984bb28371a34bb6fb9ca54ec2c8f54e/librt-0.8.1-cp311-cp311-win_arm64.whl", hash = "sha256:f7cdf7f26c2286ffb02e46d7bac56c94655540b26347673bea15fa52a6af17e9", size = 52273, upload-time = "2026-02-17T16:11:40.308Z" }, + { url = "https://files.pythonhosted.org/packages/95/21/d39b0a87ac52fc98f621fb6f8060efb017a767ebbbac2f99fbcbc9ddc0d7/librt-0.8.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a28f2612ab566b17f3698b0da021ff9960610301607c9a5e8eaca62f5e1c350a", size = 66516, upload-time = "2026-02-17T16:11:41.604Z" }, + { url = "https://files.pythonhosted.org/packages/69/f1/46375e71441c43e8ae335905e069f1c54febee63a146278bcee8782c84fd/librt-0.8.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:60a78b694c9aee2a0f1aaeaa7d101cf713e92e8423a941d2897f4fa37908dab9", size = 68634, upload-time = "2026-02-17T16:11:43.268Z" }, + { url = "https://files.pythonhosted.org/packages/0a/33/c510de7f93bf1fa19e13423a606d8189a02624a800710f6e6a0a0f0784b3/librt-0.8.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:758509ea3f1eba2a57558e7e98f4659d0ea7670bff49673b0dde18a3c7e6c0eb", size = 198941, upload-time = "2026-02-17T16:11:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/dd/36/e725903416409a533d92398e88ce665476f275081d0d7d42f9c4951999e5/librt-0.8.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:039b9f2c506bd0ab0f8725aa5ba339c6f0cd19d3b514b50d134789809c24285d", size = 209991, upload-time = "2026-02-17T16:11:45.462Z" }, + { url = "https://files.pythonhosted.org/packages/30/7a/8d908a152e1875c9f8eac96c97a480df425e657cdb47854b9efaa4998889/librt-0.8.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5bb54f1205a3a6ab41a6fd71dfcdcbd278670d3a90ca502a30d9da583105b6f7", size = 224476, upload-time = "2026-02-17T16:11:46.542Z" }, + { url = "https://files.pythonhosted.org/packages/a8/b8/a22c34f2c485b8903a06f3fe3315341fe6876ef3599792344669db98fcff/librt-0.8.1-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:05bd41cdee35b0c59c259f870f6da532a2c5ca57db95b5f23689fcb5c9e42440", size = 217518, upload-time = "2026-02-17T16:11:47.746Z" }, + { url = "https://files.pythonhosted.org/packages/79/6f/5c6fea00357e4f82ba44f81dbfb027921f1ab10e320d4a64e1c408d035d9/librt-0.8.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adfab487facf03f0d0857b8710cf82d0704a309d8ffc33b03d9302b4c64e91a9", size = 225116, upload-time = "2026-02-17T16:11:49.298Z" }, + { url = "https://files.pythonhosted.org/packages/f2/a0/95ced4e7b1267fe1e2720a111685bcddf0e781f7e9e0ce59d751c44dcfe5/librt-0.8.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:153188fe98a72f206042be10a2c6026139852805215ed9539186312d50a8e972", size = 217751, upload-time = "2026-02-17T16:11:50.49Z" }, + { url = "https://files.pythonhosted.org/packages/93/c2/0517281cb4d4101c27ab59472924e67f55e375bc46bedae94ac6dc6e1902/librt-0.8.1-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:dd3c41254ee98604b08bd5b3af5bf0a89740d4ee0711de95b65166bf44091921", size = 218378, upload-time = "2026-02-17T16:11:51.783Z" }, + { url = "https://files.pythonhosted.org/packages/43/e8/37b3ac108e8976888e559a7b227d0ceac03c384cfd3e7a1c2ee248dbae79/librt-0.8.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e0d138c7ae532908cbb342162b2611dbd4d90c941cd25ab82084aaf71d2c0bd0", size = 241199, upload-time = "2026-02-17T16:11:53.561Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/35812d041c53967fedf551a39399271bbe4257e681236a2cf1a69c8e7fa1/librt-0.8.1-cp312-cp312-win32.whl", hash = "sha256:43353b943613c5d9c49a25aaffdba46f888ec354e71e3529a00cca3f04d66a7a", size = 54917, upload-time = "2026-02-17T16:11:54.758Z" }, + { url = "https://files.pythonhosted.org/packages/de/d1/fa5d5331b862b9775aaf2a100f5ef86854e5d4407f71bddf102f4421e034/librt-0.8.1-cp312-cp312-win_amd64.whl", hash = "sha256:ff8baf1f8d3f4b6b7257fcb75a501f2a5499d0dda57645baa09d4d0d34b19444", size = 62017, upload-time = "2026-02-17T16:11:55.748Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7c/c614252f9acda59b01a66e2ddfd243ed1c7e1deab0293332dfbccf862808/librt-0.8.1-cp312-cp312-win_arm64.whl", hash = "sha256:0f2ae3725904f7377e11cc37722d5d401e8b3d5851fb9273d7f4fe04f6b3d37d", size = 52441, upload-time = "2026-02-17T16:11:56.801Z" }, ] [[package]] name = "litellm" -version = "1.77.1" +version = "1.82.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -3459,9 +3312,9 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8c/65/71fe4851709fa4a612e41b80001a9ad803fea979d21b90970093fd65eded/litellm-1.77.1.tar.gz", hash = "sha256:76bab5203115efb9588244e5bafbfc07a800a239be75d8dc6b1b9d17394c6418", size = 10275745, upload-time = "2025-09-13T21:05:21.377Z" } +sdist = { url = "https://files.pythonhosted.org/packages/34/bd/6251e9a965ae2d7bc3342ae6c1a2d25dd265d354c502e63225451b135016/litellm-1.82.1.tar.gz", hash = "sha256:bc8427cdccc99e191e08e36fcd631c93b27328d1af789839eb3ac01a7d281890", size = 17197496, upload-time = "2026-03-10T09:10:04.438Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/dc/ff4f119cd4d783742c9648a03e0ba5c2b52fc385b2ae9f0d32acf3a78241/litellm-1.77.1-py3-none-any.whl", hash = "sha256:407761dc3c35fbcd41462d3fe65dd3ed70aac705f37cde318006c18940f695a0", size = 9067070, upload-time = "2025-09-13T21:05:18.078Z" }, + { url = "https://files.pythonhosted.org/packages/57/77/0c6eca2cb049793ddf8ce9cdcd5123a35666c4962514788c4fc90edf1d3b/litellm-1.82.1-py3-none-any.whl", hash = "sha256:a9ec3fe42eccb1611883caaf8b1bf33c9f4e12163f94c7d1004095b14c379eb2", size = 15341896, upload-time = "2026-03-10T09:10:00.702Z" }, ] [[package]] @@ -3575,11 +3428,11 @@ wheels = [ [[package]] name = "markdown" -version = "3.5.2" +version = "3.8.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/11/28/c5441a6642681d92de56063fa7984df56f783d3f1eba518dc3e7a253b606/Markdown-3.5.2.tar.gz", hash = "sha256:e1ac7b3dc550ee80e602e71c1d168002f062e49f1b11e26a36264dafd4df2ef8", size = 349398, upload-time = "2024-01-10T15:19:38.261Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/c2/4ab49206c17f75cb08d6311171f2d65798988db4360c4d1485bd0eedd67c/markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45", size = 362071, upload-time = "2025-06-19T17:12:44.483Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/42/f4/f0031854de10a0bc7821ef9fca0b92ca0d7aa6fbfbf504c5473ba825e49c/Markdown-3.5.2-py3-none-any.whl", hash = "sha256:d43323865d89fc0cb9b20c75fc8ad313af307cc087e84b657d9eec768eddeadd", size = 103870, upload-time = "2024-01-10T15:19:36.071Z" }, + { url = "https://files.pythonhosted.org/packages/96/2b/34cc11786bc00d0f04d0f5fdc3a2b1ae0b6239eef72d3d345805f9ad92a1/markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24", size = 106827, upload-time = "2025-06-19T17:12:42.994Z" }, ] [[package]] @@ -3823,39 +3676,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, ] -[[package]] -name = "multipart" -version = "1.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6d/c9/c6f5ab81bae667d4fe42a58df29f4c2db6ad8377cfd0e9baa729e4fa3ebb/multipart-1.3.0.tar.gz", hash = "sha256:a46bd6b0eb4c1ba865beb88ddd886012a3da709b6e7b86084fc37e99087e5cf1", size = 38816, upload-time = "2025-07-26T15:09:38.056Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/d6/d547a7004b81fa0b2aafa143b09196f6635e4105cd9d2c641fa8a4051c05/multipart-1.3.0-py3-none-any.whl", hash = "sha256:439bf4b00fd7cb2dbff08ae13f49f4f49798931ecd8d496372c63537fa19f304", size = 14938, upload-time = "2025-07-26T15:09:36.884Z" }, -] - [[package]] name = "mypy" -version = "1.17.1" +version = "1.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, { name = "mypy-extensions" }, { name = "pathspec" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8e/22/ea637422dedf0bf36f3ef238eab4e455e2a0dcc3082b5cc067615347ab8e/mypy-1.17.1.tar.gz", hash = "sha256:25e01ec741ab5bb3eec8ba9cdb0f769230368a22c959c4937360efb89b7e9f01", size = 3352570, upload-time = "2025-07-31T07:54:19.204Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/46/cf/eadc80c4e0a70db1c08921dcc220357ba8ab2faecb4392e3cebeb10edbfa/mypy-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad37544be07c5d7fba814eb370e006df58fed8ad1ef33ed1649cb1889ba6ff58", size = 10921009, upload-time = "2025-07-31T07:53:23.037Z" }, - { url = "https://files.pythonhosted.org/packages/5d/c1/c869d8c067829ad30d9bdae051046561552516cfb3a14f7f0347b7d973ee/mypy-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:064e2ff508e5464b4bd807a7c1625bc5047c5022b85c70f030680e18f37273a5", size = 10047482, upload-time = "2025-07-31T07:53:26.151Z" }, - { url = "https://files.pythonhosted.org/packages/98/b9/803672bab3fe03cee2e14786ca056efda4bb511ea02dadcedde6176d06d0/mypy-1.17.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70401bbabd2fa1aa7c43bb358f54037baf0586f41e83b0ae67dd0534fc64edfd", size = 11832883, upload-time = "2025-07-31T07:53:47.948Z" }, - { url = "https://files.pythonhosted.org/packages/88/fb/fcdac695beca66800918c18697b48833a9a6701de288452b6715a98cfee1/mypy-1.17.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e92bdc656b7757c438660f775f872a669b8ff374edc4d18277d86b63edba6b8b", size = 12566215, upload-time = "2025-07-31T07:54:04.031Z" }, - { url = "https://files.pythonhosted.org/packages/7f/37/a932da3d3dace99ee8eb2043b6ab03b6768c36eb29a02f98f46c18c0da0e/mypy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c1fdf4abb29ed1cb091cf432979e162c208a5ac676ce35010373ff29247bcad5", size = 12751956, upload-time = "2025-07-31T07:53:36.263Z" }, - { url = "https://files.pythonhosted.org/packages/8c/cf/6438a429e0f2f5cab8bc83e53dbebfa666476f40ee322e13cac5e64b79e7/mypy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:ff2933428516ab63f961644bc49bc4cbe42bbffb2cd3b71cc7277c07d16b1a8b", size = 9507307, upload-time = "2025-07-31T07:53:59.734Z" }, - { url = "https://files.pythonhosted.org/packages/17/a2/7034d0d61af8098ec47902108553122baa0f438df8a713be860f7407c9e6/mypy-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69e83ea6553a3ba79c08c6e15dbd9bfa912ec1e493bf75489ef93beb65209aeb", size = 11086295, upload-time = "2025-07-31T07:53:28.124Z" }, - { url = "https://files.pythonhosted.org/packages/14/1f/19e7e44b594d4b12f6ba8064dbe136505cec813549ca3e5191e40b1d3cc2/mypy-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b16708a66d38abb1e6b5702f5c2c87e133289da36f6a1d15f6a5221085c6403", size = 10112355, upload-time = "2025-07-31T07:53:21.121Z" }, - { url = "https://files.pythonhosted.org/packages/5b/69/baa33927e29e6b4c55d798a9d44db5d394072eef2bdc18c3e2048c9ed1e9/mypy-1.17.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89e972c0035e9e05823907ad5398c5a73b9f47a002b22359b177d40bdaee7056", size = 11875285, upload-time = "2025-07-31T07:53:55.293Z" }, - { url = "https://files.pythonhosted.org/packages/90/13/f3a89c76b0a41e19490b01e7069713a30949d9a6c147289ee1521bcea245/mypy-1.17.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03b6d0ed2b188e35ee6d5c36b5580cffd6da23319991c49ab5556c023ccf1341", size = 12737895, upload-time = "2025-07-31T07:53:43.623Z" }, - { url = "https://files.pythonhosted.org/packages/23/a1/c4ee79ac484241301564072e6476c5a5be2590bc2e7bfd28220033d2ef8f/mypy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c837b896b37cd103570d776bda106eabb8737aa6dd4f248451aecf53030cdbeb", size = 12931025, upload-time = "2025-07-31T07:54:17.125Z" }, - { url = "https://files.pythonhosted.org/packages/89/b8/7409477be7919a0608900e6320b155c72caab4fef46427c5cc75f85edadd/mypy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:665afab0963a4b39dff7c1fa563cc8b11ecff7910206db4b2e64dd1ba25aed19", size = 9584664, upload-time = "2025-07-31T07:54:12.842Z" }, - { url = "https://files.pythonhosted.org/packages/1d/f3/8fcd2af0f5b806f6cf463efaffd3c9548a28f84220493ecd38d127b6b66d/mypy-1.17.1-py3-none-any.whl", hash = "sha256:a9f52c0351c21fe24c21d8c0eb1f62967b262d6729393397b6f443c3b773c3b9", size = 2283411, upload-time = "2025-07-31T07:53:24.664Z" }, + { url = "https://files.pythonhosted.org/packages/ef/47/6b3ebabd5474d9cdc170d1342fbf9dddc1b0ec13ec90bf9004ee6f391c31/mypy-1.19.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d8dfc6ab58ca7dda47d9237349157500468e404b17213d44fc1cb77bce532288", size = 13028539, upload-time = "2025-12-15T05:03:44.129Z" }, + { url = "https://files.pythonhosted.org/packages/5c/a6/ac7c7a88a3c9c54334f53a941b765e6ec6c4ebd65d3fe8cdcfbe0d0fd7db/mypy-1.19.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e3f276d8493c3c97930e354b2595a44a21348b320d859fb4a2b9f66da9ed27ab", size = 12083163, upload-time = "2025-12-15T05:03:37.679Z" }, + { url = "https://files.pythonhosted.org/packages/67/af/3afa9cf880aa4a2c803798ac24f1d11ef72a0c8079689fac5cfd815e2830/mypy-1.19.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2abb24cf3f17864770d18d673c85235ba52456b36a06b6afc1e07c1fdcd3d0e6", size = 12687629, upload-time = "2025-12-15T05:02:31.526Z" }, + { url = "https://files.pythonhosted.org/packages/2d/46/20f8a7114a56484ab268b0ab372461cb3a8f7deed31ea96b83a4e4cfcfca/mypy-1.19.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a009ffa5a621762d0c926a078c2d639104becab69e79538a494bcccb62cc0331", size = 13436933, upload-time = "2025-12-15T05:03:15.606Z" }, + { url = "https://files.pythonhosted.org/packages/5b/f8/33b291ea85050a21f15da910002460f1f445f8007adb29230f0adea279cb/mypy-1.19.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7cee03c9a2e2ee26ec07479f38ea9c884e301d42c6d43a19d20fb014e3ba925", size = 13661754, upload-time = "2025-12-15T05:02:26.731Z" }, + { url = "https://files.pythonhosted.org/packages/fd/a3/47cbd4e85bec4335a9cd80cf67dbc02be21b5d4c9c23ad6b95d6c5196bac/mypy-1.19.1-cp311-cp311-win_amd64.whl", hash = "sha256:4b84a7a18f41e167f7995200a1d07a4a6810e89d29859df936f1c3923d263042", size = 10055772, upload-time = "2025-12-15T05:03:26.179Z" }, + { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" }, + { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" }, + { url = "https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" }, + { url = "https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" }, + { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" }, + { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" }, + { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" }, ] [[package]] @@ -4030,43 +3875,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] -[[package]] -name = "obstore" -version = "0.8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a3/8c/9ec984edd0f3b72226adfaa19b1c61b15823b35b52f311ca4af36d009d15/obstore-0.8.2.tar.gz", hash = "sha256:a467bc4e97169e2ba749981b4fd0936015428d9b8f3fb83a5528536b1b6f377f", size = 168852, upload-time = "2025-09-16T15:34:55.786Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/c4/018f90701f1e5ea3fbd57f61463f42e1ef5218e548d3adcf12b6be021c34/obstore-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2edaa97687c191c5324bb939d72f6fe86a7aa8191c410f1648c14e8296d05c1c", size = 3622568, upload-time = "2025-09-16T15:33:14.196Z" }, - { url = "https://files.pythonhosted.org/packages/a8/62/72dd1e7d52fc554bb1fdb1a9499bda219cf3facea5865a1d97fdc00b3a1b/obstore-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c4fb7ef8108f08d14edc8bec9e9a6a2e5c4d14eddb8819f5d0da498aff6e8888", size = 3356109, upload-time = "2025-09-16T15:33:15.315Z" }, - { url = "https://files.pythonhosted.org/packages/e0/ae/089fe5b9207091252fe5ce352551214f04560f85eb8f2cc4f716a6a1a57e/obstore-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fda8f658c0edf799ab1e264f9b12c7c184cd09a5272dc645d42e987810ff2772", size = 3454588, upload-time = "2025-09-16T15:33:16.421Z" }, - { url = "https://files.pythonhosted.org/packages/ea/10/1865ae2d1ba45e8ae85fb0c1aada2dc9533baf60c4dfe74dab905348d74a/obstore-0.8.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:87fe2bc15ce4051ecb56abd484feca323c2416628beb62c1c7b6712114564d6e", size = 3688627, upload-time = "2025-09-16T15:33:17.604Z" }, - { url = "https://files.pythonhosted.org/packages/a6/09/5d7ba6d0aeac563ea5f5586401c677bace4f782af83522b1fdf15430e152/obstore-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2482aa2562ab6a4ca40250b26bea33f8375b59898a9b5615fd412cab81098123", size = 3959896, upload-time = "2025-09-16T15:33:18.789Z" }, - { url = "https://files.pythonhosted.org/packages/16/15/2b3eda59914761a9ff4d840e2daec5697fd29b293bd18d3dc11c593aed06/obstore-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4153b928f5d2e9c6cb645e83668a53e0b42253d1e8bcb4e16571fc0a1434599a", size = 3933162, upload-time = "2025-09-16T15:33:19.935Z" }, - { url = "https://files.pythonhosted.org/packages/14/7a/5fc63b41526587067537fb1498c59a210884664c65ccf0d1f8f823b0875a/obstore-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbfa9c38620cc191be98c8b5558c62071e495dc6b1cc724f38293ee439aa9f92", size = 3769605, upload-time = "2025-09-16T15:33:21.389Z" }, - { url = "https://files.pythonhosted.org/packages/77/4e/2208ab6e1fc021bf8b7e117249a10ab75d0ed24e0f2de1a8d7cd67d885b5/obstore-0.8.2-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:0822836eae8d52499f10daef17f26855b4c123119c6eb984aa4f2d525ec2678d", size = 3534396, upload-time = "2025-09-16T15:33:22.574Z" }, - { url = "https://files.pythonhosted.org/packages/1d/8f/a0e2882edd6bd285c82b8a5851c4ecf386c93fe75b6e340d5d9d30e809fc/obstore-0.8.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8ef6435dfd586d83b4f778e7927a5d5b0d8b771e9ba914bc809a13d7805410e6", size = 3697777, upload-time = "2025-09-16T15:33:23.723Z" }, - { url = "https://files.pythonhosted.org/packages/94/78/ebf0c33bed5c9a8eed3b00eefafbcc0a687eeb1e05451c76fcf199d29ff8/obstore-0.8.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:0f2cba91f4271ca95a932a51aa8dda1537160342b33f7836c75e1eb9d40621a2", size = 3681546, upload-time = "2025-09-16T15:33:24.935Z" }, - { url = "https://files.pythonhosted.org/packages/af/21/9bf4fb9e53fd5f01af580b6538de2eae857e31d24b0ebfc4d916c306a1e4/obstore-0.8.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:23c876d603af0627627808d19a58d43eb5d8bfd02eecd29460bc9a58030fed55", size = 3765336, upload-time = "2025-09-16T15:33:26.069Z" }, - { url = "https://files.pythonhosted.org/packages/dd/3c/7f6895c23719482d231b2d6ed328e3223fdf99785f6850fba8d2fc5a86ee/obstore-0.8.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ff3c4b5d07629b70b9dee494cd6b94fff8465c3864752181a1cb81a77190fe42", size = 3941142, upload-time = "2025-09-16T15:33:27.275Z" }, - { url = "https://files.pythonhosted.org/packages/93/a4/56ccdb756161595680a28f4b0def2c04f7048ffacf128029be8394367b26/obstore-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:aadb2cb72de7227d07f4570f82729625ffc77522fadca5cf13c3a37fbe8c8de9", size = 3970172, upload-time = "2025-09-16T15:33:28.393Z" }, - { url = "https://files.pythonhosted.org/packages/2b/dc/60fefbb5736e69eab56657bca04ca64dc07fdeccb3814164a31b62ad066b/obstore-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:bb70ce297a47392b1d9a3e310f18d59cd5ebbb9453428210fef02ed60e4d75d1", size = 3612955, upload-time = "2025-09-16T15:33:29.527Z" }, - { url = "https://files.pythonhosted.org/packages/d2/8b/844e8f382e5a12b8a3796a05d76a03e12c7aedc13d6900419e39207d7868/obstore-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1619bf618428abf1f607e0b219b2e230a966dcf697b717deccfa0983dd91f646", size = 3346564, upload-time = "2025-09-16T15:33:30.698Z" }, - { url = "https://files.pythonhosted.org/packages/89/73/8537f99e09a38a54a6a15ede907aa25d4da089f767a808f0b2edd9c03cec/obstore-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a4605c3ed7c9515aeb4c619b5f7f2c9986ed4a79fe6045e536b5e59b804b1476", size = 3460809, upload-time = "2025-09-16T15:33:31.837Z" }, - { url = "https://files.pythonhosted.org/packages/b4/99/7714dec721e43f521d6325a82303a002cddad089437640f92542b84e9cc8/obstore-0.8.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce42670417876dd8668cbb8659e860e9725e5f26bbc86449fd259970e2dd9d18", size = 3692081, upload-time = "2025-09-16T15:33:33.028Z" }, - { url = "https://files.pythonhosted.org/packages/ec/bd/4ac4175fe95a24c220a96021c25c432bcc0c0212f618be0737184eebbaad/obstore-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4a3e893b2a06585f651c541c1972fe1e3bf999ae2a5fda052ee55eb7e6516f5", size = 3957466, upload-time = "2025-09-16T15:33:34.528Z" }, - { url = "https://files.pythonhosted.org/packages/4e/04/caa288fb735484fc5cb019bdf3d896eaccfae0ac4622e520d05692c46790/obstore-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08462b32f95a9948ed56ed63e88406e2e5a4cae1fde198f9682e0fb8487100ed", size = 3951293, upload-time = "2025-09-16T15:33:35.733Z" }, - { url = "https://files.pythonhosted.org/packages/44/2f/d380239da2d6a1fda82e17df5dae600a404e8a93a065784518ff8325d5f6/obstore-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a0bf7763292a8fc47d01cd66e6f19002c5c6ad4b3ed4e6b2729f5e190fa8a0d", size = 3766199, upload-time = "2025-09-16T15:33:36.904Z" }, - { url = "https://files.pythonhosted.org/packages/28/41/d391be069d3da82969b54266948b2582aeca5dd735abeda4d63dba36e07b/obstore-0.8.2-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:bcd47f8126cb192cbe86942b8f73b1c45a651ce7e14c9a82c5641dfbf8be7603", size = 3529678, upload-time = "2025-09-16T15:33:38.221Z" }, - { url = "https://files.pythonhosted.org/packages/b9/4c/4862fdd1a3abde459ee8eea699b1797df638a460af235b18ca82c8fffb72/obstore-0.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:57eda9fd8c757c3b4fe36cf3918d7e589cc1286591295cc10b34122fa36dd3fd", size = 3698079, upload-time = "2025-09-16T15:33:39.696Z" }, - { url = "https://files.pythonhosted.org/packages/68/ca/014e747bc53b570059c27e3565b2316fbe5c107d4134551f4cd3e24aa667/obstore-0.8.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ea44442aad8992166baa69f5069750979e4c5d9ffce772e61565945eea5774b9", size = 3687154, upload-time = "2025-09-16T15:33:40.92Z" }, - { url = "https://files.pythonhosted.org/packages/6f/89/6db5f8edd93028e5b8bfbeee15e6bd3e56f72106107d31cb208b57659de4/obstore-0.8.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:41496a3ab8527402db4142aaaf0d42df9d7d354b13ba10d9c33e0e48dd49dd96", size = 3773444, upload-time = "2025-09-16T15:33:42.123Z" }, - { url = "https://files.pythonhosted.org/packages/26/e5/c9e2cc540689c873beb61246e1615d6e38301e6a34dec424f5a5c63c1afd/obstore-0.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:43da209803f052df96c7c3cbec512d310982efd2407e4a435632841a51143170", size = 3939315, upload-time = "2025-09-16T15:33:43.252Z" }, - { url = "https://files.pythonhosted.org/packages/4d/c9/bb53280ca50103c1ffda373cdc9b0f835431060039c2897cbc87ddd92e42/obstore-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:1836f5dcd49f9f2950c75889ab5c51fb290d3ea93cdc39a514541e0be3af016e", size = 3978234, upload-time = "2025-09-16T15:33:44.393Z" }, -] - [[package]] name = "odfpy" version = "1.4.1" @@ -4437,15 +4245,15 @@ wheels = [ [[package]] name = "opentelemetry-propagator-b3" -version = "1.28.0" +version = "1.40.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "deprecated" }, { name = "opentelemetry-api" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6f/1d/225ea036785119964509e92f4e1bc0313ba6ec790fbf51bd363abafeafae/opentelemetry_propagator_b3-1.28.0.tar.gz", hash = "sha256:cf6f0d2a1881c4858898be47e8a94b11bc5b16fc73b6c37ebfa2121c4825adc6", size = 9592, upload-time = "2024-11-05T19:14:57.193Z" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/fe/e0c84af5c654ec42165ba57af83c7f67e4b8af77f836ddc29dee59ff73c6/opentelemetry_propagator_b3-1.40.0.tar.gz", hash = "sha256:59b6925498947c08a1b7e0dd38193ff97e5009bec74ec23824300c2e32f77bcf", size = 9587, upload-time = "2026-03-04T14:17:30.079Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/fa/438d53d73a6c45df5d416b56dc371a65d0b07859bc107ab632349a079d4a/opentelemetry_propagator_b3-1.28.0-py3-none-any.whl", hash = "sha256:9f6923a5da56d7da6724e4fdd758a67ede2a2732efb929e538cf6fea337700c5", size = 8917, upload-time = "2024-11-05T19:14:37.317Z" }, + { url = "https://files.pythonhosted.org/packages/8f/84/8654cc0539b5145046b2e60d058cebad401a600dd0b1240f1711c6788643/opentelemetry_propagator_b3-1.40.0-py3-none-any.whl", hash = "sha256:cb72a1698fd1d1b434f70dc90c1de62da8ade1dd84850d1f040eccf6a420fa7b", size = 8922, upload-time = "2026-03-04T14:17:14.732Z" }, ] [[package]] @@ -4498,7 +4306,7 @@ wheels = [ [[package]] name = "opik" -version = "1.8.102" +version = "1.10.45" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "boto3-stubs", extra = ["bedrock-runtime"] }, @@ -4517,9 +4325,9 @@ dependencies = [ { name = "tqdm" }, { name = "uuid6" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/30/af/f6382cea86bdfbfd0f9571960a15301da4a6ecd1506070d9252a0c0a7564/opik-1.8.102.tar.gz", hash = "sha256:c836a113e8b7fdf90770a3854dcc859b3c30d6347383d7c11e52971a530ed2c3", size = 490462, upload-time = "2025-11-05T18:54:50.142Z" } +sdist = { url = "https://files.pythonhosted.org/packages/85/17/edea6308347cec62e6828de7c573c596559c502b54fa4f0c88a52e2e81f5/opik-1.10.45.tar.gz", hash = "sha256:d8d8627ba03d12def46965e03d58f611daaf5cf878b3d087c53fe1159788c140", size = 789876, upload-time = "2026-03-20T11:35:12.457Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/8b/9b15a01f8360201100b9a5d3e0aeeeda57833fca2b16d34b9fada147fc4b/opik-1.8.102-py3-none-any.whl", hash = "sha256:d8501134bf62bf95443de036f6eaa4f66006f81f9b99e0a8a09e21d8be8c1628", size = 885834, upload-time = "2025-11-05T18:54:48.22Z" }, + { url = "https://files.pythonhosted.org/packages/b7/17/150e9eecfa28cb23f7a0bfe83ae1486a11022b97fe6d12328b455784658d/opik-1.10.45-py3-none-any.whl", hash = "sha256:e8050d9e5e0d92ff587f156eacbdd02099897f39cfe79a98380b6c8ae9906b95", size = 1337714, upload-time = "2026-03-20T11:35:10.237Z" }, ] [[package]] @@ -4601,7 +4409,7 @@ wheels = [ [[package]] name = "oss2" -version = "2.18.5" +version = "2.19.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aliyun-python-sdk-core" }, @@ -4611,7 +4419,7 @@ dependencies = [ { name = "requests" }, { name = "six" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/61/ce/d23a9d44268dc992ae1a878d24341dddaea4de4ae374c261209bb6e9554b/oss2-2.18.5.tar.gz", hash = "sha256:555c857f4441ae42a2c0abab8fc9482543fba35d65a4a4be73101c959a2b4011", size = 283388, upload-time = "2024-04-29T12:49:07.686Z" } +sdist = { url = "https://files.pythonhosted.org/packages/df/b5/f2cb1950dda46ac2284d6c950489fdacd0e743c2d79a347924d3cc44b86f/oss2-2.19.1.tar.gz", hash = "sha256:a8ab9ee7eb99e88a7e1382edc6ea641d219d585a7e074e3776e9dec9473e59c1", size = 298845, upload-time = "2024-10-25T11:37:46.638Z" } [[package]] name = "overrides" @@ -4624,11 +4432,11 @@ wheels = [ [[package]] name = "packaging" -version = "24.1" +version = "23.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/51/65/50db4dda066951078f0a96cf12f4b9ada6e4b811516bf0262c0f4f7064d4/packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002", size = 148788, upload-time = "2024-06-09T23:19:24.956Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/2b/9b9c33ffed44ee921d0967086d653047286054117d584f1b1a7c22ceaf7b/packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", size = 146714, upload-time = "2023-10-01T13:50:05.279Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985, upload-time = "2024-06-09T23:19:21.909Z" }, + { url = "https://files.pythonhosted.org/packages/ec/1a/610693ac4ee14fcdf2d9bf3c493370e4f2ef7ae2e19217d7a237ff42367d/packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7", size = 53011, upload-time = "2023-10-01T13:50:03.745Z" }, ] [[package]] @@ -4680,30 +4488,14 @@ performance = [ [[package]] name = "pandas-stubs" -version = "2.2.3.250527" +version = "3.0.0.260204" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, - { name = "types-pytz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5f/0d/5fe7f7f3596eb1c2526fea151e9470f86b379183d8b9debe44b2098651ca/pandas_stubs-2.2.3.250527.tar.gz", hash = "sha256:e2d694c4e72106055295ad143664e5c99e5815b07190d1ff85b73b13ff019e63", size = 106312, upload-time = "2025-05-27T15:24:29.716Z" } +sdist = { url = "https://files.pythonhosted.org/packages/27/1d/297ff2c7ea50a768a2247621d6451abb2a07c0e9be7ca6d36ebe371658e5/pandas_stubs-3.0.0.260204.tar.gz", hash = "sha256:bf9294b76352effcffa9cb85edf0bed1339a7ec0c30b8e1ac3d66b4228f1fbc3", size = 109383, upload-time = "2026-02-04T15:17:17.247Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/f8/46141ba8c9d7064dc5008bfb4a6ae5bd3c30e4c61c28b5c5ed485bf358ba/pandas_stubs-2.2.3.250527-py3-none-any.whl", hash = "sha256:cd0a49a95b8c5f944e605be711042a4dd8550e2c559b43d70ba2c4b524b66163", size = 159683, upload-time = "2025-05-27T15:24:28.4Z" }, -] - -[[package]] -name = "paramiko" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bcrypt" }, - { name = "cryptography" }, - { name = "invoke" }, - { name = "pynacl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/e7/81fdcbc7f190cdb058cffc9431587eb289833bdd633e2002455ca9bb13d4/paramiko-4.0.0.tar.gz", hash = "sha256:6a25f07b380cc9c9a88d2b920ad37167ac4667f8d9886ccebd8f90f654b5d69f", size = 1630743, upload-time = "2025-08-04T01:02:03.711Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/90/a744336f5af32c433bd09af7854599682a383b37cfd78f7de263de6ad6cb/paramiko-4.0.0-py3-none-any.whl", hash = "sha256:0e20e00ac666503bf0b4eda3b6d833465a2b7aff2e2b3d79a8bba5ef144ee3b9", size = 223932, upload-time = "2025-08-04T01:02:02.029Z" }, + { url = "https://files.pythonhosted.org/packages/7c/2f/f91e4eee21585ff548e83358332d5632ee49f6b2dcd96cb5dca4e0468951/pandas_stubs-3.0.0.260204-py3-none-any.whl", hash = "sha256:5ab9e4d55a6e2752e9720828564af40d48c4f709e6a2c69b743014a6fcb6c241", size = 168540, upload-time = "2026-02-04T15:17:15.615Z" }, ] [[package]] @@ -4812,15 +4604,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] -[[package]] -name = "ply" -version = "3.11" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e5/69/882ee5c9d017149285cab114ebeab373308ef0f874fcdac9beb90e0ac4da/ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3", size = 159130, upload-time = "2018-02-15T19:01:31.097Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/58/35da89ee790598a0700ea49b2a66594140f44dec458c07e8e3d4979137fc/ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce", size = 49567, upload-time = "2018-02-15T19:01:27.172Z" }, -] - [[package]] name = "polyfile-weave" version = "0.5.9" @@ -5179,29 +4962,29 @@ wheels = [ [[package]] name = "pydantic-extra-types" -version = "2.10.6" +version = "2.11.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/3a/10/fb64987804cde41bcc39d9cd757cd5f2bb5d97b389d81aa70238b14b8a7e/pydantic_extra_types-2.10.6.tar.gz", hash = "sha256:c63d70bf684366e6bbe1f4ee3957952ebe6973d41e7802aea0b770d06b116aeb", size = 141858, upload-time = "2025-10-08T13:47:49.483Z" } +sdist = { url = "https://files.pythonhosted.org/packages/66/71/dba38ee2651f84f7842206adbd2233d8bbdb59fb85e9fa14232486a8c471/pydantic_extra_types-2.11.1.tar.gz", hash = "sha256:46792d2307383859e923d8fcefa82108b1a141f8a9c0198982b3832ab5ef1049", size = 172002, upload-time = "2026-03-16T08:08:03.92Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/04/5c918669096da8d1c9ec7bb716bd72e755526103a61bc5e76a3e4fb23b53/pydantic_extra_types-2.10.6-py3-none-any.whl", hash = "sha256:6106c448316d30abf721b5b9fecc65e983ef2614399a24142d689c7546cc246a", size = 40949, upload-time = "2025-10-08T13:47:48.268Z" }, + { url = "https://files.pythonhosted.org/packages/17/c1/3226e6d7f5a4f736f38ac11a6fbb262d701889802595cdb0f53a885ac2e0/pydantic_extra_types-2.11.1-py3-none-any.whl", hash = "sha256:1722ea2bddae5628ace25f2aa685b69978ef533123e5638cfbddb999e0100ec1", size = 79526, upload-time = "2026-03-16T08:08:02.533Z" }, ] [[package]] name = "pydantic-settings" -version = "2.12.0" +version = "2.13.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "python-dotenv" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/43/4b/ac7e0aae12027748076d72a8764ff1c9d82ca75a7a52622e67ed3f765c54/pydantic_settings-2.12.0.tar.gz", hash = "sha256:005538ef951e3c2a68e1c08b292b5f2e71490def8589d4221b95dab00dafcfd0", size = 194184, upload-time = "2025-11-10T14:25:47.013Z" } +sdist = { url = "https://files.pythonhosted.org/packages/52/6d/fffca34caecc4a3f97bda81b2098da5e8ab7efc9a66e819074a11955d87e/pydantic_settings-2.13.1.tar.gz", hash = "sha256:b4c11847b15237fb0171e1462bf540e294affb9b86db4d9aa5c01730bdbe4025", size = 223826, upload-time = "2026-02-19T13:45:08.055Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/60/5d4751ba3f4a40a6891f24eec885f51afd78d208498268c734e256fb13c4/pydantic_settings-2.12.0-py3-none-any.whl", hash = "sha256:fddb9fd99a5b18da837b29710391e945b1e30c135477f484084ee513adb93809", size = 51880, upload-time = "2025-11-10T14:25:45.546Z" }, + { url = "https://files.pythonhosted.org/packages/00/4b/ccc026168948fec4f7555b9164c724cf4125eac006e176541483d2c959be/pydantic_settings-2.13.1-py3-none-any.whl", hash = "sha256:d56fd801823dbeae7f0975e1f8c8e25c258eb75d278ea7abb5d9cebb01b56237", size = 58929, upload-time = "2026-02-19T13:45:06.034Z" }, ] [[package]] @@ -5215,11 +4998,11 @@ wheels = [ [[package]] name = "pyjwt" -version = "2.11.0" +version = "2.12.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" }, + { url = "https://files.pythonhosted.org/packages/e5/7a/8dd906bd22e79e47397a61742927f6747fe93242ef86645ee9092e610244/pyjwt-2.12.1-py3-none-any.whl", hash = "sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c", size = 29726, upload-time = "2026-03-13T19:27:35.677Z" }, ] [package.optional-dependencies] @@ -5268,29 +5051,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300, upload-time = "2025-08-24T12:55:53.394Z" }, ] -[[package]] -name = "pynacl" -version = "1.6.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d9/9a/4019b524b03a13438637b11538c82781a5eda427394380381af8f04f467a/pynacl-1.6.2.tar.gz", hash = "sha256:018494d6d696ae03c7e656e5e74cdfd8ea1326962cc401bcf018f1ed8436811c", size = 3511692, upload-time = "2026-01-01T17:48:10.851Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/be/7b/4845bbf88e94586ec47a432da4e9107e3fc3ce37eb412b1398630a37f7dd/pynacl-1.6.2-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:c949ea47e4206af7c8f604b8278093b674f7c79ed0d4719cc836902bf4517465", size = 388458, upload-time = "2026-01-01T17:32:16.829Z" }, - { url = "https://files.pythonhosted.org/packages/1e/b4/e927e0653ba63b02a4ca5b4d852a8d1d678afbf69b3dbf9c4d0785ac905c/pynacl-1.6.2-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8845c0631c0be43abdd865511c41eab235e0be69c81dc66a50911594198679b0", size = 800020, upload-time = "2026-01-01T17:32:18.34Z" }, - { url = "https://files.pythonhosted.org/packages/7f/81/d60984052df5c97b1d24365bc1e30024379b42c4edcd79d2436b1b9806f2/pynacl-1.6.2-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:22de65bb9010a725b0dac248f353bb072969c94fa8d6b1f34b87d7953cf7bbe4", size = 1399174, upload-time = "2026-01-01T17:32:20.239Z" }, - { url = "https://files.pythonhosted.org/packages/68/f7/322f2f9915c4ef27d140101dd0ed26b479f7e6f5f183590fd32dfc48c4d3/pynacl-1.6.2-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46065496ab748469cdd999246d17e301b2c24ae2fdf739132e580a0e94c94a87", size = 835085, upload-time = "2026-01-01T17:32:22.24Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d0/f301f83ac8dbe53442c5a43f6a39016f94f754d7a9815a875b65e218a307/pynacl-1.6.2-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a66d6fb6ae7661c58995f9c6435bda2b1e68b54b598a6a10247bfcdadac996c", size = 1437614, upload-time = "2026-01-01T17:32:23.766Z" }, - { url = "https://files.pythonhosted.org/packages/c4/58/fc6e649762b029315325ace1a8c6be66125e42f67416d3dbd47b69563d61/pynacl-1.6.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:26bfcd00dcf2cf160f122186af731ae30ab120c18e8375684ec2670dccd28130", size = 818251, upload-time = "2026-01-01T17:32:25.69Z" }, - { url = "https://files.pythonhosted.org/packages/c9/a8/b917096b1accc9acd878819a49d3d84875731a41eb665f6ebc826b1af99e/pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c8a231e36ec2cab018c4ad4358c386e36eede0319a0c41fed24f840b1dac59f6", size = 1402859, upload-time = "2026-01-01T17:32:27.215Z" }, - { url = "https://files.pythonhosted.org/packages/85/42/fe60b5f4473e12c72f977548e4028156f4d340b884c635ec6b063fe7e9a5/pynacl-1.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:68be3a09455743ff9505491220b64440ced8973fe930f270c8e07ccfa25b1f9e", size = 791926, upload-time = "2026-01-01T17:32:29.314Z" }, - { url = "https://files.pythonhosted.org/packages/fa/f9/e40e318c604259301cc091a2a63f237d9e7b424c4851cafaea4ea7c4834e/pynacl-1.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b097553b380236d51ed11356c953bf8ce36a29a3e596e934ecabe76c985a577", size = 1363101, upload-time = "2026-01-01T17:32:31.263Z" }, - { url = "https://files.pythonhosted.org/packages/48/47/e761c254f410c023a469284a9bc210933e18588ca87706ae93002c05114c/pynacl-1.6.2-cp38-abi3-win32.whl", hash = "sha256:5811c72b473b2f38f7e2a3dc4f8642e3a3e9b5e7317266e4ced1fba85cae41aa", size = 227421, upload-time = "2026-01-01T17:32:33.076Z" }, - { url = "https://files.pythonhosted.org/packages/41/ad/334600e8cacc7d86587fe5f565480fde569dfb487389c8e1be56ac21d8ac/pynacl-1.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:62985f233210dee6548c223301b6c25440852e13d59a8b81490203c3227c5ba0", size = 239754, upload-time = "2026-01-01T17:32:34.557Z" }, - { url = "https://files.pythonhosted.org/packages/29/7d/5945b5af29534641820d3bd7b00962abbbdfee84ec7e19f0d5b3175f9a31/pynacl-1.6.2-cp38-abi3-win_arm64.whl", hash = "sha256:834a43af110f743a754448463e8fd61259cd4ab5bbedcf70f9dabad1d28a394c", size = 184801, upload-time = "2026-01-01T17:32:36.309Z" }, -] - [[package]] name = "pyobvector" version = "0.2.24" @@ -5409,43 +5169,45 @@ wheels = [ [[package]] name = "pytest" -version = "8.3.5" +version = "9.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, + { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891, upload-time = "2025-03-02T12:54:54.503Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, ] [[package]] name = "pytest-benchmark" -version = "4.0.0" +version = "5.2.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "py-cpuinfo" }, { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/28/08/e6b0067efa9a1f2a1eb3043ecd8a0c48bfeb60d3255006dcc829d72d5da2/pytest-benchmark-4.0.0.tar.gz", hash = "sha256:fb0785b83efe599a6a956361c0691ae1dbb5318018561af10f3e915caa0048d1", size = 334641, upload-time = "2022-10-25T21:21:55.686Z" } +sdist = { url = "https://files.pythonhosted.org/packages/24/34/9f732b76456d64faffbef6232f1f9dbec7a7c4999ff46282fa418bd1af66/pytest_benchmark-5.2.3.tar.gz", hash = "sha256:deb7317998a23c650fd4ff76e1230066a76cb45dcece0aca5607143c619e7779", size = 341340, upload-time = "2025-11-09T18:48:43.215Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/a1/3b70862b5b3f830f0422844f25a823d0470739d994466be9dbbbb414d85a/pytest_benchmark-4.0.0-py3-none-any.whl", hash = "sha256:fdb7db64e31c8b277dff9850d2a2556d8b60bcb0ea6524e36e28ffd7c87f71d6", size = 43951, upload-time = "2022-10-25T21:21:53.208Z" }, + { url = "https://files.pythonhosted.org/packages/33/29/e756e715a48959f1c0045342088d7ca9762a2f509b945f362a316e9412b7/pytest_benchmark-5.2.3-py3-none-any.whl", hash = "sha256:bc839726ad20e99aaa0d11a127445457b4219bdb9e80a1afc4b51da7f96b0803", size = 45255, upload-time = "2025-11-09T18:48:39.765Z" }, ] [[package]] name = "pytest-cov" -version = "4.1.0" +version = "7.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "coverage", extra = ["toml"] }, + { name = "pluggy" }, { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7a/15/da3df99fd551507694a9b01f512a2f6cf1254f33601605843c3775f39460/pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6", size = 63245, upload-time = "2023-05-24T18:44:56.845Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/f7/c933acc76f5208b3b00089573cf6a2bc26dc80a8aece8f52bb7d6b1855ca/pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1", size = 54328, upload-time = "2025-09-09T10:57:02.113Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/4b/8b78d126e275efa2379b1c2e09dc52cf70df16fc3b90613ef82531499d73/pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a", size = 21949, upload-time = "2023-05-24T18:44:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, ] [[package]] @@ -5462,14 +5224,14 @@ wheels = [ [[package]] name = "pytest-mock" -version = "3.14.1" +version = "3.15.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/28/67172c96ba684058a4d24ffe144d64783d2a270d0af0d9e792737bddc75c/pytest_mock-3.14.1.tar.gz", hash = "sha256:159e9edac4c451ce77a5cdb9fc5d1100708d2dd4ba3c3df572f14097351af80e", size = 33241, upload-time = "2025-05-26T13:58:45.167Z" } +sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/05/77b60e520511c53d1c1ca75f1930c7dd8e971d0c4379b7f4b3f9644685ba/pytest_mock-3.14.1-py3-none-any.whl", hash = "sha256:178aefcd11307d874b4cd3100344e7e2d888d9791a6a1d9bfe90fbc1b74fd1d0", size = 9923, upload-time = "2025-05-26T13:58:43.487Z" }, + { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" }, ] [[package]] @@ -5576,18 +5338,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863, upload-time = "2024-01-23T06:32:58.246Z" }, ] -[[package]] -name = "python-engineio" -version = "4.13.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "simple-websocket" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/34/12/bdef9dbeedbe2cdeba2a2056ad27b1fb081557d34b69a97f574843462cae/python_engineio-4.13.1.tar.gz", hash = "sha256:0a853fcef52f5b345425d8c2b921ac85023a04dfcf75d7b74696c61e940fd066", size = 92348, upload-time = "2026-02-06T23:38:06.12Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/aa/54/0cce26da03a981f949bb8449c9778537f75f5917c172e1d2992ff25cb57d/python_engineio-4.13.1-py3-none-any.whl", hash = "sha256:f32ad10589859c11053ad7d9bb3c9695cdf862113bfb0d20bc4d890198287399", size = 59847, upload-time = "2026-02-06T23:38:04.861Z" }, -] - [[package]] name = "python-http-client" version = "3.3.7" @@ -5644,19 +5394,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788, upload-time = "2024-08-07T17:33:28.192Z" }, ] -[[package]] -name = "python-socketio" -version = "5.13.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bidict" }, - { name = "python-engineio" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/21/1a/396d50ccf06ee539fa758ce5623b59a9cb27637fc4b2dc07ed08bf495e77/python_socketio-5.13.0.tar.gz", hash = "sha256:ac4e19a0302ae812e23b712ec8b6427ca0521f7c582d6abb096e36e24a263029", size = 121125, upload-time = "2025-04-12T15:46:59.933Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/32/b4fb8585d1be0f68bde7e110dffbcf354915f77ad8c778563f0ad9655c02/python_socketio-5.13.0-py3-none-any.whl", hash = "sha256:51f68d6499f2df8524668c24bcec13ba1414117cfb3a90115c559b601ab10caf", size = 77800, upload-time = "2025-04-12T15:46:58.412Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -5799,14 +5536,14 @@ wheels = [ [[package]] name = "redis" -version = "7.2.1" +version = "7.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "async-timeout", marker = "python_full_version < '3.11.3'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e9/31/1476f206482dd9bc53fdbbe9f6fbd5e05d153f18e54667ce839df331f2e6/redis-7.2.1.tar.gz", hash = "sha256:6163c1a47ee2d9d01221d8456bc1c75ab953cbda18cfbc15e7140e9ba16ca3a5", size = 4906735, upload-time = "2026-02-25T20:05:18.171Z" } +sdist = { url = "https://files.pythonhosted.org/packages/da/82/4d1a5279f6c1251d3d2a603a798a1137c657de9b12cfc1fba4858232c4d2/redis-7.3.0.tar.gz", hash = "sha256:4d1b768aafcf41b01022410b3cc4f15a07d9b3d6fe0c66fc967da2c88e551034", size = 4928081, upload-time = "2026-03-06T18:18:16.287Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ca/98/1dd1a5c060916cf21d15e67b7d6a7078e26e2605d5c37cbc9f4f5454c478/redis-7.2.1-py3-none-any.whl", hash = "sha256:49e231fbc8df2001436ae5252b3f0f3dc930430239bfeb6da4c7ee92b16e5d33", size = 396057, upload-time = "2026-02-25T20:05:16.533Z" }, + { url = "https://files.pythonhosted.org/packages/f0/28/84e57fce7819e81ec5aa1bd31c42b89607241f4fb1a3ea5b0d2dbeaea26c/redis-7.3.0-py3-none-any.whl", hash = "sha256:9d4fcb002a12a5e3c3fbe005d59c48a2cc231f87fbb2f6b70c2d89bb64fec364", size = 404379, upload-time = "2026-03-06T18:18:14.583Z" }, ] [package.optional-dependencies] @@ -5999,40 +5736,39 @@ wheels = [ [[package]] name = "ruff" -version = "0.14.14" +version = "0.15.7" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2e/06/f71e3a86b2df0dfa2d2f72195941cd09b44f87711cb7fa5193732cb9a5fc/ruff-0.14.14.tar.gz", hash = "sha256:2d0f819c9a90205f3a867dbbd0be083bee9912e170fd7d9704cc8ae45824896b", size = 4515732, upload-time = "2026-01-22T22:30:17.527Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/22/9e4f66ee588588dc6c9af6a994e12d26e19efbe874d1a909d09a6dac7a59/ruff-0.15.7.tar.gz", hash = "sha256:04f1ae61fc20fe0b148617c324d9d009b5f63412c0b16474f3d5f1a1a665f7ac", size = 4601277, upload-time = "2026-03-19T16:26:22.605Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/89/20a12e97bc6b9f9f68343952da08a8099c57237aef953a56b82711d55edd/ruff-0.14.14-py3-none-linux_armv6l.whl", hash = "sha256:7cfe36b56e8489dee8fbc777c61959f60ec0f1f11817e8f2415f429552846aed", size = 10467650, upload-time = "2026-01-22T22:30:08.578Z" }, - { url = "https://files.pythonhosted.org/packages/a3/b1/c5de3fd2d5a831fcae21beda5e3589c0ba67eec8202e992388e4b17a6040/ruff-0.14.14-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6006a0082336e7920b9573ef8a7f52eec837add1265cc74e04ea8a4368cd704c", size = 10883245, upload-time = "2026-01-22T22:30:04.155Z" }, - { url = "https://files.pythonhosted.org/packages/b8/7c/3c1db59a10e7490f8f6f8559d1db8636cbb13dccebf18686f4e3c9d7c772/ruff-0.14.14-py3-none-macosx_11_0_arm64.whl", hash = "sha256:026c1d25996818f0bf498636686199d9bd0d9d6341c9c2c3b62e2a0198b758de", size = 10231273, upload-time = "2026-01-22T22:30:34.642Z" }, - { url = "https://files.pythonhosted.org/packages/a1/6e/5e0e0d9674be0f8581d1f5e0f0a04761203affce3232c1a1189d0e3b4dad/ruff-0.14.14-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f666445819d31210b71e0a6d1c01e24447a20b85458eea25a25fe8142210ae0e", size = 10585753, upload-time = "2026-01-22T22:30:31.781Z" }, - { url = "https://files.pythonhosted.org/packages/23/09/754ab09f46ff1884d422dc26d59ba18b4e5d355be147721bb2518aa2a014/ruff-0.14.14-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3c0f18b922c6d2ff9a5e6c3ee16259adc513ca775bcf82c67ebab7cbd9da5bc8", size = 10286052, upload-time = "2026-01-22T22:30:24.827Z" }, - { url = "https://files.pythonhosted.org/packages/c8/cc/e71f88dd2a12afb5f50733851729d6b571a7c3a35bfdb16c3035132675a0/ruff-0.14.14-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1629e67489c2dea43e8658c3dba659edbfd87361624b4040d1df04c9740ae906", size = 11043637, upload-time = "2026-01-22T22:30:13.239Z" }, - { url = "https://files.pythonhosted.org/packages/67/b2/397245026352494497dac935d7f00f1468c03a23a0c5db6ad8fc49ca3fb2/ruff-0.14.14-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:27493a2131ea0f899057d49d303e4292b2cae2bb57253c1ed1f256fbcd1da480", size = 12194761, upload-time = "2026-01-22T22:30:22.542Z" }, - { url = "https://files.pythonhosted.org/packages/5b/06/06ef271459f778323112c51b7587ce85230785cd64e91772034ddb88f200/ruff-0.14.14-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01ff589aab3f5b539e35db38425da31a57521efd1e4ad1ae08fc34dbe30bd7df", size = 12005701, upload-time = "2026-01-22T22:30:20.499Z" }, - { url = "https://files.pythonhosted.org/packages/41/d6/99364514541cf811ccc5ac44362f88df66373e9fec1b9d1c4cc830593fe7/ruff-0.14.14-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1cc12d74eef0f29f51775f5b755913eb523546b88e2d733e1d701fe65144e89b", size = 11282455, upload-time = "2026-01-22T22:29:59.679Z" }, - { url = "https://files.pythonhosted.org/packages/ca/71/37daa46f89475f8582b7762ecd2722492df26421714a33e72ccc9a84d7a5/ruff-0.14.14-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb8481604b7a9e75eff53772496201690ce2687067e038b3cc31aaf16aa0b974", size = 11215882, upload-time = "2026-01-22T22:29:57.032Z" }, - { url = "https://files.pythonhosted.org/packages/2c/10/a31f86169ec91c0705e618443ee74ede0bdd94da0a57b28e72db68b2dbac/ruff-0.14.14-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:14649acb1cf7b5d2d283ebd2f58d56b75836ed8c6f329664fa91cdea19e76e66", size = 11180549, upload-time = "2026-01-22T22:30:27.175Z" }, - { url = "https://files.pythonhosted.org/packages/fd/1e/c723f20536b5163adf79bdd10c5f093414293cdf567eed9bdb7b83940f3f/ruff-0.14.14-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:e8058d2145566510790eab4e2fad186002e288dec5e0d343a92fe7b0bc1b3e13", size = 10543416, upload-time = "2026-01-22T22:30:01.964Z" }, - { url = "https://files.pythonhosted.org/packages/3e/34/8a84cea7e42c2d94ba5bde1d7a4fae164d6318f13f933d92da6d7c2041ff/ruff-0.14.14-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:e651e977a79e4c758eb807f0481d673a67ffe53cfa92209781dfa3a996cf8412", size = 10285491, upload-time = "2026-01-22T22:30:29.51Z" }, - { url = "https://files.pythonhosted.org/packages/55/ef/b7c5ea0be82518906c978e365e56a77f8de7678c8bb6651ccfbdc178c29f/ruff-0.14.14-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cc8b22da8d9d6fdd844a68ae937e2a0adf9b16514e9a97cc60355e2d4b219fc3", size = 10733525, upload-time = "2026-01-22T22:30:06.499Z" }, - { url = "https://files.pythonhosted.org/packages/6a/5b/aaf1dfbcc53a2811f6cc0a1759de24e4b03e02ba8762daabd9b6bd8c59e3/ruff-0.14.14-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:16bc890fb4cc9781bb05beb5ab4cd51be9e7cb376bf1dd3580512b24eb3fda2b", size = 11315626, upload-time = "2026-01-22T22:30:36.848Z" }, - { url = "https://files.pythonhosted.org/packages/2c/aa/9f89c719c467dfaf8ad799b9bae0df494513fb21d31a6059cb5870e57e74/ruff-0.14.14-py3-none-win32.whl", hash = "sha256:b530c191970b143375b6a68e6f743800b2b786bbcf03a7965b06c4bf04568167", size = 10502442, upload-time = "2026-01-22T22:30:38.93Z" }, - { url = "https://files.pythonhosted.org/packages/87/44/90fa543014c45560cae1fffc63ea059fb3575ee6e1cb654562197e5d16fb/ruff-0.14.14-py3-none-win_amd64.whl", hash = "sha256:3dde1435e6b6fe5b66506c1dff67a421d0b7f6488d466f651c07f4cab3bf20fd", size = 11630486, upload-time = "2026-01-22T22:30:10.852Z" }, - { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, + { url = "https://files.pythonhosted.org/packages/41/2f/0b08ced94412af091807b6119ca03755d651d3d93a242682bf020189db94/ruff-0.15.7-py3-none-linux_armv6l.whl", hash = "sha256:a81cc5b6910fb7dfc7c32d20652e50fa05963f6e13ead3c5915c41ac5d16668e", size = 10489037, upload-time = "2026-03-19T16:26:32.47Z" }, + { url = "https://files.pythonhosted.org/packages/91/4a/82e0fa632e5c8b1eba5ee86ecd929e8ff327bbdbfb3c6ac5d81631bef605/ruff-0.15.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:722d165bd52403f3bdabc0ce9e41fc47070ac56d7a91b4e0d097b516a53a3477", size = 10955433, upload-time = "2026-03-19T16:27:00.205Z" }, + { url = "https://files.pythonhosted.org/packages/ab/10/12586735d0ff42526ad78c049bf51d7428618c8b5c467e72508c694119df/ruff-0.15.7-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7fbc2448094262552146cbe1b9643a92f66559d3761f1ad0656d4991491af49e", size = 10269302, upload-time = "2026-03-19T16:26:26.183Z" }, + { url = "https://files.pythonhosted.org/packages/eb/5d/32b5c44ccf149a26623671df49cbfbd0a0ae511ff3df9d9d2426966a8d57/ruff-0.15.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b39329b60eba44156d138275323cc726bbfbddcec3063da57caa8a8b1d50adf", size = 10607625, upload-time = "2026-03-19T16:27:03.263Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f1/f0001cabe86173aaacb6eb9bb734aa0605f9a6aa6fa7d43cb49cbc4af9c9/ruff-0.15.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:87768c151808505f2bfc93ae44e5f9e7c8518943e5074f76ac21558ef5627c85", size = 10324743, upload-time = "2026-03-19T16:27:09.791Z" }, + { url = "https://files.pythonhosted.org/packages/7a/87/b8a8f3d56b8d848008559e7c9d8bf367934d5367f6d932ba779456e2f73b/ruff-0.15.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb0511670002c6c529ec66c0e30641c976c8963de26a113f3a30456b702468b0", size = 11138536, upload-time = "2026-03-19T16:27:06.101Z" }, + { url = "https://files.pythonhosted.org/packages/e4/f2/4fd0d05aab0c5934b2e1464784f85ba2eab9d54bffc53fb5430d1ed8b829/ruff-0.15.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0d19644f801849229db8345180a71bee5407b429dd217f853ec515e968a6912", size = 11994292, upload-time = "2026-03-19T16:26:48.718Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/fc4483871e767e5e95d1622ad83dad5ebb830f762ed0420fde7dfa9d9b08/ruff-0.15.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4806d8e09ef5e84eb19ba833d0442f7e300b23fe3f0981cae159a248a10f0036", size = 11398981, upload-time = "2026-03-19T16:26:54.513Z" }, + { url = "https://files.pythonhosted.org/packages/b0/99/66f0343176d5eab02c3f7fcd2de7a8e0dd7a41f0d982bee56cd1c24db62b/ruff-0.15.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dce0896488562f09a27b9c91b1f58a097457143931f3c4d519690dea54e624c5", size = 11242422, upload-time = "2026-03-19T16:26:29.277Z" }, + { url = "https://files.pythonhosted.org/packages/5d/3a/a7060f145bfdcce4c987ea27788b30c60e2c81d6e9a65157ca8afe646328/ruff-0.15.7-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:1852ce241d2bc89e5dc823e03cff4ce73d816b5c6cdadd27dbfe7b03217d2a12", size = 11232158, upload-time = "2026-03-19T16:26:42.321Z" }, + { url = "https://files.pythonhosted.org/packages/a7/53/90fbb9e08b29c048c403558d3cdd0adf2668b02ce9d50602452e187cd4af/ruff-0.15.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5f3e4b221fb4bd293f79912fc5e93a9063ebd6d0dcbd528f91b89172a9b8436c", size = 10577861, upload-time = "2026-03-19T16:26:57.459Z" }, + { url = "https://files.pythonhosted.org/packages/2f/aa/5f486226538fe4d0f0439e2da1716e1acf895e2a232b26f2459c55f8ddad/ruff-0.15.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:b15e48602c9c1d9bdc504b472e90b90c97dc7d46c7028011ae67f3861ceba7b4", size = 10327310, upload-time = "2026-03-19T16:26:35.909Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/271afdffb81fe7bfc8c43ba079e9d96238f674380099457a74ccb3863857/ruff-0.15.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1b4705e0e85cedc74b0a23cf6a179dbb3df184cb227761979cc76c0440b5ab0d", size = 10840752, upload-time = "2026-03-19T16:26:45.723Z" }, + { url = "https://files.pythonhosted.org/packages/bf/29/a4ae78394f76c7759953c47884eb44de271b03a66634148d9f7d11e721bd/ruff-0.15.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:112c1fa316a558bb34319282c1200a8bf0495f1b735aeb78bfcb2991e6087580", size = 11336961, upload-time = "2026-03-19T16:26:39.076Z" }, + { url = "https://files.pythonhosted.org/packages/26/6b/8786ba5736562220d588a2f6653e6c17e90c59ced34a2d7b512ef8956103/ruff-0.15.7-py3-none-win32.whl", hash = "sha256:6d39e2d3505b082323352f733599f28169d12e891f7dd407f2d4f54b4c2886de", size = 10582538, upload-time = "2026-03-19T16:26:15.992Z" }, + { url = "https://files.pythonhosted.org/packages/2b/e9/346d4d3fffc6871125e877dae8d9a1966b254fbd92a50f8561078b88b099/ruff-0.15.7-py3-none-win_amd64.whl", hash = "sha256:4d53d712ddebcd7dace1bc395367aec12c057aacfe9adbb6d832302575f4d3a1", size = 11755839, upload-time = "2026-03-19T16:26:19.897Z" }, + { url = "https://files.pythonhosted.org/packages/8f/e8/726643a3ea68c727da31570bde48c7a10f1aa60eddd628d94078fec586ff/ruff-0.15.7-py3-none-win_arm64.whl", hash = "sha256:18e8d73f1c3fdf27931497972250340f92e8c861722161a9caeb89a58ead6ed2", size = 11023304, upload-time = "2026-03-19T16:26:51.669Z" }, ] [[package]] name = "s3transfer" -version = "0.10.4" +version = "0.16.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c0/0a/1cdbabf9edd0ea7747efdf6c9ab4e7061b085aa7f9bfc36bb1601563b069/s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7", size = 145287, upload-time = "2024-11-20T21:06:05.981Z" } +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/66/05/7957af15543b8c9799209506df4660cba7afc4cf94bfb60513827e96bed6/s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e", size = 83175, upload-time = "2024-11-20T21:06:03.961Z" }, + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, ] [[package]] @@ -6121,18 +5857,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] -[[package]] -name = "simple-websocket" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wsproto" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300, upload-time = "2024-10-10T22:39:31.412Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842, upload-time = "2024-10-10T22:39:29.645Z" }, -] - [[package]] name = "six" version = "1.17.0" @@ -6442,26 +6166,28 @@ wheels = [ [[package]] name = "tiktoken" -version = "0.9.0" +version = "0.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "regex" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991, upload-time = "2025-02-14T06:03:01.003Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/ae/4613a59a2a48e761c5161237fc850eb470b4bb93696db89da51b79a871f1/tiktoken-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f32cc56168eac4851109e9b5d327637f15fd662aa30dd79f964b7c39fbadd26e", size = 1065987, upload-time = "2025-02-14T06:02:14.174Z" }, - { url = "https://files.pythonhosted.org/packages/3f/86/55d9d1f5b5a7e1164d0f1538a85529b5fcba2b105f92db3622e5d7de6522/tiktoken-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:45556bc41241e5294063508caf901bf92ba52d8ef9222023f83d2483a3055348", size = 1009155, upload-time = "2025-02-14T06:02:15.384Z" }, - { url = "https://files.pythonhosted.org/packages/03/58/01fb6240df083b7c1916d1dcb024e2b761213c95d576e9f780dfb5625a76/tiktoken-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03935988a91d6d3216e2ec7c645afbb3d870b37bcb67ada1943ec48678e7ee33", size = 1142898, upload-time = "2025-02-14T06:02:16.666Z" }, - { url = "https://files.pythonhosted.org/packages/b1/73/41591c525680cd460a6becf56c9b17468d3711b1df242c53d2c7b2183d16/tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b3d80aad8d2c6b9238fc1a5524542087c52b860b10cbf952429ffb714bc1136", size = 1197535, upload-time = "2025-02-14T06:02:18.595Z" }, - { url = "https://files.pythonhosted.org/packages/7d/7c/1069f25521c8f01a1a182f362e5c8e0337907fae91b368b7da9c3e39b810/tiktoken-0.9.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b2a21133be05dc116b1d0372af051cd2c6aa1d2188250c9b553f9fa49301b336", size = 1259548, upload-time = "2025-02-14T06:02:20.729Z" }, - { url = "https://files.pythonhosted.org/packages/6f/07/c67ad1724b8e14e2b4c8cca04b15da158733ac60136879131db05dda7c30/tiktoken-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:11a20e67fdf58b0e2dea7b8654a288e481bb4fc0289d3ad21291f8d0849915fb", size = 893895, upload-time = "2025-02-14T06:02:22.67Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073, upload-time = "2025-02-14T06:02:24.768Z" }, - { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075, upload-time = "2025-02-14T06:02:26.92Z" }, - { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754, upload-time = "2025-02-14T06:02:28.124Z" }, - { url = "https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678, upload-time = "2025-02-14T06:02:29.845Z" }, - { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283, upload-time = "2025-02-14T06:02:33.838Z" }, - { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897, upload-time = "2025-02-14T06:02:36.265Z" }, + { url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565, upload-time = "2025-10-06T20:21:44.566Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284, upload-time = "2025-10-06T20:21:45.622Z" }, + { url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201, upload-time = "2025-10-06T20:21:47.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444, upload-time = "2025-10-06T20:21:48.139Z" }, + { url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080, upload-time = "2025-10-06T20:21:49.246Z" }, + { url = "https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240, upload-time = "2025-10-06T20:21:50.274Z" }, + { url = "https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422, upload-time = "2025-10-06T20:21:51.734Z" }, + { url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728, upload-time = "2025-10-06T20:21:52.756Z" }, + { url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049, upload-time = "2025-10-06T20:21:53.782Z" }, + { url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008, upload-time = "2025-10-06T20:21:54.832Z" }, + { url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665, upload-time = "2025-10-06T20:21:56.129Z" }, + { url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230, upload-time = "2025-10-06T20:21:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688, upload-time = "2025-10-06T20:21:58.619Z" }, + { url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694, upload-time = "2025-10-06T20:21:59.876Z" }, ] [[package]] @@ -6554,23 +6280,22 @@ wheels = [ [[package]] name = "transformers" -version = "4.56.2" +version = "5.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock" }, { name = "huggingface-hub" }, { name = "numpy" }, { name = "packaging" }, { name = "pyyaml" }, { name = "regex" }, - { name = "requests" }, { name = "safetensors" }, { name = "tokenizers" }, { name = "tqdm" }, + { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/82/0bcfddd134cdf53440becb5e738257cc3cf34cf229d63b57bfd288e6579f/transformers-4.56.2.tar.gz", hash = "sha256:5e7c623e2d7494105c726dd10f6f90c2c99a55ebe86eef7233765abd0cb1c529", size = 9844296, upload-time = "2025-09-19T15:16:26.778Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/26/2591b48412bde75e33bfd292034103ffe41743cacd03120e3242516cd143/transformers-4.56.2-py3-none-any.whl", hash = "sha256:79c03d0e85b26cb573c109ff9eafa96f3c8d4febfd8a0774e8bba32702dd6dde", size = 11608055, upload-time = "2025-09-19T15:16:23.736Z" }, + { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" }, ] [[package]] @@ -6620,11 +6345,11 @@ wheels = [ [[package]] name = "types-cachetools" -version = "5.5.0.20240820" +version = "6.2.0.20260317" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c2/7e/ad6ba4a56b2a994e0f0a04a61a50466b60ee88a13d10a18c83ac14a66c61/types-cachetools-5.5.0.20240820.tar.gz", hash = "sha256:b888ab5c1a48116f7799cd5004b18474cd82b5463acb5ffb2db2fc9c7b053bc0", size = 4198, upload-time = "2024-08-20T02:30:07.525Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/7f/16a4d8344c28193a5a74358028c2d2f753f0d9658dd98b9e1967c50045a2/types_cachetools-6.2.0.20260317.tar.gz", hash = "sha256:6d91855bcc944665897c125e720aa3c80aace929b77a64e796343701df4f61c6", size = 9812, upload-time = "2026-03-17T04:06:32.007Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/27/4d/fd7cc050e2d236d5570c4d92531c0396573a1e14b31735870e849351c717/types_cachetools-5.5.0.20240820-py3-none-any.whl", hash = "sha256:efb2ed8bf27a4b9d3ed70d33849f536362603a90b8090a328acf0cd42fda82e2", size = 4149, upload-time = "2024-08-20T02:30:06.461Z" }, + { url = "https://files.pythonhosted.org/packages/17/9a/b00b23054934c4d569c19f7278c4fb32746cd36a64a175a216d3073a4713/types_cachetools-6.2.0.20260317-py3-none-any.whl", hash = "sha256:92fa9bc50e4629e31fca67ceb3fb1de71791e314fa16c0a0d2728724dc222c8b", size = 9346, upload-time = "2026-03-17T04:06:31.184Z" }, ] [[package]] @@ -6659,32 +6384,32 @@ wheels = [ [[package]] name = "types-deprecated" -version = "1.2.15.20250304" +version = "1.3.1.20260130" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0e/67/eeefaaabb03b288aad85483d410452c8bbcbf8b2bd876b0e467ebd97415b/types_deprecated-1.2.15.20250304.tar.gz", hash = "sha256:c329030553029de5cc6cb30f269c11f4e00e598c4241290179f63cda7d33f719", size = 8015, upload-time = "2025-03-04T02:48:17.894Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/97/9924e496f88412788c432891cacd041e542425fe0bffff4143a7c1c89ac4/types_deprecated-1.3.1.20260130.tar.gz", hash = "sha256:726b05e5e66d42359b1d6631835b15de62702588c8a59b877aa4b1e138453450", size = 8455, upload-time = "2026-01-30T03:58:17.401Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/e3/c18aa72ab84e0bc127a3a94e93be1a6ac2cb281371d3a45376ab7cfdd31c/types_deprecated-1.2.15.20250304-py3-none-any.whl", hash = "sha256:86a65aa550ea8acf49f27e226b8953288cd851de887970fbbdf2239c116c3107", size = 8553, upload-time = "2025-03-04T02:48:16.666Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b2/6f920582af7efcd37165cd6321707f3ad5839dd24565a8a982f2bd9c6fd1/types_deprecated-1.3.1.20260130-py3-none-any.whl", hash = "sha256:593934d85c38ca321a9d301f00c42ffe13e4cf830b71b10579185ba0ce172d9a", size = 9077, upload-time = "2026-01-30T03:58:16.633Z" }, ] [[package]] name = "types-docutils" -version = "0.21.0.20250809" +version = "0.22.3.20260322" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/9b/f92917b004e0a30068e024e8925c7d9b10440687b96d91f26d8762f4b68c/types_docutils-0.21.0.20250809.tar.gz", hash = "sha256:cc2453c87dc729b5aae499597496e4f69b44aa5fccb27051ed8bb55b0bd5e31b", size = 54770, upload-time = "2025-08-09T03:15:42.752Z" } +sdist = { url = "https://files.pythonhosted.org/packages/44/bb/243a87fc1605a4a94c2c343d6dbddbf0d7ef7c0b9550f360b8cda8e82c39/types_docutils-0.22.3.20260322.tar.gz", hash = "sha256:e2450bb997283c3141ec5db3e436b91f0aa26efe35eb9165178ca976ccb4930b", size = 57311, upload-time = "2026-03-22T04:08:44.064Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/a9/46bc12e4c918c4109b67401bf87fd450babdffbebd5dbd7833f5096f42a5/types_docutils-0.21.0.20250809-py3-none-any.whl", hash = "sha256:af02c82327e8ded85f57dd85c8ebf93b6a0b643d85a44c32d471e3395604ea50", size = 89598, upload-time = "2025-08-09T03:15:41.503Z" }, + { url = "https://files.pythonhosted.org/packages/c6/4a/22c090cd4615a16917dff817cbe7c5956da376c961e024c241cd962d2c3d/types_docutils-0.22.3.20260322-py3-none-any.whl", hash = "sha256:681d4510ce9b80a0c6a593f0f9843d81f8caa786db7b39ba04d9fd5480ac4442", size = 91978, upload-time = "2026-03-22T04:08:43.117Z" }, ] [[package]] name = "types-flask-cors" -version = "5.0.0.20250413" +version = "6.0.0.20250809" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "flask" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a4/f3/dd2f0d274ecb77772d3ce83735f75ad14713461e8cf7e6d61a7c272037b1/types_flask_cors-5.0.0.20250413.tar.gz", hash = "sha256:b346d052f4ef3b606b73faf13e868e458f1efdbfedcbe1aba739eb2f54a6cf5f", size = 9921, upload-time = "2025-04-13T04:04:15.515Z" } +sdist = { url = "https://files.pythonhosted.org/packages/45/e0/e5dd841bf475765fb61cb04c1e70d2fd0675a0d4ddfacd50a333eafe7267/types_flask_cors-6.0.0.20250809.tar.gz", hash = "sha256:24380a2b82548634c0931d50b9aafab214eea9f85dcc04f15ab1518752a7e6aa", size = 9951, upload-time = "2025-08-09T03:16:37.454Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/66/34/7d64eb72d80bfd5b9e6dd31e7fe351a1c9a735f5c01e85b1d3b903a9d656/types_flask_cors-5.0.0.20250413-py3-none-any.whl", hash = "sha256:8183fdba764d45a5b40214468a1d5daa0e86c4ee6042d13f38cc428308f27a64", size = 9982, upload-time = "2025-04-13T04:04:14.27Z" }, + { url = "https://files.pythonhosted.org/packages/9f/5e/1e60c29eb5796233d4d627ca4979c4ae8da962fd0aae0cdb6e3e6a807bbc/types_flask_cors-6.0.0.20250809-py3-none-any.whl", hash = "sha256:f6d660dddab946779f4263cb561bffe275d86cb8747ce02e9fec8d340780131b", size = 9971, upload-time = "2025-08-09T03:16:36.593Z" }, ] [[package]] @@ -6745,14 +6470,14 @@ wheels = [ [[package]] name = "types-jsonschema" -version = "4.23.0.20250516" +version = "4.26.0.20260202" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "referencing" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a0/ec/27ea5bffdb306bf261f6677a98b6993d93893b2c2e30f7ecc1d2c99d32e7/types_jsonschema-4.23.0.20250516.tar.gz", hash = "sha256:9ace09d9d35c4390a7251ccd7d833b92ccc189d24d1b347f26212afce361117e", size = 14911, upload-time = "2025-05-16T03:09:33.728Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/07/68f63e715eb327ed2f5292e29e8be99785db0f72c7664d2c63bd4dbdc29d/types_jsonschema-4.26.0.20260202.tar.gz", hash = "sha256:29831baa4308865a9aec547a61797a06fc152b0dac8dddd531e002f32265cb07", size = 16168, upload-time = "2026-02-02T04:11:22.585Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/48/73ae8b388e19fc4a2a8060d0876325ec7310cfd09b53a2185186fd35959f/types_jsonschema-4.23.0.20250516-py3-none-any.whl", hash = "sha256:e7d0dd7db7e59e63c26e3230e26ffc64c4704cc5170dc21270b366a35ead1618", size = 15027, upload-time = "2025-05-16T03:09:32.499Z" }, + { url = "https://files.pythonhosted.org/packages/c1/06/962d4f364f779d7389cd31a1bb581907b057f52f0ace2c119a8dd8409db6/types_jsonschema-4.26.0.20260202-py3-none-any.whl", hash = "sha256:41c95343abc4de9264e333a55e95dfb4d401e463856d0164eec9cb182e8746da", size = 15914, upload-time = "2026-02-02T04:11:21.61Z" }, ] [[package]] @@ -6766,11 +6491,11 @@ wheels = [ [[package]] name = "types-oauthlib" -version = "3.2.0.20250516" +version = "3.3.0.20250822" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b1/2c/dba2c193ccff2d1e2835589d4075b230d5627b9db363e9c8de153261d6ec/types_oauthlib-3.2.0.20250516.tar.gz", hash = "sha256:56bf2cffdb8443ae718d4e83008e3fbd5f861230b4774e6d7799527758119d9a", size = 24683, upload-time = "2025-05-16T03:07:42.484Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/6e/d08033f562053c459322333c46baa8cf8d2d8c18f30d46dd898c8fd8df77/types_oauthlib-3.3.0.20250822.tar.gz", hash = "sha256:2cd41587dd80c199e4230e3f086777e9ae525e89579c64afe5e0039ab09be9de", size = 25700, upload-time = "2025-08-22T03:02:41.378Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/54/cdd62283338616fd2448f534b29110d79a42aaabffaf5f45e7aed365a366/types_oauthlib-3.2.0.20250516-py3-none-any.whl", hash = "sha256:5799235528bc9bd262827149a1633ff55ae6e5a5f5f151f4dae74359783a31b3", size = 45671, upload-time = "2025-05-16T03:07:41.268Z" }, + { url = "https://files.pythonhosted.org/packages/18/4b/00593b8b5d055550e1fcb9af2c42fa11b0a90bf16a94759a77bc1c3c0c72/types_oauthlib-3.3.0.20250822-py3-none-any.whl", hash = "sha256:b7f4c9b9eed0e020f454e0af800b10e93dd2efd196da65744b76910cce7e70d6", size = 48800, upload-time = "2025-08-22T03:02:40.427Z" }, ] [[package]] @@ -6811,11 +6536,11 @@ wheels = [ [[package]] name = "types-protobuf" -version = "5.29.1.20250403" +version = "6.32.1.20260221" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/78/6d/62a2e73b966c77609560800004dd49a926920dd4976a9fdd86cf998e7048/types_protobuf-5.29.1.20250403.tar.gz", hash = "sha256:7ff44f15022119c9d7558ce16e78b2d485bf7040b4fadced4dd069bb5faf77a2", size = 59413, upload-time = "2025-04-02T10:07:17.138Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5f/e2/9aa4a3b2469508bd7b4e2ae11cbedaf419222a09a1b94daffcd5efca4023/types_protobuf-6.32.1.20260221.tar.gz", hash = "sha256:6d5fb060a616bfb076cbb61b4b3c3969f5fc8bec5810f9a2f7e648ee5cbcbf6e", size = 64408, upload-time = "2026-02-21T03:55:13.916Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/69/e3/b74dcc2797b21b39d5a4f08a8b08e20369b4ca250d718df7af41a60dd9f0/types_protobuf-5.29.1.20250403-py3-none-any.whl", hash = "sha256:c71de04106a2d54e5b2173d0a422058fae0ef2d058d70cf369fb797bf61ffa59", size = 73874, upload-time = "2025-04-02T10:07:15.755Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e8/1fd38926f9cf031188fbc5a96694203ea6f24b0e34bd64a225ec6f6291ba/types_protobuf-6.32.1.20260221-py3-none-any.whl", hash = "sha256:da7cdd947975964a93c30bfbcc2c6841ee646b318d3816b033adc2c4eb6448e4", size = 77956, upload-time = "2026-02-21T03:55:12.894Z" }, ] [[package]] @@ -6888,22 +6613,13 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/85/4f/b88274658cf489e35175be8571c970e9a1219713bafd8fc9e166d7351ecb/types_python_http_client-3.3.7.20250708-py3-none-any.whl", hash = "sha256:e2fc253859decab36713d82fc7f205868c3ddeaee79dbb55956ad9ca77abe12b", size = 8890, upload-time = "2025-07-08T03:14:35.506Z" }, ] -[[package]] -name = "types-pytz" -version = "2025.2.0.20251108" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/40/ff/c047ddc68c803b46470a357454ef76f4acd8c1088f5cc4891cdd909bfcf6/types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb", size = 10961, upload-time = "2025-11-08T02:55:57.001Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c", size = 10116, upload-time = "2025-11-08T02:55:56.194Z" }, -] - [[package]] name = "types-pywin32" -version = "310.0.0.20250516" +version = "311.0.0.20260317" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/bc/c7be2934a37cc8c645c945ca88450b541e482c4df3ac51e5556377d34811/types_pywin32-310.0.0.20250516.tar.gz", hash = "sha256:91e5bfc033f65c9efb443722eff8101e31d690dd9a540fa77525590d3da9cc9d", size = 328459, upload-time = "2025-05-16T03:07:57.411Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/ca/738dab36a8cd4084c612faaa75cbb6694ae7234fc1afcaf8ec6826a8e251/types_pywin32-311.0.0.20260317.tar.gz", hash = "sha256:3b716a7fd0420d5f7dd45b2dcaf26e0831589ffd0b5b961ac293cc632ad4df1d", size = 330301, upload-time = "2026-03-17T04:06:28.46Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9b/72/469e4cc32399dbe6c843e38fdb6d04fee755e984e137c0da502f74d3ac59/types_pywin32-310.0.0.20250516-py3-none-any.whl", hash = "sha256:f9ef83a1ec3e5aae2b0e24c5f55ab41272b5dfeaabb9a0451d33684c9545e41a", size = 390411, upload-time = "2025-05-16T03:07:56.282Z" }, + { url = "https://files.pythonhosted.org/packages/fa/f7/aa92309ea16c795fe3f9217a3b1d5cbe155798e50cee5cfd72b8a670ddd8/types_pywin32-311.0.0.20260317-py3-none-any.whl", hash = "sha256:d1c2771a20d8189fc758ffb8bd3d04651a6908946b3d3ce6a3992c0e96740d68", size = 393384, upload-time = "2026-03-17T04:06:27.091Z" }, ] [[package]] @@ -6930,11 +6646,11 @@ wheels = [ [[package]] name = "types-regex" -version = "2024.11.6.20250403" +version = "2026.2.28.20260301" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/75/012b90c8557d3abb3b58a9073a94d211c8f75c9b2e26bf0d8af7ecf7bc78/types_regex-2024.11.6.20250403.tar.gz", hash = "sha256:3fdf2a70bbf830de4b3a28e9649a52d43dabb57cdb18fbfe2252eefb53666665", size = 12394, upload-time = "2025-04-03T02:54:35.379Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3a/ed/106958cb686316113b748ed4209fa363fd92b15759d5409c3930fed36606/types_regex-2026.2.28.20260301.tar.gz", hash = "sha256:644c231db3f368908320170c14905731a7ae5fabdac0f60f5d6d12ecdd3bc8dd", size = 13157, upload-time = "2026-03-01T04:11:13.559Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/61/49/67200c4708f557be6aa4ecdb1fa212d67a10558c5240251efdc799cca22f/types_regex-2024.11.6.20250403-py3-none-any.whl", hash = "sha256:e22c0f67d73f4b4af6086a340f387b6f7d03bed8a0bb306224b75c51a29b0001", size = 10396, upload-time = "2025-04-03T02:54:34.555Z" }, + { url = "https://files.pythonhosted.org/packages/c7/bb/9bc26fcf5155bd25efeca35f8ba6bffb8b3c9da2baac8bf40067606418f3/types_regex-2026.2.28.20260301-py3-none-any.whl", hash = "sha256:7da7a1fe67528238176a5844fd435ca90617cf605341308686afbc579fdea5c0", size = 11130, upload-time = "2026-03-01T04:11:11.454Z" }, ] [[package]] @@ -7223,6 +6939,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] +[[package]] +name = "uuid-utils" +version = "0.14.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/d1/38a573f0c631c062cf42fa1f5d021d4dd3c31fb23e4376e4b56b0c9fbbed/uuid_utils-0.14.1.tar.gz", hash = "sha256:9bfc95f64af80ccf129c604fb6b8ca66c6f256451e32bc4570f760e4309c9b69", size = 22195, upload-time = "2026-02-20T22:50:38.833Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/b7/add4363039a34506a58457d96d4aa2126061df3a143eb4d042aedd6a2e76/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:93a3b5dc798a54a1feb693f2d1cb4cf08258c32ff05ae4929b5f0a2ca624a4f0", size = 604679, upload-time = "2026-02-20T22:50:27.469Z" }, + { url = "https://files.pythonhosted.org/packages/dd/84/d1d0bef50d9e66d31b2019997c741b42274d53dde2e001b7a83e9511c339/uuid_utils-0.14.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ccd65a4b8e83af23eae5e56d88034b2fe7264f465d3e830845f10d1591b81741", size = 309346, upload-time = "2026-02-20T22:50:31.857Z" }, + { url = "https://files.pythonhosted.org/packages/ef/ed/b6d6fd52a6636d7c3eddf97d68da50910bf17cd5ac221992506fb56cf12e/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b56b0cacd81583834820588378e432b0696186683b813058b707aedc1e16c4b1", size = 344714, upload-time = "2026-02-20T22:50:42.642Z" }, + { url = "https://files.pythonhosted.org/packages/a8/a7/a19a1719fb626fe0b31882db36056d44fe904dc0cf15b06fdf56b2679cf7/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb3cf14de789097320a3c56bfdfdd51b1225d11d67298afbedee7e84e3837c96", size = 350914, upload-time = "2026-02-20T22:50:36.487Z" }, + { url = "https://files.pythonhosted.org/packages/1d/fc/f6690e667fdc3bb1a73f57951f97497771c56fe23e3d302d7404be394d4f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60e0854a90d67f4b0cc6e54773deb8be618f4c9bad98d3326f081423b5d14fae", size = 482609, upload-time = "2026-02-20T22:50:37.511Z" }, + { url = "https://files.pythonhosted.org/packages/54/6e/dcd3fa031320921a12ec7b4672dea3bd1dd90ddffa363a91831ba834d559/uuid_utils-0.14.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce6743ba194de3910b5feb1a62590cd2587e33a73ab6af8a01b642ceb5055862", size = 345699, upload-time = "2026-02-20T22:50:46.87Z" }, + { url = "https://files.pythonhosted.org/packages/04/28/e5220204b58b44ac0047226a9d016a113fde039280cc8732d9e6da43b39f/uuid_utils-0.14.1-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:043fb58fde6cf1620a6c066382f04f87a8e74feb0f95a585e4ed46f5d44af57b", size = 372205, upload-time = "2026-02-20T22:50:28.438Z" }, + { url = "https://files.pythonhosted.org/packages/c7/d9/3d2eb98af94b8dfffc82b6a33b4dfc87b0a5de2c68a28f6dde0db1f8681b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c915d53f22945e55fe0d3d3b0b87fd965a57f5fd15666fd92d6593a73b1dd297", size = 521836, upload-time = "2026-02-20T22:50:23.057Z" }, + { url = "https://files.pythonhosted.org/packages/a8/15/0eb106cc6fe182f7577bc0ab6e2f0a40be247f35c5e297dbf7bbc460bd02/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:0972488e3f9b449e83f006ead5a0e0a33ad4a13e4462e865b7c286ab7d7566a3", size = 625260, upload-time = "2026-02-20T22:50:25.949Z" }, + { url = "https://files.pythonhosted.org/packages/3c/17/f539507091334b109e7496830af2f093d9fc8082411eafd3ece58af1f8ba/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:1c238812ae0c8ffe77d8d447a32c6dfd058ea4631246b08b5a71df586ff08531", size = 587824, upload-time = "2026-02-20T22:50:35.225Z" }, + { url = "https://files.pythonhosted.org/packages/2e/c2/d37a7b2e41f153519367d4db01f0526e0d4b06f1a4a87f1c5dfca5d70a8b/uuid_utils-0.14.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:bec8f8ef627af86abf8298e7ec50926627e29b34fa907fcfbedb45aaa72bca43", size = 551407, upload-time = "2026-02-20T22:50:44.915Z" }, + { url = "https://files.pythonhosted.org/packages/65/36/2d24b2cbe78547c6532da33fb8613debd3126eccc33a6374ab788f5e46e9/uuid_utils-0.14.1-cp39-abi3-win32.whl", hash = "sha256:b54d6aa6252d96bac1fdbc80d26ba71bad9f220b2724d692ad2f2310c22ef523", size = 183476, upload-time = "2026-02-20T22:50:32.745Z" }, + { url = "https://files.pythonhosted.org/packages/83/92/2d7e90df8b1a69ec4cff33243ce02b7a62f926ef9e2f0eca5a026889cd73/uuid_utils-0.14.1-cp39-abi3-win_amd64.whl", hash = "sha256:fc27638c2ce267a0ce3e06828aff786f91367f093c80625ee21dad0208e0f5ba", size = 187147, upload-time = "2026-02-20T22:50:45.807Z" }, + { url = "https://files.pythonhosted.org/packages/d9/26/529f4beee17e5248e37e0bc17a2761d34c0fa3b1e5729c88adb2065bae6e/uuid_utils-0.14.1-cp39-abi3-win_arm64.whl", hash = "sha256:b04cb49b42afbc4ff8dbc60cf054930afc479d6f4dd7f1ec3bbe5dbfdde06b7a", size = 188132, upload-time = "2026-02-20T22:50:41.718Z" }, + { url = "https://files.pythonhosted.org/packages/91/f9/6c64bdbf71f58ccde7919e00491812556f446a5291573af92c49a5e9aaef/uuid_utils-0.14.1-pp311-pypy311_pp73-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:b197cd5424cf89fb019ca7f53641d05bfe34b1879614bed111c9c313b5574cd8", size = 591617, upload-time = "2026-02-20T22:50:24.532Z" }, + { url = "https://files.pythonhosted.org/packages/d0/f0/758c3b0fb0c4871c7704fef26a5bc861de4f8a68e4831669883bebe07b0f/uuid_utils-0.14.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:12c65020ba6cb6abe1d57fcbfc2d0ea0506c67049ee031714057f5caf0f9bc9c", size = 303702, upload-time = "2026-02-20T22:50:40.687Z" }, + { url = "https://files.pythonhosted.org/packages/85/89/d91862b544c695cd58855efe3201f83894ed82fffe34500774238ab8eba7/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b5d2ad28063d422ccc2c28d46471d47b61a58de885d35113a8f18cb547e25bf", size = 337678, upload-time = "2026-02-20T22:50:39.768Z" }, + { url = "https://files.pythonhosted.org/packages/ee/6b/cf342ba8a898f1de024be0243fac67c025cad530c79ea7f89c4ce718891a/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da2234387b45fde40b0fedfee64a0ba591caeea9c48c7698ab6e2d85c7991533", size = 343711, upload-time = "2026-02-20T22:50:43.965Z" }, + { url = "https://files.pythonhosted.org/packages/b3/20/049418d094d396dfa6606b30af925cc68a6670c3b9103b23e6990f84b589/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50fffc2827348c1e48972eed3d1c698959e63f9d030aa5dd82ba451113158a62", size = 476731, upload-time = "2026-02-20T22:50:30.589Z" }, + { url = "https://files.pythonhosted.org/packages/77/a1/0857f64d53a90321e6a46a3d4cc394f50e1366132dcd2ae147f9326ca98b/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1dbe718765f70f5b7f9b7f66b6a937802941b1cc56bcf642ce0274169741e01", size = 338902, upload-time = "2026-02-20T22:50:33.927Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d0/5bf7cbf1ac138c92b9ac21066d18faf4d7e7f651047b700eb192ca4b9fdb/uuid_utils-0.14.1-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:258186964039a8e36db10810c1ece879d229b01331e09e9030bc5dcabe231bd2", size = 364700, upload-time = "2026-02-20T22:50:21.732Z" }, +] + [[package]] name = "uuid6" version = "2025.0.1" @@ -7382,18 +7127,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546, upload-time = "2025-10-14T15:06:13.372Z" }, ] -[[package]] -name = "wcmatch" -version = "10.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bracex" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/79/3e/c0bdc27cf06f4e47680bd5803a07cb3dfd17de84cde92dd217dcb9e05253/wcmatch-10.1.tar.gz", hash = "sha256:f11f94208c8c8484a16f4f48638a85d771d9513f4ab3f37595978801cb9465af", size = 117421, upload-time = "2025-06-22T19:14:02.49Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/d8/0d1d2e9d3fabcf5d6840362adcf05f8cf3cd06a73358140c3a97189238ae/wcmatch-10.1-py3-none-any.whl", hash = "sha256:5848ace7dbb0476e5e55ab63c6bbd529745089343427caa5537f230cc01beb8a", size = 39854, upload-time = "2025-06-22T19:14:00.978Z" }, -] - [[package]] name = "wcwidth" version = "0.6.0" @@ -7542,18 +7275,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/21/abdedb4cdf6ff41ebf01a74087740a709e2edb146490e4d9beea054b0b7a/wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1", size = 23362, upload-time = "2023-11-09T06:33:28.271Z" }, ] -[[package]] -name = "wsproto" -version = "1.3.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "h11" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c7/79/12135bdf8b9c9367b8701c2c19a14c913c120b882d50b014ca0d38083c2c/wsproto-1.3.2.tar.gz", hash = "sha256:b86885dcf294e15204919950f666e06ffc6c7c114ca900b060d6e16293528294", size = 50116, upload-time = "2025-11-20T18:18:01.871Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405, upload-time = "2025-11-20T18:18:00.454Z" }, -] - [[package]] name = "xinference-client" version = "1.2.2" @@ -7595,6 +7316,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/20/69a0e6058bc5ea74892d089d64dfc3a62ba78917ec5e2cfa70f7c92ba3a5/xmltodict-1.0.2-py3-none-any.whl", hash = "sha256:62d0fddb0dcbc9f642745d8bbf4d81fd17d6dfaec5a15b5c1876300aad92af0d", size = 13893, upload-time = "2025-09-17T21:59:24.859Z" }, ] +[[package]] +name = "xxhash" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/84/30869e01909fb37a6cc7e18688ee8bf1e42d57e7e0777636bd47524c43c7/xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6", size = 85160, upload-time = "2025-10-02T14:37:08.097Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/d4/cc2f0400e9154df4b9964249da78ebd72f318e35ccc425e9f403c392f22a/xxhash-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b47bbd8cf2d72797f3c2772eaaac0ded3d3af26481a26d7d7d41dc2d3c46b04a", size = 32844, upload-time = "2025-10-02T14:34:14.037Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ec/1cc11cd13e26ea8bc3cb4af4eaadd8d46d5014aebb67be3f71fb0b68802a/xxhash-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa", size = 30809, upload-time = "2025-10-02T14:34:15.484Z" }, + { url = "https://files.pythonhosted.org/packages/04/5f/19fe357ea348d98ca22f456f75a30ac0916b51c753e1f8b2e0e6fb884cce/xxhash-3.6.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d0a9751f71a1a65ce3584e9cae4467651c7e70c9d31017fa57574583a4540248", size = 194665, upload-time = "2025-10-02T14:34:16.541Z" }, + { url = "https://files.pythonhosted.org/packages/90/3b/d1f1a8f5442a5fd8beedae110c5af7604dc37349a8e16519c13c19a9a2de/xxhash-3.6.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62", size = 213550, upload-time = "2025-10-02T14:34:17.878Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ef/3a9b05eb527457d5db13a135a2ae1a26c80fecd624d20f3e8dcc4cb170f3/xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f", size = 212384, upload-time = "2025-10-02T14:34:19.182Z" }, + { url = "https://files.pythonhosted.org/packages/0f/18/ccc194ee698c6c623acbf0f8c2969811a8a4b6185af5e824cd27b9e4fd3e/xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e", size = 445749, upload-time = "2025-10-02T14:34:20.659Z" }, + { url = "https://files.pythonhosted.org/packages/a5/86/cf2c0321dc3940a7aa73076f4fd677a0fb3e405cb297ead7d864fd90847e/xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8", size = 193880, upload-time = "2025-10-02T14:34:22.431Z" }, + { url = "https://files.pythonhosted.org/packages/82/fb/96213c8560e6f948a1ecc9a7613f8032b19ee45f747f4fca4eb31bb6d6ed/xxhash-3.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0", size = 210912, upload-time = "2025-10-02T14:34:23.937Z" }, + { url = "https://files.pythonhosted.org/packages/40/aa/4395e669b0606a096d6788f40dbdf2b819d6773aa290c19e6e83cbfc312f/xxhash-3.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7a0b169aafb98f4284f73635a8e93f0735f9cbde17bd5ec332480484241aaa77", size = 198654, upload-time = "2025-10-02T14:34:25.644Z" }, + { url = "https://files.pythonhosted.org/packages/67/74/b044fcd6b3d89e9b1b665924d85d3f400636c23590226feb1eb09e1176ce/xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c", size = 210867, upload-time = "2025-10-02T14:34:27.203Z" }, + { url = "https://files.pythonhosted.org/packages/bc/fd/3ce73bf753b08cb19daee1eb14aa0d7fe331f8da9c02dd95316ddfe5275e/xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b", size = 414012, upload-time = "2025-10-02T14:34:28.409Z" }, + { url = "https://files.pythonhosted.org/packages/ba/b3/5a4241309217c5c876f156b10778f3ab3af7ba7e3259e6d5f5c7d0129eb2/xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3", size = 191409, upload-time = "2025-10-02T14:34:29.696Z" }, + { url = "https://files.pythonhosted.org/packages/c0/01/99bfbc15fb9abb9a72b088c1d95219fc4782b7d01fc835bd5744d66dd0b8/xxhash-3.6.0-cp311-cp311-win32.whl", hash = "sha256:d1927a69feddc24c987b337ce81ac15c4720955b667fe9b588e02254b80446fd", size = 30574, upload-time = "2025-10-02T14:34:31.028Z" }, + { url = "https://files.pythonhosted.org/packages/65/79/9d24d7f53819fe301b231044ea362ce64e86c74f6e8c8e51320de248b3e5/xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef", size = 31481, upload-time = "2025-10-02T14:34:32.062Z" }, + { url = "https://files.pythonhosted.org/packages/30/4e/15cd0e3e8772071344eab2961ce83f6e485111fed8beb491a3f1ce100270/xxhash-3.6.0-cp311-cp311-win_arm64.whl", hash = "sha256:d72f67ef8bf36e05f5b6c65e8524f265bd61071471cd4cf1d36743ebeeeb06b7", size = 27861, upload-time = "2025-10-02T14:34:33.555Z" }, + { url = "https://files.pythonhosted.org/packages/9a/07/d9412f3d7d462347e4511181dea65e47e0d0e16e26fbee2ea86a2aefb657/xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c", size = 32744, upload-time = "2025-10-02T14:34:34.622Z" }, + { url = "https://files.pythonhosted.org/packages/79/35/0429ee11d035fc33abe32dca1b2b69e8c18d236547b9a9b72c1929189b9a/xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204", size = 30816, upload-time = "2025-10-02T14:34:36.043Z" }, + { url = "https://files.pythonhosted.org/packages/b7/f2/57eb99aa0f7d98624c0932c5b9a170e1806406cdbcdb510546634a1359e0/xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490", size = 194035, upload-time = "2025-10-02T14:34:37.354Z" }, + { url = "https://files.pythonhosted.org/packages/4c/ed/6224ba353690d73af7a3f1c7cdb1fc1b002e38f783cb991ae338e1eb3d79/xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2", size = 212914, upload-time = "2025-10-02T14:34:38.6Z" }, + { url = "https://files.pythonhosted.org/packages/38/86/fb6b6130d8dd6b8942cc17ab4d90e223653a89aa32ad2776f8af7064ed13/xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa", size = 212163, upload-time = "2025-10-02T14:34:39.872Z" }, + { url = "https://files.pythonhosted.org/packages/ee/dc/e84875682b0593e884ad73b2d40767b5790d417bde603cceb6878901d647/xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0", size = 445411, upload-time = "2025-10-02T14:34:41.569Z" }, + { url = "https://files.pythonhosted.org/packages/11/4f/426f91b96701ec2f37bb2b8cec664eff4f658a11f3fa9d94f0a887ea6d2b/xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2", size = 193883, upload-time = "2025-10-02T14:34:43.249Z" }, + { url = "https://files.pythonhosted.org/packages/53/5a/ddbb83eee8e28b778eacfc5a85c969673e4023cdeedcfcef61f36731610b/xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9", size = 210392, upload-time = "2025-10-02T14:34:45.042Z" }, + { url = "https://files.pythonhosted.org/packages/1e/c2/ff69efd07c8c074ccdf0a4f36fcdd3d27363665bcdf4ba399abebe643465/xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e", size = 197898, upload-time = "2025-10-02T14:34:46.302Z" }, + { url = "https://files.pythonhosted.org/packages/58/ca/faa05ac19b3b622c7c9317ac3e23954187516298a091eb02c976d0d3dd45/xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374", size = 210655, upload-time = "2025-10-02T14:34:47.571Z" }, + { url = "https://files.pythonhosted.org/packages/d4/7a/06aa7482345480cc0cb597f5c875b11a82c3953f534394f620b0be2f700c/xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d", size = 414001, upload-time = "2025-10-02T14:34:49.273Z" }, + { url = "https://files.pythonhosted.org/packages/23/07/63ffb386cd47029aa2916b3d2f454e6cc5b9f5c5ada3790377d5430084e7/xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae", size = 191431, upload-time = "2025-10-02T14:34:50.798Z" }, + { url = "https://files.pythonhosted.org/packages/0f/93/14fde614cadb4ddf5e7cebf8918b7e8fac5ae7861c1875964f17e678205c/xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb", size = 30617, upload-time = "2025-10-02T14:34:51.954Z" }, + { url = "https://files.pythonhosted.org/packages/13/5d/0d125536cbe7565a83d06e43783389ecae0c0f2ed037b48ede185de477c0/xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c", size = 31534, upload-time = "2025-10-02T14:34:53.276Z" }, + { url = "https://files.pythonhosted.org/packages/54/85/6ec269b0952ec7e36ba019125982cf11d91256a778c7c3f98a4c5043d283/xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829", size = 27876, upload-time = "2025-10-02T14:34:54.371Z" }, + { url = "https://files.pythonhosted.org/packages/93/1e/8aec23647a34a249f62e2398c42955acd9b4c6ed5cf08cbea94dc46f78d2/xxhash-3.6.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0f7b7e2ec26c1666ad5fc9dbfa426a6a3367ceaf79db5dd76264659d509d73b0", size = 30662, upload-time = "2025-10-02T14:37:01.743Z" }, + { url = "https://files.pythonhosted.org/packages/b8/0b/b14510b38ba91caf43006209db846a696ceea6a847a0c9ba0a5b1adc53d6/xxhash-3.6.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5dc1e14d14fa0f5789ec29a7062004b5933964bb9b02aae6622b8f530dc40296", size = 41056, upload-time = "2025-10-02T14:37:02.879Z" }, + { url = "https://files.pythonhosted.org/packages/50/55/15a7b8a56590e66ccd374bbfa3f9ffc45b810886c8c3b614e3f90bd2367c/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13", size = 36251, upload-time = "2025-10-02T14:37:04.44Z" }, + { url = "https://files.pythonhosted.org/packages/62/b2/5ac99a041a29e58e95f907876b04f7067a0242cb85b5f39e726153981503/xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd", size = 32481, upload-time = "2025-10-02T14:37:05.869Z" }, + { url = "https://files.pythonhosted.org/packages/7b/d9/8d95e906764a386a3d3b596f3c68bb63687dfca806373509f51ce8eea81f/xxhash-3.6.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:15e0dac10eb9309508bfc41f7f9deaa7755c69e35af835db9cb10751adebc35d", size = 31565, upload-time = "2025-10-02T14:37:06.966Z" }, +] + [[package]] name = "yarl" version = "1.18.3" diff --git a/dev/pyrefly-check-local b/dev/pyrefly-check-local new file mode 100755 index 0000000000..8fa5f121fc --- /dev/null +++ b/dev/pyrefly-check-local @@ -0,0 +1,36 @@ +#!/bin/bash + +set -euo pipefail + +SCRIPT_DIR="$(dirname "$(realpath "$0")")" +REPO_ROOT="$SCRIPT_DIR/.." +cd "$REPO_ROOT" + +EXCLUDES_FILE="api/pyrefly-local-excludes.txt" + +pyrefly_args=( + "--summary=none" + "--use-ignore-files=false" + "--disable-project-excludes-heuristics=true" + "--project-excludes=.venv" + "--project-excludes=migrations/" + "--project-excludes=tests/" +) + +if [[ -f "$EXCLUDES_FILE" ]]; then + while IFS= read -r exclude; do + [[ -z "$exclude" || "${exclude:0:1}" == "#" ]] && continue + pyrefly_args+=("--project-excludes=$exclude") + done < "$EXCLUDES_FILE" +fi + +tmp_output="$(mktemp)" +set +e +uv run --directory api --dev pyrefly check "${pyrefly_args[@]}" >"$tmp_output" 2>&1 +pyrefly_status=$? +set -e + +uv run --directory api python libs/pyrefly_diagnostics.py < "$tmp_output" +rm -f "$tmp_output" + +exit "$pyrefly_status" diff --git a/dev/pytest/pytest_config_tests.py b/dev/pytest/pytest_config_tests.py index 1ec95deb09..1ae115f85c 100644 --- a/dev/pytest/pytest_config_tests.py +++ b/dev/pytest/pytest_config_tests.py @@ -38,7 +38,6 @@ BASE_API_AND_DOCKER_CONFIG_SET_DIFF = { "UPSTASH_VECTOR_URL", "USING_UGC_INDEX", "WEAVIATE_BATCH_SIZE", - "WEAVIATE_GRPC_ENABLED", } BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF = { @@ -86,7 +85,6 @@ BASE_API_AND_DOCKER_COMPOSE_CONFIG_SET_DIFF = { "VIKINGDB_CONNECTION_TIMEOUT", "VIKINGDB_SOCKET_TIMEOUT", "WEAVIATE_BATCH_SIZE", - "WEAVIATE_GRPC_ENABLED", } API_CONFIG_SET = set(dotenv_values(Path("api") / Path(".env.example")).keys()) diff --git a/dev/start-worker b/dev/start-worker index 0450851b56..8baa36f1ed 100755 --- a/dev/start-worker +++ b/dev/start-worker @@ -21,6 +21,7 @@ show_help() { echo "" echo "Available queues:" echo " dataset - RAG indexing and document processing" + echo " dataset_summary - LLM-heavy summary index generation (isolated from indexing)" echo " workflow - Workflow triggers (community edition)" echo " workflow_professional - Professional tier workflows (cloud edition)" echo " workflow_team - Team tier workflows (cloud edition)" @@ -106,10 +107,10 @@ if [[ -z "${QUEUES}" ]]; then # Configure queues based on edition if [[ "${EDITION}" == "CLOUD" ]]; then # Cloud edition: separate queues for dataset and trigger tasks - QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" + QUEUES="dataset,dataset_summary,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" else # Community edition (SELF_HOSTED): dataset and workflow have separate queues - QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" + QUEUES="dataset,dataset_summary,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention,workflow_based_app_execution" fi echo "No queues specified, using edition-based defaults: ${QUEUES}" diff --git a/docs/tlh/README.md b/docs/tlh/README.md index a25849c443..e2acd7734c 100644 --- a/docs/tlh/README.md +++ b/docs/tlh/README.md @@ -61,7 +61,7 @@

langgenius%2Fdify | Trendshift

-Dify is an open-source LLM app development platform. Its intuitive interface combines AI workflow, RAG pipeline, agent capabilities, model management, observability features and more, letting you quickly go from prototype to production. Here's a list of the core features: +Dify is an open-source LLM app development platform. Its intuitive interface combines AI workflow, RAG pipeline, agent capabilities, model management, observability features (including [Opik](https://www.comet.com/docs/opik/integrations/dify), [Langfuse](https://docs.langfuse.com), and [Arize Phoenix](https://docs.arize.com/phoenix)) and more, letting you quickly go from prototype to production. Here's a list of the core features:

**1. Workflow**: diff --git a/scripts/stress-test/common/config_helper.py b/scripts/stress-test/common/config_helper.py index 75fcbffa6f..fb34b43e26 100644 --- a/scripts/stress-test/common/config_helper.py +++ b/scripts/stress-test/common/config_helper.py @@ -6,6 +6,13 @@ from typing import Any class ConfigHelper: + _LEGACY_SECTION_MAP = { + "admin_config": "admin", + "token_config": "auth", + "app_config": "app", + "api_key_config": "api_key", + } + """Helper class for reading and writing configuration files.""" def __init__(self, base_dir: Path | None = None): @@ -50,14 +57,8 @@ class ConfigHelper: Dictionary containing config data, or None if file doesn't exist """ # Provide backward compatibility for old config names - if filename in ["admin_config", "token_config", "app_config", "api_key_config"]: - section_map = { - "admin_config": "admin", - "token_config": "auth", - "app_config": "app", - "api_key_config": "api_key", - } - return self.get_state_section(section_map[filename]) + if filename in self._LEGACY_SECTION_MAP: + return self.get_state_section(self._LEGACY_SECTION_MAP[filename]) config_path = self.get_config_path(filename) @@ -85,14 +86,11 @@ class ConfigHelper: True if successful, False otherwise """ # Provide backward compatibility for old config names - if filename in ["admin_config", "token_config", "app_config", "api_key_config"]: - section_map = { - "admin_config": "admin", - "token_config": "auth", - "app_config": "app", - "api_key_config": "api_key", - } - return self.update_state_section(section_map[filename], data) + if filename in self._LEGACY_SECTION_MAP: + return self.update_state_section( + self._LEGACY_SECTION_MAP[filename], + data, + ) self.ensure_config_dir() config_path = self.get_config_path(filename) diff --git a/sdks/nodejs-client/package.json b/sdks/nodejs-client/package.json index afbb58fee1..7c8a293446 100644 --- a/sdks/nodejs-client/package.json +++ b/sdks/nodejs-client/package.json @@ -54,17 +54,22 @@ "publish:npm": "./scripts/publish.sh" }, "dependencies": { - "axios": "^1.13.2" + "axios": "^1.13.6" }, "devDependencies": { - "@eslint/js": "^9.39.2", - "@types/node": "^25.0.3", - "@typescript-eslint/eslint-plugin": "^8.50.1", - "@typescript-eslint/parser": "^8.50.1", - "@vitest/coverage-v8": "4.0.16", - "eslint": "^9.39.2", + "@eslint/js": "^10.0.1", + "@types/node": "^25.4.0", + "@typescript-eslint/eslint-plugin": "^8.57.0", + "@typescript-eslint/parser": "^8.57.0", + "@vitest/coverage-v8": "4.0.18", + "eslint": "^10.0.3", "tsup": "^8.5.1", "typescript": "^5.9.3", - "vitest": "^4.0.16" + "vitest": "^4.0.18" + }, + "pnpm": { + "overrides": { + "rollup@>=4.0.0,<4.59.0": "4.59.0" + } } } diff --git a/sdks/nodejs-client/pnpm-lock.yaml b/sdks/nodejs-client/pnpm-lock.yaml index 1923a0f063..b0aee38cdf 100644 --- a/sdks/nodejs-client/pnpm-lock.yaml +++ b/sdks/nodejs-client/pnpm-lock.yaml @@ -4,41 +4,44 @@ settings: autoInstallPeers: true excludeLinksFromLockfile: false +overrides: + rollup@>=4.0.0,<4.59.0: 4.59.0 + importers: .: dependencies: axios: - specifier: ^1.13.2 - version: 1.13.5 + specifier: ^1.13.6 + version: 1.13.6 devDependencies: '@eslint/js': - specifier: ^9.39.2 - version: 9.39.2 + specifier: ^10.0.1 + version: 10.0.1(eslint@10.0.3) '@types/node': - specifier: ^25.0.3 - version: 25.0.3 + specifier: ^25.4.0 + version: 25.4.0 '@typescript-eslint/eslint-plugin': - specifier: ^8.50.1 - version: 8.50.1(@typescript-eslint/parser@8.50.1(eslint@9.39.2)(typescript@5.9.3))(eslint@9.39.2)(typescript@5.9.3) + specifier: ^8.57.0 + version: 8.57.0(@typescript-eslint/parser@8.57.0(eslint@10.0.3)(typescript@5.9.3))(eslint@10.0.3)(typescript@5.9.3) '@typescript-eslint/parser': - specifier: ^8.50.1 - version: 8.50.1(eslint@9.39.2)(typescript@5.9.3) + specifier: ^8.57.0 + version: 8.57.0(eslint@10.0.3)(typescript@5.9.3) '@vitest/coverage-v8': - specifier: 4.0.16 - version: 4.0.16(vitest@4.0.16(@types/node@25.0.3)) + specifier: 4.0.18 + version: 4.0.18(vitest@4.0.18(@types/node@25.4.0)) eslint: - specifier: ^9.39.2 - version: 9.39.2 + specifier: ^10.0.3 + version: 10.0.3 tsup: specifier: ^8.5.1 - version: 8.5.1(postcss@8.5.6)(typescript@5.9.3) + version: 8.5.1(postcss@8.5.8)(typescript@5.9.3) typescript: specifier: ^5.9.3 version: 5.9.3 vitest: - specifier: ^4.0.16 - version: 4.0.16(@types/node@25.0.3) + specifier: ^4.0.18 + version: 4.0.18(@types/node@25.4.0) packages: @@ -50,177 +53,177 @@ packages: resolution: {integrity: sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==} engines: {node: '>=6.9.0'} - '@babel/parser@7.28.5': - resolution: {integrity: sha512-KKBU1VGYR7ORr3At5HAtUQ+TV3SzRCXmA/8OdDZiLDBIZxVyzXuztPjfLd3BV1PRAQGCMWWSHYhL0F8d5uHBDQ==} + '@babel/parser@7.29.0': + resolution: {integrity: sha512-IyDgFV5GeDUVX4YdF/3CPULtVGSXXMLh1xVIgdCgxApktqnQV0r7/8Nqthg+8YLGaAtdyIlo2qIdZrbCv4+7ww==} engines: {node: '>=6.0.0'} hasBin: true - '@babel/types@7.28.5': - resolution: {integrity: sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==} + '@babel/types@7.29.0': + resolution: {integrity: sha512-LwdZHpScM4Qz8Xw2iKSzS+cfglZzJGvofQICy7W7v4caru4EaAmyUuO6BGrbyQ2mYV11W0U8j5mBhd14dd3B0A==} engines: {node: '>=6.9.0'} '@bcoe/v8-coverage@1.0.2': resolution: {integrity: sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==} engines: {node: '>=18'} - '@esbuild/aix-ppc64@0.27.2': - resolution: {integrity: sha512-GZMB+a0mOMZs4MpDbj8RJp4cw+w1WV5NYD6xzgvzUJ5Ek2jerwfO2eADyI6ExDSUED+1X8aMbegahsJi+8mgpw==} + '@esbuild/aix-ppc64@0.27.3': + resolution: {integrity: sha512-9fJMTNFTWZMh5qwrBItuziu834eOCUcEqymSH7pY+zoMVEZg3gcPuBNxH1EvfVYe9h0x/Ptw8KBzv7qxb7l8dg==} engines: {node: '>=18'} cpu: [ppc64] os: [aix] - '@esbuild/android-arm64@0.27.2': - resolution: {integrity: sha512-pvz8ZZ7ot/RBphf8fv60ljmaoydPU12VuXHImtAs0XhLLw+EXBi2BLe3OYSBslR4rryHvweW5gmkKFwTiFy6KA==} + '@esbuild/android-arm64@0.27.3': + resolution: {integrity: sha512-YdghPYUmj/FX2SYKJ0OZxf+iaKgMsKHVPF1MAq/P8WirnSpCStzKJFjOjzsW0QQ7oIAiccHdcqjbHmJxRb/dmg==} engines: {node: '>=18'} cpu: [arm64] os: [android] - '@esbuild/android-arm@0.27.2': - resolution: {integrity: sha512-DVNI8jlPa7Ujbr1yjU2PfUSRtAUZPG9I1RwW4F4xFB1Imiu2on0ADiI/c3td+KmDtVKNbi+nffGDQMfcIMkwIA==} + '@esbuild/android-arm@0.27.3': + resolution: {integrity: sha512-i5D1hPY7GIQmXlXhs2w8AWHhenb00+GxjxRncS2ZM7YNVGNfaMxgzSGuO8o8SJzRc/oZwU2bcScvVERk03QhzA==} engines: {node: '>=18'} cpu: [arm] os: [android] - '@esbuild/android-x64@0.27.2': - resolution: {integrity: sha512-z8Ank4Byh4TJJOh4wpz8g2vDy75zFL0TlZlkUkEwYXuPSgX8yzep596n6mT7905kA9uHZsf/o2OJZubl2l3M7A==} + '@esbuild/android-x64@0.27.3': + resolution: {integrity: sha512-IN/0BNTkHtk8lkOM8JWAYFg4ORxBkZQf9zXiEOfERX/CzxW3Vg1ewAhU7QSWQpVIzTW+b8Xy+lGzdYXV6UZObQ==} engines: {node: '>=18'} cpu: [x64] os: [android] - '@esbuild/darwin-arm64@0.27.2': - resolution: {integrity: sha512-davCD2Zc80nzDVRwXTcQP/28fiJbcOwvdolL0sOiOsbwBa72kegmVU0Wrh1MYrbuCL98Omp5dVhQFWRKR2ZAlg==} + '@esbuild/darwin-arm64@0.27.3': + resolution: {integrity: sha512-Re491k7ByTVRy0t3EKWajdLIr0gz2kKKfzafkth4Q8A5n1xTHrkqZgLLjFEHVD+AXdUGgQMq+Godfq45mGpCKg==} engines: {node: '>=18'} cpu: [arm64] os: [darwin] - '@esbuild/darwin-x64@0.27.2': - resolution: {integrity: sha512-ZxtijOmlQCBWGwbVmwOF/UCzuGIbUkqB1faQRf5akQmxRJ1ujusWsb3CVfk/9iZKr2L5SMU5wPBi1UWbvL+VQA==} + '@esbuild/darwin-x64@0.27.3': + resolution: {integrity: sha512-vHk/hA7/1AckjGzRqi6wbo+jaShzRowYip6rt6q7VYEDX4LEy1pZfDpdxCBnGtl+A5zq8iXDcyuxwtv3hNtHFg==} engines: {node: '>=18'} cpu: [x64] os: [darwin] - '@esbuild/freebsd-arm64@0.27.2': - resolution: {integrity: sha512-lS/9CN+rgqQ9czogxlMcBMGd+l8Q3Nj1MFQwBZJyoEKI50XGxwuzznYdwcav6lpOGv5BqaZXqvBSiB/kJ5op+g==} + '@esbuild/freebsd-arm64@0.27.3': + resolution: {integrity: sha512-ipTYM2fjt3kQAYOvo6vcxJx3nBYAzPjgTCk7QEgZG8AUO3ydUhvelmhrbOheMnGOlaSFUoHXB6un+A7q4ygY9w==} engines: {node: '>=18'} cpu: [arm64] os: [freebsd] - '@esbuild/freebsd-x64@0.27.2': - resolution: {integrity: sha512-tAfqtNYb4YgPnJlEFu4c212HYjQWSO/w/h/lQaBK7RbwGIkBOuNKQI9tqWzx7Wtp7bTPaGC6MJvWI608P3wXYA==} + '@esbuild/freebsd-x64@0.27.3': + resolution: {integrity: sha512-dDk0X87T7mI6U3K9VjWtHOXqwAMJBNN2r7bejDsc+j03SEjtD9HrOl8gVFByeM0aJksoUuUVU9TBaZa2rgj0oA==} engines: {node: '>=18'} cpu: [x64] os: [freebsd] - '@esbuild/linux-arm64@0.27.2': - resolution: {integrity: sha512-hYxN8pr66NsCCiRFkHUAsxylNOcAQaxSSkHMMjcpx0si13t1LHFphxJZUiGwojB1a/Hd5OiPIqDdXONia6bhTw==} + '@esbuild/linux-arm64@0.27.3': + resolution: {integrity: sha512-sZOuFz/xWnZ4KH3YfFrKCf1WyPZHakVzTiqji3WDc0BCl2kBwiJLCXpzLzUBLgmp4veFZdvN5ChW4Eq/8Fc2Fg==} engines: {node: '>=18'} cpu: [arm64] os: [linux] - '@esbuild/linux-arm@0.27.2': - resolution: {integrity: sha512-vWfq4GaIMP9AIe4yj1ZUW18RDhx6EPQKjwe7n8BbIecFtCQG4CfHGaHuh7fdfq+y3LIA2vGS/o9ZBGVxIDi9hw==} + '@esbuild/linux-arm@0.27.3': + resolution: {integrity: sha512-s6nPv2QkSupJwLYyfS+gwdirm0ukyTFNl3KTgZEAiJDd+iHZcbTPPcWCcRYH+WlNbwChgH2QkE9NSlNrMT8Gfw==} engines: {node: '>=18'} cpu: [arm] os: [linux] - '@esbuild/linux-ia32@0.27.2': - resolution: {integrity: sha512-MJt5BRRSScPDwG2hLelYhAAKh9imjHK5+NE/tvnRLbIqUWa+0E9N4WNMjmp/kXXPHZGqPLxggwVhz7QP8CTR8w==} + '@esbuild/linux-ia32@0.27.3': + resolution: {integrity: sha512-yGlQYjdxtLdh0a3jHjuwOrxQjOZYD/C9PfdbgJJF3TIZWnm/tMd/RcNiLngiu4iwcBAOezdnSLAwQDPqTmtTYg==} engines: {node: '>=18'} cpu: [ia32] os: [linux] - '@esbuild/linux-loong64@0.27.2': - resolution: {integrity: sha512-lugyF1atnAT463aO6KPshVCJK5NgRnU4yb3FUumyVz+cGvZbontBgzeGFO1nF+dPueHD367a2ZXe1NtUkAjOtg==} + '@esbuild/linux-loong64@0.27.3': + resolution: {integrity: sha512-WO60Sn8ly3gtzhyjATDgieJNet/KqsDlX5nRC5Y3oTFcS1l0KWba+SEa9Ja1GfDqSF1z6hif/SkpQJbL63cgOA==} engines: {node: '>=18'} cpu: [loong64] os: [linux] - '@esbuild/linux-mips64el@0.27.2': - resolution: {integrity: sha512-nlP2I6ArEBewvJ2gjrrkESEZkB5mIoaTswuqNFRv/WYd+ATtUpe9Y09RnJvgvdag7he0OWgEZWhviS1OTOKixw==} + '@esbuild/linux-mips64el@0.27.3': + resolution: {integrity: sha512-APsymYA6sGcZ4pD6k+UxbDjOFSvPWyZhjaiPyl/f79xKxwTnrn5QUnXR5prvetuaSMsb4jgeHewIDCIWljrSxw==} engines: {node: '>=18'} cpu: [mips64el] os: [linux] - '@esbuild/linux-ppc64@0.27.2': - resolution: {integrity: sha512-C92gnpey7tUQONqg1n6dKVbx3vphKtTHJaNG2Ok9lGwbZil6DrfyecMsp9CrmXGQJmZ7iiVXvvZH6Ml5hL6XdQ==} + '@esbuild/linux-ppc64@0.27.3': + resolution: {integrity: sha512-eizBnTeBefojtDb9nSh4vvVQ3V9Qf9Df01PfawPcRzJH4gFSgrObw+LveUyDoKU3kxi5+9RJTCWlj4FjYXVPEA==} engines: {node: '>=18'} cpu: [ppc64] os: [linux] - '@esbuild/linux-riscv64@0.27.2': - resolution: {integrity: sha512-B5BOmojNtUyN8AXlK0QJyvjEZkWwy/FKvakkTDCziX95AowLZKR6aCDhG7LeF7uMCXEJqwa8Bejz5LTPYm8AvA==} + '@esbuild/linux-riscv64@0.27.3': + resolution: {integrity: sha512-3Emwh0r5wmfm3ssTWRQSyVhbOHvqegUDRd0WhmXKX2mkHJe1SFCMJhagUleMq+Uci34wLSipf8Lagt4LlpRFWQ==} engines: {node: '>=18'} cpu: [riscv64] os: [linux] - '@esbuild/linux-s390x@0.27.2': - resolution: {integrity: sha512-p4bm9+wsPwup5Z8f4EpfN63qNagQ47Ua2znaqGH6bqLlmJ4bx97Y9JdqxgGZ6Y8xVTixUnEkoKSHcpRlDnNr5w==} + '@esbuild/linux-s390x@0.27.3': + resolution: {integrity: sha512-pBHUx9LzXWBc7MFIEEL0yD/ZVtNgLytvx60gES28GcWMqil8ElCYR4kvbV2BDqsHOvVDRrOxGySBM9Fcv744hw==} engines: {node: '>=18'} cpu: [s390x] os: [linux] - '@esbuild/linux-x64@0.27.2': - resolution: {integrity: sha512-uwp2Tip5aPmH+NRUwTcfLb+W32WXjpFejTIOWZFw/v7/KnpCDKG66u4DLcurQpiYTiYwQ9B7KOeMJvLCu/OvbA==} + '@esbuild/linux-x64@0.27.3': + resolution: {integrity: sha512-Czi8yzXUWIQYAtL/2y6vogER8pvcsOsk5cpwL4Gk5nJqH5UZiVByIY8Eorm5R13gq+DQKYg0+JyQoytLQas4dA==} engines: {node: '>=18'} cpu: [x64] os: [linux] - '@esbuild/netbsd-arm64@0.27.2': - resolution: {integrity: sha512-Kj6DiBlwXrPsCRDeRvGAUb/LNrBASrfqAIok+xB0LxK8CHqxZ037viF13ugfsIpePH93mX7xfJp97cyDuTZ3cw==} + '@esbuild/netbsd-arm64@0.27.3': + resolution: {integrity: sha512-sDpk0RgmTCR/5HguIZa9n9u+HVKf40fbEUt+iTzSnCaGvY9kFP0YKBWZtJaraonFnqef5SlJ8/TiPAxzyS+UoA==} engines: {node: '>=18'} cpu: [arm64] os: [netbsd] - '@esbuild/netbsd-x64@0.27.2': - resolution: {integrity: sha512-HwGDZ0VLVBY3Y+Nw0JexZy9o/nUAWq9MlV7cahpaXKW6TOzfVno3y3/M8Ga8u8Yr7GldLOov27xiCnqRZf0tCA==} + '@esbuild/netbsd-x64@0.27.3': + resolution: {integrity: sha512-P14lFKJl/DdaE00LItAukUdZO5iqNH7+PjoBm+fLQjtxfcfFE20Xf5CrLsmZdq5LFFZzb5JMZ9grUwvtVYzjiA==} engines: {node: '>=18'} cpu: [x64] os: [netbsd] - '@esbuild/openbsd-arm64@0.27.2': - resolution: {integrity: sha512-DNIHH2BPQ5551A7oSHD0CKbwIA/Ox7+78/AWkbS5QoRzaqlev2uFayfSxq68EkonB+IKjiuxBFoV8ESJy8bOHA==} + '@esbuild/openbsd-arm64@0.27.3': + resolution: {integrity: sha512-AIcMP77AvirGbRl/UZFTq5hjXK+2wC7qFRGoHSDrZ5v5b8DK/GYpXW3CPRL53NkvDqb9D+alBiC/dV0Fb7eJcw==} engines: {node: '>=18'} cpu: [arm64] os: [openbsd] - '@esbuild/openbsd-x64@0.27.2': - resolution: {integrity: sha512-/it7w9Nb7+0KFIzjalNJVR5bOzA9Vay+yIPLVHfIQYG/j+j9VTH84aNB8ExGKPU4AzfaEvN9/V4HV+F+vo8OEg==} + '@esbuild/openbsd-x64@0.27.3': + resolution: {integrity: sha512-DnW2sRrBzA+YnE70LKqnM3P+z8vehfJWHXECbwBmH/CU51z6FiqTQTHFenPlHmo3a8UgpLyH3PT+87OViOh1AQ==} engines: {node: '>=18'} cpu: [x64] os: [openbsd] - '@esbuild/openharmony-arm64@0.27.2': - resolution: {integrity: sha512-LRBbCmiU51IXfeXk59csuX/aSaToeG7w48nMwA6049Y4J4+VbWALAuXcs+qcD04rHDuSCSRKdmY63sruDS5qag==} + '@esbuild/openharmony-arm64@0.27.3': + resolution: {integrity: sha512-NinAEgr/etERPTsZJ7aEZQvvg/A6IsZG/LgZy+81wON2huV7SrK3e63dU0XhyZP4RKGyTm7aOgmQk0bGp0fy2g==} engines: {node: '>=18'} cpu: [arm64] os: [openharmony] - '@esbuild/sunos-x64@0.27.2': - resolution: {integrity: sha512-kMtx1yqJHTmqaqHPAzKCAkDaKsffmXkPHThSfRwZGyuqyIeBvf08KSsYXl+abf5HDAPMJIPnbBfXvP2ZC2TfHg==} + '@esbuild/sunos-x64@0.27.3': + resolution: {integrity: sha512-PanZ+nEz+eWoBJ8/f8HKxTTD172SKwdXebZ0ndd953gt1HRBbhMsaNqjTyYLGLPdoWHy4zLU7bDVJztF5f3BHA==} engines: {node: '>=18'} cpu: [x64] os: [sunos] - '@esbuild/win32-arm64@0.27.2': - resolution: {integrity: sha512-Yaf78O/B3Kkh+nKABUF++bvJv5Ijoy9AN1ww904rOXZFLWVc5OLOfL56W+C8F9xn5JQZa3UX6m+IktJnIb1Jjg==} + '@esbuild/win32-arm64@0.27.3': + resolution: {integrity: sha512-B2t59lWWYrbRDw/tjiWOuzSsFh1Y/E95ofKz7rIVYSQkUYBjfSgf6oeYPNWHToFRr2zx52JKApIcAS/D5TUBnA==} engines: {node: '>=18'} cpu: [arm64] os: [win32] - '@esbuild/win32-ia32@0.27.2': - resolution: {integrity: sha512-Iuws0kxo4yusk7sw70Xa2E2imZU5HoixzxfGCdxwBdhiDgt9vX9VUCBhqcwY7/uh//78A1hMkkROMJq9l27oLQ==} + '@esbuild/win32-ia32@0.27.3': + resolution: {integrity: sha512-QLKSFeXNS8+tHW7tZpMtjlNb7HKau0QDpwm49u0vUp9y1WOF+PEzkU84y9GqYaAVW8aH8f3GcBck26jh54cX4Q==} engines: {node: '>=18'} cpu: [ia32] os: [win32] - '@esbuild/win32-x64@0.27.2': - resolution: {integrity: sha512-sRdU18mcKf7F+YgheI/zGf5alZatMUTKj/jNS6l744f9u3WFu4v7twcUI9vu4mknF4Y9aDlblIie0IM+5xxaqQ==} + '@esbuild/win32-x64@0.27.3': + resolution: {integrity: sha512-4uJGhsxuptu3OcpVAzli+/gWusVGwZZHTlS63hh++ehExkVT8SgiEf7/uC/PclrPPkLhZqGgCTjd0VWLo6xMqA==} engines: {node: '>=18'} cpu: [x64] os: [win32] - '@eslint-community/eslint-utils@4.9.0': - resolution: {integrity: sha512-ayVFHdtZ+hsq1t2Dy24wCmGXGe4q9Gu3smhLYALJrr473ZH27MsnSL+LKUlimp4BWJqMDMLmPpx/Q9R3OAlL4g==} + '@eslint-community/eslint-utils@4.9.1': + resolution: {integrity: sha512-phrYmNiYppR7znFEdqgfWHXR6NCkZEK7hwWDHZUjit/2/U0r6XvkDl0SYnoM51Hq7FhCGdLDT6zxCCOY1hexsQ==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} peerDependencies: eslint: ^6.0.0 || ^7.0.0 || >=8.0.0 @@ -229,33 +232,34 @@ packages: resolution: {integrity: sha512-EriSTlt5OC9/7SXkRSCAhfSxxoSUgBm33OH+IkwbdpgoqsSsUg7y3uh+IICI/Qg4BBWr3U2i39RpmycbxMq4ew==} engines: {node: ^12.0.0 || ^14.0.0 || >=16.0.0} - '@eslint/config-array@0.21.1': - resolution: {integrity: sha512-aw1gNayWpdI/jSYVgzN5pL0cfzU02GT3NBpeT/DXbx1/1x7ZKxFPd9bwrzygx/qiwIQiJ1sw/zD8qY/kRvlGHA==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@eslint/config-array@0.23.3': + resolution: {integrity: sha512-j+eEWmB6YYLwcNOdlwQ6L2OsptI/LO6lNBuLIqe5R7RetD658HLoF+Mn7LzYmAWWNNzdC6cqP+L6r8ujeYXWLw==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} - '@eslint/config-helpers@0.4.2': - resolution: {integrity: sha512-gBrxN88gOIf3R7ja5K9slwNayVcZgK6SOUORm2uBzTeIEfeVaIhOpCtTox3P6R7o2jLFwLFTLnC7kU/RGcYEgw==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@eslint/config-helpers@0.5.3': + resolution: {integrity: sha512-lzGN0onllOZCGroKJmRwY6QcEHxbjBw1gwB8SgRSqK8YbbtEXMvKynsXc3553ckIEBxsbMBU7oOZXKIPGZNeZw==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} - '@eslint/core@0.17.0': - resolution: {integrity: sha512-yL/sLrpmtDaFEiUj1osRP4TI2MDz1AddJL+jZ7KSqvBuliN4xqYY54IfdN8qD8Toa6g1iloph1fxQNkjOxrrpQ==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@eslint/core@1.1.1': + resolution: {integrity: sha512-QUPblTtE51/7/Zhfv8BDwO0qkkzQL7P/aWWbqcf4xWLEYn1oKjdO0gglQBB4GAsu7u6wjijbCmzsUTy6mnk6oQ==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} - '@eslint/eslintrc@3.3.3': - resolution: {integrity: sha512-Kr+LPIUVKz2qkx1HAMH8q1q6azbqBAsXJUxBl/ODDuVPX45Z9DfwB8tPjTi6nNZ8BuM3nbJxC5zCAg5elnBUTQ==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@eslint/js@10.0.1': + resolution: {integrity: sha512-zeR9k5pd4gxjZ0abRoIaxdc7I3nDktoXZk2qOv9gCNWx3mVwEn32VRhyLaRsDiJjTs0xq/T8mfPtyuXu7GWBcA==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} + peerDependencies: + eslint: ^10.0.0 + peerDependenciesMeta: + eslint: + optional: true - '@eslint/js@9.39.2': - resolution: {integrity: sha512-q1mjIoW1VX4IvSocvM/vbTiveKC4k9eLrajNEuSsmjymSDEbpGddtpfOoN7YGAqBK3NG+uqo8ia4PDTt8buCYA==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@eslint/object-schema@3.0.3': + resolution: {integrity: sha512-iM869Pugn9Nsxbh/YHRqYiqd23AmIbxJOcpUMOuWCVNdoQJ5ZtwL6h3t0bcZzJUlC3Dq9jCFCESBZnX0GTv7iQ==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} - '@eslint/object-schema@2.1.7': - resolution: {integrity: sha512-VtAOaymWVfZcmZbp6E2mympDIHvyjXs/12LqWYjVw6qjrfF+VK+fyG33kChz3nnK+SU5/NeHOqrTEHS8sXO3OA==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} - - '@eslint/plugin-kit@0.4.1': - resolution: {integrity: sha512-43/qtrDUokr7LJqoF2c3+RInu/t4zfrpYdoSDfYyhg52rwLV6TnOvdG4fXm7IkSB3wErkcmJS9iEhjVtOSEjjA==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + '@eslint/plugin-kit@0.6.1': + resolution: {integrity: sha512-iH1B076HoAshH1mLpHMgwdGeTs0CYwL0SPMkGuSebZrwBp16v415e9NZXg2jtrqPVQjf6IANe2Vtlr5KswtcZQ==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} '@humanfs/core@0.19.1': resolution: {integrity: sha512-5DyQ4+1JEUzejeK1JGICcideyfUbGixgS9jNgex5nqkW+cY7WZhxBigmieN5Qnw9ZosSNVC9KQKyb+GUaGyKUA==} @@ -286,113 +290,141 @@ packages: '@jridgewell/trace-mapping@0.3.31': resolution: {integrity: sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw==} - '@rollup/rollup-android-arm-eabi@4.54.0': - resolution: {integrity: sha512-OywsdRHrFvCdvsewAInDKCNyR3laPA2mc9bRYJ6LBp5IyvF3fvXbbNR0bSzHlZVFtn6E0xw2oZlyjg4rKCVcng==} + '@rollup/rollup-android-arm-eabi@4.59.0': + resolution: {integrity: sha512-upnNBkA6ZH2VKGcBj9Fyl9IGNPULcjXRlg0LLeaioQWueH30p6IXtJEbKAgvyv+mJaMxSm1l6xwDXYjpEMiLMg==} cpu: [arm] os: [android] - '@rollup/rollup-android-arm64@4.54.0': - resolution: {integrity: sha512-Skx39Uv+u7H224Af+bDgNinitlmHyQX1K/atIA32JP3JQw6hVODX5tkbi2zof/E69M1qH2UoN3Xdxgs90mmNYw==} + '@rollup/rollup-android-arm64@4.59.0': + resolution: {integrity: sha512-hZ+Zxj3SySm4A/DylsDKZAeVg0mvi++0PYVceVyX7hemkw7OreKdCvW2oQ3T1FMZvCaQXqOTHb8qmBShoqk69Q==} cpu: [arm64] os: [android] - '@rollup/rollup-darwin-arm64@4.54.0': - resolution: {integrity: sha512-k43D4qta/+6Fq+nCDhhv9yP2HdeKeP56QrUUTW7E6PhZP1US6NDqpJj4MY0jBHlJivVJD5P8NxrjuobZBJTCRw==} + '@rollup/rollup-darwin-arm64@4.59.0': + resolution: {integrity: sha512-W2Psnbh1J8ZJw0xKAd8zdNgF9HRLkdWwwdWqubSVk0pUuQkoHnv7rx4GiF9rT4t5DIZGAsConRE3AxCdJ4m8rg==} cpu: [arm64] os: [darwin] - '@rollup/rollup-darwin-x64@4.54.0': - resolution: {integrity: sha512-cOo7biqwkpawslEfox5Vs8/qj83M/aZCSSNIWpVzfU2CYHa2G3P1UN5WF01RdTHSgCkri7XOlTdtk17BezlV3A==} + '@rollup/rollup-darwin-x64@4.59.0': + resolution: {integrity: sha512-ZW2KkwlS4lwTv7ZVsYDiARfFCnSGhzYPdiOU4IM2fDbL+QGlyAbjgSFuqNRbSthybLbIJ915UtZBtmuLrQAT/w==} cpu: [x64] os: [darwin] - '@rollup/rollup-freebsd-arm64@4.54.0': - resolution: {integrity: sha512-miSvuFkmvFbgJ1BevMa4CPCFt5MPGw094knM64W9I0giUIMMmRYcGW/JWZDriaw/k1kOBtsWh1z6nIFV1vPNtA==} + '@rollup/rollup-freebsd-arm64@4.59.0': + resolution: {integrity: sha512-EsKaJ5ytAu9jI3lonzn3BgG8iRBjV4LxZexygcQbpiU0wU0ATxhNVEpXKfUa0pS05gTcSDMKpn3Sx+QB9RlTTA==} cpu: [arm64] os: [freebsd] - '@rollup/rollup-freebsd-x64@4.54.0': - resolution: {integrity: sha512-KGXIs55+b/ZfZsq9aR026tmr/+7tq6VG6MsnrvF4H8VhwflTIuYh+LFUlIsRdQSgrgmtM3fVATzEAj4hBQlaqQ==} + '@rollup/rollup-freebsd-x64@4.59.0': + resolution: {integrity: sha512-d3DuZi2KzTMjImrxoHIAODUZYoUUMsuUiY4SRRcJy6NJoZ6iIqWnJu9IScV9jXysyGMVuW+KNzZvBLOcpdl3Vg==} cpu: [x64] os: [freebsd] - '@rollup/rollup-linux-arm-gnueabihf@4.54.0': - resolution: {integrity: sha512-EHMUcDwhtdRGlXZsGSIuXSYwD5kOT9NVnx9sqzYiwAc91wfYOE1g1djOEDseZJKKqtHAHGwnGPQu3kytmfaXLQ==} + '@rollup/rollup-linux-arm-gnueabihf@4.59.0': + resolution: {integrity: sha512-t4ONHboXi/3E0rT6OZl1pKbl2Vgxf9vJfWgmUoCEVQVxhW6Cw/c8I6hbbu7DAvgp82RKiH7TpLwxnJeKv2pbsw==} cpu: [arm] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-arm-musleabihf@4.54.0': - resolution: {integrity: sha512-+pBrqEjaakN2ySv5RVrj/qLytYhPKEUwk+e3SFU5jTLHIcAtqh2rLrd/OkbNuHJpsBgxsD8ccJt5ga/SeG0JmA==} + '@rollup/rollup-linux-arm-musleabihf@4.59.0': + resolution: {integrity: sha512-CikFT7aYPA2ufMD086cVORBYGHffBo4K8MQ4uPS/ZnY54GKj36i196u8U+aDVT2LX4eSMbyHtyOh7D7Zvk2VvA==} cpu: [arm] os: [linux] + libc: [musl] - '@rollup/rollup-linux-arm64-gnu@4.54.0': - resolution: {integrity: sha512-NSqc7rE9wuUaRBsBp5ckQ5CVz5aIRKCwsoa6WMF7G01sX3/qHUw/z4pv+D+ahL1EIKy6Enpcnz1RY8pf7bjwng==} + '@rollup/rollup-linux-arm64-gnu@4.59.0': + resolution: {integrity: sha512-jYgUGk5aLd1nUb1CtQ8E+t5JhLc9x5WdBKew9ZgAXg7DBk0ZHErLHdXM24rfX+bKrFe+Xp5YuJo54I5HFjGDAA==} cpu: [arm64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-arm64-musl@4.54.0': - resolution: {integrity: sha512-gr5vDbg3Bakga5kbdpqx81m2n9IX8M6gIMlQQIXiLTNeQW6CucvuInJ91EuCJ/JYvc+rcLLsDFcfAD1K7fMofg==} + '@rollup/rollup-linux-arm64-musl@4.59.0': + resolution: {integrity: sha512-peZRVEdnFWZ5Bh2KeumKG9ty7aCXzzEsHShOZEFiCQlDEepP1dpUl/SrUNXNg13UmZl+gzVDPsiCwnV1uI0RUA==} cpu: [arm64] os: [linux] + libc: [musl] - '@rollup/rollup-linux-loong64-gnu@4.54.0': - resolution: {integrity: sha512-gsrtB1NA3ZYj2vq0Rzkylo9ylCtW/PhpLEivlgWe0bpgtX5+9j9EZa0wtZiCjgu6zmSeZWyI/e2YRX1URozpIw==} + '@rollup/rollup-linux-loong64-gnu@4.59.0': + resolution: {integrity: sha512-gbUSW/97f7+r4gHy3Jlup8zDG190AuodsWnNiXErp9mT90iCy9NKKU0Xwx5k8VlRAIV2uU9CsMnEFg/xXaOfXg==} cpu: [loong64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-ppc64-gnu@4.54.0': - resolution: {integrity: sha512-y3qNOfTBStmFNq+t4s7Tmc9hW2ENtPg8FeUD/VShI7rKxNW7O4fFeaYbMsd3tpFlIg1Q8IapFgy7Q9i2BqeBvA==} + '@rollup/rollup-linux-loong64-musl@4.59.0': + resolution: {integrity: sha512-yTRONe79E+o0FWFijasoTjtzG9EBedFXJMl888NBEDCDV9I2wGbFFfJQQe63OijbFCUZqxpHz1GzpbtSFikJ4Q==} + cpu: [loong64] + os: [linux] + libc: [musl] + + '@rollup/rollup-linux-ppc64-gnu@4.59.0': + resolution: {integrity: sha512-sw1o3tfyk12k3OEpRddF68a1unZ5VCN7zoTNtSn2KndUE+ea3m3ROOKRCZxEpmT9nsGnogpFP9x6mnLTCaoLkA==} cpu: [ppc64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-riscv64-gnu@4.54.0': - resolution: {integrity: sha512-89sepv7h2lIVPsFma8iwmccN7Yjjtgz0Rj/Ou6fEqg3HDhpCa+Et+YSufy27i6b0Wav69Qv4WBNl3Rs6pwhebQ==} + '@rollup/rollup-linux-ppc64-musl@4.59.0': + resolution: {integrity: sha512-+2kLtQ4xT3AiIxkzFVFXfsmlZiG5FXYW7ZyIIvGA7Bdeuh9Z0aN4hVyXS/G1E9bTP/vqszNIN/pUKCk/BTHsKA==} + cpu: [ppc64] + os: [linux] + libc: [musl] + + '@rollup/rollup-linux-riscv64-gnu@4.59.0': + resolution: {integrity: sha512-NDYMpsXYJJaj+I7UdwIuHHNxXZ/b/N2hR15NyH3m2qAtb/hHPA4g4SuuvrdxetTdndfj9b1WOmy73kcPRoERUg==} cpu: [riscv64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-riscv64-musl@4.54.0': - resolution: {integrity: sha512-ZcU77ieh0M2Q8Ur7D5X7KvK+UxbXeDHwiOt/CPSBTI1fBmeDMivW0dPkdqkT4rOgDjrDDBUed9x4EgraIKoR2A==} + '@rollup/rollup-linux-riscv64-musl@4.59.0': + resolution: {integrity: sha512-nLckB8WOqHIf1bhymk+oHxvM9D3tyPndZH8i8+35p/1YiVoVswPid2yLzgX7ZJP0KQvnkhM4H6QZ5m0LzbyIAg==} cpu: [riscv64] os: [linux] + libc: [musl] - '@rollup/rollup-linux-s390x-gnu@4.54.0': - resolution: {integrity: sha512-2AdWy5RdDF5+4YfG/YesGDDtbyJlC9LHmL6rZw6FurBJ5n4vFGupsOBGfwMRjBYH7qRQowT8D/U4LoSvVwOhSQ==} + '@rollup/rollup-linux-s390x-gnu@4.59.0': + resolution: {integrity: sha512-oF87Ie3uAIvORFBpwnCvUzdeYUqi2wY6jRFWJAy1qus/udHFYIkplYRW+wo+GRUP4sKzYdmE1Y3+rY5Gc4ZO+w==} cpu: [s390x] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-x64-gnu@4.54.0': - resolution: {integrity: sha512-WGt5J8Ij/rvyqpFexxk3ffKqqbLf9AqrTBbWDk7ApGUzaIs6V+s2s84kAxklFwmMF/vBNGrVdYgbblCOFFezMQ==} + '@rollup/rollup-linux-x64-gnu@4.59.0': + resolution: {integrity: sha512-3AHmtQq/ppNuUspKAlvA8HtLybkDflkMuLK4DPo77DfthRb71V84/c4MlWJXixZz4uruIH4uaa07IqoAkG64fg==} cpu: [x64] os: [linux] + libc: [glibc] - '@rollup/rollup-linux-x64-musl@4.54.0': - resolution: {integrity: sha512-JzQmb38ATzHjxlPHuTH6tE7ojnMKM2kYNzt44LO/jJi8BpceEC8QuXYA908n8r3CNuG/B3BV8VR3Hi1rYtmPiw==} + '@rollup/rollup-linux-x64-musl@4.59.0': + resolution: {integrity: sha512-2UdiwS/9cTAx7qIUZB/fWtToJwvt0Vbo0zmnYt7ED35KPg13Q0ym1g442THLC7VyI6JfYTP4PiSOWyoMdV2/xg==} cpu: [x64] os: [linux] + libc: [musl] - '@rollup/rollup-openharmony-arm64@4.54.0': - resolution: {integrity: sha512-huT3fd0iC7jigGh7n3q/+lfPcXxBi+om/Rs3yiFxjvSxbSB6aohDFXbWvlspaqjeOh+hx7DDHS+5Es5qRkWkZg==} + '@rollup/rollup-openbsd-x64@4.59.0': + resolution: {integrity: sha512-M3bLRAVk6GOwFlPTIxVBSYKUaqfLrn8l0psKinkCFxl4lQvOSz8ZrKDz2gxcBwHFpci0B6rttydI4IpS4IS/jQ==} + cpu: [x64] + os: [openbsd] + + '@rollup/rollup-openharmony-arm64@4.59.0': + resolution: {integrity: sha512-tt9KBJqaqp5i5HUZzoafHZX8b5Q2Fe7UjYERADll83O4fGqJ49O1FsL6LpdzVFQcpwvnyd0i+K/VSwu/o/nWlA==} cpu: [arm64] os: [openharmony] - '@rollup/rollup-win32-arm64-msvc@4.54.0': - resolution: {integrity: sha512-c2V0W1bsKIKfbLMBu/WGBz6Yci8nJ/ZJdheE0EwB73N3MvHYKiKGs3mVilX4Gs70eGeDaMqEob25Tw2Gb9Nqyw==} + '@rollup/rollup-win32-arm64-msvc@4.59.0': + resolution: {integrity: sha512-V5B6mG7OrGTwnxaNUzZTDTjDS7F75PO1ae6MJYdiMu60sq0CqN5CVeVsbhPxalupvTX8gXVSU9gq+Rx1/hvu6A==} cpu: [arm64] os: [win32] - '@rollup/rollup-win32-ia32-msvc@4.54.0': - resolution: {integrity: sha512-woEHgqQqDCkAzrDhvDipnSirm5vxUXtSKDYTVpZG3nUdW/VVB5VdCYA2iReSj/u3yCZzXID4kuKG7OynPnB3WQ==} + '@rollup/rollup-win32-ia32-msvc@4.59.0': + resolution: {integrity: sha512-UKFMHPuM9R0iBegwzKF4y0C4J9u8C6MEJgFuXTBerMk7EJ92GFVFYBfOZaSGLu6COf7FxpQNqhNS4c4icUPqxA==} cpu: [ia32] os: [win32] - '@rollup/rollup-win32-x64-gnu@4.54.0': - resolution: {integrity: sha512-dzAc53LOuFvHwbCEOS0rPbXp6SIhAf2txMP5p6mGyOXXw5mWY8NGGbPMPrs4P1WItkfApDathBj/NzMLUZ9rtQ==} + '@rollup/rollup-win32-x64-gnu@4.59.0': + resolution: {integrity: sha512-laBkYlSS1n2L8fSo1thDNGrCTQMmxjYY5G0WFWjFFYZkKPjsMBsgJfGf4TLxXrF6RyhI60L8TMOjBMvXiTcxeA==} cpu: [x64] os: [win32] - '@rollup/rollup-win32-x64-msvc@4.54.0': - resolution: {integrity: sha512-hYT5d3YNdSh3mbCU1gwQyPgQd3T2ne0A3KG8KSBdav5TiBg6eInVmV+TeR5uHufiIgSFg0XsOWGW5/RhNcSvPg==} + '@rollup/rollup-win32-x64-msvc@4.59.0': + resolution: {integrity: sha512-2HRCml6OztYXyJXAvdDXPKcawukWY2GpR5/nxKp4iBgiO3wcoEGkAaqctIbZcNB6KlUQBIqt8VYkNSj2397EfA==} cpu: [x64] os: [win32] @@ -405,88 +437,91 @@ packages: '@types/deep-eql@4.0.2': resolution: {integrity: sha512-c9h9dVVMigMPc4bwTvC5dxqtqJZwQPePsWjPlpSOnojbor6pGqdk541lfA7AqFQr5pB1BRdq0juY9db81BwyFw==} + '@types/esrecurse@4.3.1': + resolution: {integrity: sha512-xJBAbDifo5hpffDBuHl0Y8ywswbiAp/Wi7Y/GtAgSlZyIABppyurxVueOPE8LUQOxdlgi6Zqce7uoEpqNTeiUw==} + '@types/estree@1.0.8': resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==} '@types/json-schema@7.0.15': resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==} - '@types/node@25.0.3': - resolution: {integrity: sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA==} + '@types/node@25.4.0': + resolution: {integrity: sha512-9wLpoeWuBlcbBpOY3XmzSTG3oscB6xjBEEtn+pYXTfhyXhIxC5FsBer2KTopBlvKEiW9l13po9fq+SJY/5lkhw==} - '@typescript-eslint/eslint-plugin@8.50.1': - resolution: {integrity: sha512-PKhLGDq3JAg0Jk/aK890knnqduuI/Qj+udH7wCf0217IGi4gt+acgCyPVe79qoT+qKUvHMDQkwJeKW9fwl8Cyw==} + '@typescript-eslint/eslint-plugin@8.57.0': + resolution: {integrity: sha512-qeu4rTHR3/IaFORbD16gmjq9+rEs9fGKdX0kF6BKSfi+gCuG3RCKLlSBYzn/bGsY9Tj7KE/DAQStbp8AHJGHEQ==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} peerDependencies: - '@typescript-eslint/parser': ^8.50.1 - eslint: ^8.57.0 || ^9.0.0 + '@typescript-eslint/parser': ^8.57.0 + eslint: ^8.57.0 || ^9.0.0 || ^10.0.0 typescript: '>=4.8.4 <6.0.0' - '@typescript-eslint/parser@8.50.1': - resolution: {integrity: sha512-hM5faZwg7aVNa819m/5r7D0h0c9yC4DUlWAOvHAtISdFTc8xB86VmX5Xqabrama3wIPJ/q9RbGS1worb6JfnMg==} + '@typescript-eslint/parser@8.57.0': + resolution: {integrity: sha512-XZzOmihLIr8AD1b9hL9ccNMzEMWt/dE2u7NyTY9jJG6YNiNthaD5XtUHVF2uCXZ15ng+z2hT3MVuxnUYhq6k1g==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} peerDependencies: - eslint: ^8.57.0 || ^9.0.0 + eslint: ^8.57.0 || ^9.0.0 || ^10.0.0 typescript: '>=4.8.4 <6.0.0' - '@typescript-eslint/project-service@8.50.1': - resolution: {integrity: sha512-E1ur1MCVf+YiP89+o4Les/oBAVzmSbeRB0MQLfSlYtbWU17HPxZ6Bhs5iYmKZRALvEuBoXIZMOIRRc/P++Ortg==} + '@typescript-eslint/project-service@8.57.0': + resolution: {integrity: sha512-pR+dK0BlxCLxtWfaKQWtYr7MhKmzqZxuii+ZjuFlZlIGRZm22HnXFqa2eY+90MUz8/i80YJmzFGDUsi8dMOV5w==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} peerDependencies: typescript: '>=4.8.4 <6.0.0' - '@typescript-eslint/scope-manager@8.50.1': - resolution: {integrity: sha512-mfRx06Myt3T4vuoHaKi8ZWNTPdzKPNBhiblze5N50//TSHOAQQevl/aolqA/BcqqbJ88GUnLqjjcBc8EWdBcVw==} + '@typescript-eslint/scope-manager@8.57.0': + resolution: {integrity: sha512-nvExQqAHF01lUM66MskSaZulpPL5pgy5hI5RfrxviLgzZVffB5yYzw27uK/ft8QnKXI2X0LBrHJFr1TaZtAibw==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} - '@typescript-eslint/tsconfig-utils@8.50.1': - resolution: {integrity: sha512-ooHmotT/lCWLXi55G4mvaUF60aJa012QzvLK0Y+Mp4WdSt17QhMhWOaBWeGTFVkb2gDgBe19Cxy1elPXylslDw==} + '@typescript-eslint/tsconfig-utils@8.57.0': + resolution: {integrity: sha512-LtXRihc5ytjJIQEH+xqjB0+YgsV4/tW35XKX3GTZHpWtcC8SPkT/d4tqdf1cKtesryHm2bgp6l555NYcT2NLvA==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} peerDependencies: typescript: '>=4.8.4 <6.0.0' - '@typescript-eslint/type-utils@8.50.1': - resolution: {integrity: sha512-7J3bf022QZE42tYMO6SL+6lTPKFk/WphhRPe9Tw/el+cEwzLz1Jjz2PX3GtGQVxooLDKeMVmMt7fWpYRdG5Etg==} + '@typescript-eslint/type-utils@8.57.0': + resolution: {integrity: sha512-yjgh7gmDcJ1+TcEg8x3uWQmn8ifvSupnPfjP21twPKrDP/pTHlEQgmKcitzF/rzPSmv7QjJ90vRpN4U+zoUjwQ==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} peerDependencies: - eslint: ^8.57.0 || ^9.0.0 + eslint: ^8.57.0 || ^9.0.0 || ^10.0.0 typescript: '>=4.8.4 <6.0.0' - '@typescript-eslint/types@8.50.1': - resolution: {integrity: sha512-v5lFIS2feTkNyMhd7AucE/9j/4V9v5iIbpVRncjk/K0sQ6Sb+Np9fgYS/63n6nwqahHQvbmujeBL7mp07Q9mlA==} + '@typescript-eslint/types@8.57.0': + resolution: {integrity: sha512-dTLI8PEXhjUC7B9Kre+u0XznO696BhXcTlOn0/6kf1fHaQW8+VjJAVHJ3eTI14ZapTxdkOmc80HblPQLaEeJdg==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} - '@typescript-eslint/typescript-estree@8.50.1': - resolution: {integrity: sha512-woHPdW+0gj53aM+cxchymJCrh0cyS7BTIdcDxWUNsclr9VDkOSbqC13juHzxOmQ22dDkMZEpZB+3X1WpUvzgVQ==} + '@typescript-eslint/typescript-estree@8.57.0': + resolution: {integrity: sha512-m7faHcyVg0BT3VdYTlX8GdJEM7COexXxS6KqGopxdtkQRvBanK377QDHr4W/vIPAR+ah9+B/RclSW5ldVniO1Q==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} peerDependencies: typescript: '>=4.8.4 <6.0.0' - '@typescript-eslint/utils@8.50.1': - resolution: {integrity: sha512-lCLp8H1T9T7gPbEuJSnHwnSuO9mDf8mfK/Nion5mZmiEaQD9sWf9W4dfeFqRyqRjF06/kBuTmAqcs9sewM2NbQ==} + '@typescript-eslint/utils@8.57.0': + resolution: {integrity: sha512-5iIHvpD3CZe06riAsbNxxreP+MuYgVUsV0n4bwLH//VJmgtt54sQeY2GszntJ4BjYCpMzrfVh2SBnUQTtys2lQ==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} peerDependencies: - eslint: ^8.57.0 || ^9.0.0 + eslint: ^8.57.0 || ^9.0.0 || ^10.0.0 typescript: '>=4.8.4 <6.0.0' - '@typescript-eslint/visitor-keys@8.50.1': - resolution: {integrity: sha512-IrDKrw7pCRUR94zeuCSUWQ+w8JEf5ZX5jl/e6AHGSLi1/zIr0lgutfn/7JpfCey+urpgQEdrZVYzCaVVKiTwhQ==} + '@typescript-eslint/visitor-keys@8.57.0': + resolution: {integrity: sha512-zm6xx8UT/Xy2oSr2ZXD0pZo7Jx2XsCoID2IUh9YSTFRu7z+WdwYTRk6LhUftm1crwqbuoF6I8zAFeCMw0YjwDg==} engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} - '@vitest/coverage-v8@4.0.16': - resolution: {integrity: sha512-2rNdjEIsPRzsdu6/9Eq0AYAzYdpP6Bx9cje9tL3FE5XzXRQF1fNU9pe/1yE8fCrS0HD+fBtt6gLPh6LI57tX7A==} + '@vitest/coverage-v8@4.0.18': + resolution: {integrity: sha512-7i+N2i0+ME+2JFZhfuz7Tg/FqKtilHjGyGvoHYQ6iLV0zahbsJ9sljC9OcFcPDbhYKCet+sG8SsVqlyGvPflZg==} peerDependencies: - '@vitest/browser': 4.0.16 - vitest: 4.0.16 + '@vitest/browser': 4.0.18 + vitest: 4.0.18 peerDependenciesMeta: '@vitest/browser': optional: true - '@vitest/expect@4.0.16': - resolution: {integrity: sha512-eshqULT2It7McaJkQGLkPjPjNph+uevROGuIMJdG3V+0BSR2w9u6J9Lwu+E8cK5TETlfou8GRijhafIMhXsimA==} + '@vitest/expect@4.0.18': + resolution: {integrity: sha512-8sCWUyckXXYvx4opfzVY03EOiYVxyNrHS5QxX3DAIi5dpJAAkyJezHCP77VMX4HKA2LDT/Jpfo8i2r5BE3GnQQ==} - '@vitest/mocker@4.0.16': - resolution: {integrity: sha512-yb6k4AZxJTB+q9ycAvsoxGn+j/po0UaPgajllBgt1PzoMAAmJGYFdDk0uCcRcxb3BrME34I6u8gHZTQlkqSZpg==} + '@vitest/mocker@4.0.18': + resolution: {integrity: sha512-HhVd0MDnzzsgevnOWCBj5Otnzobjy5wLBe4EdeeFGv8luMsGcYqDuFRMcttKWZA5vVO8RFjexVovXvAM4JoJDQ==} peerDependencies: msw: ^2.4.9 vite: ^6.0.0 || ^7.0.0-0 @@ -496,65 +531,57 @@ packages: vite: optional: true - '@vitest/pretty-format@4.0.16': - resolution: {integrity: sha512-eNCYNsSty9xJKi/UdVD8Ou16alu7AYiS2fCPRs0b1OdhJiV89buAXQLpTbe+X8V9L6qrs9CqyvU7OaAopJYPsA==} + '@vitest/pretty-format@4.0.18': + resolution: {integrity: sha512-P24GK3GulZWC5tz87ux0m8OADrQIUVDPIjjj65vBXYG17ZeU3qD7r+MNZ1RNv4l8CGU2vtTRqixrOi9fYk/yKw==} - '@vitest/runner@4.0.16': - resolution: {integrity: sha512-VWEDm5Wv9xEo80ctjORcTQRJ539EGPB3Pb9ApvVRAY1U/WkHXmmYISqU5E79uCwcW7xYUV38gwZD+RV755fu3Q==} + '@vitest/runner@4.0.18': + resolution: {integrity: sha512-rpk9y12PGa22Jg6g5M3UVVnTS7+zycIGk9ZNGN+m6tZHKQb7jrP7/77WfZy13Y/EUDd52NDsLRQhYKtv7XfPQw==} - '@vitest/snapshot@4.0.16': - resolution: {integrity: sha512-sf6NcrYhYBsSYefxnry+DR8n3UV4xWZwWxYbCJUt2YdvtqzSPR7VfGrY0zsv090DAbjFZsi7ZaMi1KnSRyK1XA==} + '@vitest/snapshot@4.0.18': + resolution: {integrity: sha512-PCiV0rcl7jKQjbgYqjtakly6T1uwv/5BQ9SwBLekVg/EaYeQFPiXcgrC2Y7vDMA8dM1SUEAEV82kgSQIlXNMvA==} - '@vitest/spy@4.0.16': - resolution: {integrity: sha512-4jIOWjKP0ZUaEmJm00E0cOBLU+5WE0BpeNr3XN6TEF05ltro6NJqHWxXD0kA8/Zc8Nh23AT8WQxwNG+WeROupw==} + '@vitest/spy@4.0.18': + resolution: {integrity: sha512-cbQt3PTSD7P2OARdVW3qWER5EGq7PHlvE+QfzSC0lbwO+xnt7+XH06ZzFjFRgzUX//JmpxrCu92VdwvEPlWSNw==} - '@vitest/utils@4.0.16': - resolution: {integrity: sha512-h8z9yYhV3e1LEfaQ3zdypIrnAg/9hguReGZoS7Gl0aBG5xgA410zBqECqmaF/+RkTggRsfnzc1XaAHA6bmUufA==} + '@vitest/utils@4.0.18': + resolution: {integrity: sha512-msMRKLMVLWygpK3u2Hybgi4MNjcYJvwTb0Ru09+fOyCXIgT5raYP041DRRdiJiI3k/2U6SEbAETB3YtBrUkCFA==} acorn-jsx@5.3.2: resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} peerDependencies: acorn: ^6.0.0 || ^7.0.0 || ^8.0.0 - acorn@8.15.0: - resolution: {integrity: sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==} + acorn@8.16.0: + resolution: {integrity: sha512-UVJyE9MttOsBQIDKw1skb9nAwQuR5wuGD3+82K6JgJlm/Y+KI92oNsMNGZCYdDsVtRHSak0pcV5Dno5+4jh9sw==} engines: {node: '>=0.4.0'} hasBin: true - ajv@6.12.6: - resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==} - - ansi-styles@4.3.0: - resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} - engines: {node: '>=8'} + ajv@6.14.0: + resolution: {integrity: sha512-IWrosm/yrn43eiKqkfkHis7QioDleaXQHdDVPKg0FSwwd/DuvyX79TZnFOnYpB7dcsFAMmtFztZuXPDvSePkFw==} any-promise@1.3.0: resolution: {integrity: sha512-7UvmKalWRt1wgjL1RrGxoSJW/0QZFIegpeGvZG9kjp8vrRu55XTHbwnqq2GpXm9uLbcuhxm3IqX9OB4MZR1b2A==} - argparse@2.0.1: - resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} - assertion-error@2.0.1: resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} engines: {node: '>=12'} - ast-v8-to-istanbul@0.3.10: - resolution: {integrity: sha512-p4K7vMz2ZSk3wN8l5o3y2bJAoZXT3VuJI5OLTATY/01CYWumWvwkUw0SqDBnNq6IiTO3qDa1eSQDibAV8g7XOQ==} + ast-v8-to-istanbul@0.3.12: + resolution: {integrity: sha512-BRRC8VRZY2R4Z4lFIL35MwNXmwVqBityvOIwETtsCSwvjl0IdgFsy9NhdaA6j74nUdtJJlIypeRhpDam19Wq3g==} asynckit@0.4.0: resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} - axios@1.13.5: - resolution: {integrity: sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==} + axios@1.13.6: + resolution: {integrity: sha512-ChTCHMouEe2kn713WHbQGcuYrr6fXTBiu460OTwWrWob16g1bXn4vtz07Ope7ewMozJAnEquLk5lWQWtBig9DQ==} - balanced-match@1.0.2: - resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + balanced-match@4.0.4: + resolution: {integrity: sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==} + engines: {node: 18 || 20 || >=22} - brace-expansion@1.1.12: - resolution: {integrity: sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==} - - brace-expansion@2.0.2: - resolution: {integrity: sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==} + brace-expansion@5.0.4: + resolution: {integrity: sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==} + engines: {node: 18 || 20 || >=22} bundle-require@5.1.0: resolution: {integrity: sha512-3WrrOuZiyaaZPWiEt4G3+IffISVC9HYlWueJEBWED4ZH4aIAC2PnkdnuRrR94M+w6yGWn4AglWtJtBI8YqvgoA==} @@ -570,29 +597,14 @@ packages: resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==} engines: {node: '>= 0.4'} - callsites@3.1.0: - resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} - engines: {node: '>=6'} - chai@6.2.2: resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} engines: {node: '>=18'} - chalk@4.1.2: - resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} - engines: {node: '>=10'} - chokidar@4.0.3: resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==} engines: {node: '>= 14.16.0'} - color-convert@2.0.1: - resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} - engines: {node: '>=7.0.0'} - - color-name@1.1.4: - resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} - combined-stream@1.0.8: resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} engines: {node: '>= 0.8'} @@ -601,9 +613,6 @@ packages: resolution: {integrity: sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==} engines: {node: '>= 6'} - concat-map@0.0.1: - resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} - confbox@0.1.8: resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==} @@ -654,8 +663,8 @@ packages: resolution: {integrity: sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==} engines: {node: '>= 0.4'} - esbuild@0.27.2: - resolution: {integrity: sha512-HyNQImnsOC7X9PMNaCIeAm4ISCQXs5a5YasTXVliKv4uuBo1dKrG0A+uQS8M5eXjVMnLg3WgXaKvprHlFJQffw==} + esbuild@0.27.3: + resolution: {integrity: sha512-8VwMnyGCONIs6cWue2IdpHxHnAjzxnw2Zr7MkVxB2vjmQ2ivqGFb4LEG3SMnv0Gb2F/G/2yA8zUaiL1gywDCCg==} engines: {node: '>=18'} hasBin: true @@ -663,21 +672,21 @@ packages: resolution: {integrity: sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==} engines: {node: '>=10'} - eslint-scope@8.4.0: - resolution: {integrity: sha512-sNXOfKCn74rt8RICKMvJS7XKV/Xk9kA7DyJr8mJik3S7Cwgy3qlkkmyS2uQB3jiJg6VNdZd/pDBJu0nvG2NlTg==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + eslint-scope@9.1.2: + resolution: {integrity: sha512-xS90H51cKw0jltxmvmHy2Iai1LIqrfbw57b79w/J7MfvDfkIkFZ+kj6zC3BjtUwh150HsSSdxXZcsuv72miDFQ==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} eslint-visitor-keys@3.4.3: resolution: {integrity: sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==} engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} - eslint-visitor-keys@4.2.1: - resolution: {integrity: sha512-Uhdk5sfqcee/9H/rCOJikYz67o0a2Tw2hGRPOG2Y1R2dg7brRe1uG0yaNQDHu+TO/uQPF/5eCapvYSmHUjt7JQ==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + eslint-visitor-keys@5.0.1: + resolution: {integrity: sha512-tD40eHxA35h0PEIZNeIjkHoDR4YjjJp34biM0mDvplBe//mB+IHCqHDGV7pxF+7MklTvighcCPPZC7ynWyjdTA==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} - eslint@9.39.2: - resolution: {integrity: sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + eslint@10.0.3: + resolution: {integrity: sha512-COV33RzXZkqhG9P2rZCFl9ZmJ7WL+gQSCRzE7RhkbclbQPtLAWReL7ysA0Sh4c8Im2U9ynybdR56PV0XcKvqaQ==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} hasBin: true peerDependencies: jiti: '*' @@ -685,12 +694,12 @@ packages: jiti: optional: true - espree@10.4.0: - resolution: {integrity: sha512-j6PAQ2uUr79PZhBjP5C5fhl8e39FmRnOjsD5lGnWrFU8i2G776tBK7+nP8KuQUTTyAZUwfQqXAgrVH5MbH9CYQ==} - engines: {node: ^18.18.0 || ^20.9.0 || >=21.1.0} + espree@11.2.0: + resolution: {integrity: sha512-7p3DrVEIopW1B1avAGLuCSh1jubc01H2JHc8B4qqGblmg5gI9yumBgACjWo4JlIc04ufug4xJ3SQI8HkS/Rgzw==} + engines: {node: ^20.19.0 || ^22.13.0 || >=24} - esquery@1.6.0: - resolution: {integrity: sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==} + esquery@1.7.0: + resolution: {integrity: sha512-Ap6G0WQwcU/LHsvLwON1fAQX9Zp0A2Y6Y/cJBl9r/JbW90Zyg4/zbG6zzKa2OTALELarYHmKu0GhpM5EO+7T0g==} engines: {node: '>=0.10'} esrecurse@4.3.0: @@ -745,8 +754,8 @@ packages: resolution: {integrity: sha512-f7ccFPK3SXFHpx15UIGyRJ/FJQctuKZ0zVuN3frBo4HnK3cay9VEW0R6yPYFHC0AgqhukPzKjq22t5DmAyqGyw==} engines: {node: '>=16'} - flatted@3.3.3: - resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==} + flatted@3.4.1: + resolution: {integrity: sha512-IxfVbRFVlV8V/yRaGzk0UVIcsKKHMSfYw66T/u4nTwlWteQePsxe//LjudR1AMX4tZW3WFCh3Zqa/sjlqpbURQ==} follow-redirects@1.15.11: resolution: {integrity: sha512-deG2P0JfjrTxl50XGCDyfI97ZGVCxIpfKYmfyrQ54n5FO/0gfIES8C/Psl6kWVDolizcaaxZJnTS0QSMxvnsBQ==} @@ -781,10 +790,6 @@ packages: resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==} engines: {node: '>=10.13.0'} - globals@14.0.0: - resolution: {integrity: sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==} - engines: {node: '>=18'} - gopd@1.2.0: resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==} engines: {node: '>= 0.4'} @@ -816,10 +821,6 @@ packages: resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==} engines: {node: '>= 4'} - import-fresh@3.3.1: - resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==} - engines: {node: '>=6'} - imurmurhash@0.1.4: resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} engines: {node: '>=0.8.19'} @@ -843,10 +844,6 @@ packages: resolution: {integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==} engines: {node: '>=10'} - istanbul-lib-source-maps@5.0.6: - resolution: {integrity: sha512-yg2d+Em4KizZC5niWhQaIomgf5WlL4vOOjZ5xGCmF8SnPE/mDWWXgvRExdcpCgh9lLRRa1/fSYp2ymmbJ1pI+A==} - engines: {node: '>=10'} - istanbul-reports@3.2.0: resolution: {integrity: sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==} engines: {node: '>=8'} @@ -855,12 +852,8 @@ packages: resolution: {integrity: sha512-34wB/Y7MW7bzjKRjUKTa46I2Z7eV62Rkhva+KkopW7Qvv/OSWBqvkSY7vusOPrNuZcUG3tApvdVgNB8POj3SPw==} engines: {node: '>=10'} - js-tokens@9.0.1: - resolution: {integrity: sha512-mxa9E9ITFOt0ban3j6L5MpjwegGz6lBQmM1IJkWeBZGcMxto50+eWdjC/52xDbS2vy0k7vIMK0Fe2wfL9OQSpQ==} - - js-yaml@4.1.1: - resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==} - hasBin: true + js-tokens@10.0.0: + resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==} json-buffer@3.0.1: resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==} @@ -893,14 +886,11 @@ packages: resolution: {integrity: sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==} engines: {node: '>=10'} - lodash.merge@4.6.2: - resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==} - magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} - magicast@0.5.1: - resolution: {integrity: sha512-xrHS24IxaLrvuo613F719wvOIv9xPHFWQHuvGUBmPnCA/3MQxKI3b+r7n1jAoDHmsbC5bRhTZYR77invLAxVnw==} + magicast@0.5.2: + resolution: {integrity: sha512-E3ZJh4J3S9KfwdjZhe2afj6R9lGIN5Pher1pF39UGrXRqq/VDaGVIGN13BjHd2u8B61hArAGOnso7nBOouW3TQ==} make-dir@4.0.0: resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} @@ -918,15 +908,12 @@ packages: resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} engines: {node: '>= 0.6'} - minimatch@3.1.2: - resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} + minimatch@10.2.4: + resolution: {integrity: sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==} + engines: {node: 18 || 20 || >=22} - minimatch@9.0.5: - resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==} - engines: {node: '>=16 || 14 >=14.17'} - - mlly@1.8.0: - resolution: {integrity: sha512-l8D9ODSRWLe2KHJSifWGwBqpTZXIXTeo8mlKjY+E2HAakaTeNpqAyBZ8GSqLzHgw4XmHmC8whvpjJNMbFZN7/g==} + mlly@1.8.1: + resolution: {integrity: sha512-SnL6sNutTwRWWR/vcmCYHSADjiEesp5TGQQ0pXyLhW5IoeibRlF/CbSLailbB3CNqJUk9cVJ9dUDnbD7GrcHBQ==} ms@2.1.3: resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} @@ -961,10 +948,6 @@ packages: resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==} engines: {node: '>=10'} - parent-module@1.0.1: - resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} - engines: {node: '>=6'} - path-exists@4.0.0: resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==} engines: {node: '>=8'} @@ -1008,8 +991,8 @@ packages: yaml: optional: true - postcss@8.5.6: - resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==} + postcss@8.5.8: + resolution: {integrity: sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==} engines: {node: ^10 || ^12 || >=14} prelude-ls@1.2.1: @@ -1027,21 +1010,17 @@ packages: resolution: {integrity: sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==} engines: {node: '>= 14.18.0'} - resolve-from@4.0.0: - resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==} - engines: {node: '>=4'} - resolve-from@5.0.0: resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==} engines: {node: '>=8'} - rollup@4.54.0: - resolution: {integrity: sha512-3nk8Y3a9Ea8szgKhinMlGMhGMw89mqule3KWczxhIzqudyHdCIOHw8WJlj/r329fACjKLEh13ZSk7oE22kyeIw==} + rollup@4.59.0: + resolution: {integrity: sha512-2oMpl67a3zCH9H79LeMcbDhXW/UmWG/y2zuqnF2jQq5uq9TbM9TVyXvA4+t+ne2IIkBdrLpAaRQAvo7YI/Yyeg==} engines: {node: '>=18.0.0', npm: '>=8.0.0'} hasBin: true - semver@7.7.3: - resolution: {integrity: sha512-SdsKMrI9TdgjdweUSR9MweHA4EJ8YxHn8DFaDisvhVlUOe4BF1tLD7GAj0lIqWVl+dPb/rExr0Btby5loQm20Q==} + semver@7.7.4: + resolution: {integrity: sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==} engines: {node: '>=10'} hasBin: true @@ -1070,10 +1049,6 @@ packages: std-env@3.10.0: resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==} - strip-json-comments@3.1.1: - resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} - engines: {node: '>=8'} - sucrase@3.35.1: resolution: {integrity: sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==} engines: {node: '>=16 || 14 >=14.17'} @@ -1112,8 +1087,8 @@ packages: resolution: {integrity: sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==} hasBin: true - ts-api-utils@2.1.0: - resolution: {integrity: sha512-CUgTZL1irw8u29bzrOD/nH85jqyc74D6SshFgujOIA7osm2Rz7dYH77agkx7H4FBNxDq7Cjf+IjaX/8zwFW+ZQ==} + ts-api-utils@2.4.0: + resolution: {integrity: sha512-3TaVTaAv2gTiMB35i3FiGJaRfwb3Pyn/j3m/bfAvGe8FB7CF6u+LMYqYlDh7reQf7UNvoTvdfAqHGmPGOSsPmA==} engines: {node: '>=18.12'} peerDependencies: typescript: '>=4.8.4' @@ -1149,17 +1124,17 @@ packages: engines: {node: '>=14.17'} hasBin: true - ufo@1.6.1: - resolution: {integrity: sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA==} + ufo@1.6.3: + resolution: {integrity: sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==} - undici-types@7.16.0: - resolution: {integrity: sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} + undici-types@7.18.2: + resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} uri-js@4.4.1: resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} - vite@7.3.0: - resolution: {integrity: sha512-dZwN5L1VlUBewiP6H9s2+B3e3Jg96D0vzN+Ry73sOefebhYr9f94wwkMNN/9ouoU8pV1BqA1d1zGk8928cx0rg==} + vite@7.3.1: + resolution: {integrity: sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==} engines: {node: ^20.19.0 || >=22.12.0} hasBin: true peerDependencies: @@ -1198,18 +1173,18 @@ packages: yaml: optional: true - vitest@4.0.16: - resolution: {integrity: sha512-E4t7DJ9pESL6E3I8nFjPa4xGUd3PmiWDLsDztS2qXSJWfHtbQnwAWylaBvSNY48I3vr8PTqIZlyK8TE3V3CA4Q==} + vitest@4.0.18: + resolution: {integrity: sha512-hOQuK7h0FGKgBAas7v0mSAsnvrIgAvWmRFjmzpJ7SwFHH3g1k2u37JtYwOwmEKhK6ZO3v9ggDBBm0La1LCK4uQ==} engines: {node: ^20.0.0 || ^22.0.0 || >=24.0.0} hasBin: true peerDependencies: '@edge-runtime/vm': '*' '@opentelemetry/api': ^1.9.0 '@types/node': ^20.0.0 || ^22.0.0 || >=24.0.0 - '@vitest/browser-playwright': 4.0.16 - '@vitest/browser-preview': 4.0.16 - '@vitest/browser-webdriverio': 4.0.16 - '@vitest/ui': 4.0.16 + '@vitest/browser-playwright': 4.0.18 + '@vitest/browser-preview': 4.0.18 + '@vitest/browser-webdriverio': 4.0.18 + '@vitest/ui': 4.0.18 happy-dom: '*' jsdom: '*' peerDependenciesMeta: @@ -1256,139 +1231,127 @@ snapshots: '@babel/helper-validator-identifier@7.28.5': {} - '@babel/parser@7.28.5': + '@babel/parser@7.29.0': dependencies: - '@babel/types': 7.28.5 + '@babel/types': 7.29.0 - '@babel/types@7.28.5': + '@babel/types@7.29.0': dependencies: '@babel/helper-string-parser': 7.27.1 '@babel/helper-validator-identifier': 7.28.5 '@bcoe/v8-coverage@1.0.2': {} - '@esbuild/aix-ppc64@0.27.2': + '@esbuild/aix-ppc64@0.27.3': optional: true - '@esbuild/android-arm64@0.27.2': + '@esbuild/android-arm64@0.27.3': optional: true - '@esbuild/android-arm@0.27.2': + '@esbuild/android-arm@0.27.3': optional: true - '@esbuild/android-x64@0.27.2': + '@esbuild/android-x64@0.27.3': optional: true - '@esbuild/darwin-arm64@0.27.2': + '@esbuild/darwin-arm64@0.27.3': optional: true - '@esbuild/darwin-x64@0.27.2': + '@esbuild/darwin-x64@0.27.3': optional: true - '@esbuild/freebsd-arm64@0.27.2': + '@esbuild/freebsd-arm64@0.27.3': optional: true - '@esbuild/freebsd-x64@0.27.2': + '@esbuild/freebsd-x64@0.27.3': optional: true - '@esbuild/linux-arm64@0.27.2': + '@esbuild/linux-arm64@0.27.3': optional: true - '@esbuild/linux-arm@0.27.2': + '@esbuild/linux-arm@0.27.3': optional: true - '@esbuild/linux-ia32@0.27.2': + '@esbuild/linux-ia32@0.27.3': optional: true - '@esbuild/linux-loong64@0.27.2': + '@esbuild/linux-loong64@0.27.3': optional: true - '@esbuild/linux-mips64el@0.27.2': + '@esbuild/linux-mips64el@0.27.3': optional: true - '@esbuild/linux-ppc64@0.27.2': + '@esbuild/linux-ppc64@0.27.3': optional: true - '@esbuild/linux-riscv64@0.27.2': + '@esbuild/linux-riscv64@0.27.3': optional: true - '@esbuild/linux-s390x@0.27.2': + '@esbuild/linux-s390x@0.27.3': optional: true - '@esbuild/linux-x64@0.27.2': + '@esbuild/linux-x64@0.27.3': optional: true - '@esbuild/netbsd-arm64@0.27.2': + '@esbuild/netbsd-arm64@0.27.3': optional: true - '@esbuild/netbsd-x64@0.27.2': + '@esbuild/netbsd-x64@0.27.3': optional: true - '@esbuild/openbsd-arm64@0.27.2': + '@esbuild/openbsd-arm64@0.27.3': optional: true - '@esbuild/openbsd-x64@0.27.2': + '@esbuild/openbsd-x64@0.27.3': optional: true - '@esbuild/openharmony-arm64@0.27.2': + '@esbuild/openharmony-arm64@0.27.3': optional: true - '@esbuild/sunos-x64@0.27.2': + '@esbuild/sunos-x64@0.27.3': optional: true - '@esbuild/win32-arm64@0.27.2': + '@esbuild/win32-arm64@0.27.3': optional: true - '@esbuild/win32-ia32@0.27.2': + '@esbuild/win32-ia32@0.27.3': optional: true - '@esbuild/win32-x64@0.27.2': + '@esbuild/win32-x64@0.27.3': optional: true - '@eslint-community/eslint-utils@4.9.0(eslint@9.39.2)': + '@eslint-community/eslint-utils@4.9.1(eslint@10.0.3)': dependencies: - eslint: 9.39.2 + eslint: 10.0.3 eslint-visitor-keys: 3.4.3 '@eslint-community/regexpp@4.12.2': {} - '@eslint/config-array@0.21.1': + '@eslint/config-array@0.23.3': dependencies: - '@eslint/object-schema': 2.1.7 + '@eslint/object-schema': 3.0.3 debug: 4.4.3 - minimatch: 3.1.2 + minimatch: 10.2.4 transitivePeerDependencies: - supports-color - '@eslint/config-helpers@0.4.2': + '@eslint/config-helpers@0.5.3': dependencies: - '@eslint/core': 0.17.0 + '@eslint/core': 1.1.1 - '@eslint/core@0.17.0': + '@eslint/core@1.1.1': dependencies: '@types/json-schema': 7.0.15 - '@eslint/eslintrc@3.3.3': + '@eslint/js@10.0.1(eslint@10.0.3)': + optionalDependencies: + eslint: 10.0.3 + + '@eslint/object-schema@3.0.3': {} + + '@eslint/plugin-kit@0.6.1': dependencies: - ajv: 6.12.6 - debug: 4.4.3 - espree: 10.4.0 - globals: 14.0.0 - ignore: 5.3.2 - import-fresh: 3.3.1 - js-yaml: 4.1.1 - minimatch: 3.1.2 - strip-json-comments: 3.1.1 - transitivePeerDependencies: - - supports-color - - '@eslint/js@9.39.2': {} - - '@eslint/object-schema@2.1.7': {} - - '@eslint/plugin-kit@0.4.1': - dependencies: - '@eslint/core': 0.17.0 + '@eslint/core': 1.1.1 levn: 0.4.1 '@humanfs/core@0.19.1': {} @@ -1416,70 +1379,79 @@ snapshots: '@jridgewell/resolve-uri': 3.1.2 '@jridgewell/sourcemap-codec': 1.5.5 - '@rollup/rollup-android-arm-eabi@4.54.0': + '@rollup/rollup-android-arm-eabi@4.59.0': optional: true - '@rollup/rollup-android-arm64@4.54.0': + '@rollup/rollup-android-arm64@4.59.0': optional: true - '@rollup/rollup-darwin-arm64@4.54.0': + '@rollup/rollup-darwin-arm64@4.59.0': optional: true - '@rollup/rollup-darwin-x64@4.54.0': + '@rollup/rollup-darwin-x64@4.59.0': optional: true - '@rollup/rollup-freebsd-arm64@4.54.0': + '@rollup/rollup-freebsd-arm64@4.59.0': optional: true - '@rollup/rollup-freebsd-x64@4.54.0': + '@rollup/rollup-freebsd-x64@4.59.0': optional: true - '@rollup/rollup-linux-arm-gnueabihf@4.54.0': + '@rollup/rollup-linux-arm-gnueabihf@4.59.0': optional: true - '@rollup/rollup-linux-arm-musleabihf@4.54.0': + '@rollup/rollup-linux-arm-musleabihf@4.59.0': optional: true - '@rollup/rollup-linux-arm64-gnu@4.54.0': + '@rollup/rollup-linux-arm64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-arm64-musl@4.54.0': + '@rollup/rollup-linux-arm64-musl@4.59.0': optional: true - '@rollup/rollup-linux-loong64-gnu@4.54.0': + '@rollup/rollup-linux-loong64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-ppc64-gnu@4.54.0': + '@rollup/rollup-linux-loong64-musl@4.59.0': optional: true - '@rollup/rollup-linux-riscv64-gnu@4.54.0': + '@rollup/rollup-linux-ppc64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-riscv64-musl@4.54.0': + '@rollup/rollup-linux-ppc64-musl@4.59.0': optional: true - '@rollup/rollup-linux-s390x-gnu@4.54.0': + '@rollup/rollup-linux-riscv64-gnu@4.59.0': optional: true - '@rollup/rollup-linux-x64-gnu@4.54.0': + '@rollup/rollup-linux-riscv64-musl@4.59.0': optional: true - '@rollup/rollup-linux-x64-musl@4.54.0': + '@rollup/rollup-linux-s390x-gnu@4.59.0': optional: true - '@rollup/rollup-openharmony-arm64@4.54.0': + '@rollup/rollup-linux-x64-gnu@4.59.0': optional: true - '@rollup/rollup-win32-arm64-msvc@4.54.0': + '@rollup/rollup-linux-x64-musl@4.59.0': optional: true - '@rollup/rollup-win32-ia32-msvc@4.54.0': + '@rollup/rollup-openbsd-x64@4.59.0': optional: true - '@rollup/rollup-win32-x64-gnu@4.54.0': + '@rollup/rollup-openharmony-arm64@4.59.0': optional: true - '@rollup/rollup-win32-x64-msvc@4.54.0': + '@rollup/rollup-win32-arm64-msvc@4.59.0': + optional: true + + '@rollup/rollup-win32-ia32-msvc@4.59.0': + optional: true + + '@rollup/rollup-win32-x64-gnu@4.59.0': + optional: true + + '@rollup/rollup-win32-x64-msvc@4.59.0': optional: true '@standard-schema/spec@1.1.0': {} @@ -1491,193 +1463,186 @@ snapshots: '@types/deep-eql@4.0.2': {} + '@types/esrecurse@4.3.1': {} + '@types/estree@1.0.8': {} '@types/json-schema@7.0.15': {} - '@types/node@25.0.3': + '@types/node@25.4.0': dependencies: - undici-types: 7.16.0 + undici-types: 7.18.2 - '@typescript-eslint/eslint-plugin@8.50.1(@typescript-eslint/parser@8.50.1(eslint@9.39.2)(typescript@5.9.3))(eslint@9.39.2)(typescript@5.9.3)': + '@typescript-eslint/eslint-plugin@8.57.0(@typescript-eslint/parser@8.57.0(eslint@10.0.3)(typescript@5.9.3))(eslint@10.0.3)(typescript@5.9.3)': dependencies: '@eslint-community/regexpp': 4.12.2 - '@typescript-eslint/parser': 8.50.1(eslint@9.39.2)(typescript@5.9.3) - '@typescript-eslint/scope-manager': 8.50.1 - '@typescript-eslint/type-utils': 8.50.1(eslint@9.39.2)(typescript@5.9.3) - '@typescript-eslint/utils': 8.50.1(eslint@9.39.2)(typescript@5.9.3) - '@typescript-eslint/visitor-keys': 8.50.1 - eslint: 9.39.2 + '@typescript-eslint/parser': 8.57.0(eslint@10.0.3)(typescript@5.9.3) + '@typescript-eslint/scope-manager': 8.57.0 + '@typescript-eslint/type-utils': 8.57.0(eslint@10.0.3)(typescript@5.9.3) + '@typescript-eslint/utils': 8.57.0(eslint@10.0.3)(typescript@5.9.3) + '@typescript-eslint/visitor-keys': 8.57.0 + eslint: 10.0.3 ignore: 7.0.5 natural-compare: 1.4.0 - ts-api-utils: 2.1.0(typescript@5.9.3) + ts-api-utils: 2.4.0(typescript@5.9.3) typescript: 5.9.3 transitivePeerDependencies: - supports-color - '@typescript-eslint/parser@8.50.1(eslint@9.39.2)(typescript@5.9.3)': + '@typescript-eslint/parser@8.57.0(eslint@10.0.3)(typescript@5.9.3)': dependencies: - '@typescript-eslint/scope-manager': 8.50.1 - '@typescript-eslint/types': 8.50.1 - '@typescript-eslint/typescript-estree': 8.50.1(typescript@5.9.3) - '@typescript-eslint/visitor-keys': 8.50.1 + '@typescript-eslint/scope-manager': 8.57.0 + '@typescript-eslint/types': 8.57.0 + '@typescript-eslint/typescript-estree': 8.57.0(typescript@5.9.3) + '@typescript-eslint/visitor-keys': 8.57.0 debug: 4.4.3 - eslint: 9.39.2 + eslint: 10.0.3 typescript: 5.9.3 transitivePeerDependencies: - supports-color - '@typescript-eslint/project-service@8.50.1(typescript@5.9.3)': + '@typescript-eslint/project-service@8.57.0(typescript@5.9.3)': dependencies: - '@typescript-eslint/tsconfig-utils': 8.50.1(typescript@5.9.3) - '@typescript-eslint/types': 8.50.1 + '@typescript-eslint/tsconfig-utils': 8.57.0(typescript@5.9.3) + '@typescript-eslint/types': 8.57.0 debug: 4.4.3 typescript: 5.9.3 transitivePeerDependencies: - supports-color - '@typescript-eslint/scope-manager@8.50.1': + '@typescript-eslint/scope-manager@8.57.0': dependencies: - '@typescript-eslint/types': 8.50.1 - '@typescript-eslint/visitor-keys': 8.50.1 + '@typescript-eslint/types': 8.57.0 + '@typescript-eslint/visitor-keys': 8.57.0 - '@typescript-eslint/tsconfig-utils@8.50.1(typescript@5.9.3)': + '@typescript-eslint/tsconfig-utils@8.57.0(typescript@5.9.3)': dependencies: typescript: 5.9.3 - '@typescript-eslint/type-utils@8.50.1(eslint@9.39.2)(typescript@5.9.3)': + '@typescript-eslint/type-utils@8.57.0(eslint@10.0.3)(typescript@5.9.3)': dependencies: - '@typescript-eslint/types': 8.50.1 - '@typescript-eslint/typescript-estree': 8.50.1(typescript@5.9.3) - '@typescript-eslint/utils': 8.50.1(eslint@9.39.2)(typescript@5.9.3) + '@typescript-eslint/types': 8.57.0 + '@typescript-eslint/typescript-estree': 8.57.0(typescript@5.9.3) + '@typescript-eslint/utils': 8.57.0(eslint@10.0.3)(typescript@5.9.3) debug: 4.4.3 - eslint: 9.39.2 - ts-api-utils: 2.1.0(typescript@5.9.3) + eslint: 10.0.3 + ts-api-utils: 2.4.0(typescript@5.9.3) typescript: 5.9.3 transitivePeerDependencies: - supports-color - '@typescript-eslint/types@8.50.1': {} + '@typescript-eslint/types@8.57.0': {} - '@typescript-eslint/typescript-estree@8.50.1(typescript@5.9.3)': + '@typescript-eslint/typescript-estree@8.57.0(typescript@5.9.3)': dependencies: - '@typescript-eslint/project-service': 8.50.1(typescript@5.9.3) - '@typescript-eslint/tsconfig-utils': 8.50.1(typescript@5.9.3) - '@typescript-eslint/types': 8.50.1 - '@typescript-eslint/visitor-keys': 8.50.1 + '@typescript-eslint/project-service': 8.57.0(typescript@5.9.3) + '@typescript-eslint/tsconfig-utils': 8.57.0(typescript@5.9.3) + '@typescript-eslint/types': 8.57.0 + '@typescript-eslint/visitor-keys': 8.57.0 debug: 4.4.3 - minimatch: 9.0.5 - semver: 7.7.3 + minimatch: 10.2.4 + semver: 7.7.4 tinyglobby: 0.2.15 - ts-api-utils: 2.1.0(typescript@5.9.3) + ts-api-utils: 2.4.0(typescript@5.9.3) typescript: 5.9.3 transitivePeerDependencies: - supports-color - '@typescript-eslint/utils@8.50.1(eslint@9.39.2)(typescript@5.9.3)': + '@typescript-eslint/utils@8.57.0(eslint@10.0.3)(typescript@5.9.3)': dependencies: - '@eslint-community/eslint-utils': 4.9.0(eslint@9.39.2) - '@typescript-eslint/scope-manager': 8.50.1 - '@typescript-eslint/types': 8.50.1 - '@typescript-eslint/typescript-estree': 8.50.1(typescript@5.9.3) - eslint: 9.39.2 + '@eslint-community/eslint-utils': 4.9.1(eslint@10.0.3) + '@typescript-eslint/scope-manager': 8.57.0 + '@typescript-eslint/types': 8.57.0 + '@typescript-eslint/typescript-estree': 8.57.0(typescript@5.9.3) + eslint: 10.0.3 typescript: 5.9.3 transitivePeerDependencies: - supports-color - '@typescript-eslint/visitor-keys@8.50.1': + '@typescript-eslint/visitor-keys@8.57.0': dependencies: - '@typescript-eslint/types': 8.50.1 - eslint-visitor-keys: 4.2.1 + '@typescript-eslint/types': 8.57.0 + eslint-visitor-keys: 5.0.1 - '@vitest/coverage-v8@4.0.16(vitest@4.0.16(@types/node@25.0.3))': + '@vitest/coverage-v8@4.0.18(vitest@4.0.18(@types/node@25.4.0))': dependencies: '@bcoe/v8-coverage': 1.0.2 - '@vitest/utils': 4.0.16 - ast-v8-to-istanbul: 0.3.10 + '@vitest/utils': 4.0.18 + ast-v8-to-istanbul: 0.3.12 istanbul-lib-coverage: 3.2.2 istanbul-lib-report: 3.0.1 - istanbul-lib-source-maps: 5.0.6 istanbul-reports: 3.2.0 - magicast: 0.5.1 + magicast: 0.5.2 obug: 2.1.1 std-env: 3.10.0 tinyrainbow: 3.0.3 - vitest: 4.0.16(@types/node@25.0.3) - transitivePeerDependencies: - - supports-color + vitest: 4.0.18(@types/node@25.4.0) - '@vitest/expect@4.0.16': + '@vitest/expect@4.0.18': dependencies: '@standard-schema/spec': 1.1.0 '@types/chai': 5.2.3 - '@vitest/spy': 4.0.16 - '@vitest/utils': 4.0.16 + '@vitest/spy': 4.0.18 + '@vitest/utils': 4.0.18 chai: 6.2.2 tinyrainbow: 3.0.3 - '@vitest/mocker@4.0.16(vite@7.3.0(@types/node@25.0.3))': + '@vitest/mocker@4.0.18(vite@7.3.1(@types/node@25.4.0))': dependencies: - '@vitest/spy': 4.0.16 + '@vitest/spy': 4.0.18 estree-walker: 3.0.3 magic-string: 0.30.21 optionalDependencies: - vite: 7.3.0(@types/node@25.0.3) + vite: 7.3.1(@types/node@25.4.0) - '@vitest/pretty-format@4.0.16': + '@vitest/pretty-format@4.0.18': dependencies: tinyrainbow: 3.0.3 - '@vitest/runner@4.0.16': + '@vitest/runner@4.0.18': dependencies: - '@vitest/utils': 4.0.16 + '@vitest/utils': 4.0.18 pathe: 2.0.3 - '@vitest/snapshot@4.0.16': + '@vitest/snapshot@4.0.18': dependencies: - '@vitest/pretty-format': 4.0.16 + '@vitest/pretty-format': 4.0.18 magic-string: 0.30.21 pathe: 2.0.3 - '@vitest/spy@4.0.16': {} + '@vitest/spy@4.0.18': {} - '@vitest/utils@4.0.16': + '@vitest/utils@4.0.18': dependencies: - '@vitest/pretty-format': 4.0.16 + '@vitest/pretty-format': 4.0.18 tinyrainbow: 3.0.3 - acorn-jsx@5.3.2(acorn@8.15.0): + acorn-jsx@5.3.2(acorn@8.16.0): dependencies: - acorn: 8.15.0 + acorn: 8.16.0 - acorn@8.15.0: {} + acorn@8.16.0: {} - ajv@6.12.6: + ajv@6.14.0: dependencies: fast-deep-equal: 3.1.3 fast-json-stable-stringify: 2.1.0 json-schema-traverse: 0.4.1 uri-js: 4.4.1 - ansi-styles@4.3.0: - dependencies: - color-convert: 2.0.1 - any-promise@1.3.0: {} - argparse@2.0.1: {} - assertion-error@2.0.1: {} - ast-v8-to-istanbul@0.3.10: + ast-v8-to-istanbul@0.3.12: dependencies: '@jridgewell/trace-mapping': 0.3.31 estree-walker: 3.0.3 - js-tokens: 9.0.1 + js-tokens: 10.0.0 asynckit@0.4.0: {} - axios@1.13.5: + axios@1.13.6: dependencies: follow-redirects: 1.15.11 form-data: 4.0.5 @@ -1685,20 +1650,15 @@ snapshots: transitivePeerDependencies: - debug - balanced-match@1.0.2: {} + balanced-match@4.0.4: {} - brace-expansion@1.1.12: + brace-expansion@5.0.4: dependencies: - balanced-match: 1.0.2 - concat-map: 0.0.1 + balanced-match: 4.0.4 - brace-expansion@2.0.2: + bundle-require@5.1.0(esbuild@0.27.3): dependencies: - balanced-match: 1.0.2 - - bundle-require@5.1.0(esbuild@0.27.2): - dependencies: - esbuild: 0.27.2 + esbuild: 0.27.3 load-tsconfig: 0.2.5 cac@6.7.14: {} @@ -1708,33 +1668,18 @@ snapshots: es-errors: 1.3.0 function-bind: 1.1.2 - callsites@3.1.0: {} - chai@6.2.2: {} - chalk@4.1.2: - dependencies: - ansi-styles: 4.3.0 - supports-color: 7.2.0 - chokidar@4.0.3: dependencies: readdirp: 4.1.2 - color-convert@2.0.1: - dependencies: - color-name: 1.1.4 - - color-name@1.1.4: {} - combined-stream@1.0.8: dependencies: delayed-stream: 1.0.0 commander@4.1.1: {} - concat-map@0.0.1: {} - confbox@0.1.8: {} consola@3.4.2: {} @@ -1776,69 +1721,68 @@ snapshots: has-tostringtag: 1.0.2 hasown: 2.0.2 - esbuild@0.27.2: + esbuild@0.27.3: optionalDependencies: - '@esbuild/aix-ppc64': 0.27.2 - '@esbuild/android-arm': 0.27.2 - '@esbuild/android-arm64': 0.27.2 - '@esbuild/android-x64': 0.27.2 - '@esbuild/darwin-arm64': 0.27.2 - '@esbuild/darwin-x64': 0.27.2 - '@esbuild/freebsd-arm64': 0.27.2 - '@esbuild/freebsd-x64': 0.27.2 - '@esbuild/linux-arm': 0.27.2 - '@esbuild/linux-arm64': 0.27.2 - '@esbuild/linux-ia32': 0.27.2 - '@esbuild/linux-loong64': 0.27.2 - '@esbuild/linux-mips64el': 0.27.2 - '@esbuild/linux-ppc64': 0.27.2 - '@esbuild/linux-riscv64': 0.27.2 - '@esbuild/linux-s390x': 0.27.2 - '@esbuild/linux-x64': 0.27.2 - '@esbuild/netbsd-arm64': 0.27.2 - '@esbuild/netbsd-x64': 0.27.2 - '@esbuild/openbsd-arm64': 0.27.2 - '@esbuild/openbsd-x64': 0.27.2 - '@esbuild/openharmony-arm64': 0.27.2 - '@esbuild/sunos-x64': 0.27.2 - '@esbuild/win32-arm64': 0.27.2 - '@esbuild/win32-ia32': 0.27.2 - '@esbuild/win32-x64': 0.27.2 + '@esbuild/aix-ppc64': 0.27.3 + '@esbuild/android-arm': 0.27.3 + '@esbuild/android-arm64': 0.27.3 + '@esbuild/android-x64': 0.27.3 + '@esbuild/darwin-arm64': 0.27.3 + '@esbuild/darwin-x64': 0.27.3 + '@esbuild/freebsd-arm64': 0.27.3 + '@esbuild/freebsd-x64': 0.27.3 + '@esbuild/linux-arm': 0.27.3 + '@esbuild/linux-arm64': 0.27.3 + '@esbuild/linux-ia32': 0.27.3 + '@esbuild/linux-loong64': 0.27.3 + '@esbuild/linux-mips64el': 0.27.3 + '@esbuild/linux-ppc64': 0.27.3 + '@esbuild/linux-riscv64': 0.27.3 + '@esbuild/linux-s390x': 0.27.3 + '@esbuild/linux-x64': 0.27.3 + '@esbuild/netbsd-arm64': 0.27.3 + '@esbuild/netbsd-x64': 0.27.3 + '@esbuild/openbsd-arm64': 0.27.3 + '@esbuild/openbsd-x64': 0.27.3 + '@esbuild/openharmony-arm64': 0.27.3 + '@esbuild/sunos-x64': 0.27.3 + '@esbuild/win32-arm64': 0.27.3 + '@esbuild/win32-ia32': 0.27.3 + '@esbuild/win32-x64': 0.27.3 escape-string-regexp@4.0.0: {} - eslint-scope@8.4.0: + eslint-scope@9.1.2: dependencies: + '@types/esrecurse': 4.3.1 + '@types/estree': 1.0.8 esrecurse: 4.3.0 estraverse: 5.3.0 eslint-visitor-keys@3.4.3: {} - eslint-visitor-keys@4.2.1: {} + eslint-visitor-keys@5.0.1: {} - eslint@9.39.2: + eslint@10.0.3: dependencies: - '@eslint-community/eslint-utils': 4.9.0(eslint@9.39.2) + '@eslint-community/eslint-utils': 4.9.1(eslint@10.0.3) '@eslint-community/regexpp': 4.12.2 - '@eslint/config-array': 0.21.1 - '@eslint/config-helpers': 0.4.2 - '@eslint/core': 0.17.0 - '@eslint/eslintrc': 3.3.3 - '@eslint/js': 9.39.2 - '@eslint/plugin-kit': 0.4.1 + '@eslint/config-array': 0.23.3 + '@eslint/config-helpers': 0.5.3 + '@eslint/core': 1.1.1 + '@eslint/plugin-kit': 0.6.1 '@humanfs/node': 0.16.7 '@humanwhocodes/module-importer': 1.0.1 '@humanwhocodes/retry': 0.4.3 '@types/estree': 1.0.8 - ajv: 6.12.6 - chalk: 4.1.2 + ajv: 6.14.0 cross-spawn: 7.0.6 debug: 4.4.3 escape-string-regexp: 4.0.0 - eslint-scope: 8.4.0 - eslint-visitor-keys: 4.2.1 - espree: 10.4.0 - esquery: 1.6.0 + eslint-scope: 9.1.2 + eslint-visitor-keys: 5.0.1 + espree: 11.2.0 + esquery: 1.7.0 esutils: 2.0.3 fast-deep-equal: 3.1.3 file-entry-cache: 8.0.0 @@ -1848,20 +1792,19 @@ snapshots: imurmurhash: 0.1.4 is-glob: 4.0.3 json-stable-stringify-without-jsonify: 1.0.1 - lodash.merge: 4.6.2 - minimatch: 3.1.2 + minimatch: 10.2.4 natural-compare: 1.4.0 optionator: 0.9.4 transitivePeerDependencies: - supports-color - espree@10.4.0: + espree@11.2.0: dependencies: - acorn: 8.15.0 - acorn-jsx: 5.3.2(acorn@8.15.0) - eslint-visitor-keys: 4.2.1 + acorn: 8.16.0 + acorn-jsx: 5.3.2(acorn@8.16.0) + eslint-visitor-keys: 5.0.1 - esquery@1.6.0: + esquery@1.7.0: dependencies: estraverse: 5.3.0 @@ -1901,15 +1844,15 @@ snapshots: fix-dts-default-cjs-exports@1.0.1: dependencies: magic-string: 0.30.21 - mlly: 1.8.0 - rollup: 4.54.0 + mlly: 1.8.1 + rollup: 4.59.0 flat-cache@4.0.1: dependencies: - flatted: 3.3.3 + flatted: 3.4.1 keyv: 4.5.4 - flatted@3.3.3: {} + flatted@3.4.1: {} follow-redirects@1.15.11: {} @@ -1948,8 +1891,6 @@ snapshots: dependencies: is-glob: 4.0.3 - globals@14.0.0: {} - gopd@1.2.0: {} has-flag@4.0.0: {} @@ -1970,11 +1911,6 @@ snapshots: ignore@7.0.5: {} - import-fresh@3.3.1: - dependencies: - parent-module: 1.0.1 - resolve-from: 4.0.0 - imurmurhash@0.1.4: {} is-extglob@2.1.1: {} @@ -1993,14 +1929,6 @@ snapshots: make-dir: 4.0.0 supports-color: 7.2.0 - istanbul-lib-source-maps@5.0.6: - dependencies: - '@jridgewell/trace-mapping': 0.3.31 - debug: 4.4.3 - istanbul-lib-coverage: 3.2.2 - transitivePeerDependencies: - - supports-color - istanbul-reports@3.2.0: dependencies: html-escaper: 2.0.2 @@ -2008,11 +1936,7 @@ snapshots: joycon@3.1.1: {} - js-tokens@9.0.1: {} - - js-yaml@4.1.1: - dependencies: - argparse: 2.0.1 + js-tokens@10.0.0: {} json-buffer@3.0.1: {} @@ -2039,21 +1963,19 @@ snapshots: dependencies: p-locate: 5.0.0 - lodash.merge@4.6.2: {} - magic-string@0.30.21: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 - magicast@0.5.1: + magicast@0.5.2: dependencies: - '@babel/parser': 7.28.5 - '@babel/types': 7.28.5 + '@babel/parser': 7.29.0 + '@babel/types': 7.29.0 source-map-js: 1.2.1 make-dir@4.0.0: dependencies: - semver: 7.7.3 + semver: 7.7.4 math-intrinsics@1.1.0: {} @@ -2063,20 +1985,16 @@ snapshots: dependencies: mime-db: 1.52.0 - minimatch@3.1.2: + minimatch@10.2.4: dependencies: - brace-expansion: 1.1.12 + brace-expansion: 5.0.4 - minimatch@9.0.5: + mlly@1.8.1: dependencies: - brace-expansion: 2.0.2 - - mlly@1.8.0: - dependencies: - acorn: 8.15.0 + acorn: 8.16.0 pathe: 2.0.3 pkg-types: 1.3.1 - ufo: 1.6.1 + ufo: 1.6.3 ms@2.1.3: {} @@ -2111,10 +2029,6 @@ snapshots: dependencies: p-limit: 3.1.0 - parent-module@1.0.1: - dependencies: - callsites: 3.1.0 - path-exists@4.0.0: {} path-key@3.1.1: {} @@ -2130,16 +2044,16 @@ snapshots: pkg-types@1.3.1: dependencies: confbox: 0.1.8 - mlly: 1.8.0 + mlly: 1.8.1 pathe: 2.0.3 - postcss-load-config@6.0.1(postcss@8.5.6): + postcss-load-config@6.0.1(postcss@8.5.8): dependencies: lilconfig: 3.1.3 optionalDependencies: - postcss: 8.5.6 + postcss: 8.5.8 - postcss@8.5.6: + postcss@8.5.8: dependencies: nanoid: 3.3.11 picocolors: 1.1.1 @@ -2153,39 +2067,40 @@ snapshots: readdirp@4.1.2: {} - resolve-from@4.0.0: {} - resolve-from@5.0.0: {} - rollup@4.54.0: + rollup@4.59.0: dependencies: '@types/estree': 1.0.8 optionalDependencies: - '@rollup/rollup-android-arm-eabi': 4.54.0 - '@rollup/rollup-android-arm64': 4.54.0 - '@rollup/rollup-darwin-arm64': 4.54.0 - '@rollup/rollup-darwin-x64': 4.54.0 - '@rollup/rollup-freebsd-arm64': 4.54.0 - '@rollup/rollup-freebsd-x64': 4.54.0 - '@rollup/rollup-linux-arm-gnueabihf': 4.54.0 - '@rollup/rollup-linux-arm-musleabihf': 4.54.0 - '@rollup/rollup-linux-arm64-gnu': 4.54.0 - '@rollup/rollup-linux-arm64-musl': 4.54.0 - '@rollup/rollup-linux-loong64-gnu': 4.54.0 - '@rollup/rollup-linux-ppc64-gnu': 4.54.0 - '@rollup/rollup-linux-riscv64-gnu': 4.54.0 - '@rollup/rollup-linux-riscv64-musl': 4.54.0 - '@rollup/rollup-linux-s390x-gnu': 4.54.0 - '@rollup/rollup-linux-x64-gnu': 4.54.0 - '@rollup/rollup-linux-x64-musl': 4.54.0 - '@rollup/rollup-openharmony-arm64': 4.54.0 - '@rollup/rollup-win32-arm64-msvc': 4.54.0 - '@rollup/rollup-win32-ia32-msvc': 4.54.0 - '@rollup/rollup-win32-x64-gnu': 4.54.0 - '@rollup/rollup-win32-x64-msvc': 4.54.0 + '@rollup/rollup-android-arm-eabi': 4.59.0 + '@rollup/rollup-android-arm64': 4.59.0 + '@rollup/rollup-darwin-arm64': 4.59.0 + '@rollup/rollup-darwin-x64': 4.59.0 + '@rollup/rollup-freebsd-arm64': 4.59.0 + '@rollup/rollup-freebsd-x64': 4.59.0 + '@rollup/rollup-linux-arm-gnueabihf': 4.59.0 + '@rollup/rollup-linux-arm-musleabihf': 4.59.0 + '@rollup/rollup-linux-arm64-gnu': 4.59.0 + '@rollup/rollup-linux-arm64-musl': 4.59.0 + '@rollup/rollup-linux-loong64-gnu': 4.59.0 + '@rollup/rollup-linux-loong64-musl': 4.59.0 + '@rollup/rollup-linux-ppc64-gnu': 4.59.0 + '@rollup/rollup-linux-ppc64-musl': 4.59.0 + '@rollup/rollup-linux-riscv64-gnu': 4.59.0 + '@rollup/rollup-linux-riscv64-musl': 4.59.0 + '@rollup/rollup-linux-s390x-gnu': 4.59.0 + '@rollup/rollup-linux-x64-gnu': 4.59.0 + '@rollup/rollup-linux-x64-musl': 4.59.0 + '@rollup/rollup-openbsd-x64': 4.59.0 + '@rollup/rollup-openharmony-arm64': 4.59.0 + '@rollup/rollup-win32-arm64-msvc': 4.59.0 + '@rollup/rollup-win32-ia32-msvc': 4.59.0 + '@rollup/rollup-win32-x64-gnu': 4.59.0 + '@rollup/rollup-win32-x64-msvc': 4.59.0 fsevents: 2.3.3 - semver@7.7.3: {} + semver@7.7.4: {} shebang-command@2.0.0: dependencies: @@ -2203,8 +2118,6 @@ snapshots: std-env@3.10.0: {} - strip-json-comments@3.1.1: {} - sucrase@3.35.1: dependencies: '@jridgewell/gen-mapping': 0.3.13 @@ -2242,33 +2155,33 @@ snapshots: tree-kill@1.2.2: {} - ts-api-utils@2.1.0(typescript@5.9.3): + ts-api-utils@2.4.0(typescript@5.9.3): dependencies: typescript: 5.9.3 ts-interface-checker@0.1.13: {} - tsup@8.5.1(postcss@8.5.6)(typescript@5.9.3): + tsup@8.5.1(postcss@8.5.8)(typescript@5.9.3): dependencies: - bundle-require: 5.1.0(esbuild@0.27.2) + bundle-require: 5.1.0(esbuild@0.27.3) cac: 6.7.14 chokidar: 4.0.3 consola: 3.4.2 debug: 4.4.3 - esbuild: 0.27.2 + esbuild: 0.27.3 fix-dts-default-cjs-exports: 1.0.1 joycon: 3.1.1 picocolors: 1.1.1 - postcss-load-config: 6.0.1(postcss@8.5.6) + postcss-load-config: 6.0.1(postcss@8.5.8) resolve-from: 5.0.0 - rollup: 4.54.0 + rollup: 4.59.0 source-map: 0.7.6 sucrase: 3.35.1 tinyexec: 0.3.2 tinyglobby: 0.2.15 tree-kill: 1.2.2 optionalDependencies: - postcss: 8.5.6 + postcss: 8.5.8 typescript: 5.9.3 transitivePeerDependencies: - jiti @@ -2282,35 +2195,35 @@ snapshots: typescript@5.9.3: {} - ufo@1.6.1: {} + ufo@1.6.3: {} - undici-types@7.16.0: {} + undici-types@7.18.2: {} uri-js@4.4.1: dependencies: punycode: 2.3.1 - vite@7.3.0(@types/node@25.0.3): + vite@7.3.1(@types/node@25.4.0): dependencies: - esbuild: 0.27.2 + esbuild: 0.27.3 fdir: 6.5.0(picomatch@4.0.3) picomatch: 4.0.3 - postcss: 8.5.6 - rollup: 4.54.0 + postcss: 8.5.8 + rollup: 4.59.0 tinyglobby: 0.2.15 optionalDependencies: - '@types/node': 25.0.3 + '@types/node': 25.4.0 fsevents: 2.3.3 - vitest@4.0.16(@types/node@25.0.3): + vitest@4.0.18(@types/node@25.4.0): dependencies: - '@vitest/expect': 4.0.16 - '@vitest/mocker': 4.0.16(vite@7.3.0(@types/node@25.0.3)) - '@vitest/pretty-format': 4.0.16 - '@vitest/runner': 4.0.16 - '@vitest/snapshot': 4.0.16 - '@vitest/spy': 4.0.16 - '@vitest/utils': 4.0.16 + '@vitest/expect': 4.0.18 + '@vitest/mocker': 4.0.18(vite@7.3.1(@types/node@25.4.0)) + '@vitest/pretty-format': 4.0.18 + '@vitest/runner': 4.0.18 + '@vitest/snapshot': 4.0.18 + '@vitest/spy': 4.0.18 + '@vitest/utils': 4.0.18 es-module-lexer: 1.7.0 expect-type: 1.3.0 magic-string: 0.30.21 @@ -2322,10 +2235,10 @@ snapshots: tinyexec: 1.0.2 tinyglobby: 0.2.15 tinyrainbow: 3.0.3 - vite: 7.3.0(@types/node@25.0.3) + vite: 7.3.1(@types/node@25.4.0) why-is-node-running: 2.3.0 optionalDependencies: - '@types/node': 25.0.3 + '@types/node': 25.4.0 transitivePeerDependencies: - jiti - less diff --git a/web/.env.example b/web/.env.example index 2103f00f45..b0603afd8f 100644 --- a/web/.env.example +++ b/web/.env.example @@ -12,6 +12,11 @@ NEXT_PUBLIC_API_PREFIX=http://localhost:5001/console/api # console or api domain. # example: http://udify.app/api NEXT_PUBLIC_PUBLIC_API_PREFIX=http://localhost:5001/api +# Dev-only Hono proxy targets. The frontend keeps requesting http://localhost:5001 directly. +HONO_PROXY_HOST=127.0.0.1 +HONO_PROXY_PORT=5001 +HONO_CONSOLE_API_PROXY_TARGET= +HONO_PUBLIC_API_PROXY_TARGET= # When the frontend and backend run on different subdomains, set NEXT_PUBLIC_COOKIE_DOMAIN=1. NEXT_PUBLIC_COOKIE_DOMAIN= # WebSocket server URL. diff --git a/web/.husky/pre-commit b/web/.husky/pre-commit index dd4140b47e..3f25de256f 100644 --- a/web/.husky/pre-commit +++ b/web/.husky/pre-commit @@ -6,6 +6,20 @@ files=$(git diff --cached --name-only) api_modified=false web_modified=false +skip_web_checks=false + +git_path() { + git rev-parse --git-path "$1" +} + +if [ -f "$(git_path MERGE_HEAD)" ] || \ + [ -f "$(git_path CHERRY_PICK_HEAD)" ] || \ + [ -f "$(git_path REVERT_HEAD)" ] || \ + [ -f "$(git_path SQUASH_MSG)" ] || \ + [ -d "$(git_path rebase-merge)" ] || \ + [ -d "$(git_path rebase-apply)" ]; then + skip_web_checks=true +fi for file in $files do @@ -43,6 +57,11 @@ if $api_modified; then fi if $web_modified; then + if $skip_web_checks; then + echo "Git operation in progress, skipping web checks" + exit 0 + fi + echo "Running ESLint on web module" if git diff --cached --quiet -- 'web/**/*.ts' 'web/**/*.tsx'; then diff --git a/web/AGENTS.md b/web/AGENTS.md index 5dd41b8a3c..71000eafdb 100644 --- a/web/AGENTS.md +++ b/web/AGENTS.md @@ -2,6 +2,12 @@ - Refer to the `./docs/test.md` and `./docs/lint.md` for detailed frontend workflow instructions. +## Overlay Components (Mandatory) + +- `./docs/overlay-migration.md` is the source of truth for overlay-related work. +- In new or modified code, use only overlay primitives from `@/app/components/base/ui/*`. +- Do not introduce deprecated overlay imports from `@/app/components/base/*`; when touching legacy callers, prefer migrating them and keep the allowlist shrinking (never expanding). + ## Automated Test Generation - Use `./docs/test.md` as the canonical instruction set for generating frontend automated tests. diff --git a/web/Dockerfile b/web/Dockerfile index 392d319ea8..a79de627b9 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -39,7 +39,7 @@ COPY --from=packages /app/web/ . COPY . . ENV NODE_OPTIONS="--max-old-space-size=4096" -RUN pnpm build:docker +RUN pnpm build # production stage diff --git a/web/__tests__/component-coverage-filters.test.ts b/web/__tests__/component-coverage-filters.test.ts new file mode 100644 index 0000000000..cacc1e2142 --- /dev/null +++ b/web/__tests__/component-coverage-filters.test.ts @@ -0,0 +1,115 @@ +import fs from 'node:fs' +import os from 'node:os' +import path from 'node:path' +import { afterEach, describe, expect, it } from 'vitest' +import { + collectComponentCoverageExcludedFiles, + COMPONENT_COVERAGE_EXCLUDE_LABEL, + getComponentCoverageExclusionReasons, +} from '../scripts/component-coverage-filters.mjs' + +describe('component coverage filters', () => { + describe('getComponentCoverageExclusionReasons', () => { + it('should exclude type-only files by basename', () => { + expect( + getComponentCoverageExclusionReasons( + 'web/app/components/share/text-generation/types.ts', + 'export type ShareMode = "run-once" | "run-batch"', + ), + ).toContain('type-only') + }) + + it('should exclude pure barrel files', () => { + expect( + getComponentCoverageExclusionReasons( + 'web/app/components/base/amplitude/index.ts', + [ + 'export { default } from "./AmplitudeProvider"', + 'export { resetUser, trackEvent } from "./utils"', + ].join('\n'), + ), + ).toContain('pure-barrel') + }) + + it('should exclude generated files from marker comments', () => { + expect( + getComponentCoverageExclusionReasons( + 'web/app/components/base/icons/src/vender/workflow/Answer.tsx', + [ + '// GENERATE BY script', + '// DON NOT EDIT IT MANUALLY', + 'export default function Icon() {', + ' return null', + '}', + ].join('\n'), + ), + ).toContain('generated') + }) + + it('should exclude pure static files with exported constants only', () => { + expect( + getComponentCoverageExclusionReasons( + 'web/app/components/workflow/note-node/constants.ts', + [ + 'import { NoteTheme } from "./types"', + 'export const CUSTOM_NOTE_NODE = "custom-note"', + 'export const THEME_MAP = {', + ' [NoteTheme.blue]: { title: "bg-blue-100" },', + '}', + ].join('\n'), + ), + ).toContain('pure-static') + }) + + it('should keep runtime logic files tracked', () => { + expect( + getComponentCoverageExclusionReasons( + 'web/app/components/workflow/nodes/trigger-schedule/default.ts', + [ + 'const validate = (value: string) => value.trim()', + 'export const nodeDefault = {', + ' value: validate("x"),', + '}', + ].join('\n'), + ), + ).toEqual([]) + }) + }) + + describe('collectComponentCoverageExcludedFiles', () => { + const tempDirs: string[] = [] + + afterEach(() => { + for (const dir of tempDirs) + fs.rmSync(dir, { recursive: true, force: true }) + tempDirs.length = 0 + }) + + it('should collect excluded files for coverage config and keep runtime files out', () => { + const rootDir = fs.mkdtempSync(path.join(os.tmpdir(), 'component-coverage-filters-')) + tempDirs.push(rootDir) + + fs.mkdirSync(path.join(rootDir, 'barrel'), { recursive: true }) + fs.mkdirSync(path.join(rootDir, 'icons'), { recursive: true }) + fs.mkdirSync(path.join(rootDir, 'static'), { recursive: true }) + fs.mkdirSync(path.join(rootDir, 'runtime'), { recursive: true }) + + fs.writeFileSync(path.join(rootDir, 'barrel', 'index.ts'), 'export { default } from "./Button"\n') + fs.writeFileSync(path.join(rootDir, 'icons', 'generated-icon.tsx'), '// @generated\nexport default function Icon() { return null }\n') + fs.writeFileSync(path.join(rootDir, 'static', 'constants.ts'), 'export const COLORS = { primary: "#fff" }\n') + fs.writeFileSync(path.join(rootDir, 'runtime', 'config.ts'), 'export const config = makeConfig()\n') + fs.writeFileSync(path.join(rootDir, 'runtime', 'types.ts'), 'export type Config = { value: string }\n') + + expect(collectComponentCoverageExcludedFiles(rootDir, { pathPrefix: 'app/components' })).toEqual([ + 'app/components/barrel/index.ts', + 'app/components/icons/generated-icon.tsx', + 'app/components/runtime/types.ts', + 'app/components/static/constants.ts', + ]) + }) + }) + + it('should describe the excluded coverage categories', () => { + expect(COMPONENT_COVERAGE_EXCLUDE_LABEL).toBe('type-only files, pure barrel files, generated files, pure static files') + }) +}) diff --git a/web/__tests__/share/text-generation-index-flow.test.tsx b/web/__tests__/share/text-generation-index-flow.test.tsx new file mode 100644 index 0000000000..3292474bec --- /dev/null +++ b/web/__tests__/share/text-generation-index-flow.test.tsx @@ -0,0 +1,235 @@ +import type { AccessMode } from '@/models/access-control' +import { fireEvent, render, screen, waitFor } from '@testing-library/react' +import * as React from 'react' +import TextGeneration from '@/app/components/share/text-generation' + +const useSearchParamsMock = vi.fn(() => new URLSearchParams()) + +vi.mock('next/navigation', () => ({ + useSearchParams: () => useSearchParamsMock(), +})) + +vi.mock('@/hooks/use-breakpoints', () => ({ + default: vi.fn(() => 'pc'), + MediaType: { pc: 'pc', pad: 'pad', mobile: 'mobile' }, +})) + +vi.mock('@/hooks/use-app-favicon', () => ({ + useAppFavicon: vi.fn(), +})) + +vi.mock('@/hooks/use-document-title', () => ({ + default: vi.fn(), +})) + +vi.mock('@/i18n-config/client', () => ({ + changeLanguage: vi.fn(() => Promise.resolve()), +})) + +vi.mock('@/app/components/share/text-generation/run-once', () => ({ + default: ({ + inputs, + onInputsChange, + onSend, + runControl, + }: { + inputs: Record + onInputsChange: (inputs: Record) => void + onSend: () => void + runControl?: { isStopping: boolean } | null + }) => ( +
+ {String(inputs.name ?? '')} + + + {runControl ? 'stop-ready' : 'idle'} +
+ ), +})) + +vi.mock('@/app/components/share/text-generation/run-batch', () => ({ + default: ({ onSend }: { onSend: (data: string[][]) => void }) => ( + + ), +})) + +vi.mock('@/app/components/app/text-generate/saved-items', () => ({ + default: ({ list }: { list: { id: string }[] }) =>
{list.length}
, +})) + +vi.mock('@/app/components/share/text-generation/menu-dropdown', () => ({ + default: () =>
, +})) + +vi.mock('@/app/components/share/text-generation/result', () => { + const MockResult = ({ + isCallBatchAPI, + onRunControlChange, + onRunStart, + taskId, + }: { + isCallBatchAPI: boolean + onRunControlChange?: (control: { onStop: () => void, isStopping: boolean } | null) => void + onRunStart: () => void + taskId?: number + }) => { + const runControlRef = React.useRef(false) + + React.useEffect(() => { + onRunStart() + }, [onRunStart]) + + React.useEffect(() => { + if (!isCallBatchAPI && !runControlRef.current) { + runControlRef.current = true + onRunControlChange?.({ onStop: vi.fn(), isStopping: false }) + } + }, [isCallBatchAPI, onRunControlChange]) + + return
+ } + + return { + default: MockResult, + } +}) + +const fetchSavedMessageMock = vi.fn() + +vi.mock('@/service/share', async () => { + const actual = await vi.importActual('@/service/share') + return { + ...actual, + fetchSavedMessage: (...args: Parameters) => fetchSavedMessageMock(...args), + removeMessage: vi.fn(), + saveMessage: vi.fn(), + } +}) + +const mockSystemFeatures = { + branding: { + enabled: false, + workspace_logo: null, + }, +} + +const mockWebAppState = { + appInfo: { + app_id: 'app-123', + site: { + title: 'Text Generation', + description: 'Share description', + default_language: 'en-US', + icon_type: 'emoji', + icon: 'robot', + icon_background: '#fff', + icon_url: '', + }, + custom_config: { + remove_webapp_brand: false, + replace_webapp_logo: '', + }, + }, + appParams: { + user_input_form: [ + { + 'text-input': { + label: 'Name', + variable: 'name', + required: true, + max_length: 48, + default: '', + hide: false, + }, + }, + ], + more_like_this: { + enabled: true, + }, + file_upload: { + enabled: false, + number_limits: 2, + detail: 'low', + allowed_upload_methods: ['local_file'], + }, + text_to_speech: { + enabled: true, + }, + system_parameters: { + image_file_size_limit: 10, + }, + }, + webAppAccessMode: 'public' as AccessMode, +} + +vi.mock('@/context/global-public-context', () => ({ + useGlobalPublicStore: (selector: (state: { systemFeatures: typeof mockSystemFeatures }) => unknown) => + selector({ systemFeatures: mockSystemFeatures }), +})) + +vi.mock('@/context/web-app-context', () => ({ + useWebAppStore: (selector: (state: typeof mockWebAppState) => unknown) => selector(mockWebAppState), +})) + +describe('TextGeneration', () => { + beforeEach(() => { + vi.clearAllMocks() + useSearchParamsMock.mockReturnValue(new URLSearchParams()) + fetchSavedMessageMock.mockResolvedValue({ + data: [{ id: 'saved-1' }, { id: 'saved-2' }], + }) + }) + + it('should switch between create, batch, and saved tabs after app state loads', async () => { + render() + + await waitFor(() => { + expect(screen.getByTestId('run-once-mock')).toBeInTheDocument() + }) + expect(screen.getByTestId('run-once-input-name')).toHaveTextContent('') + + fireEvent.click(screen.getByRole('button', { name: 'change-inputs' })) + await waitFor(() => { + expect(screen.getByTestId('run-once-input-name')).toHaveTextContent('Gamma') + }) + + fireEvent.click(screen.getByTestId('tab-header-item-batch')) + expect(screen.getByRole('button', { name: 'run-batch' })).toBeInTheDocument() + + fireEvent.click(screen.getByTestId('tab-header-item-saved')) + expect(screen.getByTestId('saved-items-mock')).toHaveTextContent('2') + + fireEvent.click(screen.getByTestId('tab-header-item-create')) + expect(screen.getByTestId('run-once-mock')).toBeInTheDocument() + }) + + it('should wire single-run stop control and clear it when batch execution starts', async () => { + render() + + await waitFor(() => { + expect(screen.getByTestId('run-once-mock')).toBeInTheDocument() + }) + + fireEvent.click(screen.getByRole('button', { name: 'run-once' })) + await waitFor(() => { + expect(screen.getByText('stop-ready')).toBeInTheDocument() + }) + expect(screen.getByTestId('result-single')).toBeInTheDocument() + + fireEvent.click(screen.getByTestId('tab-header-item-batch')) + fireEvent.click(screen.getByRole('button', { name: 'run-batch' })) + await waitFor(() => { + expect(screen.getByText('idle')).toBeInTheDocument() + }) + expect(screen.getByTestId('result-task-1')).toBeInTheDocument() + expect(screen.getByTestId('result-task-2')).toBeInTheDocument() + }) +}) diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts index 221ba2808f..71f5b009d3 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/tracing/config.ts @@ -5,7 +5,7 @@ export const docURL = { [TracingProvider.phoenix]: 'https://docs.arize.com/phoenix', [TracingProvider.langSmith]: 'https://docs.smith.langchain.com/', [TracingProvider.langfuse]: 'https://docs.langfuse.com', - [TracingProvider.opik]: 'https://www.comet.com/docs/opik/tracing/integrations/dify#setup-instructions', + [TracingProvider.opik]: 'https://www.comet.com/docs/opik/integrations/dify', [TracingProvider.weave]: 'https://weave-docs.wandb.ai/', [TracingProvider.aliyun]: 'https://help.aliyun.com/zh/arms/tracing-analysis/untitled-document-1750672984680', [TracingProvider.mlflow]: 'https://mlflow.org/docs/latest/genai/', diff --git a/web/app/(commonLayout)/layout.tsx b/web/app/(commonLayout)/layout.tsx index db2786f6cf..5ac39f1e39 100644 --- a/web/app/(commonLayout)/layout.tsx +++ b/web/app/(commonLayout)/layout.tsx @@ -1,6 +1,7 @@ import type { ReactNode } from 'react' import * as React from 'react' import { AppInitializer } from '@/app/components/app-initializer' +import InSiteMessageNotification from '@/app/components/app/in-site-message/notification' import AmplitudeProvider from '@/app/components/base/amplitude' import GA, { GaType } from '@/app/components/base/ga' import Zendesk from '@/app/components/base/zendesk' @@ -32,6 +33,7 @@ const Layout = ({ children }: { children: ReactNode }) => { {children} + diff --git a/web/app/account/(commonLayout)/account-page/AvatarWithEdit.tsx b/web/app/account/(commonLayout)/account-page/AvatarWithEdit.tsx index 76db83c1ba..3fc677d8d8 100644 --- a/web/app/account/(commonLayout)/account-page/AvatarWithEdit.tsx +++ b/web/app/account/(commonLayout)/account-page/AvatarWithEdit.tsx @@ -11,7 +11,7 @@ import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import ImageInput from '@/app/components/base/app-icon-picker/ImageInput' import getCroppedImg from '@/app/components/base/app-icon-picker/utils' -import Avatar from '@/app/components/base/avatar' +import { Avatar } from '@/app/components/base/avatar' import Button from '@/app/components/base/button' import Divider from '@/app/components/base/divider' import { useLocalFileUploader } from '@/app/components/base/image-uploader/hooks' @@ -103,7 +103,7 @@ const AvatarWithEdit = ({ onSave, ...props }: AvatarWithEditProps) => { <>
- setOnAvatarError(x)} /> + setOnAvatarError(status === 'error')} />
{ diff --git a/web/app/account/(commonLayout)/account-page/index.tsx b/web/app/account/(commonLayout)/account-page/index.tsx index 835663c721..9a104619da 100644 --- a/web/app/account/(commonLayout)/account-page/index.tsx +++ b/web/app/account/(commonLayout)/account-page/index.tsx @@ -4,6 +4,7 @@ import type { App } from '@/types/app' import { RiGraduationCapFill, } from '@remixicon/react' +import { useQueryClient } from '@tanstack/react-query' import { useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' @@ -15,11 +16,11 @@ import PremiumBadge from '@/app/components/base/premium-badge' import { ToastContext } from '@/app/components/base/toast/context' import Collapse from '@/app/components/header/account-setting/collapse' import { IS_CE_EDITION, validPassword } from '@/config' -import { useAppContext } from '@/context/app-context' import { useGlobalPublicStore } from '@/context/global-public-context' import { useProviderContext } from '@/context/provider-context' import { updateUserProfile } from '@/service/common' import { useAppList } from '@/service/use-apps' +import { commonQueryKeys, useUserProfile } from '@/service/use-common' import DeleteAccount from '../delete-account' import AvatarWithEdit from './AvatarWithEdit' @@ -37,7 +38,10 @@ export default function AccountPage() { const { systemFeatures } = useGlobalPublicStore() const { data: appList } = useAppList({ page: 1, limit: 100, name: '' }) const apps = appList?.data || [] - const { mutateUserProfile, userProfile } = useAppContext() + const queryClient = useQueryClient() + const { data: userProfileResp } = useUserProfile() + const userProfile = userProfileResp?.profile + const mutateUserProfile = () => queryClient.invalidateQueries({ queryKey: commonQueryKeys.userProfile }) const { isEducationAccount } = useProviderContext() const { notify } = useContext(ToastContext) const [editNameModalVisible, setEditNameModalVisible] = useState(false) @@ -53,6 +57,9 @@ export default function AccountPage() { const [showConfirmPassword, setShowConfirmPassword] = useState(false) const [showUpdateEmail, setShowUpdateEmail] = useState(false) + if (!userProfile) + return null + const handleEditName = () => { setEditNameModalVisible(true) setEditName(userProfile.name) @@ -149,7 +156,7 @@ export default function AccountPage() {

{t('account.myAccount', { ns: 'common' })}

- +

{userProfile.name} diff --git a/web/app/account/(commonLayout)/avatar.tsx b/web/app/account/(commonLayout)/avatar.tsx index 262fd35882..cca40a9792 100644 --- a/web/app/account/(commonLayout)/avatar.tsx +++ b/web/app/account/(commonLayout)/avatar.tsx @@ -7,12 +7,11 @@ import { useRouter } from 'next/navigation' import { Fragment } from 'react' import { useTranslation } from 'react-i18next' import { resetUser } from '@/app/components/base/amplitude/utils' -import Avatar from '@/app/components/base/avatar' +import { Avatar } from '@/app/components/base/avatar' import { LogOut01 } from '@/app/components/base/icons/src/vender/line/general' import PremiumBadge from '@/app/components/base/premium-badge' -import { useAppContext } from '@/context/app-context' import { useProviderContext } from '@/context/provider-context' -import { useLogout } from '@/service/use-common' +import { useLogout, useUserProfile } from '@/service/use-common' export type IAppSelector = { isMobile: boolean @@ -21,10 +20,15 @@ export type IAppSelector = { export default function AppSelector() { const router = useRouter() const { t } = useTranslation() - const { userProfile } = useAppContext() + const { data: userProfileResp } = useUserProfile() + const userProfile = userProfileResp?.profile const { isEducationAccount } = useProviderContext() const { mutateAsync: logout } = useLogout() + + if (!userProfile) + return null + const handleLogout = async () => { await logout() @@ -50,7 +54,7 @@ export default function AppSelector() { ${open && 'bg-components-panel-bg-blur'} `} > - +

{userProfile.email}
- +
diff --git a/web/app/account/oauth/authorize/page.tsx b/web/app/account/oauth/authorize/page.tsx index d718e0941d..835a1e702e 100644 --- a/web/app/account/oauth/authorize/page.tsx +++ b/web/app/account/oauth/authorize/page.tsx @@ -11,14 +11,13 @@ import { useRouter, useSearchParams } from 'next/navigation' import * as React from 'react' import { useEffect, useRef } from 'react' import { useTranslation } from 'react-i18next' -import Avatar from '@/app/components/base/avatar' +import { Avatar } from '@/app/components/base/avatar' import Button from '@/app/components/base/button' import Loading from '@/app/components/base/loading' import Toast from '@/app/components/base/toast' import { useLanguage } from '@/app/components/header/account-setting/model-provider-page/hooks' import { setPostLoginRedirect } from '@/app/signin/utils/post-login-redirect' -import { useAppContext } from '@/context/app-context' -import { useIsLogin } from '@/service/use-common' +import { useIsLogin, useUserProfile } from '@/service/use-common' import { useAuthorizeOAuthApp, useOAuthAppInfo } from '@/service/use-oauth' function buildReturnUrl(pathname: string, search: string) { @@ -62,7 +61,8 @@ export default function OAuthAuthorize() { const searchParams = useSearchParams() const client_id = decodeURIComponent(searchParams.get('client_id') || '') const redirect_uri = decodeURIComponent(searchParams.get('redirect_uri') || '') - const { userProfile } = useAppContext() + const { data: userProfileResp } = useUserProfile() + const userProfile = userProfileResp?.profile const { data: authAppInfo, isLoading: isOAuthLoading, isError } = useOAuthAppInfo(client_id, redirect_uri) const { mutateAsync: authorize, isPending: authorizing } = useAuthorizeOAuthApp() const hasNotifiedRef = useRef(false) @@ -138,7 +138,7 @@ export default function OAuthAuthorize() { {isLoggedIn && userProfile && (
- +
{userProfile.name}
{userProfile.email}
diff --git a/web/app/components/app/app-access-control/add-member-or-group-pop.tsx b/web/app/components/app/app-access-control/add-member-or-group-pop.tsx index 12132df73a..5d6700fa88 100644 --- a/web/app/components/app/app-access-control/add-member-or-group-pop.tsx +++ b/web/app/components/app/app-access-control/add-member-or-group-pop.tsx @@ -10,7 +10,7 @@ import { SubjectType } from '@/models/access-control' import { useSearchForWhiteListCandidates } from '@/service/access-control' import { cn } from '@/utils/classnames' import useAccessControlStore from '../../../../context/access-control-store' -import Avatar from '../../base/avatar' +import { Avatar } from '../../base/avatar' import Button from '../../base/button' import Checkbox from '../../base/checkbox' import Input from '../../base/input' @@ -203,7 +203,7 @@ function MemberItem({ member }: MemberItemProps) {
- +

{member.name}

diff --git a/web/app/components/app/app-access-control/specific-groups-or-members.tsx b/web/app/components/app/app-access-control/specific-groups-or-members.tsx index e3a5d8c7b7..ad9f4ea425 100644 --- a/web/app/components/app/app-access-control/specific-groups-or-members.tsx +++ b/web/app/components/app/app-access-control/specific-groups-or-members.tsx @@ -6,7 +6,7 @@ import { useTranslation } from 'react-i18next' import { AccessMode } from '@/models/access-control' import { useAppWhiteListSubjects } from '@/service/access-control' import useAccessControlStore from '../../../../context/access-control-store' -import Avatar from '../../base/avatar' +import { Avatar } from '../../base/avatar' import Loading from '../../base/loading' import Tooltip from '../../base/tooltip' import AddMemberOrGroupDialog from './add-member-or-group-pop' @@ -106,7 +106,7 @@ function MemberItem({ member }: MemberItemProps) { }, [member, setSpecificMembers, specificMembers]) return ( } + icon={} onRemove={handleRemoveMember} >

{member.name}

diff --git a/web/app/components/app/configuration/dataset-config/card-item/index.spec.tsx b/web/app/components/app/configuration/dataset-config/card-item/index.spec.tsx index 0bbed83a99..09a5ff6d07 100644 --- a/web/app/components/app/configuration/dataset-config/card-item/index.spec.tsx +++ b/web/app/components/app/configuration/dataset-config/card-item/index.spec.tsx @@ -172,12 +172,8 @@ describe('dataset-config/card-item', () => { const [editButton] = within(card).getAllByRole('button', { hidden: true }) await user.click(editButton) - expect(screen.getByText('Mock settings modal')).toBeInTheDocument() - await waitFor(() => { - expect(screen.getByRole('dialog')).toBeVisible() - }) - - fireEvent.click(screen.getByText('Save changes')) + expect(await screen.findByText('Mock settings modal')).toBeInTheDocument() + fireEvent.click(await screen.findByText('Save changes')) await waitFor(() => { expect(onSave).toHaveBeenCalledWith(expect.objectContaining({ name: 'Updated dataset' })) @@ -194,7 +190,7 @@ describe('dataset-config/card-item', () => { const card = screen.getByText(dataset.name).closest('.group') as HTMLElement const buttons = within(card).getAllByRole('button', { hidden: true }) - const deleteButton = buttons[buttons.length - 1] + const deleteButton = buttons.at(-1)! expect(deleteButton.className).not.toContain('action-btn-destructive') @@ -233,7 +229,7 @@ describe('dataset-config/card-item', () => { await user.click(editButton) expect(screen.getByText('Mock settings modal')).toBeInTheDocument() - const overlay = Array.from(document.querySelectorAll('[class]')) + const overlay = [...document.querySelectorAll('[class]')] .find(element => element.className.toString().includes('bg-black/30')) expect(overlay).toBeInTheDocument() diff --git a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.spec.tsx b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.spec.tsx index d621bb3941..350ede8c96 100644 --- a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.spec.tsx +++ b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.spec.tsx @@ -91,7 +91,7 @@ vi.mock('@/app/components/base/chat/chat', () => ({ })) vi.mock('@/app/components/base/avatar', () => ({ - default: ({ name }: { name: string }) =>
{name}
, + Avatar: ({ name }: { name: string }) =>
{name}
, })) const createModelAndParameter = (overrides: Partial = {}): ModelAndParameter => ({ diff --git a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx index b7a7e90fca..e957fc24c4 100644 --- a/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx +++ b/web/app/components/app/configuration/debug/debug-with-multiple-model/chat-item.tsx @@ -7,7 +7,7 @@ import { useCallback, useMemo, } from 'react' -import Avatar from '@/app/components/base/avatar' +import { Avatar } from '@/app/components/base/avatar' import Chat from '@/app/components/base/chat/chat' import { useChat } from '@/app/components/base/chat/chat/hooks' import { getLastAnswer } from '@/app/components/base/chat/utils' @@ -149,7 +149,7 @@ const ChatItem: FC = ({ suggestedQuestions={suggestedQuestions} onSend={doSend} showPromptLog - questionIcon={} + questionIcon={} allToolIcons={allToolIcons} hideLogModal noSpacing diff --git a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx index addeb92297..84ff8b5ede 100644 --- a/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx +++ b/web/app/components/app/configuration/debug/debug-with-single-model/index.tsx @@ -3,7 +3,7 @@ import type { ChatConfig, ChatItem, OnSend } from '@/app/components/base/chat/ty import type { FileEntity } from '@/app/components/base/file-uploader/types' import { memo, useCallback, useImperativeHandle, useMemo } from 'react' import { useStore as useAppStore } from '@/app/components/app/store' -import Avatar from '@/app/components/base/avatar' +import { Avatar } from '@/app/components/base/avatar' import Chat from '@/app/components/base/chat/chat' import { useChat } from '@/app/components/base/chat/chat/hooks' import { getLastAnswer, isValidGeneratedAnswer } from '@/app/components/base/chat/utils' @@ -168,7 +168,7 @@ const DebugWithSingleModel = ( switchSibling={siblingMessageId => setTargetMessageId(siblingMessageId)} onStopResponding={handleStop} showPromptLog - questionIcon={} + questionIcon={} allToolIcons={allToolIcons} onAnnotationEdited={handleAnnotationEdited} onAnnotationAdded={handleAnnotationAdded} diff --git a/web/app/components/app/configuration/tools/index.tsx b/web/app/components/app/configuration/tools/index.tsx index f348a7718d..51a9e87a97 100644 --- a/web/app/components/app/configuration/tools/index.tsx +++ b/web/app/components/app/configuration/tools/index.tsx @@ -179,7 +179,7 @@ const Tools = () => {
handleSaveExternalDataToolModal({ ...item, enabled }, index)} /> diff --git a/web/app/components/app/in-site-message/index.spec.tsx b/web/app/components/app/in-site-message/index.spec.tsx new file mode 100644 index 0000000000..530084074d --- /dev/null +++ b/web/app/components/app/in-site-message/index.spec.tsx @@ -0,0 +1,142 @@ +import type { ComponentProps } from 'react' +import type { InSiteMessageActionItem } from './index' +import { fireEvent, render, screen } from '@testing-library/react' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' +import InSiteMessage from './index' + +vi.mock('@/app/components/base/amplitude', () => ({ + trackEvent: vi.fn(), +})) + +describe('InSiteMessage', () => { + const originalLocation = window.location + + beforeEach(() => { + vi.clearAllMocks() + vi.stubGlobal('open', vi.fn()) + }) + + afterEach(() => { + Object.defineProperty(window, 'location', { + value: originalLocation, + configurable: true, + }) + vi.unstubAllGlobals() + }) + + const renderComponent = (actions: InSiteMessageActionItem[], props?: Partial>) => { + return render( + , + ) + } + + // Validate baseline rendering and content normalization. + describe('Rendering', () => { + it('should render title, subtitle, markdown content, and action buttons', () => { + const actions: InSiteMessageActionItem[] = [ + { action: 'close', action_name: 'dismiss', text: 'Close', type: 'default' }, + { action: 'link', action_name: 'learn_more', text: 'Learn more', type: 'primary', data: 'https://example.com' }, + ] + + renderComponent(actions, { className: 'custom-message' }) + + const closeButton = screen.getByRole('button', { name: 'Close' }) + const learnMoreButton = screen.getByRole('button', { name: 'Learn more' }) + const panel = closeButton.closest('div.fixed') + const titleElement = panel?.querySelector('.title-3xl-bold') + const subtitleElement = panel?.querySelector('.body-md-regular') + expect(panel).toHaveClass('custom-message') + expect(titleElement).toHaveTextContent(/Title.*Line/s) + expect(subtitleElement).toHaveTextContent(/Subtitle.*Line/s) + expect(titleElement?.textContent).not.toContain('\\n') + expect(subtitleElement?.textContent).not.toContain('\\n') + expect(screen.getByText('Main content')).toBeInTheDocument() + expect(closeButton).toBeInTheDocument() + expect(learnMoreButton).toBeInTheDocument() + }) + + it('should fallback to default header background when headerBgUrl is empty string', () => { + const actions: InSiteMessageActionItem[] = [{ action: 'close', action_name: 'dismiss', text: 'Close', type: 'default' }] + + const { container } = renderComponent(actions, { headerBgUrl: '' }) + const header = container.querySelector('div[style]') + expect(header).toHaveStyle({ backgroundImage: 'url(/in-site-message/header-bg.svg)' }) + }) + }) + + // Validate action handling for close and link actions. + describe('Actions', () => { + it('should call onAction and hide component when close action is clicked', () => { + const onAction = vi.fn() + const closeAction: InSiteMessageActionItem = { action: 'close', action_name: 'dismiss', text: 'Close', type: 'default' } + + renderComponent([closeAction], { onAction }) + fireEvent.click(screen.getByRole('button', { name: 'Close' })) + + expect(onAction).toHaveBeenCalledWith(closeAction) + expect(screen.queryByRole('button', { name: 'Close' })).not.toBeInTheDocument() + }) + + it('should open a new tab when link action data is a string', () => { + const linkAction: InSiteMessageActionItem = { + action: 'link', + action_name: 'confirm', + text: 'Open link', + type: 'primary', + data: 'https://example.com', + } + + renderComponent([linkAction]) + fireEvent.click(screen.getByRole('button', { name: 'Open link' })) + + expect(window.open).toHaveBeenCalledWith('https://example.com', '_blank', 'noopener,noreferrer') + }) + + it('should navigate with location.assign when link action target is _self', () => { + const assignSpy = vi.fn() + Object.defineProperty(window, 'location', { + value: { + ...originalLocation, + assign: assignSpy, + }, + configurable: true, + }) + + const linkAction: InSiteMessageActionItem = { + action: 'link', + action_name: 'confirm', + text: 'Open self', + type: 'primary', + data: { href: 'https://example.com/self', target: '_self' }, + } + + renderComponent([linkAction]) + fireEvent.click(screen.getByRole('button', { name: 'Open self' })) + + expect(assignSpy).toHaveBeenCalledWith('https://example.com/self') + expect(window.open).not.toHaveBeenCalled() + }) + + it('should not trigger navigation when link data is invalid', () => { + const linkAction: InSiteMessageActionItem = { + action: 'link', + action_name: 'confirm', + text: 'Broken link', + type: 'primary', + data: { rel: 'noopener' }, + } + + renderComponent([linkAction]) + fireEvent.click(screen.getByRole('button', { name: 'Broken link' })) + + expect(window.open).not.toHaveBeenCalled() + }) + }) +}) diff --git a/web/app/components/app/in-site-message/index.tsx b/web/app/components/app/in-site-message/index.tsx new file mode 100644 index 0000000000..0276257860 --- /dev/null +++ b/web/app/components/app/in-site-message/index.tsx @@ -0,0 +1,148 @@ +'use client' + +import { useEffect, useMemo, useState } from 'react' +import { trackEvent } from '@/app/components/base/amplitude' +import Button from '@/app/components/base/button' +import { MarkdownWithDirective } from '@/app/components/base/markdown-with-directive' +import { cn } from '@/utils/classnames' + +type InSiteMessageAction = 'link' | 'close' +type InSiteMessageButtonType = 'primary' | 'default' + +export type InSiteMessageActionItem = { + action: InSiteMessageAction + action_name: string // for tracing and analytics + data?: unknown + text: string + type: InSiteMessageButtonType +} + +type InSiteMessageProps = { + notificationId: string + actions: InSiteMessageActionItem[] + className?: string + headerBgUrl?: string + main: string + onAction?: (action: InSiteMessageActionItem) => void + subtitle: string + title: string +} + +const LINE_BREAK_REGEX = /\\n/g + +function normalizeLineBreaks(text: string): string { + return text.replace(LINE_BREAK_REGEX, '\n') +} + +function normalizeLinkData(data: unknown): { href: string, rel?: string, target?: string } | null { + if (typeof data === 'string') + return { href: data, target: '_blank' } + + if (!data || typeof data !== 'object') + return null + + const candidate = data as { href?: unknown, rel?: unknown, target?: unknown } + if (typeof candidate.href !== 'string' || !candidate.href) + return null + + return { + href: candidate.href, + rel: typeof candidate.rel === 'string' ? candidate.rel : undefined, + target: typeof candidate.target === 'string' ? candidate.target : '_blank', + } +} + +const DEFAULT_HEADER_BG_URL = '/in-site-message/header-bg.svg' + +function InSiteMessage({ + notificationId, + actions, + className, + headerBgUrl = DEFAULT_HEADER_BG_URL, + main, + onAction, + subtitle, + title, +}: InSiteMessageProps) { + const [visible, setVisible] = useState(true) + const normalizedTitle = normalizeLineBreaks(title) + const normalizedSubtitle = normalizeLineBreaks(subtitle) + + const headerStyle = useMemo(() => { + return { + backgroundImage: `url(${headerBgUrl || DEFAULT_HEADER_BG_URL})`, + } + }, [headerBgUrl]) + + useEffect(() => { + trackEvent('in_site_message_show', { + notification_id: notificationId, + }) + }, [notificationId]) + + const handleAction = (item: InSiteMessageActionItem) => { + trackEvent('in_site_message_action', { + notification_id: notificationId, + action: item.action_name, + }) + onAction?.(item) + + if (item.action === 'close') { + setVisible(false) + return + } + + const linkData = normalizeLinkData(item.data) + if (!linkData) + return + + const target = linkData.target ?? '_blank' + if (target === '_self') { + window.location.assign(linkData.href) + return + } + + window.open(linkData.href, target, linkData.rel || 'noopener,noreferrer') + } + + if (!visible) + return null + + return ( +
+
+
+ {normalizedTitle} +
+
+ {normalizedSubtitle} +
+
+ +
+ +
+ +
+ {actions.map(item => ( + + ))} +
+
+ ) +} + +export default InSiteMessage diff --git a/web/app/components/app/in-site-message/notification.spec.tsx b/web/app/components/app/in-site-message/notification.spec.tsx new file mode 100644 index 0000000000..0d86d8a91c --- /dev/null +++ b/web/app/components/app/in-site-message/notification.spec.tsx @@ -0,0 +1,221 @@ +import type { ReactNode } from 'react' +import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { fireEvent, render, screen, waitFor } from '@testing-library/react' +import InSiteMessageNotification from './notification' + +const { + mockConfig, + mockNotification, + mockNotificationDismiss, +} = vi.hoisted(() => ({ + mockConfig: { + isCloudEdition: true, + }, + mockNotification: vi.fn(), + mockNotificationDismiss: vi.fn(), +})) + +vi.mock(import('@/config'), async (importOriginal) => { + const actual = await importOriginal() + + return { + ...actual, + get IS_CLOUD_EDITION() { + return mockConfig.isCloudEdition + }, + } +}) + +vi.mock('@/service/client', () => ({ + consoleQuery: { + notification: { + queryOptions: (options?: Record) => ({ + queryKey: ['console', 'notification'], + queryFn: (...args: unknown[]) => mockNotification(...args), + ...options, + }), + }, + notificationDismiss: { + mutationOptions: (options?: Record) => ({ + mutationKey: ['console', 'notificationDismiss'], + mutationFn: (...args: unknown[]) => mockNotificationDismiss(...args), + ...options, + }), + }, + }, +})) + +const createWrapper = () => { + const queryClient = new QueryClient({ + defaultOptions: { + queries: { + retry: false, + }, + mutations: { + retry: false, + }, + }, + }) + + const Wrapper = ({ children }: { children: ReactNode }) => ( + + {children} + + ) + + return Wrapper +} + +describe('InSiteMessageNotification', () => { + beforeEach(() => { + vi.clearAllMocks() + mockConfig.isCloudEdition = true + vi.stubGlobal('open', vi.fn()) + }) + + afterEach(() => { + vi.unstubAllGlobals() + }) + + // Validate query gating and empty state rendering. + describe('Rendering', () => { + it('should render null and skip query when not cloud edition', async () => { + mockConfig.isCloudEdition = false + const Wrapper = createWrapper() + const { container } = render(, { wrapper: Wrapper }) + + await waitFor(() => { + expect(mockNotification).not.toHaveBeenCalled() + }) + expect(container).toBeEmptyDOMElement() + }) + + it('should render null when notification list is empty', async () => { + mockNotification.mockResolvedValue({ notifications: [] }) + const Wrapper = createWrapper() + const { container } = render(, { wrapper: Wrapper }) + + await waitFor(() => { + expect(mockNotification).toHaveBeenCalledTimes(1) + }) + expect(container).toBeEmptyDOMElement() + }) + }) + + // Validate parsed-body behavior and action handling. + describe('Notification body parsing and actions', () => { + it('should render parsed main/actions and dismiss only on close action', async () => { + mockNotification.mockResolvedValue({ + notifications: [ + { + notification_id: 'n-1', + title: 'Update title', + subtitle: 'Update subtitle', + title_pic_url: 'https://example.com/bg.png', + body: JSON.stringify({ + main: 'Parsed body main', + actions: [ + { action: 'link', data: 'https://example.com/docs', text: 'Visit docs', type: 'primary' }, + { action: 'close', text: 'Dismiss now', type: 'default' }, + { action: 'link', data: 'https://example.com/invalid', text: 100, type: 'primary' }, + ], + }), + }, + ], + }) + mockNotificationDismiss.mockResolvedValue({ success: true }) + + const Wrapper = createWrapper() + render(, { wrapper: Wrapper }) + + await waitFor(() => { + expect(screen.getByText('Parsed body main')).toBeInTheDocument() + }) + expect(screen.getByRole('button', { name: 'Visit docs' })).toBeInTheDocument() + expect(screen.getByRole('button', { name: 'Dismiss now' })).toBeInTheDocument() + expect(screen.queryByRole('button', { name: 'Invalid' })).not.toBeInTheDocument() + + fireEvent.click(screen.getByRole('button', { name: 'Visit docs' })) + expect(mockNotificationDismiss).not.toHaveBeenCalled() + + fireEvent.click(screen.getByRole('button', { name: 'Dismiss now' })) + await waitFor(() => { + expect(mockNotificationDismiss).toHaveBeenCalledWith( + { + body: { + notification_id: 'n-1', + }, + }, + expect.objectContaining({ + mutationKey: ['console', 'notificationDismiss'], + }), + ) + }) + }) + + it('should fallback to raw body and default close action when body is invalid json', async () => { + mockNotification.mockResolvedValue({ + notifications: [ + { + notification_id: 'n-2', + title: 'Fallback title', + subtitle: 'Fallback subtitle', + title_pic_url: 'https://example.com/bg-2.png', + body: 'raw body text', + }, + ], + }) + mockNotificationDismiss.mockResolvedValue({ success: true }) + + const Wrapper = createWrapper() + render(, { wrapper: Wrapper }) + + await waitFor(() => { + expect(screen.getByText('raw body text')).toBeInTheDocument() + }) + + const closeButton = screen.getByRole('button', { name: 'common.operation.close' }) + fireEvent.click(closeButton) + + await waitFor(() => { + expect(mockNotificationDismiss).toHaveBeenCalledWith( + { + body: { + notification_id: 'n-2', + }, + }, + expect.objectContaining({ + mutationKey: ['console', 'notificationDismiss'], + }), + ) + }) + }) + + it('should fallback to default close action when parsed actions are all invalid', async () => { + mockNotification.mockResolvedValue({ + notifications: [ + { + notification_id: 'n-3', + title: 'Invalid action title', + subtitle: 'Invalid action subtitle', + title_pic_url: 'https://example.com/bg-3.png', + body: JSON.stringify({ + main: 'Main from parsed body', + actions: [ + { action: 'link', type: 'primary', text: 100, data: 'https://example.com' }, + ], + }), + }, + ], + }) + + const Wrapper = createWrapper() + render(, { wrapper: Wrapper }) + + await waitFor(() => { + expect(screen.getByText('Main from parsed body')).toBeInTheDocument() + }) + expect(screen.getByRole('button', { name: 'common.operation.close' })).toBeInTheDocument() + }) + }) +}) diff --git a/web/app/components/app/in-site-message/notification.tsx b/web/app/components/app/in-site-message/notification.tsx new file mode 100644 index 0000000000..cebf6ffd91 --- /dev/null +++ b/web/app/components/app/in-site-message/notification.tsx @@ -0,0 +1,111 @@ +'use client' + +import type { InSiteMessageActionItem } from './index' +import { useMutation, useQuery } from '@tanstack/react-query' +import { useTranslation } from 'react-i18next' +import { IS_CLOUD_EDITION } from '@/config' +import { consoleQuery } from '@/service/client' +import InSiteMessage from './index' + +type NotificationBodyPayload = { + actions: InSiteMessageActionItem[] + main: string +} + +function isValidActionItem(value: unknown): value is InSiteMessageActionItem { + if (!value || typeof value !== 'object') + return false + + const candidate = value as { + action?: unknown + data?: unknown + text?: unknown + type?: unknown + } + + return ( + typeof candidate.text === 'string' + && (candidate.type === 'primary' || candidate.type === 'default') + && (candidate.action === 'link' || candidate.action === 'close') + && (candidate.data === undefined || typeof candidate.data !== 'function') + ) +} + +function parseNotificationBody(body: string): NotificationBodyPayload | null { + try { + const parsed = JSON.parse(body) as { + actions?: unknown + main?: unknown + } + + if (!parsed || typeof parsed !== 'object') + return null + + if (typeof parsed.main !== 'string') + return null + + const actions = Array.isArray(parsed.actions) + ? parsed.actions.filter(isValidActionItem) + : [] + + return { + main: parsed.main, + actions, + } + } + catch { + return null + } +} + +function InSiteMessageNotification() { + const { t } = useTranslation() + const dismissNotificationMutation = useMutation(consoleQuery.notificationDismiss.mutationOptions()) + + const { data } = useQuery(consoleQuery.notification.queryOptions({ + enabled: IS_CLOUD_EDITION, + })) + + const notification = data?.notifications?.[0] + const parsedBody = notification ? parseNotificationBody(notification.body) : null + + if (!IS_CLOUD_EDITION || !notification) + return null + + const fallbackActions: InSiteMessageActionItem[] = [ + { + type: 'default', + action_name: 'dismiss', + text: t('operation.close', { ns: 'common' }), + action: 'close', + }, + ] + + const actions = parsedBody?.actions?.length ? parsedBody.actions : fallbackActions + const main = parsedBody?.main ?? notification.body + const handleAction = (action: InSiteMessageActionItem) => { + if (action.action !== 'close') + return + + dismissNotificationMutation.mutate({ + body: { + notification_id: notification.notification_id, + }, + }) + } + + return ( + + ) +} + +export default InSiteMessageNotification diff --git a/web/app/components/app/overview/settings/index.spec.tsx b/web/app/components/app/overview/settings/index.spec.tsx index d98e02ad57..b849b4f015 100644 --- a/web/app/components/app/overview/settings/index.spec.tsx +++ b/web/app/components/app/overview/settings/index.spec.tsx @@ -6,7 +6,7 @@ import type { ModalContextState } from '@/context/modal-context' import type { ProviderContextState } from '@/context/provider-context' import type { AppDetailResponse } from '@/models/app' import type { AppSSO } from '@/types/app' -import { fireEvent, render, screen, waitFor } from '@testing-library/react' +import { act, fireEvent, render, screen, waitFor } from '@testing-library/react' import { Plan } from '@/app/components/billing/type' import { baseProviderContextValue } from '@/context/provider-context' import { AppModeEnum } from '@/types/app' @@ -131,6 +131,10 @@ describe('SettingsModal', () => { }) }) + afterEach(() => { + vi.useRealTimers() + }) + it('should render the modal and expose the expanded settings section', async () => { renderSettingsModal() expect(screen.getByText('appOverview.overview.appInfo.settings.title')).toBeInTheDocument() @@ -212,4 +216,54 @@ describe('SettingsModal', () => { })) expect(mockOnClose).toHaveBeenCalled() }) + + it('should clear the delayed hide-more timer when the modal unmounts after closing', () => { + vi.useFakeTimers() + const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout') + const { unmount } = renderSettingsModal() + + fireEvent.click(screen.getByText('appOverview.overview.appInfo.settings.more.entry')) + fireEvent.click(screen.getByText('common.operation.cancel')) + unmount() + + expect(clearTimeoutSpy).toHaveBeenCalled() + vi.runAllTimers() + }) + + it('should replace the pending hide-more timer and clear the ref after the timeout completes', async () => { + const hideCallbacks: Array<() => void> = [] + const originalSetTimeout = globalThis.setTimeout + const setTimeoutSpy = vi.spyOn(globalThis, 'setTimeout').mockImplementation((( + callback: TimerHandler, + delay?: number, + ...args: unknown[] + ) => { + if (delay === 200) { + hideCallbacks.push(() => { + if (typeof callback === 'function') + callback(...args) + }) + return hideCallbacks.length as unknown as ReturnType + } + + return originalSetTimeout(callback, delay, ...args) + }) as unknown as typeof setTimeout) + const clearTimeoutSpy = vi.spyOn(globalThis, 'clearTimeout') + renderSettingsModal() + + act(() => { + fireEvent.click(screen.getByText('common.operation.cancel')) + fireEvent.click(screen.getByText('common.operation.cancel')) + }) + + expect(clearTimeoutSpy).toHaveBeenCalled() + expect(hideCallbacks.length).toBeGreaterThanOrEqual(2) + + act(() => { + hideCallbacks.at(-1)?.() + }) + + setTimeoutSpy.mockRestore() + clearTimeoutSpy.mockRestore() + }) }) diff --git a/web/app/components/app/overview/settings/index.tsx b/web/app/components/app/overview/settings/index.tsx index 20461dda7e..f7c9e309ab 100644 --- a/web/app/components/app/overview/settings/index.tsx +++ b/web/app/components/app/overview/settings/index.tsx @@ -6,7 +6,7 @@ import type { AppIconType, AppSSO, Language } from '@/types/app' import { RiArrowRightSLine, RiCloseLine } from '@remixicon/react' import Link from 'next/link' import * as React from 'react' -import { useCallback, useEffect, useState } from 'react' +import { useCallback, useEffect, useRef, useState } from 'react' import { Trans, useTranslation } from 'react-i18next' import ActionButton from '@/app/components/base/action-button' import AppIcon from '@/app/components/base/app-icon' @@ -99,6 +99,7 @@ const SettingsModal: FC = ({ const [language, setLanguage] = useState(default_language) const [saveLoading, setSaveLoading] = useState(false) const { t } = useTranslation() + const hideMoreTimerRef = useRef | null>(null) const [showAppIconPicker, setShowAppIconPicker] = useState(false) const [appIcon, setAppIcon] = useState( @@ -137,10 +138,22 @@ const SettingsModal: FC = ({ : { type: 'emoji', icon, background: icon_background! }) }, [appInfo, chat_color_theme, chat_color_theme_inverted, copyright, custom_disclaimer, default_language, description, icon, icon_background, icon_type, icon_url, privacy_policy, show_workflow_steps, title, use_icon_as_answer_icon]) + useEffect(() => { + return () => { + if (hideMoreTimerRef.current) { + clearTimeout(hideMoreTimerRef.current) + hideMoreTimerRef.current = null + } + } + }, []) + const onHide = () => { onClose() - setTimeout(() => { + if (hideMoreTimerRef.current) + clearTimeout(hideMoreTimerRef.current) + hideMoreTimerRef.current = setTimeout(() => { setIsShowMore(false) + hideMoreTimerRef.current = null }, 200) } diff --git a/web/app/components/base/agent-log-modal/__tests__/detail.spec.tsx b/web/app/components/base/agent-log-modal/__tests__/detail.spec.tsx index 47d854e028..8b796435e0 100644 --- a/web/app/components/base/agent-log-modal/__tests__/detail.spec.tsx +++ b/web/app/components/base/agent-log-modal/__tests__/detail.spec.tsx @@ -2,6 +2,7 @@ import type { ComponentProps } from 'react' import type { IChatItem } from '@/app/components/base/chat/chat/type' import type { AgentLogDetailResponse } from '@/models/log' import { fireEvent, render, screen, waitFor } from '@testing-library/react' +import { useStore as useAppStore } from '@/app/components/app/store' import { ToastContext } from '@/app/components/base/toast/context' import { fetchAgentLogDetail } from '@/service/log' import AgentLogDetail from '../detail' @@ -104,7 +105,7 @@ describe('AgentLogDetail', () => { describe('Rendering', () => { it('should show loading indicator while fetching data', async () => { - vi.mocked(fetchAgentLogDetail).mockReturnValue(new Promise(() => {})) + vi.mocked(fetchAgentLogDetail).mockReturnValue(new Promise(() => { })) renderComponent() @@ -193,6 +194,18 @@ describe('AgentLogDetail', () => { }) describe('Edge Cases', () => { + it('should not fetch data when app detail is unavailable', async () => { + vi.mocked(useAppStore).mockImplementationOnce(selector => selector({ appDetail: undefined } as never)) + vi.mocked(fetchAgentLogDetail).mockResolvedValue(createMockResponse()) + + renderComponent() + + await waitFor(() => { + expect(fetchAgentLogDetail).not.toHaveBeenCalled() + }) + expect(screen.getByRole('status')).toBeInTheDocument() + }) + it('should notify on API error', async () => { vi.mocked(fetchAgentLogDetail).mockRejectedValue(new Error('API Error')) diff --git a/web/app/components/base/agent-log-modal/__tests__/index.spec.tsx b/web/app/components/base/agent-log-modal/__tests__/index.spec.tsx index 6437ae5b43..b2db524453 100644 --- a/web/app/components/base/agent-log-modal/__tests__/index.spec.tsx +++ b/web/app/components/base/agent-log-modal/__tests__/index.spec.tsx @@ -139,4 +139,23 @@ describe('AgentLogModal', () => { expect(mockProps.onCancel).toHaveBeenCalledTimes(1) }) + + it('should ignore click-away before mounted state is set', () => { + vi.mocked(fetchAgentLogDetail).mockReturnValue(new Promise(() => {})) + let invoked = false + vi.mocked(useClickAway).mockImplementation((callback) => { + if (!invoked) { + invoked = true + callback(new Event('click')) + } + }) + + render( + ['value']}> + + , + ) + + expect(mockProps.onCancel).not.toHaveBeenCalled() + }) }) diff --git a/web/app/components/base/agent-log-modal/__tests__/result.spec.tsx b/web/app/components/base/agent-log-modal/__tests__/result.spec.tsx index 6fcf4c1859..ca2fcb9c57 100644 --- a/web/app/components/base/agent-log-modal/__tests__/result.spec.tsx +++ b/web/app/components/base/agent-log-modal/__tests__/result.spec.tsx @@ -82,4 +82,9 @@ describe('ResultPanel', () => { render() expect(screen.getByText('appDebug.agent.agentModeType.ReACT')).toBeInTheDocument() }) + + it('should fallback to zero tokens when total_tokens is undefined', () => { + render() + expect(screen.getByText('0 Tokens')).toBeInTheDocument() + }) }) diff --git a/web/app/components/base/agent-log-modal/__tests__/tool-call.spec.tsx b/web/app/components/base/agent-log-modal/__tests__/tool-call.spec.tsx index a5d6aa8d81..9b2a2726c5 100644 --- a/web/app/components/base/agent-log-modal/__tests__/tool-call.spec.tsx +++ b/web/app/components/base/agent-log-modal/__tests__/tool-call.spec.tsx @@ -2,6 +2,7 @@ import { fireEvent, render, screen } from '@testing-library/react' import * as React from 'react' import { describe, expect, it, vi } from 'vitest' import { BlockEnum } from '@/app/components/workflow/types' +import { useLocale } from '@/context/i18n' import ToolCallItem from '../tool-call' vi.mock('@/app/components/workflow/nodes/_base/components/editor/code-editor', () => ({ @@ -17,6 +18,10 @@ vi.mock('@/app/components/workflow/block-icon', () => ({ default: ({ type }: { type: BlockEnum }) =>
, })) +vi.mock('@/context/i18n', () => ({ + useLocale: vi.fn(() => 'en'), +})) + const mockToolCall = { status: 'success', error: null, @@ -41,6 +46,17 @@ describe('ToolCallItem', () => { expect(screen.getByTestId('block-icon')).toHaveAttribute('data-type', BlockEnum.Tool) }) + it('should fallback to locale key with underscores when hyphenated key is missing', () => { + vi.mocked(useLocale).mockReturnValueOnce('en-US') + const fallbackLocaleToolCall = { + ...mockToolCall, + tool_label: { en_US: 'Fallback Label' }, + } + + render() + expect(screen.getByText('Fallback Label')).toBeInTheDocument() + }) + it('should format time correctly', () => { render() expect(screen.getByText('1.500 s')).toBeInTheDocument() @@ -54,13 +70,17 @@ describe('ToolCallItem', () => { expect(screen.getByText('1 m 5.000 s')).toBeInTheDocument() }) - it('should format token count correctly', () => { + it('should format token count in K units', () => { render() expect(screen.getByText('1.2K tokens')).toBeInTheDocument() + }) + it('should format token count without unit for small values', () => { render() expect(screen.getByText('800 tokens')).toBeInTheDocument() + }) + it('should format token count in M units', () => { render() expect(screen.getByText('1.2M tokens')).toBeInTheDocument() }) diff --git a/web/app/components/base/amplitude/AmplitudeProvider.spec.tsx b/web/app/components/base/amplitude/AmplitudeProvider.spec.tsx new file mode 100644 index 0000000000..2402c84a3e --- /dev/null +++ b/web/app/components/base/amplitude/AmplitudeProvider.spec.tsx @@ -0,0 +1,139 @@ +import * as amplitude from '@amplitude/analytics-browser' +import { sessionReplayPlugin } from '@amplitude/plugin-session-replay-browser' +import { render } from '@testing-library/react' +import { beforeEach, describe, expect, it, vi } from 'vitest' +import AmplitudeProvider, { isAmplitudeEnabled } from './AmplitudeProvider' + +const mockConfig = vi.hoisted(() => ({ + AMPLITUDE_API_KEY: 'test-api-key', + IS_CLOUD_EDITION: true, +})) + +vi.mock('@/config', () => mockConfig) + +vi.mock('@amplitude/analytics-browser', () => ({ + init: vi.fn(), + add: vi.fn(), +})) + +vi.mock('@amplitude/plugin-session-replay-browser', () => ({ + sessionReplayPlugin: vi.fn(() => ({ name: 'session-replay' })), +})) + +describe('AmplitudeProvider', () => { + beforeEach(() => { + vi.clearAllMocks() + mockConfig.AMPLITUDE_API_KEY = 'test-api-key' + mockConfig.IS_CLOUD_EDITION = true + }) + + describe('isAmplitudeEnabled', () => { + it('returns true when cloud edition and api key present', () => { + expect(isAmplitudeEnabled()).toBe(true) + }) + + it('returns false when cloud edition but no api key', () => { + mockConfig.AMPLITUDE_API_KEY = '' + expect(isAmplitudeEnabled()).toBe(false) + }) + + it('returns false when not cloud edition', () => { + mockConfig.IS_CLOUD_EDITION = false + expect(isAmplitudeEnabled()).toBe(false) + }) + }) + + describe('Component', () => { + it('initializes amplitude when enabled', () => { + render() + + expect(amplitude.init).toHaveBeenCalledWith('test-api-key', expect.any(Object)) + expect(sessionReplayPlugin).toHaveBeenCalledWith({ sampleRate: 0.8 }) + expect(amplitude.add).toHaveBeenCalledTimes(2) + }) + + it('does not initialize amplitude when disabled', () => { + mockConfig.AMPLITUDE_API_KEY = '' + render() + + expect(amplitude.init).not.toHaveBeenCalled() + expect(amplitude.add).not.toHaveBeenCalled() + }) + + it('pageNameEnrichmentPlugin logic works as expected', async () => { + render() + const plugin = vi.mocked(amplitude.add).mock.calls[0]?.[0] as amplitude.Types.EnrichmentPlugin | undefined + expect(plugin).toBeDefined() + if (!plugin?.execute || !plugin.setup) + throw new Error('Expected page-name-enrichment plugin with setup/execute') + + expect(plugin.name).toBe('page-name-enrichment') + + const execute = plugin.execute + const setup = plugin.setup + type SetupFn = NonNullable + const getPageTitle = (evt: amplitude.Types.Event | null | undefined) => + (evt?.event_properties as Record | undefined)?.['[Amplitude] Page Title'] + + await setup( + {} as Parameters[0], + {} as Parameters[1], + ) + + const originalWindowLocation = window.location + try { + Object.defineProperty(window, 'location', { + value: { pathname: '/datasets' }, + writable: true, + }) + const event: amplitude.Types.Event = { + event_type: '[Amplitude] Page Viewed', + event_properties: {}, + } + const result = await execute(event) + expect(getPageTitle(result)).toBe('Knowledge') + window.location.pathname = '/' + await execute(event) + expect(getPageTitle(event)).toBe('Home') + window.location.pathname = '/apps' + await execute(event) + expect(getPageTitle(event)).toBe('Studio') + window.location.pathname = '/explore' + await execute(event) + expect(getPageTitle(event)).toBe('Explore') + window.location.pathname = '/tools' + await execute(event) + expect(getPageTitle(event)).toBe('Tools') + window.location.pathname = '/account' + await execute(event) + expect(getPageTitle(event)).toBe('Account') + window.location.pathname = '/signin' + await execute(event) + expect(getPageTitle(event)).toBe('Sign In') + window.location.pathname = '/signup' + await execute(event) + expect(getPageTitle(event)).toBe('Sign Up') + window.location.pathname = '/unknown' + await execute(event) + expect(getPageTitle(event)).toBe('Unknown') + const otherEvent = { + event_type: 'Button Clicked', + event_properties: {}, + } as amplitude.Types.Event + const otherResult = await execute(otherEvent) + expect(getPageTitle(otherResult)).toBeUndefined() + const noPropsEvent = { + event_type: '[Amplitude] Page Viewed', + } as amplitude.Types.Event + const noPropsResult = await execute(noPropsEvent) + expect(noPropsResult?.event_properties).toBeUndefined() + } + finally { + Object.defineProperty(window, 'location', { + value: originalWindowLocation, + writable: true, + }) + } + }) + }) +}) diff --git a/web/app/components/base/amplitude/AmplitudeProvider.tsx b/web/app/components/base/amplitude/AmplitudeProvider.tsx index 0f083a4a7d..e1d8e52eac 100644 --- a/web/app/components/base/amplitude/AmplitudeProvider.tsx +++ b/web/app/components/base/amplitude/AmplitudeProvider.tsx @@ -45,6 +45,7 @@ const pageNameEnrichmentPlugin = (): amplitude.Types.EnrichmentPlugin => { execute: async (event: amplitude.Types.Event) => { // Only modify page view events if (event.event_type === '[Amplitude] Page Viewed' && event.event_properties) { + /* v8 ignore next @preserve */ const pathname = typeof window !== 'undefined' ? window.location.pathname : '' event.event_properties['[Amplitude] Page Title'] = getEnglishPageName(pathname) } diff --git a/web/app/components/base/amplitude/index.spec.ts b/web/app/components/base/amplitude/index.spec.ts new file mode 100644 index 0000000000..919c0b68d1 --- /dev/null +++ b/web/app/components/base/amplitude/index.spec.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from 'vitest' +import AmplitudeProvider, { isAmplitudeEnabled } from './AmplitudeProvider' +import indexDefault, { + isAmplitudeEnabled as indexIsAmplitudeEnabled, + resetUser, + setUserId, + setUserProperties, + trackEvent, +} from './index' +import { + resetUser as utilsResetUser, + setUserId as utilsSetUserId, + setUserProperties as utilsSetUserProperties, + trackEvent as utilsTrackEvent, +} from './utils' + +describe('Amplitude index exports', () => { + it('exports AmplitudeProvider as default', () => { + expect(indexDefault).toBe(AmplitudeProvider) + }) + + it('exports isAmplitudeEnabled', () => { + expect(indexIsAmplitudeEnabled).toBe(isAmplitudeEnabled) + }) + + it('exports utils', () => { + expect(resetUser).toBe(utilsResetUser) + expect(setUserId).toBe(utilsSetUserId) + expect(setUserProperties).toBe(utilsSetUserProperties) + expect(trackEvent).toBe(utilsTrackEvent) + }) +}) diff --git a/web/app/components/base/amplitude/utils.spec.ts b/web/app/components/base/amplitude/utils.spec.ts new file mode 100644 index 0000000000..c69fc93aa4 --- /dev/null +++ b/web/app/components/base/amplitude/utils.spec.ts @@ -0,0 +1,119 @@ +import { resetUser, setUserId, setUserProperties, trackEvent } from './utils' + +const mockState = vi.hoisted(() => ({ + enabled: true, +})) + +const mockTrack = vi.hoisted(() => vi.fn()) +const mockSetUserId = vi.hoisted(() => vi.fn()) +const mockIdentify = vi.hoisted(() => vi.fn()) +const mockReset = vi.hoisted(() => vi.fn()) + +const MockIdentify = vi.hoisted(() => + class { + setCalls: Array<[string, unknown]> = [] + + set(key: string, value: unknown) { + this.setCalls.push([key, value]) + return this + } + }, +) + +vi.mock('./AmplitudeProvider', () => ({ + isAmplitudeEnabled: () => mockState.enabled, +})) + +vi.mock('@amplitude/analytics-browser', () => ({ + track: (...args: unknown[]) => mockTrack(...args), + setUserId: (...args: unknown[]) => mockSetUserId(...args), + identify: (...args: unknown[]) => mockIdentify(...args), + reset: (...args: unknown[]) => mockReset(...args), + Identify: MockIdentify, +})) + +describe('amplitude utils', () => { + beforeEach(() => { + vi.clearAllMocks() + mockState.enabled = true + }) + + describe('trackEvent', () => { + it('should call amplitude.track when amplitude is enabled', () => { + trackEvent('dataset_created', { source: 'wizard' }) + + expect(mockTrack).toHaveBeenCalledTimes(1) + expect(mockTrack).toHaveBeenCalledWith('dataset_created', { source: 'wizard' }) + }) + + it('should not call amplitude.track when amplitude is disabled', () => { + mockState.enabled = false + + trackEvent('dataset_created', { source: 'wizard' }) + + expect(mockTrack).not.toHaveBeenCalled() + }) + }) + + describe('setUserId', () => { + it('should call amplitude.setUserId when amplitude is enabled', () => { + setUserId('user-123') + + expect(mockSetUserId).toHaveBeenCalledTimes(1) + expect(mockSetUserId).toHaveBeenCalledWith('user-123') + }) + + it('should not call amplitude.setUserId when amplitude is disabled', () => { + mockState.enabled = false + + setUserId('user-123') + + expect(mockSetUserId).not.toHaveBeenCalled() + }) + }) + + describe('setUserProperties', () => { + it('should build identify event and call amplitude.identify when amplitude is enabled', () => { + const properties: Record = { + role: 'owner', + seats: 3, + verified: true, + } + + setUserProperties(properties) + + expect(mockIdentify).toHaveBeenCalledTimes(1) + const identifyArg = mockIdentify.mock.calls[0][0] as InstanceType + expect(identifyArg).toBeInstanceOf(MockIdentify) + expect(identifyArg.setCalls).toEqual([ + ['role', 'owner'], + ['seats', 3], + ['verified', true], + ]) + }) + + it('should not call amplitude.identify when amplitude is disabled', () => { + mockState.enabled = false + + setUserProperties({ role: 'owner' }) + + expect(mockIdentify).not.toHaveBeenCalled() + }) + }) + + describe('resetUser', () => { + it('should call amplitude.reset when amplitude is enabled', () => { + resetUser() + + expect(mockReset).toHaveBeenCalledTimes(1) + }) + + it('should not call amplitude.reset when amplitude is disabled', () => { + mockState.enabled = false + + resetUser() + + expect(mockReset).not.toHaveBeenCalled() + }) + }) +}) diff --git a/web/app/components/base/app-icon-picker/ImageInput.tsx b/web/app/components/base/app-icon-picker/ImageInput.tsx index e255b2cfe6..21ceae0fcf 100644 --- a/web/app/components/base/app-icon-picker/ImageInput.tsx +++ b/web/app/components/base/app-icon-picker/ImageInput.tsx @@ -42,6 +42,7 @@ const ImageInput: FC = ({ const [zoom, setZoom] = useState(1) const onCropComplete = async (_: Area, croppedAreaPixels: Area) => { + /* v8 ignore next -- unreachable guard when Cropper is rendered @preserve */ if (!inputImage) return onImageInput?.(true, inputImage.url, croppedAreaPixels, inputImage.file.name) diff --git a/web/app/components/base/audio-btn/__tests__/audio.player.manager.spec.ts b/web/app/components/base/audio-btn/__tests__/audio.player.manager.spec.ts new file mode 100644 index 0000000000..c613aa2c11 --- /dev/null +++ b/web/app/components/base/audio-btn/__tests__/audio.player.manager.spec.ts @@ -0,0 +1,148 @@ +import { AudioPlayerManager } from '../audio.player.manager' + +type AudioCallback = ((event: string) => void) | null +type AudioPlayerCtorArgs = [ + string, + boolean, + string | undefined, + string | null | undefined, + string | undefined, + AudioCallback, +] + +type MockAudioPlayerInstance = { + setCallback: ReturnType + pauseAudio: ReturnType + resetMsgId: ReturnType + cacheBuffers: Array + sourceBuffer: { + abort: ReturnType + } | undefined +} + +const mockState = vi.hoisted(() => ({ + instances: [] as MockAudioPlayerInstance[], +})) + +const mockAudioPlayerConstructor = vi.hoisted(() => vi.fn()) + +const MockAudioPlayer = vi.hoisted(() => { + return class MockAudioPlayerClass { + setCallback = vi.fn() + pauseAudio = vi.fn() + resetMsgId = vi.fn() + cacheBuffers = [new ArrayBuffer(1)] + sourceBuffer = { abort: vi.fn() } + + constructor(...args: AudioPlayerCtorArgs) { + mockAudioPlayerConstructor(...args) + mockState.instances.push(this as unknown as MockAudioPlayerInstance) + } + } +}) + +vi.mock('@/app/components/base/audio-btn/audio', () => ({ + default: MockAudioPlayer, +})) + +describe('AudioPlayerManager', () => { + beforeEach(() => { + vi.clearAllMocks() + mockState.instances = [] + Reflect.set(AudioPlayerManager, 'instance', undefined) + }) + + describe('getInstance', () => { + it('should return the same singleton instance across calls', () => { + const first = AudioPlayerManager.getInstance() + const second = AudioPlayerManager.getInstance() + + expect(first).toBe(second) + }) + }) + + describe('getAudioPlayer', () => { + it('should create a new audio player when no existing player is cached', () => { + const manager = AudioPlayerManager.getInstance() + const callback = vi.fn() + + const result = manager.getAudioPlayer('/text-to-audio', false, 'msg-1', 'hello', 'en-US', callback) + + expect(mockAudioPlayerConstructor).toHaveBeenCalledTimes(1) + expect(mockAudioPlayerConstructor).toHaveBeenCalledWith( + '/text-to-audio', + false, + 'msg-1', + 'hello', + 'en-US', + callback, + ) + expect(result).toBe(mockState.instances[0]) + }) + + it('should reuse existing player and update callback when msg id is unchanged', () => { + const manager = AudioPlayerManager.getInstance() + const firstCallback = vi.fn() + const secondCallback = vi.fn() + + const first = manager.getAudioPlayer('/text-to-audio', false, 'msg-1', 'hello', 'en-US', firstCallback) + const second = manager.getAudioPlayer('/ignored', true, 'msg-1', 'ignored', 'fr-FR', secondCallback) + + expect(mockAudioPlayerConstructor).toHaveBeenCalledTimes(1) + expect(first).toBe(second) + expect(mockState.instances[0].setCallback).toHaveBeenCalledTimes(1) + expect(mockState.instances[0].setCallback).toHaveBeenCalledWith(secondCallback) + }) + + it('should cleanup existing player and create a new one when msg id changes', () => { + const manager = AudioPlayerManager.getInstance() + const callback = vi.fn() + manager.getAudioPlayer('/text-to-audio', false, 'msg-1', 'hello', 'en-US', callback) + const previous = mockState.instances[0] + + const next = manager.getAudioPlayer('/apps/1/text-to-audio', false, 'msg-2', 'world', 'en-US', callback) + + expect(previous.pauseAudio).toHaveBeenCalledTimes(1) + expect(previous.cacheBuffers).toEqual([]) + expect(previous.sourceBuffer?.abort).toHaveBeenCalledTimes(1) + expect(mockAudioPlayerConstructor).toHaveBeenCalledTimes(2) + expect(next).toBe(mockState.instances[1]) + }) + + it('should swallow cleanup errors and still create a new player', () => { + const manager = AudioPlayerManager.getInstance() + const callback = vi.fn() + manager.getAudioPlayer('/text-to-audio', false, 'msg-1', 'hello', 'en-US', callback) + const previous = mockState.instances[0] + previous.pauseAudio.mockImplementation(() => { + throw new Error('cleanup failure') + }) + + expect(() => { + manager.getAudioPlayer('/apps/1/text-to-audio', false, 'msg-2', 'world', 'en-US', callback) + }).not.toThrow() + + expect(previous.pauseAudio).toHaveBeenCalledTimes(1) + expect(mockAudioPlayerConstructor).toHaveBeenCalledTimes(2) + }) + }) + + describe('resetMsgId', () => { + it('should forward reset message id to the cached audio player when present', () => { + const manager = AudioPlayerManager.getInstance() + const callback = vi.fn() + manager.getAudioPlayer('/text-to-audio', false, 'msg-1', 'hello', 'en-US', callback) + + manager.resetMsgId('msg-updated') + + expect(mockState.instances[0].resetMsgId).toHaveBeenCalledTimes(1) + expect(mockState.instances[0].resetMsgId).toHaveBeenCalledWith('msg-updated') + }) + + it('should not throw when resetting message id without an audio player', () => { + const manager = AudioPlayerManager.getInstance() + + expect(() => manager.resetMsgId('msg-updated')).not.toThrow() + }) + }) +}) diff --git a/web/app/components/base/audio-btn/__tests__/audio.spec.ts b/web/app/components/base/audio-btn/__tests__/audio.spec.ts new file mode 100644 index 0000000000..00ffea2dfb --- /dev/null +++ b/web/app/components/base/audio-btn/__tests__/audio.spec.ts @@ -0,0 +1,610 @@ +import { Buffer } from 'node:buffer' +import { waitFor } from '@testing-library/react' +import { AppSourceType } from '@/service/share' +import AudioPlayer from '../audio' + +const mockToastNotify = vi.hoisted(() => vi.fn()) +const mockTextToAudioStream = vi.hoisted(() => vi.fn()) + +vi.mock('@/app/components/base/toast', () => ({ + default: { + notify: (...args: unknown[]) => mockToastNotify(...args), + }, +})) + +vi.mock('@/service/share', () => ({ + AppSourceType: { + webApp: 'webApp', + installedApp: 'installedApp', + }, + textToAudioStream: (...args: unknown[]) => mockTextToAudioStream(...args), +})) + +type AudioEventName = 'ended' | 'paused' | 'loaded' | 'play' | 'timeupdate' | 'loadeddate' | 'canplay' | 'error' | 'sourceopen' + +type AudioEventListener = () => void + +type ReaderResult = { + value: Uint8Array | undefined + done: boolean +} + +type Reader = { + read: () => Promise +} + +type AudioResponse = { + status: number + body: { + getReader: () => Reader + } +} + +class MockSourceBuffer { + updating = false + appendBuffer = vi.fn((_buffer: ArrayBuffer) => undefined) + abort = vi.fn(() => undefined) +} + +class MockMediaSource { + readyState: 'open' | 'closed' = 'open' + sourceBuffer = new MockSourceBuffer() + private listeners: Partial> = {} + + addEventListener = vi.fn((event: AudioEventName, listener: AudioEventListener) => { + const listeners = this.listeners[event] || [] + listeners.push(listener) + this.listeners[event] = listeners + }) + + addSourceBuffer = vi.fn((_contentType: string) => this.sourceBuffer) + endOfStream = vi.fn(() => undefined) + + emit(event: AudioEventName) { + const listeners = this.listeners[event] || [] + listeners.forEach((listener) => { + listener() + }) + } +} + +class MockAudio { + src = '' + autoplay = false + disableRemotePlayback = false + controls = false + paused = true + ended = false + played: unknown = null + private listeners: Partial> = {} + + addEventListener = vi.fn((event: AudioEventName, listener: AudioEventListener) => { + const listeners = this.listeners[event] || [] + listeners.push(listener) + this.listeners[event] = listeners + }) + + play = vi.fn(async () => { + this.paused = false + }) + + pause = vi.fn(() => { + this.paused = true + }) + + emit(event: AudioEventName) { + const listeners = this.listeners[event] || [] + listeners.forEach((listener) => { + listener() + }) + } +} + +class MockAudioContext { + state: 'running' | 'suspended' = 'running' + destination = {} + connect = vi.fn(() => undefined) + createMediaElementSource = vi.fn((_audio: MockAudio) => ({ + connect: this.connect, + })) + + resume = vi.fn(async () => { + this.state = 'running' + }) + + suspend = vi.fn(() => { + this.state = 'suspended' + }) +} + +const testState = { + mediaSources: [] as MockMediaSource[], + audios: [] as MockAudio[], + audioContexts: [] as MockAudioContext[], +} + +class MockMediaSourceCtor extends MockMediaSource { + constructor() { + super() + testState.mediaSources.push(this) + } +} + +class MockAudioCtor extends MockAudio { + constructor() { + super() + testState.audios.push(this) + } +} + +class MockAudioContextCtor extends MockAudioContext { + constructor() { + super() + testState.audioContexts.push(this) + } +} + +const originalAudio = globalThis.Audio +const originalAudioContext = globalThis.AudioContext +const originalCreateObjectURL = globalThis.URL.createObjectURL +const originalMediaSource = window.MediaSource +const originalManagedMediaSource = window.ManagedMediaSource + +const setMediaSourceSupport = (options: { mediaSource: boolean, managedMediaSource: boolean }) => { + Object.defineProperty(window, 'MediaSource', { + configurable: true, + writable: true, + value: options.mediaSource ? MockMediaSourceCtor : undefined, + }) + Object.defineProperty(window, 'ManagedMediaSource', { + configurable: true, + writable: true, + value: options.managedMediaSource ? MockMediaSourceCtor : undefined, + }) +} + +const makeAudioResponse = (status: number, reads: ReaderResult[]): AudioResponse => { + const read = vi.fn<() => Promise>() + reads.forEach((result) => { + read.mockResolvedValueOnce(result) + }) + + return { + status, + body: { + getReader: () => ({ read }), + }, + } +} + +describe('AudioPlayer', () => { + beforeEach(() => { + vi.clearAllMocks() + testState.mediaSources = [] + testState.audios = [] + testState.audioContexts = [] + + Object.defineProperty(globalThis, 'Audio', { + configurable: true, + writable: true, + value: MockAudioCtor, + }) + Object.defineProperty(globalThis, 'AudioContext', { + configurable: true, + writable: true, + value: MockAudioContextCtor, + }) + Object.defineProperty(globalThis.URL, 'createObjectURL', { + configurable: true, + writable: true, + value: vi.fn(() => 'blob:mock-url'), + }) + + setMediaSourceSupport({ mediaSource: true, managedMediaSource: false }) + }) + + afterAll(() => { + Object.defineProperty(globalThis, 'Audio', { + configurable: true, + writable: true, + value: originalAudio, + }) + Object.defineProperty(globalThis, 'AudioContext', { + configurable: true, + writable: true, + value: originalAudioContext, + }) + Object.defineProperty(globalThis.URL, 'createObjectURL', { + configurable: true, + writable: true, + value: originalCreateObjectURL, + }) + Object.defineProperty(window, 'MediaSource', { + configurable: true, + writable: true, + value: originalMediaSource, + }) + Object.defineProperty(window, 'ManagedMediaSource', { + configurable: true, + writable: true, + value: originalManagedMediaSource, + }) + }) + + describe('constructor behavior', () => { + it('should initialize media source, audio, and media element source when MediaSource exists', () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + const audioContext = testState.audioContexts[0] + const mediaSource = testState.mediaSources[0] + + expect(player.mediaSource).toBe(mediaSource as unknown as MediaSource) + expect(globalThis.URL.createObjectURL).toHaveBeenCalledTimes(1) + expect(audio.src).toBe('blob:mock-url') + expect(audio.autoplay).toBe(true) + expect(audioContext.createMediaElementSource).toHaveBeenCalledWith(audio) + expect(audioContext.connect).toHaveBeenCalledTimes(1) + }) + + it('should notify unsupported browser when no MediaSource implementation exists', () => { + setMediaSourceSupport({ mediaSource: false, managedMediaSource: false }) + + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', null) + const audio = testState.audios[0] + + expect(player.mediaSource).toBeNull() + expect(audio.src).toBe('') + expect(mockToastNotify).toHaveBeenCalledTimes(1) + expect(mockToastNotify).toHaveBeenCalledWith( + expect.objectContaining({ + type: 'error', + }), + ) + }) + + it('should configure fallback audio controls when ManagedMediaSource is used', () => { + setMediaSourceSupport({ mediaSource: false, managedMediaSource: true }) + + // Create with callback to ensure constructor path completes with fallback source. + const player = new AudioPlayer('/text-to-audio', false, 'msg-1', 'hello', undefined, vi.fn()) + const audio = testState.audios[0] + + expect(player.mediaSource).not.toBeNull() + expect(audio.disableRemotePlayback).toBe(true) + expect(audio.controls).toBe(true) + }) + }) + + describe('event wiring', () => { + it('should forward registered audio events to callback', () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + + audio.emit('play') + audio.emit('ended') + audio.emit('error') + audio.emit('paused') + audio.emit('loaded') + audio.emit('timeupdate') + audio.emit('loadeddate') + audio.emit('canplay') + + expect(player.callback).toBe(callback) + expect(callback).toHaveBeenCalledWith('play') + expect(callback).toHaveBeenCalledWith('ended') + expect(callback).toHaveBeenCalledWith('error') + expect(callback).toHaveBeenCalledWith('paused') + expect(callback).toHaveBeenCalledWith('loaded') + expect(callback).toHaveBeenCalledWith('timeupdate') + expect(callback).toHaveBeenCalledWith('loadeddate') + expect(callback).toHaveBeenCalledWith('canplay') + }) + + it('should initialize source buffer only once when sourceopen fires multiple times', () => { + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', vi.fn()) + const mediaSource = testState.mediaSources[0] + + mediaSource.emit('sourceopen') + mediaSource.emit('sourceopen') + + expect(mediaSource.addSourceBuffer).toHaveBeenCalledTimes(1) + expect(player.sourceBuffer).toBe(mediaSource.sourceBuffer) + }) + }) + + describe('playback control', () => { + it('should request streaming audio when playAudio is called before loading', async () => { + mockTextToAudioStream.mockResolvedValue( + makeAudioResponse(200, [ + { value: new Uint8Array([4, 5]), done: false }, + { value: new Uint8Array([1, 2, 3]), done: true }, + ]), + ) + + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', vi.fn()) + player.playAudio() + + await waitFor(() => { + expect(mockTextToAudioStream).toHaveBeenCalledTimes(1) + }) + + expect(mockTextToAudioStream).toHaveBeenCalledWith( + '/text-to-audio', + AppSourceType.webApp, + { content_type: 'audio/mpeg' }, + { + message_id: 'msg-1', + streaming: true, + voice: 'en-US', + text: 'hello', + }, + ) + expect(player.isLoadData).toBe(true) + }) + + it('should emit error callback and reset load flag when stream response status is not 200', async () => { + const callback = vi.fn() + mockTextToAudioStream.mockResolvedValue( + makeAudioResponse(500, [{ value: new Uint8Array([1]), done: true }]), + ) + + const player = new AudioPlayer('/text-to-audio', false, 'msg-2', 'world', undefined, callback) + player.playAudio() + + await waitFor(() => { + expect(callback).toHaveBeenCalledWith('error') + }) + expect(player.isLoadData).toBe(false) + }) + + it('should resume and play immediately when playAudio is called in suspended loaded state', async () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', false, 'msg-1', 'hello', undefined, callback) + const audio = testState.audios[0] + const audioContext = testState.audioContexts[0] + + player.isLoadData = true + audioContext.state = 'suspended' + player.playAudio() + await Promise.resolve() + + expect(audioContext.resume).toHaveBeenCalledTimes(1) + expect(audio.play).toHaveBeenCalledTimes(1) + expect(callback).toHaveBeenCalledWith('play') + }) + + it('should play ended audio when data is already loaded', () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', false, 'msg-1', 'hello', undefined, callback) + const audio = testState.audios[0] + const audioContext = testState.audioContexts[0] + + player.isLoadData = true + audioContext.state = 'running' + audio.ended = true + player.playAudio() + + expect(audio.play).toHaveBeenCalledTimes(1) + expect(callback).toHaveBeenCalledWith('play') + }) + + it('should only emit play callback without replaying when loaded audio is already playing', () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', false, 'msg-1', 'hello', undefined, callback) + const audio = testState.audios[0] + const audioContext = testState.audioContexts[0] + + player.isLoadData = true + audioContext.state = 'running' + audio.ended = false + player.playAudio() + + expect(audio.play).not.toHaveBeenCalled() + expect(callback).toHaveBeenCalledWith('play') + }) + + it('should emit error callback when stream request throws', async () => { + const callback = vi.fn() + mockTextToAudioStream.mockRejectedValue(new Error('network failed')) + const player = new AudioPlayer('/text-to-audio', false, 'msg-2', 'world', undefined, callback) + + player.playAudio() + + await waitFor(() => { + expect(callback).toHaveBeenCalledWith('error') + }) + expect(player.isLoadData).toBe(false) + }) + + it('should call pause flow and notify paused event when pauseAudio is invoked', () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + const audioContext = testState.audioContexts[0] + + player.pauseAudio() + + expect(callback).toHaveBeenCalledWith('paused') + expect(audio.pause).toHaveBeenCalledTimes(1) + expect(audioContext.suspend).toHaveBeenCalledTimes(1) + }) + }) + + describe('message and direct-audio helpers', () => { + it('should update message id through resetMsgId', () => { + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', null) + + player.resetMsgId('msg-2') + + expect(player.msgId).toBe('msg-2') + }) + + it('should end stream without playback when playAudioWithAudio receives empty content', async () => { + vi.useFakeTimers() + try { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const mediaSource = testState.mediaSources[0] + + await player.playAudioWithAudio('', true) + await vi.advanceTimersByTimeAsync(40) + + expect(player.isLoadData).toBe(false) + expect(player.cacheBuffers).toHaveLength(0) + expect(mediaSource.endOfStream).toHaveBeenCalledTimes(1) + expect(callback).not.toHaveBeenCalledWith('play') + } + finally { + vi.useRealTimers() + } + }) + + it('should decode base64 and start playback when playAudioWithAudio is called with playable content', async () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + const audioContext = testState.audioContexts[0] + const mediaSource = testState.mediaSources[0] + const audioBase64 = Buffer.from('hello').toString('base64') + + mediaSource.emit('sourceopen') + audio.paused = true + await player.playAudioWithAudio(audioBase64, true) + await Promise.resolve() + + expect(player.isLoadData).toBe(true) + expect(player.cacheBuffers).toHaveLength(0) + expect(mediaSource.sourceBuffer.appendBuffer).toHaveBeenCalledTimes(1) + const appendedAudioData = mediaSource.sourceBuffer.appendBuffer.mock.calls[0][0] + expect(appendedAudioData).toBeInstanceOf(ArrayBuffer) + expect(appendedAudioData.byteLength).toBeGreaterThan(0) + expect(audioContext.resume).toHaveBeenCalledTimes(1) + expect(audio.play).toHaveBeenCalledTimes(1) + expect(callback).toHaveBeenCalledWith('play') + }) + + it('should skip playback when playAudioWithAudio is called with play=false', async () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + const audioContext = testState.audioContexts[0] + + await player.playAudioWithAudio(Buffer.from('hello').toString('base64'), false) + + expect(player.isLoadData).toBe(false) + expect(audioContext.resume).not.toHaveBeenCalled() + expect(audio.play).not.toHaveBeenCalled() + expect(callback).not.toHaveBeenCalledWith('play') + }) + + it('should play immediately for ended audio in playAudioWithAudio', async () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + + audio.paused = false + audio.ended = true + await player.playAudioWithAudio(Buffer.from('hello').toString('base64'), true) + + expect(audio.play).toHaveBeenCalledTimes(1) + expect(callback).toHaveBeenCalledWith('play') + }) + + it('should not replay when played list exists in playAudioWithAudio', async () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + + audio.paused = false + audio.ended = false + audio.played = {} + await player.playAudioWithAudio(Buffer.from('hello').toString('base64'), true) + + expect(audio.play).not.toHaveBeenCalled() + expect(callback).not.toHaveBeenCalledWith('play') + }) + + it('should replay when paused is false and played list is empty in playAudioWithAudio', async () => { + const callback = vi.fn() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', callback) + const audio = testState.audios[0] + + audio.paused = false + audio.ended = false + audio.played = null + await player.playAudioWithAudio(Buffer.from('hello').toString('base64'), true) + + expect(audio.play).toHaveBeenCalledTimes(1) + expect(callback).toHaveBeenCalledWith('play') + }) + }) + + describe('buffering internals', () => { + it('should finish stream when receiveAudioData gets an undefined chunk', () => { + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', null) + const finishStream = vi + .spyOn(player as unknown as { finishStream: () => void }, 'finishStream') + .mockImplementation(() => { }) + + ; (player as unknown as { receiveAudioData: (data: Uint8Array | undefined) => void }).receiveAudioData(undefined) + + expect(finishStream).toHaveBeenCalledTimes(1) + }) + + it('should finish stream when receiveAudioData gets empty bytes while source is open', () => { + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', null) + const finishStream = vi + .spyOn(player as unknown as { finishStream: () => void }, 'finishStream') + .mockImplementation(() => { }) + + ; (player as unknown as { receiveAudioData: (data: Uint8Array) => void }).receiveAudioData(new Uint8Array(0)) + + expect(finishStream).toHaveBeenCalledTimes(1) + }) + + it('should queue incoming buffer when source buffer is updating', () => { + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', null) + const mediaSource = testState.mediaSources[0] + mediaSource.emit('sourceopen') + mediaSource.sourceBuffer.updating = true + + ; (player as unknown as { receiveAudioData: (data: Uint8Array) => void }).receiveAudioData(new Uint8Array([1, 2, 3])) + + expect(player.cacheBuffers.length).toBe(1) + }) + + it('should append previously queued buffer before new one when source buffer is idle', () => { + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', null) + const mediaSource = testState.mediaSources[0] + mediaSource.emit('sourceopen') + + const existingBuffer = new ArrayBuffer(2) + player.cacheBuffers = [existingBuffer] + mediaSource.sourceBuffer.updating = false + + ; (player as unknown as { receiveAudioData: (data: Uint8Array) => void }).receiveAudioData(new Uint8Array([9])) + + expect(mediaSource.sourceBuffer.appendBuffer).toHaveBeenCalledTimes(1) + expect(mediaSource.sourceBuffer.appendBuffer).toHaveBeenCalledWith(existingBuffer) + expect(player.cacheBuffers.length).toBe(1) + }) + + it('should append cache chunks and end stream when finishStream drains buffers', () => { + vi.useFakeTimers() + const player = new AudioPlayer('/text-to-audio', true, 'msg-1', 'hello', 'en-US', null) + const mediaSource = testState.mediaSources[0] + mediaSource.emit('sourceopen') + mediaSource.sourceBuffer.updating = false + player.cacheBuffers = [new ArrayBuffer(3)] + + ; (player as unknown as { finishStream: () => void }).finishStream() + vi.advanceTimersByTime(50) + + expect(mediaSource.sourceBuffer.appendBuffer).toHaveBeenCalledTimes(1) + expect(mediaSource.endOfStream).toHaveBeenCalledTimes(1) + vi.useRealTimers() + }) + }) +}) diff --git a/web/app/components/base/audio-gallery/AudioPlayer.tsx b/web/app/components/base/audio-gallery/AudioPlayer.tsx index 4e5d5e61ab..cbf50ddc13 100644 --- a/web/app/components/base/audio-gallery/AudioPlayer.tsx +++ b/web/app/components/base/audio-gallery/AudioPlayer.tsx @@ -26,6 +26,7 @@ const AudioPlayer: React.FC = ({ src, srcs }) => { useEffect(() => { const audio = audioRef.current + /* v8 ignore next 2 - @preserve */ if (!audio) return @@ -64,7 +65,7 @@ const AudioPlayer: React.FC = ({ src, srcs }) => { if (primarySrc) { // Delayed generation of waveform data // eslint-disable-next-line ts/no-use-before-define - const timer = setTimeout(() => generateWaveformData(primarySrc), 1000) + const timer = setTimeout(generateWaveformData, 1000, primarySrc) return () => { audio.removeEventListener('loadedmetadata', setAudioData) audio.removeEventListener('timeupdate', setAudioTime) @@ -217,6 +218,7 @@ const AudioPlayer: React.FC = ({ src, srcs }) => { const drawWaveform = useCallback(() => { const canvas = canvasRef.current + /* v8 ignore next 2 - @preserve */ if (!canvas) return @@ -268,14 +270,20 @@ const AudioPlayer: React.FC = ({ src, srcs }) => { drawWaveform() }, [drawWaveform, bufferedTime, hasStartedPlaying]) - const handleMouseMove = useCallback((e: React.MouseEvent) => { + const handleMouseMove = useCallback((e: React.MouseEvent | React.TouchEvent) => { const canvas = canvasRef.current const audio = audioRef.current if (!canvas || !audio) return + const clientX = 'touches' in e + ? e.touches[0]?.clientX ?? e.changedTouches[0]?.clientX + : e.clientX + if (clientX === undefined) + return + const rect = canvas.getBoundingClientRect() - const percent = Math.min(Math.max(0, e.clientX - rect.left), rect.width) / rect.width + const percent = Math.min(Math.max(0, clientX - rect.left), rect.width) / rect.width const time = percent * duration // Check if the hovered position is within a buffered range before updating hoverTime @@ -289,20 +297,22 @@ const AudioPlayer: React.FC = ({ src, srcs }) => { return (
-