diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index ddec42e0ee..3998a69c36 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -6,6 +6,9 @@ "context": "..", "dockerfile": "Dockerfile" }, + "mounts": [ + "source=dify-dev-tmp,target=/tmp,type=volume" + ], "features": { "ghcr.io/devcontainers/features/node:1": { "nodeGypDependencies": true, @@ -34,19 +37,13 @@ }, "postStartCommand": "./.devcontainer/post_start_command.sh", "postCreateCommand": "./.devcontainer/post_create_command.sh" - // Features to add to the dev container. More info: https://containers.dev/features. // "features": {}, - // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [], - // Use 'postCreateCommand' to run commands after the container is created. // "postCreateCommand": "python --version", - // Configure tool-specific properties. // "customizations": {}, - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - // "remoteUser": "root" -} +} \ No newline at end of file diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh index ce9135476f..220f77e5ce 100755 --- a/.devcontainer/post_create_command.sh +++ b/.devcontainer/post_create_command.sh @@ -1,6 +1,7 @@ #!/bin/bash WORKSPACE_ROOT=$(pwd) +export COREPACK_ENABLE_DOWNLOAD_PROMPT=0 corepack enable cd web && pnpm install pipx install uv diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4bc4f085c2..106c26bbed 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -7,244 +7,243 @@ * @crazywoola @laipz8200 @Yeuoly # CODEOWNERS file -.github/CODEOWNERS @laipz8200 @crazywoola +/.github/CODEOWNERS @laipz8200 @crazywoola # Docs -docs/ @crazywoola +/docs/ @crazywoola # Backend (default owner, more specific rules below will override) -api/ @QuantumGhost +/api/ @QuantumGhost # Backend - MCP -api/core/mcp/ @Nov1c444 -api/core/entities/mcp_provider.py @Nov1c444 -api/services/tools/mcp_tools_manage_service.py @Nov1c444 -api/controllers/mcp/ @Nov1c444 -api/controllers/console/app/mcp_server.py @Nov1c444 -api/tests/**/*mcp* @Nov1c444 +/api/core/mcp/ @Nov1c444 +/api/core/entities/mcp_provider.py @Nov1c444 +/api/services/tools/mcp_tools_manage_service.py @Nov1c444 +/api/controllers/mcp/ @Nov1c444 +/api/controllers/console/app/mcp_server.py @Nov1c444 +/api/tests/**/*mcp* @Nov1c444 # Backend - Workflow - Engine (Core graph execution engine) -api/core/workflow/graph_engine/ @laipz8200 @QuantumGhost -api/core/workflow/runtime/ @laipz8200 @QuantumGhost -api/core/workflow/graph/ @laipz8200 @QuantumGhost -api/core/workflow/graph_events/ @laipz8200 @QuantumGhost -api/core/workflow/node_events/ @laipz8200 @QuantumGhost -api/core/model_runtime/ @laipz8200 @QuantumGhost +/api/core/workflow/graph_engine/ @laipz8200 @QuantumGhost +/api/core/workflow/runtime/ @laipz8200 @QuantumGhost +/api/core/workflow/graph/ @laipz8200 @QuantumGhost +/api/core/workflow/graph_events/ @laipz8200 @QuantumGhost +/api/core/workflow/node_events/ @laipz8200 @QuantumGhost +/api/core/model_runtime/ @laipz8200 @QuantumGhost # Backend - Workflow - Nodes (Agent, Iteration, Loop, LLM) -api/core/workflow/nodes/agent/ @Nov1c444 -api/core/workflow/nodes/iteration/ @Nov1c444 -api/core/workflow/nodes/loop/ @Nov1c444 -api/core/workflow/nodes/llm/ @Nov1c444 +/api/core/workflow/nodes/agent/ @Nov1c444 +/api/core/workflow/nodes/iteration/ @Nov1c444 +/api/core/workflow/nodes/loop/ @Nov1c444 +/api/core/workflow/nodes/llm/ @Nov1c444 # 
Backend - RAG (Retrieval Augmented Generation) -api/core/rag/ @JohnJyong -api/services/rag_pipeline/ @JohnJyong -api/services/dataset_service.py @JohnJyong -api/services/knowledge_service.py @JohnJyong -api/services/external_knowledge_service.py @JohnJyong -api/services/hit_testing_service.py @JohnJyong -api/services/metadata_service.py @JohnJyong -api/services/vector_service.py @JohnJyong -api/services/entities/knowledge_entities/ @JohnJyong -api/services/entities/external_knowledge_entities/ @JohnJyong -api/controllers/console/datasets/ @JohnJyong -api/controllers/service_api/dataset/ @JohnJyong -api/models/dataset.py @JohnJyong -api/tasks/rag_pipeline/ @JohnJyong -api/tasks/add_document_to_index_task.py @JohnJyong -api/tasks/batch_clean_document_task.py @JohnJyong -api/tasks/clean_document_task.py @JohnJyong -api/tasks/clean_notion_document_task.py @JohnJyong -api/tasks/document_indexing_task.py @JohnJyong -api/tasks/document_indexing_sync_task.py @JohnJyong -api/tasks/document_indexing_update_task.py @JohnJyong -api/tasks/duplicate_document_indexing_task.py @JohnJyong -api/tasks/recover_document_indexing_task.py @JohnJyong -api/tasks/remove_document_from_index_task.py @JohnJyong -api/tasks/retry_document_indexing_task.py @JohnJyong -api/tasks/sync_website_document_indexing_task.py @JohnJyong -api/tasks/batch_create_segment_to_index_task.py @JohnJyong -api/tasks/create_segment_to_index_task.py @JohnJyong -api/tasks/delete_segment_from_index_task.py @JohnJyong -api/tasks/disable_segment_from_index_task.py @JohnJyong -api/tasks/disable_segments_from_index_task.py @JohnJyong -api/tasks/enable_segment_to_index_task.py @JohnJyong -api/tasks/enable_segments_to_index_task.py @JohnJyong -api/tasks/clean_dataset_task.py @JohnJyong -api/tasks/deal_dataset_index_update_task.py @JohnJyong -api/tasks/deal_dataset_vector_index_task.py @JohnJyong +/api/core/rag/ @JohnJyong +/api/services/rag_pipeline/ @JohnJyong +/api/services/dataset_service.py @JohnJyong +/api/services/knowledge_service.py @JohnJyong +/api/services/external_knowledge_service.py @JohnJyong +/api/services/hit_testing_service.py @JohnJyong +/api/services/metadata_service.py @JohnJyong +/api/services/vector_service.py @JohnJyong +/api/services/entities/knowledge_entities/ @JohnJyong +/api/services/entities/external_knowledge_entities/ @JohnJyong +/api/controllers/console/datasets/ @JohnJyong +/api/controllers/service_api/dataset/ @JohnJyong +/api/models/dataset.py @JohnJyong +/api/tasks/rag_pipeline/ @JohnJyong +/api/tasks/add_document_to_index_task.py @JohnJyong +/api/tasks/batch_clean_document_task.py @JohnJyong +/api/tasks/clean_document_task.py @JohnJyong +/api/tasks/clean_notion_document_task.py @JohnJyong +/api/tasks/document_indexing_task.py @JohnJyong +/api/tasks/document_indexing_sync_task.py @JohnJyong +/api/tasks/document_indexing_update_task.py @JohnJyong +/api/tasks/duplicate_document_indexing_task.py @JohnJyong +/api/tasks/recover_document_indexing_task.py @JohnJyong +/api/tasks/remove_document_from_index_task.py @JohnJyong +/api/tasks/retry_document_indexing_task.py @JohnJyong +/api/tasks/sync_website_document_indexing_task.py @JohnJyong +/api/tasks/batch_create_segment_to_index_task.py @JohnJyong +/api/tasks/create_segment_to_index_task.py @JohnJyong +/api/tasks/delete_segment_from_index_task.py @JohnJyong +/api/tasks/disable_segment_from_index_task.py @JohnJyong +/api/tasks/disable_segments_from_index_task.py @JohnJyong +/api/tasks/enable_segment_to_index_task.py @JohnJyong +/api/tasks/enable_segments_to_index_task.py 
@JohnJyong +/api/tasks/clean_dataset_task.py @JohnJyong +/api/tasks/deal_dataset_index_update_task.py @JohnJyong +/api/tasks/deal_dataset_vector_index_task.py @JohnJyong # Backend - Plugins -api/core/plugin/ @Mairuis @Yeuoly @Stream29 -api/services/plugin/ @Mairuis @Yeuoly @Stream29 -api/controllers/console/workspace/plugin.py @Mairuis @Yeuoly @Stream29 -api/controllers/inner_api/plugin/ @Mairuis @Yeuoly @Stream29 -api/tasks/process_tenant_plugin_autoupgrade_check_task.py @Mairuis @Yeuoly @Stream29 +/api/core/plugin/ @Mairuis @Yeuoly @Stream29 +/api/services/plugin/ @Mairuis @Yeuoly @Stream29 +/api/controllers/console/workspace/plugin.py @Mairuis @Yeuoly @Stream29 +/api/controllers/inner_api/plugin/ @Mairuis @Yeuoly @Stream29 +/api/tasks/process_tenant_plugin_autoupgrade_check_task.py @Mairuis @Yeuoly @Stream29 # Backend - Trigger/Schedule/Webhook -api/controllers/trigger/ @Mairuis @Yeuoly -api/controllers/console/app/workflow_trigger.py @Mairuis @Yeuoly -api/controllers/console/workspace/trigger_providers.py @Mairuis @Yeuoly -api/core/trigger/ @Mairuis @Yeuoly -api/core/app/layers/trigger_post_layer.py @Mairuis @Yeuoly -api/services/trigger/ @Mairuis @Yeuoly -api/models/trigger.py @Mairuis @Yeuoly -api/fields/workflow_trigger_fields.py @Mairuis @Yeuoly -api/repositories/workflow_trigger_log_repository.py @Mairuis @Yeuoly -api/repositories/sqlalchemy_workflow_trigger_log_repository.py @Mairuis @Yeuoly -api/libs/schedule_utils.py @Mairuis @Yeuoly -api/services/workflow/scheduler.py @Mairuis @Yeuoly -api/schedule/trigger_provider_refresh_task.py @Mairuis @Yeuoly -api/schedule/workflow_schedule_task.py @Mairuis @Yeuoly -api/tasks/trigger_processing_tasks.py @Mairuis @Yeuoly -api/tasks/trigger_subscription_refresh_tasks.py @Mairuis @Yeuoly -api/tasks/workflow_schedule_tasks.py @Mairuis @Yeuoly -api/tasks/workflow_cfs_scheduler/ @Mairuis @Yeuoly -api/events/event_handlers/sync_plugin_trigger_when_app_created.py @Mairuis @Yeuoly -api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py @Mairuis @Yeuoly -api/events/event_handlers/sync_workflow_schedule_when_app_published.py @Mairuis @Yeuoly -api/events/event_handlers/sync_webhook_when_app_created.py @Mairuis @Yeuoly +/api/controllers/trigger/ @Mairuis @Yeuoly +/api/controllers/console/app/workflow_trigger.py @Mairuis @Yeuoly +/api/controllers/console/workspace/trigger_providers.py @Mairuis @Yeuoly +/api/core/trigger/ @Mairuis @Yeuoly +/api/core/app/layers/trigger_post_layer.py @Mairuis @Yeuoly +/api/services/trigger/ @Mairuis @Yeuoly +/api/models/trigger.py @Mairuis @Yeuoly +/api/fields/workflow_trigger_fields.py @Mairuis @Yeuoly +/api/repositories/workflow_trigger_log_repository.py @Mairuis @Yeuoly +/api/repositories/sqlalchemy_workflow_trigger_log_repository.py @Mairuis @Yeuoly +/api/libs/schedule_utils.py @Mairuis @Yeuoly +/api/services/workflow/scheduler.py @Mairuis @Yeuoly +/api/schedule/trigger_provider_refresh_task.py @Mairuis @Yeuoly +/api/schedule/workflow_schedule_task.py @Mairuis @Yeuoly +/api/tasks/trigger_processing_tasks.py @Mairuis @Yeuoly +/api/tasks/trigger_subscription_refresh_tasks.py @Mairuis @Yeuoly +/api/tasks/workflow_schedule_tasks.py @Mairuis @Yeuoly +/api/tasks/workflow_cfs_scheduler/ @Mairuis @Yeuoly +/api/events/event_handlers/sync_plugin_trigger_when_app_created.py @Mairuis @Yeuoly +/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py @Mairuis @Yeuoly +/api/events/event_handlers/sync_workflow_schedule_when_app_published.py @Mairuis @Yeuoly 
+/api/events/event_handlers/sync_webhook_when_app_created.py @Mairuis @Yeuoly # Backend - Async Workflow -api/services/async_workflow_service.py @Mairuis @Yeuoly -api/tasks/async_workflow_tasks.py @Mairuis @Yeuoly +/api/services/async_workflow_service.py @Mairuis @Yeuoly +/api/tasks/async_workflow_tasks.py @Mairuis @Yeuoly # Backend - Billing -api/services/billing_service.py @hj24 @zyssyz123 -api/controllers/console/billing/ @hj24 @zyssyz123 +/api/services/billing_service.py @hj24 @zyssyz123 +/api/controllers/console/billing/ @hj24 @zyssyz123 # Backend - Enterprise -api/configs/enterprise/ @GarfieldDai @GareArc -api/services/enterprise/ @GarfieldDai @GareArc -api/services/feature_service.py @GarfieldDai @GareArc -api/controllers/console/feature.py @GarfieldDai @GareArc -api/controllers/web/feature.py @GarfieldDai @GareArc +/api/configs/enterprise/ @GarfieldDai @GareArc +/api/services/enterprise/ @GarfieldDai @GareArc +/api/services/feature_service.py @GarfieldDai @GareArc +/api/controllers/console/feature.py @GarfieldDai @GareArc +/api/controllers/web/feature.py @GarfieldDai @GareArc # Backend - Database Migrations -api/migrations/ @snakevash @laipz8200 @MRZHUH +/api/migrations/ @snakevash @laipz8200 @MRZHUH # Backend - Vector DB Middleware -api/configs/middleware/vdb/* @JohnJyong +/api/configs/middleware/vdb/* @JohnJyong # Frontend -web/ @iamjoel +/web/ @iamjoel # Frontend - Web Tests -.github/workflows/web-tests.yml @iamjoel +/.github/workflows/web-tests.yml @iamjoel # Frontend - App - Orchestration -web/app/components/workflow/ @iamjoel @zxhlyh -web/app/components/workflow-app/ @iamjoel @zxhlyh -web/app/components/app/configuration/ @iamjoel @zxhlyh -web/app/components/app/app-publisher/ @iamjoel @zxhlyh +/web/app/components/workflow/ @iamjoel @zxhlyh +/web/app/components/workflow-app/ @iamjoel @zxhlyh +/web/app/components/app/configuration/ @iamjoel @zxhlyh +/web/app/components/app/app-publisher/ @iamjoel @zxhlyh # Frontend - WebApp - Chat -web/app/components/base/chat/ @iamjoel @zxhlyh +/web/app/components/base/chat/ @iamjoel @zxhlyh # Frontend - WebApp - Completion -web/app/components/share/text-generation/ @iamjoel @zxhlyh +/web/app/components/share/text-generation/ @iamjoel @zxhlyh # Frontend - App - List and Creation -web/app/components/apps/ @JzoNgKVO @iamjoel -web/app/components/app/create-app-dialog/ @JzoNgKVO @iamjoel -web/app/components/app/create-app-modal/ @JzoNgKVO @iamjoel -web/app/components/app/create-from-dsl-modal/ @JzoNgKVO @iamjoel +/web/app/components/apps/ @JzoNgKVO @iamjoel +/web/app/components/app/create-app-dialog/ @JzoNgKVO @iamjoel +/web/app/components/app/create-app-modal/ @JzoNgKVO @iamjoel +/web/app/components/app/create-from-dsl-modal/ @JzoNgKVO @iamjoel # Frontend - App - API Documentation -web/app/components/develop/ @JzoNgKVO @iamjoel +/web/app/components/develop/ @JzoNgKVO @iamjoel # Frontend - App - Logs and Annotations -web/app/components/app/workflow-log/ @JzoNgKVO @iamjoel -web/app/components/app/log/ @JzoNgKVO @iamjoel -web/app/components/app/log-annotation/ @JzoNgKVO @iamjoel -web/app/components/app/annotation/ @JzoNgKVO @iamjoel +/web/app/components/app/workflow-log/ @JzoNgKVO @iamjoel +/web/app/components/app/log/ @JzoNgKVO @iamjoel +/web/app/components/app/log-annotation/ @JzoNgKVO @iamjoel +/web/app/components/app/annotation/ @JzoNgKVO @iamjoel # Frontend - App - Monitoring -web/app/(commonLayout)/app/(appDetailLayout)/\[appId\]/overview/ @JzoNgKVO @iamjoel -web/app/components/app/overview/ @JzoNgKVO @iamjoel 
+/web/app/(commonLayout)/app/(appDetailLayout)/\[appId\]/overview/ @JzoNgKVO @iamjoel +/web/app/components/app/overview/ @JzoNgKVO @iamjoel # Frontend - App - Settings -web/app/components/app-sidebar/ @JzoNgKVO @iamjoel +/web/app/components/app-sidebar/ @JzoNgKVO @iamjoel # Frontend - RAG - Hit Testing -web/app/components/datasets/hit-testing/ @JzoNgKVO @iamjoel +/web/app/components/datasets/hit-testing/ @JzoNgKVO @iamjoel # Frontend - RAG - List and Creation -web/app/components/datasets/list/ @iamjoel @WTW0313 -web/app/components/datasets/create/ @iamjoel @WTW0313 -web/app/components/datasets/create-from-pipeline/ @iamjoel @WTW0313 -web/app/components/datasets/external-knowledge-base/ @iamjoel @WTW0313 +/web/app/components/datasets/list/ @iamjoel @WTW0313 +/web/app/components/datasets/create/ @iamjoel @WTW0313 +/web/app/components/datasets/create-from-pipeline/ @iamjoel @WTW0313 +/web/app/components/datasets/external-knowledge-base/ @iamjoel @WTW0313 # Frontend - RAG - Orchestration (general rule first, specific rules below override) -web/app/components/rag-pipeline/ @iamjoel @WTW0313 -web/app/components/rag-pipeline/components/rag-pipeline-main.tsx @iamjoel @zxhlyh -web/app/components/rag-pipeline/store/ @iamjoel @zxhlyh +/web/app/components/rag-pipeline/ @iamjoel @WTW0313 +/web/app/components/rag-pipeline/components/rag-pipeline-main.tsx @iamjoel @zxhlyh +/web/app/components/rag-pipeline/store/ @iamjoel @zxhlyh # Frontend - RAG - Documents List -web/app/components/datasets/documents/list.tsx @iamjoel @WTW0313 -web/app/components/datasets/documents/create-from-pipeline/ @iamjoel @WTW0313 +/web/app/components/datasets/documents/list.tsx @iamjoel @WTW0313 +/web/app/components/datasets/documents/create-from-pipeline/ @iamjoel @WTW0313 # Frontend - RAG - Segments List -web/app/components/datasets/documents/detail/ @iamjoel @WTW0313 +/web/app/components/datasets/documents/detail/ @iamjoel @WTW0313 # Frontend - RAG - Settings -web/app/components/datasets/settings/ @iamjoel @WTW0313 +/web/app/components/datasets/settings/ @iamjoel @WTW0313 # Frontend - Ecosystem - Plugins -web/app/components/plugins/ @iamjoel @zhsama +/web/app/components/plugins/ @iamjoel @zhsama # Frontend - Ecosystem - Tools -web/app/components/tools/ @iamjoel @Yessenia-d +/web/app/components/tools/ @iamjoel @Yessenia-d # Frontend - Ecosystem - MarketPlace -web/app/components/plugins/marketplace/ @iamjoel @Yessenia-d +/web/app/components/plugins/marketplace/ @iamjoel @Yessenia-d # Frontend - Login and Registration -web/app/signin/ @douxc @iamjoel -web/app/signup/ @douxc @iamjoel -web/app/reset-password/ @douxc @iamjoel - -web/app/install/ @douxc @iamjoel -web/app/init/ @douxc @iamjoel -web/app/forgot-password/ @douxc @iamjoel -web/app/account/ @douxc @iamjoel +/web/app/signin/ @douxc @iamjoel +/web/app/signup/ @douxc @iamjoel +/web/app/reset-password/ @douxc @iamjoel +/web/app/install/ @douxc @iamjoel +/web/app/init/ @douxc @iamjoel +/web/app/forgot-password/ @douxc @iamjoel +/web/app/account/ @douxc @iamjoel # Frontend - Service Authentication -web/service/base.ts @douxc @iamjoel +/web/service/base.ts @douxc @iamjoel # Frontend - WebApp Authentication and Access Control -web/app/(shareLayout)/components/ @douxc @iamjoel -web/app/(shareLayout)/webapp-signin/ @douxc @iamjoel -web/app/(shareLayout)/webapp-reset-password/ @douxc @iamjoel -web/app/components/app/app-access-control/ @douxc @iamjoel +/web/app/(shareLayout)/components/ @douxc @iamjoel +/web/app/(shareLayout)/webapp-signin/ @douxc @iamjoel 
+/web/app/(shareLayout)/webapp-reset-password/ @douxc @iamjoel +/web/app/components/app/app-access-control/ @douxc @iamjoel # Frontend - Explore Page -web/app/components/explore/ @CodingOnStar @iamjoel +/web/app/components/explore/ @CodingOnStar @iamjoel # Frontend - Personal Settings -web/app/components/header/account-setting/ @CodingOnStar @iamjoel -web/app/components/header/account-dropdown/ @CodingOnStar @iamjoel +/web/app/components/header/account-setting/ @CodingOnStar @iamjoel +/web/app/components/header/account-dropdown/ @CodingOnStar @iamjoel # Frontend - Analytics -web/app/components/base/ga/ @CodingOnStar @iamjoel +/web/app/components/base/ga/ @CodingOnStar @iamjoel # Frontend - Base Components -web/app/components/base/ @iamjoel @zxhlyh +/web/app/components/base/ @iamjoel @zxhlyh # Frontend - Utils and Hooks -web/utils/classnames.ts @iamjoel @zxhlyh -web/utils/time.ts @iamjoel @zxhlyh -web/utils/format.ts @iamjoel @zxhlyh -web/utils/clipboard.ts @iamjoel @zxhlyh -web/hooks/use-document-title.ts @iamjoel @zxhlyh +/web/utils/classnames.ts @iamjoel @zxhlyh +/web/utils/time.ts @iamjoel @zxhlyh +/web/utils/format.ts @iamjoel @zxhlyh +/web/utils/clipboard.ts @iamjoel @zxhlyh +/web/hooks/use-document-title.ts @iamjoel @zxhlyh # Frontend - Billing and Education -web/app/components/billing/ @iamjoel @zxhlyh -web/app/education-apply/ @iamjoel @zxhlyh +/web/app/components/billing/ @iamjoel @zxhlyh +/web/app/education-apply/ @iamjoel @zxhlyh # Frontend - Workspace -web/app/components/header/account-dropdown/workplace-selector/ @iamjoel @zxhlyh +/web/app/components/header/account-dropdown/workplace-selector/ @iamjoel @zxhlyh # Docker -docker/* @laipz8200 +/docker/* @laipz8200 diff --git a/api/.env.example b/api/.env.example index b87d9c7b02..9cbb111d31 100644 --- a/api/.env.example +++ b/api/.env.example @@ -116,6 +116,7 @@ ALIYUN_OSS_AUTH_VERSION=v1 ALIYUN_OSS_REGION=your-region # Don't start with '/'. OSS doesn't support leading slash in object names. 
ALIYUN_OSS_PATH=your-path +ALIYUN_CLOUDBOX_ID=your-cloudbox-id # Google Storage configuration GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name @@ -133,6 +134,7 @@ HUAWEI_OBS_BUCKET_NAME=your-bucket-name HUAWEI_OBS_SECRET_KEY=your-secret-key HUAWEI_OBS_ACCESS_KEY=your-access-key HUAWEI_OBS_SERVER=your-server-url +HUAWEI_OBS_PATH_STYLE=false # Baidu OBS Storage Configuration BAIDU_OBS_BUCKET_NAME=your-bucket-name @@ -690,7 +692,6 @@ ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5 ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20 # Maximum number of concurrent annotation import tasks per tenant ANNOTATION_IMPORT_MAX_CONCURRENT=5 - # Sandbox expired records clean configuration SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21 SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000 diff --git a/api/configs/middleware/storage/aliyun_oss_storage_config.py b/api/configs/middleware/storage/aliyun_oss_storage_config.py index 331c486d54..6df14175ae 100644 --- a/api/configs/middleware/storage/aliyun_oss_storage_config.py +++ b/api/configs/middleware/storage/aliyun_oss_storage_config.py @@ -41,3 +41,8 @@ class AliyunOSSStorageConfig(BaseSettings): description="Base path within the bucket to store objects (e.g., 'my-app-data/')", default=None, ) + + ALIYUN_CLOUDBOX_ID: str | None = Field( + description="Cloudbox id for aliyun cloudbox service", + default=None, + ) diff --git a/api/configs/middleware/storage/huawei_obs_storage_config.py b/api/configs/middleware/storage/huawei_obs_storage_config.py index 5b5cd2f750..46b6f2e68d 100644 --- a/api/configs/middleware/storage/huawei_obs_storage_config.py +++ b/api/configs/middleware/storage/huawei_obs_storage_config.py @@ -26,3 +26,8 @@ class HuaweiCloudOBSStorageConfig(BaseSettings): description="Endpoint URL for Huawei Cloud OBS (e.g., 'https://obs.cn-north-4.myhuaweicloud.com')", default=None, ) + + HUAWEI_OBS_PATH_STYLE: bool = Field( + description="Flag to indicate whether to use path-style URLs for OBS requests", + default=False, + ) diff --git a/api/controllers/console/auth/activate.py b/api/controllers/console/auth/activate.py index 6834656a7f..fe70d930fb 100644 --- a/api/controllers/console/auth/activate.py +++ b/api/controllers/console/auth/activate.py @@ -7,9 +7,9 @@ from controllers.console import console_ns from controllers.console.error import AlreadyActivateError from extensions.ext_database import db from libs.datetime_utils import naive_utc_now -from libs.helper import EmailStr, extract_remote_ip, timezone +from libs.helper import EmailStr, timezone from models import AccountStatus -from services.account_service import AccountService, RegisterService +from services.account_service import RegisterService DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}" @@ -93,7 +93,6 @@ class ActivateApi(Resource): "ActivationResponse", { "result": fields.String(description="Operation result"), - "data": fields.Raw(description="Login token data"), }, ), ) @@ -117,6 +116,4 @@ class ActivateApi(Resource): account.initialized_at = naive_utc_now() db.session.commit() - token_pair = AccountService.login(account, ip_address=extract_remote_ip(request)) - - return {"result": "success", "data": token_pair.model_dump()} + return {"result": "success"} diff --git a/api/controllers/service_api/app/conversation.py b/api/controllers/service_api/app/conversation.py index be6d837032..40e4bde389 100644 --- a/api/controllers/service_api/app/conversation.py +++ b/api/controllers/service_api/app/conversation.py @@ -4,7 +4,7 @@ from uuid import UUID from flask import request from flask_restx import 
Resource from flask_restx._http import HTTPStatus -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator from sqlalchemy.orm import Session from werkzeug.exceptions import BadRequest, NotFound @@ -51,6 +51,32 @@ class ConversationRenamePayload(BaseModel): class ConversationVariablesQuery(BaseModel): last_id: UUID | None = Field(default=None, description="Last variable ID for pagination") limit: int = Field(default=20, ge=1, le=100, description="Number of variables to return") + variable_name: str | None = Field( + default=None, description="Filter variables by name", min_length=1, max_length=255 + ) + + @field_validator("variable_name", mode="before") + @classmethod + def validate_variable_name(cls, v: str | None) -> str | None: + """ + Validate variable_name to prevent injection attacks. + """ + if v is None: + return v + + # Only allow safe characters: alphanumeric, underscore, hyphen, period + if not v.replace("-", "").replace("_", "").replace(".", "").isalnum(): + raise ValueError( + "Variable name can only contain letters, numbers, hyphens (-), underscores (_), and periods (.)" + ) + + # Prevent SQL injection patterns + dangerous_patterns = ["'", '"', ";", "--", "/*", "*/", "xp_", "sp_"] + for pattern in dangerous_patterns: + if pattern in v.lower(): + raise ValueError(f"Variable name contains invalid characters: {pattern}") + + return v class ConversationVariableUpdatePayload(BaseModel): @@ -199,7 +225,7 @@ class ConversationVariablesApi(Resource): try: return ConversationService.get_conversational_variable( - app_model, conversation_id, end_user, query_args.limit, last_id + app_model, conversation_id, end_user, query_args.limit, last_id, query_args.variable_name ) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") diff --git a/api/controllers/web/app.py b/api/controllers/web/app.py index 60193f5f15..db3b93a4dc 100644 --- a/api/controllers/web/app.py +++ b/api/controllers/web/app.py @@ -1,14 +1,13 @@ import logging from flask import request -from flask_restx import Resource, marshal_with, reqparse +from flask_restx import Resource, marshal_with +from pydantic import BaseModel, ConfigDict, Field from werkzeug.exceptions import Unauthorized from constants import HEADER_NAME_APP_CODE from controllers.common import fields -from controllers.web import web_ns -from controllers.web.error import AppUnavailableError -from controllers.web.wraps import WebApiResource +from controllers.common.schema import register_schema_models from core.app.app_config.common.parameters_mapping import get_parameters_from_feature_dict from libs.passport import PassportService from libs.token import extract_webapp_passport @@ -18,9 +17,23 @@ from services.enterprise.enterprise_service import EnterpriseService from services.feature_service import FeatureService from services.webapp_auth_service import WebAppAuthService +from . 
import web_ns +from .error import AppUnavailableError +from .wraps import WebApiResource + logger = logging.getLogger(__name__) +class AppAccessModeQuery(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + app_id: str | None = Field(default=None, alias="appId", description="Application ID") + app_code: str | None = Field(default=None, alias="appCode", description="Application code") + + +register_schema_models(web_ns, AppAccessModeQuery) + + @web_ns.route("/parameters") class AppParameterApi(WebApiResource): """Resource for app variables.""" @@ -96,21 +109,16 @@ class AppAccessMode(Resource): } ) def get(self): - parser = ( - reqparse.RequestParser() - .add_argument("appId", type=str, required=False, location="args") - .add_argument("appCode", type=str, required=False, location="args") - ) - args = parser.parse_args() + raw_args = request.args.to_dict() + args = AppAccessModeQuery.model_validate(raw_args) features = FeatureService.get_system_features() if not features.webapp_auth.enabled: return {"accessMode": "public"} - app_id = args.get("appId") - if args.get("appCode"): - app_code = args["appCode"] - app_id = AppService.get_app_id_by_code(app_code) + app_id = args.app_id + if args.app_code: + app_id = AppService.get_app_id_by_code(args.app_code) if not app_id: raise ValueError("appId or appCode must be provided") diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index 9f9aa4838c..5c7ea9e69a 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -1,9 +1,12 @@ import logging +from typing import Literal -from flask_restx import fields, marshal_with, reqparse -from flask_restx.inputs import int_range +from flask import request +from flask_restx import fields, marshal_with +from pydantic import BaseModel, Field, field_validator from werkzeug.exceptions import InternalServerError, NotFound +from controllers.common.schema import register_schema_models from controllers.web import web_ns from controllers.web.error import ( AppMoreLikeThisDisabledError, @@ -38,6 +41,33 @@ from services.message_service import MessageService logger = logging.getLogger(__name__) +class MessageListQuery(BaseModel): + conversation_id: str = Field(description="Conversation UUID") + first_id: str | None = Field(default=None, description="First message ID for pagination") + limit: int = Field(default=20, ge=1, le=100, description="Number of messages to return (1-100)") + + @field_validator("conversation_id", "first_id") + @classmethod + def validate_uuid(cls, value: str | None) -> str | None: + if value is None: + return value + return uuid_value(value) + + +class MessageFeedbackPayload(BaseModel): + rating: Literal["like", "dislike"] | None = Field(default=None, description="Feedback rating") + content: str | None = Field(default=None, description="Feedback content") + + +class MessageMoreLikeThisQuery(BaseModel): + response_mode: Literal["blocking", "streaming"] = Field( + description="Response mode", + ) + + +register_schema_models(web_ns, MessageListQuery, MessageFeedbackPayload, MessageMoreLikeThisQuery) + + @web_ns.route("/messages") class MessageListApi(WebApiResource): message_fields = { @@ -68,7 +98,11 @@ class MessageListApi(WebApiResource): @web_ns.doc( params={ "conversation_id": {"description": "Conversation UUID", "type": "string", "required": True}, - "first_id": {"description": "First message ID for pagination", "type": "string", "required": False}, + "first_id": { + "description": "First message ID for pagination", + "type": 
"string", + "required": False, + }, "limit": { "description": "Number of messages to return (1-100)", "type": "integer", @@ -93,17 +127,12 @@ class MessageListApi(WebApiResource): if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: raise NotChatAppError() - parser = ( - reqparse.RequestParser() - .add_argument("conversation_id", required=True, type=uuid_value, location="args") - .add_argument("first_id", type=uuid_value, location="args") - .add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args") - ) - args = parser.parse_args() + raw_args = request.args.to_dict() + query = MessageListQuery.model_validate(raw_args) try: return MessageService.pagination_by_first_id( - app_model, end_user, args["conversation_id"], args["first_id"], args["limit"] + app_model, end_user, query.conversation_id, query.first_id, query.limit ) except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") @@ -128,7 +157,7 @@ class MessageFeedbackApi(WebApiResource): "enum": ["like", "dislike"], "required": False, }, - "content": {"description": "Feedback content/comment", "type": "string", "required": False}, + "content": {"description": "Feedback content", "type": "string", "required": False}, } ) @web_ns.doc( @@ -145,20 +174,15 @@ class MessageFeedbackApi(WebApiResource): def post(self, app_model, end_user, message_id): message_id = str(message_id) - parser = ( - reqparse.RequestParser() - .add_argument("rating", type=str, choices=["like", "dislike", None], location="json") - .add_argument("content", type=str, location="json", default=None) - ) - args = parser.parse_args() + payload = MessageFeedbackPayload.model_validate(web_ns.payload or {}) try: MessageService.create_feedback( app_model=app_model, message_id=message_id, user=end_user, - rating=args.get("rating"), - content=args.get("content"), + rating=payload.rating, + content=payload.content, ) except MessageNotExistsError: raise NotFound("Message Not Exists.") @@ -170,17 +194,7 @@ class MessageFeedbackApi(WebApiResource): class MessageMoreLikeThisApi(WebApiResource): @web_ns.doc("Generate More Like This") @web_ns.doc(description="Generate a new completion similar to an existing message (completion apps only).") - @web_ns.doc( - params={ - "message_id": {"description": "Message UUID", "type": "string", "required": True}, - "response_mode": { - "description": "Response mode", - "type": "string", - "enum": ["blocking", "streaming"], - "required": True, - }, - } - ) + @web_ns.expect(web_ns.models[MessageMoreLikeThisQuery.__name__]) @web_ns.doc( responses={ 200: "Success", @@ -197,12 +211,10 @@ class MessageMoreLikeThisApi(WebApiResource): message_id = str(message_id) - parser = reqparse.RequestParser().add_argument( - "response_mode", type=str, required=True, choices=["blocking", "streaming"], location="args" - ) - args = parser.parse_args() + raw_args = request.args.to_dict() + query = MessageMoreLikeThisQuery.model_validate(raw_args) - streaming = args["response_mode"] == "streaming" + streaming = query.response_mode == "streaming" try: response = AppGenerateService.generate_more_like_this( diff --git a/api/core/app/apps/base_app_generator.py b/api/core/app/apps/base_app_generator.py index 02d58a07d1..a6aace168e 100644 --- a/api/core/app/apps/base_app_generator.py +++ b/api/core/app/apps/base_app_generator.py @@ -105,8 +105,9 @@ class BaseAppGenerator: variable_entity.type in {VariableEntityType.FILE, VariableEntityType.FILE_LIST} and not variable_entity.required ): - # Treat empty 
string (frontend default) or empty list as unset - if not value and isinstance(value, (str, list)): + # Treat empty string (frontend default) as unset + # For FILE_LIST, allow empty list [] to pass through + if isinstance(value, str) and not value: return None if variable_entity.type in { diff --git a/api/core/rag/datasource/keyword/jieba/jieba.py b/api/core/rag/datasource/keyword/jieba/jieba.py index 97052717db..0f19ecadc8 100644 --- a/api/core/rag/datasource/keyword/jieba/jieba.py +++ b/api/core/rag/datasource/keyword/jieba/jieba.py @@ -90,13 +90,17 @@ class Jieba(BaseKeyword): sorted_chunk_indices = self._retrieve_ids_by_query(keyword_table or {}, query, k) documents = [] + + segment_query_stmt = db.session.query(DocumentSegment).where( + DocumentSegment.dataset_id == self.dataset.id, DocumentSegment.index_node_id.in_(sorted_chunk_indices) + ) + if document_ids_filter: + segment_query_stmt = segment_query_stmt.where(DocumentSegment.document_id.in_(document_ids_filter)) + + segments = db.session.execute(segment_query_stmt).scalars().all() + segment_map = {segment.index_node_id: segment for segment in segments} for chunk_index in sorted_chunk_indices: - segment_query = db.session.query(DocumentSegment).where( - DocumentSegment.dataset_id == self.dataset.id, DocumentSegment.index_node_id == chunk_index - ) - if document_ids_filter: - segment_query = segment_query.where(DocumentSegment.document_id.in_(document_ids_filter)) - segment = segment_query.first() + segment = segment_map.get(chunk_index) if segment: documents.append( diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index a139fba4d0..9807cb4e6a 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -7,6 +7,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session, load_only from configs import dify_config +from core.db.session_factory import session_factory from core.model_manager import ModelManager from core.model_runtime.entities.model_entities import ModelType from core.rag.data_post_processor.data_post_processor import DataPostProcessor @@ -138,37 +139,47 @@ class RetrievalService: @classmethod def _deduplicate_documents(cls, documents: list[Document]) -> list[Document]: - """Deduplicate documents based on doc_id to avoid duplicate chunks in hybrid search.""" + """Deduplicate documents in O(n) while preserving first-seen order. + + Rules: + - For provider == "dify" and metadata["doc_id"] exists: keep the doc with the highest + metadata["score"] among duplicates; if a later duplicate has no score, ignore it. + - For non-dify documents (or dify without doc_id): deduplicate by content key + (provider, page_content), keeping the first occurrence. 
+ """ if not documents: return documents - unique_documents = [] - seen_doc_ids = set() + # Map of dedup key -> chosen Document + chosen: dict[tuple, Document] = {} + # Preserve the order of first appearance of each dedup key + order: list[tuple] = [] - for document in documents: - # For dify provider documents, use doc_id for deduplication - if document.provider == "dify" and document.metadata is not None and "doc_id" in document.metadata: - doc_id = document.metadata["doc_id"] - if doc_id not in seen_doc_ids: - seen_doc_ids.add(doc_id) - unique_documents.append(document) - # If duplicate, keep the one with higher score - elif "score" in document.metadata: - # Find existing document with same doc_id and compare scores - for i, existing_doc in enumerate(unique_documents): - if ( - existing_doc.metadata - and existing_doc.metadata.get("doc_id") == doc_id - and existing_doc.metadata.get("score", 0) < document.metadata.get("score", 0) - ): - unique_documents[i] = document - break + for doc in documents: + is_dify = doc.provider == "dify" + doc_id = (doc.metadata or {}).get("doc_id") if is_dify else None + + if is_dify and doc_id: + key = ("dify", doc_id) + if key not in chosen: + chosen[key] = doc + order.append(key) + else: + # Only replace if the new one has a score and it's strictly higher + if "score" in doc.metadata: + new_score = float(doc.metadata.get("score", 0.0)) + old_score = float(chosen[key].metadata.get("score", 0.0)) if chosen[key].metadata else 0.0 + if new_score > old_score: + chosen[key] = doc else: - # For non-dify documents, use content-based deduplication - if document not in unique_documents: - unique_documents.append(document) + # Content-based dedup for non-dify or dify without doc_id + content_key = (doc.provider or "dify", doc.page_content) + if content_key not in chosen: + chosen[content_key] = doc + order.append(content_key) + # If duplicate content appears, we keep the first occurrence (no score comparison) - return unique_documents + return [chosen[k] for k in order] @classmethod def _get_dataset(cls, dataset_id: str) -> Dataset | None: @@ -371,58 +382,96 @@ class RetrievalService: include_segment_ids = set() segment_child_map = {} segment_file_map = {} - with Session(bind=db.engine, expire_on_commit=False) as session: - # Process documents - for document in documents: - segment_id = None - attachment_info = None - child_chunk = None - document_id = document.metadata.get("document_id") - if document_id not in dataset_documents: - continue - dataset_document = dataset_documents[document_id] - if not dataset_document: - continue + valid_dataset_documents = {} + image_doc_ids = [] + child_index_node_ids = [] + index_node_ids = [] + doc_to_document_map = {} + for document in documents: + document_id = document.metadata.get("document_id") + if document_id not in dataset_documents: + continue - if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX: - # Handle parent-child documents - if document.metadata.get("doc_type") == DocType.IMAGE: - attachment_info_dict = cls.get_segment_attachment_info( - dataset_document.dataset_id, - dataset_document.tenant_id, - document.metadata.get("doc_id") or "", - session, - ) - if attachment_info_dict: - attachment_info = attachment_info_dict["attachment_info"] - segment_id = attachment_info_dict["segment_id"] - else: - child_index_node_id = document.metadata.get("doc_id") - child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id == child_index_node_id) - child_chunk = session.scalar(child_chunk_stmt) + 
dataset_document = dataset_documents[document_id] + if not dataset_document: + continue + valid_dataset_documents[document_id] = dataset_document - if not child_chunk: - continue - segment_id = child_chunk.segment_id + if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX: + doc_id = document.metadata.get("doc_id") or "" + doc_to_document_map[doc_id] = document + if document.metadata.get("doc_type") == DocType.IMAGE: + image_doc_ids.append(doc_id) + else: + child_index_node_ids.append(doc_id) + else: + doc_id = document.metadata.get("doc_id") or "" + doc_to_document_map[doc_id] = document + if document.metadata.get("doc_type") == DocType.IMAGE: + image_doc_ids.append(doc_id) + else: + index_node_ids.append(doc_id) - if not segment_id: - continue + image_doc_ids = [i for i in image_doc_ids if i] + child_index_node_ids = [i for i in child_index_node_ids if i] + index_node_ids = [i for i in index_node_ids if i] - segment = ( - session.query(DocumentSegment) - .where( - DocumentSegment.dataset_id == dataset_document.dataset_id, - DocumentSegment.enabled == True, - DocumentSegment.status == "completed", - DocumentSegment.id == segment_id, - ) - .first() - ) + segment_ids = [] + index_node_segments: list[DocumentSegment] = [] + segments: list[DocumentSegment] = [] + attachment_map = {} + child_chunk_map = {} + doc_segment_map = {} - if not segment: - continue + with session_factory.create_session() as session: + attachments = cls.get_segment_attachment_infos(image_doc_ids, session) + for attachment in attachments: + segment_ids.append(attachment["segment_id"]) + attachment_map[attachment["segment_id"]] = attachment + doc_segment_map[attachment["segment_id"]] = attachment["attachment_id"] + + child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id.in_(child_index_node_ids)) + child_index_nodes = session.execute(child_chunk_stmt).scalars().all() + + for i in child_index_nodes: + segment_ids.append(i.segment_id) + child_chunk_map[i.segment_id] = i + doc_segment_map[i.segment_id] = i.index_node_id + + if index_node_ids: + document_segment_stmt = select(DocumentSegment).where( + DocumentSegment.enabled == True, + DocumentSegment.status == "completed", + DocumentSegment.index_node_id.in_(index_node_ids), + ) + index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore + for index_node_segment in index_node_segments: + doc_segment_map[index_node_segment.id] = index_node_segment.index_node_id + if segment_ids: + document_segment_stmt = select(DocumentSegment).where( + DocumentSegment.enabled == True, + DocumentSegment.status == "completed", + DocumentSegment.id.in_(segment_ids), + ) + segments = session.execute(document_segment_stmt).scalars().all() # type: ignore + + if index_node_segments: + segments.extend(index_node_segments) + + for segment in segments: + doc_id = doc_segment_map.get(segment.id) + child_chunk = child_chunk_map.get(segment.id) + attachment_info = attachment_map.get(segment.id) + + if doc_id: + document = doc_to_document_map[doc_id] + ds_dataset_document: DatasetDocument | None = valid_dataset_documents.get( + document.metadata.get("document_id") + ) + + if ds_dataset_document and ds_dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX: if segment.id not in include_segment_ids: include_segment_ids.add(segment.id) if child_chunk: @@ -430,10 +479,10 @@ class RetrievalService: "id": child_chunk.id, "content": child_chunk.content, "position": child_chunk.position, - "score": document.metadata.get("score", 0.0), + 
"score": document.metadata.get("score", 0.0) if document else 0.0, } map_detail = { - "max_score": document.metadata.get("score", 0.0), + "max_score": document.metadata.get("score", 0.0) if document else 0.0, "child_chunks": [child_chunk_detail], } segment_child_map[segment.id] = map_detail @@ -452,13 +501,14 @@ class RetrievalService: "score": document.metadata.get("score", 0.0), } if segment.id in segment_child_map: - segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail) + segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail) # type: ignore segment_child_map[segment.id]["max_score"] = max( - segment_child_map[segment.id]["max_score"], document.metadata.get("score", 0.0) + segment_child_map[segment.id]["max_score"], + document.metadata.get("score", 0.0) if document else 0.0, ) else: segment_child_map[segment.id] = { - "max_score": document.metadata.get("score", 0.0), + "max_score": document.metadata.get("score", 0.0) if document else 0.0, "child_chunks": [child_chunk_detail], } if attachment_info: @@ -467,46 +517,11 @@ class RetrievalService: else: segment_file_map[segment.id] = [attachment_info] else: - # Handle normal documents - segment = None - if document.metadata.get("doc_type") == DocType.IMAGE: - attachment_info_dict = cls.get_segment_attachment_info( - dataset_document.dataset_id, - dataset_document.tenant_id, - document.metadata.get("doc_id") or "", - session, - ) - if attachment_info_dict: - attachment_info = attachment_info_dict["attachment_info"] - segment_id = attachment_info_dict["segment_id"] - document_segment_stmt = select(DocumentSegment).where( - DocumentSegment.dataset_id == dataset_document.dataset_id, - DocumentSegment.enabled == True, - DocumentSegment.status == "completed", - DocumentSegment.id == segment_id, - ) - segment = session.scalar(document_segment_stmt) - if segment: - segment_file_map[segment.id] = [attachment_info] - else: - index_node_id = document.metadata.get("doc_id") - if not index_node_id: - continue - document_segment_stmt = select(DocumentSegment).where( - DocumentSegment.dataset_id == dataset_document.dataset_id, - DocumentSegment.enabled == True, - DocumentSegment.status == "completed", - DocumentSegment.index_node_id == index_node_id, - ) - segment = session.scalar(document_segment_stmt) - - if not segment: - continue if segment.id not in include_segment_ids: include_segment_ids.add(segment.id) record = { "segment": segment, - "score": document.metadata.get("score"), # type: ignore + "score": document.metadata.get("score", 0.0), # type: ignore } if attachment_info: segment_file_map[segment.id] = [attachment_info] @@ -522,7 +537,7 @@ class RetrievalService: for record in records: if record["segment"].id in segment_child_map: record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks") # type: ignore - record["score"] = segment_child_map[record["segment"].id]["max_score"] + record["score"] = segment_child_map[record["segment"].id]["max_score"] # type: ignore if record["segment"].id in segment_file_map: record["files"] = segment_file_map[record["segment"].id] # type: ignore[assignment] @@ -565,6 +580,8 @@ class RetrievalService: flask_app: Flask, retrieval_method: RetrievalMethod, dataset: Dataset, + all_documents: list[Document], + exceptions: list[str], query: str | None = None, top_k: int = 4, score_threshold: float | None = 0.0, @@ -573,8 +590,6 @@ class RetrievalService: weights: dict | None = None, document_ids_filter: list[str] | None = None, attachment_id: str | None = None, - 
all_documents: list[Document] = [], - exceptions: list[str] = [], ): if not query and not attachment_id: return @@ -696,3 +711,37 @@ class RetrievalService: } return {"attachment_info": attachment_info, "segment_id": attachment_binding.segment_id} return None + + @classmethod + def get_segment_attachment_infos(cls, attachment_ids: list[str], session: Session) -> list[dict[str, Any]]: + attachment_infos = [] + upload_files = session.query(UploadFile).where(UploadFile.id.in_(attachment_ids)).all() + if upload_files: + upload_file_ids = [upload_file.id for upload_file in upload_files] + attachment_bindings = ( + session.query(SegmentAttachmentBinding) + .where(SegmentAttachmentBinding.attachment_id.in_(upload_file_ids)) + .all() + ) + attachment_binding_map = {binding.attachment_id: binding for binding in attachment_bindings} + + if attachment_bindings: + for upload_file in upload_files: + attachment_binding = attachment_binding_map.get(upload_file.id) + attachment_info = { + "id": upload_file.id, + "name": upload_file.name, + "extension": "." + upload_file.extension, + "mime_type": upload_file.mime_type, + "source_url": sign_upload_file(upload_file.id, upload_file.extension), + "size": upload_file.size, + } + if attachment_binding: + attachment_infos.append( + { + "attachment_id": attachment_binding.attachment_id, + "attachment_info": attachment_info, + "segment_id": attachment_binding.segment_id, + } + ) + return attachment_infos diff --git a/api/core/rag/datasource/vdb/oracle/oraclevector.py b/api/core/rag/datasource/vdb/oracle/oraclevector.py index d82ab89a34..cb05c22b55 100644 --- a/api/core/rag/datasource/vdb/oracle/oraclevector.py +++ b/api/core/rag/datasource/vdb/oracle/oraclevector.py @@ -289,7 +289,8 @@ class OracleVector(BaseVector): words = pseg.cut(query) current_entity = "" for word, pos in words: - if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}: # nr: 人名,ns: 地名,nt: 机构名 + # `nr`: Person, `ns`: Location, `nt`: Organization + if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}: current_entity += word else: if current_entity: diff --git a/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py b/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py index 86b6ace3f6..d080e8da58 100644 --- a/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py +++ b/api/core/rag/datasource/vdb/pyvastbase/vastbase_vector.py @@ -213,7 +213,7 @@ class VastbaseVector(BaseVector): with self._get_cursor() as cur: cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension)) - # Vastbase 支持的向量维度取值范围为 [1,16000] + # Vastbase supports vector dimensions in the range [1, 16,000] if dimension <= 16000: cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name)) redis_client.set(collection_exist_cache_key, 1, ex=3600) diff --git a/api/core/rag/index_processor/index_processor_base.py b/api/core/rag/index_processor/index_processor_base.py index 8a28eb477a..e36b54eedd 100644 --- a/api/core/rag/index_processor/index_processor_base.py +++ b/api/core/rag/index_processor/index_processor_base.py @@ -231,7 +231,7 @@ class BaseIndexProcessor(ABC): if not filename: parsed_url = urlparse(image_url) - # unquote 处理 URL 中的中文 + # Decode percent-encoded characters in the URL path. 
path = unquote(parsed_url.path) filename = os.path.basename(path) diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 635eab73f0..baf879df95 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -151,20 +151,14 @@ class DatasetRetrieval: if ModelFeature.TOOL_CALL in features or ModelFeature.MULTI_TOOL_CALL in features: planning_strategy = PlanningStrategy.ROUTER available_datasets = [] - for dataset_id in dataset_ids: - # get dataset from dataset id - dataset_stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id) - dataset = db.session.scalar(dataset_stmt) - # pass if dataset is not available - if not dataset: + dataset_stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id.in_(dataset_ids)) + datasets: list[Dataset] = db.session.execute(dataset_stmt).scalars().all() # type: ignore + for dataset in datasets: + if dataset.available_document_count == 0 and dataset.provider != "external": continue - - # pass if dataset is not available - if dataset and dataset.available_document_count == 0 and dataset.provider != "external": - continue - available_datasets.append(dataset) + if inputs: inputs = {key: str(value) for key, value in inputs.items()} else: @@ -282,26 +276,35 @@ class DatasetRetrieval: ) context_files.append(attachment_info) if show_retrieve_source: + dataset_ids = [record.segment.dataset_id for record in records] + document_ids = [record.segment.document_id for record in records] + dataset_document_stmt = select(DatasetDocument).where( + DatasetDocument.id.in_(document_ids), + DatasetDocument.enabled == True, + DatasetDocument.archived == False, + ) + documents = db.session.execute(dataset_document_stmt).scalars().all() # type: ignore + dataset_stmt = select(Dataset).where( + Dataset.id.in_(dataset_ids), + ) + datasets = db.session.execute(dataset_stmt).scalars().all() # type: ignore + dataset_map = {i.id: i for i in datasets} + document_map = {i.id: i for i in documents} for record in records: segment = record.segment - dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first() - dataset_document_stmt = select(DatasetDocument).where( - DatasetDocument.id == segment.document_id, - DatasetDocument.enabled == True, - DatasetDocument.archived == False, - ) - document = db.session.scalar(dataset_document_stmt) - if dataset and document: + dataset_item = dataset_map.get(segment.dataset_id) + document_item = document_map.get(segment.document_id) + if dataset_item and document_item: source = RetrievalSourceMetadata( - dataset_id=dataset.id, - dataset_name=dataset.name, - document_id=document.id, - document_name=document.name, - data_source_type=document.data_source_type, + dataset_id=dataset_item.id, + dataset_name=dataset_item.name, + document_id=document_item.id, + document_name=document_item.name, + data_source_type=document_item.data_source_type, segment_id=segment.id, retriever_from=invoke_from.to_source(), score=record.score or 0.0, - doc_metadata=document.doc_metadata, + doc_metadata=document_item.doc_metadata, ) if invoke_from.to_source() == "dev": diff --git a/api/extensions/storage/aliyun_oss_storage.py b/api/extensions/storage/aliyun_oss_storage.py index 2283581f62..3d7ef99c9e 100644 --- a/api/extensions/storage/aliyun_oss_storage.py +++ b/api/extensions/storage/aliyun_oss_storage.py @@ -26,6 +26,7 @@ class AliyunOssStorage(BaseStorage): self.bucket_name, connect_timeout=30, region=region, + 
cloudbox_id=dify_config.ALIYUN_CLOUDBOX_ID,
         )
 
     def save(self, filename, data):
diff --git a/api/extensions/storage/huawei_obs_storage.py b/api/extensions/storage/huawei_obs_storage.py
index 74fed26f65..72cb59abbe 100644
--- a/api/extensions/storage/huawei_obs_storage.py
+++ b/api/extensions/storage/huawei_obs_storage.py
@@ -17,6 +17,7 @@ class HuaweiObsStorage(BaseStorage):
             access_key_id=dify_config.HUAWEI_OBS_ACCESS_KEY,
             secret_access_key=dify_config.HUAWEI_OBS_SECRET_KEY,
             server=dify_config.HUAWEI_OBS_SERVER,
+            path_style=dify_config.HUAWEI_OBS_PATH_STYLE,
         )
 
     def save(self, filename, data):
diff --git a/api/pyproject.toml b/api/pyproject.toml
index 870de33f4b..6716603dd4 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -69,7 +69,7 @@ dependencies = [
     "pydantic-extra-types~=2.10.3",
     "pydantic-settings~=2.11.0",
     "pyjwt~=2.10.1",
-    "pypdfium2==4.30.0",
+    "pypdfium2==5.2.0",
     "python-docx~=1.1.0",
     "python-dotenv==1.0.1",
     "pyyaml~=6.0.1",
diff --git a/api/services/conversation_service.py b/api/services/conversation_service.py
index 5253199552..659e7406fb 100644
--- a/api/services/conversation_service.py
+++ b/api/services/conversation_service.py
@@ -6,7 +6,9 @@ from typing import Any, Union
 from sqlalchemy import asc, desc, func, or_, select
 from sqlalchemy.orm import Session
 
+from configs import dify_config
 from core.app.entities.app_invoke_entities import InvokeFrom
+from core.db.session_factory import session_factory
 from core.llm_generator.llm_generator import LLMGenerator
 from core.variables.types import SegmentType
 from core.workflow.nodes.variable_assigner.common.impl import conversation_variable_updater_factory
@@ -202,6 +204,7 @@ class ConversationService:
         user: Union[Account, EndUser] | None,
         limit: int,
         last_id: str | None,
+        variable_name: str | None = None,
     ) -> InfiniteScrollPagination:
         conversation = cls.get_conversation(app_model, conversation_id, user)
 
@@ -212,7 +215,25 @@
             .order_by(ConversationVariable.created_at)
         )
 
-        with Session(db.engine) as session:
+        # Apply variable_name filter if provided
+        if variable_name:
+            # Escape SQL LIKE wildcard characters in the user-supplied value
+            escaped_variable_name = variable_name.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+            # Filter using JSON extraction to match variable names case-insensitively
+            if dify_config.DB_TYPE in ["mysql", "oceanbase", "seekdb"]:
+                stmt = stmt.where(
+                    func.json_extract(ConversationVariable.data, "$.name").ilike(
+                        f"%{escaped_variable_name}%", escape="\\"
+                    )
+                )
+            elif dify_config.DB_TYPE == "postgresql":
+                stmt = stmt.where(
+                    func.json_extract_path_text(ConversationVariable.data, "name").ilike(
+                        f"%{escaped_variable_name}%", escape="\\"
+                    )
+                )
+
+        with session_factory.create_session() as session:
             if last_id:
                 last_variable = session.scalar(stmt.where(ConversationVariable.id == last_id))
                 if not last_variable:
@@ -279,7 +300,7 @@
             .where(ConversationVariable.id == variable_id)
         )
 
-        with Session(db.engine) as session:
+        with session_factory.create_session() as session:
             existing_variable = session.scalar(stmt)
             if not existing_variable:
                 raise ConversationVariableNotExistsError()
diff --git a/api/tests/unit_tests/controllers/console/auth/test_account_activation.py b/api/tests/unit_tests/controllers/console/auth/test_account_activation.py
index 4192fb2ca7..da21e0e358 100644
--- a/api/tests/unit_tests/controllers/console/auth/test_account_activation.py
+++ 
diff --git a/api/tests/unit_tests/controllers/console/auth/test_account_activation.py b/api/tests/unit_tests/controllers/console/auth/test_account_activation.py
index 4192fb2ca7..da21e0e358 100644
--- a/api/tests/unit_tests/controllers/console/auth/test_account_activation.py
+++ b/api/tests/unit_tests/controllers/console/auth/test_account_activation.py
@@ -163,34 +163,17 @@ class TestActivateApi:
             "account": mock_account,
         }

-    @pytest.fixture
-    def mock_token_pair(self):
-        """Create mock token pair object."""
-        token_pair = MagicMock()
-        token_pair.access_token = "access_token"
-        token_pair.refresh_token = "refresh_token"
-        token_pair.csrf_token = "csrf_token"
-        token_pair.model_dump.return_value = {
-            "access_token": "access_token",
-            "refresh_token": "refresh_token",
-            "csrf_token": "csrf_token",
-        }
-        return token_pair
-
     @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
     @patch("controllers.console.auth.activate.RegisterService.revoke_token")
     @patch("controllers.console.auth.activate.db")
-    @patch("controllers.console.auth.activate.AccountService.login")
     def test_successful_account_activation(
         self,
-        mock_login,
         mock_db,
         mock_revoke_token,
         mock_get_invitation,
         app,
         mock_invitation,
         mock_account,
-        mock_token_pair,
     ):
         """
         Test successful account activation.
@@ -198,12 +181,10 @@
         Verifies that:
         - Account is activated with user preferences
         - Account status is set to ACTIVE
-        - User is logged in after activation
         - Invitation token is revoked
         """
         # Arrange
         mock_get_invitation.return_value = mock_invitation
-        mock_login.return_value = mock_token_pair

         # Act
         with app.test_request_context(
@@ -230,7 +211,6 @@
         assert mock_account.initialized_at is not None
         mock_revoke_token.assert_called_once_with("workspace-123", "invitee@example.com", "valid_token")
         mock_db.session.commit.assert_called_once()
-        mock_login.assert_called_once()

     @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
     def test_activation_with_invalid_token(self, mock_get_invitation, app):
@@ -264,17 +244,14 @@
     @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
     @patch("controllers.console.auth.activate.RegisterService.revoke_token")
     @patch("controllers.console.auth.activate.db")
-    @patch("controllers.console.auth.activate.AccountService.login")
     def test_activation_sets_interface_theme(
         self,
-        mock_login,
         mock_db,
         mock_revoke_token,
         mock_get_invitation,
         app,
         mock_invitation,
         mock_account,
-        mock_token_pair,
     ):
         """
         Test that activation sets default interface theme.
@@ -284,7 +261,6 @@ class TestActivateApi:
         """
         # Arrange
         mock_get_invitation.return_value = mock_invitation
-        mock_login.return_value = mock_token_pair

         # Act
         with app.test_request_context(
@@ -317,17 +293,14 @@
     @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
     @patch("controllers.console.auth.activate.RegisterService.revoke_token")
     @patch("controllers.console.auth.activate.db")
-    @patch("controllers.console.auth.activate.AccountService.login")
     def test_activation_with_different_locales(
         self,
-        mock_login,
         mock_db,
         mock_revoke_token,
         mock_get_invitation,
         app,
         mock_invitation,
         mock_account,
-        mock_token_pair,
         language,
         timezone,
     ):
@@ -341,7 +314,6 @@
         """
         # Arrange
         mock_get_invitation.return_value = mock_invitation
-        mock_login.return_value = mock_token_pair

         # Act
         with app.test_request_context(
@@ -367,27 +339,23 @@
     @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
     @patch("controllers.console.auth.activate.RegisterService.revoke_token")
     @patch("controllers.console.auth.activate.db")
-    @patch("controllers.console.auth.activate.AccountService.login")
-    def test_activation_returns_token_data(
+    def test_activation_returns_success_response(
         self,
-        mock_login,
         mock_db,
         mock_revoke_token,
         mock_get_invitation,
         app,
         mock_invitation,
-        mock_token_pair,
     ):
         """
-        Test that activation returns authentication tokens.
+        Test that activation returns a success response without authentication tokens.

         Verifies that:
-        - Token pair is returned in response
-        - All token types are included (access, refresh, csrf)
+        - Response contains a success result
+        - No token data is returned
         """
         # Arrange
         mock_get_invitation.return_value = mock_invitation
-        mock_login.return_value = mock_token_pair

         # Act
         with app.test_request_context(
@@ -406,24 +374,18 @@
             response = api.post()

         # Assert
-        assert "data" in response
-        assert response["data"]["access_token"] == "access_token"
-        assert response["data"]["refresh_token"] == "refresh_token"
-        assert response["data"]["csrf_token"] == "csrf_token"
+        assert response == {"result": "success"}

     @patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
     @patch("controllers.console.auth.activate.RegisterService.revoke_token")
     @patch("controllers.console.auth.activate.db")
-    @patch("controllers.console.auth.activate.AccountService.login")
     def test_activation_without_workspace_id(
         self,
-        mock_login,
         mock_db,
         mock_revoke_token,
         mock_get_invitation,
         app,
         mock_invitation,
-        mock_token_pair,
     ):
         """
         Test account activation without workspace_id.
@@ -434,7 +396,6 @@
         """
         # Arrange
         mock_get_invitation.return_value = mock_invitation
-        mock_login.return_value = mock_token_pair

         # Act
         with app.test_request_context(
diff --git a/api/tests/unit_tests/core/app/apps/test_base_app_generator.py b/api/tests/unit_tests/core/app/apps/test_base_app_generator.py
index d622c3a555..1000d71399 100644
--- a/api/tests/unit_tests/core/app/apps/test_base_app_generator.py
+++ b/api/tests/unit_tests/core/app/apps/test_base_app_generator.py
@@ -287,7 +287,7 @@ def test_validate_inputs_optional_file_with_empty_string():


 def test_validate_inputs_optional_file_list_with_empty_list():
-    """Test that optional FILE_LIST variable with empty list returns None"""
+    """Test that an optional FILE_LIST variable with an empty list returns an empty list (not None)"""
     base_app_generator = BaseAppGenerator()

     var_file_list = VariableEntity(
@@ -302,6 +302,28 @@ def test_validate_inputs_optional_file_list_with_empty_list():
         value=[],
     )

+    # Empty list should be preserved, not converted to None
+    # This allows downstream components like document_extractor to handle empty lists properly
+    assert result == []
+
+
+def test_validate_inputs_optional_file_list_with_empty_string():
+    """Test that an optional FILE_LIST variable with an empty string returns None"""
+    base_app_generator = BaseAppGenerator()
+
+    var_file_list = VariableEntity(
+        variable="test_file_list",
+        label="test_file_list",
+        type=VariableEntityType.FILE_LIST,
+        required=False,
+    )
+
+    result = base_app_generator._validate_inputs(
+        variable_entity=var_file_list,
+        value="",
+    )
+
+    # Empty string should be treated as unset
     assert result is None
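Reviewer note: the two tests above pin down a subtle distinction in input validation. The sketch below is an assumed simplification of what `BaseAppGenerator._validate_inputs` does for an optional FILE_LIST input, not the actual implementation: an empty list is meaningful and preserved, while an empty string means "unset" and becomes None.

```python
def normalize_optional_file_list(value):
    """Illustrative reduction of the FILE_LIST rule the tests assert."""
    if value == "":
        return None   # empty string: treated as unset
    if value == []:
        return []     # empty list: preserved for downstream consumers
    return value


assert normalize_optional_file_list("") is None
assert normalize_optional_file_list([]) == []
```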
"2024-05-09T18:32:51.458Z" }, - { url = "https://files.pythonhosted.org/packages/11/63/28a73ca17c24b41a205d658e177d68e198d7dde65a8c99c821d231b6ee3d/pypdfium2-4.30.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6e50f5ce7f65a40a33d7c9edc39f23140c57e37144c2d6d9e9262a2a854854", size = 2793126, upload-time = "2024-05-09T18:32:53.581Z" }, - { url = "https://files.pythonhosted.org/packages/d1/96/53b3ebf0955edbd02ac6da16a818ecc65c939e98fdeb4e0958362bd385c8/pypdfium2-4.30.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3d0dd3ecaffd0b6dbda3da663220e705cb563918249bda26058c6036752ba3a2", size = 2591077, upload-time = "2024-05-09T18:32:55.99Z" }, - { url = "https://files.pythonhosted.org/packages/ec/ee/0394e56e7cab8b5b21f744d988400948ef71a9a892cbeb0b200d324ab2c7/pypdfium2-4.30.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc3bf29b0db8c76cdfaac1ec1cde8edf211a7de7390fbf8934ad2aa9b4d6dfad", size = 2864431, upload-time = "2024-05-09T18:32:57.911Z" }, - { url = "https://files.pythonhosted.org/packages/65/cd/3f1edf20a0ef4a212a5e20a5900e64942c5a374473671ac0780eaa08ea80/pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1f78d2189e0ddf9ac2b7a9b9bd4f0c66f54d1389ff6c17e9fd9dc034d06eb3f", size = 2812008, upload-time = "2024-05-09T18:32:59.886Z" }, - { url = "https://files.pythonhosted.org/packages/c8/91/2d517db61845698f41a2a974de90762e50faeb529201c6b3574935969045/pypdfium2-4.30.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:5eda3641a2da7a7a0b2f4dbd71d706401a656fea521b6b6faa0675b15d31a163", size = 6181543, upload-time = "2024-05-09T18:33:02.597Z" }, - { url = "https://files.pythonhosted.org/packages/ba/c4/ed1315143a7a84b2c7616569dfb472473968d628f17c231c39e29ae9d780/pypdfium2-4.30.0-py3-none-musllinux_1_1_i686.whl", hash = "sha256:0dfa61421b5eb68e1188b0b2231e7ba35735aef2d867d86e48ee6cab6975195e", size = 6175911, upload-time = "2024-05-09T18:33:05.376Z" }, - { url = "https://files.pythonhosted.org/packages/7a/c4/9e62d03f414e0e3051c56d5943c3bf42aa9608ede4e19dc96438364e9e03/pypdfium2-4.30.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:f33bd79e7a09d5f7acca3b0b69ff6c8a488869a7fab48fdf400fec6e20b9c8be", size = 6267430, upload-time = "2024-05-09T18:33:08.067Z" }, - { url = "https://files.pythonhosted.org/packages/90/47/eda4904f715fb98561e34012826e883816945934a851745570521ec89520/pypdfium2-4.30.0-py3-none-win32.whl", hash = "sha256:ee2410f15d576d976c2ab2558c93d392a25fb9f6635e8dd0a8a3a5241b275e0e", size = 2775951, upload-time = "2024-05-09T18:33:10.567Z" }, - { url = "https://files.pythonhosted.org/packages/25/bd/56d9ec6b9f0fc4e0d95288759f3179f0fcd34b1a1526b75673d2f6d5196f/pypdfium2-4.30.0-py3-none-win_amd64.whl", hash = "sha256:90dbb2ac07be53219f56be09961eb95cf2473f834d01a42d901d13ccfad64b4c", size = 2892098, upload-time = "2024-05-09T18:33:13.107Z" }, - { url = "https://files.pythonhosted.org/packages/be/7a/097801205b991bc3115e8af1edb850d30aeaf0118520b016354cf5ccd3f6/pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29", size = 2752118, upload-time = "2024-05-09T18:33:15.489Z" }, + { url = "https://files.pythonhosted.org/packages/fb/0c/9108ae5266ee4cdf495f99205c44d4b5c83b4eb227c2b610d35c9e9fe961/pypdfium2-5.2.0-py3-none-android_23_arm64_v8a.whl", hash = "sha256:1ba4187a45ce4cf08f2a8c7e0f8970c36b9aa1770c8a3412a70781c1d80fb145", size = 2763268, upload-time = "2025-12-12T13:19:37.354Z" }, + { url = 
"https://files.pythonhosted.org/packages/35/8c/55f5c8a2c6b293f5c020be4aa123eaa891e797c514e5eccd8cb042740d37/pypdfium2-5.2.0-py3-none-android_23_armeabi_v7a.whl", hash = "sha256:80c55e10a8c9242f0901d35a9a306dd09accce8e497507bb23fcec017d45fe2e", size = 2301821, upload-time = "2025-12-12T13:19:39.484Z" }, + { url = "https://files.pythonhosted.org/packages/5e/7d/efa013e3795b41c59dd1e472f7201c241232c3a6553be4917e3a26b9f225/pypdfium2-5.2.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:73523ae69cd95c084c1342096893b2143ea73c36fdde35494780ba431e6a7d6e", size = 2816428, upload-time = "2025-12-12T13:19:41.735Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/8c30af6ff2ab41a7cb84753ee79dd1e0a8932c9bda9fe19759d69cbbf115/pypdfium2-5.2.0-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:19c501d22ef5eb98e42416d22cc3ac66d4808b436e3d06686392f24d8d9f708d", size = 2939486, upload-time = "2025-12-12T13:19:43.176Z" }, + { url = "https://files.pythonhosted.org/packages/64/64/454a73c49a04c2c290917ad86184e4da959e9e5aba94b3b046328c89be93/pypdfium2-5.2.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ed15a3f58d6ee4905f0d0a731e30b381b457c30689512589c7f57950b0cdcec", size = 2979235, upload-time = "2025-12-12T13:19:44.635Z" }, + { url = "https://files.pythonhosted.org/packages/4e/29/f1cab8e31192dd367dc7b1afa71f45cfcb8ff0b176f1d2a0f528faf04052/pypdfium2-5.2.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:329cd1e9f068e8729e0d0b79a070d6126f52bc48ff1e40505cb207a5e20ce0ba", size = 2763001, upload-time = "2025-12-12T13:19:47.598Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5d/e95fad8fdac960854173469c4b6931d5de5e09d05e6ee7d9756f8b95eef0/pypdfium2-5.2.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:325259759886e66619504df4721fef3b8deabf8a233e4f4a66e0c32ebae60c2f", size = 3057024, upload-time = "2025-12-12T13:19:49.179Z" }, + { url = "https://files.pythonhosted.org/packages/f4/32/468591d017ab67f8142d40f4db8163b6d8bb404fe0d22da75a5c661dc144/pypdfium2-5.2.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5683e8f08ab38ed05e0e59e611451ec74332803d4e78f8c45658ea1d372a17af", size = 3448598, upload-time = "2025-12-12T13:19:50.979Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a5/57b4e389b77ab5f7e9361dc7fc03b5378e678ba81b21e791e85350fbb235/pypdfium2-5.2.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da4815426a5adcf03bf4d2c5f26c0ff8109dbfaf2c3415984689931bc6006ef9", size = 2993946, upload-time = "2025-12-12T13:19:53.154Z" }, + { url = "https://files.pythonhosted.org/packages/84/3a/e03e9978f817632aa56183bb7a4989284086fdd45de3245ead35f147179b/pypdfium2-5.2.0-py3-none-manylinux_2_27_s390x.manylinux_2_28_s390x.whl", hash = "sha256:64bf5c039b2c314dab1fd158bfff99db96299a5b5c6d96fc056071166056f1de", size = 3673148, upload-time = "2025-12-12T13:19:54.528Z" }, + { url = "https://files.pythonhosted.org/packages/13/ee/e581506806553afa4b7939d47bf50dca35c1151b8cc960f4542a6eb135ce/pypdfium2-5.2.0-py3-none-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:76b42a17748ac7dc04d5ef04d0561c6a0a4b546d113ec1d101d59650c6a340f7", size = 2964757, upload-time = "2025-12-12T13:19:56.406Z" }, + { url = "https://files.pythonhosted.org/packages/00/be/3715c652aff30f12284523dd337843d0efe3e721020f0ec303a99ffffd8d/pypdfium2-5.2.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:9d4367d471439fae846f0aba91ff9e8d66e524edcf3c8d6e02fe96fa306e13b9", size = 4130319, upload-time = 
"2025-12-12T13:19:57.889Z" }, + { url = "https://files.pythonhosted.org/packages/b0/0b/28aa2ede9004dd4192266bbad394df0896787f7c7bcfa4d1a6e091ad9a2c/pypdfium2-5.2.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:613f6bb2b47d76b66c0bf2ca581c7c33e3dd9dcb29d65d8c34fef4135f933149", size = 3746488, upload-time = "2025-12-12T13:19:59.469Z" }, + { url = "https://files.pythonhosted.org/packages/bc/04/1b791e1219652bbfc51df6498267d8dcec73ad508b99388b2890902ccd9d/pypdfium2-5.2.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c03fad3f2fa68d358f5dd4deb07e438482fa26fae439c49d127576d969769ca1", size = 4336534, upload-time = "2025-12-12T13:20:01.28Z" }, + { url = "https://files.pythonhosted.org/packages/4f/e3/6f00f963bb702ffd2e3e2d9c7286bc3bb0bebcdfa96ca897d466f66976c6/pypdfium2-5.2.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:f10be1900ae21879d02d9f4d58c2d2db3a2e6da611736a8e9decc22d1fb02909", size = 4375079, upload-time = "2025-12-12T13:20:03.117Z" }, + { url = "https://files.pythonhosted.org/packages/3a/2a/7ec2b191b5e1b7716a0dfc14e6860e89bb355fb3b94ed0c1d46db526858c/pypdfium2-5.2.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:97c1a126d30378726872f94866e38c055740cae80313638dafd1cd448d05e7c0", size = 3928648, upload-time = "2025-12-12T13:20:05.041Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c3/c6d972fa095ff3ace76f9d3a91ceaf8a9dbbe0d9a5a84ac1d6178a46630e/pypdfium2-5.2.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:c369f183a90781b788af9a357a877bc8caddc24801e8346d0bf23f3295f89f3a", size = 4997772, upload-time = "2025-12-12T13:20:06.453Z" }, + { url = "https://files.pythonhosted.org/packages/22/45/2c64584b7a3ca5c4652280a884f4b85b8ed24e27662adeebdc06d991c917/pypdfium2-5.2.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b391f1cceb454934b612a05b54e90f98aafeffe5e73830d71700b17f0812226b", size = 4180046, upload-time = "2025-12-12T13:20:08.715Z" }, + { url = "https://files.pythonhosted.org/packages/d6/99/8d1ff87b626649400e62a2840e6e10fe258443ba518798e071fee4cd86f9/pypdfium2-5.2.0-py3-none-win32.whl", hash = "sha256:c68067938f617c37e4d17b18de7cac231fc7ce0eb7b6653b7283ebe8764d4999", size = 2990175, upload-time = "2025-12-12T13:20:10.241Z" }, + { url = "https://files.pythonhosted.org/packages/93/fc/114fff8895b620aac4984808e93d01b6d7b93e342a1635fcfe2a5f39cf39/pypdfium2-5.2.0-py3-none-win_amd64.whl", hash = "sha256:eb0591b720e8aaeab9475c66d653655ec1be0464b946f3f48a53922e843f0f3b", size = 3098615, upload-time = "2025-12-12T13:20:11.795Z" }, + { url = "https://files.pythonhosted.org/packages/08/97/eb738bff5998760d6e0cbcb7dd04cbf1a95a97b997fac6d4e57562a58992/pypdfium2-5.2.0-py3-none-win_arm64.whl", hash = "sha256:5dd1ef579f19fa3719aee4959b28bda44b1072405756708b5e83df8806a19521", size = 2939479, upload-time = "2025-12-12T13:20:13.815Z" }, ] [[package]] diff --git a/docker/.env.example b/docker/.env.example index e5cdb64dae..16d47409f5 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -468,6 +468,7 @@ ALIYUN_OSS_REGION=ap-southeast-1 ALIYUN_OSS_AUTH_VERSION=v4 # Don't start with '/'. OSS doesn't support leading slash in object names. 
diff --git a/docker/.env.example b/docker/.env.example
index e5cdb64dae..16d47409f5 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -468,6 +468,7 @@ ALIYUN_OSS_REGION=ap-southeast-1
 ALIYUN_OSS_AUTH_VERSION=v4
 # Don't start with '/'. OSS doesn't support leading slash in object names.
 ALIYUN_OSS_PATH=your-path
+ALIYUN_CLOUDBOX_ID=your-cloudbox-id

 # Tencent COS Configuration
 #
@@ -491,6 +492,7 @@ HUAWEI_OBS_BUCKET_NAME=your-bucket-name
 HUAWEI_OBS_SECRET_KEY=your-secret-key
 HUAWEI_OBS_ACCESS_KEY=your-access-key
 HUAWEI_OBS_SERVER=your-server-url
+HUAWEI_OBS_PATH_STYLE=false

 # Volcengine TOS Configuration
 #
diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml
index a07ed9e8ad..0de9d3e939 100644
--- a/docker/docker-compose-template.yaml
+++ b/docker/docker-compose-template.yaml
@@ -270,7 +270,7 @@ services:

   # plugin daemon
   plugin_daemon:
-    image: langgenius/dify-plugin-daemon:0.5.1-local
+    image: langgenius/dify-plugin-daemon:0.5.2-local
     restart: always
     environment:
       # Use the shared environment variables.
diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml
index 68ef217bbd..dba61d1816 100644
--- a/docker/docker-compose.middleware.yaml
+++ b/docker/docker-compose.middleware.yaml
@@ -123,7 +123,7 @@ services:

   # plugin daemon
   plugin_daemon:
-    image: langgenius/dify-plugin-daemon:0.5.1-local
+    image: langgenius/dify-plugin-daemon:0.5.2-local
     restart: always
     env_file:
       - ./middleware.env
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 24e1077ebe..964b9fe724 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -134,6 +134,7 @@ x-shared-env: &shared-api-worker-env
   ALIYUN_OSS_REGION: ${ALIYUN_OSS_REGION:-ap-southeast-1}
   ALIYUN_OSS_AUTH_VERSION: ${ALIYUN_OSS_AUTH_VERSION:-v4}
   ALIYUN_OSS_PATH: ${ALIYUN_OSS_PATH:-your-path}
+  ALIYUN_CLOUDBOX_ID: ${ALIYUN_CLOUDBOX_ID:-your-cloudbox-id}
   TENCENT_COS_BUCKET_NAME: ${TENCENT_COS_BUCKET_NAME:-your-bucket-name}
   TENCENT_COS_SECRET_KEY: ${TENCENT_COS_SECRET_KEY:-your-secret-key}
   TENCENT_COS_SECRET_ID: ${TENCENT_COS_SECRET_ID:-your-secret-id}
@@ -148,6 +149,7 @@ x-shared-env: &shared-api-worker-env
   HUAWEI_OBS_SECRET_KEY: ${HUAWEI_OBS_SECRET_KEY:-your-secret-key}
   HUAWEI_OBS_ACCESS_KEY: ${HUAWEI_OBS_ACCESS_KEY:-your-access-key}
   HUAWEI_OBS_SERVER: ${HUAWEI_OBS_SERVER:-your-server-url}
+  HUAWEI_OBS_PATH_STYLE: ${HUAWEI_OBS_PATH_STYLE:-false}
   VOLCENGINE_TOS_BUCKET_NAME: ${VOLCENGINE_TOS_BUCKET_NAME:-your-bucket-name}
   VOLCENGINE_TOS_SECRET_KEY: ${VOLCENGINE_TOS_SECRET_KEY:-your-secret-key}
   VOLCENGINE_TOS_ACCESS_KEY: ${VOLCENGINE_TOS_ACCESS_KEY:-your-access-key}
@@ -939,7 +941,7 @@ services:

   # plugin daemon
   plugin_daemon:
-    image: langgenius/dify-plugin-daemon:0.5.1-local
+    image: langgenius/dify-plugin-daemon:0.5.2-local
     restart: always
     environment:
       # Use the shared environment variables.
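Reviewer note: the new `ALIYUN_CLOUDBOX_ID` and `HUAWEI_OBS_PATH_STYLE` variables flow from the env files into `dify_config`, which the storage extensions read above. A minimal sketch of how such settings could be declared with pydantic-settings (the project pins pydantic-settings~=2.11); the field names come from the diff, while the class name, types, and defaults are assumptions:

```python
from pydantic import Field
from pydantic_settings import BaseSettings


class StorageExtraConfig(BaseSettings):  # hypothetical container, not Dify's actual config class
    ALIYUN_CLOUDBOX_ID: str | None = Field(default=None, description="Aliyun OSS CloudBox ID")
    HUAWEI_OBS_PATH_STYLE: bool = Field(default=False, description="Use path-style addressing for Huawei OBS")
```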
diff --git a/web/app/components/base/chat/chat/index.tsx b/web/app/components/base/chat/chat/index.tsx
index 19c7b0da52..9864dda6ae 100644
--- a/web/app/components/base/chat/chat/index.tsx
+++ b/web/app/components/base/chat/chat/index.tsx
@@ -222,11 +222,16 @@ const Chat: FC<ChatProps> = ({
     return () => container.removeEventListener('scroll', setUserScrolled)
   }, [])

-  // Reset user scroll state when a new chat starts (length <= 1)
+  // Reset user scroll state when conversation changes or a new chat starts
+  // Track the first message ID to detect conversation switches (fixes #29820)
+  const prevFirstMessageIdRef = useRef<string | undefined>(undefined)
   useEffect(() => {
-    if (chatList.length <= 1)
+    const firstMessageId = chatList[0]?.id
+    // Reset when: new chat (length <= 1) OR conversation switched (first message ID changed)
+    if (chatList.length <= 1 || (firstMessageId && prevFirstMessageIdRef.current !== firstMessageId))
       userScrolledRef.current = false
-  }, [chatList.length])
+    prevFirstMessageIdRef.current = firstMessageId
+  }, [chatList])

   useEffect(() => {
     if (!sidebarCollapseState)
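Reviewer note: the fix above remembers the previous first-message id in a ref so it can tell "conversation switched" apart from "new messages appended to the same conversation". The same state machine in a small Python sketch; the class and method names are illustrative, not the component's API:

```python
class ScrollTracker:
    """Reset rule from the chat fix: clear the user-scrolled flag on a
    brand-new chat or when the conversation's first message id changes."""

    def __init__(self):
        self.user_scrolled = False
        self._prev_first_id = None

    def on_chat_list_change(self, chat_list: list[dict]) -> None:
        first_id = chat_list[0]["id"] if chat_list else None
        # New chat (<= 1 message) or a different conversation: re-enable auto-scroll.
        if len(chat_list) <= 1 or (first_id and first_id != self._prev_first_id):
            self.user_scrolled = False
        self._prev_first_id = first_id
```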
diff --git a/web/app/components/header/account-setting/members-page/invite-modal/index.tsx b/web/app/components/header/account-setting/members-page/invite-modal/index.tsx
index 14654c1196..ae18e23097 100644
--- a/web/app/components/header/account-setting/members-page/invite-modal/index.tsx
+++ b/web/app/components/header/account-setting/members-page/invite-modal/index.tsx
@@ -116,7 +116,7 @@ const InviteModal = ({
               inputClassName='bg-transparent'
               onChange={setEmails}
               getLabel={(email, index, removeEmail) =>
-
+
                 {email}
                  removeEmail(index)}>
                   ×
diff --git a/web/app/components/workflow/nodes/_base/components/entry-node-container.tsx b/web/app/components/workflow/nodes/_base/components/entry-node-container.tsx
index b0cecdd0ae..7c316d2443 100644
--- a/web/app/components/workflow/nodes/_base/components/entry-node-container.tsx
+++ b/web/app/components/workflow/nodes/_base/components/entry-node-container.tsx
@@ -27,7 +27,7 @@ const EntryNodeContainer: FC = ({
   return (
-
+
       {label}
diff --git a/web/app/components/workflow/nodes/code/code-parser.ts b/web/app/components/workflow/nodes/code/code-parser.ts
index 86447a06e5..7550e62e96 100644
--- a/web/app/components/workflow/nodes/code/code-parser.ts
+++ b/web/app/components/workflow/nodes/code/code-parser.ts
@@ -31,7 +31,7 @@ export const extractReturnType = (code: string, language: CodeLanguage): OutputVar => {
   if (returnIndex === -1)
     return {}

-  // return から始まる部分文字列を取得
+  // Extract the substring starting with 'return'.
   const codeAfterReturn = codeWithoutComments.slice(returnIndex)

   let bracketCount = 0
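Reviewer note: `extractReturnType` locates the `return` statement in comment-stripped code and then walks forward while balancing brackets. A minimal Python sketch of that bracket-counting idea; this is an illustrative simplification, not the project's TypeScript implementation, and it assumes strings and comments have already been stripped as the real parser does:

```python
def extract_return_expression(code: str) -> str:
    """Grab the expression after the last 'return' by balancing brackets."""
    idx = code.rfind("return")
    if idx == -1:
        return ""
    after = code[idx + len("return"):]
    depth = 0
    out = []
    for ch in after:
        if ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
            if depth < 0:  # closed a bracket we never opened: expression ended
                break
        elif ch in ";\n" and depth == 0:  # statement terminator at top level
            break
        out.append(ch)
    return "".join(out).strip()


assert extract_return_expression('def f():\n    return {"a": [1, 2]}\n') == '{"a": [1, 2]}'
```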