From 57b51603f5864a857828b19c87f4947c5417a4b1 Mon Sep 17 00:00:00 2001 From: -LAN- Date: Sat, 20 Dec 2025 17:13:23 +0800 Subject: [PATCH 1/9] chore: Add codeowner for web test, vdb and docker (#29948) --- .github/CODEOWNERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 06a60308c2..4bc4f085c2 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -124,9 +124,15 @@ api/controllers/web/feature.py @GarfieldDai @GareArc # Backend - Database Migrations api/migrations/ @snakevash @laipz8200 @MRZHUH +# Backend - Vector DB Middleware +api/configs/middleware/vdb/* @JohnJyong + # Frontend web/ @iamjoel +# Frontend - Web Tests +.github/workflows/web-tests.yml @iamjoel + # Frontend - App - Orchestration web/app/components/workflow/ @iamjoel @zxhlyh web/app/components/workflow-app/ @iamjoel @zxhlyh @@ -198,6 +204,7 @@ web/app/components/plugins/marketplace/ @iamjoel @Yessenia-d web/app/signin/ @douxc @iamjoel web/app/signup/ @douxc @iamjoel web/app/reset-password/ @douxc @iamjoel + web/app/install/ @douxc @iamjoel web/app/init/ @douxc @iamjoel web/app/forgot-password/ @douxc @iamjoel @@ -238,3 +245,6 @@ web/app/education-apply/ @iamjoel @zxhlyh # Frontend - Workspace web/app/components/header/account-dropdown/workplace-selector/ @iamjoel @zxhlyh + +# Docker +docker/* @laipz8200 From 7b60ff3d2d1bc95055840bcf59991b27ce1fe41c Mon Sep 17 00:00:00 2001 From: yyh <92089059+lyzno1@users.noreply.github.com> Date: Sat, 20 Dec 2025 20:47:46 +0800 Subject: [PATCH 2/9] chore: add symlink for skills directory and update autofix workflow exclusion pattern (#29953) --- .claude/skills/frontend-testing/SKILL.md | 17 +++++++++-------- .../component-test.template.tsx | 0 .../{templates => assets}/hook-test.template.ts | 0 .../utility-test.template.ts | 0 .../{guides => references}/async-testing.md | 0 .../{CHECKLIST.md => references/checklist.md} | 0 .../{guides => references}/common-patterns.md | 0 .../{guides => references}/domain-components.md | 0 .../{guides => references}/mocking.md | 0 .../{guides => references}/workflow.md | 0 .codex/skills | 1 + .github/workflows/autofix.yml | 2 +- 12 files changed, 11 insertions(+), 9 deletions(-) rename .claude/skills/frontend-testing/{templates => assets}/component-test.template.tsx (100%) rename .claude/skills/frontend-testing/{templates => assets}/hook-test.template.ts (100%) rename .claude/skills/frontend-testing/{templates => assets}/utility-test.template.ts (100%) rename .claude/skills/frontend-testing/{guides => references}/async-testing.md (100%) rename .claude/skills/frontend-testing/{CHECKLIST.md => references/checklist.md} (100%) rename .claude/skills/frontend-testing/{guides => references}/common-patterns.md (100%) rename .claude/skills/frontend-testing/{guides => references}/domain-components.md (100%) rename .claude/skills/frontend-testing/{guides => references}/mocking.md (100%) rename .claude/skills/frontend-testing/{guides => references}/workflow.md (100%) create mode 120000 .codex/skills diff --git a/.claude/skills/frontend-testing/SKILL.md b/.claude/skills/frontend-testing/SKILL.md index 06cb672141..cd775007a0 100644 --- a/.claude/skills/frontend-testing/SKILL.md +++ b/.claude/skills/frontend-testing/SKILL.md @@ -1,5 +1,5 @@ --- -name: Dify Frontend Testing +name: frontend-testing description: Generate Jest + React Testing Library tests for Dify frontend components, hooks, and utilities. Triggers on testing, spec files, coverage, Jest, RTL, unit tests, integration tests, or write/review test requests. --- @@ -178,7 +178,7 @@ Process in this order for multi-file testing: - **500+ lines**: Consider splitting before testing - **Many dependencies**: Extract logic into hooks first -> 📖 See `guides/workflow.md` for complete workflow details and todo list format. +> 📖 See `references/workflow.md` for complete workflow details and todo list format. ## Testing Strategy @@ -289,17 +289,18 @@ For each test file generated, aim for: - ✅ **>95%** branch coverage - ✅ **>95%** line coverage -> **Note**: For multi-file directories, process one file at a time with full coverage each. See `guides/workflow.md`. +> **Note**: For multi-file directories, process one file at a time with full coverage each. See `references/workflow.md`. ## Detailed Guides For more detailed information, refer to: -- `guides/workflow.md` - **Incremental testing workflow** (MUST READ for multi-file testing) -- `guides/mocking.md` - Mock patterns and best practices -- `guides/async-testing.md` - Async operations and API calls -- `guides/domain-components.md` - Workflow, Dataset, Configuration testing -- `guides/common-patterns.md` - Frequently used testing patterns +- `references/workflow.md` - **Incremental testing workflow** (MUST READ for multi-file testing) +- `references/mocking.md` - Mock patterns and best practices +- `references/async-testing.md` - Async operations and API calls +- `references/domain-components.md` - Workflow, Dataset, Configuration testing +- `references/common-patterns.md` - Frequently used testing patterns +- `references/checklist.md` - Test generation checklist and validation steps ## Authoritative References diff --git a/.claude/skills/frontend-testing/templates/component-test.template.tsx b/.claude/skills/frontend-testing/assets/component-test.template.tsx similarity index 100% rename from .claude/skills/frontend-testing/templates/component-test.template.tsx rename to .claude/skills/frontend-testing/assets/component-test.template.tsx diff --git a/.claude/skills/frontend-testing/templates/hook-test.template.ts b/.claude/skills/frontend-testing/assets/hook-test.template.ts similarity index 100% rename from .claude/skills/frontend-testing/templates/hook-test.template.ts rename to .claude/skills/frontend-testing/assets/hook-test.template.ts diff --git a/.claude/skills/frontend-testing/templates/utility-test.template.ts b/.claude/skills/frontend-testing/assets/utility-test.template.ts similarity index 100% rename from .claude/skills/frontend-testing/templates/utility-test.template.ts rename to .claude/skills/frontend-testing/assets/utility-test.template.ts diff --git a/.claude/skills/frontend-testing/guides/async-testing.md b/.claude/skills/frontend-testing/references/async-testing.md similarity index 100% rename from .claude/skills/frontend-testing/guides/async-testing.md rename to .claude/skills/frontend-testing/references/async-testing.md diff --git a/.claude/skills/frontend-testing/CHECKLIST.md b/.claude/skills/frontend-testing/references/checklist.md similarity index 100% rename from .claude/skills/frontend-testing/CHECKLIST.md rename to .claude/skills/frontend-testing/references/checklist.md diff --git a/.claude/skills/frontend-testing/guides/common-patterns.md b/.claude/skills/frontend-testing/references/common-patterns.md similarity index 100% rename from .claude/skills/frontend-testing/guides/common-patterns.md rename to .claude/skills/frontend-testing/references/common-patterns.md diff --git a/.claude/skills/frontend-testing/guides/domain-components.md b/.claude/skills/frontend-testing/references/domain-components.md similarity index 100% rename from .claude/skills/frontend-testing/guides/domain-components.md rename to .claude/skills/frontend-testing/references/domain-components.md diff --git a/.claude/skills/frontend-testing/guides/mocking.md b/.claude/skills/frontend-testing/references/mocking.md similarity index 100% rename from .claude/skills/frontend-testing/guides/mocking.md rename to .claude/skills/frontend-testing/references/mocking.md diff --git a/.claude/skills/frontend-testing/guides/workflow.md b/.claude/skills/frontend-testing/references/workflow.md similarity index 100% rename from .claude/skills/frontend-testing/guides/workflow.md rename to .claude/skills/frontend-testing/references/workflow.md diff --git a/.codex/skills b/.codex/skills new file mode 120000 index 0000000000..454b8427cd --- /dev/null +++ b/.codex/skills @@ -0,0 +1 @@ +../.claude/skills \ No newline at end of file diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml index 2f457d0a0a..bafac7bd13 100644 --- a/.github/workflows/autofix.yml +++ b/.github/workflows/autofix.yml @@ -66,7 +66,7 @@ jobs: # mdformat breaks YAML front matter in markdown files. Add --exclude for directories containing YAML front matter. - name: mdformat run: | - uvx --python 3.13 mdformat . --exclude ".claude/skills/**" + uvx --python 3.13 mdformat . --exclude ".claude/skills/**/SKILL.md" - name: Install pnpm uses: pnpm/action-setup@v4 From 7501360663efe879d716d8a962ce80181a21596b Mon Sep 17 00:00:00 2001 From: Novice Date: Sun, 21 Dec 2025 09:19:11 +0800 Subject: [PATCH 3/9] fix: add RFC 9728 compliant well-known URL discovery with path insertion fallback (#29960) --- .../console/workspace/tool_providers.py | 39 ++++-- api/core/mcp/auth/auth_flow.py | 55 +++++--- api/core/mcp/mcp_client.py | 2 +- .../tools/mcp_tools_manage_service.py | 120 ++++++++++++------ .../tools/test_mcp_tools_manage_service.py | 41 +++--- 5 files changed, 170 insertions(+), 87 deletions(-) diff --git a/api/controllers/console/workspace/tool_providers.py b/api/controllers/console/workspace/tool_providers.py index 2c54aa5a20..a2fc45c29c 100644 --- a/api/controllers/console/workspace/tool_providers.py +++ b/api/controllers/console/workspace/tool_providers.py @@ -18,6 +18,7 @@ from controllers.console.wraps import ( setup_required, ) from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration +from core.helper.tool_provider_cache import ToolProviderListCache from core.mcp.auth.auth_flow import auth, handle_callback from core.mcp.error import MCPAuthError, MCPError, MCPRefreshTokenError from core.mcp.mcp_client import MCPClient @@ -944,7 +945,7 @@ class ToolProviderMCPApi(Resource): configuration = MCPConfiguration.model_validate(args["configuration"]) authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None - # Create provider + # Create provider in transaction with Session(db.engine) as session, session.begin(): service = MCPToolManageService(session=session) result = service.create_provider( @@ -960,7 +961,11 @@ class ToolProviderMCPApi(Resource): configuration=configuration, authentication=authentication, ) - return jsonable_encoder(result) + + # Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations + ToolProviderListCache.invalidate_cache(tenant_id) + + return jsonable_encoder(result) @console_ns.expect(parser_mcp_put) @setup_required @@ -972,17 +977,23 @@ class ToolProviderMCPApi(Resource): authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None _, current_tenant_id = current_account_with_tenant() - # Step 1: Validate server URL change if needed (includes URL format validation and network operation) - validation_result = None + # Step 1: Get provider data for URL validation (short-lived session, no network I/O) + validation_data = None with Session(db.engine) as session: service = MCPToolManageService(session=session) - validation_result = service.validate_server_url_change( - tenant_id=current_tenant_id, provider_id=args["provider_id"], new_server_url=args["server_url"] + validation_data = service.get_provider_for_url_validation( + tenant_id=current_tenant_id, provider_id=args["provider_id"] ) - # No need to check for errors here, exceptions will be raised directly + # Step 2: Perform URL validation with network I/O OUTSIDE of any database session + # This prevents holding database locks during potentially slow network operations + validation_result = MCPToolManageService.validate_server_url_standalone( + tenant_id=current_tenant_id, + new_server_url=args["server_url"], + validation_data=validation_data, + ) - # Step 2: Perform database update in a transaction + # Step 3: Perform database update in a transaction with Session(db.engine) as session, session.begin(): service = MCPToolManageService(session=session) service.update_provider( @@ -999,7 +1010,11 @@ class ToolProviderMCPApi(Resource): authentication=authentication, validation_result=validation_result, ) - return {"result": "success"} + + # Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations + ToolProviderListCache.invalidate_cache(current_tenant_id) + + return {"result": "success"} @console_ns.expect(parser_mcp_delete) @setup_required @@ -1012,7 +1027,11 @@ class ToolProviderMCPApi(Resource): with Session(db.engine) as session, session.begin(): service = MCPToolManageService(session=session) service.delete_provider(tenant_id=current_tenant_id, provider_id=args["provider_id"]) - return {"result": "success"} + + # Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations + ToolProviderListCache.invalidate_cache(current_tenant_id) + + return {"result": "success"} parser_auth = ( diff --git a/api/core/mcp/auth/auth_flow.py b/api/core/mcp/auth/auth_flow.py index 92787b39dd..aef1afb235 100644 --- a/api/core/mcp/auth/auth_flow.py +++ b/api/core/mcp/auth/auth_flow.py @@ -47,7 +47,11 @@ def build_protected_resource_metadata_discovery_urls( """ Build a list of URLs to try for Protected Resource Metadata discovery. - Per SEP-985, supports fallback when discovery fails at one URL. + Per RFC 9728 Section 5.1, supports fallback when discovery fails at one URL. + Priority order: + 1. URL from WWW-Authenticate header (if provided) + 2. Well-known URI with path: https://example.com/.well-known/oauth-protected-resource/public/mcp + 3. Well-known URI at root: https://example.com/.well-known/oauth-protected-resource """ urls = [] @@ -58,9 +62,18 @@ def build_protected_resource_metadata_discovery_urls( # Fallback: construct from server URL parsed = urlparse(server_url) base_url = f"{parsed.scheme}://{parsed.netloc}" - fallback_url = urljoin(base_url, "/.well-known/oauth-protected-resource") - if fallback_url not in urls: - urls.append(fallback_url) + path = parsed.path.rstrip("/") + + # Priority 2: With path insertion (e.g., /.well-known/oauth-protected-resource/public/mcp) + if path: + path_url = f"{base_url}/.well-known/oauth-protected-resource{path}" + if path_url not in urls: + urls.append(path_url) + + # Priority 3: At root (e.g., /.well-known/oauth-protected-resource) + root_url = f"{base_url}/.well-known/oauth-protected-resource" + if root_url not in urls: + urls.append(root_url) return urls @@ -71,30 +84,34 @@ def build_oauth_authorization_server_metadata_discovery_urls(auth_server_url: st Supports both OAuth 2.0 (RFC 8414) and OpenID Connect discovery. - Per RFC 8414 section 3: - - If issuer has no path: https://example.com/.well-known/oauth-authorization-server - - If issuer has path: https://example.com/.well-known/oauth-authorization-server{path} - - Example: - - issuer: https://example.com/oauth - - metadata: https://example.com/.well-known/oauth-authorization-server/oauth + Per RFC 8414 section 3.1 and section 5, try all possible endpoints: + - OAuth 2.0 with path insertion: https://example.com/.well-known/oauth-authorization-server/tenant1 + - OpenID Connect with path insertion: https://example.com/.well-known/openid-configuration/tenant1 + - OpenID Connect path appending: https://example.com/tenant1/.well-known/openid-configuration + - OAuth 2.0 at root: https://example.com/.well-known/oauth-authorization-server + - OpenID Connect at root: https://example.com/.well-known/openid-configuration """ urls = [] base_url = auth_server_url or server_url parsed = urlparse(base_url) base = f"{parsed.scheme}://{parsed.netloc}" - path = parsed.path.rstrip("/") # Remove trailing slash + path = parsed.path.rstrip("/") + # OAuth 2.0 Authorization Server Metadata at root (MCP-03-26) + urls.append(f"{base}/.well-known/oauth-authorization-server") - # Try OpenID Connect discovery first (more common) - urls.append(urljoin(base + "/", ".well-known/openid-configuration")) + # OpenID Connect Discovery at root + urls.append(f"{base}/.well-known/openid-configuration") - # OAuth 2.0 Authorization Server Metadata (RFC 8414) - # Include the path component if present in the issuer URL if path: - urls.append(urljoin(base, f".well-known/oauth-authorization-server{path}")) - else: - urls.append(urljoin(base, ".well-known/oauth-authorization-server")) + # OpenID Connect Discovery with path insertion + urls.append(f"{base}/.well-known/openid-configuration{path}") + + # OpenID Connect Discovery path appending + urls.append(f"{base}{path}/.well-known/openid-configuration") + + # OAuth 2.0 Authorization Server Metadata with path insertion + urls.append(f"{base}/.well-known/oauth-authorization-server{path}") return urls diff --git a/api/core/mcp/mcp_client.py b/api/core/mcp/mcp_client.py index b0e0dab9be..2b0645b558 100644 --- a/api/core/mcp/mcp_client.py +++ b/api/core/mcp/mcp_client.py @@ -59,7 +59,7 @@ class MCPClient: try: logger.debug("Not supported method %s found in URL path, trying default 'mcp' method.", method_name) self.connect_server(sse_client, "sse") - except MCPConnectionError: + except (MCPConnectionError, ValueError): logger.debug("MCP connection failed with 'sse', falling back to 'mcp' method.") self.connect_server(streamablehttp_client, "mcp") diff --git a/api/services/tools/mcp_tools_manage_service.py b/api/services/tools/mcp_tools_manage_service.py index d641fe0315..252be77b27 100644 --- a/api/services/tools/mcp_tools_manage_service.py +++ b/api/services/tools/mcp_tools_manage_service.py @@ -15,7 +15,6 @@ from sqlalchemy.orm import Session from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration, MCPProviderEntity from core.helper import encrypter from core.helper.provider_cache import NoOpProviderCredentialCache -from core.helper.tool_provider_cache import ToolProviderListCache from core.mcp.auth.auth_flow import auth from core.mcp.auth_client import MCPClientWithAuthRetry from core.mcp.error import MCPAuthError, MCPError @@ -65,6 +64,15 @@ class ServerUrlValidationResult(BaseModel): return self.needs_validation and self.validation_passed and self.reconnect_result is not None +class ProviderUrlValidationData(BaseModel): + """Data required for URL validation, extracted from database to perform network operations outside of session""" + + current_server_url_hash: str + headers: dict[str, str] + timeout: float | None + sse_read_timeout: float | None + + class MCPToolManageService: """Service class for managing MCP tools and providers.""" @@ -166,9 +174,6 @@ class MCPToolManageService: self._session.add(mcp_tool) self._session.flush() - # Invalidate tool providers cache - ToolProviderListCache.invalidate_cache(tenant_id) - mcp_providers = ToolTransformService.mcp_provider_to_user_provider(mcp_tool, for_list=True) return mcp_providers @@ -192,7 +197,7 @@ class MCPToolManageService: Update an MCP provider. Args: - validation_result: Pre-validation result from validate_server_url_change. + validation_result: Pre-validation result from validate_server_url_standalone. If provided and contains reconnect_result, it will be used instead of performing network operations. """ @@ -251,8 +256,6 @@ class MCPToolManageService: # Flush changes to database self._session.flush() - # Invalidate tool providers cache - ToolProviderListCache.invalidate_cache(tenant_id) except IntegrityError as e: self._handle_integrity_error(e, name, server_url, server_identifier) @@ -261,9 +264,6 @@ class MCPToolManageService: mcp_tool = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) self._session.delete(mcp_tool) - # Invalidate tool providers cache - ToolProviderListCache.invalidate_cache(tenant_id) - def list_providers( self, *, tenant_id: str, for_list: bool = False, include_sensitive: bool = True ) -> list[ToolProviderApiEntity]: @@ -546,30 +546,39 @@ class MCPToolManageService: ) return self.execute_auth_actions(auth_result) - def _reconnect_provider(self, *, server_url: str, provider: MCPToolProvider) -> ReconnectResult: - """Attempt to reconnect to MCP provider with new server URL.""" + def get_provider_for_url_validation(self, *, tenant_id: str, provider_id: str) -> ProviderUrlValidationData: + """ + Get provider data required for URL validation. + This method performs database read and should be called within a session. + + Returns: + ProviderUrlValidationData: Data needed for standalone URL validation + """ + provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) provider_entity = provider.to_entity() - headers = provider_entity.headers + return ProviderUrlValidationData( + current_server_url_hash=provider.server_url_hash, + headers=provider_entity.headers, + timeout=provider_entity.timeout, + sse_read_timeout=provider_entity.sse_read_timeout, + ) - try: - tools = self._retrieve_remote_mcp_tools(server_url, headers, provider_entity) - return ReconnectResult( - authed=True, - tools=json.dumps([tool.model_dump() for tool in tools]), - encrypted_credentials=EMPTY_CREDENTIALS_JSON, - ) - except MCPAuthError: - return ReconnectResult(authed=False, tools=EMPTY_TOOLS_JSON, encrypted_credentials=EMPTY_CREDENTIALS_JSON) - except MCPError as e: - raise ValueError(f"Failed to re-connect MCP server: {e}") from e - - def validate_server_url_change( - self, *, tenant_id: str, provider_id: str, new_server_url: str + @staticmethod + def validate_server_url_standalone( + *, + tenant_id: str, + new_server_url: str, + validation_data: ProviderUrlValidationData, ) -> ServerUrlValidationResult: """ Validate server URL change by attempting to connect to the new server. - This method should be called BEFORE update_provider to perform network operations - outside of the database transaction. + This method performs network operations and MUST be called OUTSIDE of any database session + to avoid holding locks during network I/O. + + Args: + tenant_id: Tenant ID for encryption + new_server_url: The new server URL to validate + validation_data: Provider data obtained from get_provider_for_url_validation Returns: ServerUrlValidationResult: Validation result with connection status and tools if successful @@ -579,25 +588,30 @@ class MCPToolManageService: return ServerUrlValidationResult(needs_validation=False) # Validate URL format - if not self._is_valid_url(new_server_url): + parsed = urlparse(new_server_url) + if not all([parsed.scheme, parsed.netloc]) or parsed.scheme not in ["http", "https"]: raise ValueError("Server URL is not valid.") # Always encrypt and hash the URL encrypted_server_url = encrypter.encrypt_token(tenant_id, new_server_url) new_server_url_hash = hashlib.sha256(new_server_url.encode()).hexdigest() - # Get current provider - provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id) - # Check if URL is actually different - if new_server_url_hash == provider.server_url_hash: + if new_server_url_hash == validation_data.current_server_url_hash: # URL hasn't changed, but still return the encrypted data return ServerUrlValidationResult( - needs_validation=False, encrypted_server_url=encrypted_server_url, server_url_hash=new_server_url_hash + needs_validation=False, + encrypted_server_url=encrypted_server_url, + server_url_hash=new_server_url_hash, ) - # Perform validation by attempting to connect - reconnect_result = self._reconnect_provider(server_url=new_server_url, provider=provider) + # Perform network validation - this is the expensive operation that should be outside session + reconnect_result = MCPToolManageService._reconnect_with_url( + server_url=new_server_url, + headers=validation_data.headers, + timeout=validation_data.timeout, + sse_read_timeout=validation_data.sse_read_timeout, + ) return ServerUrlValidationResult( needs_validation=True, validation_passed=True, @@ -606,6 +620,38 @@ class MCPToolManageService: server_url_hash=new_server_url_hash, ) + @staticmethod + def _reconnect_with_url( + *, + server_url: str, + headers: dict[str, str], + timeout: float | None, + sse_read_timeout: float | None, + ) -> ReconnectResult: + """ + Attempt to connect to MCP server with given URL. + This is a static method that performs network I/O without database access. + """ + from core.mcp.mcp_client import MCPClient + + try: + with MCPClient( + server_url=server_url, + headers=headers, + timeout=timeout, + sse_read_timeout=sse_read_timeout, + ) as mcp_client: + tools = mcp_client.list_tools() + return ReconnectResult( + authed=True, + tools=json.dumps([tool.model_dump() for tool in tools]), + encrypted_credentials=EMPTY_CREDENTIALS_JSON, + ) + except MCPAuthError: + return ReconnectResult(authed=False, tools=EMPTY_TOOLS_JSON, encrypted_credentials=EMPTY_CREDENTIALS_JSON) + except MCPError as e: + raise ValueError(f"Failed to re-connect MCP server: {e}") from e + def _build_tool_provider_response( self, db_provider: MCPToolProvider, provider_entity: MCPProviderEntity, tools: list ) -> ToolProviderApiEntity: diff --git a/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py b/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py index 8c190762cf..6cae83ac37 100644 --- a/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py +++ b/api/tests/test_containers_integration_tests/services/tools/test_mcp_tools_manage_service.py @@ -1308,18 +1308,17 @@ class TestMCPToolManageService: type("MockTool", (), {"model_dump": lambda self: {"name": "test_tool_2", "description": "Test tool 2"}})(), ] - with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: + with patch("core.mcp.mcp_client.MCPClient") as mock_mcp_client: # Setup mock client mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.return_value = mock_tools # Act: Execute the method under test - from extensions.ext_database import db - - service = MCPToolManageService(db.session()) - result = service._reconnect_provider( + result = MCPToolManageService._reconnect_with_url( server_url="https://example.com/mcp", - provider=mcp_provider, + headers={"X-Test": "1"}, + timeout=mcp_provider.timeout, + sse_read_timeout=mcp_provider.sse_read_timeout, ) # Assert: Verify the expected outcomes @@ -1337,8 +1336,12 @@ class TestMCPToolManageService: assert tools_data[1]["name"] == "test_tool_2" # Verify mock interactions - provider_entity = mcp_provider.to_entity() - mock_mcp_client.assert_called_once() + mock_mcp_client.assert_called_once_with( + server_url="https://example.com/mcp", + headers={"X-Test": "1"}, + timeout=mcp_provider.timeout, + sse_read_timeout=mcp_provider.sse_read_timeout, + ) def test_re_connect_mcp_provider_auth_error(self, db_session_with_containers, mock_external_service_dependencies): """ @@ -1361,19 +1364,18 @@ class TestMCPToolManageService: ) # Mock MCPClient to raise authentication error - with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: + with patch("core.mcp.mcp_client.MCPClient") as mock_mcp_client: from core.mcp.error import MCPAuthError mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.side_effect = MCPAuthError("Authentication required") # Act: Execute the method under test - from extensions.ext_database import db - - service = MCPToolManageService(db.session()) - result = service._reconnect_provider( + result = MCPToolManageService._reconnect_with_url( server_url="https://example.com/mcp", - provider=mcp_provider, + headers={}, + timeout=mcp_provider.timeout, + sse_read_timeout=mcp_provider.sse_read_timeout, ) # Assert: Verify the expected outcomes @@ -1404,18 +1406,17 @@ class TestMCPToolManageService: ) # Mock MCPClient to raise connection error - with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client: + with patch("core.mcp.mcp_client.MCPClient") as mock_mcp_client: from core.mcp.error import MCPError mock_client_instance = mock_mcp_client.return_value.__enter__.return_value mock_client_instance.list_tools.side_effect = MCPError("Connection failed") # Act & Assert: Verify proper error handling - from extensions.ext_database import db - - service = MCPToolManageService(db.session()) with pytest.raises(ValueError, match="Failed to re-connect MCP server: Connection failed"): - service._reconnect_provider( + MCPToolManageService._reconnect_with_url( server_url="https://example.com/mcp", - provider=mcp_provider, + headers={"X-Test": "1"}, + timeout=mcp_provider.timeout, + sse_read_timeout=mcp_provider.sse_read_timeout, ) From 471fc944551cec75dc4a47ad182c4b3e106953af Mon Sep 17 00:00:00 2001 From: Rhys Date: Sun, 21 Dec 2025 15:51:24 +0700 Subject: [PATCH 4/9] fix: update Notion credential retrieval in document indexing sync task (#29933) --- api/tasks/document_indexing_sync_task.py | 40 +- .../tasks/test_document_indexing_sync_task.py | 520 ++++++++++++++++++ 2 files changed, 544 insertions(+), 16 deletions(-) create mode 100644 api/tests/unit_tests/tasks/test_document_indexing_sync_task.py diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index 4c1f38c3bb..5fc2597c92 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -2,7 +2,6 @@ import logging import time import click -import sqlalchemy as sa from celery import shared_task from sqlalchemy import select @@ -12,7 +11,7 @@ from core.rag.index_processor.index_processor_factory import IndexProcessorFacto from extensions.ext_database import db from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, Document, DocumentSegment -from models.source import DataSourceOauthBinding +from services.datasource_provider_service import DatasourceProviderService logger = logging.getLogger(__name__) @@ -48,27 +47,36 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): page_id = data_source_info["notion_page_id"] page_type = data_source_info["type"] page_edited_time = data_source_info["last_edited_time"] + credential_id = data_source_info.get("credential_id") - data_source_binding = ( - db.session.query(DataSourceOauthBinding) - .where( - sa.and_( - DataSourceOauthBinding.tenant_id == document.tenant_id, - DataSourceOauthBinding.provider == "notion", - DataSourceOauthBinding.disabled == False, - DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"', - ) - ) - .first() + # Get credentials from datasource provider + datasource_provider_service = DatasourceProviderService() + credential = datasource_provider_service.get_datasource_credentials( + tenant_id=document.tenant_id, + credential_id=credential_id, + provider="notion_datasource", + plugin_id="langgenius/notion_datasource", ) - if not data_source_binding: - raise ValueError("Data source binding not found.") + + if not credential: + logger.error( + "Datasource credential not found for document %s, tenant_id: %s, credential_id: %s", + document_id, + document.tenant_id, + credential_id, + ) + document.indexing_status = "error" + document.error = "Datasource credential not found. Please reconnect your Notion workspace." + document.stopped_at = naive_utc_now() + db.session.commit() + db.session.close() + return loader = NotionExtractor( notion_workspace_id=workspace_id, notion_obj_id=page_id, notion_page_type=page_type, - notion_access_token=data_source_binding.access_token, + notion_access_token=credential.get("integration_secret"), tenant_id=document.tenant_id, ) diff --git a/api/tests/unit_tests/tasks/test_document_indexing_sync_task.py b/api/tests/unit_tests/tasks/test_document_indexing_sync_task.py new file mode 100644 index 0000000000..374abe0368 --- /dev/null +++ b/api/tests/unit_tests/tasks/test_document_indexing_sync_task.py @@ -0,0 +1,520 @@ +""" +Unit tests for document indexing sync task. + +This module tests the document indexing sync task functionality including: +- Syncing Notion documents when updated +- Validating document and data source existence +- Credential validation and retrieval +- Cleaning old segments before re-indexing +- Error handling and edge cases +""" + +import uuid +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from core.indexing_runner import DocumentIsPausedError, IndexingRunner +from models.dataset import Dataset, Document, DocumentSegment +from tasks.document_indexing_sync_task import document_indexing_sync_task + +# ============================================================================ +# Fixtures +# ============================================================================ + + +@pytest.fixture +def tenant_id(): + """Generate a unique tenant ID for testing.""" + return str(uuid.uuid4()) + + +@pytest.fixture +def dataset_id(): + """Generate a unique dataset ID for testing.""" + return str(uuid.uuid4()) + + +@pytest.fixture +def document_id(): + """Generate a unique document ID for testing.""" + return str(uuid.uuid4()) + + +@pytest.fixture +def notion_workspace_id(): + """Generate a Notion workspace ID for testing.""" + return str(uuid.uuid4()) + + +@pytest.fixture +def notion_page_id(): + """Generate a Notion page ID for testing.""" + return str(uuid.uuid4()) + + +@pytest.fixture +def credential_id(): + """Generate a credential ID for testing.""" + return str(uuid.uuid4()) + + +@pytest.fixture +def mock_dataset(dataset_id, tenant_id): + """Create a mock Dataset object.""" + dataset = Mock(spec=Dataset) + dataset.id = dataset_id + dataset.tenant_id = tenant_id + dataset.indexing_technique = "high_quality" + dataset.embedding_model_provider = "openai" + dataset.embedding_model = "text-embedding-ada-002" + return dataset + + +@pytest.fixture +def mock_document(document_id, dataset_id, tenant_id, notion_workspace_id, notion_page_id, credential_id): + """Create a mock Document object with Notion data source.""" + doc = Mock(spec=Document) + doc.id = document_id + doc.dataset_id = dataset_id + doc.tenant_id = tenant_id + doc.data_source_type = "notion_import" + doc.indexing_status = "completed" + doc.error = None + doc.stopped_at = None + doc.processing_started_at = None + doc.doc_form = "text_model" + doc.data_source_info_dict = { + "notion_workspace_id": notion_workspace_id, + "notion_page_id": notion_page_id, + "type": "page", + "last_edited_time": "2024-01-01T00:00:00Z", + "credential_id": credential_id, + } + return doc + + +@pytest.fixture +def mock_document_segments(document_id): + """Create mock DocumentSegment objects.""" + segments = [] + for i in range(3): + segment = Mock(spec=DocumentSegment) + segment.id = str(uuid.uuid4()) + segment.document_id = document_id + segment.index_node_id = f"node-{document_id}-{i}" + segments.append(segment) + return segments + + +@pytest.fixture +def mock_db_session(): + """Mock database session.""" + with patch("tasks.document_indexing_sync_task.db.session") as mock_session: + mock_query = MagicMock() + mock_session.query.return_value = mock_query + mock_query.where.return_value = mock_query + mock_session.scalars.return_value = MagicMock() + yield mock_session + + +@pytest.fixture +def mock_datasource_provider_service(): + """Mock DatasourceProviderService.""" + with patch("tasks.document_indexing_sync_task.DatasourceProviderService") as mock_service_class: + mock_service = MagicMock() + mock_service.get_datasource_credentials.return_value = {"integration_secret": "test_token"} + mock_service_class.return_value = mock_service + yield mock_service + + +@pytest.fixture +def mock_notion_extractor(): + """Mock NotionExtractor.""" + with patch("tasks.document_indexing_sync_task.NotionExtractor") as mock_extractor_class: + mock_extractor = MagicMock() + mock_extractor.get_notion_last_edited_time.return_value = "2024-01-02T00:00:00Z" # Updated time + mock_extractor_class.return_value = mock_extractor + yield mock_extractor + + +@pytest.fixture +def mock_index_processor_factory(): + """Mock IndexProcessorFactory.""" + with patch("tasks.document_indexing_sync_task.IndexProcessorFactory") as mock_factory: + mock_processor = MagicMock() + mock_processor.clean = Mock() + mock_factory.return_value.init_index_processor.return_value = mock_processor + yield mock_factory + + +@pytest.fixture +def mock_indexing_runner(): + """Mock IndexingRunner.""" + with patch("tasks.document_indexing_sync_task.IndexingRunner") as mock_runner_class: + mock_runner = MagicMock(spec=IndexingRunner) + mock_runner.run = Mock() + mock_runner_class.return_value = mock_runner + yield mock_runner + + +# ============================================================================ +# Tests for document_indexing_sync_task +# ============================================================================ + + +class TestDocumentIndexingSyncTask: + """Tests for the document_indexing_sync_task function.""" + + def test_document_not_found(self, mock_db_session, dataset_id, document_id): + """Test that task handles document not found gracefully.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.return_value = None + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + mock_db_session.close.assert_called_once() + + def test_missing_notion_workspace_id(self, mock_db_session, mock_document, dataset_id, document_id): + """Test that task raises error when notion_workspace_id is missing.""" + # Arrange + mock_document.data_source_info_dict = {"notion_page_id": "page123", "type": "page"} + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + + # Act & Assert + with pytest.raises(ValueError, match="no notion page found"): + document_indexing_sync_task(dataset_id, document_id) + + def test_missing_notion_page_id(self, mock_db_session, mock_document, dataset_id, document_id): + """Test that task raises error when notion_page_id is missing.""" + # Arrange + mock_document.data_source_info_dict = {"notion_workspace_id": "ws123", "type": "page"} + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + + # Act & Assert + with pytest.raises(ValueError, match="no notion page found"): + document_indexing_sync_task(dataset_id, document_id) + + def test_empty_data_source_info(self, mock_db_session, mock_document, dataset_id, document_id): + """Test that task raises error when data_source_info is empty.""" + # Arrange + mock_document.data_source_info_dict = None + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + + # Act & Assert + with pytest.raises(ValueError, match="no notion page found"): + document_indexing_sync_task(dataset_id, document_id) + + def test_credential_not_found( + self, + mock_db_session, + mock_datasource_provider_service, + mock_document, + dataset_id, + document_id, + ): + """Test that task handles missing credentials by updating document status.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + mock_datasource_provider_service.get_datasource_credentials.return_value = None + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + assert mock_document.indexing_status == "error" + assert "Datasource credential not found" in mock_document.error + assert mock_document.stopped_at is not None + mock_db_session.commit.assert_called() + mock_db_session.close.assert_called() + + def test_page_not_updated( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_document, + dataset_id, + document_id, + ): + """Test that task does nothing when page has not been updated.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + # Return same time as stored in document + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-01T00:00:00Z" + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + # Document status should remain unchanged + assert mock_document.indexing_status == "completed" + # No session operations should be performed beyond the initial query + mock_db_session.close.assert_not_called() + + def test_successful_sync_when_page_updated( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_index_processor_factory, + mock_indexing_runner, + mock_dataset, + mock_document, + mock_document_segments, + dataset_id, + document_id, + ): + """Test successful sync flow when Notion page has been updated.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.side_effect = [mock_document, mock_dataset] + mock_db_session.scalars.return_value.all.return_value = mock_document_segments + # NotionExtractor returns updated time + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-02T00:00:00Z" + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + # Verify document status was updated to parsing + assert mock_document.indexing_status == "parsing" + assert mock_document.processing_started_at is not None + + # Verify segments were cleaned + mock_processor = mock_index_processor_factory.return_value.init_index_processor.return_value + mock_processor.clean.assert_called_once() + + # Verify segments were deleted from database + for segment in mock_document_segments: + mock_db_session.delete.assert_any_call(segment) + + # Verify indexing runner was called + mock_indexing_runner.run.assert_called_once_with([mock_document]) + + # Verify session operations + assert mock_db_session.commit.called + mock_db_session.close.assert_called_once() + + def test_dataset_not_found_during_cleaning( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_document, + dataset_id, + document_id, + ): + """Test that task handles dataset not found during cleaning phase.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.side_effect = [mock_document, None] + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-02T00:00:00Z" + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + # Document should still be set to parsing + assert mock_document.indexing_status == "parsing" + # Session should be closed after error + mock_db_session.close.assert_called_once() + + def test_cleaning_error_continues_to_indexing( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_index_processor_factory, + mock_indexing_runner, + mock_dataset, + mock_document, + dataset_id, + document_id, + ): + """Test that indexing continues even if cleaning fails.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.side_effect = [mock_document, mock_dataset] + mock_db_session.scalars.return_value.all.side_effect = Exception("Cleaning error") + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-02T00:00:00Z" + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + # Indexing should still be attempted despite cleaning error + mock_indexing_runner.run.assert_called_once_with([mock_document]) + mock_db_session.close.assert_called_once() + + def test_indexing_runner_document_paused_error( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_index_processor_factory, + mock_indexing_runner, + mock_dataset, + mock_document, + mock_document_segments, + dataset_id, + document_id, + ): + """Test that DocumentIsPausedError is handled gracefully.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.side_effect = [mock_document, mock_dataset] + mock_db_session.scalars.return_value.all.return_value = mock_document_segments + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-02T00:00:00Z" + mock_indexing_runner.run.side_effect = DocumentIsPausedError("Document paused") + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + # Session should be closed after handling error + mock_db_session.close.assert_called_once() + + def test_indexing_runner_general_error( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_index_processor_factory, + mock_indexing_runner, + mock_dataset, + mock_document, + mock_document_segments, + dataset_id, + document_id, + ): + """Test that general exceptions during indexing are handled.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.side_effect = [mock_document, mock_dataset] + mock_db_session.scalars.return_value.all.return_value = mock_document_segments + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-02T00:00:00Z" + mock_indexing_runner.run.side_effect = Exception("Indexing error") + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + # Session should be closed after error + mock_db_session.close.assert_called_once() + + def test_notion_extractor_initialized_with_correct_params( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_document, + dataset_id, + document_id, + notion_workspace_id, + notion_page_id, + ): + """Test that NotionExtractor is initialized with correct parameters.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-01T00:00:00Z" # No update + + # Act + with patch("tasks.document_indexing_sync_task.NotionExtractor") as mock_extractor_class: + mock_extractor = MagicMock() + mock_extractor.get_notion_last_edited_time.return_value = "2024-01-01T00:00:00Z" + mock_extractor_class.return_value = mock_extractor + + document_indexing_sync_task(dataset_id, document_id) + + # Assert + mock_extractor_class.assert_called_once_with( + notion_workspace_id=notion_workspace_id, + notion_obj_id=notion_page_id, + notion_page_type="page", + notion_access_token="test_token", + tenant_id=mock_document.tenant_id, + ) + + def test_datasource_credentials_requested_correctly( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_document, + dataset_id, + document_id, + credential_id, + ): + """Test that datasource credentials are requested with correct parameters.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-01T00:00:00Z" + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + mock_datasource_provider_service.get_datasource_credentials.assert_called_once_with( + tenant_id=mock_document.tenant_id, + credential_id=credential_id, + provider="notion_datasource", + plugin_id="langgenius/notion_datasource", + ) + + def test_credential_id_missing_uses_none( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_document, + dataset_id, + document_id, + ): + """Test that task handles missing credential_id by passing None.""" + # Arrange + mock_document.data_source_info_dict = { + "notion_workspace_id": "ws123", + "notion_page_id": "page123", + "type": "page", + "last_edited_time": "2024-01-01T00:00:00Z", + } + mock_db_session.query.return_value.where.return_value.first.return_value = mock_document + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-01T00:00:00Z" + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + mock_datasource_provider_service.get_datasource_credentials.assert_called_once_with( + tenant_id=mock_document.tenant_id, + credential_id=None, + provider="notion_datasource", + plugin_id="langgenius/notion_datasource", + ) + + def test_index_processor_clean_called_with_correct_params( + self, + mock_db_session, + mock_datasource_provider_service, + mock_notion_extractor, + mock_index_processor_factory, + mock_indexing_runner, + mock_dataset, + mock_document, + mock_document_segments, + dataset_id, + document_id, + ): + """Test that index processor clean is called with correct parameters.""" + # Arrange + mock_db_session.query.return_value.where.return_value.first.side_effect = [mock_document, mock_dataset] + mock_db_session.scalars.return_value.all.return_value = mock_document_segments + mock_notion_extractor.get_notion_last_edited_time.return_value = "2024-01-02T00:00:00Z" + + # Act + document_indexing_sync_task(dataset_id, document_id) + + # Assert + mock_processor = mock_index_processor_factory.return_value.init_index_processor.return_value + expected_node_ids = [seg.index_node_id for seg in mock_document_segments] + mock_processor.clean.assert_called_once_with( + mock_dataset, expected_node_ids, with_keywords=True, delete_child_chunks=True + ) From 32605181bdcd7022549cd672d98651842617288a Mon Sep 17 00:00:00 2001 From: wangxiaolei Date: Sun, 21 Dec 2025 16:53:37 +0800 Subject: [PATCH 5/9] feat: first use INTERNAL_FILES_URL first, then FILES_URL (#29962) --- api/core/rag/extractor/word_extractor.py | 9 ++--- .../core/rag/extractor/test_word_extractor.py | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index 044b118635..f67f613e9d 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -83,6 +83,7 @@ class WordExtractor(BaseExtractor): def _extract_images_from_docx(self, doc): image_count = 0 image_map = {} + base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL for r_id, rel in doc.part.rels.items(): if "image" in rel.target_ref: @@ -121,8 +122,7 @@ class WordExtractor(BaseExtractor): used_at=naive_utc_now(), ) db.session.add(upload_file) - # Use r_id as key for external images since target_part is undefined - image_map[r_id] = f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)" + image_map[r_id] = f"![image]({base_url}/files/{upload_file.id}/file-preview)" else: image_ext = rel.target_ref.split(".")[-1] if image_ext is None: @@ -150,10 +150,7 @@ class WordExtractor(BaseExtractor): used_at=naive_utc_now(), ) db.session.add(upload_file) - # Use target_part as key for internal images - image_map[rel.target_part] = ( - f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)" - ) + image_map[rel.target_part] = f"![image]({base_url}/files/{upload_file.id}/file-preview)" db.session.commit() return image_map diff --git a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py index fd0b0e2e44..3203aab8c3 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py @@ -132,3 +132,36 @@ def test_extract_images_from_docx(monkeypatch): # DB interactions should be recorded assert len(db_stub.session.added) == 2 assert db_stub.session.committed is True + + +def test_extract_images_from_docx_uses_internal_files_url(): + """Test that INTERNAL_FILES_URL takes precedence over FILES_URL for plugin access.""" + # Test the URL generation logic directly + from configs import dify_config + + # Mock the configuration values + original_files_url = getattr(dify_config, "FILES_URL", None) + original_internal_files_url = getattr(dify_config, "INTERNAL_FILES_URL", None) + + try: + # Set both URLs - INTERNAL should take precedence + dify_config.FILES_URL = "http://external.example.com" + dify_config.INTERNAL_FILES_URL = "http://internal.docker:5001" + + # Test the URL generation logic (same as in word_extractor.py) + upload_file_id = "test_file_id" + + # This is the pattern we fixed in the word extractor + base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL + generated_url = f"{base_url}/files/{upload_file_id}/file-preview" + + # Verify that INTERNAL_FILES_URL is used instead of FILES_URL + assert "http://internal.docker:5001" in generated_url, f"Expected internal URL, got: {generated_url}" + assert "http://external.example.com" not in generated_url, f"Should not use external URL, got: {generated_url}" + + finally: + # Restore original values + if original_files_url is not None: + dify_config.FILES_URL = original_files_url + if original_internal_files_url is not None: + dify_config.INTERNAL_FILES_URL = original_internal_files_url From 6cf71366ba44c1ff538acb760e3fc5853ee31ac8 Mon Sep 17 00:00:00 2001 From: Ben Ghorbel Mohamed Aziz <129333644+AziizBg@users.noreply.github.com> Date: Sun, 21 Dec 2025 10:04:07 +0100 Subject: [PATCH 6/9] fix: validate API key is not empty in HTTPRequest node (#29950) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- .../workflow/nodes/http_request/executor.py | 5 + .../workflow/nodes/test_http.py | 37 +++-- .../test_http_request_executor.py | 127 ++++++++++++++++++ 3 files changed, 150 insertions(+), 19 deletions(-) diff --git a/api/core/workflow/nodes/http_request/executor.py b/api/core/workflow/nodes/http_request/executor.py index f0c84872fb..931c6113a7 100644 --- a/api/core/workflow/nodes/http_request/executor.py +++ b/api/core/workflow/nodes/http_request/executor.py @@ -86,6 +86,11 @@ class Executor: node_data.authorization.config.api_key = variable_pool.convert_template( node_data.authorization.config.api_key ).text + # Validate that API key is not empty after template conversion + if not node_data.authorization.config.api_key or not node_data.authorization.config.api_key.strip(): + raise AuthorizationConfigError( + "API key is required for authorization but was empty. Please provide a valid API key." + ) self.url = node_data.url self.method = node_data.method diff --git a/api/tests/integration_tests/workflow/nodes/test_http.py b/api/tests/integration_tests/workflow/nodes/test_http.py index e75258a2a2..d814da8ec7 100644 --- a/api/tests/integration_tests/workflow/nodes/test_http.py +++ b/api/tests/integration_tests/workflow/nodes/test_http.py @@ -6,6 +6,7 @@ import pytest from core.app.entities.app_invoke_entities import InvokeFrom from core.workflow.entities import GraphInitParams +from core.workflow.enums import WorkflowNodeExecutionStatus from core.workflow.graph import Graph from core.workflow.nodes.http_request.node import HttpRequestNode from core.workflow.nodes.node_factory import DifyNodeFactory @@ -169,13 +170,14 @@ def test_custom_authorization_header(setup_http_mock): @pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True) -def test_custom_auth_with_empty_api_key_does_not_set_header(setup_http_mock): - """Test: In custom authentication mode, when the api_key is empty, no header should be set.""" +def test_custom_auth_with_empty_api_key_raises_error(setup_http_mock): + """Test: In custom authentication mode, when the api_key is empty, AuthorizationConfigError should be raised.""" from core.workflow.nodes.http_request.entities import ( HttpRequestNodeAuthorization, HttpRequestNodeData, HttpRequestNodeTimeout, ) + from core.workflow.nodes.http_request.exc import AuthorizationConfigError from core.workflow.nodes.http_request.executor import Executor from core.workflow.runtime import VariablePool from core.workflow.system_variable import SystemVariable @@ -208,16 +210,13 @@ def test_custom_auth_with_empty_api_key_does_not_set_header(setup_http_mock): ssl_verify=True, ) - # Create executor - executor = Executor( - node_data=node_data, timeout=HttpRequestNodeTimeout(connect=10, read=30, write=10), variable_pool=variable_pool - ) - - # Get assembled headers - headers = executor._assembling_headers() - - # When api_key is empty, the custom header should NOT be set - assert "X-Custom-Auth" not in headers + # Create executor should raise AuthorizationConfigError + with pytest.raises(AuthorizationConfigError, match="API key is required"): + Executor( + node_data=node_data, + timeout=HttpRequestNodeTimeout(connect=10, read=30, write=10), + variable_pool=variable_pool, + ) @pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True) @@ -305,9 +304,10 @@ def test_basic_authorization_with_custom_header_ignored(setup_http_mock): @pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True) def test_custom_authorization_with_empty_api_key(setup_http_mock): """ - Test that custom authorization doesn't set header when api_key is empty. - This test verifies the fix for issue #23554. + Test that custom authorization raises error when api_key is empty. + This test verifies the fix for issue #21830. """ + node = init_http_node( config={ "id": "1", @@ -333,11 +333,10 @@ def test_custom_authorization_with_empty_api_key(setup_http_mock): ) result = node._run() - assert result.process_data is not None - data = result.process_data.get("request", "") - - # Custom header should NOT be set when api_key is empty - assert "X-Custom-Auth:" not in data + # Should fail with AuthorizationConfigError + assert result.status == WorkflowNodeExecutionStatus.FAILED + assert "API key is required" in result.error + assert result.error_type == "AuthorizationConfigError" @pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True) diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py index f040a92b6f..27df938102 100644 --- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py +++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py @@ -1,3 +1,5 @@ +import pytest + from core.workflow.nodes.http_request import ( BodyData, HttpRequestNodeAuthorization, @@ -5,6 +7,7 @@ from core.workflow.nodes.http_request import ( HttpRequestNodeData, ) from core.workflow.nodes.http_request.entities import HttpRequestNodeTimeout +from core.workflow.nodes.http_request.exc import AuthorizationConfigError from core.workflow.nodes.http_request.executor import Executor from core.workflow.runtime import VariablePool from core.workflow.system_variable import SystemVariable @@ -348,3 +351,127 @@ def test_init_params(): executor = create_executor("key1:value1\n\nkey2:value2\n\n") executor._init_params() assert executor.params == [("key1", "value1"), ("key2", "value2")] + + +def test_empty_api_key_raises_error_bearer(): + """Test that empty API key raises AuthorizationConfigError for bearer auth.""" + variable_pool = VariablePool(system_variables=SystemVariable.empty()) + node_data = HttpRequestNodeData( + title="test", + method="get", + url="http://example.com", + headers="", + params="", + authorization=HttpRequestNodeAuthorization( + type="api-key", + config={"type": "bearer", "api_key": ""}, + ), + ) + timeout = HttpRequestNodeTimeout(connect=10, read=30, write=30) + + with pytest.raises(AuthorizationConfigError, match="API key is required"): + Executor( + node_data=node_data, + timeout=timeout, + variable_pool=variable_pool, + ) + + +def test_empty_api_key_raises_error_basic(): + """Test that empty API key raises AuthorizationConfigError for basic auth.""" + variable_pool = VariablePool(system_variables=SystemVariable.empty()) + node_data = HttpRequestNodeData( + title="test", + method="get", + url="http://example.com", + headers="", + params="", + authorization=HttpRequestNodeAuthorization( + type="api-key", + config={"type": "basic", "api_key": ""}, + ), + ) + timeout = HttpRequestNodeTimeout(connect=10, read=30, write=30) + + with pytest.raises(AuthorizationConfigError, match="API key is required"): + Executor( + node_data=node_data, + timeout=timeout, + variable_pool=variable_pool, + ) + + +def test_empty_api_key_raises_error_custom(): + """Test that empty API key raises AuthorizationConfigError for custom auth.""" + variable_pool = VariablePool(system_variables=SystemVariable.empty()) + node_data = HttpRequestNodeData( + title="test", + method="get", + url="http://example.com", + headers="", + params="", + authorization=HttpRequestNodeAuthorization( + type="api-key", + config={"type": "custom", "api_key": "", "header": "X-Custom-Auth"}, + ), + ) + timeout = HttpRequestNodeTimeout(connect=10, read=30, write=30) + + with pytest.raises(AuthorizationConfigError, match="API key is required"): + Executor( + node_data=node_data, + timeout=timeout, + variable_pool=variable_pool, + ) + + +def test_whitespace_only_api_key_raises_error(): + """Test that whitespace-only API key raises AuthorizationConfigError.""" + variable_pool = VariablePool(system_variables=SystemVariable.empty()) + node_data = HttpRequestNodeData( + title="test", + method="get", + url="http://example.com", + headers="", + params="", + authorization=HttpRequestNodeAuthorization( + type="api-key", + config={"type": "bearer", "api_key": " "}, + ), + ) + timeout = HttpRequestNodeTimeout(connect=10, read=30, write=30) + + with pytest.raises(AuthorizationConfigError, match="API key is required"): + Executor( + node_data=node_data, + timeout=timeout, + variable_pool=variable_pool, + ) + + +def test_valid_api_key_works(): + """Test that valid API key works correctly for bearer auth.""" + variable_pool = VariablePool(system_variables=SystemVariable.empty()) + node_data = HttpRequestNodeData( + title="test", + method="get", + url="http://example.com", + headers="", + params="", + authorization=HttpRequestNodeAuthorization( + type="api-key", + config={"type": "bearer", "api_key": "valid-api-key-123"}, + ), + ) + timeout = HttpRequestNodeTimeout(connect=10, read=30, write=30) + + executor = Executor( + node_data=node_data, + timeout=timeout, + variable_pool=variable_pool, + ) + + # Should not raise an error + headers = executor._assembling_headers() + assert "Authorization" in headers + assert headers["Authorization"] == "Bearer valid-api-key-123" From f8ccc75cdef3d9750e35e1cc05109a91d1c99d9e Mon Sep 17 00:00:00 2001 From: Guangjing Yan <125958391+GuangjingYan@users.noreply.github.com> Date: Mon, 22 Dec 2025 09:40:01 +0800 Subject: [PATCH 7/9] fix: clear uploaded files when clicking clear button in workflow (#29884) --- web/app/components/base/file-uploader/store.tsx | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/web/app/components/base/file-uploader/store.tsx b/web/app/components/base/file-uploader/store.tsx index cddfdf6f27..917e5fc646 100644 --- a/web/app/components/base/file-uploader/store.tsx +++ b/web/app/components/base/file-uploader/store.tsx @@ -1,6 +1,7 @@ import { createContext, useContext, + useEffect, useRef, } from 'react' import { @@ -10,6 +11,7 @@ import { import type { FileEntity, } from './types' +import { isEqual } from 'lodash-es' type Shape = { files: FileEntity[] @@ -55,10 +57,20 @@ export const FileContextProvider = ({ onChange, }: FileProviderProps) => { const storeRef = useRef(undefined) - if (!storeRef.current) storeRef.current = createFileStore(value, onChange) + useEffect(() => { + if (!storeRef.current) + return + if (isEqual(value, storeRef.current.getState().files)) + return + + storeRef.current.setState({ + files: value ? [...value] : [], + }) + }, [value]) + return ( {children} From 4cf65f0137d44a59213bb10e9a96e5627001cb3f Mon Sep 17 00:00:00 2001 From: Asuka Minato Date: Mon, 22 Dec 2025 10:40:32 +0900 Subject: [PATCH 8/9] =?UTF-8?q?refactor:=20split=20changes=20for=20api/con?= =?UTF-8?q?trollers/console/explore/installed=E2=80=A6=20(#29891)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../console/explore/installed_app.py | 34 ++++++++++++------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py index 3c95779475..e42db10ba6 100644 --- a/api/controllers/console/explore/installed_app.py +++ b/api/controllers/console/explore/installed_app.py @@ -2,7 +2,8 @@ import logging from typing import Any from flask import request -from flask_restx import Resource, inputs, marshal_with, reqparse +from flask_restx import Resource, marshal_with +from pydantic import BaseModel from sqlalchemy import and_, select from werkzeug.exceptions import BadRequest, Forbidden, NotFound @@ -18,6 +19,15 @@ from services.account_service import TenantService from services.enterprise.enterprise_service import EnterpriseService from services.feature_service import FeatureService + +class InstalledAppCreatePayload(BaseModel): + app_id: str + + +class InstalledAppUpdatePayload(BaseModel): + is_pinned: bool | None = None + + logger = logging.getLogger(__name__) @@ -105,26 +115,25 @@ class InstalledAppsListApi(Resource): @account_initialization_required @cloud_edition_billing_resource_check("apps") def post(self): - parser = reqparse.RequestParser().add_argument("app_id", type=str, required=True, help="Invalid app_id") - args = parser.parse_args() + payload = InstalledAppCreatePayload.model_validate(console_ns.payload or {}) - recommended_app = db.session.query(RecommendedApp).where(RecommendedApp.app_id == args["app_id"]).first() + recommended_app = db.session.query(RecommendedApp).where(RecommendedApp.app_id == payload.app_id).first() if recommended_app is None: - raise NotFound("App not found") + raise NotFound("Recommended app not found") _, current_tenant_id = current_account_with_tenant() - app = db.session.query(App).where(App.id == args["app_id"]).first() + app = db.session.query(App).where(App.id == payload.app_id).first() if app is None: - raise NotFound("App not found") + raise NotFound("App entity not found") if not app.is_public: raise Forbidden("You can't install a non-public app") installed_app = ( db.session.query(InstalledApp) - .where(and_(InstalledApp.app_id == args["app_id"], InstalledApp.tenant_id == current_tenant_id)) + .where(and_(InstalledApp.app_id == payload.app_id, InstalledApp.tenant_id == current_tenant_id)) .first() ) @@ -133,7 +142,7 @@ class InstalledAppsListApi(Resource): recommended_app.install_count += 1 new_installed_app = InstalledApp( - app_id=args["app_id"], + app_id=payload.app_id, tenant_id=current_tenant_id, app_owner_tenant_id=app.tenant_id, is_pinned=False, @@ -163,12 +172,11 @@ class InstalledAppApi(InstalledAppResource): return {"result": "success", "message": "App uninstalled successfully"}, 204 def patch(self, installed_app): - parser = reqparse.RequestParser().add_argument("is_pinned", type=inputs.boolean) - args = parser.parse_args() + payload = InstalledAppUpdatePayload.model_validate(console_ns.payload or {}) commit_args = False - if "is_pinned" in args: - installed_app.is_pinned = args["is_pinned"] + if payload.is_pinned is not None: + installed_app.is_pinned = payload.is_pinned commit_args = True if commit_args: From ba73964dfd8f99459a14a73a2297a92ab9c6f156 Mon Sep 17 00:00:00 2001 From: Asuka Minato Date: Mon, 22 Dec 2025 10:40:41 +0900 Subject: [PATCH 9/9] =?UTF-8?q?refactor:=20split=20changes=20for=20api/con?= =?UTF-8?q?trollers/console/explore/conversat=E2=80=A6=20(#29893)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api/controllers/console/explore/conversation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/controllers/console/explore/conversation.py b/api/controllers/console/explore/conversation.py index 92da591ab4..51995b8b8a 100644 --- a/api/controllers/console/explore/conversation.py +++ b/api/controllers/console/explore/conversation.py @@ -1,5 +1,4 @@ from typing import Any -from uuid import UUID from flask import request from flask_restx import marshal_with @@ -13,6 +12,7 @@ from controllers.console.explore.wraps import InstalledAppResource from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db from fields.conversation_fields import conversation_infinite_scroll_pagination_fields, simple_conversation_fields +from libs.helper import UUIDStrOrEmpty from libs.login import current_user from models import Account from models.model import AppMode @@ -24,7 +24,7 @@ from .. import console_ns class ConversationListQuery(BaseModel): - last_id: UUID | None = None + last_id: UUIDStrOrEmpty | None = None limit: int = Field(default=20, ge=1, le=100) pinned: bool | None = None