diff --git a/.claude/settings.json b/.claude/settings.json index 7d42234cae..c5c514b5f5 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -3,6 +3,7 @@ "feature-dev@claude-plugins-official": true, "context7@claude-plugins-official": true, "typescript-lsp@claude-plugins-official": true, - "pyright-lsp@claude-plugins-official": true + "pyright-lsp@claude-plugins-official": true, + "ralph-wiggum@claude-plugins-official": true } } diff --git a/.claude/skills/frontend-code-review/SKILL.md b/.claude/skills/frontend-code-review/SKILL.md new file mode 100644 index 0000000000..6cc23ca171 --- /dev/null +++ b/.claude/skills/frontend-code-review/SKILL.md @@ -0,0 +1,73 @@ +--- +name: frontend-code-review +description: "Trigger when the user requests a review of frontend files (e.g., `.tsx`, `.ts`, `.js`). Support both pending-change reviews and focused file reviews while applying the checklist rules." +--- + +# Frontend Code Review + +## Intent +Use this skill whenever the user asks to review frontend code (especially `.tsx`, `.ts`, or `.js` files). Support two review modes: + +1. **Pending-change review** – inspect staged/working-tree files slated for commit and flag checklist violations before submission. +2. **File-targeted review** – review the specific file(s) the user names and report the relevant checklist findings. + +Stick to the checklist below for every applicable file and mode. + +## Checklist +See [references/code-quality.md](references/code-quality.md), [references/performance.md](references/performance.md), [references/business-logic.md](references/business-logic.md) for the living checklist split by category—treat it as the canonical set of rules to follow. + +Flag each rule violation with urgency metadata so future reviewers can prioritize fixes. + +## Review Process +1. Open the relevant component/module. Gather lines that relate to class names, React Flow hooks, prop memoization, and styling. +2. For each rule in the review point, note where the code deviates and capture a representative snippet. +3. Compose the review section per the template below. Group violations first by **Urgent** flag, then by category order (Code Quality, Performance, Business Logic). + +## Required output +When invoked, the response must exactly follow one of the two templates: + +### Template A (any findings) +``` +# Code review +Found urgent issues need to be fixed: + +## 1 +FilePath: line + + + +### Suggested fix + + +--- +... (repeat for each urgent issue) ... + +Found suggestions for improvement: + +## 1 +FilePath: line + + + +### Suggested fix + + +--- + +... (repeat for each suggestion) ... +``` + +If there are no urgent issues, omit that section. If there are no suggestions, omit that section. + +If the issue number is more than 10, summarize as "10+ urgent issues" or "10+ suggestions" and just output the first 10 issues. + +Don't compress the blank lines between sections; keep them as-is for readability. + +If you use Template A (i.e., there are issues to fix) and at least one issue requires code changes, append a brief follow-up question after the structured output asking whether the user wants you to apply the suggested fix(es). For example: "Would you like me to use the Suggested fix section to address these issues?" + +### Template B (no issues) +``` +## Code review +No issues found. +``` + diff --git a/.claude/skills/frontend-code-review/references/business-logic.md b/.claude/skills/frontend-code-review/references/business-logic.md new file mode 100644 index 0000000000..4584f99dfc --- /dev/null +++ b/.claude/skills/frontend-code-review/references/business-logic.md @@ -0,0 +1,15 @@ +# Rule Catalog — Business Logic + +## Can't use workflowStore in Node components + +IsUrgent: True + +### Description + +File path pattern of node components: `web/app/components/workflow/nodes/[nodeName]/node.tsx` + +Node components are also used when creating a RAG Pipe from a template, but in that context there is no workflowStore Provider, which results in a blank screen. [This Issue](https://github.com/langgenius/dify/issues/29168) was caused by exactly this reason. + +### Suggested Fix + +Use `import { useNodes } from 'reactflow'` instead of `import useNodes from '@/app/components/workflow/store/workflow/use-nodes'`. diff --git a/.claude/skills/frontend-code-review/references/code-quality.md b/.claude/skills/frontend-code-review/references/code-quality.md new file mode 100644 index 0000000000..afdd40deb3 --- /dev/null +++ b/.claude/skills/frontend-code-review/references/code-quality.md @@ -0,0 +1,44 @@ +# Rule Catalog — Code Quality + +## Conditional class names use utility function + +IsUrgent: True +Category: Code Quality + +### Description + +Ensure conditional CSS is handled via the shared `classNames` instead of custom ternaries, string concatenation, or template strings. Centralizing class logic keeps components consistent and easier to maintain. + +### Suggested Fix + +```ts +import { cn } from '@/utils/classnames' +const classNames = cn(isActive ? 'text-primary-600' : 'text-gray-500') +``` + +## Tailwind-first styling + +IsUrgent: True +Category: Code Quality + +### Description + +Favor Tailwind CSS utility classes instead of adding new `.module.css` files unless a Tailwind combination cannot achieve the required styling. Keeping styles in Tailwind improves consistency and reduces maintenance overhead. + +Update this file when adding, editing, or removing Code Quality rules so the catalog remains accurate. + +## Classname ordering for easy overrides + +### Description + +When writing components, always place the incoming `className` prop after the component’s own class values so that downstream consumers can override or extend the styling. This keeps your component’s defaults but still lets external callers change or remove specific styles. + +Example: + +```tsx +import { cn } from '@/utils/classnames' + +const Button = ({ className }) => { + return
+} +``` diff --git a/.claude/skills/frontend-code-review/references/performance.md b/.claude/skills/frontend-code-review/references/performance.md new file mode 100644 index 0000000000..2d60072f5c --- /dev/null +++ b/.claude/skills/frontend-code-review/references/performance.md @@ -0,0 +1,45 @@ +# Rule Catalog — Performance + +## React Flow data usage + +IsUrgent: True +Category: Performance + +### Description + +When rendering React Flow, prefer `useNodes`/`useEdges` for UI consumption and rely on `useStoreApi` inside callbacks that mutate or read node/edge state. Avoid manually pulling Flow data outside of these hooks. + +## Complex prop memoization + +IsUrgent: True +Category: Performance + +### Description + +Wrap complex prop values (objects, arrays, maps) in `useMemo` prior to passing them into child components to guarantee stable references and prevent unnecessary renders. + +Update this file when adding, editing, or removing Performance rules so the catalog remains accurate. + +Wrong: + +```tsx + +``` + +Right: + +```tsx +const config = useMemo(() => ({ + provider: ..., + detail: ... +}), [provider, detail]); + + +``` diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index aa5a50918a..50dbde2aee 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -20,4 +20,4 @@ - [x] I understand that this PR may be closed in case there was no previous discussion or issues. (This doesn't apply to typos!) - [x] I've added a test for each change that was introduced, and I tried as much as possible to make a single atomic change. - [x] I've updated the documentation accordingly. -- [x] I ran `dev/reformat`(backend) and `cd web && npx lint-staged`(frontend) to appease the lint gods +- [x] I ran `make lint` and `make type-check` (backend) and `cd web && npx lint-staged` (frontend) to appease the lint gods diff --git a/.github/workflows/translate-i18n-base-on-english.yml b/.github/workflows/translate-i18n-base-on-english.yml index a51350f630..16d36361fd 100644 --- a/.github/workflows/translate-i18n-base-on-english.yml +++ b/.github/workflows/translate-i18n-base-on-english.yml @@ -5,6 +5,7 @@ on: branches: [main] paths: - 'web/i18n/en-US/*.json' + workflow_dispatch: permissions: contents: write @@ -18,7 +19,8 @@ jobs: run: working-directory: web steps: - - uses: actions/checkout@v6 + # Keep use old checkout action version for https://github.com/peter-evans/create-pull-request/issues/4272 + - uses: actions/checkout@v4 with: fetch-depth: 0 token: ${{ secrets.GITHUB_TOKEN }} @@ -26,21 +28,28 @@ jobs: - name: Check for file changes in i18n/en-US id: check_files run: | - git fetch origin "${{ github.event.before }}" || true - git fetch origin "${{ github.sha }}" || true - changed_files=$(git diff --name-only "${{ github.event.before }}" "${{ github.sha }}" -- 'i18n/en-US/*.json') - echo "Changed files: $changed_files" - if [ -n "$changed_files" ]; then + # Skip check for manual trigger, translate all files + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then echo "FILES_CHANGED=true" >> $GITHUB_ENV - file_args="" - for file in $changed_files; do - filename=$(basename "$file" .json) - file_args="$file_args --file $filename" - done - echo "FILE_ARGS=$file_args" >> $GITHUB_ENV - echo "File arguments: $file_args" + echo "FILE_ARGS=" >> $GITHUB_ENV + echo "Manual trigger: translating all files" else - echo "FILES_CHANGED=false" >> $GITHUB_ENV + git fetch origin "${{ github.event.before }}" || true + git fetch origin "${{ github.sha }}" || true + changed_files=$(git diff --name-only "${{ github.event.before }}" "${{ github.sha }}" -- 'i18n/en-US/*.json') + echo "Changed files: $changed_files" + if [ -n "$changed_files" ]; then + echo "FILES_CHANGED=true" >> $GITHUB_ENV + file_args="" + for file in $changed_files; do + filename=$(basename "$file" .json) + file_args="$file_args --file $filename" + done + echo "FILE_ARGS=$file_args" >> $GITHUB_ENV + echo "File arguments: $file_args" + else + echo "FILES_CHANGED=false" >> $GITHUB_ENV + fi fi - name: Install pnpm diff --git a/.gitignore b/.gitignore index 17a2bd5b7b..7bd919f095 100644 --- a/.gitignore +++ b/.gitignore @@ -235,3 +235,4 @@ scripts/stress-test/reports/ # settings *.local.json +*.local.md diff --git a/Makefile b/Makefile index 07afd8187e..60c32948b9 100644 --- a/Makefile +++ b/Makefile @@ -60,9 +60,10 @@ check: @echo "✅ Code check complete" lint: - @echo "🔧 Running ruff format, check with fixes, and import linter..." + @echo "🔧 Running ruff format, check with fixes, import linter, and dotenv-linter..." @uv run --project api --dev sh -c 'ruff format ./api && ruff check --fix ./api' @uv run --directory api --dev lint-imports + @uv run --project api --dev dotenv-linter ./api/.env.example ./web/.env.example @echo "✅ Linting complete" type-check: @@ -122,7 +123,7 @@ help: @echo "Backend Code Quality:" @echo " make format - Format code with ruff" @echo " make check - Check code with ruff" - @echo " make lint - Format and fix code with ruff" + @echo " make lint - Format, fix, and lint code (ruff, imports, dotenv)" @echo " make type-check - Run type checking with basedpyright" @echo " make test - Run backend unit tests" @echo "" diff --git a/api/.env.example b/api/.env.example index 5f8d369ec4..88611e016e 100644 --- a/api/.env.example +++ b/api/.env.example @@ -502,6 +502,8 @@ LOG_FILE_BACKUP_COUNT=5 LOG_DATEFORMAT=%Y-%m-%d %H:%M:%S # Log Timezone LOG_TZ=UTC +# Log output format: text or json +LOG_OUTPUT_FORMAT=text # Log format LOG_FORMAT=%(asctime)s,%(msecs)d %(levelname)-2s [%(filename)s:%(lineno)d] %(req_id)s %(message)s diff --git a/api/.importlinter b/api/.importlinter index 24ece72b30..acb21ae522 100644 --- a/api/.importlinter +++ b/api/.importlinter @@ -3,9 +3,11 @@ root_packages = core configs controllers + extensions models tasks services +include_external_packages = True [importlinter:contract:workflow] name = Workflow @@ -33,6 +35,29 @@ ignore_imports = core.workflow.nodes.loop.loop_node -> core.workflow.graph core.workflow.nodes.loop.loop_node -> core.workflow.graph_engine.command_channels +[importlinter:contract:workflow-infrastructure-dependencies] +name = Workflow Infrastructure Dependencies +type = forbidden +source_modules = + core.workflow +forbidden_modules = + extensions.ext_database + extensions.ext_redis +allow_indirect_imports = True +ignore_imports = + core.workflow.nodes.agent.agent_node -> extensions.ext_database + core.workflow.nodes.datasource.datasource_node -> extensions.ext_database + core.workflow.nodes.knowledge_index.knowledge_index_node -> extensions.ext_database + core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node -> extensions.ext_database + core.workflow.nodes.llm.file_saver -> extensions.ext_database + core.workflow.nodes.llm.llm_utils -> extensions.ext_database + core.workflow.nodes.llm.node -> extensions.ext_database + core.workflow.nodes.tool.tool_node -> extensions.ext_database + core.workflow.nodes.variable_assigner.common.impl -> extensions.ext_database + core.workflow.graph_engine.command_channels.redis_channel -> extensions.ext_redis + core.workflow.graph_engine.manager -> extensions.ext_redis + core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node -> extensions.ext_redis + [importlinter:contract:rsc] name = RSC type = layers diff --git a/api/app_factory.py b/api/app_factory.py index bcad88e9e0..f827842d68 100644 --- a/api/app_factory.py +++ b/api/app_factory.py @@ -2,9 +2,11 @@ import logging import time from opentelemetry.trace import get_current_span +from opentelemetry.trace.span import INVALID_SPAN_ID, INVALID_TRACE_ID from configs import dify_config from contexts.wrapper import RecyclableContextVar +from core.logging.context import init_request_context from dify_app import DifyApp logger = logging.getLogger(__name__) @@ -25,28 +27,35 @@ def create_flask_app_with_configs() -> DifyApp: # add before request hook @dify_app.before_request def before_request(): - # add an unique identifier to each request + # Initialize logging context for this request + init_request_context() RecyclableContextVar.increment_thread_recycles() - # add after request hook for injecting X-Trace-Id header from OpenTelemetry span context + # add after request hook for injecting trace headers from OpenTelemetry span context + # Only adds headers when OTEL is enabled and has valid context @dify_app.after_request - def add_trace_id_header(response): + def add_trace_headers(response): try: span = get_current_span() ctx = span.get_span_context() if span else None - if ctx and ctx.is_valid: - trace_id_hex = format(ctx.trace_id, "032x") - # Avoid duplicates if some middleware added it - if "X-Trace-Id" not in response.headers: - response.headers["X-Trace-Id"] = trace_id_hex + + if not ctx or not ctx.is_valid: + return response + + # Inject trace headers from OTEL context + if ctx.trace_id != INVALID_TRACE_ID and "X-Trace-Id" not in response.headers: + response.headers["X-Trace-Id"] = format(ctx.trace_id, "032x") + if ctx.span_id != INVALID_SPAN_ID and "X-Span-Id" not in response.headers: + response.headers["X-Span-Id"] = format(ctx.span_id, "016x") + except Exception: # Never break the response due to tracing header injection - logger.warning("Failed to add trace ID to response header", exc_info=True) + logger.warning("Failed to add trace headers to response", exc_info=True) return response # Capture the decorator's return value to avoid pyright reportUnusedFunction _ = before_request - _ = add_trace_id_header + _ = add_trace_headers return dify_app diff --git a/api/commands.py b/api/commands.py index a8d89ac200..44f7b42825 100644 --- a/api/commands.py +++ b/api/commands.py @@ -1184,6 +1184,217 @@ def remove_orphaned_files_on_storage(force: bool): click.echo(click.style(f"Removed {removed_files} orphaned files, with {error_files} errors.", fg="yellow")) +@click.command("file-usage", help="Query file usages and show where files are referenced.") +@click.option("--file-id", type=str, default=None, help="Filter by file UUID.") +@click.option("--key", type=str, default=None, help="Filter by storage key.") +@click.option("--src", type=str, default=None, help="Filter by table.column pattern (e.g., 'documents.%' or '%.icon').") +@click.option("--limit", type=int, default=100, help="Limit number of results (default: 100).") +@click.option("--offset", type=int, default=0, help="Offset for pagination (default: 0).") +@click.option("--json", "output_json", is_flag=True, help="Output results in JSON format.") +def file_usage( + file_id: str | None, + key: str | None, + src: str | None, + limit: int, + offset: int, + output_json: bool, +): + """ + Query file usages and show where files are referenced in the database. + + This command reuses the same reference checking logic as clear-orphaned-file-records + and displays detailed information about where each file is referenced. + """ + # define tables and columns to process + files_tables = [ + {"table": "upload_files", "id_column": "id", "key_column": "key"}, + {"table": "tool_files", "id_column": "id", "key_column": "file_key"}, + ] + ids_tables = [ + {"type": "uuid", "table": "message_files", "column": "upload_file_id", "pk_column": "id"}, + {"type": "text", "table": "documents", "column": "data_source_info", "pk_column": "id"}, + {"type": "text", "table": "document_segments", "column": "content", "pk_column": "id"}, + {"type": "text", "table": "messages", "column": "answer", "pk_column": "id"}, + {"type": "text", "table": "workflow_node_executions", "column": "inputs", "pk_column": "id"}, + {"type": "text", "table": "workflow_node_executions", "column": "process_data", "pk_column": "id"}, + {"type": "text", "table": "workflow_node_executions", "column": "outputs", "pk_column": "id"}, + {"type": "text", "table": "conversations", "column": "introduction", "pk_column": "id"}, + {"type": "text", "table": "conversations", "column": "system_instruction", "pk_column": "id"}, + {"type": "text", "table": "accounts", "column": "avatar", "pk_column": "id"}, + {"type": "text", "table": "apps", "column": "icon", "pk_column": "id"}, + {"type": "text", "table": "sites", "column": "icon", "pk_column": "id"}, + {"type": "json", "table": "messages", "column": "inputs", "pk_column": "id"}, + {"type": "json", "table": "messages", "column": "message", "pk_column": "id"}, + ] + + # Stream file usages with pagination to avoid holding all results in memory + paginated_usages = [] + total_count = 0 + + # First, build a mapping of file_id -> storage_key from the base tables + file_key_map = {} + for files_table in files_tables: + query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}" + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for row in rs: + file_key_map[str(row[0])] = f"{files_table['table']}:{row[1]}" + + # If filtering by key or file_id, verify it exists + if file_id and file_id not in file_key_map: + if output_json: + click.echo(json.dumps({"error": f"File ID {file_id} not found in base tables"})) + else: + click.echo(click.style(f"File ID {file_id} not found in base tables.", fg="red")) + return + + if key: + valid_prefixes = {f"upload_files:{key}", f"tool_files:{key}"} + matching_file_ids = [fid for fid, fkey in file_key_map.items() if fkey in valid_prefixes] + if not matching_file_ids: + if output_json: + click.echo(json.dumps({"error": f"Key {key} not found in base tables"})) + else: + click.echo(click.style(f"Key {key} not found in base tables.", fg="red")) + return + + guid_regexp = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}" + + # For each reference table/column, find matching file IDs and record the references + for ids_table in ids_tables: + src_filter = f"{ids_table['table']}.{ids_table['column']}" + + # Skip if src filter doesn't match (use fnmatch for wildcard patterns) + if src: + if "%" in src or "_" in src: + import fnmatch + + # Convert SQL LIKE wildcards to fnmatch wildcards (% -> *, _ -> ?) + pattern = src.replace("%", "*").replace("_", "?") + if not fnmatch.fnmatch(src_filter, pattern): + continue + else: + if src_filter != src: + continue + + if ids_table["type"] == "uuid": + # Direct UUID match + query = ( + f"SELECT {ids_table['pk_column']}, {ids_table['column']} " + f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" + ) + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for row in rs: + record_id = str(row[0]) + ref_file_id = str(row[1]) + if ref_file_id not in file_key_map: + continue + storage_key = file_key_map[ref_file_id] + + # Apply filters + if file_id and ref_file_id != file_id: + continue + if key and not storage_key.endswith(key): + continue + + # Only collect items within the requested page range + if offset <= total_count < offset + limit: + paginated_usages.append( + { + "src": f"{ids_table['table']}.{ids_table['column']}", + "record_id": record_id, + "file_id": ref_file_id, + "key": storage_key, + } + ) + total_count += 1 + + elif ids_table["type"] in ("text", "json"): + # Extract UUIDs from text/json content + column_cast = f"{ids_table['column']}::text" if ids_table["type"] == "json" else ids_table["column"] + query = ( + f"SELECT {ids_table['pk_column']}, {column_cast} " + f"FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL" + ) + with db.engine.begin() as conn: + rs = conn.execute(sa.text(query)) + for row in rs: + record_id = str(row[0]) + content = str(row[1]) + + # Find all UUIDs in the content + import re + + uuid_pattern = re.compile(guid_regexp, re.IGNORECASE) + matches = uuid_pattern.findall(content) + + for ref_file_id in matches: + if ref_file_id not in file_key_map: + continue + storage_key = file_key_map[ref_file_id] + + # Apply filters + if file_id and ref_file_id != file_id: + continue + if key and not storage_key.endswith(key): + continue + + # Only collect items within the requested page range + if offset <= total_count < offset + limit: + paginated_usages.append( + { + "src": f"{ids_table['table']}.{ids_table['column']}", + "record_id": record_id, + "file_id": ref_file_id, + "key": storage_key, + } + ) + total_count += 1 + + # Output results + if output_json: + result = { + "total": total_count, + "offset": offset, + "limit": limit, + "usages": paginated_usages, + } + click.echo(json.dumps(result, indent=2)) + else: + click.echo( + click.style(f"Found {total_count} file usages (showing {len(paginated_usages)} results)", fg="white") + ) + click.echo("") + + if not paginated_usages: + click.echo(click.style("No file usages found matching the specified criteria.", fg="yellow")) + return + + # Print table header + click.echo( + click.style( + f"{'Src (Table.Column)':<50} {'Record ID':<40} {'File ID':<40} {'Storage Key':<60}", + fg="cyan", + ) + ) + click.echo(click.style("-" * 190, fg="white")) + + # Print each usage + for usage in paginated_usages: + click.echo(f"{usage['src']:<50} {usage['record_id']:<40} {usage['file_id']:<40} {usage['key']:<60}") + + # Show pagination info + if offset + limit < total_count: + click.echo("") + click.echo( + click.style( + f"Showing {offset + 1}-{offset + len(paginated_usages)} of {total_count} results", fg="white" + ) + ) + click.echo(click.style(f"Use --offset {offset + limit} to see next page", fg="white")) + + @click.command("setup-system-tool-oauth-client", help="Setup system tool oauth client.") @click.option("--provider", prompt=True, help="Provider name") @click.option("--client-params", prompt=True, help="Client Params") diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index 43dddbd011..6a04171d2d 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -587,6 +587,11 @@ class LoggingConfig(BaseSettings): default="INFO", ) + LOG_OUTPUT_FORMAT: Literal["text", "json"] = Field( + description="Log output format: 'text' for human-readable, 'json' for structured JSON logs.", + default="text", + ) + LOG_FILE: str | None = Field( description="File path for log output.", default=None, diff --git a/api/configs/middleware/vdb/milvus_config.py b/api/configs/middleware/vdb/milvus_config.py index 05cee51cc9..eb9b0ac2ab 100644 --- a/api/configs/middleware/vdb/milvus_config.py +++ b/api/configs/middleware/vdb/milvus_config.py @@ -16,7 +16,6 @@ class MilvusConfig(BaseSettings): description="Authentication token for Milvus, if token-based authentication is enabled", default=None, ) - MILVUS_USER: str | None = Field( description="Username for authenticating with Milvus, if username/password authentication is enabled", default=None, diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py index c16dcfd91f..ef2f86d4be 100644 --- a/api/controllers/console/app/conversation.py +++ b/api/controllers/console/app/conversation.py @@ -13,7 +13,6 @@ from controllers.console.app.wraps import get_app_model from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db -from fields.conversation_fields import MessageTextField from fields.raws import FilesContainedField from libs.datetime_utils import naive_utc_now, parse_time_range from libs.helper import TimestampField @@ -177,6 +176,12 @@ annotation_hit_history_model = console_ns.model( }, ) + +class MessageTextField(fields.Raw): + def format(self, value): + return value[0]["text"] if value else "" + + # Simple message detail model simple_message_detail_model = console_ns.model( "SimpleMessageDetail", diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index e94768f985..ac78d3854b 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -751,12 +751,12 @@ class DocumentApi(DocumentResource): elif metadata == "without": dataset_process_rules = DatasetService.get_process_rules(dataset_id) document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} - data_source_info = document.data_source_detail_dict response = { "id": document.id, "position": document.position, "data_source_type": document.data_source_type, - "data_source_info": data_source_info, + "data_source_info": document.data_source_info_dict, + "data_source_detail_dict": document.data_source_detail_dict, "dataset_process_rule_id": document.dataset_process_rule_id, "dataset_process_rule": dataset_process_rules, "document_process_rule": document_process_rules, @@ -784,12 +784,12 @@ class DocumentApi(DocumentResource): else: dataset_process_rules = DatasetService.get_process_rules(dataset_id) document_process_rules = document.dataset_process_rule.to_dict() if document.dataset_process_rule else {} - data_source_info = document.data_source_detail_dict response = { "id": document.id, "position": document.position, "data_source_type": document.data_source_type, - "data_source_info": data_source_info, + "data_source_info": document.data_source_info_dict, + "data_source_detail_dict": document.data_source_detail_dict, "dataset_process_rule_id": document.dataset_process_rule_id, "dataset_process_rule": dataset_process_rules, "document_process_rule": document_process_rules, diff --git a/api/controllers/console/explore/conversation.py b/api/controllers/console/explore/conversation.py index 51995b8b8a..933c80f509 100644 --- a/api/controllers/console/explore/conversation.py +++ b/api/controllers/console/explore/conversation.py @@ -1,8 +1,7 @@ from typing import Any from flask import request -from flask_restx import marshal_with -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, TypeAdapter, model_validator from sqlalchemy.orm import Session from werkzeug.exceptions import NotFound @@ -11,7 +10,11 @@ from controllers.console.explore.error import NotChatAppError from controllers.console.explore.wraps import InstalledAppResource from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db -from fields.conversation_fields import conversation_infinite_scroll_pagination_fields, simple_conversation_fields +from fields.conversation_fields import ( + ConversationInfiniteScrollPagination, + ResultResponse, + SimpleConversation, +) from libs.helper import UUIDStrOrEmpty from libs.login import current_user from models import Account @@ -49,7 +52,6 @@ register_schema_models(console_ns, ConversationListQuery, ConversationRenamePayl endpoint="installed_app_conversations", ) class ConversationListApi(InstalledAppResource): - @marshal_with(conversation_infinite_scroll_pagination_fields) @console_ns.expect(console_ns.models[ConversationListQuery.__name__]) def get(self, installed_app): app_model = installed_app.app @@ -73,7 +75,7 @@ class ConversationListApi(InstalledAppResource): if not isinstance(current_user, Account): raise ValueError("current_user must be an Account instance") with Session(db.engine) as session: - return WebConversationService.pagination_by_last_id( + pagination = WebConversationService.pagination_by_last_id( session=session, app_model=app_model, user=current_user, @@ -82,6 +84,13 @@ class ConversationListApi(InstalledAppResource): invoke_from=InvokeFrom.EXPLORE, pinned=args.pinned, ) + adapter = TypeAdapter(SimpleConversation) + conversations = [adapter.validate_python(item, from_attributes=True) for item in pagination.data] + return ConversationInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=conversations, + ).model_dump(mode="json") except LastConversationNotExistsError: raise NotFound("Last Conversation Not Exists.") @@ -105,7 +114,7 @@ class ConversationApi(InstalledAppResource): except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - return {"result": "success"}, 204 + return ResultResponse(result="success").model_dump(mode="json"), 204 @console_ns.route( @@ -113,7 +122,6 @@ class ConversationApi(InstalledAppResource): endpoint="installed_app_conversation_rename", ) class ConversationRenameApi(InstalledAppResource): - @marshal_with(simple_conversation_fields) @console_ns.expect(console_ns.models[ConversationRenamePayload.__name__]) def post(self, installed_app, c_id): app_model = installed_app.app @@ -128,9 +136,14 @@ class ConversationRenameApi(InstalledAppResource): try: if not isinstance(current_user, Account): raise ValueError("current_user must be an Account instance") - return ConversationService.rename( + conversation = ConversationService.rename( app_model, conversation_id, current_user, payload.name, payload.auto_generate ) + return ( + TypeAdapter(SimpleConversation) + .validate_python(conversation, from_attributes=True) + .model_dump(mode="json") + ) except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") @@ -155,7 +168,7 @@ class ConversationPinApi(InstalledAppResource): except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") @console_ns.route( @@ -174,4 +187,4 @@ class ConversationUnPinApi(InstalledAppResource): raise ValueError("current_user must be an Account instance") WebConversationService.unpin(app_model, conversation_id, current_user) - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py index d596d60b36..88487ac96f 100644 --- a/api/controllers/console/explore/message.py +++ b/api/controllers/console/explore/message.py @@ -2,8 +2,7 @@ import logging from typing import Literal from flask import request -from flask_restx import marshal_with -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, TypeAdapter from werkzeug.exceptions import InternalServerError, NotFound from controllers.common.schema import register_schema_models @@ -23,7 +22,8 @@ from controllers.console.explore.wraps import InstalledAppResource from core.app.entities.app_invoke_entities import InvokeFrom from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError from core.model_runtime.errors.invoke import InvokeError -from fields.message_fields import message_infinite_scroll_pagination_fields +from fields.conversation_fields import ResultResponse +from fields.message_fields import MessageInfiniteScrollPagination, MessageListItem, SuggestedQuestionsResponse from libs import helper from libs.helper import UUIDStrOrEmpty from libs.login import current_account_with_tenant @@ -66,7 +66,6 @@ register_schema_models(console_ns, MessageListQuery, MessageFeedbackPayload, Mor endpoint="installed_app_messages", ) class MessageListApi(InstalledAppResource): - @marshal_with(message_infinite_scroll_pagination_fields) @console_ns.expect(console_ns.models[MessageListQuery.__name__]) def get(self, installed_app): current_user, _ = current_account_with_tenant() @@ -78,13 +77,20 @@ class MessageListApi(InstalledAppResource): args = MessageListQuery.model_validate(request.args.to_dict()) try: - return MessageService.pagination_by_first_id( + pagination = MessageService.pagination_by_first_id( app_model, current_user, str(args.conversation_id), str(args.first_id) if args.first_id else None, args.limit, ) + adapter = TypeAdapter(MessageListItem) + items = [adapter.validate_python(message, from_attributes=True) for message in pagination.data] + return MessageInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=items, + ).model_dump(mode="json") except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") except FirstMessageNotExistsError: @@ -116,7 +122,7 @@ class MessageFeedbackApi(InstalledAppResource): except MessageNotExistsError: raise NotFound("Message Not Exists.") - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") @console_ns.route( @@ -201,4 +207,4 @@ class MessageSuggestedQuestionApi(InstalledAppResource): logger.exception("internal server error.") raise InternalServerError() - return {"data": questions} + return SuggestedQuestionsResponse(data=questions).model_dump(mode="json") diff --git a/api/controllers/console/explore/saved_message.py b/api/controllers/console/explore/saved_message.py index bc7b8e7651..ea3de91741 100644 --- a/api/controllers/console/explore/saved_message.py +++ b/api/controllers/console/explore/saved_message.py @@ -1,14 +1,14 @@ from flask import request -from flask_restx import fields, marshal_with -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, TypeAdapter from werkzeug.exceptions import NotFound from controllers.common.schema import register_schema_models from controllers.console import console_ns from controllers.console.explore.error import NotCompletionAppError from controllers.console.explore.wraps import InstalledAppResource -from fields.conversation_fields import message_file_fields -from libs.helper import TimestampField, UUIDStrOrEmpty +from fields.conversation_fields import ResultResponse +from fields.message_fields import SavedMessageInfiniteScrollPagination, SavedMessageItem +from libs.helper import UUIDStrOrEmpty from libs.login import current_account_with_tenant from services.errors.message import MessageNotExistsError from services.saved_message_service import SavedMessageService @@ -26,28 +26,8 @@ class SavedMessageCreatePayload(BaseModel): register_schema_models(console_ns, SavedMessageListQuery, SavedMessageCreatePayload) -feedback_fields = {"rating": fields.String} - -message_fields = { - "id": fields.String, - "inputs": fields.Raw, - "query": fields.String, - "answer": fields.String, - "message_files": fields.List(fields.Nested(message_file_fields)), - "feedback": fields.Nested(feedback_fields, attribute="user_feedback", allow_null=True), - "created_at": TimestampField, -} - - @console_ns.route("/installed-apps//saved-messages", endpoint="installed_app_saved_messages") class SavedMessageListApi(InstalledAppResource): - saved_message_infinite_scroll_pagination_fields = { - "limit": fields.Integer, - "has_more": fields.Boolean, - "data": fields.List(fields.Nested(message_fields)), - } - - @marshal_with(saved_message_infinite_scroll_pagination_fields) @console_ns.expect(console_ns.models[SavedMessageListQuery.__name__]) def get(self, installed_app): current_user, _ = current_account_with_tenant() @@ -57,12 +37,19 @@ class SavedMessageListApi(InstalledAppResource): args = SavedMessageListQuery.model_validate(request.args.to_dict()) - return SavedMessageService.pagination_by_last_id( + pagination = SavedMessageService.pagination_by_last_id( app_model, current_user, str(args.last_id) if args.last_id else None, args.limit, ) + adapter = TypeAdapter(SavedMessageItem) + items = [adapter.validate_python(message, from_attributes=True) for message in pagination.data] + return SavedMessageInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=items, + ).model_dump(mode="json") @console_ns.expect(console_ns.models[SavedMessageCreatePayload.__name__]) def post(self, installed_app): @@ -78,7 +65,7 @@ class SavedMessageListApi(InstalledAppResource): except MessageNotExistsError: raise NotFound("Message Not Exists.") - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") @console_ns.route( @@ -96,4 +83,4 @@ class SavedMessageApi(InstalledAppResource): SavedMessageService.delete(app_model, current_user, message_id) - return {"result": "success"}, 204 + return ResultResponse(result="success").model_dump(mode="json"), 204 diff --git a/api/controllers/console/workspace/trigger_providers.py b/api/controllers/console/workspace/trigger_providers.py index 497e62b790..c13bfd986e 100644 --- a/api/controllers/console/workspace/trigger_providers.py +++ b/api/controllers/console/workspace/trigger_providers.py @@ -4,12 +4,11 @@ from typing import Any from flask import make_response, redirect, request from flask_restx import Resource, reqparse -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from sqlalchemy.orm import Session from werkzeug.exceptions import BadRequest, Forbidden from configs import dify_config -from constants import HIDDEN_VALUE, UNKNOWN_VALUE from controllers.web.error import NotFoundError from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.plugin_daemon import CredentialType @@ -44,6 +43,12 @@ class TriggerSubscriptionUpdateRequest(BaseModel): parameters: Mapping[str, Any] | None = Field(default=None, description="The parameters for the subscription") properties: Mapping[str, Any] | None = Field(default=None, description="The properties for the subscription") + @model_validator(mode="after") + def check_at_least_one_field(self): + if all(v is None for v in (self.name, self.credentials, self.parameters, self.properties)): + raise ValueError("At least one of name, credentials, parameters, or properties must be provided") + return self + class TriggerSubscriptionVerifyRequest(BaseModel): """Request payload for verifying subscription credentials.""" @@ -333,7 +338,7 @@ class TriggerSubscriptionUpdateApi(Resource): user = current_user assert user.current_tenant_id is not None - args = TriggerSubscriptionUpdateRequest.model_validate(console_ns.payload) + request = TriggerSubscriptionUpdateRequest.model_validate(console_ns.payload) subscription = TriggerProviderService.get_subscription_by_id( tenant_id=user.current_tenant_id, @@ -345,50 +350,32 @@ class TriggerSubscriptionUpdateApi(Resource): provider_id = TriggerProviderID(subscription.provider_id) try: - # rename only - if ( - args.name is not None - and args.credentials is None - and args.parameters is None - and args.properties is None - ): + # For rename only, just update the name + rename = request.name is not None and not any((request.credentials, request.parameters, request.properties)) + # When credential type is UNAUTHORIZED, it indicates the subscription was manually created + # For Manually created subscription, they dont have credentials, parameters + # They only have name and properties(which is input by user) + manually_created = subscription.credential_type == CredentialType.UNAUTHORIZED + if rename or manually_created: TriggerProviderService.update_trigger_subscription( tenant_id=user.current_tenant_id, subscription_id=subscription_id, - name=args.name, + name=request.name, + properties=request.properties, ) return 200 - # rebuild for create automatically by the provider - match subscription.credential_type: - case CredentialType.UNAUTHORIZED: - TriggerProviderService.update_trigger_subscription( - tenant_id=user.current_tenant_id, - subscription_id=subscription_id, - name=args.name, - properties=args.properties, - ) - return 200 - case CredentialType.API_KEY | CredentialType.OAUTH2: - if args.credentials: - new_credentials: dict[str, Any] = { - key: value if value != HIDDEN_VALUE else subscription.credentials.get(key, UNKNOWN_VALUE) - for key, value in args.credentials.items() - } - else: - new_credentials = subscription.credentials - - TriggerProviderService.rebuild_trigger_subscription( - tenant_id=user.current_tenant_id, - name=args.name, - provider_id=provider_id, - subscription_id=subscription_id, - credentials=new_credentials, - parameters=args.parameters or subscription.parameters, - ) - return 200 - case _: - raise BadRequest("Invalid credential type") + # For the rest cases(API_KEY, OAUTH2) + # we need to call third party provider(e.g. GitHub) to rebuild the subscription + TriggerProviderService.rebuild_trigger_subscription( + tenant_id=user.current_tenant_id, + name=request.name, + provider_id=provider_id, + subscription_id=subscription_id, + credentials=request.credentials or subscription.credentials, + parameters=request.parameters or subscription.parameters, + ) + return 200 except ValueError as e: raise BadRequest(str(e)) except Exception as e: diff --git a/api/controllers/service_api/app/conversation.py b/api/controllers/service_api/app/conversation.py index 40e4bde389..62e8258e25 100644 --- a/api/controllers/service_api/app/conversation.py +++ b/api/controllers/service_api/app/conversation.py @@ -3,8 +3,7 @@ from uuid import UUID from flask import request from flask_restx import Resource -from flask_restx._http import HTTPStatus -from pydantic import BaseModel, Field, field_validator, model_validator +from pydantic import BaseModel, Field, TypeAdapter, field_validator, model_validator from sqlalchemy.orm import Session from werkzeug.exceptions import BadRequest, NotFound @@ -16,9 +15,9 @@ from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db from fields.conversation_fields import ( - build_conversation_delete_model, - build_conversation_infinite_scroll_pagination_model, - build_simple_conversation_model, + ConversationDelete, + ConversationInfiniteScrollPagination, + SimpleConversation, ) from fields.conversation_variable_fields import ( build_conversation_variable_infinite_scroll_pagination_model, @@ -105,7 +104,6 @@ class ConversationApi(Resource): } ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) - @service_api_ns.marshal_with(build_conversation_infinite_scroll_pagination_model(service_api_ns)) def get(self, app_model: App, end_user: EndUser): """List all conversations for the current user. @@ -120,7 +118,7 @@ class ConversationApi(Resource): try: with Session(db.engine) as session: - return ConversationService.pagination_by_last_id( + pagination = ConversationService.pagination_by_last_id( session=session, app_model=app_model, user=end_user, @@ -129,6 +127,13 @@ class ConversationApi(Resource): invoke_from=InvokeFrom.SERVICE_API, sort_by=query_args.sort_by, ) + adapter = TypeAdapter(SimpleConversation) + conversations = [adapter.validate_python(item, from_attributes=True) for item in pagination.data] + return ConversationInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=conversations, + ).model_dump(mode="json") except services.errors.conversation.LastConversationNotExistsError: raise NotFound("Last Conversation Not Exists.") @@ -146,7 +151,6 @@ class ConversationDetailApi(Resource): } ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) - @service_api_ns.marshal_with(build_conversation_delete_model(service_api_ns), code=HTTPStatus.NO_CONTENT) def delete(self, app_model: App, end_user: EndUser, c_id): """Delete a specific conversation.""" app_mode = AppMode.value_of(app_model.mode) @@ -159,7 +163,7 @@ class ConversationDetailApi(Resource): ConversationService.delete(app_model, conversation_id, end_user) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - return {"result": "success"}, 204 + return ConversationDelete(result="success").model_dump(mode="json"), 204 @service_api_ns.route("/conversations//name") @@ -176,7 +180,6 @@ class ConversationRenameApi(Resource): } ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON)) - @service_api_ns.marshal_with(build_simple_conversation_model(service_api_ns)) def post(self, app_model: App, end_user: EndUser, c_id): """Rename a conversation or auto-generate a name.""" app_mode = AppMode.value_of(app_model.mode) @@ -188,7 +191,14 @@ class ConversationRenameApi(Resource): payload = ConversationRenamePayload.model_validate(service_api_ns.payload or {}) try: - return ConversationService.rename(app_model, conversation_id, end_user, payload.name, payload.auto_generate) + conversation = ConversationService.rename( + app_model, conversation_id, end_user, payload.name, payload.auto_generate + ) + return ( + TypeAdapter(SimpleConversation) + .validate_python(conversation, from_attributes=True) + .model_dump(mode="json") + ) except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") diff --git a/api/controllers/service_api/app/message.py b/api/controllers/service_api/app/message.py index d342f4e661..8981bbd7d5 100644 --- a/api/controllers/service_api/app/message.py +++ b/api/controllers/service_api/app/message.py @@ -1,11 +1,10 @@ -import json import logging from typing import Literal from uuid import UUID from flask import request -from flask_restx import Namespace, Resource, fields -from pydantic import BaseModel, Field +from flask_restx import Resource +from pydantic import BaseModel, Field, TypeAdapter from werkzeug.exceptions import BadRequest, InternalServerError, NotFound import services @@ -14,10 +13,8 @@ from controllers.service_api import service_api_ns from controllers.service_api.app.error import NotChatAppError from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token from core.app.entities.app_invoke_entities import InvokeFrom -from fields.conversation_fields import build_message_file_model -from fields.message_fields import build_agent_thought_model, build_feedback_model -from fields.raws import FilesContainedField -from libs.helper import TimestampField +from fields.conversation_fields import ResultResponse +from fields.message_fields import MessageInfiniteScrollPagination, MessageListItem from models.model import App, AppMode, EndUser from services.errors.message import ( FirstMessageNotExistsError, @@ -48,49 +45,6 @@ class FeedbackListQuery(BaseModel): register_schema_models(service_api_ns, MessageListQuery, MessageFeedbackPayload, FeedbackListQuery) -def build_message_model(api_or_ns: Namespace): - """Build the message model for the API or Namespace.""" - # First build the nested models - feedback_model = build_feedback_model(api_or_ns) - agent_thought_model = build_agent_thought_model(api_or_ns) - message_file_model = build_message_file_model(api_or_ns) - - # Then build the message fields with nested models - message_fields = { - "id": fields.String, - "conversation_id": fields.String, - "parent_message_id": fields.String, - "inputs": FilesContainedField, - "query": fields.String, - "answer": fields.String(attribute="re_sign_file_url_answer"), - "message_files": fields.List(fields.Nested(message_file_model)), - "feedback": fields.Nested(feedback_model, attribute="user_feedback", allow_null=True), - "retriever_resources": fields.Raw( - attribute=lambda obj: json.loads(obj.message_metadata).get("retriever_resources", []) - if obj.message_metadata - else [] - ), - "created_at": TimestampField, - "agent_thoughts": fields.List(fields.Nested(agent_thought_model)), - "status": fields.String, - "error": fields.String, - } - return api_or_ns.model("Message", message_fields) - - -def build_message_infinite_scroll_pagination_model(api_or_ns: Namespace): - """Build the message infinite scroll pagination model for the API or Namespace.""" - # Build the nested message model first - message_model = build_message_model(api_or_ns) - - message_infinite_scroll_pagination_fields = { - "limit": fields.Integer, - "has_more": fields.Boolean, - "data": fields.List(fields.Nested(message_model)), - } - return api_or_ns.model("MessageInfiniteScrollPagination", message_infinite_scroll_pagination_fields) - - @service_api_ns.route("/messages") class MessageListApi(Resource): @service_api_ns.expect(service_api_ns.models[MessageListQuery.__name__]) @@ -104,7 +58,6 @@ class MessageListApi(Resource): } ) @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY)) - @service_api_ns.marshal_with(build_message_infinite_scroll_pagination_model(service_api_ns)) def get(self, app_model: App, end_user: EndUser): """List messages in a conversation. @@ -119,9 +72,16 @@ class MessageListApi(Resource): first_id = str(query_args.first_id) if query_args.first_id else None try: - return MessageService.pagination_by_first_id( + pagination = MessageService.pagination_by_first_id( app_model, end_user, conversation_id, first_id, query_args.limit ) + adapter = TypeAdapter(MessageListItem) + items = [adapter.validate_python(message, from_attributes=True) for message in pagination.data] + return MessageInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=items, + ).model_dump(mode="json") except services.errors.conversation.ConversationNotExistsError: raise NotFound("Conversation Not Exists.") except FirstMessageNotExistsError: @@ -162,7 +122,7 @@ class MessageFeedbackApi(Resource): except MessageNotExistsError: raise NotFound("Message Not Exists.") - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") @service_api_ns.route("/app/feedbacks") diff --git a/api/controllers/web/conversation.py b/api/controllers/web/conversation.py index 86e19423e5..527eef6094 100644 --- a/api/controllers/web/conversation.py +++ b/api/controllers/web/conversation.py @@ -1,5 +1,6 @@ -from flask_restx import fields, marshal_with, reqparse +from flask_restx import reqparse from flask_restx.inputs import int_range +from pydantic import TypeAdapter from sqlalchemy.orm import Session from werkzeug.exceptions import NotFound @@ -8,7 +9,11 @@ from controllers.web.error import NotChatAppError from controllers.web.wraps import WebApiResource from core.app.entities.app_invoke_entities import InvokeFrom from extensions.ext_database import db -from fields.conversation_fields import conversation_infinite_scroll_pagination_fields, simple_conversation_fields +from fields.conversation_fields import ( + ConversationInfiniteScrollPagination, + ResultResponse, + SimpleConversation, +) from libs.helper import uuid_value from models.model import AppMode from services.conversation_service import ConversationService @@ -54,7 +59,6 @@ class ConversationListApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(conversation_infinite_scroll_pagination_fields) def get(self, app_model, end_user): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -82,7 +86,7 @@ class ConversationListApi(WebApiResource): try: with Session(db.engine) as session: - return WebConversationService.pagination_by_last_id( + pagination = WebConversationService.pagination_by_last_id( session=session, app_model=app_model, user=end_user, @@ -92,16 +96,19 @@ class ConversationListApi(WebApiResource): pinned=pinned, sort_by=args["sort_by"], ) + adapter = TypeAdapter(SimpleConversation) + conversations = [adapter.validate_python(item, from_attributes=True) for item in pagination.data] + return ConversationInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=conversations, + ).model_dump(mode="json") except LastConversationNotExistsError: raise NotFound("Last Conversation Not Exists.") @web_ns.route("/conversations/") class ConversationApi(WebApiResource): - delete_response_fields = { - "result": fields.String, - } - @web_ns.doc("Delete Conversation") @web_ns.doc(description="Delete a specific conversation.") @web_ns.doc(params={"c_id": {"description": "Conversation UUID", "type": "string", "required": True}}) @@ -115,7 +122,6 @@ class ConversationApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(delete_response_fields) def delete(self, app_model, end_user, c_id): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -126,7 +132,7 @@ class ConversationApi(WebApiResource): ConversationService.delete(app_model, conversation_id, end_user) except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - return {"result": "success"}, 204 + return ResultResponse(result="success").model_dump(mode="json"), 204 @web_ns.route("/conversations//name") @@ -155,7 +161,6 @@ class ConversationRenameApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(simple_conversation_fields) def post(self, app_model, end_user, c_id): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -171,17 +176,20 @@ class ConversationRenameApi(WebApiResource): args = parser.parse_args() try: - return ConversationService.rename(app_model, conversation_id, end_user, args["name"], args["auto_generate"]) + conversation = ConversationService.rename( + app_model, conversation_id, end_user, args["name"], args["auto_generate"] + ) + return ( + TypeAdapter(SimpleConversation) + .validate_python(conversation, from_attributes=True) + .model_dump(mode="json") + ) except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") @web_ns.route("/conversations//pin") class ConversationPinApi(WebApiResource): - pin_response_fields = { - "result": fields.String, - } - @web_ns.doc("Pin Conversation") @web_ns.doc(description="Pin a specific conversation to keep it at the top of the list.") @web_ns.doc(params={"c_id": {"description": "Conversation UUID", "type": "string", "required": True}}) @@ -195,7 +203,6 @@ class ConversationPinApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(pin_response_fields) def patch(self, app_model, end_user, c_id): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -208,15 +215,11 @@ class ConversationPinApi(WebApiResource): except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") @web_ns.route("/conversations//unpin") class ConversationUnPinApi(WebApiResource): - unpin_response_fields = { - "result": fields.String, - } - @web_ns.doc("Unpin Conversation") @web_ns.doc(description="Unpin a specific conversation to remove it from the top of the list.") @web_ns.doc(params={"c_id": {"description": "Conversation UUID", "type": "string", "required": True}}) @@ -230,7 +233,6 @@ class ConversationUnPinApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(unpin_response_fields) def patch(self, app_model, end_user, c_id): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -239,4 +241,4 @@ class ConversationUnPinApi(WebApiResource): conversation_id = str(c_id) WebConversationService.unpin(app_model, conversation_id, end_user) - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") diff --git a/api/controllers/web/message.py b/api/controllers/web/message.py index 5c7ea9e69a..80035ba818 100644 --- a/api/controllers/web/message.py +++ b/api/controllers/web/message.py @@ -2,8 +2,7 @@ import logging from typing import Literal from flask import request -from flask_restx import fields, marshal_with -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, TypeAdapter, field_validator from werkzeug.exceptions import InternalServerError, NotFound from controllers.common.schema import register_schema_models @@ -22,11 +21,10 @@ from controllers.web.wraps import WebApiResource from core.app.entities.app_invoke_entities import InvokeFrom from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError from core.model_runtime.errors.invoke import InvokeError -from fields.conversation_fields import message_file_fields -from fields.message_fields import agent_thought_fields, feedback_fields, retriever_resource_fields -from fields.raws import FilesContainedField +from fields.conversation_fields import ResultResponse +from fields.message_fields import SuggestedQuestionsResponse, WebMessageInfiniteScrollPagination, WebMessageListItem from libs import helper -from libs.helper import TimestampField, uuid_value +from libs.helper import uuid_value from models.model import AppMode from services.app_generate_service import AppGenerateService from services.errors.app import MoreLikeThisDisabledError @@ -70,29 +68,6 @@ register_schema_models(web_ns, MessageListQuery, MessageFeedbackPayload, Message @web_ns.route("/messages") class MessageListApi(WebApiResource): - message_fields = { - "id": fields.String, - "conversation_id": fields.String, - "parent_message_id": fields.String, - "inputs": FilesContainedField, - "query": fields.String, - "answer": fields.String(attribute="re_sign_file_url_answer"), - "message_files": fields.List(fields.Nested(message_file_fields)), - "feedback": fields.Nested(feedback_fields, attribute="user_feedback", allow_null=True), - "retriever_resources": fields.List(fields.Nested(retriever_resource_fields)), - "created_at": TimestampField, - "agent_thoughts": fields.List(fields.Nested(agent_thought_fields)), - "metadata": fields.Raw(attribute="message_metadata_dict"), - "status": fields.String, - "error": fields.String, - } - - message_infinite_scroll_pagination_fields = { - "limit": fields.Integer, - "has_more": fields.Boolean, - "data": fields.List(fields.Nested(message_fields)), - } - @web_ns.doc("Get Message List") @web_ns.doc(description="Retrieve paginated list of messages from a conversation in a chat application.") @web_ns.doc( @@ -121,7 +96,6 @@ class MessageListApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(message_infinite_scroll_pagination_fields) def get(self, app_model, end_user): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -131,9 +105,16 @@ class MessageListApi(WebApiResource): query = MessageListQuery.model_validate(raw_args) try: - return MessageService.pagination_by_first_id( + pagination = MessageService.pagination_by_first_id( app_model, end_user, query.conversation_id, query.first_id, query.limit ) + adapter = TypeAdapter(WebMessageListItem) + items = [adapter.validate_python(message, from_attributes=True) for message in pagination.data] + return WebMessageInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=items, + ).model_dump(mode="json") except ConversationNotExistsError: raise NotFound("Conversation Not Exists.") except FirstMessageNotExistsError: @@ -142,10 +123,6 @@ class MessageListApi(WebApiResource): @web_ns.route("/messages//feedbacks") class MessageFeedbackApi(WebApiResource): - feedback_response_fields = { - "result": fields.String, - } - @web_ns.doc("Create Message Feedback") @web_ns.doc(description="Submit feedback (like/dislike) for a specific message.") @web_ns.doc(params={"message_id": {"description": "Message UUID", "type": "string", "required": True}}) @@ -170,7 +147,6 @@ class MessageFeedbackApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(feedback_response_fields) def post(self, app_model, end_user, message_id): message_id = str(message_id) @@ -187,7 +163,7 @@ class MessageFeedbackApi(WebApiResource): except MessageNotExistsError: raise NotFound("Message Not Exists.") - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") @web_ns.route("/messages//more-like-this") @@ -247,10 +223,6 @@ class MessageMoreLikeThisApi(WebApiResource): @web_ns.route("/messages//suggested-questions") class MessageSuggestedQuestionApi(WebApiResource): - suggested_questions_response_fields = { - "data": fields.List(fields.String), - } - @web_ns.doc("Get Suggested Questions") @web_ns.doc(description="Get suggested follow-up questions after a message (chat apps only).") @web_ns.doc(params={"message_id": {"description": "Message UUID", "type": "string", "required": True}}) @@ -264,7 +236,6 @@ class MessageSuggestedQuestionApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(suggested_questions_response_fields) def get(self, app_model, end_user, message_id): app_mode = AppMode.value_of(app_model.mode) if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}: @@ -277,7 +248,6 @@ class MessageSuggestedQuestionApi(WebApiResource): app_model=app_model, user=end_user, message_id=message_id, invoke_from=InvokeFrom.WEB_APP ) # questions is a list of strings, not a list of Message objects - # so we can directly return it except MessageNotExistsError: raise NotFound("Message not found") except ConversationNotExistsError: @@ -296,4 +266,4 @@ class MessageSuggestedQuestionApi(WebApiResource): logger.exception("internal server error.") raise InternalServerError() - return {"data": questions} + return SuggestedQuestionsResponse(data=questions).model_dump(mode="json") diff --git a/api/controllers/web/saved_message.py b/api/controllers/web/saved_message.py index 865f3610a7..4e20690e9e 100644 --- a/api/controllers/web/saved_message.py +++ b/api/controllers/web/saved_message.py @@ -1,40 +1,20 @@ -from flask_restx import fields, marshal_with, reqparse +from flask_restx import reqparse from flask_restx.inputs import int_range +from pydantic import TypeAdapter from werkzeug.exceptions import NotFound from controllers.web import web_ns from controllers.web.error import NotCompletionAppError from controllers.web.wraps import WebApiResource -from fields.conversation_fields import message_file_fields -from libs.helper import TimestampField, uuid_value +from fields.conversation_fields import ResultResponse +from fields.message_fields import SavedMessageInfiniteScrollPagination, SavedMessageItem +from libs.helper import uuid_value from services.errors.message import MessageNotExistsError from services.saved_message_service import SavedMessageService -feedback_fields = {"rating": fields.String} - -message_fields = { - "id": fields.String, - "inputs": fields.Raw, - "query": fields.String, - "answer": fields.String, - "message_files": fields.List(fields.Nested(message_file_fields)), - "feedback": fields.Nested(feedback_fields, attribute="user_feedback", allow_null=True), - "created_at": TimestampField, -} - @web_ns.route("/saved-messages") class SavedMessageListApi(WebApiResource): - saved_message_infinite_scroll_pagination_fields = { - "limit": fields.Integer, - "has_more": fields.Boolean, - "data": fields.List(fields.Nested(message_fields)), - } - - post_response_fields = { - "result": fields.String, - } - @web_ns.doc("Get Saved Messages") @web_ns.doc(description="Retrieve paginated list of saved messages for a completion application.") @web_ns.doc( @@ -58,7 +38,6 @@ class SavedMessageListApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(saved_message_infinite_scroll_pagination_fields) def get(self, app_model, end_user): if app_model.mode != "completion": raise NotCompletionAppError() @@ -70,7 +49,14 @@ class SavedMessageListApi(WebApiResource): ) args = parser.parse_args() - return SavedMessageService.pagination_by_last_id(app_model, end_user, args["last_id"], args["limit"]) + pagination = SavedMessageService.pagination_by_last_id(app_model, end_user, args["last_id"], args["limit"]) + adapter = TypeAdapter(SavedMessageItem) + items = [adapter.validate_python(message, from_attributes=True) for message in pagination.data] + return SavedMessageInfiniteScrollPagination( + limit=pagination.limit, + has_more=pagination.has_more, + data=items, + ).model_dump(mode="json") @web_ns.doc("Save Message") @web_ns.doc(description="Save a specific message for later reference.") @@ -89,7 +75,6 @@ class SavedMessageListApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(post_response_fields) def post(self, app_model, end_user): if app_model.mode != "completion": raise NotCompletionAppError() @@ -102,15 +87,11 @@ class SavedMessageListApi(WebApiResource): except MessageNotExistsError: raise NotFound("Message Not Exists.") - return {"result": "success"} + return ResultResponse(result="success").model_dump(mode="json") @web_ns.route("/saved-messages/") class SavedMessageApi(WebApiResource): - delete_response_fields = { - "result": fields.String, - } - @web_ns.doc("Delete Saved Message") @web_ns.doc(description="Remove a message from saved messages.") @web_ns.doc(params={"message_id": {"description": "Message UUID to delete", "type": "string", "required": True}}) @@ -124,7 +105,6 @@ class SavedMessageApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(delete_response_fields) def delete(self, app_model, end_user, message_id): message_id = str(message_id) @@ -133,4 +113,4 @@ class SavedMessageApi(WebApiResource): SavedMessageService.delete(app_model, end_user, message_id) - return {"result": "success"}, 204 + return ResultResponse(result="success").model_dump(mode="json"), 204 diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py index 0b36969cf9..1785cbde4c 100644 --- a/api/core/helper/ssrf_proxy.py +++ b/api/core/helper/ssrf_proxy.py @@ -88,7 +88,41 @@ def _get_user_provided_host_header(headers: dict | None) -> str | None: return None +def _inject_trace_headers(headers: dict | None) -> dict: + """ + Inject W3C traceparent header for distributed tracing. + + When OTEL is enabled, HTTPXClientInstrumentor handles trace propagation automatically. + When OTEL is disabled, we manually inject the traceparent header. + """ + if headers is None: + headers = {} + + # Skip if already present (case-insensitive check) + for key in headers: + if key.lower() == "traceparent": + return headers + + # Skip if OTEL is enabled - HTTPXClientInstrumentor handles this automatically + if dify_config.ENABLE_OTEL: + return headers + + # Generate and inject traceparent for non-OTEL scenarios + try: + from core.helper.trace_id_helper import generate_traceparent_header + + traceparent = generate_traceparent_header() + if traceparent: + headers["traceparent"] = traceparent + except Exception: + # Silently ignore errors to avoid breaking requests + logger.debug("Failed to generate traceparent header", exc_info=True) + + return headers + + def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): + # Convert requests-style allow_redirects to httpx-style follow_redirects if "allow_redirects" in kwargs: allow_redirects = kwargs.pop("allow_redirects") if "follow_redirects" not in kwargs: @@ -106,18 +140,21 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs): verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY) client = _get_ssrf_client(verify_option) + # Inject traceparent header for distributed tracing (when OTEL is not enabled) + headers = kwargs.get("headers") or {} + headers = _inject_trace_headers(headers) + kwargs["headers"] = headers + # Preserve user-provided Host header # When using a forward proxy, httpx may override the Host header based on the URL. # We extract and preserve any explicitly set Host header to support virtual hosting. - headers = kwargs.get("headers", {}) user_provided_host = _get_user_provided_host_header(headers) retries = 0 while retries <= max_retries: try: - # Build the request manually to preserve the Host header - # httpx may override the Host header when using a proxy, so we use - # the request API to explicitly set headers before sending + # Preserve the user-provided Host header + # httpx may override the Host header when using a proxy headers = {k: v for k, v in headers.items() if k.lower() != "host"} if user_provided_host is not None: headers["host"] = user_provided_host diff --git a/api/core/helper/trace_id_helper.py b/api/core/helper/trace_id_helper.py index 820502e558..e827859109 100644 --- a/api/core/helper/trace_id_helper.py +++ b/api/core/helper/trace_id_helper.py @@ -103,3 +103,60 @@ def parse_traceparent_header(traceparent: str) -> str | None: if len(parts) == 4 and len(parts[1]) == 32: return parts[1] return None + + +def get_span_id_from_otel_context() -> str | None: + """ + Retrieve the current span ID from the active OpenTelemetry trace context. + + Returns: + A 16-character hex string representing the span ID, or None if not available. + """ + try: + from opentelemetry.trace import get_current_span + from opentelemetry.trace.span import INVALID_SPAN_ID + + span = get_current_span() + if not span: + return None + + span_context = span.get_span_context() + if not span_context or span_context.span_id == INVALID_SPAN_ID: + return None + + return f"{span_context.span_id:016x}" + except Exception: + return None + + +def generate_traceparent_header() -> str | None: + """ + Generate a W3C traceparent header from the current context. + + Uses OpenTelemetry context if available, otherwise uses the + ContextVar-based trace_id from the logging context. + + Format: {version}-{trace_id}-{span_id}-{flags} + Example: 00-5b8aa5a2d2c872e8321cf37308d69df2-051581bf3bb55c45-01 + + Returns: + A valid traceparent header string, or None if generation fails. + """ + import uuid + + # Try OTEL context first + trace_id = get_trace_id_from_otel_context() + span_id = get_span_id_from_otel_context() + + if trace_id and span_id: + return f"00-{trace_id}-{span_id}-01" + + # Fallback: use ContextVar-based trace_id or generate new one + from core.logging.context import get_trace_id as get_logging_trace_id + + trace_id = get_logging_trace_id() or uuid.uuid4().hex + + # Generate a new span_id (16 hex chars) + span_id = uuid.uuid4().hex[:16] + + return f"00-{trace_id}-{span_id}-01" diff --git a/api/core/logging/__init__.py b/api/core/logging/__init__.py new file mode 100644 index 0000000000..db046cc9fa --- /dev/null +++ b/api/core/logging/__init__.py @@ -0,0 +1,20 @@ +"""Structured logging components for Dify.""" + +from core.logging.context import ( + clear_request_context, + get_request_id, + get_trace_id, + init_request_context, +) +from core.logging.filters import IdentityContextFilter, TraceContextFilter +from core.logging.structured_formatter import StructuredJSONFormatter + +__all__ = [ + "IdentityContextFilter", + "StructuredJSONFormatter", + "TraceContextFilter", + "clear_request_context", + "get_request_id", + "get_trace_id", + "init_request_context", +] diff --git a/api/core/logging/context.py b/api/core/logging/context.py new file mode 100644 index 0000000000..18633a0b05 --- /dev/null +++ b/api/core/logging/context.py @@ -0,0 +1,35 @@ +"""Request context for logging - framework agnostic. + +This module provides request-scoped context variables for logging, +using Python's contextvars for thread-safe and async-safe storage. +""" + +import uuid +from contextvars import ContextVar + +_request_id: ContextVar[str] = ContextVar("log_request_id", default="") +_trace_id: ContextVar[str] = ContextVar("log_trace_id", default="") + + +def get_request_id() -> str: + """Get current request ID (10 hex chars).""" + return _request_id.get() + + +def get_trace_id() -> str: + """Get fallback trace ID when OTEL is unavailable (32 hex chars).""" + return _trace_id.get() + + +def init_request_context() -> None: + """Initialize request context. Call at start of each request.""" + req_id = uuid.uuid4().hex[:10] + trace_id = uuid.uuid5(uuid.NAMESPACE_DNS, req_id).hex + _request_id.set(req_id) + _trace_id.set(trace_id) + + +def clear_request_context() -> None: + """Clear request context. Call at end of request (optional).""" + _request_id.set("") + _trace_id.set("") diff --git a/api/core/logging/filters.py b/api/core/logging/filters.py new file mode 100644 index 0000000000..1e8aa8d566 --- /dev/null +++ b/api/core/logging/filters.py @@ -0,0 +1,94 @@ +"""Logging filters for structured logging.""" + +import contextlib +import logging + +import flask + +from core.logging.context import get_request_id, get_trace_id + + +class TraceContextFilter(logging.Filter): + """ + Filter that adds trace_id and span_id to log records. + Integrates with OpenTelemetry when available, falls back to ContextVar-based trace_id. + """ + + def filter(self, record: logging.LogRecord) -> bool: + # Get trace context from OpenTelemetry + trace_id, span_id = self._get_otel_context() + + # Set trace_id (fallback to ContextVar if no OTEL context) + if trace_id: + record.trace_id = trace_id + else: + record.trace_id = get_trace_id() + + record.span_id = span_id or "" + + # For backward compatibility, also set req_id + record.req_id = get_request_id() + + return True + + def _get_otel_context(self) -> tuple[str, str]: + """Extract trace_id and span_id from OpenTelemetry context.""" + with contextlib.suppress(Exception): + from opentelemetry.trace import get_current_span + from opentelemetry.trace.span import INVALID_SPAN_ID, INVALID_TRACE_ID + + span = get_current_span() + if span and span.get_span_context(): + ctx = span.get_span_context() + if ctx.is_valid and ctx.trace_id != INVALID_TRACE_ID: + trace_id = f"{ctx.trace_id:032x}" + span_id = f"{ctx.span_id:016x}" if ctx.span_id != INVALID_SPAN_ID else "" + return trace_id, span_id + return "", "" + + +class IdentityContextFilter(logging.Filter): + """ + Filter that adds user identity context to log records. + Extracts tenant_id, user_id, and user_type from Flask-Login current_user. + """ + + def filter(self, record: logging.LogRecord) -> bool: + identity = self._extract_identity() + record.tenant_id = identity.get("tenant_id", "") + record.user_id = identity.get("user_id", "") + record.user_type = identity.get("user_type", "") + return True + + def _extract_identity(self) -> dict[str, str]: + """Extract identity from current_user if in request context.""" + try: + if not flask.has_request_context(): + return {} + from flask_login import current_user + + # Check if user is authenticated using the proxy + if not current_user.is_authenticated: + return {} + + # Access the underlying user object + user = current_user + + from models import Account + from models.model import EndUser + + identity: dict[str, str] = {} + + if isinstance(user, Account): + if user.current_tenant_id: + identity["tenant_id"] = user.current_tenant_id + identity["user_id"] = user.id + identity["user_type"] = "account" + elif isinstance(user, EndUser): + identity["tenant_id"] = user.tenant_id + identity["user_id"] = user.id + identity["user_type"] = user.type or "end_user" + + return identity + except Exception: + return {} diff --git a/api/core/logging/structured_formatter.py b/api/core/logging/structured_formatter.py new file mode 100644 index 0000000000..4295d2dd34 --- /dev/null +++ b/api/core/logging/structured_formatter.py @@ -0,0 +1,107 @@ +"""Structured JSON log formatter for Dify.""" + +import logging +import traceback +from datetime import UTC, datetime +from typing import Any + +import orjson + +from configs import dify_config + + +class StructuredJSONFormatter(logging.Formatter): + """ + JSON log formatter following the specified schema: + { + "ts": "ISO 8601 UTC", + "severity": "INFO|ERROR|WARN|DEBUG", + "service": "service name", + "caller": "file:line", + "trace_id": "hex 32", + "span_id": "hex 16", + "identity": { "tenant_id", "user_id", "user_type" }, + "message": "log message", + "attributes": { ... }, + "stack_trace": "..." + } + """ + + SEVERITY_MAP: dict[int, str] = { + logging.DEBUG: "DEBUG", + logging.INFO: "INFO", + logging.WARNING: "WARN", + logging.ERROR: "ERROR", + logging.CRITICAL: "ERROR", + } + + def __init__(self, service_name: str | None = None): + super().__init__() + self._service_name = service_name or dify_config.APPLICATION_NAME + + def format(self, record: logging.LogRecord) -> str: + log_dict = self._build_log_dict(record) + try: + return orjson.dumps(log_dict).decode("utf-8") + except TypeError: + # Fallback: convert non-serializable objects to string + import json + + return json.dumps(log_dict, default=str, ensure_ascii=False) + + def _build_log_dict(self, record: logging.LogRecord) -> dict[str, Any]: + # Core fields + log_dict: dict[str, Any] = { + "ts": datetime.now(UTC).isoformat(timespec="milliseconds").replace("+00:00", "Z"), + "severity": self.SEVERITY_MAP.get(record.levelno, "INFO"), + "service": self._service_name, + "caller": f"{record.filename}:{record.lineno}", + "message": record.getMessage(), + } + + # Trace context (from TraceContextFilter) + trace_id = getattr(record, "trace_id", "") + span_id = getattr(record, "span_id", "") + + if trace_id: + log_dict["trace_id"] = trace_id + if span_id: + log_dict["span_id"] = span_id + + # Identity context (from IdentityContextFilter) + identity = self._extract_identity(record) + if identity: + log_dict["identity"] = identity + + # Dynamic attributes + attributes = getattr(record, "attributes", None) + if attributes: + log_dict["attributes"] = attributes + + # Stack trace for errors with exceptions + if record.exc_info and record.levelno >= logging.ERROR: + log_dict["stack_trace"] = self._format_exception(record.exc_info) + + return log_dict + + def _extract_identity(self, record: logging.LogRecord) -> dict[str, str] | None: + tenant_id = getattr(record, "tenant_id", None) + user_id = getattr(record, "user_id", None) + user_type = getattr(record, "user_type", None) + + if not any([tenant_id, user_id, user_type]): + return None + + identity: dict[str, str] = {} + if tenant_id: + identity["tenant_id"] = tenant_id + if user_id: + identity["user_id"] = user_id + if user_type: + identity["user_type"] = user_type + return identity + + def _format_exception(self, exc_info: tuple[Any, ...]) -> str: + if exc_info and exc_info[0] is not None: + return "".join(traceback.format_exception(*exc_info)) + return "" diff --git a/api/core/plugin/impl/base.py b/api/core/plugin/impl/base.py index 7bb2749afa..0e49824ad0 100644 --- a/api/core/plugin/impl/base.py +++ b/api/core/plugin/impl/base.py @@ -103,6 +103,9 @@ class BasePluginClient: prepared_headers["X-Api-Key"] = dify_config.PLUGIN_DAEMON_KEY prepared_headers.setdefault("Accept-Encoding", "gzip, deflate, br") + # Inject traceparent header for distributed tracing + self._inject_trace_headers(prepared_headers) + prepared_data: bytes | dict[str, Any] | str | None = ( data if isinstance(data, (bytes, str, dict)) or data is None else None ) @@ -114,6 +117,31 @@ class BasePluginClient: return str(url), prepared_headers, prepared_data, params, files + def _inject_trace_headers(self, headers: dict[str, str]) -> None: + """ + Inject W3C traceparent header for distributed tracing. + + This ensures trace context is propagated to plugin daemon even if + HTTPXClientInstrumentor doesn't cover module-level httpx functions. + """ + if not dify_config.ENABLE_OTEL: + return + + import contextlib + + # Skip if already present (case-insensitive check) + for key in headers: + if key.lower() == "traceparent": + return + + # Inject traceparent - works as fallback when OTEL instrumentation doesn't cover this call + with contextlib.suppress(Exception): + from core.helper.trace_id_helper import generate_traceparent_header + + traceparent = generate_traceparent_header() + if traceparent: + headers["traceparent"] = traceparent + def _stream_request( self, method: str, diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 4ec59940e3..c6339aa3ba 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -515,6 +515,7 @@ class DatasetRetrieval: 0 ].embedding_model_provider weights["vector_setting"]["embedding_model_name"] = available_datasets[0].embedding_model + dataset_count = len(available_datasets) with measure_time() as timer: cancel_event = threading.Event() thread_exceptions: list[Exception] = [] @@ -537,6 +538,7 @@ class DatasetRetrieval: "score_threshold": score_threshold, "query": query, "attachment_id": None, + "dataset_count": dataset_count, "cancel_event": cancel_event, "thread_exceptions": thread_exceptions, }, @@ -562,6 +564,7 @@ class DatasetRetrieval: "score_threshold": score_threshold, "query": None, "attachment_id": attachment_id, + "dataset_count": dataset_count, "cancel_event": cancel_event, "thread_exceptions": thread_exceptions, }, @@ -1422,6 +1425,7 @@ class DatasetRetrieval: score_threshold: float, query: str | None, attachment_id: str | None, + dataset_count: int, cancel_event: threading.Event | None = None, thread_exceptions: list[Exception] | None = None, ): @@ -1470,37 +1474,38 @@ class DatasetRetrieval: if cancel_event and cancel_event.is_set(): break - if reranking_enable: - # do rerank for searched documents - data_post_processor = DataPostProcessor(tenant_id, reranking_mode, reranking_model, weights, False) - if query: - all_documents_item = data_post_processor.invoke( - query=query, - documents=all_documents_item, - score_threshold=score_threshold, - top_n=top_k, - query_type=QueryType.TEXT_QUERY, - ) - if attachment_id: - all_documents_item = data_post_processor.invoke( - documents=all_documents_item, - score_threshold=score_threshold, - top_n=top_k, - query_type=QueryType.IMAGE_QUERY, - query=attachment_id, - ) - else: - if index_type == IndexTechniqueType.ECONOMY: - if not query: - all_documents_item = [] - else: - all_documents_item = self.calculate_keyword_score(query, all_documents_item, top_k) - elif index_type == IndexTechniqueType.HIGH_QUALITY: - all_documents_item = self.calculate_vector_score(all_documents_item, top_k, score_threshold) + # Skip second reranking when there is only one dataset + if reranking_enable and dataset_count > 1: + # do rerank for searched documents + data_post_processor = DataPostProcessor(tenant_id, reranking_mode, reranking_model, weights, False) + if query: + all_documents_item = data_post_processor.invoke( + query=query, + documents=all_documents_item, + score_threshold=score_threshold, + top_n=top_k, + query_type=QueryType.TEXT_QUERY, + ) + if attachment_id: + all_documents_item = data_post_processor.invoke( + documents=all_documents_item, + score_threshold=score_threshold, + top_n=top_k, + query_type=QueryType.IMAGE_QUERY, + query=attachment_id, + ) else: - all_documents_item = all_documents_item[:top_k] if top_k else all_documents_item - if all_documents_item: - all_documents.extend(all_documents_item) + if index_type == IndexTechniqueType.ECONOMY: + if not query: + all_documents_item = [] + else: + all_documents_item = self.calculate_keyword_score(query, all_documents_item, top_k) + elif index_type == IndexTechniqueType.HIGH_QUALITY: + all_documents_item = self.calculate_vector_score(all_documents_item, top_k, score_threshold) + else: + all_documents_item = all_documents_item[:top_k] if top_k else all_documents_item + if all_documents_item: + all_documents.extend(all_documents_item) except Exception as e: if cancel_event: cancel_event.set() diff --git a/api/core/workflow/nodes/code/code_node.py b/api/core/workflow/nodes/code/code_node.py index a38e10030a..e3035d3bf0 100644 --- a/api/core/workflow/nodes/code/code_node.py +++ b/api/core/workflow/nodes/code/code_node.py @@ -1,8 +1,7 @@ from collections.abc import Mapping, Sequence from decimal import Decimal -from typing import Any, cast +from typing import TYPE_CHECKING, Any, ClassVar, cast -from configs import dify_config from core.helper.code_executor.code_executor import CodeExecutionError, CodeExecutor, CodeLanguage from core.helper.code_executor.code_node_provider import CodeNodeProvider from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider @@ -13,6 +12,7 @@ from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus from core.workflow.node_events import NodeRunResult from core.workflow.nodes.base.node import Node from core.workflow.nodes.code.entities import CodeNodeData +from core.workflow.nodes.code.limits import CodeNodeLimits from .exc import ( CodeNodeError, @@ -20,9 +20,41 @@ from .exc import ( OutputValidationError, ) +if TYPE_CHECKING: + from core.workflow.entities import GraphInitParams + from core.workflow.runtime import GraphRuntimeState + class CodeNode(Node[CodeNodeData]): node_type = NodeType.CODE + _DEFAULT_CODE_PROVIDERS: ClassVar[tuple[type[CodeNodeProvider], ...]] = ( + Python3CodeProvider, + JavascriptCodeProvider, + ) + _limits: CodeNodeLimits + + def __init__( + self, + id: str, + config: Mapping[str, Any], + graph_init_params: "GraphInitParams", + graph_runtime_state: "GraphRuntimeState", + *, + code_executor: type[CodeExecutor] | None = None, + code_providers: Sequence[type[CodeNodeProvider]] | None = None, + code_limits: CodeNodeLimits, + ) -> None: + super().__init__( + id=id, + config=config, + graph_init_params=graph_init_params, + graph_runtime_state=graph_runtime_state, + ) + self._code_executor: type[CodeExecutor] = code_executor or CodeExecutor + self._code_providers: tuple[type[CodeNodeProvider], ...] = ( + tuple(code_providers) if code_providers else self._DEFAULT_CODE_PROVIDERS + ) + self._limits = code_limits @classmethod def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]: @@ -35,11 +67,16 @@ class CodeNode(Node[CodeNodeData]): if filters: code_language = cast(CodeLanguage, filters.get("code_language", CodeLanguage.PYTHON3)) - providers: list[type[CodeNodeProvider]] = [Python3CodeProvider, JavascriptCodeProvider] - code_provider: type[CodeNodeProvider] = next(p for p in providers if p.is_accept_language(code_language)) + code_provider: type[CodeNodeProvider] = next( + provider for provider in cls._DEFAULT_CODE_PROVIDERS if provider.is_accept_language(code_language) + ) return code_provider.get_default_config() + @classmethod + def default_code_providers(cls) -> tuple[type[CodeNodeProvider], ...]: + return cls._DEFAULT_CODE_PROVIDERS + @classmethod def version(cls) -> str: return "1" @@ -60,7 +97,8 @@ class CodeNode(Node[CodeNodeData]): variables[variable_name] = variable.to_object() if variable else None # Run code try: - result = CodeExecutor.execute_workflow_code_template( + _ = self._select_code_provider(code_language) + result = self._code_executor.execute_workflow_code_template( language=code_language, code=code, inputs=variables, @@ -75,6 +113,12 @@ class CodeNode(Node[CodeNodeData]): return NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, outputs=result) + def _select_code_provider(self, code_language: CodeLanguage) -> type[CodeNodeProvider]: + for provider in self._code_providers: + if provider.is_accept_language(code_language): + return provider + raise CodeNodeError(f"Unsupported code language: {code_language}") + def _check_string(self, value: str | None, variable: str) -> str | None: """ Check string @@ -85,10 +129,10 @@ class CodeNode(Node[CodeNodeData]): if value is None: return None - if len(value) > dify_config.CODE_MAX_STRING_LENGTH: + if len(value) > self._limits.max_string_length: raise OutputValidationError( f"The length of output variable `{variable}` must be" - f" less than {dify_config.CODE_MAX_STRING_LENGTH} characters" + f" less than {self._limits.max_string_length} characters" ) return value.replace("\x00", "") @@ -109,20 +153,20 @@ class CodeNode(Node[CodeNodeData]): if value is None: return None - if value > dify_config.CODE_MAX_NUMBER or value < dify_config.CODE_MIN_NUMBER: + if value > self._limits.max_number or value < self._limits.min_number: raise OutputValidationError( f"Output variable `{variable}` is out of range," - f" it must be between {dify_config.CODE_MIN_NUMBER} and {dify_config.CODE_MAX_NUMBER}." + f" it must be between {self._limits.min_number} and {self._limits.max_number}." ) if isinstance(value, float): decimal_value = Decimal(str(value)).normalize() precision = -decimal_value.as_tuple().exponent if decimal_value.as_tuple().exponent < 0 else 0 # type: ignore[operator] # raise error if precision is too high - if precision > dify_config.CODE_MAX_PRECISION: + if precision > self._limits.max_precision: raise OutputValidationError( f"Output variable `{variable}` has too high precision," - f" it must be less than {dify_config.CODE_MAX_PRECISION} digits." + f" it must be less than {self._limits.max_precision} digits." ) return value @@ -137,8 +181,8 @@ class CodeNode(Node[CodeNodeData]): # TODO(QuantumGhost): Replace native Python lists with `Array*Segment` classes. # Note that `_transform_result` may produce lists containing `None` values, # which don't conform to the type requirements of `Array*Segment` classes. - if depth > dify_config.CODE_MAX_DEPTH: - raise DepthLimitError(f"Depth limit {dify_config.CODE_MAX_DEPTH} reached, object too deep.") + if depth > self._limits.max_depth: + raise DepthLimitError(f"Depth limit {self._limits.max_depth} reached, object too deep.") transformed_result: dict[str, Any] = {} if output_schema is None: @@ -272,10 +316,10 @@ class CodeNode(Node[CodeNodeData]): f"Output {prefix}{dot}{output_name} is not an array, got {type(value)} instead." ) else: - if len(value) > dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH: + if len(value) > self._limits.max_number_array_length: raise OutputValidationError( f"The length of output variable `{prefix}{dot}{output_name}` must be" - f" less than {dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH} elements." + f" less than {self._limits.max_number_array_length} elements." ) for i, inner_value in enumerate(value): @@ -305,10 +349,10 @@ class CodeNode(Node[CodeNodeData]): f" got {type(result.get(output_name))} instead." ) else: - if len(result[output_name]) > dify_config.CODE_MAX_STRING_ARRAY_LENGTH: + if len(result[output_name]) > self._limits.max_string_array_length: raise OutputValidationError( f"The length of output variable `{prefix}{dot}{output_name}` must be" - f" less than {dify_config.CODE_MAX_STRING_ARRAY_LENGTH} elements." + f" less than {self._limits.max_string_array_length} elements." ) transformed_result[output_name] = [ @@ -326,10 +370,10 @@ class CodeNode(Node[CodeNodeData]): f" got {type(result.get(output_name))} instead." ) else: - if len(result[output_name]) > dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH: + if len(result[output_name]) > self._limits.max_object_array_length: raise OutputValidationError( f"The length of output variable `{prefix}{dot}{output_name}` must be" - f" less than {dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH} elements." + f" less than {self._limits.max_object_array_length} elements." ) for i, value in enumerate(result[output_name]): diff --git a/api/core/workflow/nodes/code/limits.py b/api/core/workflow/nodes/code/limits.py new file mode 100644 index 0000000000..a6b9e9e68e --- /dev/null +++ b/api/core/workflow/nodes/code/limits.py @@ -0,0 +1,13 @@ +from dataclasses import dataclass + + +@dataclass(frozen=True) +class CodeNodeLimits: + max_string_length: int + max_number: int | float + min_number: int | float + max_precision: int + max_depth: int + max_number_array_length: int + max_string_array_length: int + max_object_array_length: int diff --git a/api/core/workflow/nodes/node_factory.py b/api/core/workflow/nodes/node_factory.py index c55ad346bf..f177aef665 100644 --- a/api/core/workflow/nodes/node_factory.py +++ b/api/core/workflow/nodes/node_factory.py @@ -1,10 +1,21 @@ +from collections.abc import Sequence from typing import TYPE_CHECKING, final from typing_extensions import override +from configs import dify_config +from core.helper.code_executor.code_executor import CodeExecutor +from core.helper.code_executor.code_node_provider import CodeNodeProvider from core.workflow.enums import NodeType from core.workflow.graph import NodeFactory from core.workflow.nodes.base.node import Node +from core.workflow.nodes.code.code_node import CodeNode +from core.workflow.nodes.code.limits import CodeNodeLimits +from core.workflow.nodes.template_transform.template_renderer import ( + CodeExecutorJinja2TemplateRenderer, + Jinja2TemplateRenderer, +) +from core.workflow.nodes.template_transform.template_transform_node import TemplateTransformNode from libs.typing import is_str, is_str_dict from .node_mapping import LATEST_VERSION, NODE_TYPE_CLASSES_MAPPING @@ -27,9 +38,29 @@ class DifyNodeFactory(NodeFactory): self, graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", + *, + code_executor: type[CodeExecutor] | None = None, + code_providers: Sequence[type[CodeNodeProvider]] | None = None, + code_limits: CodeNodeLimits | None = None, + template_renderer: Jinja2TemplateRenderer | None = None, ) -> None: self.graph_init_params = graph_init_params self.graph_runtime_state = graph_runtime_state + self._code_executor: type[CodeExecutor] = code_executor or CodeExecutor + self._code_providers: tuple[type[CodeNodeProvider], ...] = ( + tuple(code_providers) if code_providers else CodeNode.default_code_providers() + ) + self._code_limits = code_limits or CodeNodeLimits( + max_string_length=dify_config.CODE_MAX_STRING_LENGTH, + max_number=dify_config.CODE_MAX_NUMBER, + min_number=dify_config.CODE_MIN_NUMBER, + max_precision=dify_config.CODE_MAX_PRECISION, + max_depth=dify_config.CODE_MAX_DEPTH, + max_number_array_length=dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH, + max_string_array_length=dify_config.CODE_MAX_STRING_ARRAY_LENGTH, + max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH, + ) + self._template_renderer = template_renderer or CodeExecutorJinja2TemplateRenderer() @override def create_node(self, node_config: dict[str, object]) -> Node: @@ -72,6 +103,26 @@ class DifyNodeFactory(NodeFactory): raise ValueError(f"No latest version class found for node type: {node_type}") # Create node instance + if node_type == NodeType.CODE: + return CodeNode( + id=node_id, + config=node_config, + graph_init_params=self.graph_init_params, + graph_runtime_state=self.graph_runtime_state, + code_executor=self._code_executor, + code_providers=self._code_providers, + code_limits=self._code_limits, + ) + + if node_type == NodeType.TEMPLATE_TRANSFORM: + return TemplateTransformNode( + id=node_id, + config=node_config, + graph_init_params=self.graph_init_params, + graph_runtime_state=self.graph_runtime_state, + template_renderer=self._template_renderer, + ) + return node_class( id=node_id, config=node_config, diff --git a/api/core/workflow/nodes/template_transform/template_renderer.py b/api/core/workflow/nodes/template_transform/template_renderer.py new file mode 100644 index 0000000000..a5f06bf2bb --- /dev/null +++ b/api/core/workflow/nodes/template_transform/template_renderer.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any, Protocol + +from core.helper.code_executor.code_executor import CodeExecutionError, CodeExecutor, CodeLanguage + + +class TemplateRenderError(ValueError): + """Raised when rendering a Jinja2 template fails.""" + + +class Jinja2TemplateRenderer(Protocol): + """Render Jinja2 templates for template transform nodes.""" + + def render_template(self, template: str, variables: Mapping[str, Any]) -> str: + """Render a Jinja2 template with provided variables.""" + raise NotImplementedError + + +class CodeExecutorJinja2TemplateRenderer(Jinja2TemplateRenderer): + """Adapter that renders Jinja2 templates via CodeExecutor.""" + + _code_executor: type[CodeExecutor] + + def __init__(self, code_executor: type[CodeExecutor] | None = None) -> None: + self._code_executor = code_executor or CodeExecutor + + def render_template(self, template: str, variables: Mapping[str, Any]) -> str: + try: + result = self._code_executor.execute_workflow_code_template( + language=CodeLanguage.JINJA2, code=template, inputs=variables + ) + except CodeExecutionError as exc: + raise TemplateRenderError(str(exc)) from exc + + rendered = result.get("result") + if not isinstance(rendered, str): + raise TemplateRenderError("Template render result must be a string.") + return rendered diff --git a/api/core/workflow/nodes/template_transform/template_transform_node.py b/api/core/workflow/nodes/template_transform/template_transform_node.py index 2274323960..f7e0bccccf 100644 --- a/api/core/workflow/nodes/template_transform/template_transform_node.py +++ b/api/core/workflow/nodes/template_transform/template_transform_node.py @@ -1,18 +1,44 @@ from collections.abc import Mapping, Sequence -from typing import Any +from typing import TYPE_CHECKING, Any from configs import dify_config -from core.helper.code_executor.code_executor import CodeExecutionError, CodeExecutor, CodeLanguage from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus from core.workflow.node_events import NodeRunResult from core.workflow.nodes.base.node import Node from core.workflow.nodes.template_transform.entities import TemplateTransformNodeData +from core.workflow.nodes.template_transform.template_renderer import ( + CodeExecutorJinja2TemplateRenderer, + Jinja2TemplateRenderer, + TemplateRenderError, +) + +if TYPE_CHECKING: + from core.workflow.entities import GraphInitParams + from core.workflow.runtime import GraphRuntimeState MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH = dify_config.TEMPLATE_TRANSFORM_MAX_LENGTH class TemplateTransformNode(Node[TemplateTransformNodeData]): node_type = NodeType.TEMPLATE_TRANSFORM + _template_renderer: Jinja2TemplateRenderer + + def __init__( + self, + id: str, + config: Mapping[str, Any], + graph_init_params: "GraphInitParams", + graph_runtime_state: "GraphRuntimeState", + *, + template_renderer: Jinja2TemplateRenderer | None = None, + ) -> None: + super().__init__( + id=id, + config=config, + graph_init_params=graph_init_params, + graph_runtime_state=graph_runtime_state, + ) + self._template_renderer = template_renderer or CodeExecutorJinja2TemplateRenderer() @classmethod def get_default_config(cls, filters: Mapping[str, object] | None = None) -> Mapping[str, object]: @@ -39,13 +65,11 @@ class TemplateTransformNode(Node[TemplateTransformNodeData]): variables[variable_name] = value.to_object() if value else None # Run code try: - result = CodeExecutor.execute_workflow_code_template( - language=CodeLanguage.JINJA2, code=self.node_data.template, inputs=variables - ) - except CodeExecutionError as e: + rendered = self._template_renderer.render_template(self.node_data.template, variables) + except TemplateRenderError as e: return NodeRunResult(inputs=variables, status=WorkflowNodeExecutionStatus.FAILED, error=str(e)) - if len(result["result"]) > MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH: + if len(rendered) > MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH: return NodeRunResult( inputs=variables, status=WorkflowNodeExecutionStatus.FAILED, @@ -53,7 +77,7 @@ class TemplateTransformNode(Node[TemplateTransformNodeData]): ) return NodeRunResult( - status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, outputs={"output": result["result"]} + status=WorkflowNodeExecutionStatus.SUCCEEDED, inputs=variables, outputs={"output": rendered} ) @classmethod diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 764df5d178..2fbab001d0 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -46,7 +46,11 @@ def _get_celery_ssl_options() -> dict[str, Any] | None: def init_app(app: DifyApp) -> Celery: class FlaskTask(Task): def __call__(self, *args: object, **kwargs: object) -> object: + from core.logging.context import init_request_context + with app.app_context(): + # Initialize logging context for this task (similar to before_request in Flask) + init_request_context() return self.run(*args, **kwargs) broker_transport_options = {} diff --git a/api/extensions/ext_commands.py b/api/extensions/ext_commands.py index 71a63168a5..daa3756dba 100644 --- a/api/extensions/ext_commands.py +++ b/api/extensions/ext_commands.py @@ -11,6 +11,7 @@ def init_app(app: DifyApp): create_tenant, extract_plugins, extract_unique_plugins, + file_usage, fix_app_site_missing, install_plugins, install_rag_pipeline_plugins, @@ -47,6 +48,7 @@ def init_app(app: DifyApp): clear_free_plan_tenant_expired_logs, clear_orphaned_file_records, remove_orphaned_files_on_storage, + file_usage, setup_system_tool_oauth_client, setup_system_trigger_oauth_client, cleanup_orphaned_draft_variables, diff --git a/api/extensions/ext_database.py b/api/extensions/ext_database.py index c90b1d0a9f..2e0d4c889a 100644 --- a/api/extensions/ext_database.py +++ b/api/extensions/ext_database.py @@ -53,3 +53,10 @@ def _setup_gevent_compatibility(): def init_app(app: DifyApp): db.init_app(app) _setup_gevent_compatibility() + + # Eagerly build the engine so pool_size/max_overflow/etc. come from config + try: + with app.app_context(): + _ = db.engine # triggers engine creation with the configured options + except Exception: + logger.exception("Failed to initialize SQLAlchemy engine during app startup") diff --git a/api/extensions/ext_logging.py b/api/extensions/ext_logging.py index 000d03ac41..978a40c503 100644 --- a/api/extensions/ext_logging.py +++ b/api/extensions/ext_logging.py @@ -1,18 +1,19 @@ +"""Logging extension for Dify Flask application.""" + import logging import os import sys -import uuid from logging.handlers import RotatingFileHandler -import flask - from configs import dify_config -from core.helper.trace_id_helper import get_trace_id_from_otel_context from dify_app import DifyApp def init_app(app: DifyApp): + """Initialize logging with support for text or JSON format.""" log_handlers: list[logging.Handler] = [] + + # File handler log_file = dify_config.LOG_FILE if log_file: log_dir = os.path.dirname(log_file) @@ -25,27 +26,53 @@ def init_app(app: DifyApp): ) ) - # Always add StreamHandler to log to console + # Console handler sh = logging.StreamHandler(sys.stdout) log_handlers.append(sh) - # Apply RequestIdFilter to all handlers - for handler in log_handlers: - handler.addFilter(RequestIdFilter()) + # Apply filters to all handlers + from core.logging.filters import IdentityContextFilter, TraceContextFilter + for handler in log_handlers: + handler.addFilter(TraceContextFilter()) + handler.addFilter(IdentityContextFilter()) + + # Configure formatter based on format type + formatter = _create_formatter() + for handler in log_handlers: + handler.setFormatter(formatter) + + # Configure root logger logging.basicConfig( level=dify_config.LOG_LEVEL, - format=dify_config.LOG_FORMAT, - datefmt=dify_config.LOG_DATEFORMAT, handlers=log_handlers, force=True, ) - # Apply RequestIdFormatter to all handlers - apply_request_id_formatter() - # Disable propagation for noisy loggers to avoid duplicate logs logging.getLogger("sqlalchemy.engine").propagate = False + + # Apply timezone if specified (only for text format) + if dify_config.LOG_OUTPUT_FORMAT == "text": + _apply_timezone(log_handlers) + + +def _create_formatter() -> logging.Formatter: + """Create appropriate formatter based on configuration.""" + if dify_config.LOG_OUTPUT_FORMAT == "json": + from core.logging.structured_formatter import StructuredJSONFormatter + + return StructuredJSONFormatter() + else: + # Text format - use existing pattern with backward compatible formatter + return _TextFormatter( + fmt=dify_config.LOG_FORMAT, + datefmt=dify_config.LOG_DATEFORMAT, + ) + + +def _apply_timezone(handlers: list[logging.Handler]): + """Apply timezone conversion to text formatters.""" log_tz = dify_config.LOG_TZ if log_tz: from datetime import datetime @@ -57,34 +84,51 @@ def init_app(app: DifyApp): def time_converter(seconds): return datetime.fromtimestamp(seconds, tz=timezone).timetuple() - for handler in logging.root.handlers: + for handler in handlers: if handler.formatter: - handler.formatter.converter = time_converter + handler.formatter.converter = time_converter # type: ignore[attr-defined] -def get_request_id(): - if getattr(flask.g, "request_id", None): - return flask.g.request_id +class _TextFormatter(logging.Formatter): + """Text formatter that ensures trace_id and req_id are always present.""" - new_uuid = uuid.uuid4().hex[:10] - flask.g.request_id = new_uuid - - return new_uuid + def format(self, record: logging.LogRecord) -> str: + if not hasattr(record, "req_id"): + record.req_id = "" + if not hasattr(record, "trace_id"): + record.trace_id = "" + if not hasattr(record, "span_id"): + record.span_id = "" + return super().format(record) +def get_request_id() -> str: + """Get request ID for current request context. + + Deprecated: Use core.logging.context.get_request_id() directly. + """ + from core.logging.context import get_request_id as _get_request_id + + return _get_request_id() + + +# Backward compatibility aliases class RequestIdFilter(logging.Filter): - # This is a logging filter that makes the request ID available for use in - # the logging format. Note that we're checking if we're in a request - # context, as we may want to log things before Flask is fully loaded. - def filter(self, record): - trace_id = get_trace_id_from_otel_context() or "" - record.req_id = get_request_id() if flask.has_request_context() else "" - record.trace_id = trace_id + """Deprecated: Use TraceContextFilter from core.logging.filters instead.""" + + def filter(self, record: logging.LogRecord) -> bool: + from core.logging.context import get_request_id as _get_request_id + from core.logging.context import get_trace_id as _get_trace_id + + record.req_id = _get_request_id() + record.trace_id = _get_trace_id() return True class RequestIdFormatter(logging.Formatter): - def format(self, record): + """Deprecated: Use _TextFormatter instead.""" + + def format(self, record: logging.LogRecord) -> str: if not hasattr(record, "req_id"): record.req_id = "" if not hasattr(record, "trace_id"): @@ -93,6 +137,7 @@ class RequestIdFormatter(logging.Formatter): def apply_request_id_formatter(): + """Deprecated: Formatter is now applied in init_app.""" for handler in logging.root.handlers: if handler.formatter: handler.formatter = RequestIdFormatter(dify_config.LOG_FORMAT, dify_config.LOG_DATEFORMAT) diff --git a/api/extensions/logstore/repositories/logstore_workflow_execution_repository.py b/api/extensions/logstore/repositories/logstore_workflow_execution_repository.py index 6e6631cfef..a6b3706f42 100644 --- a/api/extensions/logstore/repositories/logstore_workflow_execution_repository.py +++ b/api/extensions/logstore/repositories/logstore_workflow_execution_repository.py @@ -22,6 +22,18 @@ from models.enums import WorkflowRunTriggeredFrom logger = logging.getLogger(__name__) +def to_serializable(obj): + """ + Convert non-JSON-serializable objects into JSON-compatible formats. + + - Uses `to_dict()` if it's a callable method. + - Falls back to string representation. + """ + if hasattr(obj, "to_dict") and callable(obj.to_dict): + return obj.to_dict() + return str(obj) + + class LogstoreWorkflowExecutionRepository(WorkflowExecutionRepository): def __init__( self, @@ -108,9 +120,24 @@ class LogstoreWorkflowExecutionRepository(WorkflowExecutionRepository): ), ("type", domain_model.workflow_type.value), ("version", domain_model.workflow_version), - ("graph", json.dumps(domain_model.graph, ensure_ascii=False) if domain_model.graph else "{}"), - ("inputs", json.dumps(domain_model.inputs, ensure_ascii=False) if domain_model.inputs else "{}"), - ("outputs", json.dumps(domain_model.outputs, ensure_ascii=False) if domain_model.outputs else "{}"), + ( + "graph", + json.dumps(domain_model.graph, ensure_ascii=False, default=to_serializable) + if domain_model.graph + else "{}", + ), + ( + "inputs", + json.dumps(domain_model.inputs, ensure_ascii=False, default=to_serializable) + if domain_model.inputs + else "{}", + ), + ( + "outputs", + json.dumps(domain_model.outputs, ensure_ascii=False, default=to_serializable) + if domain_model.outputs + else "{}", + ), ("status", domain_model.status.value), ("error_message", domain_model.error_message or ""), ("total_tokens", str(domain_model.total_tokens)), diff --git a/api/extensions/otel/instrumentation.py b/api/extensions/otel/instrumentation.py index 3597110cba..6617f69513 100644 --- a/api/extensions/otel/instrumentation.py +++ b/api/extensions/otel/instrumentation.py @@ -19,26 +19,43 @@ logger = logging.getLogger(__name__) class ExceptionLoggingHandler(logging.Handler): + """ + Handler that records exceptions to the current OpenTelemetry span. + + Unlike creating a new span, this records exceptions on the existing span + to maintain trace context consistency throughout the request lifecycle. + """ + def emit(self, record: logging.LogRecord): with contextlib.suppress(Exception): - if record.exc_info: - tracer = get_tracer_provider().get_tracer("dify.exception.logging") - with tracer.start_as_current_span( - "log.exception", - attributes={ - "log.level": record.levelname, - "log.message": record.getMessage(), - "log.logger": record.name, - "log.file.path": record.pathname, - "log.file.line": record.lineno, - }, - ) as span: - span.set_status(StatusCode.ERROR) - if record.exc_info[1]: - span.record_exception(record.exc_info[1]) - span.set_attribute("exception.message", str(record.exc_info[1])) - if record.exc_info[0]: - span.set_attribute("exception.type", record.exc_info[0].__name__) + if not record.exc_info: + return + + from opentelemetry.trace import get_current_span + + span = get_current_span() + if not span or not span.is_recording(): + return + + # Record exception on the current span instead of creating a new one + span.set_status(StatusCode.ERROR, record.getMessage()) + + # Add log context as span events/attributes + span.add_event( + "log.exception", + attributes={ + "log.level": record.levelname, + "log.message": record.getMessage(), + "log.logger": record.name, + "log.file.path": record.pathname, + "log.file.line": record.lineno, + }, + ) + + if record.exc_info[1]: + span.record_exception(record.exc_info[1]) + if record.exc_info[0]: + span.set_attribute("exception.type", record.exc_info[0].__name__) def instrument_exception_logging() -> None: diff --git a/api/fields/conversation_fields.py b/api/fields/conversation_fields.py index e4ca2e7a42..d8ae0ad8b8 100644 --- a/api/fields/conversation_fields.py +++ b/api/fields/conversation_fields.py @@ -1,236 +1,338 @@ -from flask_restx import Namespace, fields +from __future__ import annotations -from fields.member_fields import simple_account_fields -from libs.helper import TimestampField +from datetime import datetime +from typing import Any, TypeAlias -from .raws import FilesContainedField +from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator + +from core.file import File + +JSONValue: TypeAlias = Any -class MessageTextField(fields.Raw): - def format(self, value): - return value[0]["text"] if value else "" +class ResponseModel(BaseModel): + model_config = ConfigDict( + from_attributes=True, + extra="ignore", + populate_by_name=True, + serialize_by_alias=True, + protected_namespaces=(), + ) -feedback_fields = { - "rating": fields.String, - "content": fields.String, - "from_source": fields.String, - "from_end_user_id": fields.String, - "from_account": fields.Nested(simple_account_fields, allow_null=True), -} +class MessageFile(ResponseModel): + id: str + filename: str + type: str + url: str | None = None + mime_type: str | None = None + size: int | None = None + transfer_method: str + belongs_to: str | None = None + upload_file_id: str | None = None -annotation_fields = { - "id": fields.String, - "question": fields.String, - "content": fields.String, - "account": fields.Nested(simple_account_fields, allow_null=True), - "created_at": TimestampField, -} - -annotation_hit_history_fields = { - "annotation_id": fields.String(attribute="id"), - "annotation_create_account": fields.Nested(simple_account_fields, allow_null=True), - "created_at": TimestampField, -} - -message_file_fields = { - "id": fields.String, - "filename": fields.String, - "type": fields.String, - "url": fields.String, - "mime_type": fields.String, - "size": fields.Integer, - "transfer_method": fields.String, - "belongs_to": fields.String(default="user"), - "upload_file_id": fields.String(default=None), -} + @field_validator("transfer_method", mode="before") + @classmethod + def _normalize_transfer_method(cls, value: object) -> str: + if isinstance(value, str): + return value + return str(value) -def build_message_file_model(api_or_ns: Namespace): - """Build the message file fields for the API or Namespace.""" - return api_or_ns.model("MessageFile", message_file_fields) +class SimpleConversation(ResponseModel): + id: str + name: str + inputs: dict[str, JSONValue] + status: str + introduction: str | None = None + created_at: int | None = None + updated_at: int | None = None + + @field_validator("inputs", mode="before") + @classmethod + def _normalize_inputs(cls, value: JSONValue) -> JSONValue: + return format_files_contained(value) + + @field_validator("created_at", "updated_at", mode="before") + @classmethod + def _normalize_timestamp(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value -agent_thought_fields = { - "id": fields.String, - "chain_id": fields.String, - "message_id": fields.String, - "position": fields.Integer, - "thought": fields.String, - "tool": fields.String, - "tool_labels": fields.Raw, - "tool_input": fields.String, - "created_at": TimestampField, - "observation": fields.String, - "files": fields.List(fields.String), -} - -message_detail_fields = { - "id": fields.String, - "conversation_id": fields.String, - "inputs": FilesContainedField, - "query": fields.String, - "message": fields.Raw, - "message_tokens": fields.Integer, - "answer": fields.String(attribute="re_sign_file_url_answer"), - "answer_tokens": fields.Integer, - "provider_response_latency": fields.Float, - "from_source": fields.String, - "from_end_user_id": fields.String, - "from_account_id": fields.String, - "feedbacks": fields.List(fields.Nested(feedback_fields)), - "workflow_run_id": fields.String, - "annotation": fields.Nested(annotation_fields, allow_null=True), - "annotation_hit_history": fields.Nested(annotation_hit_history_fields, allow_null=True), - "created_at": TimestampField, - "agent_thoughts": fields.List(fields.Nested(agent_thought_fields)), - "message_files": fields.List(fields.Nested(message_file_fields)), - "metadata": fields.Raw(attribute="message_metadata_dict"), - "status": fields.String, - "error": fields.String, - "parent_message_id": fields.String, -} - -feedback_stat_fields = {"like": fields.Integer, "dislike": fields.Integer} -status_count_fields = {"success": fields.Integer, "failed": fields.Integer, "partial_success": fields.Integer} -model_config_fields = { - "opening_statement": fields.String, - "suggested_questions": fields.Raw, - "model": fields.Raw, - "user_input_form": fields.Raw, - "pre_prompt": fields.String, - "agent_mode": fields.Raw, -} - -simple_model_config_fields = { - "model": fields.Raw(attribute="model_dict"), - "pre_prompt": fields.String, -} - -simple_message_detail_fields = { - "inputs": FilesContainedField, - "query": fields.String, - "message": MessageTextField, - "answer": fields.String, -} - -conversation_fields = { - "id": fields.String, - "status": fields.String, - "from_source": fields.String, - "from_end_user_id": fields.String, - "from_end_user_session_id": fields.String(), - "from_account_id": fields.String, - "from_account_name": fields.String, - "read_at": TimestampField, - "created_at": TimestampField, - "updated_at": TimestampField, - "annotation": fields.Nested(annotation_fields, allow_null=True), - "model_config": fields.Nested(simple_model_config_fields), - "user_feedback_stats": fields.Nested(feedback_stat_fields), - "admin_feedback_stats": fields.Nested(feedback_stat_fields), - "message": fields.Nested(simple_message_detail_fields, attribute="first_message"), -} - -conversation_pagination_fields = { - "page": fields.Integer, - "limit": fields.Integer(attribute="per_page"), - "total": fields.Integer, - "has_more": fields.Boolean(attribute="has_next"), - "data": fields.List(fields.Nested(conversation_fields), attribute="items"), -} - -conversation_message_detail_fields = { - "id": fields.String, - "status": fields.String, - "from_source": fields.String, - "from_end_user_id": fields.String, - "from_account_id": fields.String, - "created_at": TimestampField, - "model_config": fields.Nested(model_config_fields), - "message": fields.Nested(message_detail_fields, attribute="first_message"), -} - -conversation_with_summary_fields = { - "id": fields.String, - "status": fields.String, - "from_source": fields.String, - "from_end_user_id": fields.String, - "from_end_user_session_id": fields.String, - "from_account_id": fields.String, - "from_account_name": fields.String, - "name": fields.String, - "summary": fields.String(attribute="summary_or_query"), - "read_at": TimestampField, - "created_at": TimestampField, - "updated_at": TimestampField, - "annotated": fields.Boolean, - "model_config": fields.Nested(simple_model_config_fields), - "message_count": fields.Integer, - "user_feedback_stats": fields.Nested(feedback_stat_fields), - "admin_feedback_stats": fields.Nested(feedback_stat_fields), - "status_count": fields.Nested(status_count_fields), -} - -conversation_with_summary_pagination_fields = { - "page": fields.Integer, - "limit": fields.Integer(attribute="per_page"), - "total": fields.Integer, - "has_more": fields.Boolean(attribute="has_next"), - "data": fields.List(fields.Nested(conversation_with_summary_fields), attribute="items"), -} - -conversation_detail_fields = { - "id": fields.String, - "status": fields.String, - "from_source": fields.String, - "from_end_user_id": fields.String, - "from_account_id": fields.String, - "created_at": TimestampField, - "updated_at": TimestampField, - "annotated": fields.Boolean, - "introduction": fields.String, - "model_config": fields.Nested(model_config_fields), - "message_count": fields.Integer, - "user_feedback_stats": fields.Nested(feedback_stat_fields), - "admin_feedback_stats": fields.Nested(feedback_stat_fields), -} - -simple_conversation_fields = { - "id": fields.String, - "name": fields.String, - "inputs": FilesContainedField, - "status": fields.String, - "introduction": fields.String, - "created_at": TimestampField, - "updated_at": TimestampField, -} - -conversation_delete_fields = { - "result": fields.String, -} - -conversation_infinite_scroll_pagination_fields = { - "limit": fields.Integer, - "has_more": fields.Boolean, - "data": fields.List(fields.Nested(simple_conversation_fields)), -} +class ConversationInfiniteScrollPagination(ResponseModel): + limit: int + has_more: bool + data: list[SimpleConversation] -def build_conversation_infinite_scroll_pagination_model(api_or_ns: Namespace): - """Build the conversation infinite scroll pagination model for the API or Namespace.""" - simple_conversation_model = build_simple_conversation_model(api_or_ns) - - copied_fields = conversation_infinite_scroll_pagination_fields.copy() - copied_fields["data"] = fields.List(fields.Nested(simple_conversation_model)) - return api_or_ns.model("ConversationInfiniteScrollPagination", copied_fields) +class ConversationDelete(ResponseModel): + result: str -def build_conversation_delete_model(api_or_ns: Namespace): - """Build the conversation delete model for the API or Namespace.""" - return api_or_ns.model("ConversationDelete", conversation_delete_fields) +class ResultResponse(ResponseModel): + result: str -def build_simple_conversation_model(api_or_ns: Namespace): - """Build the simple conversation model for the API or Namespace.""" - return api_or_ns.model("SimpleConversation", simple_conversation_fields) +class SimpleAccount(ResponseModel): + id: str + name: str + email: str + + +class Feedback(ResponseModel): + rating: str + content: str | None = None + from_source: str + from_end_user_id: str | None = None + from_account: SimpleAccount | None = None + + +class Annotation(ResponseModel): + id: str + question: str | None = None + content: str + account: SimpleAccount | None = None + created_at: int | None = None + + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value + + +class AnnotationHitHistory(ResponseModel): + annotation_id: str + annotation_create_account: SimpleAccount | None = None + created_at: int | None = None + + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value + + +class AgentThought(ResponseModel): + id: str + chain_id: str | None = None + message_chain_id: str | None = Field(default=None, exclude=True, validation_alias="message_chain_id") + message_id: str + position: int + thought: str | None = None + tool: str | None = None + tool_labels: JSONValue + tool_input: str | None = None + created_at: int | None = None + observation: str | None = None + files: list[str] + + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value + + @model_validator(mode="after") + def _fallback_chain_id(self): + if self.chain_id is None and self.message_chain_id: + self.chain_id = self.message_chain_id + return self + + +class MessageDetail(ResponseModel): + id: str + conversation_id: str + inputs: dict[str, JSONValue] + query: str + message: JSONValue + message_tokens: int + answer: str + answer_tokens: int + provider_response_latency: float + from_source: str + from_end_user_id: str | None = None + from_account_id: str | None = None + feedbacks: list[Feedback] + workflow_run_id: str | None = None + annotation: Annotation | None = None + annotation_hit_history: AnnotationHitHistory | None = None + created_at: int | None = None + agent_thoughts: list[AgentThought] + message_files: list[MessageFile] + metadata: JSONValue + status: str + error: str | None = None + parent_message_id: str | None = None + + @field_validator("inputs", mode="before") + @classmethod + def _normalize_inputs(cls, value: JSONValue) -> JSONValue: + return format_files_contained(value) + + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value + + +class FeedbackStat(ResponseModel): + like: int + dislike: int + + +class StatusCount(ResponseModel): + success: int + failed: int + partial_success: int + + +class ModelConfig(ResponseModel): + opening_statement: str | None = None + suggested_questions: JSONValue | None = None + model: JSONValue | None = None + user_input_form: JSONValue | None = None + pre_prompt: str | None = None + agent_mode: JSONValue | None = None + + +class SimpleModelConfig(ResponseModel): + model: JSONValue | None = None + pre_prompt: str | None = None + + +class SimpleMessageDetail(ResponseModel): + inputs: dict[str, JSONValue] + query: str + message: str + answer: str + + @field_validator("inputs", mode="before") + @classmethod + def _normalize_inputs(cls, value: JSONValue) -> JSONValue: + return format_files_contained(value) + + +class Conversation(ResponseModel): + id: str + status: str + from_source: str + from_end_user_id: str | None = None + from_end_user_session_id: str | None = None + from_account_id: str | None = None + from_account_name: str | None = None + read_at: int | None = None + created_at: int | None = None + updated_at: int | None = None + annotation: Annotation | None = None + model_config_: SimpleModelConfig | None = Field(default=None, alias="model_config") + user_feedback_stats: FeedbackStat | None = None + admin_feedback_stats: FeedbackStat | None = None + message: SimpleMessageDetail | None = None + + +class ConversationPagination(ResponseModel): + page: int + limit: int + total: int + has_more: bool + data: list[Conversation] + + +class ConversationMessageDetail(ResponseModel): + id: str + status: str + from_source: str + from_end_user_id: str | None = None + from_account_id: str | None = None + created_at: int | None = None + model_config_: ModelConfig | None = Field(default=None, alias="model_config") + message: MessageDetail | None = None + + +class ConversationWithSummary(ResponseModel): + id: str + status: str + from_source: str + from_end_user_id: str | None = None + from_end_user_session_id: str | None = None + from_account_id: str | None = None + from_account_name: str | None = None + name: str + summary: str + read_at: int | None = None + created_at: int | None = None + updated_at: int | None = None + annotated: bool + model_config_: SimpleModelConfig | None = Field(default=None, alias="model_config") + message_count: int + user_feedback_stats: FeedbackStat | None = None + admin_feedback_stats: FeedbackStat | None = None + status_count: StatusCount | None = None + + +class ConversationWithSummaryPagination(ResponseModel): + page: int + limit: int + total: int + has_more: bool + data: list[ConversationWithSummary] + + +class ConversationDetail(ResponseModel): + id: str + status: str + from_source: str + from_end_user_id: str | None = None + from_account_id: str | None = None + created_at: int | None = None + updated_at: int | None = None + annotated: bool + introduction: str | None = None + model_config_: ModelConfig | None = Field(default=None, alias="model_config") + message_count: int + user_feedback_stats: FeedbackStat | None = None + admin_feedback_stats: FeedbackStat | None = None + + +def to_timestamp(value: datetime | None) -> int | None: + if value is None: + return None + return int(value.timestamp()) + + +def format_files_contained(value: JSONValue) -> JSONValue: + if isinstance(value, File): + return value.model_dump() + if isinstance(value, dict): + return {k: format_files_contained(v) for k, v in value.items()} + if isinstance(value, list): + return [format_files_contained(v) for v in value] + return value + + +def message_text(value: JSONValue) -> str: + if isinstance(value, list) and value: + first = value[0] + if isinstance(first, dict): + text = first.get("text") + if isinstance(text, str): + return text + return "" + + +def extract_model_config(value: object | None) -> dict[str, JSONValue]: + if value is None: + return {} + if isinstance(value, dict): + return value + if hasattr(value, "to_dict"): + return value.to_dict() + return {} diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py index 151ff6f826..2bba198fa8 100644 --- a/api/fields/message_fields.py +++ b/api/fields/message_fields.py @@ -1,77 +1,137 @@ -from flask_restx import Namespace, fields +from __future__ import annotations -from fields.conversation_fields import message_file_fields -from libs.helper import TimestampField +from datetime import datetime +from typing import TypeAlias -from .raws import FilesContainedField +from pydantic import BaseModel, ConfigDict, Field, field_validator -feedback_fields = { - "rating": fields.String, -} +from core.file import File +from fields.conversation_fields import AgentThought, JSONValue, MessageFile + +JSONValueType: TypeAlias = JSONValue -def build_feedback_model(api_or_ns: Namespace): - """Build the feedback model for the API or Namespace.""" - return api_or_ns.model("Feedback", feedback_fields) +class ResponseModel(BaseModel): + model_config = ConfigDict(from_attributes=True, extra="ignore") -agent_thought_fields = { - "id": fields.String, - "chain_id": fields.String, - "message_id": fields.String, - "position": fields.Integer, - "thought": fields.String, - "tool": fields.String, - "tool_labels": fields.Raw, - "tool_input": fields.String, - "created_at": TimestampField, - "observation": fields.String, - "files": fields.List(fields.String), -} +class SimpleFeedback(ResponseModel): + rating: str | None = None -def build_agent_thought_model(api_or_ns: Namespace): - """Build the agent thought model for the API or Namespace.""" - return api_or_ns.model("AgentThought", agent_thought_fields) +class RetrieverResource(ResponseModel): + id: str + message_id: str + position: int + dataset_id: str | None = None + dataset_name: str | None = None + document_id: str | None = None + document_name: str | None = None + data_source_type: str | None = None + segment_id: str | None = None + score: float | None = None + hit_count: int | None = None + word_count: int | None = None + segment_position: int | None = None + index_node_hash: str | None = None + content: str | None = None + created_at: int | None = None + + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value -retriever_resource_fields = { - "id": fields.String, - "message_id": fields.String, - "position": fields.Integer, - "dataset_id": fields.String, - "dataset_name": fields.String, - "document_id": fields.String, - "document_name": fields.String, - "data_source_type": fields.String, - "segment_id": fields.String, - "score": fields.Float, - "hit_count": fields.Integer, - "word_count": fields.Integer, - "segment_position": fields.Integer, - "index_node_hash": fields.String, - "content": fields.String, - "created_at": TimestampField, -} +class MessageListItem(ResponseModel): + id: str + conversation_id: str + parent_message_id: str | None = None + inputs: dict[str, JSONValueType] + query: str + answer: str = Field(validation_alias="re_sign_file_url_answer") + feedback: SimpleFeedback | None = Field(default=None, validation_alias="user_feedback") + retriever_resources: list[RetrieverResource] + created_at: int | None = None + agent_thoughts: list[AgentThought] + message_files: list[MessageFile] + status: str + error: str | None = None -message_fields = { - "id": fields.String, - "conversation_id": fields.String, - "parent_message_id": fields.String, - "inputs": FilesContainedField, - "query": fields.String, - "answer": fields.String(attribute="re_sign_file_url_answer"), - "feedback": fields.Nested(feedback_fields, attribute="user_feedback", allow_null=True), - "retriever_resources": fields.List(fields.Nested(retriever_resource_fields)), - "created_at": TimestampField, - "agent_thoughts": fields.List(fields.Nested(agent_thought_fields)), - "message_files": fields.List(fields.Nested(message_file_fields)), - "status": fields.String, - "error": fields.String, -} + @field_validator("inputs", mode="before") + @classmethod + def _normalize_inputs(cls, value: JSONValueType) -> JSONValueType: + return format_files_contained(value) -message_infinite_scroll_pagination_fields = { - "limit": fields.Integer, - "has_more": fields.Boolean, - "data": fields.List(fields.Nested(message_fields)), -} + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value + + +class WebMessageListItem(MessageListItem): + metadata: JSONValueType | None = Field(default=None, validation_alias="message_metadata_dict") + + +class MessageInfiniteScrollPagination(ResponseModel): + limit: int + has_more: bool + data: list[MessageListItem] + + +class WebMessageInfiniteScrollPagination(ResponseModel): + limit: int + has_more: bool + data: list[WebMessageListItem] + + +class SavedMessageItem(ResponseModel): + id: str + inputs: dict[str, JSONValueType] + query: str + answer: str + message_files: list[MessageFile] + feedback: SimpleFeedback | None = Field(default=None, validation_alias="user_feedback") + created_at: int | None = None + + @field_validator("inputs", mode="before") + @classmethod + def _normalize_inputs(cls, value: JSONValueType) -> JSONValueType: + return format_files_contained(value) + + @field_validator("created_at", mode="before") + @classmethod + def _normalize_created_at(cls, value: datetime | int | None) -> int | None: + if isinstance(value, datetime): + return to_timestamp(value) + return value + + +class SavedMessageInfiniteScrollPagination(ResponseModel): + limit: int + has_more: bool + data: list[SavedMessageItem] + + +class SuggestedQuestionsResponse(ResponseModel): + data: list[str] + + +def to_timestamp(value: datetime | None) -> int | None: + if value is None: + return None + return int(value.timestamp()) + + +def format_files_contained(value: JSONValueType) -> JSONValueType: + if isinstance(value, File): + return value.model_dump() + if isinstance(value, dict): + return {k: format_files_contained(v) for k, v in value.items()} + if isinstance(value, list): + return [format_files_contained(v) for v in value] + return value diff --git a/api/libs/external_api.py b/api/libs/external_api.py index 61a90ee4a9..e8592407c3 100644 --- a/api/libs/external_api.py +++ b/api/libs/external_api.py @@ -1,5 +1,4 @@ import re -import sys from collections.abc import Mapping from typing import Any @@ -109,11 +108,8 @@ def register_external_error_handlers(api: Api): data.setdefault("code", "unknown") data.setdefault("status", status_code) - # Log stack - exc_info: Any = sys.exc_info() - if exc_info[1] is None: - exc_info = (None, None, None) - current_app.log_exception(exc_info) + # Note: Exception logging is handled by Flask/Flask-RESTX framework automatically + # Explicit log_exception call removed to avoid duplicate log entries return data, status_code diff --git a/api/services/trigger/trigger_provider_service.py b/api/services/trigger/trigger_provider_service.py index ef77c33c1b..4131d75145 100644 --- a/api/services/trigger/trigger_provider_service.py +++ b/api/services/trigger/trigger_provider_service.py @@ -853,7 +853,7 @@ class TriggerProviderService: """ Create a subscription builder for rebuilding an existing subscription. - This method creates a builder pre-filled with data from the rebuild request, + This method rebuild the subscription by call DELETE and CREATE API of the third party provider(e.g. GitHub) keeping the same subscription_id and endpoint_id so the webhook URL remains unchanged. :param tenant_id: Tenant ID @@ -868,111 +868,50 @@ class TriggerProviderService: if not provider_controller: raise ValueError(f"Provider {provider_id} not found") - # Use distributed lock to prevent race conditions on the same subscription - lock_key = f"trigger_subscription_rebuild_lock:{tenant_id}_{subscription_id}" - with redis_client.lock(lock_key, timeout=20): - with Session(db.engine, expire_on_commit=False) as session: - try: - # Get subscription within the transaction - subscription: TriggerSubscription | None = ( - session.query(TriggerSubscription).filter_by(tenant_id=tenant_id, id=subscription_id).first() - ) - if not subscription: - raise ValueError(f"Subscription {subscription_id} not found") + subscription = TriggerProviderService.get_subscription_by_id( + tenant_id=tenant_id, + subscription_id=subscription_id, + ) + if not subscription: + raise ValueError(f"Subscription {subscription_id} not found") - credential_type = CredentialType.of(subscription.credential_type) - if credential_type not in [CredentialType.OAUTH2, CredentialType.API_KEY]: - raise ValueError("Credential type not supported for rebuild") + credential_type = CredentialType.of(subscription.credential_type) + if credential_type not in {CredentialType.OAUTH2, CredentialType.API_KEY}: + raise ValueError(f"Credential type {credential_type} not supported for auto creation") - # Decrypt existing credentials for merging - credential_encrypter, _ = create_trigger_provider_encrypter_for_subscription( - tenant_id=tenant_id, - controller=provider_controller, - subscription=subscription, - ) - decrypted_credentials = dict(credential_encrypter.decrypt(subscription.credentials)) + # Delete the previous subscription + user_id = subscription.user_id + unsubscribe_result = TriggerManager.unsubscribe_trigger( + tenant_id=tenant_id, + user_id=user_id, + provider_id=provider_id, + subscription=subscription.to_entity(), + credentials=subscription.credentials, + credential_type=credential_type, + ) + if not unsubscribe_result.success: + raise ValueError(f"Failed to delete previous subscription: {unsubscribe_result.message}") - # Merge credentials: if caller passed HIDDEN_VALUE, retain existing decrypted value - merged_credentials: dict[str, Any] = { - key: value if value != HIDDEN_VALUE else decrypted_credentials.get(key, UNKNOWN_VALUE) - for key, value in credentials.items() - } - - user_id = subscription.user_id - - # TODO: Trying to invoke update api of the plugin trigger provider - - # FALLBACK: If the update api is not implemented, - # delete the previous subscription and create a new one - - # Unsubscribe the previous subscription (external call, but we'll handle errors) - try: - TriggerManager.unsubscribe_trigger( - tenant_id=tenant_id, - user_id=user_id, - provider_id=provider_id, - subscription=subscription.to_entity(), - credentials=decrypted_credentials, - credential_type=credential_type, - ) - except Exception as e: - logger.exception("Error unsubscribing trigger during rebuild", exc_info=e) - # Continue anyway - the subscription might already be deleted externally - - # Create a new subscription with the same subscription_id and endpoint_id (external call) - new_subscription: TriggerSubscriptionEntity = TriggerManager.subscribe_trigger( - tenant_id=tenant_id, - user_id=user_id, - provider_id=provider_id, - endpoint=generate_plugin_trigger_endpoint_url(subscription.endpoint_id), - parameters=parameters, - credentials=merged_credentials, - credential_type=credential_type, - ) - - # Update the subscription in the same transaction - # Inline update logic to reuse the same session - if name is not None and name != subscription.name: - existing = ( - session.query(TriggerSubscription) - .filter_by(tenant_id=tenant_id, provider_id=str(provider_id), name=name) - .first() - ) - if existing and existing.id != subscription.id: - raise ValueError(f"Subscription name '{name}' already exists for this provider") - subscription.name = name - - # Update parameters - subscription.parameters = dict(parameters) - - # Update credentials with merged (and encrypted) values - subscription.credentials = dict(credential_encrypter.encrypt(merged_credentials)) - - # Update properties - if new_subscription.properties: - properties_encrypter, _ = create_provider_encrypter( - tenant_id=tenant_id, - config=provider_controller.get_properties_schema(), - cache=NoOpProviderCredentialCache(), - ) - subscription.properties = dict(properties_encrypter.encrypt(dict(new_subscription.properties))) - - # Update expiration timestamp - if new_subscription.expires_at is not None: - subscription.expires_at = new_subscription.expires_at - - # Commit the transaction - session.commit() - - # Clear subscription cache - delete_cache_for_subscription( - tenant_id=tenant_id, - provider_id=subscription.provider_id, - subscription_id=subscription.id, - ) - - except Exception as e: - # Rollback on any error - session.rollback() - logger.exception("Failed to rebuild trigger subscription", exc_info=e) - raise + # Create a new subscription with the same subscription_id and endpoint_id + new_credentials: dict[str, Any] = { + key: value if value != HIDDEN_VALUE else subscription.credentials.get(key, UNKNOWN_VALUE) + for key, value in credentials.items() + } + new_subscription: TriggerSubscriptionEntity = TriggerManager.subscribe_trigger( + tenant_id=tenant_id, + user_id=user_id, + provider_id=provider_id, + endpoint=generate_plugin_trigger_endpoint_url(subscription.endpoint_id), + parameters=parameters, + credentials=new_credentials, + credential_type=credential_type, + ) + TriggerProviderService.update_trigger_subscription( + tenant_id=tenant_id, + subscription_id=subscription.id, + name=name, + parameters=parameters, + credentials=new_credentials, + properties=new_subscription.properties, + expires_at=new_subscription.expires_at, + ) diff --git a/api/tests/integration_tests/workflow/nodes/test_code.py b/api/tests/integration_tests/workflow/nodes/test_code.py index e421e4ff36..9b0bd6275b 100644 --- a/api/tests/integration_tests/workflow/nodes/test_code.py +++ b/api/tests/integration_tests/workflow/nodes/test_code.py @@ -10,6 +10,7 @@ from core.workflow.enums import WorkflowNodeExecutionStatus from core.workflow.graph import Graph from core.workflow.node_events import NodeRunResult from core.workflow.nodes.code.code_node import CodeNode +from core.workflow.nodes.code.limits import CodeNodeLimits from core.workflow.nodes.node_factory import DifyNodeFactory from core.workflow.runtime import GraphRuntimeState, VariablePool from core.workflow.system_variable import SystemVariable @@ -67,6 +68,16 @@ def init_code_node(code_config: dict): config=code_config, graph_init_params=init_params, graph_runtime_state=graph_runtime_state, + code_limits=CodeNodeLimits( + max_string_length=dify_config.CODE_MAX_STRING_LENGTH, + max_number=dify_config.CODE_MAX_NUMBER, + min_number=dify_config.CODE_MIN_NUMBER, + max_precision=dify_config.CODE_MAX_PRECISION, + max_depth=dify_config.CODE_MAX_DEPTH, + max_number_array_length=dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH, + max_string_array_length=dify_config.CODE_MAX_STRING_ARRAY_LENGTH, + max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH, + ), ) return node diff --git a/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py b/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py index 8322b9414e..5315960d73 100644 --- a/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py +++ b/api/tests/test_containers_integration_tests/services/test_trigger_provider_service.py @@ -474,64 +474,6 @@ class TestTriggerProviderService: assert subscription.name == original_name assert subscription.parameters == original_parameters - def test_rebuild_trigger_subscription_unsubscribe_error_continues( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test that unsubscribe errors are handled gracefully and operation continues. - - This test verifies: - - Unsubscribe errors are caught and logged but don't stop the rebuild - - Rebuild continues even if unsubscribe fails - """ - fake = Faker() - account, tenant = self._create_test_account_and_tenant( - db_session_with_containers, mock_external_service_dependencies - ) - - provider_id = TriggerProviderID("test_org/test_plugin/test_provider") - credential_type = CredentialType.API_KEY - - original_credentials = {"api_key": "original-key"} - subscription = self._create_test_subscription( - db_session_with_containers, - tenant.id, - account.id, - provider_id, - credential_type, - original_credentials, - mock_external_service_dependencies, - ) - - # Make unsubscribe_trigger raise an error (should be caught and continue) - mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.side_effect = ValueError( - "Unsubscribe failed" - ) - - new_subscription_entity = TriggerSubscriptionEntity( - endpoint=subscription.endpoint_id, - parameters={}, - properties={}, - expires_at=-1, - ) - mock_external_service_dependencies["trigger_manager"].subscribe_trigger.return_value = new_subscription_entity - - # Execute rebuild - should succeed despite unsubscribe error - TriggerProviderService.rebuild_trigger_subscription( - tenant_id=tenant.id, - provider_id=provider_id, - subscription_id=subscription.id, - credentials={"api_key": "new-key"}, - parameters={}, - ) - - # Verify subscribe was still called (operation continued) - mock_external_service_dependencies["trigger_manager"].subscribe_trigger.assert_called_once() - - # Verify subscription was updated - db.session.refresh(subscription) - assert subscription.parameters == {} - def test_rebuild_trigger_subscription_subscription_not_found( self, db_session_with_containers, mock_external_service_dependencies ): @@ -558,70 +500,6 @@ class TestTriggerProviderService: parameters={}, ) - def test_rebuild_trigger_subscription_provider_not_found( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test error when provider is not found. - - This test verifies: - - Proper error is raised when provider doesn't exist - """ - fake = Faker() - account, tenant = self._create_test_account_and_tenant( - db_session_with_containers, mock_external_service_dependencies - ) - - provider_id = TriggerProviderID("non_existent_org/non_existent_plugin/non_existent_provider") - - # Make get_trigger_provider return None - mock_external_service_dependencies["trigger_manager"].get_trigger_provider.return_value = None - - with pytest.raises(ValueError, match="Provider.*not found"): - TriggerProviderService.rebuild_trigger_subscription( - tenant_id=tenant.id, - provider_id=provider_id, - subscription_id=fake.uuid4(), - credentials={}, - parameters={}, - ) - - def test_rebuild_trigger_subscription_unsupported_credential_type( - self, db_session_with_containers, mock_external_service_dependencies - ): - """ - Test error when credential type is not supported for rebuild. - - This test verifies: - - Proper error is raised for unsupported credential types (not OAUTH2 or API_KEY) - """ - fake = Faker() - account, tenant = self._create_test_account_and_tenant( - db_session_with_containers, mock_external_service_dependencies - ) - - provider_id = TriggerProviderID("test_org/test_plugin/test_provider") - credential_type = CredentialType.UNAUTHORIZED # Not supported - - subscription = self._create_test_subscription( - db_session_with_containers, - tenant.id, - account.id, - provider_id, - credential_type, - {}, - mock_external_service_dependencies, - ) - - with pytest.raises(ValueError, match="Credential type not supported for rebuild"): - TriggerProviderService.rebuild_trigger_subscription( - tenant_id=tenant.id, - provider_id=provider_id, - subscription_id=subscription.id, - credentials={}, - parameters={}, - ) - def test_rebuild_trigger_subscription_name_uniqueness_check( self, db_session_with_containers, mock_external_service_dependencies ): diff --git a/api/tests/unit_tests/controllers/__init__.py b/api/tests/unit_tests/controllers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/controllers/console/__init__.py b/api/tests/unit_tests/controllers/console/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/controllers/console/test_document_detail_api_data_source_info.py b/api/tests/unit_tests/controllers/console/test_document_detail_api_data_source_info.py new file mode 100644 index 0000000000..f8dd98fdb2 --- /dev/null +++ b/api/tests/unit_tests/controllers/console/test_document_detail_api_data_source_info.py @@ -0,0 +1,145 @@ +""" +Test for document detail API data_source_info serialization fix. + +This test verifies that the document detail API returns both data_source_info +and data_source_detail_dict for all data_source_type values, including "local_file". +""" + +import json +from typing import Generic, Literal, NotRequired, TypedDict, TypeVar, Union + +from models.dataset import Document + + +class LocalFileInfo(TypedDict): + file_path: str + size: int + created_at: NotRequired[str] + + +class UploadFileInfo(TypedDict): + upload_file_id: str + + +class NotionImportInfo(TypedDict): + notion_page_id: str + workspace_id: str + + +class WebsiteCrawlInfo(TypedDict): + url: str + job_id: str + + +RawInfo = Union[LocalFileInfo, UploadFileInfo, NotionImportInfo, WebsiteCrawlInfo] +T_type = TypeVar("T_type", bound=str) +T_info = TypeVar("T_info", bound=Union[LocalFileInfo, UploadFileInfo, NotionImportInfo, WebsiteCrawlInfo]) + + +class Case(TypedDict, Generic[T_type, T_info]): + data_source_type: T_type + data_source_info: str + expected_raw: T_info + + +LocalFileCase = Case[Literal["local_file"], LocalFileInfo] +UploadFileCase = Case[Literal["upload_file"], UploadFileInfo] +NotionImportCase = Case[Literal["notion_import"], NotionImportInfo] +WebsiteCrawlCase = Case[Literal["website_crawl"], WebsiteCrawlInfo] + +AnyCase = Union[LocalFileCase, UploadFileCase, NotionImportCase, WebsiteCrawlCase] + + +case_1: LocalFileCase = { + "data_source_type": "local_file", + "data_source_info": json.dumps({"file_path": "/tmp/test.txt", "size": 1024}), + "expected_raw": {"file_path": "/tmp/test.txt", "size": 1024}, +} + + +# ERROR: Expected LocalFileInfo, but got WebsiteCrawlInfo +case_2: LocalFileCase = { + "data_source_type": "local_file", + "data_source_info": "...", + "expected_raw": {"file_path": "https://google.com", "size": 123}, +} + +cases: list[AnyCase] = [case_1] + + +class TestDocumentDetailDataSourceInfo: + """Test cases for document detail API data_source_info serialization.""" + + def test_data_source_info_dict_returns_raw_data(self): + """Test that data_source_info_dict returns raw JSON data for all data_source_type values.""" + # Test data for different data_source_type values + for case in cases: + document = Document( + data_source_type=case["data_source_type"], + data_source_info=case["data_source_info"], + ) + + # Test data_source_info_dict (raw data) + raw_result = document.data_source_info_dict + assert raw_result == case["expected_raw"], f"Failed for {case['data_source_type']}" + + # Verify raw_result is always a valid dict + assert isinstance(raw_result, dict) + + def test_local_file_data_source_info_without_db_context(self): + """Test that local_file type data_source_info_dict works without database context.""" + test_data: LocalFileInfo = { + "file_path": "/local/path/document.txt", + "size": 512, + "created_at": "2024-01-01T00:00:00Z", + } + + document = Document( + data_source_type="local_file", + data_source_info=json.dumps(test_data), + ) + + # data_source_info_dict should return the raw data (this doesn't need DB context) + raw_data = document.data_source_info_dict + assert raw_data == test_data + assert isinstance(raw_data, dict) + + # Verify the data contains expected keys for pipeline mode + assert "file_path" in raw_data + assert "size" in raw_data + + def test_notion_and_website_crawl_data_source_detail(self): + """Test that notion_import and website_crawl return raw data in data_source_detail_dict.""" + # Test notion_import + notion_data: NotionImportInfo = {"notion_page_id": "page-123", "workspace_id": "ws-456"} + document = Document( + data_source_type="notion_import", + data_source_info=json.dumps(notion_data), + ) + + # data_source_detail_dict should return raw data for notion_import + detail_result = document.data_source_detail_dict + assert detail_result == notion_data + + # Test website_crawl + website_data: WebsiteCrawlInfo = {"url": "https://example.com", "job_id": "job-789"} + document = Document( + data_source_type="website_crawl", + data_source_info=json.dumps(website_data), + ) + + # data_source_detail_dict should return raw data for website_crawl + detail_result = document.data_source_detail_dict + assert detail_result == website_data + + def test_local_file_data_source_detail_dict_without_db(self): + """Test that local_file returns empty data_source_detail_dict (this doesn't need DB context).""" + # Test local_file - this should work without database context since it returns {} early + document = Document( + data_source_type="local_file", + data_source_info=json.dumps({"file_path": "/tmp/test.txt"}), + ) + + # Should return empty dict for local_file type (handled in the model) + detail_result = document.data_source_detail_dict + assert detail_result == {} diff --git a/api/tests/unit_tests/controllers/web/test_message_list.py b/api/tests/unit_tests/controllers/web/test_message_list.py new file mode 100644 index 0000000000..2835f7ffbf --- /dev/null +++ b/api/tests/unit_tests/controllers/web/test_message_list.py @@ -0,0 +1,174 @@ +"""Unit tests for controllers.web.message message list mapping.""" + +from __future__ import annotations + +import builtins +from datetime import datetime +from types import ModuleType, SimpleNamespace +from unittest.mock import patch +from uuid import uuid4 + +import pytest +from flask import Flask +from flask.views import MethodView + +# Ensure flask_restx.api finds MethodView during import. +if not hasattr(builtins, "MethodView"): + builtins.MethodView = MethodView # type: ignore[attr-defined] + + +def _load_controller_module(): + """Import controllers.web.message using a stub package.""" + + import importlib + import importlib.util + import sys + + parent_module_name = "controllers.web" + module_name = f"{parent_module_name}.message" + + if parent_module_name not in sys.modules: + from flask_restx import Namespace + + stub = ModuleType(parent_module_name) + stub.__file__ = "controllers/web/__init__.py" + stub.__path__ = ["controllers/web"] + stub.__package__ = "controllers" + stub.__spec__ = importlib.util.spec_from_loader(parent_module_name, loader=None, is_package=True) + stub.web_ns = Namespace("web", description="Web API", path="/") + sys.modules[parent_module_name] = stub + + wraps_module_name = f"{parent_module_name}.wraps" + if wraps_module_name not in sys.modules: + wraps_stub = ModuleType(wraps_module_name) + + class WebApiResource: + pass + + wraps_stub.WebApiResource = WebApiResource + sys.modules[wraps_module_name] = wraps_stub + + return importlib.import_module(module_name) + + +message_module = _load_controller_module() +MessageListApi = message_module.MessageListApi + + +@pytest.fixture +def app() -> Flask: + app = Flask(__name__) + app.config["TESTING"] = True + return app + + +def test_message_list_mapping(app: Flask) -> None: + conversation_id = str(uuid4()) + message_id = str(uuid4()) + + created_at = datetime(2024, 1, 1, 12, 0, 0) + resource_created_at = datetime(2024, 1, 1, 13, 0, 0) + thought_created_at = datetime(2024, 1, 1, 14, 0, 0) + + retriever_resource_obj = SimpleNamespace( + id="res-obj", + message_id=message_id, + position=2, + dataset_id="ds-1", + dataset_name="dataset", + document_id="doc-1", + document_name="document", + data_source_type="file", + segment_id="seg-1", + score=0.9, + hit_count=1, + word_count=10, + segment_position=0, + index_node_hash="hash", + content="content", + created_at=resource_created_at, + ) + + agent_thought = SimpleNamespace( + id="thought-1", + chain_id=None, + message_chain_id="chain-1", + message_id=message_id, + position=1, + thought="thinking", + tool="tool", + tool_labels={"label": "value"}, + tool_input="{}", + created_at=thought_created_at, + observation="observed", + files=["file-a"], + ) + + message_file_obj = SimpleNamespace( + id="file-obj", + filename="b.txt", + type="file", + url=None, + mime_type=None, + size=None, + transfer_method="local", + belongs_to=None, + upload_file_id=None, + ) + + message = SimpleNamespace( + id=message_id, + conversation_id=conversation_id, + parent_message_id=None, + inputs={"foo": "bar"}, + query="hello", + re_sign_file_url_answer="answer", + user_feedback=SimpleNamespace(rating="like"), + retriever_resources=[ + {"id": "res-dict", "message_id": message_id, "position": 1}, + retriever_resource_obj, + ], + created_at=created_at, + agent_thoughts=[agent_thought], + message_files=[ + {"id": "file-dict", "filename": "a.txt", "type": "file", "transfer_method": "local"}, + message_file_obj, + ], + status="success", + error=None, + message_metadata_dict={"meta": "value"}, + ) + + pagination = SimpleNamespace(limit=20, has_more=False, data=[message]) + app_model = SimpleNamespace(mode="chat") + end_user = SimpleNamespace() + + with ( + patch.object(message_module.MessageService, "pagination_by_first_id", return_value=pagination) as mock_page, + app.test_request_context(f"/messages?conversation_id={conversation_id}&limit=20"), + ): + response = MessageListApi().get(app_model, end_user) + + mock_page.assert_called_once_with(app_model, end_user, conversation_id, None, 20) + assert response["limit"] == 20 + assert response["has_more"] is False + assert len(response["data"]) == 1 + + item = response["data"][0] + assert item["id"] == message_id + assert item["conversation_id"] == conversation_id + assert item["inputs"] == {"foo": "bar"} + assert item["answer"] == "answer" + assert item["feedback"]["rating"] == "like" + assert item["metadata"] == {"meta": "value"} + assert item["created_at"] == int(created_at.timestamp()) + + assert item["retriever_resources"][0]["id"] == "res-dict" + assert item["retriever_resources"][1]["id"] == "res-obj" + assert item["retriever_resources"][1]["created_at"] == int(resource_created_at.timestamp()) + + assert item["agent_thoughts"][0]["chain_id"] == "chain-1" + assert item["agent_thoughts"][0]["created_at"] == int(thought_created_at.timestamp()) + + assert item["message_files"][0]["id"] == "file-dict" + assert item["message_files"][1]["id"] == "file-obj" diff --git a/api/tests/unit_tests/core/helper/test_ssrf_proxy.py b/api/tests/unit_tests/core/helper/test_ssrf_proxy.py index beae1d0358..d6d75fb72f 100644 --- a/api/tests/unit_tests/core/helper/test_ssrf_proxy.py +++ b/api/tests/unit_tests/core/helper/test_ssrf_proxy.py @@ -14,12 +14,12 @@ def test_successful_request(mock_get_client): mock_client = MagicMock() mock_response = MagicMock() mock_response.status_code = 200 - mock_client.send.return_value = mock_response mock_client.request.return_value = mock_response mock_get_client.return_value = mock_client response = make_request("GET", "http://example.com") assert response.status_code == 200 + mock_client.request.assert_called_once() @patch("core.helper.ssrf_proxy._get_ssrf_client") @@ -27,7 +27,6 @@ def test_retry_exceed_max_retries(mock_get_client): mock_client = MagicMock() mock_response = MagicMock() mock_response.status_code = 500 - mock_client.send.return_value = mock_response mock_client.request.return_value = mock_response mock_get_client.return_value = mock_client @@ -72,34 +71,12 @@ class TestGetUserProvidedHostHeader: assert result in ("first.com", "second.com") -@patch("core.helper.ssrf_proxy._get_ssrf_client") -def test_host_header_preservation_without_user_header(mock_get_client): - """Test that when no Host header is provided, the default behavior is maintained.""" - mock_client = MagicMock() - mock_request = MagicMock() - mock_request.headers = {} - mock_response = MagicMock() - mock_response.status_code = 200 - mock_client.send.return_value = mock_response - mock_client.request.return_value = mock_response - mock_get_client.return_value = mock_client - - response = make_request("GET", "http://example.com") - - assert response.status_code == 200 - # Host should not be set if not provided by user - assert "Host" not in mock_request.headers or mock_request.headers.get("Host") is None - - @patch("core.helper.ssrf_proxy._get_ssrf_client") def test_host_header_preservation_with_user_header(mock_get_client): """Test that user-provided Host header is preserved in the request.""" mock_client = MagicMock() - mock_request = MagicMock() - mock_request.headers = {} mock_response = MagicMock() mock_response.status_code = 200 - mock_client.send.return_value = mock_response mock_client.request.return_value = mock_response mock_get_client.return_value = mock_client @@ -107,3 +84,93 @@ def test_host_header_preservation_with_user_header(mock_get_client): response = make_request("GET", "http://example.com", headers={"Host": custom_host}) assert response.status_code == 200 + # Verify client.request was called with the host header preserved (lowercase) + call_kwargs = mock_client.request.call_args.kwargs + assert call_kwargs["headers"]["host"] == custom_host + + +@patch("core.helper.ssrf_proxy._get_ssrf_client") +@pytest.mark.parametrize("host_key", ["host", "HOST", "Host"]) +def test_host_header_preservation_case_insensitive(mock_get_client, host_key): + """Test that Host header is preserved regardless of case.""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.request.return_value = mock_response + mock_get_client.return_value = mock_client + + response = make_request("GET", "http://example.com", headers={host_key: "api.example.com"}) + + assert response.status_code == 200 + # Host header should be normalized to lowercase "host" + call_kwargs = mock_client.request.call_args.kwargs + assert call_kwargs["headers"]["host"] == "api.example.com" + + +class TestFollowRedirectsParameter: + """Tests for follow_redirects parameter handling. + + These tests verify that follow_redirects is correctly passed to client.request(). + """ + + @patch("core.helper.ssrf_proxy._get_ssrf_client") + def test_follow_redirects_passed_to_request(self, mock_get_client): + """Verify follow_redirects IS passed to client.request().""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.request.return_value = mock_response + mock_get_client.return_value = mock_client + + make_request("GET", "http://example.com", follow_redirects=True) + + # Verify follow_redirects was passed to request + call_kwargs = mock_client.request.call_args.kwargs + assert call_kwargs.get("follow_redirects") is True + + @patch("core.helper.ssrf_proxy._get_ssrf_client") + def test_allow_redirects_converted_to_follow_redirects(self, mock_get_client): + """Verify allow_redirects (requests-style) is converted to follow_redirects (httpx-style).""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.request.return_value = mock_response + mock_get_client.return_value = mock_client + + # Use allow_redirects (requests-style parameter) + make_request("GET", "http://example.com", allow_redirects=True) + + # Verify it was converted to follow_redirects + call_kwargs = mock_client.request.call_args.kwargs + assert call_kwargs.get("follow_redirects") is True + assert "allow_redirects" not in call_kwargs + + @patch("core.helper.ssrf_proxy._get_ssrf_client") + def test_follow_redirects_not_set_when_not_specified(self, mock_get_client): + """Verify follow_redirects is not in kwargs when not specified (httpx default behavior).""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.request.return_value = mock_response + mock_get_client.return_value = mock_client + + make_request("GET", "http://example.com") + + # follow_redirects should not be in kwargs, letting httpx use its default + call_kwargs = mock_client.request.call_args.kwargs + assert "follow_redirects" not in call_kwargs + + @patch("core.helper.ssrf_proxy._get_ssrf_client") + def test_follow_redirects_takes_precedence_over_allow_redirects(self, mock_get_client): + """Verify follow_redirects takes precedence when both are specified.""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_client.request.return_value = mock_response + mock_get_client.return_value = mock_client + + # Both specified - follow_redirects should take precedence + make_request("GET", "http://example.com", allow_redirects=False, follow_redirects=True) + + call_kwargs = mock_client.request.call_args.kwargs + assert call_kwargs.get("follow_redirects") is True diff --git a/api/tests/unit_tests/core/logging/__init__.py b/api/tests/unit_tests/core/logging/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/logging/test_context.py b/api/tests/unit_tests/core/logging/test_context.py new file mode 100644 index 0000000000..f388a3a0b9 --- /dev/null +++ b/api/tests/unit_tests/core/logging/test_context.py @@ -0,0 +1,79 @@ +"""Tests for logging context module.""" + +import uuid + +from core.logging.context import ( + clear_request_context, + get_request_id, + get_trace_id, + init_request_context, +) + + +class TestLoggingContext: + """Tests for the logging context functions.""" + + def test_init_creates_request_id(self): + """init_request_context should create a 10-char request ID.""" + init_request_context() + request_id = get_request_id() + assert len(request_id) == 10 + assert all(c in "0123456789abcdef" for c in request_id) + + def test_init_creates_trace_id(self): + """init_request_context should create a 32-char trace ID.""" + init_request_context() + trace_id = get_trace_id() + assert len(trace_id) == 32 + assert all(c in "0123456789abcdef" for c in trace_id) + + def test_trace_id_derived_from_request_id(self): + """trace_id should be deterministically derived from request_id.""" + init_request_context() + request_id = get_request_id() + trace_id = get_trace_id() + + # Verify trace_id is derived using uuid5 + expected_trace = uuid.uuid5(uuid.NAMESPACE_DNS, request_id).hex + assert trace_id == expected_trace + + def test_clear_resets_context(self): + """clear_request_context should reset both IDs to empty strings.""" + init_request_context() + assert get_request_id() != "" + assert get_trace_id() != "" + + clear_request_context() + assert get_request_id() == "" + assert get_trace_id() == "" + + def test_default_values_are_empty(self): + """Default values should be empty strings before init.""" + clear_request_context() + assert get_request_id() == "" + assert get_trace_id() == "" + + def test_multiple_inits_create_different_ids(self): + """Each init should create new unique IDs.""" + init_request_context() + first_request_id = get_request_id() + first_trace_id = get_trace_id() + + init_request_context() + second_request_id = get_request_id() + second_trace_id = get_trace_id() + + assert first_request_id != second_request_id + assert first_trace_id != second_trace_id + + def test_context_isolation(self): + """Context should be isolated per-call (no thread leakage in same thread).""" + init_request_context() + id1 = get_request_id() + + # Simulate another request + init_request_context() + id2 = get_request_id() + + # IDs should be different + assert id1 != id2 diff --git a/api/tests/unit_tests/core/logging/test_filters.py b/api/tests/unit_tests/core/logging/test_filters.py new file mode 100644 index 0000000000..b66ad111d5 --- /dev/null +++ b/api/tests/unit_tests/core/logging/test_filters.py @@ -0,0 +1,114 @@ +"""Tests for logging filters.""" + +import logging +from unittest import mock + +import pytest + + +@pytest.fixture +def log_record(): + return logging.LogRecord( + name="test", + level=logging.INFO, + pathname="", + lineno=0, + msg="test", + args=(), + exc_info=None, + ) + + +class TestTraceContextFilter: + def test_sets_empty_trace_id_without_context(self, log_record): + from core.logging.context import clear_request_context + from core.logging.filters import TraceContextFilter + + # Ensure no context is set + clear_request_context() + + filter = TraceContextFilter() + result = filter.filter(log_record) + + assert result is True + assert hasattr(log_record, "trace_id") + assert hasattr(log_record, "span_id") + assert hasattr(log_record, "req_id") + # Without context, IDs should be empty + assert log_record.trace_id == "" + assert log_record.req_id == "" + + def test_sets_trace_id_from_context(self, log_record): + """Test that trace_id and req_id are set from ContextVar when initialized.""" + from core.logging.context import init_request_context + from core.logging.filters import TraceContextFilter + + # Initialize context (no Flask needed!) + init_request_context() + + filter = TraceContextFilter() + filter.filter(log_record) + + # With context initialized, IDs should be set + assert log_record.trace_id != "" + assert len(log_record.trace_id) == 32 + assert log_record.req_id != "" + assert len(log_record.req_id) == 10 + + def test_filter_always_returns_true(self, log_record): + from core.logging.filters import TraceContextFilter + + filter = TraceContextFilter() + result = filter.filter(log_record) + assert result is True + + def test_sets_trace_id_from_otel_when_available(self, log_record): + from core.logging.filters import TraceContextFilter + + mock_span = mock.MagicMock() + mock_context = mock.MagicMock() + mock_context.trace_id = 0x5B8AA5A2D2C872E8321CF37308D69DF2 + mock_context.span_id = 0x051581BF3BB55C45 + mock_span.get_span_context.return_value = mock_context + + with ( + mock.patch("opentelemetry.trace.get_current_span", return_value=mock_span), + mock.patch("opentelemetry.trace.span.INVALID_TRACE_ID", 0), + mock.patch("opentelemetry.trace.span.INVALID_SPAN_ID", 0), + ): + filter = TraceContextFilter() + filter.filter(log_record) + + assert log_record.trace_id == "5b8aa5a2d2c872e8321cf37308d69df2" + assert log_record.span_id == "051581bf3bb55c45" + + +class TestIdentityContextFilter: + def test_sets_empty_identity_without_request_context(self, log_record): + from core.logging.filters import IdentityContextFilter + + filter = IdentityContextFilter() + result = filter.filter(log_record) + + assert result is True + assert log_record.tenant_id == "" + assert log_record.user_id == "" + assert log_record.user_type == "" + + def test_filter_always_returns_true(self, log_record): + from core.logging.filters import IdentityContextFilter + + filter = IdentityContextFilter() + result = filter.filter(log_record) + assert result is True + + def test_handles_exception_gracefully(self, log_record): + from core.logging.filters import IdentityContextFilter + + filter = IdentityContextFilter() + + # Should not raise even if something goes wrong + with mock.patch("core.logging.filters.flask.has_request_context", side_effect=Exception("Test error")): + result = filter.filter(log_record) + assert result is True + assert log_record.tenant_id == "" diff --git a/api/tests/unit_tests/core/logging/test_structured_formatter.py b/api/tests/unit_tests/core/logging/test_structured_formatter.py new file mode 100644 index 0000000000..94b91d205e --- /dev/null +++ b/api/tests/unit_tests/core/logging/test_structured_formatter.py @@ -0,0 +1,267 @@ +"""Tests for structured JSON formatter.""" + +import logging +import sys + +import orjson + + +class TestStructuredJSONFormatter: + def test_basic_log_format(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter(service_name="test-service") + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=42, + msg="Test message", + args=(), + exc_info=None, + ) + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert log_dict["severity"] == "INFO" + assert log_dict["service"] == "test-service" + assert log_dict["caller"] == "test.py:42" + assert log_dict["message"] == "Test message" + assert "ts" in log_dict + assert log_dict["ts"].endswith("Z") + + def test_severity_mapping(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + + test_cases = [ + (logging.DEBUG, "DEBUG"), + (logging.INFO, "INFO"), + (logging.WARNING, "WARN"), + (logging.ERROR, "ERROR"), + (logging.CRITICAL, "ERROR"), + ] + + for level, expected_severity in test_cases: + record = logging.LogRecord( + name="test", + level=level, + pathname="test.py", + lineno=1, + msg="Test", + args=(), + exc_info=None, + ) + output = formatter.format(record) + log_dict = orjson.loads(output) + assert log_dict["severity"] == expected_severity, f"Level {level} should map to {expected_severity}" + + def test_error_with_stack_trace(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + + try: + raise ValueError("Test error") + except ValueError: + exc_info = sys.exc_info() + + record = logging.LogRecord( + name="test", + level=logging.ERROR, + pathname="test.py", + lineno=10, + msg="Error occurred", + args=(), + exc_info=exc_info, + ) + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert log_dict["severity"] == "ERROR" + assert "stack_trace" in log_dict + assert "ValueError: Test error" in log_dict["stack_trace"] + + def test_no_stack_trace_for_info(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + + try: + raise ValueError("Test error") + except ValueError: + exc_info = sys.exc_info() + + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=10, + msg="Info message", + args=(), + exc_info=exc_info, + ) + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert "stack_trace" not in log_dict + + def test_trace_context_included(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test", + args=(), + exc_info=None, + ) + record.trace_id = "5b8aa5a2d2c872e8321cf37308d69df2" + record.span_id = "051581bf3bb55c45" + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert log_dict["trace_id"] == "5b8aa5a2d2c872e8321cf37308d69df2" + assert log_dict["span_id"] == "051581bf3bb55c45" + + def test_identity_context_included(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test", + args=(), + exc_info=None, + ) + record.tenant_id = "t-global-corp" + record.user_id = "u-admin-007" + record.user_type = "admin" + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert "identity" in log_dict + assert log_dict["identity"]["tenant_id"] == "t-global-corp" + assert log_dict["identity"]["user_id"] == "u-admin-007" + assert log_dict["identity"]["user_type"] == "admin" + + def test_no_identity_when_empty(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test", + args=(), + exc_info=None, + ) + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert "identity" not in log_dict + + def test_attributes_included(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test", + args=(), + exc_info=None, + ) + record.attributes = {"order_id": "ord-123", "amount": 99.99} + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert log_dict["attributes"]["order_id"] == "ord-123" + assert log_dict["attributes"]["amount"] == 99.99 + + def test_message_with_args(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="User %s logged in from %s", + args=("john", "192.168.1.1"), + exc_info=None, + ) + + output = formatter.format(record) + log_dict = orjson.loads(output) + + assert log_dict["message"] == "User john logged in from 192.168.1.1" + + def test_timestamp_format(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test", + args=(), + exc_info=None, + ) + + output = formatter.format(record) + log_dict = orjson.loads(output) + + # Verify ISO 8601 format with Z suffix + ts = log_dict["ts"] + assert ts.endswith("Z") + assert "T" in ts + # Should have milliseconds + assert "." in ts + + def test_fallback_for_non_serializable_attributes(self): + from core.logging.structured_formatter import StructuredJSONFormatter + + formatter = StructuredJSONFormatter() + record = logging.LogRecord( + name="test", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test with non-serializable", + args=(), + exc_info=None, + ) + # Set is not serializable by orjson + record.attributes = {"items": {1, 2, 3}, "custom": object()} + + # Should not raise, fallback to json.dumps with default=str + output = formatter.format(record) + + # Verify it's valid JSON (parsed by stdlib json since orjson may fail) + import json + + log_dict = json.loads(output) + assert log_dict["message"] == "Test with non-serializable" + assert "attributes" in log_dict diff --git a/api/tests/unit_tests/core/logging/test_trace_helpers.py b/api/tests/unit_tests/core/logging/test_trace_helpers.py new file mode 100644 index 0000000000..aab1753b9b --- /dev/null +++ b/api/tests/unit_tests/core/logging/test_trace_helpers.py @@ -0,0 +1,102 @@ +"""Tests for trace helper functions.""" + +import re +from unittest import mock + + +class TestGetSpanIdFromOtelContext: + def test_returns_none_without_span(self): + from core.helper.trace_id_helper import get_span_id_from_otel_context + + with mock.patch("opentelemetry.trace.get_current_span", return_value=None): + result = get_span_id_from_otel_context() + assert result is None + + def test_returns_span_id_when_available(self): + from core.helper.trace_id_helper import get_span_id_from_otel_context + + mock_span = mock.MagicMock() + mock_context = mock.MagicMock() + mock_context.span_id = 0x051581BF3BB55C45 + mock_span.get_span_context.return_value = mock_context + + with mock.patch("opentelemetry.trace.get_current_span", return_value=mock_span): + with mock.patch("opentelemetry.trace.span.INVALID_SPAN_ID", 0): + result = get_span_id_from_otel_context() + assert result == "051581bf3bb55c45" + + def test_returns_none_on_exception(self): + from core.helper.trace_id_helper import get_span_id_from_otel_context + + with mock.patch("opentelemetry.trace.get_current_span", side_effect=Exception("Test error")): + result = get_span_id_from_otel_context() + assert result is None + + +class TestGenerateTraceparentHeader: + def test_generates_valid_format(self): + from core.helper.trace_id_helper import generate_traceparent_header + + with mock.patch("opentelemetry.trace.get_current_span", return_value=None): + result = generate_traceparent_header() + + assert result is not None + # Format: 00-{trace_id}-{span_id}-01 + parts = result.split("-") + assert len(parts) == 4 + assert parts[0] == "00" # version + assert len(parts[1]) == 32 # trace_id (32 hex chars) + assert len(parts[2]) == 16 # span_id (16 hex chars) + assert parts[3] == "01" # flags + + def test_uses_otel_context_when_available(self): + from core.helper.trace_id_helper import generate_traceparent_header + + mock_span = mock.MagicMock() + mock_context = mock.MagicMock() + mock_context.trace_id = 0x5B8AA5A2D2C872E8321CF37308D69DF2 + mock_context.span_id = 0x051581BF3BB55C45 + mock_span.get_span_context.return_value = mock_context + + with mock.patch("opentelemetry.trace.get_current_span", return_value=mock_span): + with ( + mock.patch("opentelemetry.trace.span.INVALID_TRACE_ID", 0), + mock.patch("opentelemetry.trace.span.INVALID_SPAN_ID", 0), + ): + result = generate_traceparent_header() + + assert result == "00-5b8aa5a2d2c872e8321cf37308d69df2-051581bf3bb55c45-01" + + def test_generates_hex_only_values(self): + from core.helper.trace_id_helper import generate_traceparent_header + + with mock.patch("opentelemetry.trace.get_current_span", return_value=None): + result = generate_traceparent_header() + + parts = result.split("-") + # All parts should be valid hex + assert re.match(r"^[0-9a-f]+$", parts[1]) + assert re.match(r"^[0-9a-f]+$", parts[2]) + + +class TestParseTraceparentHeader: + def test_parses_valid_traceparent(self): + from core.helper.trace_id_helper import parse_traceparent_header + + traceparent = "00-5b8aa5a2d2c872e8321cf37308d69df2-051581bf3bb55c45-01" + result = parse_traceparent_header(traceparent) + + assert result == "5b8aa5a2d2c872e8321cf37308d69df2" + + def test_returns_none_for_invalid_format(self): + from core.helper.trace_id_helper import parse_traceparent_header + + # Wrong number of parts + assert parse_traceparent_header("00-abc-def") is None + # Wrong trace_id length + assert parse_traceparent_header("00-abc-def-01") is None + + def test_returns_none_for_empty_string(self): + from core.helper.trace_id_helper import parse_traceparent_header + + assert parse_traceparent_header("") is None diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py index 6306d665e7..ca08cb0591 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py @@ -73,6 +73,7 @@ import pytest from core.rag.datasource.retrieval_service import RetrievalService from core.rag.models.document import Document +from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.rag.retrieval.retrieval_methods import RetrievalMethod from models.dataset import Dataset @@ -1518,6 +1519,282 @@ class TestRetrievalService: call_kwargs = mock_retrieve.call_args.kwargs assert call_kwargs["reranking_model"] == reranking_model + # ==================== Multiple Retrieve Thread Tests ==================== + + @patch("core.rag.retrieval.dataset_retrieval.DataPostProcessor") + @patch("core.rag.retrieval.dataset_retrieval.DatasetRetrieval._retriever") + def test_multiple_retrieve_thread_skips_second_reranking_with_single_dataset( + self, mock_retriever, mock_data_processor_class, mock_flask_app, mock_dataset + ): + """ + Test that _multiple_retrieve_thread skips second reranking when dataset_count is 1. + + When there is only one dataset, the second reranking is unnecessary + because the documents are already ranked from the first retrieval. + This optimization avoids the overhead of reranking when it won't + provide any benefit. + + Verifies: + - DataPostProcessor is NOT called when dataset_count == 1 + - Documents are still added to all_documents + - Standard scoring logic is applied instead + """ + # Arrange + dataset_retrieval = DatasetRetrieval() + tenant_id = str(uuid4()) + + # Create test documents + doc1 = Document( + page_content="Test content 1", + metadata={"doc_id": "doc1", "score": 0.9, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ) + doc2 = Document( + page_content="Test content 2", + metadata={"doc_id": "doc2", "score": 0.8, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ) + + # Mock _retriever to return documents + def side_effect_retriever( + flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids + ): + all_documents.extend([doc1, doc2]) + + mock_retriever.side_effect = side_effect_retriever + + # Set up dataset with high_quality indexing + mock_dataset.indexing_technique = "high_quality" + + all_documents = [] + + # Act - Call with dataset_count = 1 + dataset_retrieval._multiple_retrieve_thread( + flask_app=mock_flask_app, + available_datasets=[mock_dataset], + metadata_condition=None, + metadata_filter_document_ids=None, + all_documents=all_documents, + tenant_id=tenant_id, + reranking_enable=True, + reranking_mode="reranking_model", + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v2"}, + weights=None, + top_k=5, + score_threshold=0.5, + query="test query", + attachment_id=None, + dataset_count=1, # Single dataset - should skip second reranking + ) + + # Assert + # DataPostProcessor should NOT be called (second reranking skipped) + mock_data_processor_class.assert_not_called() + + # Documents should still be added to all_documents + assert len(all_documents) == 2 + assert all_documents[0].page_content == "Test content 1" + assert all_documents[1].page_content == "Test content 2" + + @patch("core.rag.retrieval.dataset_retrieval.DataPostProcessor") + @patch("core.rag.retrieval.dataset_retrieval.DatasetRetrieval._retriever") + @patch("core.rag.retrieval.dataset_retrieval.DatasetRetrieval.calculate_vector_score") + def test_multiple_retrieve_thread_performs_second_reranking_with_multiple_datasets( + self, mock_calculate_vector_score, mock_retriever, mock_data_processor_class, mock_flask_app, mock_dataset + ): + """ + Test that _multiple_retrieve_thread performs second reranking when dataset_count > 1. + + When there are multiple datasets, the second reranking is necessary + to merge and re-rank results from different datasets. This ensures + the most relevant documents across all datasets are returned. + + Verifies: + - DataPostProcessor IS called when dataset_count > 1 + - Reranking is applied with correct parameters + - Documents are processed correctly + """ + # Arrange + dataset_retrieval = DatasetRetrieval() + tenant_id = str(uuid4()) + + # Create test documents + doc1 = Document( + page_content="Test content 1", + metadata={"doc_id": "doc1", "score": 0.7, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ) + doc2 = Document( + page_content="Test content 2", + metadata={"doc_id": "doc2", "score": 0.6, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ) + + # Mock _retriever to return documents + def side_effect_retriever( + flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids + ): + all_documents.extend([doc1, doc2]) + + mock_retriever.side_effect = side_effect_retriever + + # Set up dataset with high_quality indexing + mock_dataset.indexing_technique = "high_quality" + + # Mock DataPostProcessor instance and its invoke method + mock_processor_instance = Mock() + # Simulate reranking - return documents in reversed order with updated scores + reranked_docs = [ + Document( + page_content="Test content 2", + metadata={"doc_id": "doc2", "score": 0.95, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ), + Document( + page_content="Test content 1", + metadata={"doc_id": "doc1", "score": 0.85, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ), + ] + mock_processor_instance.invoke.return_value = reranked_docs + mock_data_processor_class.return_value = mock_processor_instance + + all_documents = [] + + # Create second dataset + mock_dataset2 = Mock(spec=Dataset) + mock_dataset2.id = str(uuid4()) + mock_dataset2.indexing_technique = "high_quality" + mock_dataset2.provider = "dify" + + # Act - Call with dataset_count = 2 + dataset_retrieval._multiple_retrieve_thread( + flask_app=mock_flask_app, + available_datasets=[mock_dataset, mock_dataset2], + metadata_condition=None, + metadata_filter_document_ids=None, + all_documents=all_documents, + tenant_id=tenant_id, + reranking_enable=True, + reranking_mode="reranking_model", + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v2"}, + weights=None, + top_k=5, + score_threshold=0.5, + query="test query", + attachment_id=None, + dataset_count=2, # Multiple datasets - should perform second reranking + ) + + # Assert + # DataPostProcessor SHOULD be called (second reranking performed) + mock_data_processor_class.assert_called_once_with( + tenant_id, + "reranking_model", + {"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v2"}, + None, + False, + ) + + # Verify invoke was called with correct parameters + mock_processor_instance.invoke.assert_called_once() + + # Documents should be added to all_documents after reranking + assert len(all_documents) == 2 + # The reranked order should be reflected + assert all_documents[0].page_content == "Test content 2" + assert all_documents[1].page_content == "Test content 1" + + @patch("core.rag.retrieval.dataset_retrieval.DataPostProcessor") + @patch("core.rag.retrieval.dataset_retrieval.DatasetRetrieval._retriever") + @patch("core.rag.retrieval.dataset_retrieval.DatasetRetrieval.calculate_vector_score") + def test_multiple_retrieve_thread_single_dataset_uses_standard_scoring( + self, mock_calculate_vector_score, mock_retriever, mock_data_processor_class, mock_flask_app, mock_dataset + ): + """ + Test that _multiple_retrieve_thread uses standard scoring when dataset_count is 1 + and reranking is enabled. + + When there's only one dataset, instead of using DataPostProcessor, + the method should fall through to the standard scoring logic + (calculate_vector_score for high_quality datasets). + + Verifies: + - DataPostProcessor is NOT called + - calculate_vector_score IS called for high_quality indexing + - Documents are scored correctly + """ + # Arrange + dataset_retrieval = DatasetRetrieval() + tenant_id = str(uuid4()) + + # Create test documents + doc1 = Document( + page_content="Test content 1", + metadata={"doc_id": "doc1", "score": 0.9, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ) + doc2 = Document( + page_content="Test content 2", + metadata={"doc_id": "doc2", "score": 0.8, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ) + + # Mock _retriever to return documents + def side_effect_retriever( + flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids + ): + all_documents.extend([doc1, doc2]) + + mock_retriever.side_effect = side_effect_retriever + + # Set up dataset with high_quality indexing + mock_dataset.indexing_technique = "high_quality" + + # Mock calculate_vector_score to return scored documents + scored_docs = [ + Document( + page_content="Test content 1", + metadata={"doc_id": "doc1", "score": 0.95, "document_id": str(uuid4()), "dataset_id": mock_dataset.id}, + provider="dify", + ), + ] + mock_calculate_vector_score.return_value = scored_docs + + all_documents = [] + + # Act - Call with dataset_count = 1 + dataset_retrieval._multiple_retrieve_thread( + flask_app=mock_flask_app, + available_datasets=[mock_dataset], + metadata_condition=None, + metadata_filter_document_ids=None, + all_documents=all_documents, + tenant_id=tenant_id, + reranking_enable=True, # Reranking enabled but should be skipped for single dataset + reranking_mode="reranking_model", + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v2"}, + weights=None, + top_k=5, + score_threshold=0.5, + query="test query", + attachment_id=None, + dataset_count=1, + ) + + # Assert + # DataPostProcessor should NOT be called + mock_data_processor_class.assert_not_called() + + # calculate_vector_score SHOULD be called for high_quality datasets + mock_calculate_vector_score.assert_called_once() + call_args = mock_calculate_vector_score.call_args + assert call_args[0][1] == 5 # top_k + + # Documents should be added after standard scoring + assert len(all_documents) == 1 + assert all_documents[0].page_content == "Test content 1" + class TestRetrievalMethods: """ diff --git a/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py new file mode 100644 index 0000000000..5f461d53ae --- /dev/null +++ b/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py @@ -0,0 +1,113 @@ +import threading +from unittest.mock import Mock, patch +from uuid import uuid4 + +import pytest +from flask import Flask, current_app + +from core.rag.models.document import Document +from core.rag.retrieval.dataset_retrieval import DatasetRetrieval +from models.dataset import Dataset + + +class TestRetrievalService: + @pytest.fixture + def mock_dataset(self) -> Dataset: + dataset = Mock(spec=Dataset) + dataset.id = str(uuid4()) + dataset.tenant_id = str(uuid4()) + dataset.name = "test_dataset" + dataset.indexing_technique = "high_quality" + dataset.provider = "dify" + return dataset + + def test_multiple_retrieve_reranking_with_app_context(self, mock_dataset): + """ + Repro test for current bug: + reranking runs after `with flask_app.app_context():` exits. + `_multiple_retrieve_thread` catches exceptions and stores them into `thread_exceptions`, + so we must assert from that list (not from an outer try/except). + """ + dataset_retrieval = DatasetRetrieval() + flask_app = Flask(__name__) + tenant_id = str(uuid4()) + + # second dataset to ensure dataset_count > 1 reranking branch + secondary_dataset = Mock(spec=Dataset) + secondary_dataset.id = str(uuid4()) + secondary_dataset.provider = "dify" + secondary_dataset.indexing_technique = "high_quality" + + # retriever returns 1 doc into internal list (all_documents_item) + document = Document( + page_content="Context aware doc", + metadata={ + "doc_id": "doc1", + "score": 0.95, + "document_id": str(uuid4()), + "dataset_id": mock_dataset.id, + }, + provider="dify", + ) + + def fake_retriever( + flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids + ): + all_documents.append(document) + + called = {"init": 0, "invoke": 0} + + class ContextRequiredPostProcessor: + def __init__(self, *args, **kwargs): + called["init"] += 1 + # will raise RuntimeError if no Flask app context exists + _ = current_app.name + + def invoke(self, *args, **kwargs): + called["invoke"] += 1 + _ = current_app.name + return kwargs.get("documents") or args[1] + + # output list from _multiple_retrieve_thread + all_documents: list[Document] = [] + + # IMPORTANT: _multiple_retrieve_thread swallows exceptions and appends them here + thread_exceptions: list[Exception] = [] + + def target(): + with patch.object(dataset_retrieval, "_retriever", side_effect=fake_retriever): + with patch( + "core.rag.retrieval.dataset_retrieval.DataPostProcessor", + ContextRequiredPostProcessor, + ): + dataset_retrieval._multiple_retrieve_thread( + flask_app=flask_app, + available_datasets=[mock_dataset, secondary_dataset], + metadata_condition=None, + metadata_filter_document_ids=None, + all_documents=all_documents, + tenant_id=tenant_id, + reranking_enable=True, + reranking_mode="reranking_model", + reranking_model={ + "reranking_provider_name": "cohere", + "reranking_model_name": "rerank-v2", + }, + weights=None, + top_k=3, + score_threshold=0.0, + query="test query", + attachment_id=None, + dataset_count=2, # force reranking branch + thread_exceptions=thread_exceptions, # ✅ key + ) + + t = threading.Thread(target=target) + t.start() + t.join() + + # Ensure reranking branch was actually executed + assert called["init"] >= 1, "DataPostProcessor was never constructed; reranking branch may not have run." + + # Current buggy code should record an exception (not raise it) + assert not thread_exceptions, thread_exceptions diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py index eeffdd27fe..6e9a432745 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py @@ -103,13 +103,25 @@ class MockNodeFactory(DifyNodeFactory): # Create mock node instance mock_class = self._mock_node_types[node_type] - mock_instance = mock_class( - id=node_id, - config=node_config, - graph_init_params=self.graph_init_params, - graph_runtime_state=self.graph_runtime_state, - mock_config=self.mock_config, - ) + if node_type == NodeType.CODE: + mock_instance = mock_class( + id=node_id, + config=node_config, + graph_init_params=self.graph_init_params, + graph_runtime_state=self.graph_runtime_state, + mock_config=self.mock_config, + code_executor=self._code_executor, + code_providers=self._code_providers, + code_limits=self._code_limits, + ) + else: + mock_instance = mock_class( + id=node_id, + config=node_config, + graph_init_params=self.graph_init_params, + graph_runtime_state=self.graph_runtime_state, + mock_config=self.mock_config, + ) return mock_instance diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py index fd94a5e833..5937bbfb39 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py @@ -40,12 +40,14 @@ class MockNodeMixin: graph_init_params: "GraphInitParams", graph_runtime_state: "GraphRuntimeState", mock_config: Optional["MockConfig"] = None, + **kwargs: Any, ): super().__init__( id=id, config=config, graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, + **kwargs, ) self.mock_config = mock_config diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py index 4fb693a5c2..de08cc3497 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py @@ -5,11 +5,24 @@ This module tests the functionality of MockTemplateTransformNode and MockCodeNod to ensure they work correctly with the TableTestRunner. """ +from configs import dify_config from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus +from core.workflow.nodes.code.limits import CodeNodeLimits from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory from tests.unit_tests.core.workflow.graph_engine.test_mock_nodes import MockCodeNode, MockTemplateTransformNode +DEFAULT_CODE_LIMITS = CodeNodeLimits( + max_string_length=dify_config.CODE_MAX_STRING_LENGTH, + max_number=dify_config.CODE_MAX_NUMBER, + min_number=dify_config.CODE_MIN_NUMBER, + max_precision=dify_config.CODE_MAX_PRECISION, + max_depth=dify_config.CODE_MAX_DEPTH, + max_number_array_length=dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH, + max_string_array_length=dify_config.CODE_MAX_STRING_ARRAY_LENGTH, + max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH, +) + class TestMockTemplateTransformNode: """Test cases for MockTemplateTransformNode.""" @@ -306,6 +319,7 @@ class TestMockCodeNode: graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, mock_config=mock_config, + code_limits=DEFAULT_CODE_LIMITS, ) # Run the node @@ -370,6 +384,7 @@ class TestMockCodeNode: graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, mock_config=mock_config, + code_limits=DEFAULT_CODE_LIMITS, ) # Run the node @@ -438,6 +453,7 @@ class TestMockCodeNode: graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state, mock_config=mock_config, + code_limits=DEFAULT_CODE_LIMITS, ) # Run the node diff --git a/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py index 596e72ddd0..2262d25a14 100644 --- a/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py @@ -1,3 +1,4 @@ +from configs import dify_config from core.helper.code_executor.code_executor import CodeLanguage from core.variables.types import SegmentType from core.workflow.nodes.code.code_node import CodeNode @@ -7,6 +8,18 @@ from core.workflow.nodes.code.exc import ( DepthLimitError, OutputValidationError, ) +from core.workflow.nodes.code.limits import CodeNodeLimits + +CodeNode._limits = CodeNodeLimits( + max_string_length=dify_config.CODE_MAX_STRING_LENGTH, + max_number=dify_config.CODE_MAX_NUMBER, + min_number=dify_config.CODE_MIN_NUMBER, + max_precision=dify_config.CODE_MAX_PRECISION, + max_depth=dify_config.CODE_MAX_DEPTH, + max_number_array_length=dify_config.CODE_MAX_NUMBER_ARRAY_LENGTH, + max_string_array_length=dify_config.CODE_MAX_STRING_ARRAY_LENGTH, + max_object_array_length=dify_config.CODE_MAX_OBJECT_ARRAY_LENGTH, +) class TestCodeNodeExceptions: diff --git a/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py index 1a67d5c3e3..66d6c3c56b 100644 --- a/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/template_transform/template_transform_node_spec.py @@ -5,8 +5,8 @@ from core.workflow.graph_engine.entities.graph import Graph from core.workflow.graph_engine.entities.graph_init_params import GraphInitParams from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState -from core.helper.code_executor.code_executor import CodeExecutionError from core.workflow.enums import ErrorStrategy, NodeType, WorkflowNodeExecutionStatus +from core.workflow.nodes.template_transform.template_renderer import TemplateRenderError from core.workflow.nodes.template_transform.template_transform_node import TemplateTransformNode from models.workflow import WorkflowType @@ -127,7 +127,9 @@ class TestTemplateTransformNode: """Test version class method.""" assert TemplateTransformNode.version() == "1" - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_simple_template( self, mock_execute, basic_node_data, mock_graph, mock_graph_runtime_state, graph_init_params ): @@ -145,7 +147,7 @@ class TestTemplateTransformNode: mock_graph_runtime_state.variable_pool.get.side_effect = lambda selector: variable_map.get(tuple(selector)) # Setup mock executor - mock_execute.return_value = {"result": "Hello Alice, you are 30 years old!"} + mock_execute.return_value = "Hello Alice, you are 30 years old!" node = TemplateTransformNode( id="test_node", @@ -162,7 +164,9 @@ class TestTemplateTransformNode: assert result.inputs["name"] == "Alice" assert result.inputs["age"] == 30 - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_with_none_values(self, mock_execute, mock_graph, mock_graph_runtime_state, graph_init_params): """Test _run with None variable values.""" node_data = { @@ -172,7 +176,7 @@ class TestTemplateTransformNode: } mock_graph_runtime_state.variable_pool.get.return_value = None - mock_execute.return_value = {"result": "Value: "} + mock_execute.return_value = "Value: " node = TemplateTransformNode( id="test_node", @@ -187,13 +191,15 @@ class TestTemplateTransformNode: assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED assert result.inputs["value"] is None - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_with_code_execution_error( self, mock_execute, basic_node_data, mock_graph, mock_graph_runtime_state, graph_init_params ): """Test _run when code execution fails.""" mock_graph_runtime_state.variable_pool.get.return_value = MagicMock() - mock_execute.side_effect = CodeExecutionError("Template syntax error") + mock_execute.side_effect = TemplateRenderError("Template syntax error") node = TemplateTransformNode( id="test_node", @@ -208,14 +214,16 @@ class TestTemplateTransformNode: assert result.status == WorkflowNodeExecutionStatus.FAILED assert "Template syntax error" in result.error - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) @patch("core.workflow.nodes.template_transform.template_transform_node.MAX_TEMPLATE_TRANSFORM_OUTPUT_LENGTH", 10) def test_run_output_length_exceeds_limit( self, mock_execute, basic_node_data, mock_graph, mock_graph_runtime_state, graph_init_params ): """Test _run when output exceeds maximum length.""" mock_graph_runtime_state.variable_pool.get.return_value = MagicMock() - mock_execute.return_value = {"result": "This is a very long output that exceeds the limit"} + mock_execute.return_value = "This is a very long output that exceeds the limit" node = TemplateTransformNode( id="test_node", @@ -230,7 +238,9 @@ class TestTemplateTransformNode: assert result.status == WorkflowNodeExecutionStatus.FAILED assert "Output length exceeds" in result.error - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_with_complex_jinja2_template( self, mock_execute, mock_graph, mock_graph_runtime_state, graph_init_params ): @@ -257,7 +267,7 @@ class TestTemplateTransformNode: ("sys", "show_total"): mock_show_total, } mock_graph_runtime_state.variable_pool.get.side_effect = lambda selector: variable_map.get(tuple(selector)) - mock_execute.return_value = {"result": "apple, banana, orange (Total: 3)"} + mock_execute.return_value = "apple, banana, orange (Total: 3)" node = TemplateTransformNode( id="test_node", @@ -292,7 +302,9 @@ class TestTemplateTransformNode: assert mapping["node_123.var1"] == ["sys", "input1"] assert mapping["node_123.var2"] == ["sys", "input2"] - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_with_empty_variables(self, mock_execute, mock_graph, mock_graph_runtime_state, graph_init_params): """Test _run with no variables (static template).""" node_data = { @@ -301,7 +313,7 @@ class TestTemplateTransformNode: "template": "This is a static message.", } - mock_execute.return_value = {"result": "This is a static message."} + mock_execute.return_value = "This is a static message." node = TemplateTransformNode( id="test_node", @@ -317,7 +329,9 @@ class TestTemplateTransformNode: assert result.outputs["output"] == "This is a static message." assert result.inputs == {} - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_with_numeric_values(self, mock_execute, mock_graph, mock_graph_runtime_state, graph_init_params): """Test _run with numeric variable values.""" node_data = { @@ -339,7 +353,7 @@ class TestTemplateTransformNode: ("sys", "quantity"): mock_quantity, } mock_graph_runtime_state.variable_pool.get.side_effect = lambda selector: variable_map.get(tuple(selector)) - mock_execute.return_value = {"result": "Total: $31.5"} + mock_execute.return_value = "Total: $31.5" node = TemplateTransformNode( id="test_node", @@ -354,7 +368,9 @@ class TestTemplateTransformNode: assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED assert result.outputs["output"] == "Total: $31.5" - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_with_dict_values(self, mock_execute, mock_graph, mock_graph_runtime_state, graph_init_params): """Test _run with dictionary variable values.""" node_data = { @@ -367,7 +383,7 @@ class TestTemplateTransformNode: mock_user.to_object.return_value = {"name": "John Doe", "email": "john@example.com"} mock_graph_runtime_state.variable_pool.get.return_value = mock_user - mock_execute.return_value = {"result": "Name: John Doe, Email: john@example.com"} + mock_execute.return_value = "Name: John Doe, Email: john@example.com" node = TemplateTransformNode( id="test_node", @@ -383,7 +399,9 @@ class TestTemplateTransformNode: assert "John Doe" in result.outputs["output"] assert "john@example.com" in result.outputs["output"] - @patch("core.workflow.nodes.template_transform.template_transform_node.CodeExecutor.execute_workflow_code_template") + @patch( + "core.workflow.nodes.template_transform.template_transform_node.CodeExecutorJinja2TemplateRenderer.render_template" + ) def test_run_with_list_values(self, mock_execute, mock_graph, mock_graph_runtime_state, graph_init_params): """Test _run with list variable values.""" node_data = { @@ -396,7 +414,7 @@ class TestTemplateTransformNode: mock_tags.to_object.return_value = ["python", "ai", "workflow"] mock_graph_runtime_state.variable_pool.get.return_value = mock_tags - mock_execute.return_value = {"result": "Tags: #python #ai #workflow "} + mock_execute.return_value = "Tags: #python #ai #workflow " node = TemplateTransformNode( id="test_node", diff --git a/api/tests/unit_tests/libs/test_external_api.py b/api/tests/unit_tests/libs/test_external_api.py index 9aa157a651..5135970bcc 100644 --- a/api/tests/unit_tests/libs/test_external_api.py +++ b/api/tests/unit_tests/libs/test_external_api.py @@ -99,29 +99,20 @@ def test_external_api_json_message_and_bad_request_rewrite(): assert res.get_json()["message"] == "Invalid JSON payload received or JSON payload is empty." -def test_external_api_param_mapping_and_quota_and_exc_info_none(): - # Force exc_info() to return (None,None,None) only during request - import libs.external_api as ext +def test_external_api_param_mapping_and_quota(): + app = _create_api_app() + client = app.test_client() - orig_exc_info = ext.sys.exc_info - try: - ext.sys.exc_info = lambda: (None, None, None) + # Param errors mapping payload path + res = client.get("/api/param-errors") + assert res.status_code == 400 + data = res.get_json() + assert data["code"] == "invalid_param" + assert data["params"] == "field" - app = _create_api_app() - client = app.test_client() - - # Param errors mapping payload path - res = client.get("/api/param-errors") - assert res.status_code == 400 - data = res.get_json() - assert data["code"] == "invalid_param" - assert data["params"] == "field" - - # Quota path — depending on Flask-RESTX internals it may be handled - res = client.get("/api/quota") - assert res.status_code in (400, 429) - finally: - ext.sys.exc_info = orig_exc_info # type: ignore[assignment] + # Quota path — depending on Flask-RESTX internals it may be handled + res = client.get("/api/quota") + assert res.status_code in (400, 429) def test_unauthorized_and_force_logout_clears_cookies(): diff --git a/api/uv.lock b/api/uv.lock index 4ccd229eec..8e60fad3a7 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -1953,14 +1953,14 @@ wheels = [ [[package]] name = "fickling" -version = "0.1.5" +version = "0.1.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "stdlib-list" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/94/0d0ce455952c036cfee235637f786c1d1d07d1b90f6a4dfb50e0eff929d6/fickling-0.1.5.tar.gz", hash = "sha256:92f9b49e717fa8dbc198b4b7b685587adb652d85aa9ede8131b3e44494efca05", size = 282462, upload-time = "2025-11-18T05:04:30.748Z" } +sdist = { url = "https://files.pythonhosted.org/packages/07/ab/7571453f9365c17c047b5a7b7e82692a7f6be51203f295030886758fd57a/fickling-0.1.6.tar.gz", hash = "sha256:03cb5d7bd09f9169c7583d2079fad4b3b88b25f865ed0049172e5cb68582311d", size = 284033, upload-time = "2025-12-15T18:14:58.721Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/a7/d25912b2e3a5b0a37e6f460050bbc396042b5906a6563a1962c484abc3c6/fickling-0.1.5-py3-none-any.whl", hash = "sha256:6aed7270bfa276e188b0abe043a27b3a042129d28ec1fa6ff389bdcc5ad178bb", size = 46240, upload-time = "2025-11-18T05:04:29.048Z" }, + { url = "https://files.pythonhosted.org/packages/76/99/cc04258dda421bc612cdfe4be8c253f45b922f1c7f268b5a0b9962d9cd12/fickling-0.1.6-py3-none-any.whl", hash = "sha256:465d0069548bfc731bdd75a583cb4cf5a4b2666739c0f76287807d724b147ed3", size = 47922, upload-time = "2025-12-15T18:14:57.526Z" }, ] [[package]] @@ -2955,14 +2955,14 @@ wheels = [ [[package]] name = "intersystems-irispython" -version = "5.3.0" +version = "5.3.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/56/16d93576b50408d97a5cbbd055d8da024d585e96a360e2adc95b41ae6284/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-macosx_10_9_universal2.whl", hash = "sha256:59d3176a35867a55b1ab69a6b5c75438b460291bccb254c2d2f4173be08b6e55", size = 6594480, upload-time = "2025-10-09T20:47:27.629Z" }, - { url = "https://files.pythonhosted.org/packages/99/bc/19e144ee805ea6ee0df6342a711e722c84347c05a75b3bf040c5fbe19982/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56bccefd1997c25f9f9f6c4086214c18d4fdaac0a93319d4b21dd9a6c59c9e51", size = 14779928, upload-time = "2025-10-09T20:47:30.564Z" }, - { url = "https://files.pythonhosted.org/packages/e6/fb/59ba563a80b39e9450b4627b5696019aa831dce27dacc3831b8c1e669102/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e160adc0785c55bb64e4264b8e99075691a15b0afa5d8d529f1b4bac7e57b81", size = 14422035, upload-time = "2025-10-09T20:47:32.552Z" }, - { url = "https://files.pythonhosted.org/packages/c1/68/ade8ad43f0ed1e5fba60e1710fa5ddeb01285f031e465e8c006329072e63/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-win32.whl", hash = "sha256:820f2c5729119e5173a5bf6d6ac2a41275c4f1ffba6af6c59ea313ecd8f499cc", size = 2824316, upload-time = "2025-10-09T20:47:28.998Z" }, - { url = "https://files.pythonhosted.org/packages/f4/03/cd45cb94e42c01dc525efebf3c562543a18ee55b67fde4022665ca672351/intersystems_irispython-5.3.0-cp38.cp39.cp310.cp311.cp312.cp313-cp38.cp39.cp310.cp311.cp312.cp313-win_amd64.whl", hash = "sha256:fc07ec24bc50b6f01573221cd7d86f2937549effe31c24af8db118e0131e340c", size = 3463297, upload-time = "2025-10-09T20:47:34.636Z" }, + { url = "https://files.pythonhosted.org/packages/33/5b/8eac672a6ef26bef6ef79a7c9557096167b50c4d3577d558ae6999c195fe/intersystems_irispython-5.3.1-cp38.cp39.cp310.cp311.cp312.cp313.cp314-cp38.cp39.cp310.cp311.cp312.cp313.cp314-macosx_10_9_universal2.whl", hash = "sha256:634c9b4ec620837d830ff49543aeb2797a1ce8d8570a0e868398b85330dfcc4d", size = 6736686, upload-time = "2025-12-19T16:24:57.734Z" }, + { url = "https://files.pythonhosted.org/packages/ba/17/bab3e525ffb6711355f7feea18c1b7dced9c2484cecbcdd83f74550398c0/intersystems_irispython-5.3.1-cp38.cp39.cp310.cp311.cp312.cp313.cp314-cp38.cp39.cp310.cp311.cp312.cp313.cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf912f30f85e2a42f2c2ea77fbeb98a24154d5ea7428a50382786a684ec4f583", size = 16005259, upload-time = "2025-12-19T16:25:05.578Z" }, + { url = "https://files.pythonhosted.org/packages/39/59/9bb79d9e32e3e55fc9aed8071a797b4497924cbc6457cea9255bb09320b7/intersystems_irispython-5.3.1-cp38.cp39.cp310.cp311.cp312.cp313.cp314-cp38.cp39.cp310.cp311.cp312.cp313.cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:be5659a6bb57593910f2a2417eddb9f5dc2f93a337ead6ddca778f557b8a359a", size = 15638040, upload-time = "2025-12-19T16:24:54.429Z" }, + { url = "https://files.pythonhosted.org/packages/cf/47/654ccf9c5cca4f5491f070888544165c9e2a6a485e320ea703e4e38d2358/intersystems_irispython-5.3.1-cp38.cp39.cp310.cp311.cp312.cp313.cp314-cp38.cp39.cp310.cp311.cp312.cp313.cp314-win32.whl", hash = "sha256:583e4f17088c1e0530f32efda1c0ccb02993cbc22035bc8b4c71d8693b04ee7e", size = 2879644, upload-time = "2025-12-19T16:24:59.945Z" }, + { url = "https://files.pythonhosted.org/packages/68/95/19cc13d09f1b4120bd41b1434509052e1d02afd27f2679266d7ad9cc1750/intersystems_irispython-5.3.1-cp38.cp39.cp310.cp311.cp312.cp313.cp314-cp38.cp39.cp310.cp311.cp312.cp313.cp314-win_amd64.whl", hash = "sha256:1d5d40450a0cdeec2a1f48d12d946a8a8ffc7c128576fcae7d58e66e3a127eae", size = 3522092, upload-time = "2025-12-19T16:25:01.834Z" }, ] [[package]] diff --git a/docker/.env.example b/docker/.env.example index 5c1089408c..ecb003cd70 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -69,6 +69,8 @@ PYTHONIOENCODING=utf-8 # The log level for the application. # Supported values are `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL` LOG_LEVEL=INFO +# Log output format: text or json +LOG_OUTPUT_FORMAT=text # Log file path LOG_FILE=/app/logs/server.log # Log file max size, the unit is MB @@ -231,7 +233,7 @@ NEXT_PUBLIC_ENABLE_SINGLE_DOLLAR_LATEX=false # You can adjust the database configuration according to your needs. # ------------------------------ -# Database type, supported values are `postgresql` and `mysql` +# Database type, supported values are `postgresql`, `mysql`, `oceanbase`, `seekdb` DB_TYPE=postgresql # For MySQL, only `root` user is supported for now DB_USERNAME=postgres @@ -531,7 +533,7 @@ SUPABASE_URL=your-server-url # ------------------------------ # The type of vector store to use. -# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`. +# Supported values are `weaviate`, `oceanbase`, `seekdb`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`, `clickzetta`, `alibabacloud_mysql`, `iris`. VECTOR_STORE=weaviate # Prefix used to create collection name in vector database VECTOR_INDEX_NAME_PREFIX=Vector_index @@ -542,9 +544,9 @@ WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih WEAVIATE_GRPC_ENDPOINT=grpc://weaviate:50051 WEAVIATE_TOKENIZATION=word -# For OceanBase metadata database configuration, available when `DB_TYPE` is `mysql` and `COMPOSE_PROFILES` includes `oceanbase`. +# For OceanBase metadata database configuration, available when `DB_TYPE` is `oceanbase`. # For OceanBase vector database configuration, available when `VECTOR_STORE` is `oceanbase` -# If you want to use OceanBase as both vector database and metadata database, you need to set `DB_TYPE` to `mysql`, `COMPOSE_PROFILES` is `oceanbase`, and set Database Configuration is the same as the vector database. +# If you want to use OceanBase as both vector database and metadata database, you need to set both `DB_TYPE` and `VECTOR_STORE` to `oceanbase`, and set Database Configuration is the same as the vector database. # seekdb is the lite version of OceanBase and shares the connection configuration with OceanBase. OCEANBASE_VECTOR_HOST=oceanbase OCEANBASE_VECTOR_PORT=2881 diff --git a/docker/docker-compose.middleware.yaml b/docker/docker-compose.middleware.yaml index dba61d1816..81c34fc6a2 100644 --- a/docker/docker-compose.middleware.yaml +++ b/docker/docker-compose.middleware.yaml @@ -129,6 +129,7 @@ services: - ./middleware.env environment: # Use the shared environment variables. + LOG_OUTPUT_FORMAT: ${LOG_OUTPUT_FORMAT:-text} DB_DATABASE: ${DB_PLUGIN_DATABASE:-dify_plugin} REDIS_HOST: ${REDIS_HOST:-redis} REDIS_PORT: ${REDIS_PORT:-6379} diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 9910c95a84..a67141ce05 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -17,6 +17,7 @@ x-shared-env: &shared-api-worker-env LC_ALL: ${LC_ALL:-en_US.UTF-8} PYTHONIOENCODING: ${PYTHONIOENCODING:-utf-8} LOG_LEVEL: ${LOG_LEVEL:-INFO} + LOG_OUTPUT_FORMAT: ${LOG_OUTPUT_FORMAT:-text} LOG_FILE: ${LOG_FILE:-/app/logs/server.log} LOG_FILE_MAX_SIZE: ${LOG_FILE_MAX_SIZE:-20} LOG_FILE_BACKUP_COUNT: ${LOG_FILE_BACKUP_COUNT:-5} diff --git a/docker/ssrf_proxy/squid.conf.template b/docker/ssrf_proxy/squid.conf.template index 1775a1fff9..256e669c8d 100644 --- a/docker/ssrf_proxy/squid.conf.template +++ b/docker/ssrf_proxy/squid.conf.template @@ -54,3 +54,52 @@ http_access allow src_all # Unless the option's size is increased, an error will occur when uploading more than two files. client_request_buffer_max_size 100 MB + +################################## Performance & Concurrency ############################### +# Increase file descriptor limit for high concurrency +max_filedescriptors 65536 + +# Timeout configurations for image requests +connect_timeout 30 seconds +request_timeout 2 minutes +read_timeout 2 minutes +client_lifetime 5 minutes +shutdown_lifetime 30 seconds + +# Persistent connections - improve performance for multiple requests +server_persistent_connections on +client_persistent_connections on +persistent_request_timeout 30 seconds +pconn_timeout 1 minute + +# Connection pool and concurrency limits +client_db on +server_idle_pconn_timeout 2 minutes +client_idle_pconn_timeout 2 minutes + +# Quick abort settings - don't abort requests that are mostly done +quick_abort_min 16 KB +quick_abort_max 16 MB +quick_abort_pct 95 + +# Memory and cache optimization +memory_cache_mode disk +cache_mem 256 MB +maximum_object_size_in_memory 512 KB + +# DNS resolver settings for better performance +dns_timeout 30 seconds +dns_retransmit_interval 5 seconds +# By default, Squid uses the system's configured DNS resolvers. +# If you need to override them, set dns_nameservers to appropriate servers +# for your environment (for example, internal/corporate DNS). The following +# is an example using public DNS and SHOULD be customized before use: +# dns_nameservers 8.8.8.8 8.8.4.4 + +# Logging format for better debugging +logformat dify_log %ts.%03tu %6tr %>a %Ss/%03>Hs % = ({ isWorkflowPage, isRagPipelinePage, defaultLocale, - Object.keys(Actions).sort().join(','), + Actions, ], queryFn: async () => { const query = searchQueryDebouncedValue.toLowerCase() diff --git a/web/app/components/plugins/plugin-page/plugins-panel.tsx b/web/app/components/plugins/plugin-page/plugins-panel.tsx index a065e735a8..ff765d39ab 100644 --- a/web/app/components/plugins/plugin-page/plugins-panel.tsx +++ b/web/app/components/plugins/plugin-page/plugins-panel.tsx @@ -1,10 +1,13 @@ 'use client' +import type { PluginDetail } from '../types' import type { FilterState } from './filter-management' import { useDebounceFn } from 'ahooks' import { useMemo } from 'react' import { useTranslation } from 'react-i18next' import Button from '@/app/components/base/button' import PluginDetailPanel from '@/app/components/plugins/plugin-detail-panel' +import { useGetLanguage } from '@/context/i18n' +import { renderI18nObject } from '@/i18n-config' import { useInstalledLatestVersion, useInstalledPluginList, useInvalidateInstalledPluginList } from '@/service/use-plugins' import Loading from '../../base/loading' import { PluginSource } from '../types' @@ -13,8 +16,34 @@ import Empty from './empty' import FilterManagement from './filter-management' import List from './list' +const matchesSearchQuery = (plugin: PluginDetail & { latest_version: string }, query: string, locale: string): boolean => { + if (!query) + return true + const lowerQuery = query.toLowerCase() + const { declaration } = plugin + // Match plugin_id + if (plugin.plugin_id.toLowerCase().includes(lowerQuery)) + return true + // Match plugin name + if (plugin.name?.toLowerCase().includes(lowerQuery)) + return true + // Match declaration name + if (declaration.name?.toLowerCase().includes(lowerQuery)) + return true + // Match localized label + const label = renderI18nObject(declaration.label, locale) + if (label?.toLowerCase().includes(lowerQuery)) + return true + // Match localized description + const description = renderI18nObject(declaration.description, locale) + if (description?.toLowerCase().includes(lowerQuery)) + return true + return false +} + const PluginsPanel = () => { const { t } = useTranslation() + const locale = useGetLanguage() const filters = usePluginPageContext(v => v.filters) as FilterState const setFilters = usePluginPageContext(v => v.setFilters) const { data: pluginList, isLoading: isPluginListLoading, isFetching, isLastPage, loadNextPage } = useInstalledPluginList() @@ -48,11 +77,11 @@ const PluginsPanel = () => { return ( (categories.length === 0 || categories.includes(plugin.declaration.category)) && (tags.length === 0 || tags.some(tag => plugin.declaration.tags.includes(tag))) - && (searchQuery === '' || plugin.plugin_id.toLowerCase().includes(searchQuery.toLowerCase())) + && matchesSearchQuery(plugin, searchQuery, locale) ) }) return filteredList - }, [pluginListWithLatestVersion, filters]) + }, [pluginListWithLatestVersion, filters, locale]) const currentPluginDetail = useMemo(() => { const detail = pluginListWithLatestVersion.find(plugin => plugin.plugin_id === currentPluginID) diff --git a/web/app/components/rag-pipeline/components/chunk-card-list/index.spec.tsx b/web/app/components/rag-pipeline/components/chunk-card-list/index.spec.tsx new file mode 100644 index 0000000000..e665cf134e --- /dev/null +++ b/web/app/components/rag-pipeline/components/chunk-card-list/index.spec.tsx @@ -0,0 +1,1164 @@ +import type { GeneralChunks, ParentChildChunk, ParentChildChunks, QAChunk, QAChunks } from './types' +import { render, screen } from '@testing-library/react' +import { ChunkingMode } from '@/models/datasets' +import ChunkCard from './chunk-card' +import { ChunkCardList } from './index' +import QAItem from './q-a-item' +import { QAItemType } from './types' + +// ============================================================================= +// Test Data Factories +// ============================================================================= + +const createGeneralChunks = (overrides: string[] = []): GeneralChunks => { + if (overrides.length > 0) + return overrides + return [ + 'This is the first chunk of text content.', + 'This is the second chunk with different content.', + 'Third chunk here with more text.', + ] +} + +const createParentChildChunk = (overrides: Partial = {}): ParentChildChunk => ({ + child_contents: ['Child content 1', 'Child content 2'], + parent_content: 'This is the parent content that contains the children.', + parent_mode: 'paragraph', + ...overrides, +}) + +const createParentChildChunks = (overrides: Partial = {}): ParentChildChunks => ({ + parent_child_chunks: [ + createParentChildChunk(), + createParentChildChunk({ + child_contents: ['Another child 1', 'Another child 2', 'Another child 3'], + parent_content: 'Another parent content here.', + }), + ], + parent_mode: 'paragraph', + ...overrides, +}) + +const createQAChunk = (overrides: Partial = {}): QAChunk => ({ + question: 'What is the answer to life?', + answer: 'The answer is 42.', + ...overrides, +}) + +const createQAChunks = (overrides: Partial = {}): QAChunks => ({ + qa_chunks: [ + createQAChunk(), + createQAChunk({ + question: 'How does this work?', + answer: 'It works by processing data.', + }), + ], + ...overrides, +}) + +// ============================================================================= +// QAItem Component Tests +// ============================================================================= + +describe('QAItem', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + // Tests for basic rendering of QAItem component + describe('Rendering', () => { + it('should render question type with Q prefix', () => { + // Arrange & Act + render() + + // Assert + expect(screen.getByText('Q')).toBeInTheDocument() + expect(screen.getByText('What is this?')).toBeInTheDocument() + }) + + it('should render answer type with A prefix', () => { + // Arrange & Act + render() + + // Assert + expect(screen.getByText('A')).toBeInTheDocument() + expect(screen.getByText('This is the answer.')).toBeInTheDocument() + }) + }) + + // Tests for different prop variations + describe('Props', () => { + it('should render with empty text', () => { + // Arrange & Act + render() + + // Assert + expect(screen.getByText('Q')).toBeInTheDocument() + }) + + it('should render with long text content', () => { + // Arrange + const longText = 'A'.repeat(1000) + + // Act + render() + + // Assert + expect(screen.getByText(longText)).toBeInTheDocument() + }) + + it('should render with special characters in text', () => { + // Arrange + const specialText = ' & "quotes" \'apostrophe\'' + + // Act + render() + + // Assert + expect(screen.getByText(specialText)).toBeInTheDocument() + }) + }) + + // Tests for memoization behavior + describe('Memoization', () => { + it('should be memoized with React.memo', () => { + // Arrange & Act + const { rerender } = render() + + // Assert - component should render consistently + expect(screen.getByText('Q')).toBeInTheDocument() + expect(screen.getByText('Test')).toBeInTheDocument() + + // Rerender with same props - should not cause issues + rerender() + expect(screen.getByText('Q')).toBeInTheDocument() + }) + }) +}) + +// ============================================================================= +// ChunkCard Component Tests +// ============================================================================= + +describe('ChunkCard', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + // Tests for basic rendering with different chunk types + describe('Rendering', () => { + it('should render text chunk type correctly', () => { + // Arrange & Act + render( + , + ) + + // Assert + expect(screen.getByText('This is plain text content.')).toBeInTheDocument() + expect(screen.getByText(/Chunk-01/)).toBeInTheDocument() + }) + + it('should render QA chunk type with question and answer', () => { + // Arrange + const qaContent: QAChunk = { + question: 'What is React?', + answer: 'React is a JavaScript library.', + } + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Q')).toBeInTheDocument() + expect(screen.getByText('What is React?')).toBeInTheDocument() + expect(screen.getByText('A')).toBeInTheDocument() + expect(screen.getByText('React is a JavaScript library.')).toBeInTheDocument() + }) + + it('should render parent-child chunk type with child contents', () => { + // Arrange + const childContents = ['Child 1 content', 'Child 2 content'] + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Child 1 content')).toBeInTheDocument() + expect(screen.getByText('Child 2 content')).toBeInTheDocument() + expect(screen.getByText('C-1')).toBeInTheDocument() + expect(screen.getByText('C-2')).toBeInTheDocument() + }) + }) + + // Tests for parent mode variations + describe('Parent Mode Variations', () => { + it('should show Parent-Chunk label prefix for paragraph mode', () => { + // Arrange & Act + render( + , + ) + + // Assert + expect(screen.getByText(/Parent-Chunk-01/)).toBeInTheDocument() + }) + + it('should hide segment index tag for full-doc mode', () => { + // Arrange & Act + render( + , + ) + + // Assert - should not show Chunk or Parent-Chunk label + expect(screen.queryByText(/Chunk/)).not.toBeInTheDocument() + expect(screen.queryByText(/Parent-Chunk/)).not.toBeInTheDocument() + }) + + it('should show Chunk label prefix for text mode', () => { + // Arrange & Act + render( + , + ) + + // Assert + expect(screen.getByText(/Chunk-05/)).toBeInTheDocument() + }) + }) + + // Tests for word count display + describe('Word Count Display', () => { + it('should display formatted word count', () => { + // Arrange & Act + render( + , + ) + + // Assert - formatNumber(1234) returns '1,234' + expect(screen.getByText(/1,234/)).toBeInTheDocument() + }) + + it('should display word count with character translation key', () => { + // Arrange & Act + render( + , + ) + + // Assert - translation key is returned as-is by mock + expect(screen.getByText(/100\s+(?:\S.*)?characters/)).toBeInTheDocument() + }) + + it('should not display word count info for full-doc mode', () => { + // Arrange & Act + render( + , + ) + + // Assert - the header with word count should be hidden + expect(screen.queryByText(/500/)).not.toBeInTheDocument() + }) + }) + + // Tests for position ID variations + describe('Position ID', () => { + it('should handle numeric position ID', () => { + // Arrange & Act + render( + , + ) + + // Assert + expect(screen.getByText(/Chunk-42/)).toBeInTheDocument() + }) + + it('should handle string position ID', () => { + // Arrange & Act + render( + , + ) + + // Assert + expect(screen.getByText(/Chunk-99/)).toBeInTheDocument() + }) + + it('should pad single digit position ID', () => { + // Arrange & Act + render( + , + ) + + // Assert + expect(screen.getByText(/Chunk-03/)).toBeInTheDocument() + }) + }) + + // Tests for memoization dependencies + describe('Memoization', () => { + it('should update isFullDoc memo when parentMode changes', () => { + // Arrange + const { rerender } = render( + , + ) + + // Assert - paragraph mode shows label + expect(screen.getByText(/Parent-Chunk/)).toBeInTheDocument() + + // Act - change to full-doc + rerender( + , + ) + + // Assert - full-doc mode hides label + expect(screen.queryByText(/Parent-Chunk/)).not.toBeInTheDocument() + }) + + it('should update contentElement memo when content changes', () => { + // Arrange + const { rerender } = render( + , + ) + + // Assert + expect(screen.getByText('Initial content')).toBeInTheDocument() + + // Act + rerender( + , + ) + + // Assert + expect(screen.getByText('Updated content')).toBeInTheDocument() + expect(screen.queryByText('Initial content')).not.toBeInTheDocument() + }) + + it('should update contentElement memo when chunkType changes', () => { + // Arrange + const { rerender } = render( + , + ) + + // Assert + expect(screen.getByText('Text content')).toBeInTheDocument() + + // Act - change to QA type + const qaContent: QAChunk = { question: 'Q?', answer: 'A.' } + rerender( + , + ) + + // Assert + expect(screen.getByText('Q')).toBeInTheDocument() + expect(screen.getByText('Q?')).toBeInTheDocument() + }) + }) + + // Tests for edge cases + describe('Edge Cases', () => { + it('should handle empty child contents array', () => { + // Arrange & Act + render( + , + ) + + // Assert - should render without errors + expect(screen.getByText(/Parent-Chunk-01/)).toBeInTheDocument() + }) + + it('should handle QA chunk with empty strings', () => { + // Arrange + const emptyQA: QAChunk = { question: '', answer: '' } + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Q')).toBeInTheDocument() + expect(screen.getByText('A')).toBeInTheDocument() + }) + + it('should handle very long content', () => { + // Arrange + const longContent = 'A'.repeat(10000) + + // Act + render( + , + ) + + // Assert + expect(screen.getByText(longContent)).toBeInTheDocument() + }) + + it('should handle zero word count', () => { + // Arrange & Act + render( + , + ) + + // Assert - formatNumber returns falsy for 0, so it shows 0 + expect(screen.getByText(/0\s+(?:\S.*)?characters/)).toBeInTheDocument() + }) + }) +}) + +// ============================================================================= +// ChunkCardList Component Tests +// ============================================================================= + +describe('ChunkCardList', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + // Tests for rendering with different chunk types + describe('Rendering', () => { + it('should render text chunks correctly', () => { + // Arrange + const chunks = createGeneralChunks() + + // Act + render( + , + ) + + // Assert + expect(screen.getByText(chunks[0])).toBeInTheDocument() + expect(screen.getByText(chunks[1])).toBeInTheDocument() + expect(screen.getByText(chunks[2])).toBeInTheDocument() + }) + + it('should render parent-child chunks correctly', () => { + // Arrange + const chunks = createParentChildChunks() + + // Act + render( + , + ) + + // Assert - should render child contents from parent-child chunks + expect(screen.getByText('Child content 1')).toBeInTheDocument() + expect(screen.getByText('Child content 2')).toBeInTheDocument() + expect(screen.getByText('Another child 1')).toBeInTheDocument() + }) + + it('should render QA chunks correctly', () => { + // Arrange + const chunks = createQAChunks() + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('What is the answer to life?')).toBeInTheDocument() + expect(screen.getByText('The answer is 42.')).toBeInTheDocument() + expect(screen.getByText('How does this work?')).toBeInTheDocument() + expect(screen.getByText('It works by processing data.')).toBeInTheDocument() + }) + }) + + // Tests for chunkList memoization + describe('Memoization - chunkList', () => { + it('should extract chunks from GeneralChunks for text mode', () => { + // Arrange + const chunks: GeneralChunks = ['Chunk 1', 'Chunk 2'] + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Chunk 1')).toBeInTheDocument() + expect(screen.getByText('Chunk 2')).toBeInTheDocument() + }) + + it('should extract parent_child_chunks from ParentChildChunks for parentChild mode', () => { + // Arrange + const chunks = createParentChildChunks({ + parent_child_chunks: [ + createParentChildChunk({ child_contents: ['Specific child'] }), + ], + }) + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Specific child')).toBeInTheDocument() + }) + + it('should extract qa_chunks from QAChunks for qa mode', () => { + // Arrange + const chunks: QAChunks = { + qa_chunks: [ + { question: 'Specific Q', answer: 'Specific A' }, + ], + } + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Specific Q')).toBeInTheDocument() + expect(screen.getByText('Specific A')).toBeInTheDocument() + }) + + it('should update chunkList when chunkInfo changes', () => { + // Arrange + const initialChunks = createGeneralChunks(['Initial chunk']) + + const { rerender } = render( + , + ) + + // Assert initial state + expect(screen.getByText('Initial chunk')).toBeInTheDocument() + + // Act - update chunks + const updatedChunks = createGeneralChunks(['Updated chunk']) + rerender( + , + ) + + // Assert updated state + expect(screen.getByText('Updated chunk')).toBeInTheDocument() + expect(screen.queryByText('Initial chunk')).not.toBeInTheDocument() + }) + }) + + // Tests for getWordCount function + describe('Word Count Calculation', () => { + it('should calculate word count for text chunks using string length', () => { + // Arrange - "Hello" has 5 characters + const chunks = createGeneralChunks(['Hello']) + + // Act + render( + , + ) + + // Assert - word count should be 5 (string length) + expect(screen.getByText(/5\s+(?:\S.*)?characters/)).toBeInTheDocument() + }) + + it('should calculate word count for parent-child chunks using parent_content length', () => { + // Arrange - parent_content length determines word count + const chunks = createParentChildChunks({ + parent_child_chunks: [ + createParentChildChunk({ + parent_content: 'Parent', // 6 characters + child_contents: ['Child'], + }), + ], + }) + + // Act + render( + , + ) + + // Assert - word count should be 6 (parent_content length) + expect(screen.getByText(/6\s+(?:\S.*)?characters/)).toBeInTheDocument() + }) + + it('should calculate word count for QA chunks using question + answer length', () => { + // Arrange - "Hi" (2) + "Bye" (3) = 5 + const chunks: QAChunks = { + qa_chunks: [ + { question: 'Hi', answer: 'Bye' }, + ], + } + + // Act + render( + , + ) + + // Assert - word count should be 5 (question.length + answer.length) + expect(screen.getByText(/5\s+(?:\S.*)?characters/)).toBeInTheDocument() + }) + }) + + // Tests for position ID assignment + describe('Position ID', () => { + it('should assign 1-based position IDs to chunks', () => { + // Arrange + const chunks = createGeneralChunks(['First', 'Second', 'Third']) + + // Act + render( + , + ) + + // Assert - position IDs should be 1, 2, 3 + expect(screen.getByText(/Chunk-01/)).toBeInTheDocument() + expect(screen.getByText(/Chunk-02/)).toBeInTheDocument() + expect(screen.getByText(/Chunk-03/)).toBeInTheDocument() + }) + }) + + // Tests for className prop + describe('Custom className', () => { + it('should apply custom className to container', () => { + // Arrange + const chunks = createGeneralChunks(['Test']) + + // Act + const { container } = render( + , + ) + + // Assert + expect(container.firstChild).toHaveClass('custom-class') + }) + + it('should merge custom className with default classes', () => { + // Arrange + const chunks = createGeneralChunks(['Test']) + + // Act + const { container } = render( + , + ) + + // Assert - should have both default and custom classes + expect(container.firstChild).toHaveClass('flex') + expect(container.firstChild).toHaveClass('w-full') + expect(container.firstChild).toHaveClass('flex-col') + expect(container.firstChild).toHaveClass('my-custom-class') + }) + + it('should render without className prop', () => { + // Arrange + const chunks = createGeneralChunks(['Test']) + + // Act + const { container } = render( + , + ) + + // Assert - should have default classes + expect(container.firstChild).toHaveClass('flex') + expect(container.firstChild).toHaveClass('w-full') + }) + }) + + // Tests for parentMode prop + describe('Parent Mode', () => { + it('should pass parentMode to ChunkCard for parent-child type', () => { + // Arrange + const chunks = createParentChildChunks() + + // Act + render( + , + ) + + // Assert - paragraph mode shows Parent-Chunk label + expect(screen.getAllByText(/Parent-Chunk/).length).toBeGreaterThan(0) + }) + + it('should handle full-doc parentMode', () => { + // Arrange + const chunks = createParentChildChunks() + + // Act + render( + , + ) + + // Assert - full-doc mode hides chunk labels + expect(screen.queryByText(/Parent-Chunk/)).not.toBeInTheDocument() + expect(screen.queryByText(/Chunk-/)).not.toBeInTheDocument() + }) + + it('should not use parentMode for text type', () => { + // Arrange + const chunks = createGeneralChunks(['Text']) + + // Act + render( + , + ) + + // Assert - should show Chunk label, not affected by parentMode + expect(screen.getByText(/Chunk-01/)).toBeInTheDocument() + }) + }) + + // Tests for edge cases + describe('Edge Cases', () => { + it('should handle empty GeneralChunks array', () => { + // Arrange + const chunks: GeneralChunks = [] + + // Act + const { container } = render( + , + ) + + // Assert - should render empty container + expect(container.firstChild).toBeInTheDocument() + expect(container.firstChild?.childNodes.length).toBe(0) + }) + + it('should handle empty ParentChildChunks', () => { + // Arrange + const chunks: ParentChildChunks = { + parent_child_chunks: [], + parent_mode: 'paragraph', + } + + // Act + const { container } = render( + , + ) + + // Assert + expect(container.firstChild).toBeInTheDocument() + expect(container.firstChild?.childNodes.length).toBe(0) + }) + + it('should handle empty QAChunks', () => { + // Arrange + const chunks: QAChunks = { + qa_chunks: [], + } + + // Act + const { container } = render( + , + ) + + // Assert + expect(container.firstChild).toBeInTheDocument() + expect(container.firstChild?.childNodes.length).toBe(0) + }) + + it('should handle single item in chunks', () => { + // Arrange + const chunks = createGeneralChunks(['Single chunk']) + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Single chunk')).toBeInTheDocument() + expect(screen.getByText(/Chunk-01/)).toBeInTheDocument() + }) + + it('should handle large number of chunks', () => { + // Arrange + const chunks = Array.from({ length: 100 }, (_, i) => `Chunk number ${i + 1}`) + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('Chunk number 1')).toBeInTheDocument() + expect(screen.getByText('Chunk number 100')).toBeInTheDocument() + expect(screen.getByText(/Chunk-100/)).toBeInTheDocument() + }) + }) + + // Tests for key uniqueness + describe('Key Generation', () => { + it('should generate unique keys for chunks', () => { + // Arrange - chunks with same content + const chunks = createGeneralChunks(['Same content', 'Same content', 'Same content']) + + // Act + const { container } = render( + , + ) + + // Assert - all three should render (keys are based on chunkType-index) + const chunkCards = container.querySelectorAll('.bg-components-panel-bg') + expect(chunkCards.length).toBe(3) + }) + }) +}) + +// ============================================================================= +// Integration Tests +// ============================================================================= + +describe('ChunkCardList Integration', () => { + beforeEach(() => { + vi.clearAllMocks() + }) + + // Tests for complete workflow scenarios + describe('Complete Workflows', () => { + it('should render complete text chunking workflow', () => { + // Arrange + const textChunks = createGeneralChunks([ + 'First paragraph of the document.', + 'Second paragraph with more information.', + 'Final paragraph concluding the content.', + ]) + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('First paragraph of the document.')).toBeInTheDocument() + expect(screen.getByText(/Chunk-01/)).toBeInTheDocument() + // "First paragraph of the document." = 32 characters + expect(screen.getByText(/32\s+(?:\S.*)?characters/)).toBeInTheDocument() + + expect(screen.getByText('Second paragraph with more information.')).toBeInTheDocument() + expect(screen.getByText(/Chunk-02/)).toBeInTheDocument() + + expect(screen.getByText('Final paragraph concluding the content.')).toBeInTheDocument() + expect(screen.getByText(/Chunk-03/)).toBeInTheDocument() + }) + + it('should render complete parent-child chunking workflow', () => { + // Arrange + const parentChildChunks = createParentChildChunks({ + parent_child_chunks: [ + { + parent_content: 'Main section about React components and their lifecycle.', + child_contents: [ + 'React components are building blocks.', + 'Lifecycle methods control component behavior.', + ], + parent_mode: 'paragraph', + }, + ], + }) + + // Act + render( + , + ) + + // Assert + expect(screen.getByText('React components are building blocks.')).toBeInTheDocument() + expect(screen.getByText('Lifecycle methods control component behavior.')).toBeInTheDocument() + expect(screen.getByText('C-1')).toBeInTheDocument() + expect(screen.getByText('C-2')).toBeInTheDocument() + expect(screen.getByText(/Parent-Chunk-01/)).toBeInTheDocument() + }) + + it('should render complete QA chunking workflow', () => { + // Arrange + const qaChunks = createQAChunks({ + qa_chunks: [ + { + question: 'What is Dify?', + answer: 'Dify is an open-source LLM application development platform.', + }, + { + question: 'How do I get started?', + answer: 'You can start by installing the platform using Docker.', + }, + ], + }) + + // Act + render( + , + ) + + // Assert + const qLabels = screen.getAllByText('Q') + const aLabels = screen.getAllByText('A') + expect(qLabels.length).toBe(2) + expect(aLabels.length).toBe(2) + + expect(screen.getByText('What is Dify?')).toBeInTheDocument() + expect(screen.getByText('Dify is an open-source LLM application development platform.')).toBeInTheDocument() + expect(screen.getByText('How do I get started?')).toBeInTheDocument() + expect(screen.getByText('You can start by installing the platform using Docker.')).toBeInTheDocument() + }) + }) + + // Tests for type switching scenarios + describe('Type Switching', () => { + it('should handle switching from text to QA type', () => { + // Arrange + const textChunks = createGeneralChunks(['Text content']) + const qaChunks = createQAChunks() + + const { rerender } = render( + , + ) + + // Assert initial text state + expect(screen.getByText('Text content')).toBeInTheDocument() + + // Act - switch to QA + rerender( + , + ) + + // Assert QA state + expect(screen.queryByText('Text content')).not.toBeInTheDocument() + expect(screen.getByText('What is the answer to life?')).toBeInTheDocument() + }) + + it('should handle switching from text to parent-child type', () => { + // Arrange + const textChunks = createGeneralChunks(['Simple text']) + const parentChildChunks = createParentChildChunks() + + const { rerender } = render( + , + ) + + // Assert initial state + expect(screen.getByText('Simple text')).toBeInTheDocument() + expect(screen.getByText(/Chunk-01/)).toBeInTheDocument() + + // Act - switch to parent-child + rerender( + , + ) + + // Assert parent-child state + expect(screen.queryByText('Simple text')).not.toBeInTheDocument() + // Multiple Parent-Chunk elements exist, so use getAllByText + expect(screen.getAllByText(/Parent-Chunk/).length).toBeGreaterThan(0) + }) + }) +}) diff --git a/web/app/components/rag-pipeline/components/index.spec.tsx b/web/app/components/rag-pipeline/components/index.spec.tsx new file mode 100644 index 0000000000..3f6b0dccc2 --- /dev/null +++ b/web/app/components/rag-pipeline/components/index.spec.tsx @@ -0,0 +1,1390 @@ +import type { PropsWithChildren } from 'react' +import type { EnvironmentVariable } from '@/app/components/workflow/types' +import { fireEvent, render, screen, waitFor } from '@testing-library/react' +import { createMockProviderContextValue } from '@/__mocks__/provider-context' + +// ============================================================================ +// Import Components After Mocks Setup +// ============================================================================ + +import Conversion from './conversion' +import RagPipelinePanel from './panel' +import PublishAsKnowledgePipelineModal from './publish-as-knowledge-pipeline-modal' +import PublishToast from './publish-toast' +import RagPipelineChildren from './rag-pipeline-children' +import PipelineScreenShot from './screenshot' + +// ============================================================================ +// Mock External Dependencies - All vi.mock calls must come before any imports +// ============================================================================ + +// Mock next/navigation +const mockPush = vi.fn() +vi.mock('next/navigation', () => ({ + useParams: () => ({ datasetId: 'test-dataset-id' }), + useRouter: () => ({ push: mockPush }), +})) + +// Mock next/image +vi.mock('next/image', () => ({ + default: ({ src, alt, width, height }: { src: string, alt: string, width: number, height: number }) => ( + // eslint-disable-next-line next/no-img-element + {alt} + ), +})) + +// Mock next/dynamic +vi.mock('next/dynamic', () => ({ + default: (importFn: () => Promise<{ default: React.ComponentType }>, options?: { ssr?: boolean }) => { + const DynamicComponent = ({ children, ...props }: PropsWithChildren) => { + return
{children}
+ } + DynamicComponent.displayName = 'DynamicComponent' + return DynamicComponent + }, +})) + +// Mock workflow store - using controllable state +let mockShowImportDSLModal = false +const mockSetShowImportDSLModal = vi.fn((value: boolean) => { + mockShowImportDSLModal = value +}) +vi.mock('@/app/components/workflow/store', () => { + const mockSetShowInputFieldPanel = vi.fn() + const mockSetShowEnvPanel = vi.fn() + const mockSetShowDebugAndPreviewPanel = vi.fn() + const mockSetIsPreparingDataSource = vi.fn() + const mockSetPublishedAt = vi.fn() + const mockSetRagPipelineVariables = vi.fn() + const mockSetEnvironmentVariables = vi.fn() + + return { + useStore: (selector: (state: Record) => unknown) => { + const storeState = { + pipelineId: 'test-pipeline-id', + showDebugAndPreviewPanel: false, + showGlobalVariablePanel: false, + showInputFieldPanel: false, + showInputFieldPreviewPanel: false, + inputFieldEditPanelProps: null as null | object, + historyWorkflowData: null as null | object, + publishedAt: 0, + draftUpdatedAt: Date.now(), + knowledgeName: 'Test Knowledge', + knowledgeIcon: { + icon_type: 'emoji' as const, + icon: '📚', + icon_background: '#FFFFFF', + icon_url: '', + }, + showImportDSLModal: mockShowImportDSLModal, + setShowInputFieldPanel: mockSetShowInputFieldPanel, + setShowEnvPanel: mockSetShowEnvPanel, + setShowDebugAndPreviewPanel: mockSetShowDebugAndPreviewPanel, + setIsPreparingDataSource: mockSetIsPreparingDataSource, + setPublishedAt: mockSetPublishedAt, + setRagPipelineVariables: mockSetRagPipelineVariables, + setEnvironmentVariables: mockSetEnvironmentVariables, + setShowImportDSLModal: mockSetShowImportDSLModal, + } + return selector(storeState) + }, + useWorkflowStore: () => ({ + getState: () => ({ + pipelineId: 'test-pipeline-id', + setIsPreparingDataSource: mockSetIsPreparingDataSource, + setShowDebugAndPreviewPanel: mockSetShowDebugAndPreviewPanel, + setPublishedAt: mockSetPublishedAt, + setRagPipelineVariables: mockSetRagPipelineVariables, + setEnvironmentVariables: mockSetEnvironmentVariables, + }), + }), + } +}) + +// Mock workflow hooks - extract mock functions for assertions using vi.hoisted +const { + mockHandlePaneContextmenuCancel, + mockExportCheck, + mockHandleExportDSL, +} = vi.hoisted(() => ({ + mockHandlePaneContextmenuCancel: vi.fn(), + mockExportCheck: vi.fn(), + mockHandleExportDSL: vi.fn(), +})) +vi.mock('@/app/components/workflow/hooks', () => { + return { + useNodesSyncDraft: () => ({ + doSyncWorkflowDraft: vi.fn(), + syncWorkflowDraftWhenPageClose: vi.fn(), + handleSyncWorkflowDraft: vi.fn(), + }), + usePanelInteractions: () => ({ + handlePaneContextmenuCancel: mockHandlePaneContextmenuCancel, + }), + useDSL: () => ({ + exportCheck: mockExportCheck, + handleExportDSL: mockHandleExportDSL, + }), + useChecklistBeforePublish: () => ({ + handleCheckBeforePublish: vi.fn().mockResolvedValue(true), + }), + useWorkflowRun: () => ({ + handleStopRun: vi.fn(), + }), + useWorkflowStartRun: () => ({ + handleWorkflowStartRunInWorkflow: vi.fn(), + }), + } +}) + +// Mock rag-pipeline hooks +vi.mock('../hooks', () => ({ + useAvailableNodesMetaData: () => ({}), + useDSL: () => ({ + exportCheck: mockExportCheck, + handleExportDSL: mockHandleExportDSL, + }), + useNodesSyncDraft: () => ({ + doSyncWorkflowDraft: vi.fn(), + syncWorkflowDraftWhenPageClose: vi.fn(), + }), + usePipelineRefreshDraft: () => ({ + handleRefreshWorkflowDraft: vi.fn(), + }), + usePipelineRun: () => ({ + handleBackupDraft: vi.fn(), + handleLoadBackupDraft: vi.fn(), + handleRestoreFromPublishedWorkflow: vi.fn(), + handleRun: vi.fn(), + handleStopRun: vi.fn(), + }), + usePipelineStartRun: () => ({ + handleStartWorkflowRun: vi.fn(), + handleWorkflowStartRunInWorkflow: vi.fn(), + }), + useGetRunAndTraceUrl: () => ({ + getWorkflowRunAndTraceUrl: vi.fn(), + }), +})) + +// Mock rag-pipeline search hook +vi.mock('../hooks/use-rag-pipeline-search', () => ({ + useRagPipelineSearch: vi.fn(), +})) + +// Mock configs-map hook +vi.mock('../hooks/use-configs-map', () => ({ + useConfigsMap: () => ({}), +})) + +// Mock inspect-vars-crud hook +vi.mock('../hooks/use-inspect-vars-crud', () => ({ + useInspectVarsCrud: () => ({ + hasNodeInspectVars: vi.fn(), + hasSetInspectVar: vi.fn(), + fetchInspectVarValue: vi.fn(), + editInspectVarValue: vi.fn(), + renameInspectVarName: vi.fn(), + appendNodeInspectVars: vi.fn(), + deleteInspectVar: vi.fn(), + deleteNodeInspectorVars: vi.fn(), + deleteAllInspectorVars: vi.fn(), + isInspectVarEdited: vi.fn(), + resetToLastRunVar: vi.fn(), + invalidateSysVarValues: vi.fn(), + resetConversationVar: vi.fn(), + invalidateConversationVarValues: vi.fn(), + }), +})) + +// Mock workflow hooks for fetch-workflow-inspect-vars +vi.mock('@/app/components/workflow/hooks/use-fetch-workflow-inspect-vars', () => ({ + useSetWorkflowVarsWithValue: () => ({ + fetchInspectVars: vi.fn(), + }), +})) + +// Mock service hooks - with controllable convert function +let mockConvertFn = vi.fn() +let mockIsPending = false +vi.mock('@/service/use-pipeline', () => ({ + useConvertDatasetToPipeline: () => ({ + mutateAsync: mockConvertFn, + isPending: mockIsPending, + }), + useImportPipelineDSL: () => ({ + mutateAsync: vi.fn(), + }), + useImportPipelineDSLConfirm: () => ({ + mutateAsync: vi.fn(), + }), + publishedPipelineInfoQueryKeyPrefix: ['pipeline-info'], + useInvalidCustomizedTemplateList: () => vi.fn(), + usePublishAsCustomizedPipeline: () => ({ + mutateAsync: vi.fn(), + }), +})) + +vi.mock('@/service/use-base', () => ({ + useInvalid: () => vi.fn(), +})) + +vi.mock('@/service/knowledge/use-dataset', () => ({ + datasetDetailQueryKeyPrefix: ['dataset-detail'], + useInvalidDatasetList: () => vi.fn(), +})) + +vi.mock('@/service/workflow', () => ({ + fetchWorkflowDraft: vi.fn().mockResolvedValue({ + graph: { nodes: [], edges: [], viewport: {} }, + hash: 'test-hash', + rag_pipeline_variables: [], + }), +})) + +// Mock event emitter context - with controllable subscription +let mockEventSubscriptionCallback: ((v: { type: string, payload?: { data?: EnvironmentVariable[] } }) => void) | null = null +const mockUseSubscription = vi.fn((callback: (v: { type: string, payload?: { data?: EnvironmentVariable[] } }) => void) => { + mockEventSubscriptionCallback = callback +}) +vi.mock('@/context/event-emitter', () => ({ + useEventEmitterContextContext: () => ({ + eventEmitter: { + useSubscription: mockUseSubscription, + emit: vi.fn(), + }, + }), +})) + +// Mock toast +vi.mock('@/app/components/base/toast', () => ({ + default: { + notify: vi.fn(), + }, + useToastContext: () => ({ + notify: vi.fn(), + }), + ToastContext: { + Provider: ({ children }: PropsWithChildren) => children, + }, +})) + +// Mock useTheme hook +vi.mock('@/hooks/use-theme', () => ({ + default: () => ({ + theme: 'light', + }), +})) + +// Mock basePath +vi.mock('@/utils/var', () => ({ + basePath: '/public', +})) + +// Mock provider context +vi.mock('@/context/provider-context', () => ({ + useProviderContext: () => createMockProviderContextValue(), +})) + +// Mock WorkflowWithInnerContext +vi.mock('@/app/components/workflow', () => ({ + WorkflowWithInnerContext: ({ children }: PropsWithChildren) => ( +
{children}
+ ), +})) + +// Mock workflow panel +vi.mock('@/app/components/workflow/panel', () => ({ + default: ({ components }: { components?: { left?: React.ReactNode, right?: React.ReactNode } }) => ( +
+
{components?.left}
+
{components?.right}
+
+ ), +})) + +// Mock PluginDependency +vi.mock('../../workflow/plugin-dependency', () => ({ + default: () =>
, +})) + +// Mock plugin-dependency hooks +vi.mock('@/app/components/workflow/plugin-dependency/hooks', () => ({ + usePluginDependencies: () => ({ + handleCheckPluginDependencies: vi.fn().mockResolvedValue(undefined), + }), +})) + +// Mock DSLExportConfirmModal +vi.mock('@/app/components/workflow/dsl-export-confirm-modal', () => ({ + default: ({ envList, onConfirm, onClose }: { envList: EnvironmentVariable[], onConfirm: () => void, onClose: () => void }) => ( +
+ {envList.length} + + +
+ ), +})) + +// Mock workflow constants +vi.mock('@/app/components/workflow/constants', () => ({ + DSL_EXPORT_CHECK: 'DSL_EXPORT_CHECK', + WORKFLOW_DATA_UPDATE: 'WORKFLOW_DATA_UPDATE', +})) + +// Mock workflow utils +vi.mock('@/app/components/workflow/utils', () => ({ + initialNodes: vi.fn(nodes => nodes), + initialEdges: vi.fn(edges => edges), + getKeyboardKeyCodeBySystem: (key: string) => key, + getKeyboardKeyNameBySystem: (key: string) => key, +})) + +// Mock Confirm component +vi.mock('@/app/components/base/confirm', () => ({ + default: ({ title, content, isShow, onConfirm, onCancel, isLoading, isDisabled }: { + title: string + content: string + isShow: boolean + onConfirm: () => void + onCancel: () => void + isLoading?: boolean + isDisabled?: boolean + }) => isShow + ? ( +
+
{title}
+
{content}
+ + +
+ ) + : null, +})) + +// Mock Modal component +vi.mock('@/app/components/base/modal', () => ({ + default: ({ children, isShow, onClose, className }: PropsWithChildren<{ + isShow: boolean + onClose: () => void + className?: string + }>) => isShow + ? ( +
e.target === e.currentTarget && onClose()}> + {children} +
+ ) + : null, +})) + +// Mock Input component +vi.mock('@/app/components/base/input', () => ({ + default: ({ value, onChange, placeholder }: { + value: string + onChange: (e: React.ChangeEvent) => void + placeholder?: string + }) => ( + + ), +})) + +// Mock Textarea component +vi.mock('@/app/components/base/textarea', () => ({ + default: ({ value, onChange, placeholder, className }: { + value: string + onChange: (e: React.ChangeEvent) => void + placeholder?: string + className?: string + }) => ( +