diff --git a/.claude/skills/frontend-testing/assets/component-test.template.tsx b/.claude/skills/frontend-testing/assets/component-test.template.tsx
index c39baff916..6b7803bd4b 100644
--- a/.claude/skills/frontend-testing/assets/component-test.template.tsx
+++ b/.claude/skills/frontend-testing/assets/component-test.template.tsx
@@ -28,17 +28,14 @@ import userEvent from '@testing-library/user-event'
 // i18n (automatically mocked)
 // WHY: Global mock in web/vitest.setup.ts is auto-loaded by Vitest setup
-// No explicit mock needed - it returns translation keys as-is
+// The global mock provides: useTranslation, Trans, useMixedTranslation, useGetLanguage
+// No explicit mock needed for most tests
+//
 // Override only if custom translations are required:
-// vi.mock('react-i18next', () => ({
-//   useTranslation: () => ({
-//     t: (key: string) => {
-//       const customTranslations: Record<string, string> = {
-//         'my.custom.key': 'Custom Translation',
-//       }
-//       return customTranslations[key] || key
-//     },
-//   }),
+// import { createReactI18nextMock } from '@/test/i18n-mock'
+// vi.mock('react-i18next', () => createReactI18nextMock({
+//   'my.custom.key': 'Custom Translation',
+//   'button.save': 'Save',
 // }))
 
 // Router (if component uses useRouter, usePathname, useSearchParams)
diff --git a/.claude/skills/frontend-testing/references/mocking.md b/.claude/skills/frontend-testing/references/mocking.md
index 23889c8d3d..c70bcf0ae5 100644
--- a/.claude/skills/frontend-testing/references/mocking.md
+++ b/.claude/skills/frontend-testing/references/mocking.md
@@ -52,23 +52,29 @@ Modules are not mocked automatically. Use `vi.mock` in test files, or add global
 
 ### 1. i18n (Auto-loaded via Global Mock)
 
 A global mock is defined in `web/vitest.setup.ts` and is auto-loaded by Vitest setup.
-**No explicit mock needed** for most tests - it returns translation keys as-is.
-For tests requiring custom translations, override the mock:
+The global mock provides:
+
+- `useTranslation` - returns translation keys with namespace prefix
+- `Trans` component - renders i18nKey and components
+- `useMixedTranslation` (from `@/app/components/plugins/marketplace/hooks`)
+- `useGetLanguage` (from `@/context/i18n`) - returns `'en-US'`
+
+**Default behavior**: Most tests should use the global mock (no local override needed).
+
+**For custom translations**: Use the helper function from `@/test/i18n-mock`:
 
 ```typescript
-vi.mock('react-i18next', () => ({
-  useTranslation: () => ({
-    t: (key: string) => {
-      const translations: Record<string, string> = {
-        'my.custom.key': 'Custom translation',
-      }
-      return translations[key] || key
-    },
-  }),
+import { createReactI18nextMock } from '@/test/i18n-mock'
+
+vi.mock('react-i18next', () => createReactI18nextMock({
+  'my.custom.key': 'Custom translation',
+  'button.save': 'Save',
 }))
 ```
 
+**Avoid**: Manually defining `useTranslation` mocks that just return the key - the global mock already does this.
+
### 2. 
Next.js Router ```typescript diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index d463349686..462ece303e 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -110,6 +110,16 @@ jobs: working-directory: ./web run: pnpm run type-check:tsgo + - name: Web dead code check + if: steps.changed-files.outputs.any_changed == 'true' + working-directory: ./web + run: pnpm run knip + + - name: Web build check + if: steps.changed-files.outputs.any_changed == 'true' + working-directory: ./web + run: pnpm run build + superlinter: name: SuperLinter runs-on: ubuntu-latest diff --git a/api/.env.example b/api/.env.example index 99cd2ba558..5f8d369ec4 100644 --- a/api/.env.example +++ b/api/.env.example @@ -101,6 +101,15 @@ S3_ACCESS_KEY=your-access-key S3_SECRET_KEY=your-secret-key S3_REGION=your-region +# Workflow run and Conversation archive storage (S3-compatible) +ARCHIVE_STORAGE_ENABLED=false +ARCHIVE_STORAGE_ENDPOINT= +ARCHIVE_STORAGE_ARCHIVE_BUCKET= +ARCHIVE_STORAGE_EXPORT_BUCKET= +ARCHIVE_STORAGE_ACCESS_KEY= +ARCHIVE_STORAGE_SECRET_KEY= +ARCHIVE_STORAGE_REGION=auto + # Azure Blob Storage configuration AZURE_BLOB_ACCOUNT_NAME=your-account-name AZURE_BLOB_ACCOUNT_KEY=your-account-key diff --git a/api/.ruff.toml b/api/.ruff.toml index 7206f7fa0f..8db0cbcb21 100644 --- a/api/.ruff.toml +++ b/api/.ruff.toml @@ -1,4 +1,8 @@ -exclude = ["migrations/*"] +exclude = [ + "migrations/*", + ".git", + ".git/**", +] line-length = 120 [format] diff --git a/api/configs/extra/__init__.py b/api/configs/extra/__init__.py index 4543b5389d..de97adfc0e 100644 --- a/api/configs/extra/__init__.py +++ b/api/configs/extra/__init__.py @@ -1,9 +1,11 @@ +from configs.extra.archive_config import ArchiveStorageConfig from configs.extra.notion_config import NotionConfig from configs.extra.sentry_config import SentryConfig class ExtraServiceConfig( # place the configs in alphabet order + ArchiveStorageConfig, NotionConfig, SentryConfig, ): diff --git a/api/configs/extra/archive_config.py b/api/configs/extra/archive_config.py new file mode 100644 index 0000000000..a85628fa61 --- /dev/null +++ b/api/configs/extra/archive_config.py @@ -0,0 +1,43 @@ +from pydantic import Field +from pydantic_settings import BaseSettings + + +class ArchiveStorageConfig(BaseSettings): + """ + Configuration settings for workflow run logs archiving storage. 
+ """ + + ARCHIVE_STORAGE_ENABLED: bool = Field( + description="Enable workflow run logs archiving to S3-compatible storage", + default=False, + ) + + ARCHIVE_STORAGE_ENDPOINT: str | None = Field( + description="URL of the S3-compatible storage endpoint (e.g., 'https://storage.example.com')", + default=None, + ) + + ARCHIVE_STORAGE_ARCHIVE_BUCKET: str | None = Field( + description="Name of the bucket to store archived workflow logs", + default=None, + ) + + ARCHIVE_STORAGE_EXPORT_BUCKET: str | None = Field( + description="Name of the bucket to store exported workflow runs", + default=None, + ) + + ARCHIVE_STORAGE_ACCESS_KEY: str | None = Field( + description="Access key ID for authenticating with storage", + default=None, + ) + + ARCHIVE_STORAGE_SECRET_KEY: str | None = Field( + description="Secret access key for authenticating with storage", + default=None, + ) + + ARCHIVE_STORAGE_REGION: str = Field( + description="Region for storage (use 'auto' if the provider supports it)", + default="auto", + ) diff --git a/api/controllers/common/fields.py b/api/controllers/common/fields.py index df9de825de..c16a23fac8 100644 --- a/api/controllers/common/fields.py +++ b/api/controllers/common/fields.py @@ -1,62 +1,59 @@ -from flask_restx import Api, Namespace, fields +from __future__ import annotations -from libs.helper import AppIconUrlField +from typing import Any, TypeAlias -parameters__system_parameters = { - "image_file_size_limit": fields.Integer, - "video_file_size_limit": fields.Integer, - "audio_file_size_limit": fields.Integer, - "file_size_limit": fields.Integer, - "workflow_file_upload_limit": fields.Integer, -} +from pydantic import BaseModel, ConfigDict, computed_field + +from core.file import helpers as file_helpers +from models.model import IconType + +JSONValue: TypeAlias = str | int | float | bool | None | dict[str, Any] | list[Any] +JSONObject: TypeAlias = dict[str, Any] -def build_system_parameters_model(api_or_ns: Api | Namespace): - """Build the system parameters model for the API or Namespace.""" - return api_or_ns.model("SystemParameters", parameters__system_parameters) +class SystemParameters(BaseModel): + image_file_size_limit: int + video_file_size_limit: int + audio_file_size_limit: int + file_size_limit: int + workflow_file_upload_limit: int -parameters_fields = { - "opening_statement": fields.String, - "suggested_questions": fields.Raw, - "suggested_questions_after_answer": fields.Raw, - "speech_to_text": fields.Raw, - "text_to_speech": fields.Raw, - "retriever_resource": fields.Raw, - "annotation_reply": fields.Raw, - "more_like_this": fields.Raw, - "user_input_form": fields.Raw, - "sensitive_word_avoidance": fields.Raw, - "file_upload": fields.Raw, - "system_parameters": fields.Nested(parameters__system_parameters), -} +class Parameters(BaseModel): + opening_statement: str | None = None + suggested_questions: list[str] + suggested_questions_after_answer: JSONObject + speech_to_text: JSONObject + text_to_speech: JSONObject + retriever_resource: JSONObject + annotation_reply: JSONObject + more_like_this: JSONObject + user_input_form: list[JSONObject] + sensitive_word_avoidance: JSONObject + file_upload: JSONObject + system_parameters: SystemParameters -def build_parameters_model(api_or_ns: Api | Namespace): - """Build the parameters model for the API or Namespace.""" - copied_fields = parameters_fields.copy() - copied_fields["system_parameters"] = fields.Nested(build_system_parameters_model(api_or_ns)) - return api_or_ns.model("Parameters", copied_fields) +class 
Site(BaseModel):
+    model_config = ConfigDict(from_attributes=True)
+
+    title: str
+    chat_color_theme: str | None = None
+    chat_color_theme_inverted: bool
+    icon_type: str | None = None
+    icon: str | None = None
+    icon_background: str | None = None
+    description: str | None = None
+    copyright: str | None = None
+    privacy_policy: str | None = None
+    custom_disclaimer: str | None = None
+    default_language: str
+    show_workflow_steps: bool
+    use_icon_as_answer_icon: bool

-site_fields = {
-    "title": fields.String,
-    "chat_color_theme": fields.String,
-    "chat_color_theme_inverted": fields.Boolean,
-    "icon_type": fields.String,
-    "icon": fields.String,
-    "icon_background": fields.String,
-    "icon_url": AppIconUrlField,
-    "description": fields.String,
-    "copyright": fields.String,
-    "privacy_policy": fields.String,
-    "custom_disclaimer": fields.String,
-    "default_language": fields.String,
-    "show_workflow_steps": fields.Boolean,
-    "use_icon_as_answer_icon": fields.Boolean,
-}
-
-
-def build_site_model(api_or_ns: Api | Namespace):
-    """Build the site model for the API or Namespace."""
-    return api_or_ns.model("Site", site_fields)
+    @computed_field(return_type=str | None)  # type: ignore
+    @property
+    def icon_url(self) -> str | None:
+        if self.icon and self.icon_type == IconType.IMAGE:
+            return file_helpers.get_signed_file_url(self.icon)
+        return None
diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py
index 62e997dae2..44cf89d6a9 100644
--- a/api/controllers/console/app/app.py
+++ b/api/controllers/console/app/app.py
@@ -1,3 +1,4 @@
+import re
 import uuid
 from typing import Literal
@@ -73,6 +74,48 @@ class AppListQuery(BaseModel):
         raise ValueError("Invalid UUID format in tag_ids.") from exc
 
 
+# XSS prevention: patterns that could lead to XSS attacks
+# Includes: script tags, iframe tags, javascript: protocol, SVG with onload, etc.
+_XSS_PATTERNS = [
+    r"<script[^>]*>.*?</script>",  # Script tags
+    r"<iframe[^>]*?(?:/>|>.*?</iframe>)",  # Iframe tags (including self-closing)
+    r"javascript:",  # JavaScript protocol
+    r"<svg[^>]*?\s+onload\s*=[^>]*>",  # SVG with onload handler (attribute-aware, flexible whitespace)
+    r"<.*?on\s*\w+\s*=",  # Event handlers like onclick, onerror, etc.
+    r"<object[^>]*(?:\s*/>|>.*?</object>)",  # Object tags (opening tag)
+    r"<embed[^>]*>",  # Embed tags (self-closing)
+    r"<link[^>]*>",  # Link tags with javascript
+]
+
+
+def _validate_xss_safe(value: str | None, field_name: str = "Field") -> str | None:
+    """
+    Validate that a string value doesn't contain potential XSS payloads.
+
+    Args:
+        value: The string value to validate
+        field_name: Name of the field for error messages
+
+    Returns:
+        The original value if safe
+
+    Raises:
+        ValueError: If the value contains XSS patterns
+    """
+    if value is None:
+        return None
+
+    value_lower = value.lower()
+    for pattern in _XSS_PATTERNS:
+        if re.search(pattern, value_lower, re.DOTALL | re.IGNORECASE):
+            raise ValueError(
+                f"{field_name} contains invalid characters or patterns. "
+                "HTML tags, JavaScript, and other potentially dangerous content are not allowed."
+ ) + + return value + + class CreateAppPayload(BaseModel): name: str = Field(..., min_length=1, description="App name") description: str | None = Field(default=None, description="App description (max 400 chars)", max_length=400) @@ -81,6 +124,11 @@ class CreateAppPayload(BaseModel): icon: str | None = Field(default=None, description="Icon") icon_background: str | None = Field(default=None, description="Icon background color") + @field_validator("name", "description", mode="before") + @classmethod + def validate_xss_safe(cls, value: str | None, info) -> str | None: + return _validate_xss_safe(value, info.field_name) + class UpdateAppPayload(BaseModel): name: str = Field(..., min_length=1, description="App name") @@ -91,6 +139,11 @@ class UpdateAppPayload(BaseModel): use_icon_as_answer_icon: bool | None = Field(default=None, description="Use icon as answer icon") max_active_requests: int | None = Field(default=None, description="Maximum active requests") + @field_validator("name", "description", mode="before") + @classmethod + def validate_xss_safe(cls, value: str | None, info) -> str | None: + return _validate_xss_safe(value, info.field_name) + class CopyAppPayload(BaseModel): name: str | None = Field(default=None, description="Name for the copied app") @@ -99,6 +152,11 @@ class CopyAppPayload(BaseModel): icon: str | None = Field(default=None, description="Icon") icon_background: str | None = Field(default=None, description="Icon background color") + @field_validator("name", "description", mode="before") + @classmethod + def validate_xss_safe(cls, value: str | None, info) -> str | None: + return _validate_xss_safe(value, info.field_name) + class AppExportQuery(BaseModel): include_secret: bool = Field(default=False, description="Include secrets in export") diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py index 7ad1e56373..c20e83d36f 100644 --- a/api/controllers/console/auth/oauth.py +++ b/api/controllers/console/auth/oauth.py @@ -124,7 +124,7 @@ class OAuthCallback(Resource): return redirect(f"{dify_config.CONSOLE_WEB_URL}/signin/invite-settings?invite_token={invite_token}") try: - account = _generate_account(provider, user_info) + account, oauth_new_user = _generate_account(provider, user_info) except AccountNotFoundError: return redirect(f"{dify_config.CONSOLE_WEB_URL}/signin?message=Account not found.") except (WorkSpaceNotFoundError, WorkSpaceNotAllowedCreateError): @@ -159,7 +159,10 @@ class OAuthCallback(Resource): ip_address=extract_remote_ip(request), ) - response = redirect(f"{dify_config.CONSOLE_WEB_URL}") + base_url = dify_config.CONSOLE_WEB_URL + query_char = "&" if "?" in base_url else "?" + target_url = f"{base_url}{query_char}oauth_new_user={str(oauth_new_user).lower()}" + response = redirect(target_url) set_access_token_to_cookie(request, response, token_pair.access_token) set_refresh_token_to_cookie(request, response, token_pair.refresh_token) @@ -177,9 +180,10 @@ def _get_account_by_openid_or_email(provider: str, user_info: OAuthUserInfo) -> return account -def _generate_account(provider: str, user_info: OAuthUserInfo): +def _generate_account(provider: str, user_info: OAuthUserInfo) -> tuple[Account, bool]: # Get account by openid or email. 
account = _get_account_by_openid_or_email(provider, user_info) + oauth_new_user = False if account: tenants = TenantService.get_join_tenants(account) @@ -193,6 +197,7 @@ def _generate_account(provider: str, user_info: OAuthUserInfo): tenant_was_created.send(new_tenant) if not account: + oauth_new_user = True if not FeatureService.get_system_features().is_allow_register: if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(user_info.email): raise AccountRegisterError( @@ -220,4 +225,4 @@ def _generate_account(provider: str, user_info: OAuthUserInfo): # Link account AccountService.link_account_integrate(provider, user_info.id, account) - return account + return account, oauth_new_user diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index e73abc2555..5a536af6d2 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -3,10 +3,12 @@ import uuid from flask import request from flask_restx import Resource, marshal from pydantic import BaseModel, Field -from sqlalchemy import select +from sqlalchemy import String, cast, func, or_, select +from sqlalchemy.dialects.postgresql import JSONB from werkzeug.exceptions import Forbidden, NotFound import services +from configs import dify_config from controllers.common.schema import register_schema_models from controllers.console import console_ns from controllers.console.app.error import ProviderNotInitializeError @@ -143,7 +145,29 @@ class DatasetDocumentSegmentListApi(Resource): query = query.where(DocumentSegment.hit_count >= hit_count_gte) if keyword: - query = query.where(DocumentSegment.content.ilike(f"%{keyword}%")) + # Search in both content and keywords fields + # Use database-specific methods for JSON array search + if dify_config.SQLALCHEMY_DATABASE_URI_SCHEME == "postgresql": + # PostgreSQL: Use jsonb_array_elements_text to properly handle Unicode/Chinese text + keywords_condition = func.array_to_string( + func.array( + select(func.jsonb_array_elements_text(cast(DocumentSegment.keywords, JSONB))) + .correlate(DocumentSegment) + .scalar_subquery() + ), + ",", + ).ilike(f"%{keyword}%") + else: + # MySQL: Cast JSON to string for pattern matching + # MySQL stores Chinese text directly in JSON without Unicode escaping + keywords_condition = cast(DocumentSegment.keywords, String).ilike(f"%{keyword}%") + + query = query.where( + or_( + DocumentSegment.content.ilike(f"%{keyword}%"), + keywords_condition, + ) + ) if args.enabled.lower() != "all": if args.enabled.lower() == "true": diff --git a/api/controllers/console/explore/parameter.py b/api/controllers/console/explore/parameter.py index 9c6b2aedfb..660a4d5aea 100644 --- a/api/controllers/console/explore/parameter.py +++ b/api/controllers/console/explore/parameter.py @@ -1,5 +1,3 @@ -from flask_restx import marshal_with - from controllers.common import fields from controllers.console import console_ns from controllers.console.app.error import AppUnavailableError @@ -13,7 +11,6 @@ from services.app_service import AppService class AppParameterApi(InstalledAppResource): """Resource for app variables.""" - @marshal_with(fields.parameters_fields) def get(self, installed_app: InstalledApp): """Retrieve app parameters.""" app_model = installed_app.app @@ -37,7 +34,8 @@ class AppParameterApi(InstalledAppResource): user_input_form = features_dict.get("user_input_form", []) - return get_parameters_from_feature_dict(features_dict=features_dict, 
user_input_form=user_input_form)
+        parameters = get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form)
+        return fields.Parameters.model_validate(parameters).model_dump(mode="json")
 
 
 @console_ns.route("/installed-apps/<uuid:installed_app_id>/meta", endpoint="installed_app_meta")
diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py
index 63c373b50f..85ac9336d6 100644
--- a/api/controllers/service_api/app/annotation.py
+++ b/api/controllers/service_api/app/annotation.py
@@ -1,7 +1,7 @@
 from typing import Literal
 
 from flask import request
-from flask_restx import Api, Namespace, Resource, fields
+from flask_restx import Namespace, Resource, fields
 from flask_restx.api import HTTPStatus
 from pydantic import BaseModel, Field
@@ -92,7 +92,7 @@ annotation_list_fields = {
 }
 
 
-def build_annotation_list_model(api_or_ns: Api | Namespace):
+def build_annotation_list_model(api_or_ns: Namespace):
     """Build the annotation list model for the API or Namespace."""
     copied_annotation_list_fields = annotation_list_fields.copy()
     copied_annotation_list_fields["data"] = fields.List(fields.Nested(build_annotation_model(api_or_ns)))
diff --git a/api/controllers/service_api/app/app.py b/api/controllers/service_api/app/app.py
index 25d7ccccec..562f5e33cc 100644
--- a/api/controllers/service_api/app/app.py
+++ b/api/controllers/service_api/app/app.py
@@ -1,6 +1,6 @@
 from flask_restx import Resource
 
-from controllers.common.fields import build_parameters_model
+from controllers.common.fields import Parameters
 from controllers.service_api import service_api_ns
 from controllers.service_api.app.error import AppUnavailableError
 from controllers.service_api.wraps import validate_app_token
@@ -23,7 +23,6 @@ class AppParameterApi(Resource):
         }
     )
     @validate_app_token
-    @service_api_ns.marshal_with(build_parameters_model(service_api_ns))
     def get(self, app_model: App):
         """Retrieve app parameters.
 
@@ -45,7 +44,8 @@
 
         user_input_form = features_dict.get("user_input_form", [])
 
-        return get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form)
+        parameters = get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form)
+        return Parameters.model_validate(parameters).model_dump(mode="json")
 
 
 @service_api_ns.route("/meta")
diff --git a/api/controllers/service_api/app/site.py b/api/controllers/service_api/app/site.py
index 9f8324a84e..8b47a887bb 100644
--- a/api/controllers/service_api/app/site.py
+++ b/api/controllers/service_api/app/site.py
@@ -1,7 +1,7 @@
 from flask_restx import Resource
 from werkzeug.exceptions import Forbidden
 
-from controllers.common.fields import build_site_model
+from controllers.common.fields import Site as SiteResponse
 from controllers.service_api import service_api_ns
 from controllers.service_api.wraps import validate_app_token
 from extensions.ext_database import db
@@ -23,7 +23,6 @@
         }
     )
     @validate_app_token
-    @service_api_ns.marshal_with(build_site_model(service_api_ns))
     def get(self, app_model: App):
        """Retrieve app site info. 
@@ -38,4 +37,4 @@ class AppSiteApi(Resource): if app_model.tenant.status == TenantStatus.ARCHIVE: raise Forbidden() - return site + return SiteResponse.model_validate(site).model_dump(mode="json") diff --git a/api/controllers/service_api/app/workflow.py b/api/controllers/service_api/app/workflow.py index 4964888fd6..6a549fc926 100644 --- a/api/controllers/service_api/app/workflow.py +++ b/api/controllers/service_api/app/workflow.py @@ -3,7 +3,7 @@ from typing import Any, Literal from dateutil.parser import isoparse from flask import request -from flask_restx import Api, Namespace, Resource, fields +from flask_restx import Namespace, Resource, fields from pydantic import BaseModel, Field from sqlalchemy.orm import Session, sessionmaker from werkzeug.exceptions import BadRequest, InternalServerError, NotFound @@ -78,7 +78,7 @@ workflow_run_fields = { } -def build_workflow_run_model(api_or_ns: Api | Namespace): +def build_workflow_run_model(api_or_ns: Namespace): """Build the workflow run model for the API or Namespace.""" return api_or_ns.model("WorkflowRun", workflow_run_fields) diff --git a/api/controllers/web/app.py b/api/controllers/web/app.py index db3b93a4dc..62ea532eac 100644 --- a/api/controllers/web/app.py +++ b/api/controllers/web/app.py @@ -1,7 +1,7 @@ import logging from flask import request -from flask_restx import Resource, marshal_with +from flask_restx import Resource from pydantic import BaseModel, ConfigDict, Field from werkzeug.exceptions import Unauthorized @@ -50,7 +50,6 @@ class AppParameterApi(WebApiResource): 500: "Internal Server Error", } ) - @marshal_with(fields.parameters_fields) def get(self, app_model: App, end_user): """Retrieve app parameters.""" if app_model.mode in {AppMode.ADVANCED_CHAT, AppMode.WORKFLOW}: @@ -69,7 +68,8 @@ class AppParameterApi(WebApiResource): user_input_form = features_dict.get("user_input_form", []) - return get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form) + parameters = get_parameters_from_feature_dict(features_dict=features_dict, user_input_form=user_input_form) + return fields.Parameters.model_validate(parameters).model_dump(mode="json") @web_ns.route("/meta") diff --git a/api/core/agent/cot_agent_runner.py b/api/core/agent/cot_agent_runner.py index b32e35d0ca..a55f2d0f5f 100644 --- a/api/core/agent/cot_agent_runner.py +++ b/api/core/agent/cot_agent_runner.py @@ -22,6 +22,7 @@ from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransfo from core.tools.__base.tool import Tool from core.tools.entities.tool_entities import ToolInvokeMeta from core.tools.tool_engine import ToolEngine +from core.workflow.nodes.agent.exc import AgentMaxIterationError from models.model import Message logger = logging.getLogger(__name__) @@ -165,6 +166,11 @@ class CotAgentRunner(BaseAgentRunner, ABC): scratchpad.thought = scratchpad.thought.strip() or "I am thinking about how to help you" self._agent_scratchpad.append(scratchpad) + # Check if max iteration is reached and model still wants to call tools + if iteration_step == max_iteration_steps and scratchpad.action: + if scratchpad.action.action_name.lower() != "final answer": + raise AgentMaxIterationError(app_config.agent.max_iteration) + # get llm usage if "usage" in usage_dict: if usage_dict["usage"] is not None: diff --git a/api/core/agent/fc_agent_runner.py b/api/core/agent/fc_agent_runner.py index dcc1326b33..68d14ad027 100644 --- a/api/core/agent/fc_agent_runner.py +++ b/api/core/agent/fc_agent_runner.py @@ -25,6 +25,7 @@ from 
core.model_runtime.entities.message_entities import ImagePromptMessageConte from core.prompt.agent_history_prompt_transform import AgentHistoryPromptTransform from core.tools.entities.tool_entities import ToolInvokeMeta from core.tools.tool_engine import ToolEngine +from core.workflow.nodes.agent.exc import AgentMaxIterationError from models.model import Message logger = logging.getLogger(__name__) @@ -222,6 +223,10 @@ class FunctionCallAgentRunner(BaseAgentRunner): final_answer += response + "\n" + # Check if max iteration is reached and model still wants to call tools + if iteration_step == max_iteration_steps and tool_calls: + raise AgentMaxIterationError(app_config.agent.max_iteration) + # call tools tool_responses = [] for tool_call_id, tool_call_name, tool_call_args in tool_calls: diff --git a/api/core/entities/model_entities.py b/api/core/entities/model_entities.py index 12431976f0..a123fb0321 100644 --- a/api/core/entities/model_entities.py +++ b/api/core/entities/model_entities.py @@ -30,7 +30,6 @@ class SimpleModelProviderEntity(BaseModel): label: I18nObject icon_small: I18nObject | None = None icon_small_dark: I18nObject | None = None - icon_large: I18nObject | None = None supported_model_types: list[ModelType] def __init__(self, provider_entity: ProviderEntity): @@ -44,7 +43,6 @@ class SimpleModelProviderEntity(BaseModel): label=provider_entity.label, icon_small=provider_entity.icon_small, icon_small_dark=provider_entity.icon_small_dark, - icon_large=provider_entity.icon_large, supported_model_types=provider_entity.supported_model_types, ) @@ -94,7 +92,6 @@ class DefaultModelProviderEntity(BaseModel): provider: str label: I18nObject icon_small: I18nObject | None = None - icon_large: I18nObject | None = None supported_model_types: Sequence[ModelType] = [] diff --git a/api/core/model_runtime/entities/provider_entities.py b/api/core/model_runtime/entities/provider_entities.py index 648b209ef1..2d88751668 100644 --- a/api/core/model_runtime/entities/provider_entities.py +++ b/api/core/model_runtime/entities/provider_entities.py @@ -100,7 +100,6 @@ class SimpleProviderEntity(BaseModel): label: I18nObject icon_small: I18nObject | None = None icon_small_dark: I18nObject | None = None - icon_large: I18nObject | None = None supported_model_types: Sequence[ModelType] models: list[AIModelEntity] = [] @@ -123,7 +122,6 @@ class ProviderEntity(BaseModel): label: I18nObject description: I18nObject | None = None icon_small: I18nObject | None = None - icon_large: I18nObject | None = None icon_small_dark: I18nObject | None = None background: str | None = None help: ProviderHelpEntity | None = None @@ -157,7 +155,6 @@ class ProviderEntity(BaseModel): provider=self.provider, label=self.label, icon_small=self.icon_small, - icon_large=self.icon_large, supported_model_types=self.supported_model_types, models=self.models, ) diff --git a/api/core/model_runtime/model_providers/model_provider_factory.py b/api/core/model_runtime/model_providers/model_provider_factory.py index b8704ef4ed..12a202ce64 100644 --- a/api/core/model_runtime/model_providers/model_provider_factory.py +++ b/api/core/model_runtime/model_providers/model_provider_factory.py @@ -285,7 +285,7 @@ class ModelProviderFactory: """ Get provider icon :param provider: provider name - :param icon_type: icon type (icon_small or icon_large) + :param icon_type: icon type (icon_small or icon_small_dark) :param lang: language (zh_Hans or en_US) :return: provider icon """ @@ -309,13 +309,7 @@ class ModelProviderFactory: else: file_name = 
provider_schema.icon_small_dark.en_US else: - if not provider_schema.icon_large: - raise ValueError(f"Provider {provider} does not have large icon.") - - if lang.lower() == "zh_hans": - file_name = provider_schema.icon_large.zh_Hans - else: - file_name = provider_schema.icon_large.en_US + raise ValueError(f"Unsupported icon type: {icon_type}.") if not file_name: raise ValueError(f"Provider {provider} does not have icon.") diff --git a/api/core/provider_manager.py b/api/core/provider_manager.py index 6c818bdc8b..10d86d1762 100644 --- a/api/core/provider_manager.py +++ b/api/core/provider_manager.py @@ -331,7 +331,6 @@ class ProviderManager: provider=provider_schema.provider, label=provider_schema.label, icon_small=provider_schema.icon_small, - icon_large=provider_schema.icon_large, supported_model_types=provider_schema.supported_model_types, ), ) diff --git a/api/core/rag/cleaner/clean_processor.py b/api/core/rag/cleaner/clean_processor.py index 9cb009035b..e182c35b99 100644 --- a/api/core/rag/cleaner/clean_processor.py +++ b/api/core/rag/cleaner/clean_processor.py @@ -27,26 +27,44 @@ class CleanProcessor: pattern = r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)" text = re.sub(pattern, "", text) - # Remove URL but keep Markdown image URLs - # First, temporarily replace Markdown image URLs with a placeholder - markdown_image_pattern = r"!\[.*?\]\((https?://[^\s)]+)\)" - placeholders: list[str] = [] + # Remove URL but keep Markdown image URLs and link URLs + # Replace the ENTIRE markdown link/image with a single placeholder to protect + # the link text (which might also be a URL) from being removed + markdown_link_pattern = r"\[([^\]]*)\]\((https?://[^)]+)\)" + markdown_image_pattern = r"!\[.*?\]\((https?://[^)]+)\)" + placeholders: list[tuple[str, str, str]] = [] # (type, text, url) - def replace_with_placeholder(match, placeholders=placeholders): + def replace_markdown_with_placeholder(match, placeholders=placeholders): + link_type = "link" + link_text = match.group(1) + url = match.group(2) + placeholder = f"__MARKDOWN_PLACEHOLDER_{len(placeholders)}__" + placeholders.append((link_type, link_text, url)) + return placeholder + + def replace_image_with_placeholder(match, placeholders=placeholders): + link_type = "image" url = match.group(1) - placeholder = f"__MARKDOWN_IMAGE_URL_{len(placeholders)}__" - placeholders.append(url) - return f"![image]({placeholder})" + placeholder = f"__MARKDOWN_PLACEHOLDER_{len(placeholders)}__" + placeholders.append((link_type, "image", url)) + return placeholder - text = re.sub(markdown_image_pattern, replace_with_placeholder, text) + # Protect markdown links first + text = re.sub(markdown_link_pattern, replace_markdown_with_placeholder, text) + # Then protect markdown images + text = re.sub(markdown_image_pattern, replace_image_with_placeholder, text) # Now remove all remaining URLs - url_pattern = r"https?://[^\s)]+" + url_pattern = r"https?://\S+" text = re.sub(url_pattern, "", text) - # Finally, restore the Markdown image URLs - for i, url in enumerate(placeholders): - text = text.replace(f"__MARKDOWN_IMAGE_URL_{i}__", url) + # Restore the Markdown links and images + for i, (link_type, text_or_alt, url) in enumerate(placeholders): + placeholder = f"__MARKDOWN_PLACEHOLDER_{i}__" + if link_type == "link": + text = text.replace(placeholder, f"[{text_or_alt}]({url})") + else: # image + text = text.replace(placeholder, f"![{text_or_alt}]({url})") return text def filter_string(self, text): diff --git a/api/core/rag/extractor/extract_processor.py 
b/api/core/rag/extractor/extract_processor.py index 013c287248..6d28ce25bc 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -112,7 +112,7 @@ class ExtractProcessor: if file_extension in {".xlsx", ".xls"}: extractor = ExcelExtractor(file_path) elif file_extension == ".pdf": - extractor = PdfExtractor(file_path) + extractor = PdfExtractor(file_path, upload_file.tenant_id, upload_file.created_by) elif file_extension in {".md", ".markdown", ".mdx"}: extractor = ( UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key) @@ -148,7 +148,7 @@ class ExtractProcessor: if file_extension in {".xlsx", ".xls"}: extractor = ExcelExtractor(file_path) elif file_extension == ".pdf": - extractor = PdfExtractor(file_path) + extractor = PdfExtractor(file_path, upload_file.tenant_id, upload_file.created_by) elif file_extension in {".md", ".markdown", ".mdx"}: extractor = MarkdownExtractor(file_path, autodetect_encoding=True) elif file_extension in {".htm", ".html"}: diff --git a/api/core/rag/extractor/pdf_extractor.py b/api/core/rag/extractor/pdf_extractor.py index 80530d99a6..6aabcac704 100644 --- a/api/core/rag/extractor/pdf_extractor.py +++ b/api/core/rag/extractor/pdf_extractor.py @@ -1,25 +1,57 @@ """Abstract interface for document loader implementations.""" import contextlib +import io +import logging +import uuid from collections.abc import Iterator +import pypdfium2 +import pypdfium2.raw as pdfium_c + +from configs import dify_config from core.rag.extractor.blob.blob import Blob from core.rag.extractor.extractor_base import BaseExtractor from core.rag.models.document import Document +from extensions.ext_database import db from extensions.ext_storage import storage +from libs.datetime_utils import naive_utc_now +from models.enums import CreatorUserRole +from models.model import UploadFile + +logger = logging.getLogger(__name__) class PdfExtractor(BaseExtractor): - """Load pdf files. - + """ + PdfExtractor is used to extract text and images from PDF files. Args: - file_path: Path to the file to load. + file_path: Path to the PDF file. + tenant_id: Workspace ID. + user_id: ID of the user performing the extraction. + file_cache_key: Optional cache key for the extracted text. 
""" - def __init__(self, file_path: str, file_cache_key: str | None = None): - """Initialize with file path.""" + # Magic bytes for image format detection: (magic_bytes, extension, mime_type) + IMAGE_FORMATS = [ + (b"\xff\xd8\xff", "jpg", "image/jpeg"), + (b"\x89PNG\r\n\x1a\n", "png", "image/png"), + (b"\x00\x00\x00\x0c\x6a\x50\x20\x20\x0d\x0a\x87\x0a", "jp2", "image/jp2"), + (b"GIF8", "gif", "image/gif"), + (b"BM", "bmp", "image/bmp"), + (b"II*\x00", "tiff", "image/tiff"), + (b"MM\x00*", "tiff", "image/tiff"), + (b"II+\x00", "tiff", "image/tiff"), + (b"MM\x00+", "tiff", "image/tiff"), + ] + MAX_MAGIC_LEN = max(len(m) for m, _, _ in IMAGE_FORMATS) + + def __init__(self, file_path: str, tenant_id: str, user_id: str, file_cache_key: str | None = None): + """Initialize PdfExtractor.""" self._file_path = file_path + self._tenant_id = tenant_id + self._user_id = user_id self._file_cache_key = file_cache_key def extract(self) -> list[Document]: @@ -50,7 +82,6 @@ class PdfExtractor(BaseExtractor): def parse(self, blob: Blob) -> Iterator[Document]: """Lazily parse the blob.""" - import pypdfium2 # type: ignore with blob.as_bytes_io() as file_path: pdf_reader = pypdfium2.PdfDocument(file_path, autoclose=True) @@ -59,8 +90,87 @@ class PdfExtractor(BaseExtractor): text_page = page.get_textpage() content = text_page.get_text_range() text_page.close() + + image_content = self._extract_images(page) + if image_content: + content += "\n" + image_content + page.close() metadata = {"source": blob.source, "page": page_number} yield Document(page_content=content, metadata=metadata) finally: pdf_reader.close() + + def _extract_images(self, page) -> str: + """ + Extract images from a PDF page, save them to storage and database, + and return markdown image links. + + Args: + page: pypdfium2 page object. + + Returns: + Markdown string containing links to the extracted images. + """ + image_content = [] + upload_files = [] + base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL + + try: + image_objects = page.get_objects(filter=(pdfium_c.FPDF_PAGEOBJ_IMAGE,)) + for obj in image_objects: + try: + # Extract image bytes + img_byte_arr = io.BytesIO() + # Extract DCTDecode (JPEG) and JPXDecode (JPEG 2000) images directly + # Fallback to png for other formats + obj.extract(img_byte_arr, fb_format="png") + img_bytes = img_byte_arr.getvalue() + + if not img_bytes: + continue + + header = img_bytes[: self.MAX_MAGIC_LEN] + image_ext = None + mime_type = None + for magic, ext, mime in self.IMAGE_FORMATS: + if header.startswith(magic): + image_ext = ext + mime_type = mime + break + + if not image_ext or not mime_type: + continue + + file_uuid = str(uuid.uuid4()) + file_key = "image_files/" + self._tenant_id + "/" + file_uuid + "." 
+ image_ext + + storage.save(file_key, img_bytes) + + # save file to db + upload_file = UploadFile( + tenant_id=self._tenant_id, + storage_type=dify_config.STORAGE_TYPE, + key=file_key, + name=file_key, + size=len(img_bytes), + extension=image_ext, + mime_type=mime_type, + created_by=self._user_id, + created_by_role=CreatorUserRole.ACCOUNT, + created_at=naive_utc_now(), + used=True, + used_by=self._user_id, + used_at=naive_utc_now(), + ) + upload_files.append(upload_file) + image_content.append(f"![image]({base_url}/files/{upload_file.id}/file-preview)") + except Exception as e: + logger.warning("Failed to extract image from PDF: %s", e) + continue + except Exception as e: + logger.warning("Failed to get objects from PDF page: %s", e) + if upload_files: + db.session.add_all(upload_files) + db.session.commit() + return "\n".join(image_content) diff --git a/api/core/tools/utils/parser.py b/api/core/tools/utils/parser.py index 3486182192..584975de05 100644 --- a/api/core/tools/utils/parser.py +++ b/api/core/tools/utils/parser.py @@ -378,7 +378,7 @@ class ApiBasedToolSchemaParser: @staticmethod def auto_parse_to_tool_bundle( content: str, extra_info: dict | None = None, warning: dict | None = None - ) -> tuple[list[ApiToolBundle], str]: + ) -> tuple[list[ApiToolBundle], ApiProviderSchemaType]: """ auto parse to tool bundle diff --git a/api/core/tools/utils/text_processing_utils.py b/api/core/tools/utils/text_processing_utils.py index 0f9a91a111..4bfaa5e49b 100644 --- a/api/core/tools/utils/text_processing_utils.py +++ b/api/core/tools/utils/text_processing_utils.py @@ -4,6 +4,7 @@ import re def remove_leading_symbols(text: str) -> str: """ Remove leading punctuation or symbols from the given text. + Preserves markdown links like [text](url) at the start. Args: text (str): The input text to process. @@ -11,6 +12,11 @@ def remove_leading_symbols(text: str) -> str: Returns: str: The text with leading punctuation or symbols removed. 
""" + # Check if text starts with a markdown link - preserve it + markdown_link_pattern = r"^\[([^\]]+)\]\((https?://[^)]+)\)" + if re.match(markdown_link_pattern, text): + return text + # Match Unicode ranges for punctuation and symbols # FIXME this pattern is confused quick fix for #11868 maybe refactor it later pattern = r'^[\[\]\u2000-\u2025\u2027-\u206F\u2E00-\u2E7F\u3000-\u300F\u3011-\u303F"#$%&\'()*+,./:;<=>?@^_`~]+' diff --git a/api/core/tools/workflow_as_tool/provider.py b/api/core/tools/workflow_as_tool/provider.py index 2bd973f831..5422f5250b 100644 --- a/api/core/tools/workflow_as_tool/provider.py +++ b/api/core/tools/workflow_as_tool/provider.py @@ -54,7 +54,6 @@ class WorkflowToolProviderController(ToolProviderController): raise ValueError("app not found") user = session.get(Account, db_provider.user_id) if db_provider.user_id else None - controller = WorkflowToolProviderController( entity=ToolProviderEntity( identity=ToolProviderIdentity( @@ -67,7 +66,7 @@ class WorkflowToolProviderController(ToolProviderController): credentials_schema=[], plugin_id=None, ), - provider_id="", + provider_id=db_provider.id, ) controller.tools = [ diff --git a/api/core/workflow/graph_engine/graph_traversal/skip_propagator.py b/api/core/workflow/graph_engine/graph_traversal/skip_propagator.py index 78f8ecdcdf..b9c9243963 100644 --- a/api/core/workflow/graph_engine/graph_traversal/skip_propagator.py +++ b/api/core/workflow/graph_engine/graph_traversal/skip_propagator.py @@ -60,6 +60,7 @@ class SkipPropagator: if edge_states["has_taken"]: # Enqueue node self._state_manager.enqueue_node(downstream_node_id) + self._state_manager.start_execution(downstream_node_id) return # All edges are skipped, propagate skip to this node diff --git a/api/core/workflow/nodes/agent/exc.py b/api/core/workflow/nodes/agent/exc.py index 944f5f0b20..ba2c83d8a6 100644 --- a/api/core/workflow/nodes/agent/exc.py +++ b/api/core/workflow/nodes/agent/exc.py @@ -119,3 +119,14 @@ class AgentVariableTypeError(AgentNodeError): self.expected_type = expected_type self.actual_type = actual_type super().__init__(message) + + +class AgentMaxIterationError(AgentNodeError): + """Exception raised when the agent exceeds the maximum iteration limit.""" + + def __init__(self, max_iteration: int): + self.max_iteration = max_iteration + super().__init__( + f"Agent exceeded the maximum iteration limit of {max_iteration}. " + f"The agent was unable to complete the task within the allowed number of iterations." 
+ ) diff --git a/api/extensions/ext_celery.py b/api/extensions/ext_celery.py index 5cf4984709..764df5d178 100644 --- a/api/extensions/ext_celery.py +++ b/api/extensions/ext_celery.py @@ -12,9 +12,8 @@ from dify_app import DifyApp def _get_celery_ssl_options() -> dict[str, Any] | None: """Get SSL configuration for Celery broker/backend connections.""" - # Use REDIS_USE_SSL for consistency with the main Redis client # Only apply SSL if we're using Redis as broker/backend - if not dify_config.REDIS_USE_SSL: + if not dify_config.BROKER_USE_SSL: return None # Check if Celery is actually using Redis diff --git a/api/fields/annotation_fields.py b/api/fields/annotation_fields.py index 38835d5ac7..e69306dcb2 100644 --- a/api/fields/annotation_fields.py +++ b/api/fields/annotation_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields from libs.helper import TimestampField @@ -12,7 +12,7 @@ annotation_fields = { } -def build_annotation_model(api_or_ns: Api | Namespace): +def build_annotation_model(api_or_ns: Namespace): """Build the annotation model for the API or Namespace.""" return api_or_ns.model("Annotation", annotation_fields) diff --git a/api/fields/conversation_fields.py b/api/fields/conversation_fields.py index ecc267cf38..e4ca2e7a42 100644 --- a/api/fields/conversation_fields.py +++ b/api/fields/conversation_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields from fields.member_fields import simple_account_fields from libs.helper import TimestampField @@ -46,7 +46,7 @@ message_file_fields = { } -def build_message_file_model(api_or_ns: Api | Namespace): +def build_message_file_model(api_or_ns: Namespace): """Build the message file fields for the API or Namespace.""" return api_or_ns.model("MessageFile", message_file_fields) @@ -217,7 +217,7 @@ conversation_infinite_scroll_pagination_fields = { } -def build_conversation_infinite_scroll_pagination_model(api_or_ns: Api | Namespace): +def build_conversation_infinite_scroll_pagination_model(api_or_ns: Namespace): """Build the conversation infinite scroll pagination model for the API or Namespace.""" simple_conversation_model = build_simple_conversation_model(api_or_ns) @@ -226,11 +226,11 @@ def build_conversation_infinite_scroll_pagination_model(api_or_ns: Api | Namespa return api_or_ns.model("ConversationInfiniteScrollPagination", copied_fields) -def build_conversation_delete_model(api_or_ns: Api | Namespace): +def build_conversation_delete_model(api_or_ns: Namespace): """Build the conversation delete model for the API or Namespace.""" return api_or_ns.model("ConversationDelete", conversation_delete_fields) -def build_simple_conversation_model(api_or_ns: Api | Namespace): +def build_simple_conversation_model(api_or_ns: Namespace): """Build the simple conversation model for the API or Namespace.""" return api_or_ns.model("SimpleConversation", simple_conversation_fields) diff --git a/api/fields/conversation_variable_fields.py b/api/fields/conversation_variable_fields.py index 7d5e311591..c55014a368 100644 --- a/api/fields/conversation_variable_fields.py +++ b/api/fields/conversation_variable_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields from libs.helper import TimestampField @@ -29,12 +29,12 @@ conversation_variable_infinite_scroll_pagination_fields = { } -def build_conversation_variable_model(api_or_ns: Api | Namespace): +def 
build_conversation_variable_model(api_or_ns: Namespace): """Build the conversation variable model for the API or Namespace.""" return api_or_ns.model("ConversationVariable", conversation_variable_fields) -def build_conversation_variable_infinite_scroll_pagination_model(api_or_ns: Api | Namespace): +def build_conversation_variable_infinite_scroll_pagination_model(api_or_ns: Namespace): """Build the conversation variable infinite scroll pagination model for the API or Namespace.""" # Build the nested variable model first conversation_variable_model = build_conversation_variable_model(api_or_ns) diff --git a/api/fields/end_user_fields.py b/api/fields/end_user_fields.py index ea43e3b5fd..5389b0213a 100644 --- a/api/fields/end_user_fields.py +++ b/api/fields/end_user_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields simple_end_user_fields = { "id": fields.String, @@ -8,5 +8,5 @@ simple_end_user_fields = { } -def build_simple_end_user_model(api_or_ns: Api | Namespace): +def build_simple_end_user_model(api_or_ns: Namespace): return api_or_ns.model("SimpleEndUser", simple_end_user_fields) diff --git a/api/fields/file_fields.py b/api/fields/file_fields.py index a707500445..70138404c7 100644 --- a/api/fields/file_fields.py +++ b/api/fields/file_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields from libs.helper import TimestampField @@ -14,7 +14,7 @@ upload_config_fields = { } -def build_upload_config_model(api_or_ns: Api | Namespace): +def build_upload_config_model(api_or_ns: Namespace): """Build the upload config model for the API or Namespace. Args: @@ -39,7 +39,7 @@ file_fields = { } -def build_file_model(api_or_ns: Api | Namespace): +def build_file_model(api_or_ns: Namespace): """Build the file model for the API or Namespace. Args: @@ -57,7 +57,7 @@ remote_file_info_fields = { } -def build_remote_file_info_model(api_or_ns: Api | Namespace): +def build_remote_file_info_model(api_or_ns: Namespace): """Build the remote file info model for the API or Namespace. Args: @@ -81,7 +81,7 @@ file_fields_with_signed_url = { } -def build_file_with_signed_url_model(api_or_ns: Api | Namespace): +def build_file_with_signed_url_model(api_or_ns: Namespace): """Build the file with signed URL model for the API or Namespace. 
Args: diff --git a/api/fields/member_fields.py b/api/fields/member_fields.py index 08e38a6931..25160927e6 100644 --- a/api/fields/member_fields.py +++ b/api/fields/member_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields from libs.helper import AvatarUrlField, TimestampField @@ -9,7 +9,7 @@ simple_account_fields = { } -def build_simple_account_model(api_or_ns: Api | Namespace): +def build_simple_account_model(api_or_ns: Namespace): return api_or_ns.model("SimpleAccount", simple_account_fields) diff --git a/api/fields/message_fields.py b/api/fields/message_fields.py index a419da2e18..151ff6f826 100644 --- a/api/fields/message_fields.py +++ b/api/fields/message_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields from fields.conversation_fields import message_file_fields from libs.helper import TimestampField @@ -10,7 +10,7 @@ feedback_fields = { } -def build_feedback_model(api_or_ns: Api | Namespace): +def build_feedback_model(api_or_ns: Namespace): """Build the feedback model for the API or Namespace.""" return api_or_ns.model("Feedback", feedback_fields) @@ -30,7 +30,7 @@ agent_thought_fields = { } -def build_agent_thought_model(api_or_ns: Api | Namespace): +def build_agent_thought_model(api_or_ns: Namespace): """Build the agent thought model for the API or Namespace.""" return api_or_ns.model("AgentThought", agent_thought_fields) diff --git a/api/fields/tag_fields.py b/api/fields/tag_fields.py index d5b7c86a04..e359a4408c 100644 --- a/api/fields/tag_fields.py +++ b/api/fields/tag_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields dataset_tag_fields = { "id": fields.String, @@ -8,5 +8,5 @@ dataset_tag_fields = { } -def build_dataset_tag_fields(api_or_ns: Api | Namespace): +def build_dataset_tag_fields(api_or_ns: Namespace): return api_or_ns.model("DataSetTag", dataset_tag_fields) diff --git a/api/fields/workflow_app_log_fields.py b/api/fields/workflow_app_log_fields.py index 4cbdf6f0ca..0ebc03a98c 100644 --- a/api/fields/workflow_app_log_fields.py +++ b/api/fields/workflow_app_log_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields +from flask_restx import Namespace, fields from fields.end_user_fields import build_simple_end_user_model, simple_end_user_fields from fields.member_fields import build_simple_account_model, simple_account_fields @@ -17,7 +17,7 @@ workflow_app_log_partial_fields = { } -def build_workflow_app_log_partial_model(api_or_ns: Api | Namespace): +def build_workflow_app_log_partial_model(api_or_ns: Namespace): """Build the workflow app log partial model for the API or Namespace.""" workflow_run_model = build_workflow_run_for_log_model(api_or_ns) simple_account_model = build_simple_account_model(api_or_ns) @@ -43,7 +43,7 @@ workflow_app_log_pagination_fields = { } -def build_workflow_app_log_pagination_model(api_or_ns: Api | Namespace): +def build_workflow_app_log_pagination_model(api_or_ns: Namespace): """Build the workflow app log pagination model for the API or Namespace.""" # Build the nested partial model first workflow_app_log_partial_model = build_workflow_app_log_partial_model(api_or_ns) diff --git a/api/fields/workflow_run_fields.py b/api/fields/workflow_run_fields.py index 821ce62ecc..476025064f 100644 --- a/api/fields/workflow_run_fields.py +++ b/api/fields/workflow_run_fields.py @@ -1,4 +1,4 @@ -from flask_restx import Api, Namespace, fields 
+from flask_restx import Namespace, fields from fields.end_user_fields import simple_end_user_fields from fields.member_fields import simple_account_fields @@ -19,7 +19,7 @@ workflow_run_for_log_fields = { } -def build_workflow_run_for_log_model(api_or_ns: Api | Namespace): +def build_workflow_run_for_log_model(api_or_ns: Namespace): return api_or_ns.model("WorkflowRunForLog", workflow_run_for_log_fields) diff --git a/api/libs/archive_storage.py b/api/libs/archive_storage.py new file mode 100644 index 0000000000..f84d226447 --- /dev/null +++ b/api/libs/archive_storage.py @@ -0,0 +1,347 @@ +""" +Archive Storage Client for S3-compatible storage. + +This module provides a dedicated storage client for archiving or exporting logs +to S3-compatible object storage. +""" + +import base64 +import datetime +import gzip +import hashlib +import logging +from collections.abc import Generator +from typing import Any, cast + +import boto3 +import orjson +from botocore.client import Config +from botocore.exceptions import ClientError + +from configs import dify_config + +logger = logging.getLogger(__name__) + + +class ArchiveStorageError(Exception): + """Base exception for archive storage operations.""" + + pass + + +class ArchiveStorageNotConfiguredError(ArchiveStorageError): + """Raised when archive storage is not properly configured.""" + + pass + + +class ArchiveStorage: + """ + S3-compatible storage client for archiving or exporting. + + This client provides methods for storing and retrieving archived data in JSONL+gzip format. + """ + + def __init__(self, bucket: str): + if not dify_config.ARCHIVE_STORAGE_ENABLED: + raise ArchiveStorageNotConfiguredError("Archive storage is not enabled") + + if not bucket: + raise ArchiveStorageNotConfiguredError("Archive storage bucket is not configured") + if not all( + [ + dify_config.ARCHIVE_STORAGE_ENDPOINT, + bucket, + dify_config.ARCHIVE_STORAGE_ACCESS_KEY, + dify_config.ARCHIVE_STORAGE_SECRET_KEY, + ] + ): + raise ArchiveStorageNotConfiguredError( + "Archive storage configuration is incomplete. " + "Required: ARCHIVE_STORAGE_ENDPOINT, ARCHIVE_STORAGE_ACCESS_KEY, " + "ARCHIVE_STORAGE_SECRET_KEY, and a bucket name" + ) + + self.bucket = bucket + self.client = boto3.client( + "s3", + endpoint_url=dify_config.ARCHIVE_STORAGE_ENDPOINT, + aws_access_key_id=dify_config.ARCHIVE_STORAGE_ACCESS_KEY, + aws_secret_access_key=dify_config.ARCHIVE_STORAGE_SECRET_KEY, + region_name=dify_config.ARCHIVE_STORAGE_REGION, + config=Config(s3={"addressing_style": "path"}), + ) + + # Verify bucket accessibility + try: + self.client.head_bucket(Bucket=self.bucket) + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code") + if error_code == "404": + raise ArchiveStorageNotConfiguredError(f"Archive bucket '{self.bucket}' does not exist") + elif error_code == "403": + raise ArchiveStorageNotConfiguredError(f"Access denied to archive bucket '{self.bucket}'") + else: + raise ArchiveStorageError(f"Failed to access archive bucket: {e}") + + def put_object(self, key: str, data: bytes) -> str: + """ + Upload an object to the archive storage. 
+ + Args: + key: Object key (path) within the bucket + data: Binary data to upload + + Returns: + MD5 checksum of the uploaded data + + Raises: + ArchiveStorageError: If upload fails + """ + checksum = hashlib.md5(data).hexdigest() + try: + self.client.put_object( + Bucket=self.bucket, + Key=key, + Body=data, + ContentMD5=self._content_md5(data), + ) + logger.debug("Uploaded object: %s (size=%d, checksum=%s)", key, len(data), checksum) + return checksum + except ClientError as e: + raise ArchiveStorageError(f"Failed to upload object '{key}': {e}") + + def get_object(self, key: str) -> bytes: + """ + Download an object from the archive storage. + + Args: + key: Object key (path) within the bucket + + Returns: + Binary data of the object + + Raises: + ArchiveStorageError: If download fails + FileNotFoundError: If object does not exist + """ + try: + response = self.client.get_object(Bucket=self.bucket, Key=key) + return response["Body"].read() + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code") + if error_code == "NoSuchKey": + raise FileNotFoundError(f"Archive object not found: {key}") + raise ArchiveStorageError(f"Failed to download object '{key}': {e}") + + def get_object_stream(self, key: str) -> Generator[bytes, None, None]: + """ + Stream an object from the archive storage. + + Args: + key: Object key (path) within the bucket + + Yields: + Chunks of binary data + + Raises: + ArchiveStorageError: If download fails + FileNotFoundError: If object does not exist + """ + try: + response = self.client.get_object(Bucket=self.bucket, Key=key) + yield from response["Body"].iter_chunks() + except ClientError as e: + error_code = e.response.get("Error", {}).get("Code") + if error_code == "NoSuchKey": + raise FileNotFoundError(f"Archive object not found: {key}") + raise ArchiveStorageError(f"Failed to stream object '{key}': {e}") + + def object_exists(self, key: str) -> bool: + """ + Check if an object exists in the archive storage. + + Args: + key: Object key (path) within the bucket + + Returns: + True if object exists, False otherwise + """ + try: + self.client.head_object(Bucket=self.bucket, Key=key) + return True + except ClientError: + return False + + def delete_object(self, key: str) -> None: + """ + Delete an object from the archive storage. + + Args: + key: Object key (path) within the bucket + + Raises: + ArchiveStorageError: If deletion fails + """ + try: + self.client.delete_object(Bucket=self.bucket, Key=key) + logger.debug("Deleted object: %s", key) + except ClientError as e: + raise ArchiveStorageError(f"Failed to delete object '{key}': {e}") + + def generate_presigned_url(self, key: str, expires_in: int = 3600) -> str: + """ + Generate a pre-signed URL for downloading an object. + + Args: + key: Object key (path) within the bucket + expires_in: URL validity duration in seconds (default: 1 hour) + + Returns: + Pre-signed URL string. + + Raises: + ArchiveStorageError: If generation fails + """ + try: + return self.client.generate_presigned_url( + ClientMethod="get_object", + Params={"Bucket": self.bucket, "Key": key}, + ExpiresIn=expires_in, + ) + except ClientError as e: + raise ArchiveStorageError(f"Failed to generate pre-signed URL for '{key}': {e}") + + def list_objects(self, prefix: str) -> list[str]: + """ + List objects under a given prefix. 
+ + Args: + prefix: Object key prefix to filter by + + Returns: + List of object keys matching the prefix + """ + keys = [] + paginator = self.client.get_paginator("list_objects_v2") + + try: + for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix): + for obj in page.get("Contents", []): + keys.append(obj["Key"]) + except ClientError as e: + raise ArchiveStorageError(f"Failed to list objects with prefix '{prefix}': {e}") + + return keys + + @staticmethod + def _content_md5(data: bytes) -> str: + """Calculate base64-encoded MD5 for Content-MD5 header.""" + return base64.b64encode(hashlib.md5(data).digest()).decode() + + @staticmethod + def serialize_to_jsonl_gz(records: list[dict[str, Any]]) -> bytes: + """ + Serialize records to gzipped JSONL format. + + Args: + records: List of dictionaries to serialize + + Returns: + Gzipped JSONL bytes + """ + lines = [] + for record in records: + # Convert datetime objects to ISO format strings + serialized = ArchiveStorage._serialize_record(record) + lines.append(orjson.dumps(serialized)) + + jsonl_content = b"\n".join(lines) + if jsonl_content: + jsonl_content += b"\n" + + return gzip.compress(jsonl_content) + + @staticmethod + def deserialize_from_jsonl_gz(data: bytes) -> list[dict[str, Any]]: + """ + Deserialize gzipped JSONL data to records. + + Args: + data: Gzipped JSONL bytes + + Returns: + List of dictionaries + """ + jsonl_content = gzip.decompress(data) + records = [] + + for line in jsonl_content.splitlines(): + if line: + records.append(orjson.loads(line)) + + return records + + @staticmethod + def _serialize_record(record: dict[str, Any]) -> dict[str, Any]: + """Serialize a single record, converting special types.""" + + def _serialize(item: Any) -> Any: + if isinstance(item, datetime.datetime): + return item.isoformat() + if isinstance(item, dict): + return {key: _serialize(value) for key, value in item.items()} + if isinstance(item, list): + return [_serialize(value) for value in item] + return item + + return cast(dict[str, Any], _serialize(record)) + + @staticmethod + def compute_checksum(data: bytes) -> str: + """Compute MD5 checksum of data.""" + return hashlib.md5(data).hexdigest() + + +# Singleton instance (lazy initialization) +_archive_storage: ArchiveStorage | None = None +_export_storage: ArchiveStorage | None = None + + +def get_archive_storage() -> ArchiveStorage: + """ + Get the archive storage singleton instance. + + Returns: + ArchiveStorage instance + + Raises: + ArchiveStorageNotConfiguredError: If archive storage is not configured + """ + global _archive_storage + if _archive_storage is None: + archive_bucket = dify_config.ARCHIVE_STORAGE_ARCHIVE_BUCKET + if not archive_bucket: + raise ArchiveStorageNotConfiguredError( + "Archive storage bucket is not configured. Required: ARCHIVE_STORAGE_ARCHIVE_BUCKET" + ) + _archive_storage = ArchiveStorage(bucket=archive_bucket) + return _archive_storage + + +def get_export_storage() -> ArchiveStorage: + """ + Get the export storage singleton instance. + + Returns: + ArchiveStorage instance + """ + global _export_storage + if _export_storage is None: + export_bucket = dify_config.ARCHIVE_STORAGE_EXPORT_BUCKET + if not export_bucket: + raise ArchiveStorageNotConfiguredError( + "Archive export bucket is not configured. 
Required: ARCHIVE_STORAGE_EXPORT_BUCKET" + ) + _export_storage = ArchiveStorage(bucket=export_bucket) + return _export_storage diff --git a/api/migrations/versions/00bacef91f18_rename_api_provider_description.py b/api/migrations/versions/00bacef91f18_rename_api_provider_description.py index 17ed067d81..657d28f896 100644 --- a/api/migrations/versions/00bacef91f18_rename_api_provider_description.py +++ b/api/migrations/versions/00bacef91f18_rename_api_provider_description.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '00bacef91f18' down_revision = '8ec536f3c800' @@ -23,31 +20,17 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('description', sa.Text(), nullable=False)) - batch_op.drop_column('description_str') - else: - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('description', models.types.LongText(), nullable=False)) - batch_op.drop_column('description_str') + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('description', models.types.LongText(), nullable=False)) + batch_op.drop_column('description_str') # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('description_str', sa.TEXT(), autoincrement=False, nullable=False)) - batch_op.drop_column('description') - else: - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('description_str', models.types.LongText(), autoincrement=False, nullable=False)) - batch_op.drop_column('description') + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('description_str', models.types.LongText(), autoincrement=False, nullable=False)) + batch_op.drop_column('description') # ### end Alembic commands ### diff --git a/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py b/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py index ed70bf5d08..912d9dbfa4 100644 --- a/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py +++ b/api/migrations/versions/114eed84c228_remove_tool_id_from_model_invoke.py @@ -7,14 +7,10 @@ Create Date: 2024-01-10 04:40:57.257824 """ import sqlalchemy as sa from alembic import op -from sqlalchemy.dialects import postgresql import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '114eed84c228' down_revision = 'c71211c8f604' @@ -32,13 +28,7 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tool_model_invokes', schema=None) as batch_op: - batch_op.add_column(sa.Column('tool_id', postgresql.UUID(), autoincrement=False, nullable=False)) - else: - with op.batch_alter_table('tool_model_invokes', schema=None) as batch_op: - batch_op.add_column(sa.Column('tool_id', models.types.StringUUID(), autoincrement=False, nullable=False)) + with op.batch_alter_table('tool_model_invokes', schema=None) as batch_op: + batch_op.add_column(sa.Column('tool_id', models.types.StringUUID(), autoincrement=False, nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py b/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py index 509bd5d0e8..0ca905129d 100644 --- a/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py +++ b/api/migrations/versions/161cadc1af8d_add_dataset_permission_tenant_id.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '161cadc1af8d' down_revision = '7e6a8693e07a' @@ -23,16 +20,9 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: - # Step 1: Add column without NOT NULL constraint - op.add_column('dataset_permissions', sa.Column('tenant_id', sa.UUID(), nullable=False)) - else: - with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: - # Step 1: Add column without NOT NULL constraint - op.add_column('dataset_permissions', sa.Column('tenant_id', models.types.StringUUID(), nullable=False)) + with op.batch_alter_table('dataset_permissions', schema=None) as batch_op: + # Step 1: Add column without NOT NULL constraint + op.add_column('dataset_permissions', sa.Column('tenant_id', models.types.StringUUID(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py b/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py index 0767b725f6..be1b42f883 100644 --- a/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py +++ b/api/migrations/versions/2024_09_24_0922-6af6a521a53e_update_retrieval_resource.py @@ -9,11 +9,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - # revision identifiers, used by Alembic. revision = '6af6a521a53e' down_revision = 'd57ba9ebb251' @@ -23,58 +18,30 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: - batch_op.alter_column('document_id', - existing_type=sa.UUID(), - nullable=True) - batch_op.alter_column('data_source_type', - existing_type=sa.TEXT(), - nullable=True) - batch_op.alter_column('segment_id', - existing_type=sa.UUID(), - nullable=True) - else: - with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: - batch_op.alter_column('document_id', - existing_type=models.types.StringUUID(), - nullable=True) - batch_op.alter_column('data_source_type', - existing_type=models.types.LongText(), - nullable=True) - batch_op.alter_column('segment_id', - existing_type=models.types.StringUUID(), - nullable=True) + with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: + batch_op.alter_column('document_id', + existing_type=models.types.StringUUID(), + nullable=True) + batch_op.alter_column('data_source_type', + existing_type=models.types.LongText(), + nullable=True) + batch_op.alter_column('segment_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: - batch_op.alter_column('segment_id', - existing_type=sa.UUID(), - nullable=False) - batch_op.alter_column('data_source_type', - existing_type=sa.TEXT(), - nullable=False) - batch_op.alter_column('document_id', - existing_type=sa.UUID(), - nullable=False) - else: - with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: - batch_op.alter_column('segment_id', - existing_type=models.types.StringUUID(), - nullable=False) - batch_op.alter_column('data_source_type', - existing_type=models.types.LongText(), - nullable=False) - batch_op.alter_column('document_id', - existing_type=models.types.StringUUID(), - nullable=False) + with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op: + batch_op.alter_column('segment_id', + existing_type=models.types.StringUUID(), + nullable=False) + batch_op.alter_column('data_source_type', + existing_type=models.types.LongText(), + nullable=False) + batch_op.alter_column('document_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_11_01_0434-d3f6769a94a3_add_upload_files_source_url.py b/api/migrations/versions/2024_11_01_0434-d3f6769a94a3_add_upload_files_source_url.py index a749c8bddf..5d12419bf7 100644 --- a/api/migrations/versions/2024_11_01_0434-d3f6769a94a3_add_upload_files_source_url.py +++ b/api/migrations/versions/2024_11_01_0434-d3f6769a94a3_add_upload_files_source_url.py @@ -8,7 +8,6 @@ Create Date: 2024-11-01 04:34:23.816198 from alembic import op import models as models import sqlalchemy as sa -from sqlalchemy.dialects import postgresql # revision identifiers, used by Alembic. 
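Editorial note on the migration rewrites in this PR: the per-dialect `_is_pg(conn)` branches become unnecessary because `models.types.StringUUID` and `models.types.LongText` are dialect-aware column types that emit the right DDL per backend at render time. A minimal sketch of that pattern, assuming these types are SQLAlchemy `TypeDecorator`s (their real definitions live in `api/models/types.py` and are not shown in this diff):

```python
# Hypothetical sketch only -- NOT the code from api/models/types.py.
# Shows how a dialect-aware type lets one migration body serve both
# PostgreSQL and MySQL without `_is_pg` branching.
import sqlalchemy as sa
from sqlalchemy.dialects import mysql


class LongText(sa.types.TypeDecorator):
    """Renders as TEXT on PostgreSQL and LONGTEXT on MySQL."""

    impl = sa.Text
    cache_ok = True

    def load_dialect_impl(self, dialect):
        if dialect.name == "mysql":
            return dialect.type_descriptor(mysql.LONGTEXT())
        return dialect.type_descriptor(sa.Text())
```

With types like this, `existing_type=models.types.LongText()` compiles to the correct DDL on either backend, which is what lets each migration below collapse to a single `batch_alter_table` block.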
revision = 'd3f6769a94a3' diff --git a/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py b/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py index 45842295ea..a49d6a52f6 100644 --- a/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py +++ b/api/migrations/versions/2024_11_01_0622-d07474999927_update_type_of_custom_disclaimer_to_text.py @@ -28,85 +28,45 @@ def upgrade(): op.execute("UPDATE sites SET custom_disclaimer = '' WHERE custom_disclaimer IS NULL") op.execute("UPDATE tool_api_providers SET custom_disclaimer = '' WHERE custom_disclaimer IS NULL") - if _is_pg(conn): - with op.batch_alter_table('recommended_apps', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=sa.TEXT(), - nullable=False) + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + nullable=False) - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=sa.TEXT(), - nullable=False) + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + nullable=False) - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=sa.TEXT(), - nullable=False) - else: - with op.batch_alter_table('recommended_apps', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=models.types.LongText(), - nullable=False) - - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=models.types.LongText(), - nullable=False) - - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.VARCHAR(length=255), - type_=models.types.LongText(), - nullable=False) + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + nullable=False) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.TEXT(), - type_=sa.VARCHAR(length=255), - nullable=True) + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + nullable=True) - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.TEXT(), - type_=sa.VARCHAR(length=255), - nullable=True) + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + nullable=True) - with op.batch_alter_table('recommended_apps', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=sa.TEXT(), - type_=sa.VARCHAR(length=255), - nullable=True) - else: - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=models.types.LongText(), - type_=sa.VARCHAR(length=255), - nullable=True) - - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=models.types.LongText(), - type_=sa.VARCHAR(length=255), - nullable=True) - - with op.batch_alter_table('recommended_apps', schema=None) as batch_op: - batch_op.alter_column('custom_disclaimer', - existing_type=models.types.LongText(), - type_=sa.VARCHAR(length=255), - nullable=True) + with op.batch_alter_table('recommended_apps', schema=None) as batch_op: + batch_op.alter_column('custom_disclaimer', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + nullable=True) # ### end Alembic commands ### diff --git a/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py b/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py index fdd8984029..8a36c9c4a5 100644 --- a/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py +++ b/api/migrations/versions/2024_11_01_0623-09a8d1878d9b_update_workflows_graph_features_and_.py @@ -49,57 +49,33 @@ def upgrade(): op.execute("UPDATE workflows SET updated_at = created_at WHERE updated_at IS NULL") op.execute("UPDATE workflows SET graph = '' WHERE graph IS NULL") op.execute("UPDATE workflows SET features = '' WHERE features IS NULL") - if _is_pg(conn): - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.alter_column('graph', - existing_type=sa.TEXT(), - nullable=False) - batch_op.alter_column('features', - existing_type=sa.TEXT(), - nullable=False) - batch_op.alter_column('updated_at', - existing_type=postgresql.TIMESTAMP(), - nullable=False) - else: - with op.batch_alter_table('workflows', schema=None) as batch_op: - batch_op.alter_column('graph', - existing_type=models.types.LongText(), - nullable=False) - batch_op.alter_column('features', - existing_type=models.types.LongText(), - nullable=False) - batch_op.alter_column('updated_at', - existing_type=sa.TIMESTAMP(), - nullable=False) + + with op.batch_alter_table('workflows', schema=None) as batch_op: + batch_op.alter_column('graph', + existing_type=models.types.LongText(), + nullable=False) + batch_op.alter_column('features', + existing_type=models.types.LongText(), + nullable=False) + 
batch_op.alter_column('updated_at',
+                              existing_type=sa.TIMESTAMP(),
+                              nullable=False)
 
     # ### end Alembic commands ###
 
 
 def downgrade():
     # ### commands auto generated by Alembic - please adjust! ###
     conn = op.get_bind()
-
-    if _is_pg(conn):
-        with op.batch_alter_table('workflows', schema=None) as batch_op:
-            batch_op.alter_column('updated_at',
-                                  existing_type=postgresql.TIMESTAMP(),
-                                  nullable=True)
-            batch_op.alter_column('features',
-                                  existing_type=sa.TEXT(),
-                                  nullable=True)
-            batch_op.alter_column('graph',
-                                  existing_type=sa.TEXT(),
-                                  nullable=True)
-    else:
-        with op.batch_alter_table('workflows', schema=None) as batch_op:
-            batch_op.alter_column('updated_at',
-                                  existing_type=sa.TIMESTAMP(),
-                                  nullable=True)
-            batch_op.alter_column('features',
-                                  existing_type=models.types.LongText(),
-                                  nullable=True)
-            batch_op.alter_column('graph',
-                                  existing_type=models.types.LongText(),
-                                  nullable=True)
+    with op.batch_alter_table('workflows', schema=None) as batch_op:
+        batch_op.alter_column('updated_at',
+                              existing_type=sa.TIMESTAMP(),
+                              nullable=True)
+        batch_op.alter_column('features',
+                              existing_type=models.types.LongText(),
+                              nullable=True)
+        batch_op.alter_column('graph',
+                              existing_type=models.types.LongText(),
+                              nullable=True)
 
     if _is_pg(conn):
         with op.batch_alter_table('messages', schema=None) as batch_op:
diff --git a/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py b/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py
index 16ca902726..1fc4a64df1 100644
--- a/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py
+++ b/api/migrations/versions/2025_08_13_1605-0e154742a5fa_add_provider_model_multi_credential.py
@@ -86,57 +86,30 @@ def upgrade():
 
 def migrate_existing_provider_models_data():
     """migrate provider_models table data to provider_model_credentials"""
-    conn = op.get_bind()
-    # Define table structure for data manipulation
-    if _is_pg(conn):
-        provider_models_table = table('provider_models',
-            column('id', models.types.StringUUID()),
-            column('tenant_id', models.types.StringUUID()),
-            column('provider_name', sa.String()),
-            column('model_name', sa.String()),
-            column('model_type', sa.String()),
-            column('encrypted_config', sa.Text()),
-            column('created_at', sa.DateTime()),
-            column('updated_at', sa.DateTime()),
-            column('credential_id', models.types.StringUUID()),
-        )
-    else:
-        provider_models_table = table('provider_models',
-            column('id', models.types.StringUUID()),
-            column('tenant_id', models.types.StringUUID()),
-            column('provider_name', sa.String()),
-            column('model_name', sa.String()),
-            column('model_type', sa.String()),
-            column('encrypted_config', models.types.LongText()),
-            column('created_at', sa.DateTime()),
-            column('updated_at', sa.DateTime()),
-            column('credential_id', models.types.StringUUID()),
-        )
+    # Define table structure for data manipulation
+    provider_models_table = table('provider_models',
+        column('id', models.types.StringUUID()),
+        column('tenant_id', models.types.StringUUID()),
+        column('provider_name', sa.String()),
+        column('model_name', sa.String()),
+        column('model_type', sa.String()),
+        column('encrypted_config', models.types.LongText()),
+        column('created_at', sa.DateTime()),
+        column('updated_at', sa.DateTime()),
+        column('credential_id', models.types.StringUUID()),
+    )
 
-    if _is_pg(conn):
-        provider_model_credentials_table = table('provider_model_credentials',
-            column('id', models.types.StringUUID()),
-            column('tenant_id', 
models.types.StringUUID()), - column('provider_name', sa.String()), - column('model_name', sa.String()), - column('model_type', sa.String()), - column('credential_name', sa.String()), - column('encrypted_config', sa.Text()), - column('created_at', sa.DateTime()), - column('updated_at', sa.DateTime()) - ) - else: - provider_model_credentials_table = table('provider_model_credentials', - column('id', models.types.StringUUID()), - column('tenant_id', models.types.StringUUID()), - column('provider_name', sa.String()), - column('model_name', sa.String()), - column('model_type', sa.String()), - column('credential_name', sa.String()), - column('encrypted_config', models.types.LongText()), - column('created_at', sa.DateTime()), - column('updated_at', sa.DateTime()) - ) + provider_model_credentials_table = table('provider_model_credentials', + column('id', models.types.StringUUID()), + column('tenant_id', models.types.StringUUID()), + column('provider_name', sa.String()), + column('model_name', sa.String()), + column('model_type', sa.String()), + column('credential_name', sa.String()), + column('encrypted_config', models.types.LongText()), + column('created_at', sa.DateTime()), + column('updated_at', sa.DateTime()) + ) # Get database connection @@ -183,14 +156,8 @@ def migrate_existing_provider_models_data(): def downgrade(): # Re-add encrypted_config column to provider_models table - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('provider_models', schema=None) as batch_op: - batch_op.add_column(sa.Column('encrypted_config', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('provider_models', schema=None) as batch_op: - batch_op.add_column(sa.Column('encrypted_config', models.types.LongText(), nullable=True)) + with op.batch_alter_table('provider_models', schema=None) as batch_op: + batch_op.add_column(sa.Column('encrypted_config', models.types.LongText(), nullable=True)) if not context.is_offline_mode(): # Migrate data back from provider_model_credentials to provider_models diff --git a/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py b/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py index 75b4d61173..79fe9d9bba 100644 --- a/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py +++ b/api/migrations/versions/2025_08_20_1747-8d289573e1da_add_oauth_provider_apps.py @@ -8,7 +8,6 @@ Create Date: 2025-08-20 17:47:17.015695 from alembic import op import models as models import sqlalchemy as sa -from libs.uuid_utils import uuidv7 def _is_pg(conn): diff --git a/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py b/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py index 4f472fe4b4..cf2b973d2d 100644 --- a/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py +++ b/api/migrations/versions/2025_09_08_1007-c20211f18133_add_headers_to_mcp_provider.py @@ -9,8 +9,6 @@ from alembic import op import models as models -def _is_pg(conn): - return conn.dialect.name == "postgresql" import sqlalchemy as sa @@ -23,12 +21,7 @@ depends_on = None def upgrade(): # Add encrypted_headers column to tool_mcp_providers table - conn = op.get_bind() - - if _is_pg(conn): - op.add_column('tool_mcp_providers', sa.Column('encrypted_headers', sa.Text(), nullable=True)) - else: - op.add_column('tool_mcp_providers', sa.Column('encrypted_headers', models.types.LongText(), nullable=True)) + 
op.add_column('tool_mcp_providers', sa.Column('encrypted_headers', models.types.LongText(), nullable=True)) def downgrade(): diff --git a/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py b/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py index 8eac0dee10..bad516dcac 100644 --- a/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py +++ b/api/migrations/versions/2025_09_17_1515-68519ad5cd18_knowledge_pipeline_migrate.py @@ -44,6 +44,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='datasource_oauth_config_pkey'), sa.UniqueConstraint('plugin_id', 'provider', name='datasource_oauth_config_datasource_id_provider_idx') ) + if _is_pg(conn): op.create_table('datasource_oauth_tenant_params', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), @@ -70,6 +71,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='datasource_oauth_tenant_config_pkey'), sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='datasource_oauth_tenant_config_unique') ) + if _is_pg(conn): op.create_table('datasource_providers', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), @@ -104,6 +106,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='datasource_provider_pkey'), sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', 'name', name='datasource_provider_unique_name') ) + with op.batch_alter_table('datasource_providers', schema=None) as batch_op: batch_op.create_index('datasource_provider_auth_type_provider_idx', ['tenant_id', 'plugin_id', 'provider'], unique=False) @@ -133,6 +136,7 @@ def upgrade(): sa.Column('created_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), sa.PrimaryKeyConstraint('id', name='document_pipeline_execution_log_pkey') ) + with op.batch_alter_table('document_pipeline_execution_logs', schema=None) as batch_op: batch_op.create_index('document_pipeline_execution_logs_document_id_idx', ['document_id'], unique=False) @@ -174,6 +178,7 @@ def upgrade(): sa.Column('updated_by', models.types.StringUUID(), nullable=True), sa.PrimaryKeyConstraint('id', name='pipeline_built_in_template_pkey') ) + if _is_pg(conn): op.create_table('pipeline_customized_templates', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), @@ -193,7 +198,6 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='pipeline_customized_template_pkey') ) else: - # MySQL: Use compatible syntax op.create_table('pipeline_customized_templates', sa.Column('id', models.types.StringUUID(), nullable=False), sa.Column('tenant_id', models.types.StringUUID(), nullable=False), @@ -211,6 +215,7 @@ def upgrade(): sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), sa.PrimaryKeyConstraint('id', name='pipeline_customized_template_pkey') ) + with op.batch_alter_table('pipeline_customized_templates', schema=None) as batch_op: batch_op.create_index('pipeline_customized_template_tenant_idx', ['tenant_id'], unique=False) @@ -236,6 +241,7 @@ def upgrade(): sa.Column('updated_at', sa.DateTime(), server_default=sa.func.current_timestamp(), nullable=False), sa.PrimaryKeyConstraint('id', name='pipeline_recommended_plugin_pkey') ) + if _is_pg(conn): op.create_table('pipelines', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), @@ -266,6 +272,7 @@ def upgrade(): sa.Column('updated_at', sa.DateTime(), 
server_default=sa.func.current_timestamp(), nullable=False), sa.PrimaryKeyConstraint('id', name='pipeline_pkey') ) + if _is_pg(conn): op.create_table('workflow_draft_variable_files', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), @@ -292,6 +299,7 @@ def upgrade(): sa.Column('value_type', sa.String(20), nullable=False), sa.PrimaryKeyConstraint('id', name=op.f('workflow_draft_variable_files_pkey')) ) + if _is_pg(conn): op.create_table('workflow_node_execution_offload', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuidv7()'), nullable=False), @@ -316,6 +324,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name=op.f('workflow_node_execution_offload_pkey')), sa.UniqueConstraint('node_execution_id', 'type', name=op.f('workflow_node_execution_offload_node_execution_id_key')) ) + if _is_pg(conn): with op.batch_alter_table('datasets', schema=None) as batch_op: batch_op.add_column(sa.Column('keyword_number', sa.Integer(), server_default=sa.text('10'), nullable=True)) @@ -342,6 +351,7 @@ def upgrade(): comment='Indicates whether the current value is the default for a conversation variable. Always `FALSE` for other types of variables.',) ) batch_op.create_index('workflow_draft_variable_file_id_idx', ['file_id'], unique=False) + if _is_pg(conn): with op.batch_alter_table('workflows', schema=None) as batch_op: batch_op.add_column(sa.Column('rag_pipeline_variables', sa.Text(), server_default='{}', nullable=False)) diff --git a/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py b/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py index 0776ab0818..ec0cfbd11d 100644 --- a/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py +++ b/api/migrations/versions/2025_10_21_1430-ae662b25d9bc_remove_builtin_template_user.py @@ -9,8 +9,6 @@ from alembic import op import models as models -def _is_pg(conn): - return conn.dialect.name == "postgresql" import sqlalchemy as sa @@ -33,15 +31,9 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('pipeline_built_in_templates', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_by', sa.UUID(), autoincrement=False, nullable=False)) - batch_op.add_column(sa.Column('updated_by', sa.UUID(), autoincrement=False, nullable=True)) - else: - with op.batch_alter_table('pipeline_built_in_templates', schema=None) as batch_op: - batch_op.add_column(sa.Column('created_by', models.types.StringUUID(), autoincrement=False, nullable=False)) - batch_op.add_column(sa.Column('updated_by', models.types.StringUUID(), autoincrement=False, nullable=True)) + + with op.batch_alter_table('pipeline_built_in_templates', schema=None) as batch_op: + batch_op.add_column(sa.Column('created_by', models.types.StringUUID(), autoincrement=False, nullable=False)) + batch_op.add_column(sa.Column('updated_by', models.types.StringUUID(), autoincrement=False, nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2025_10_22_1611-03f8dcbc611e_add_workflowpause_model.py b/api/migrations/versions/2025_10_22_1611-03f8dcbc611e_add_workflowpause_model.py index 627219cc4b..12905b3674 100644 --- a/api/migrations/versions/2025_10_22_1611-03f8dcbc611e_add_workflowpause_model.py +++ b/api/migrations/versions/2025_10_22_1611-03f8dcbc611e_add_workflowpause_model.py @@ -9,7 +9,6 @@ Create Date: 2025-10-22 16:11:31.805407 from alembic import op import models as models import sqlalchemy as sa -from libs.uuid_utils import uuidv7 def _is_pg(conn): return conn.dialect.name == "postgresql" diff --git a/api/migrations/versions/2025_10_30_1518-669ffd70119c_introduce_trigger.py b/api/migrations/versions/2025_10_30_1518-669ffd70119c_introduce_trigger.py index 9641a15c89..c27c1058d1 100644 --- a/api/migrations/versions/2025_10_30_1518-669ffd70119c_introduce_trigger.py +++ b/api/migrations/versions/2025_10_30_1518-669ffd70119c_introduce_trigger.py @@ -105,6 +105,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='trigger_oauth_tenant_client_pkey'), sa.UniqueConstraint('tenant_id', 'plugin_id', 'provider', name='unique_trigger_oauth_tenant_client') ) + if _is_pg(conn): op.create_table('trigger_subscriptions', sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), @@ -143,6 +144,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='trigger_provider_pkey'), sa.UniqueConstraint('tenant_id', 'provider_id', 'name', name='unique_trigger_provider') ) + with op.batch_alter_table('trigger_subscriptions', schema=None) as batch_op: batch_op.create_index('idx_trigger_providers_endpoint', ['endpoint_id'], unique=True) batch_op.create_index('idx_trigger_providers_tenant_endpoint', ['tenant_id', 'endpoint_id'], unique=False) @@ -176,6 +178,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='workflow_plugin_trigger_pkey'), sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node_subscription') ) + with op.batch_alter_table('workflow_plugin_triggers', schema=None) as batch_op: batch_op.create_index('workflow_plugin_trigger_tenant_subscription_idx', ['tenant_id', 'subscription_id', 'event_name'], unique=False) @@ -207,6 +210,7 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='workflow_schedule_plan_pkey'), sa.UniqueConstraint('app_id', 'node_id', name='uniq_app_node') ) + with op.batch_alter_table('workflow_schedule_plans', schema=None) as batch_op: batch_op.create_index('workflow_schedule_plan_next_idx', ['next_run_at'], unique=False) @@ -264,6 +268,7 @@ def upgrade(): 
sa.Column('finished_at', sa.DateTime(), nullable=True), sa.PrimaryKeyConstraint('id', name='workflow_trigger_log_pkey') ) + with op.batch_alter_table('workflow_trigger_logs', schema=None) as batch_op: batch_op.create_index('workflow_trigger_log_created_at_idx', ['created_at'], unique=False) batch_op.create_index('workflow_trigger_log_status_idx', ['status'], unique=False) @@ -299,6 +304,7 @@ def upgrade(): sa.UniqueConstraint('app_id', 'node_id', name='uniq_node'), sa.UniqueConstraint('webhook_id', name='uniq_webhook_id') ) + with op.batch_alter_table('workflow_webhook_triggers', schema=None) as batch_op: batch_op.create_index('workflow_webhook_trigger_tenant_idx', ['tenant_id'], unique=False) diff --git a/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py b/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py index fae506906b..127ffd5599 100644 --- a/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py +++ b/api/migrations/versions/23db93619b9d_add_message_files_into_agent_thought.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '23db93619b9d' down_revision = '8ae9bc661daa' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.add_column(sa.Column('message_files', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.add_column(sa.Column('message_files', models.types.LongText(), nullable=True)) + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.add_column(sa.Column('message_files', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py b/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py index 2676ef0b94..31829d8e58 100644 --- a/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py +++ b/api/migrations/versions/246ba09cbbdb_add_app_anntation_setting.py @@ -62,14 +62,8 @@ def upgrade(): def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_reply', sa.TEXT(), autoincrement=False, nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_reply', models.types.LongText(), autoincrement=False, nullable=True)) + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_reply', models.types.LongText(), autoincrement=False, nullable=True)) with op.batch_alter_table('app_annotation_settings', schema=None) as batch_op: batch_op.drop_index('app_annotation_settings_app_idx') diff --git a/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py b/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py index 3362a3a09f..07a8cd86b1 100644 --- a/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py +++ b/api/migrations/versions/2a3aebbbf4bb_add_app_tracing.py @@ -11,9 +11,6 @@ from alembic import op import models as models -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '2a3aebbbf4bb' down_revision = 'c031d46af369' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('apps', schema=None) as batch_op: - batch_op.add_column(sa.Column('tracing', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('apps', schema=None) as batch_op: - batch_op.add_column(sa.Column('tracing', models.types.LongText(), nullable=True)) + with op.batch_alter_table('apps', schema=None) as batch_op: + batch_op.add_column(sa.Column('tracing', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py b/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py index 40bd727f66..211b2d8882 100644 --- a/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py +++ b/api/migrations/versions/2e9819ca5b28_add_tenant_id_in_api_token.py @@ -7,14 +7,10 @@ Create Date: 2023-09-22 15:41:01.243183 """ import sqlalchemy as sa from alembic import op -from sqlalchemy.dialects import postgresql import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '2e9819ca5b28' down_revision = 'ab23c11305d4' @@ -24,35 +20,19 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('api_tokens', schema=None) as batch_op: - batch_op.add_column(sa.Column('tenant_id', postgresql.UUID(), nullable=True)) - batch_op.create_index('api_token_tenant_idx', ['tenant_id', 'type'], unique=False) - batch_op.drop_column('dataset_id') - else: - with op.batch_alter_table('api_tokens', schema=None) as batch_op: - batch_op.add_column(sa.Column('tenant_id', models.types.StringUUID(), nullable=True)) - batch_op.create_index('api_token_tenant_idx', ['tenant_id', 'type'], unique=False) - batch_op.drop_column('dataset_id') + with op.batch_alter_table('api_tokens', schema=None) as batch_op: + batch_op.add_column(sa.Column('tenant_id', models.types.StringUUID(), nullable=True)) + batch_op.create_index('api_token_tenant_idx', ['tenant_id', 'type'], unique=False) + batch_op.drop_column('dataset_id') # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('api_tokens', schema=None) as batch_op: - batch_op.add_column(sa.Column('dataset_id', postgresql.UUID(), autoincrement=False, nullable=True)) - batch_op.drop_index('api_token_tenant_idx') - batch_op.drop_column('tenant_id') - else: - with op.batch_alter_table('api_tokens', schema=None) as batch_op: - batch_op.add_column(sa.Column('dataset_id', models.types.StringUUID(), autoincrement=False, nullable=True)) - batch_op.drop_index('api_token_tenant_idx') - batch_op.drop_column('tenant_id') + with op.batch_alter_table('api_tokens', schema=None) as batch_op: + batch_op.add_column(sa.Column('dataset_id', models.types.StringUUID(), autoincrement=False, nullable=True)) + batch_op.drop_index('api_token_tenant_idx') + batch_op.drop_column('tenant_id') # ### end Alembic commands ### diff --git a/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py b/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py index 76056a9460..3491c85e2f 100644 --- a/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py +++ b/api/migrations/versions/42e85ed5564d_conversation_columns_set_nullable.py @@ -7,14 +7,10 @@ Create Date: 2024-03-07 08:30:29.133614 """ import sqlalchemy as sa from alembic import op -from sqlalchemy.dialects import postgresql import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '42e85ed5564d' down_revision = 'f9107f83abab' @@ -24,59 +20,31 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('app_model_config_id', - existing_type=postgresql.UUID(), - nullable=True) - batch_op.alter_column('model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=True) - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=True) - else: - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('app_model_config_id', - existing_type=models.types.StringUUID(), - nullable=True) - batch_op.alter_column('model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=True) - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=True) + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('app_model_config_id', + existing_type=models.types.StringUUID(), + nullable=True) + batch_op.alter_column('model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=True) + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=False) - batch_op.alter_column('model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=False) - batch_op.alter_column('app_model_config_id', - existing_type=postgresql.UUID(), - nullable=False) - else: - with op.batch_alter_table('conversations', schema=None) as batch_op: - batch_op.alter_column('model_id', - existing_type=sa.VARCHAR(length=255), - nullable=False) - batch_op.alter_column('model_provider', - existing_type=sa.VARCHAR(length=255), - nullable=False) - batch_op.alter_column('app_model_config_id', - existing_type=models.types.StringUUID(), - nullable=False) + with op.batch_alter_table('conversations', schema=None) as batch_op: + batch_op.alter_column('model_id', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('model_provider', + existing_type=sa.VARCHAR(length=255), + nullable=False) + batch_op.alter_column('app_model_config_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py b/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py index ef066587b7..8537a87233 100644 --- a/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py +++ b/api/migrations/versions/4829e54d2fee_change_message_chain_id_to_nullable.py @@ -6,14 +6,10 @@ Create Date: 2024-01-12 03:42:27.362415 """ from alembic import op -from sqlalchemy.dialects import postgresql import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '4829e54d2fee' down_revision = '114eed84c228' @@ -23,39 +19,21 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - # PostgreSQL: Keep original syntax - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.alter_column('message_chain_id', - existing_type=postgresql.UUID(), - nullable=True) - else: - # MySQL: Use compatible syntax - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.alter_column('message_chain_id', - existing_type=models.types.StringUUID(), - nullable=True) + + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.alter_column('message_chain_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - # PostgreSQL: Keep original syntax - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.alter_column('message_chain_id', - existing_type=postgresql.UUID(), - nullable=False) - else: - # MySQL: Use compatible syntax - with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: - batch_op.alter_column('message_chain_id', - existing_type=models.types.StringUUID(), - nullable=False) + + with op.batch_alter_table('message_agent_thoughts', schema=None) as batch_op: + batch_op.alter_column('message_chain_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py b/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py index b080e7680b..22405e3cc8 100644 --- a/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py +++ b/api/migrations/versions/563cf8bf777b_enable_tool_file_without_conversation_id.py @@ -6,14 +6,10 @@ Create Date: 2024-03-14 04:54:56.679506 """ from alembic import op -from sqlalchemy.dialects import postgresql import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '563cf8bf777b' down_revision = 'b5429b71023c' @@ -23,35 +19,19 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=True) - else: - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.alter_column('conversation_id', - existing_type=models.types.StringUUID(), - nullable=True) + with op.batch_alter_table('tool_files', schema=None) as batch_op: + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=False) - else: - with op.batch_alter_table('tool_files', schema=None) as batch_op: - batch_op.alter_column('conversation_id', - existing_type=models.types.StringUUID(), - nullable=False) + with op.batch_alter_table('tool_files', schema=None) as batch_op: + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=False) # ### end Alembic commands ### diff --git a/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py b/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py index 1ace8ea5a0..01d7d5ba21 100644 --- a/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py +++ b/api/migrations/versions/6e2cfb077b04_add_dataset_collection_binding.py @@ -48,12 +48,9 @@ def upgrade(): with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: batch_op.create_index('provider_model_name_idx', ['provider_name', 'model_name'], unique=False) - if _is_pg(conn): - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.add_column(sa.Column('collection_binding_id', postgresql.UUID(), nullable=True)) - else: - with op.batch_alter_table('datasets', schema=None) as batch_op: - batch_op.add_column(sa.Column('collection_binding_id', models.types.StringUUID(), nullable=True)) + + with op.batch_alter_table('datasets', schema=None) as batch_op: + batch_op.add_column(sa.Column('collection_binding_id', models.types.StringUUID(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py b/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py index 457338ef42..0faa48f535 100644 --- a/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py +++ b/api/migrations/versions/714aafe25d39_add_anntation_history_match_response.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '714aafe25d39' down_revision = 'f2a6fc85e260' @@ -23,16 +20,9 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_question', sa.Text(), nullable=False)) - batch_op.add_column(sa.Column('annotation_content', sa.Text(), nullable=False)) - else: - with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_question', models.types.LongText(), nullable=False)) - batch_op.add_column(sa.Column('annotation_content', models.types.LongText(), nullable=False)) + with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_question', models.types.LongText(), nullable=False)) + batch_op.add_column(sa.Column('annotation_content', models.types.LongText(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py b/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py index 7bcd1a1be3..aa7b4a21e2 100644 --- a/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py +++ b/api/migrations/versions/77e83833755c_add_app_config_retriever_resource.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '77e83833755c' down_revision = '6dcb43972bdc' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('retriever_resource', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('retriever_resource', models.types.LongText(), nullable=True)) + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('retriever_resource', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py b/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py index 3c0aa082d5..34a17697d3 100644 --- a/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py +++ b/api/migrations/versions/7ce5a52e4eee_add_tool_providers.py @@ -27,7 +27,6 @@ def upgrade(): conn = op.get_bind() if _is_pg(conn): - # PostgreSQL: Keep original syntax op.create_table('tool_providers', sa.Column('id', postgresql.UUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False), sa.Column('tenant_id', postgresql.UUID(), nullable=False), @@ -40,7 +39,6 @@ def upgrade(): sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') ) else: - # MySQL: Use compatible syntax op.create_table('tool_providers', sa.Column('id', models.types.StringUUID(), nullable=False), sa.Column('tenant_id', models.types.StringUUID(), nullable=False), @@ -52,12 +50,9 @@ def upgrade(): sa.PrimaryKeyConstraint('id', name='tool_provider_pkey'), sa.UniqueConstraint('tenant_id', 'tool_name', name='unique_tool_provider_tool_name') ) - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('sensitive_word_avoidance', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - 
batch_op.add_column(sa.Column('sensitive_word_avoidance', models.types.LongText(), nullable=True)) + + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('sensitive_word_avoidance', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py b/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py index beea90b384..884839c010 100644 --- a/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py +++ b/api/migrations/versions/88072f0caa04_add_custom_config_in_tenant.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '88072f0caa04' down_revision = '246ba09cbbdb' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tenants', schema=None) as batch_op: - batch_op.add_column(sa.Column('custom_config', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('tenants', schema=None) as batch_op: - batch_op.add_column(sa.Column('custom_config', models.types.LongText(), nullable=True)) + with op.batch_alter_table('tenants', schema=None) as batch_op: + batch_op.add_column(sa.Column('custom_config', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/89c7899ca936_.py b/api/migrations/versions/89c7899ca936_.py index 2420710e74..d26f1e82d6 100644 --- a/api/migrations/versions/89c7899ca936_.py +++ b/api/migrations/versions/89c7899ca936_.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '89c7899ca936' down_revision = '187385f442fc' @@ -23,39 +20,21 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('description', - existing_type=sa.VARCHAR(length=255), - type_=sa.Text(), - existing_nullable=True) - else: - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('description', - existing_type=sa.VARCHAR(length=255), - type_=models.types.LongText(), - existing_nullable=True) + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('description', + existing_type=sa.VARCHAR(length=255), + type_=models.types.LongText(), + existing_nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('description', - existing_type=sa.Text(), - type_=sa.VARCHAR(length=255), - existing_nullable=True) - else: - with op.batch_alter_table('sites', schema=None) as batch_op: - batch_op.alter_column('description', - existing_type=models.types.LongText(), - type_=sa.VARCHAR(length=255), - existing_nullable=True) + with op.batch_alter_table('sites', schema=None) as batch_op: + batch_op.alter_column('description', + existing_type=models.types.LongText(), + type_=sa.VARCHAR(length=255), + existing_nullable=True) # ### end Alembic commands ### diff --git a/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py b/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py index 111e81240b..6022ea2c20 100644 --- a/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py +++ b/api/migrations/versions/8ec536f3c800_rename_api_provider_credentails.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = '8ec536f3c800' down_revision = 'ad472b61a054' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('credentials_str', sa.Text(), nullable=False)) - else: - with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: - batch_op.add_column(sa.Column('credentials_str', models.types.LongText(), nullable=False)) + with op.batch_alter_table('tool_api_providers', schema=None) as batch_op: + batch_op.add_column(sa.Column('credentials_str', models.types.LongText(), nullable=False)) # ### end Alembic commands ### diff --git a/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py b/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py index 1c1c6cacbb..9d6d40114d 100644 --- a/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py +++ b/api/migrations/versions/8fe468ba0ca5_add_gpt4v_supports.py @@ -57,12 +57,9 @@ def upgrade(): batch_op.create_index('message_file_created_by_idx', ['created_by'], unique=False) batch_op.create_index('message_file_message_idx', ['message_id'], unique=False) - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('file_upload', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('file_upload', models.types.LongText(), nullable=True)) + + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('file_upload', models.types.LongText(), nullable=True)) if _is_pg(conn): with op.batch_alter_table('upload_files', schema=None) as batch_op: diff --git a/api/migrations/versions/9f4e3427ea84_add_created_by_role.py b/api/migrations/versions/9f4e3427ea84_add_created_by_role.py index 5d29d354f3..0b3f92a12e 100644 --- a/api/migrations/versions/9f4e3427ea84_add_created_by_role.py +++ b/api/migrations/versions/9f4e3427ea84_add_created_by_role.py @@ -24,7 +24,6 @@ def upgrade(): conn = op.get_bind() if _is_pg(conn): - # PostgreSQL: Keep original syntax with op.batch_alter_table('pinned_conversations', schema=None) as batch_op: 
batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'::character varying"), nullable=False)) batch_op.drop_index('pinned_conversation_conversation_idx') @@ -35,7 +34,6 @@ def upgrade(): batch_op.drop_index('saved_message_message_idx') batch_op.create_index('saved_message_message_idx', ['app_id', 'message_id', 'created_by_role', 'created_by'], unique=False) else: - # MySQL: Use compatible syntax with op.batch_alter_table('pinned_conversations', schema=None) as batch_op: batch_op.add_column(sa.Column('created_by_role', sa.String(length=255), server_default=sa.text("'end_user'"), nullable=False)) batch_op.drop_index('pinned_conversation_conversation_idx') diff --git a/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py b/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py index 616cb2f163..c8747a51f7 100644 --- a/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py +++ b/api/migrations/versions/a5b56fb053ef_app_config_add_speech_to_text.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = 'a5b56fb053ef' down_revision = 'd3d503a3471c' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('speech_to_text', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('speech_to_text', models.types.LongText(), nullable=True)) + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('speech_to_text', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py b/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py index 900ff78036..f56aeb7e66 100644 --- a/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py +++ b/api/migrations/versions/a9836e3baeee_add_external_data_tools_in_app_model_.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = 'a9836e3baeee' down_revision = '968fff4c0ab9' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('external_data_tools', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('external_data_tools', models.types.LongText(), nullable=True)) + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('external_data_tools', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/b24be59fbb04_.py b/api/migrations/versions/b24be59fbb04_.py index b0a6d10d8c..ae91eaf1bc 100644 --- a/api/migrations/versions/b24be59fbb04_.py +++ b/api/migrations/versions/b24be59fbb04_.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = 'b24be59fbb04' down_revision = 'de95f5c77138' @@ -23,14 +20,8 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('text_to_speech', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('text_to_speech', models.types.LongText(), nullable=True)) + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('text_to_speech', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py b/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py index 772395c25b..c02c24c23f 100644 --- a/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py +++ b/api/migrations/versions/b3a09c049e8e_add_advanced_prompt_templates.py @@ -11,9 +11,6 @@ from alembic import op import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = 'b3a09c049e8e' down_revision = '2e9819ca5b28' @@ -23,20 +20,11 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('prompt_type', sa.String(length=255), nullable=False, server_default='simple')) - batch_op.add_column(sa.Column('chat_prompt_config', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('completion_prompt_config', sa.Text(), nullable=True)) - batch_op.add_column(sa.Column('dataset_configs', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('prompt_type', sa.String(length=255), nullable=False, server_default='simple')) - batch_op.add_column(sa.Column('chat_prompt_config', models.types.LongText(), nullable=True)) - batch_op.add_column(sa.Column('completion_prompt_config', models.types.LongText(), nullable=True)) - batch_op.add_column(sa.Column('dataset_configs', models.types.LongText(), nullable=True)) + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('prompt_type', sa.String(length=255), nullable=False, server_default='simple')) + batch_op.add_column(sa.Column('chat_prompt_config', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('completion_prompt_config', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('dataset_configs', models.types.LongText(), nullable=True)) # ### end Alembic commands ### diff --git a/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py b/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py index 76be794ff4..fe51d1c78d 100644 --- a/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py +++ b/api/migrations/versions/c031d46af369_remove_app_model_config_trace_config_.py @@ -7,7 +7,6 @@ Create Date: 2024-06-17 10:01:00.255189 """ import sqlalchemy as sa from alembic import op -from sqlalchemy.dialects import postgresql import models.types diff --git a/api/migrations/versions/e1901f623fd0_add_annotation_reply.py b/api/migrations/versions/e1901f623fd0_add_annotation_reply.py index 9e02ec5d84..36e934f0fc 100644 --- a/api/migrations/versions/e1901f623fd0_add_annotation_reply.py +++ b/api/migrations/versions/e1901f623fd0_add_annotation_reply.py @@ -54,12 +54,9 @@ def upgrade(): batch_op.create_index('app_annotation_hit_histories_annotation_idx', ['annotation_id'], unique=False) batch_op.create_index('app_annotation_hit_histories_app_idx', ['app_id'], unique=False) - if _is_pg(conn): - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_reply', sa.Text(), nullable=True)) - else: - with op.batch_alter_table('app_model_configs', schema=None) as batch_op: - batch_op.add_column(sa.Column('annotation_reply', models.types.LongText(), nullable=True)) + + with op.batch_alter_table('app_model_configs', schema=None) as batch_op: + batch_op.add_column(sa.Column('annotation_reply', models.types.LongText(), nullable=True)) if _is_pg(conn): with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: @@ -68,54 +65,31 @@ def upgrade(): with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: batch_op.add_column(sa.Column('type', sa.String(length=40), server_default=sa.text("'dataset'"), nullable=False)) - if _is_pg(conn): - with op.batch_alter_table('message_annotations', schema=None) as batch_op: - batch_op.add_column(sa.Column('question', sa.Text(), 
nullable=True)) - batch_op.add_column(sa.Column('hit_count', sa.Integer(), server_default=sa.text('0'), nullable=False)) - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=True) - batch_op.alter_column('message_id', - existing_type=postgresql.UUID(), - nullable=True) - else: - with op.batch_alter_table('message_annotations', schema=None) as batch_op: - batch_op.add_column(sa.Column('question', models.types.LongText(), nullable=True)) - batch_op.add_column(sa.Column('hit_count', sa.Integer(), server_default=sa.text('0'), nullable=False)) - batch_op.alter_column('conversation_id', - existing_type=models.types.StringUUID(), - nullable=True) - batch_op.alter_column('message_id', - existing_type=models.types.StringUUID(), - nullable=True) + with op.batch_alter_table('message_annotations', schema=None) as batch_op: + batch_op.add_column(sa.Column('question', models.types.LongText(), nullable=True)) + batch_op.add_column(sa.Column('hit_count', sa.Integer(), server_default=sa.text('0'), nullable=False)) + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=True) + batch_op.alter_column('message_id', + existing_type=models.types.StringUUID(), + nullable=True) # ### end Alembic commands ### def downgrade(): # ### commands auto generated by Alembic - please adjust! ### - conn = op.get_bind() - if _is_pg(conn): - with op.batch_alter_table('message_annotations', schema=None) as batch_op: - batch_op.alter_column('message_id', - existing_type=postgresql.UUID(), - nullable=False) - batch_op.alter_column('conversation_id', - existing_type=postgresql.UUID(), - nullable=False) - batch_op.drop_column('hit_count') - batch_op.drop_column('question') - else: - with op.batch_alter_table('message_annotations', schema=None) as batch_op: - batch_op.alter_column('message_id', - existing_type=models.types.StringUUID(), - nullable=False) - batch_op.alter_column('conversation_id', - existing_type=models.types.StringUUID(), - nullable=False) - batch_op.drop_column('hit_count') - batch_op.drop_column('question') + with op.batch_alter_table('message_annotations', schema=None) as batch_op: + batch_op.alter_column('message_id', + existing_type=models.types.StringUUID(), + nullable=False) + batch_op.alter_column('conversation_id', + existing_type=models.types.StringUUID(), + nullable=False) + batch_op.drop_column('hit_count') + batch_op.drop_column('question') with op.batch_alter_table('dataset_collection_bindings', schema=None) as batch_op: batch_op.drop_column('type') diff --git a/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py b/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py index 02098e91c1..ac1c14e50c 100644 --- a/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py +++ b/api/migrations/versions/f2a6fc85e260_add_anntation_history_message_id.py @@ -12,9 +12,6 @@ from sqlalchemy.dialects import postgresql import models.types -def _is_pg(conn): - return conn.dialect.name == "postgresql" - # revision identifiers, used by Alembic. revision = 'f2a6fc85e260' down_revision = '46976cc39132' @@ -24,16 +21,9 @@ depends_on = None def upgrade(): # ### commands auto generated by Alembic - please adjust! 
### - conn = op.get_bind() - - if _is_pg(conn): - with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: - batch_op.add_column(sa.Column('message_id', postgresql.UUID(), nullable=False)) - batch_op.create_index('app_annotation_hit_histories_message_idx', ['message_id'], unique=False) - else: - with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: - batch_op.add_column(sa.Column('message_id', models.types.StringUUID(), nullable=False)) - batch_op.create_index('app_annotation_hit_histories_message_idx', ['message_id'], unique=False) + with op.batch_alter_table('app_annotation_hit_histories', schema=None) as batch_op: + batch_op.add_column(sa.Column('message_id', models.types.StringUUID(), nullable=False)) + batch_op.create_index('app_annotation_hit_histories_message_idx', ['message_id'], unique=False) # ### end Alembic commands ### diff --git a/api/models/account.py b/api/models/account.py index 420e6adc6c..f7a9c20026 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -8,7 +8,7 @@ from uuid import uuid4 import sqlalchemy as sa from flask_login import UserMixin from sqlalchemy import DateTime, String, func, select -from sqlalchemy.orm import Mapped, Session, mapped_column +from sqlalchemy.orm import Mapped, Session, mapped_column, validates from typing_extensions import deprecated from .base import TypeBase @@ -116,6 +116,12 @@ class Account(UserMixin, TypeBase): role: TenantAccountRole | None = field(default=None, init=False) _current_tenant: "Tenant | None" = field(default=None, init=False) + @validates("status") + def _normalize_status(self, _key: str, value: str | AccountStatus) -> str: + if isinstance(value, AccountStatus): + return value.value + return value + @property def is_password_set(self): return self.password is not None diff --git a/api/schedule/queue_monitor_task.py b/api/schedule/queue_monitor_task.py index db610df290..77d6b5a138 100644 --- a/api/schedule/queue_monitor_task.py +++ b/api/schedule/queue_monitor_task.py @@ -16,6 +16,11 @@ celery_redis = Redis( port=redis_config.get("port") or 6379, password=redis_config.get("password") or None, db=int(redis_config.get("virtual_host")) if redis_config.get("virtual_host") else 1, + ssl=bool(dify_config.BROKER_USE_SSL), + ssl_ca_certs=dify_config.REDIS_SSL_CA_CERTS if dify_config.BROKER_USE_SSL else None, + ssl_cert_reqs=getattr(dify_config, "REDIS_SSL_CERT_REQS", None) if dify_config.BROKER_USE_SSL else None, + ssl_certfile=getattr(dify_config, "REDIS_SSL_CERTFILE", None) if dify_config.BROKER_USE_SSL else None, + ssl_keyfile=getattr(dify_config, "REDIS_SSL_KEYFILE", None) if dify_config.BROKER_USE_SSL else None, ) logger = logging.getLogger(__name__) diff --git a/api/services/entities/model_provider_entities.py b/api/services/entities/model_provider_entities.py index f405546909..a29d848ac5 100644 --- a/api/services/entities/model_provider_entities.py +++ b/api/services/entities/model_provider_entities.py @@ -70,7 +70,6 @@ class ProviderResponse(BaseModel): description: I18nObject | None = None icon_small: I18nObject | None = None icon_small_dark: I18nObject | None = None - icon_large: I18nObject | None = None background: str | None = None help: ProviderHelpEntity | None = None supported_model_types: Sequence[ModelType] @@ -98,11 +97,6 @@ class ProviderResponse(BaseModel): en_US=f"{url_prefix}/icon_small_dark/en_US", zh_Hans=f"{url_prefix}/icon_small_dark/zh_Hans", ) - - if self.icon_large is not None: - self.icon_large = I18nObject( - 
en_US=f"{url_prefix}/icon_large/en_US", zh_Hans=f"{url_prefix}/icon_large/zh_Hans" - ) return self @@ -116,7 +110,6 @@ class ProviderWithModelsResponse(BaseModel): label: I18nObject icon_small: I18nObject | None = None icon_small_dark: I18nObject | None = None - icon_large: I18nObject | None = None status: CustomConfigurationStatus models: list[ProviderModelWithStatusEntity] @@ -134,11 +127,6 @@ class ProviderWithModelsResponse(BaseModel): self.icon_small_dark = I18nObject( en_US=f"{url_prefix}/icon_small_dark/en_US", zh_Hans=f"{url_prefix}/icon_small_dark/zh_Hans" ) - - if self.icon_large is not None: - self.icon_large = I18nObject( - en_US=f"{url_prefix}/icon_large/en_US", zh_Hans=f"{url_prefix}/icon_large/zh_Hans" - ) return self @@ -163,11 +151,6 @@ class SimpleProviderEntityResponse(SimpleProviderEntity): self.icon_small_dark = I18nObject( en_US=f"{url_prefix}/icon_small_dark/en_US", zh_Hans=f"{url_prefix}/icon_small_dark/zh_Hans" ) - - if self.icon_large is not None: - self.icon_large = I18nObject( - en_US=f"{url_prefix}/icon_large/en_US", zh_Hans=f"{url_prefix}/icon_large/zh_Hans" - ) return self diff --git a/api/services/model_provider_service.py b/api/services/model_provider_service.py index eea382febe..edd1004b82 100644 --- a/api/services/model_provider_service.py +++ b/api/services/model_provider_service.py @@ -99,7 +99,6 @@ class ModelProviderService: description=provider_configuration.provider.description, icon_small=provider_configuration.provider.icon_small, icon_small_dark=provider_configuration.provider.icon_small_dark, - icon_large=provider_configuration.provider.icon_large, background=provider_configuration.provider.background, help=provider_configuration.provider.help, supported_model_types=provider_configuration.provider.supported_model_types, @@ -423,7 +422,6 @@ class ModelProviderService: label=first_model.provider.label, icon_small=first_model.provider.icon_small, icon_small_dark=first_model.provider.icon_small_dark, - icon_large=first_model.provider.icon_large, status=CustomConfigurationStatus.ACTIVE, models=[ ProviderModelWithStatusEntity( @@ -488,7 +486,6 @@ class ModelProviderService: provider=result.provider.provider, label=result.provider.label, icon_small=result.provider.icon_small, - icon_large=result.provider.icon_large, supported_model_types=result.provider.supported_model_types, ), ) @@ -522,7 +519,7 @@ class ModelProviderService: :param tenant_id: workspace id :param provider: provider name - :param icon_type: icon type (icon_small or icon_large) + :param icon_type: icon type (icon_small or icon_small_dark) :param lang: language (zh_Hans or en_US) :return: """ diff --git a/api/services/tools/api_tools_manage_service.py b/api/services/tools/api_tools_manage_service.py index 250d29f335..c32157919b 100644 --- a/api/services/tools/api_tools_manage_service.py +++ b/api/services/tools/api_tools_manage_service.py @@ -85,7 +85,9 @@ class ApiToolManageService: raise ValueError(f"invalid schema: {str(e)}") @staticmethod - def convert_schema_to_tool_bundles(schema: str, extra_info: dict | None = None) -> tuple[list[ApiToolBundle], str]: + def convert_schema_to_tool_bundles( + schema: str, extra_info: dict | None = None + ) -> tuple[list[ApiToolBundle], ApiProviderSchemaType]: """ convert schema to tool bundles @@ -103,7 +105,7 @@ class ApiToolManageService: provider_name: str, icon: dict, credentials: dict, - schema_type: str, + schema_type: ApiProviderSchemaType, schema: str, privacy_policy: str, custom_disclaimer: str, @@ -112,9 +114,6 @@ class 
ApiToolManageService: """ create api tool provider """ - if schema_type not in [member.value for member in ApiProviderSchemaType]: - raise ValueError(f"invalid schema type {schema}") - provider_name = provider_name.strip() # check if the provider exists @@ -241,18 +240,15 @@ class ApiToolManageService: original_provider: str, icon: dict, credentials: dict, - schema_type: str, + _schema_type: ApiProviderSchemaType, schema: str, - privacy_policy: str, + privacy_policy: str | None, custom_disclaimer: str, labels: list[str], ): """ update api tool provider """ - if schema_type not in [member.value for member in ApiProviderSchemaType]: - raise ValueError(f"invalid schema type {schema}") - provider_name = provider_name.strip() # check if the provider exists @@ -277,7 +273,7 @@ class ApiToolManageService: provider.icon = json.dumps(icon) provider.schema = schema provider.description = extra_info.get("description", "") - provider.schema_type_str = ApiProviderSchemaType.OPENAPI + provider.schema_type_str = schema_type provider.tools_str = json.dumps(jsonable_encoder(tool_bundles)) provider.privacy_policy = privacy_policy provider.custom_disclaimer = custom_disclaimer @@ -356,7 +352,7 @@ class ApiToolManageService: tool_name: str, credentials: dict, parameters: dict, - schema_type: str, + schema_type: ApiProviderSchemaType, schema: str, ): """ diff --git a/api/tests/integration_tests/model_runtime/__mock/plugin_model.py b/api/tests/integration_tests/model_runtime/__mock/plugin_model.py index d59d5dc0fe..5012defdad 100644 --- a/api/tests/integration_tests/model_runtime/__mock/plugin_model.py +++ b/api/tests/integration_tests/model_runtime/__mock/plugin_model.py @@ -48,10 +48,6 @@ class MockModelClass(PluginModelClient): en_US="https://example.com/icon_small.png", zh_Hans="https://example.com/icon_small.png", ), - icon_large=I18nObject( - en_US="https://example.com/icon_large.png", - zh_Hans="https://example.com/icon_large.png", - ), supported_model_types=[ModelType.LLM], configurate_methods=[ConfigurateMethod.PREDEFINED_MODEL], models=[ diff --git a/api/tests/test_containers_integration_tests/services/test_model_provider_service.py b/api/tests/test_containers_integration_tests/services/test_model_provider_service.py index 612210ef86..d57ab7428b 100644 --- a/api/tests/test_containers_integration_tests/services/test_model_provider_service.py +++ b/api/tests/test_containers_integration_tests/services/test_model_provider_service.py @@ -228,7 +228,6 @@ class TestModelProviderService: mock_provider_entity.description = {"en_US": "OpenAI provider", "zh_Hans": "OpenAI 提供商"} mock_provider_entity.icon_small = {"en_US": "icon_small.png", "zh_Hans": "icon_small.png"} mock_provider_entity.icon_small_dark = None - mock_provider_entity.icon_large = {"en_US": "icon_large.png", "zh_Hans": "icon_large.png"} mock_provider_entity.background = "#FF6B6B" mock_provider_entity.help = None mock_provider_entity.supported_model_types = [ModelType.LLM, ModelType.TEXT_EMBEDDING] @@ -302,7 +301,6 @@ class TestModelProviderService: mock_provider_entity_llm.description = {"en_US": "OpenAI provider", "zh_Hans": "OpenAI 提供商"} mock_provider_entity_llm.icon_small = {"en_US": "icon_small.png", "zh_Hans": "icon_small.png"} mock_provider_entity_llm.icon_small_dark = None - mock_provider_entity_llm.icon_large = {"en_US": "icon_large.png", "zh_Hans": "icon_large.png"} mock_provider_entity_llm.background = "#FF6B6B" mock_provider_entity_llm.help = None mock_provider_entity_llm.supported_model_types = [ModelType.LLM] @@ -316,7 +314,6 @@ class 
TestModelProviderService: mock_provider_entity_embedding.description = {"en_US": "Cohere provider", "zh_Hans": "Cohere 提供商"} mock_provider_entity_embedding.icon_small = {"en_US": "icon_small.png", "zh_Hans": "icon_small.png"} mock_provider_entity_embedding.icon_small_dark = None - mock_provider_entity_embedding.icon_large = {"en_US": "icon_large.png", "zh_Hans": "icon_large.png"} mock_provider_entity_embedding.background = "#4ECDC4" mock_provider_entity_embedding.help = None mock_provider_entity_embedding.supported_model_types = [ModelType.TEXT_EMBEDDING] @@ -419,7 +416,6 @@ class TestModelProviderService: provider="openai", label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"), - icon_large=I18nObject(en_US="icon_large.png", zh_Hans="icon_large.png"), supported_model_types=[ModelType.LLM], configurate_methods=[], models=[], @@ -431,7 +427,6 @@ class TestModelProviderService: provider="openai", label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"), - icon_large=I18nObject(en_US="icon_large.png", zh_Hans="icon_large.png"), supported_model_types=[ModelType.LLM], configurate_methods=[], models=[], @@ -655,7 +650,6 @@ class TestModelProviderService: provider="openai", label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"), - icon_large=I18nObject(en_US="icon_large.png", zh_Hans="icon_large.png"), supported_model_types=[ModelType.LLM], ), ) @@ -1027,7 +1021,6 @@ class TestModelProviderService: label={"en_US": "OpenAI", "zh_Hans": "OpenAI"}, icon_small={"en_US": "icon_small.png", "zh_Hans": "icon_small.png"}, icon_small_dark=None, - icon_large={"en_US": "icon_large.png", "zh_Hans": "icon_large.png"}, ), model="gpt-3.5-turbo", model_type=ModelType.LLM, @@ -1045,7 +1038,6 @@ class TestModelProviderService: label={"en_US": "OpenAI", "zh_Hans": "OpenAI"}, icon_small={"en_US": "icon_small.png", "zh_Hans": "icon_small.png"}, icon_small_dark=None, - icon_large={"en_US": "icon_large.png", "zh_Hans": "icon_large.png"}, ), model="gpt-4", model_type=ModelType.LLM, diff --git a/api/tests/unit_tests/controllers/common/test_fields.py b/api/tests/unit_tests/controllers/common/test_fields.py new file mode 100644 index 0000000000..d4dc13127d --- /dev/null +++ b/api/tests/unit_tests/controllers/common/test_fields.py @@ -0,0 +1,69 @@ +import builtins +from types import SimpleNamespace +from unittest.mock import patch + +from flask.views import MethodView as FlaskMethodView + +_NEEDS_METHOD_VIEW_CLEANUP = False +if not hasattr(builtins, "MethodView"): + builtins.MethodView = FlaskMethodView + _NEEDS_METHOD_VIEW_CLEANUP = True +from controllers.common.fields import Parameters, Site +from core.app.app_config.common.parameters_mapping import get_parameters_from_feature_dict +from models.model import IconType + + +def test_parameters_model_round_trip(): + parameters = get_parameters_from_feature_dict(features_dict={}, user_input_form=[]) + + model = Parameters.model_validate(parameters) + + assert model.model_dump(mode="json") == parameters + + +def test_site_icon_url_uses_signed_url_for_image_icon(): + site = SimpleNamespace( + title="Example", + chat_color_theme=None, + chat_color_theme_inverted=False, + icon_type=IconType.IMAGE, + icon="file-id", + icon_background=None, + description=None, + copyright=None, + privacy_policy=None, + custom_disclaimer=None, + default_language="en-US", + show_workflow_steps=True, 
+ use_icon_as_answer_icon=False, + ) + + with patch("controllers.common.fields.file_helpers.get_signed_file_url", return_value="signed") as mock_helper: + model = Site.model_validate(site) + + assert model.icon_url == "signed" + mock_helper.assert_called_once_with("file-id") + + +def test_site_icon_url_is_none_for_non_image_icon(): + site = SimpleNamespace( + title="Example", + chat_color_theme=None, + chat_color_theme_inverted=False, + icon_type=IconType.EMOJI, + icon="file-id", + icon_background=None, + description=None, + copyright=None, + privacy_policy=None, + custom_disclaimer=None, + default_language="en-US", + show_workflow_steps=True, + use_icon_as_answer_icon=False, + ) + + with patch("controllers.common.fields.file_helpers.get_signed_file_url") as mock_helper: + model = Site.model_validate(site) + + assert model.icon_url is None + mock_helper.assert_not_called() diff --git a/api/tests/unit_tests/controllers/console/app/test_xss_prevention.py b/api/tests/unit_tests/controllers/console/app/test_xss_prevention.py new file mode 100644 index 0000000000..313818547b --- /dev/null +++ b/api/tests/unit_tests/controllers/console/app/test_xss_prevention.py @@ -0,0 +1,254 @@ +""" +Unit tests for XSS prevention in App payloads. + +This test module validates that HTML tags, JavaScript, and other potentially +dangerous content are rejected in App names and descriptions. +""" + +import pytest + +from controllers.console.app.app import CopyAppPayload, CreateAppPayload, UpdateAppPayload + + +class TestXSSPreventionUnit: + """Unit tests for XSS prevention in App payloads.""" + + def test_create_app_valid_names(self): + """Test CreateAppPayload with valid app names.""" + # Normal app names should be valid + valid_names = [ + "My App", + "Test App 123", + "App with - dash", + "App with _ underscore", + "App with + plus", + "App with () parentheses", + "App with [] brackets", + "App with {} braces", + "App with ! 
exclamation", + "App with @ at", + "App with # hash", + "App with $ dollar", + "App with % percent", + "App with ^ caret", + "App with & ampersand", + "App with * asterisk", + "Unicode: 测试应用", + "Emoji: 🤖", + "Mixed: Test 测试 123", + ] + + for name in valid_names: + payload = CreateAppPayload( + name=name, + mode="chat", + ) + assert payload.name == name + + def test_create_app_xss_script_tags(self): + """Test CreateAppPayload rejects script tags.""" + xss_payloads = [ + "", + "", + "", + "", + "", + "", + ] + + for name in xss_payloads: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload(name=name, mode="chat") + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_xss_iframe_tags(self): + """Test CreateAppPayload rejects iframe tags.""" + xss_payloads = [ + "", + "", + "", + ] + + for name in xss_payloads: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload(name=name, mode="chat") + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_xss_javascript_protocol(self): + """Test CreateAppPayload rejects javascript: protocol.""" + xss_payloads = [ + "javascript:alert(1)", + "JAVASCRIPT:alert(1)", + "JavaScript:alert(document.cookie)", + "javascript:void(0)", + "javascript://comment%0Aalert(1)", + ] + + for name in xss_payloads: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload(name=name, mode="chat") + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_xss_svg_onload(self): + """Test CreateAppPayload rejects SVG with onload.""" + xss_payloads = [ + "", + "", + "", + ] + + for name in xss_payloads: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload(name=name, mode="chat") + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_xss_event_handlers(self): + """Test CreateAppPayload rejects HTML event handlers.""" + xss_payloads = [ + "
", + "", + "", + "", + "", + "
", + "", + ] + + for name in xss_payloads: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload(name=name, mode="chat") + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_xss_object_embed(self): + """Test CreateAppPayload rejects object and embed tags.""" + xss_payloads = [ + "", + "", + "", + ] + + for name in xss_payloads: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload(name=name, mode="chat") + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_xss_link_javascript(self): + """Test CreateAppPayload rejects link tags with javascript.""" + xss_payloads = [ + "", + "", + ] + + for name in xss_payloads: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload(name=name, mode="chat") + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_xss_in_description(self): + """Test CreateAppPayload rejects XSS in description.""" + xss_descriptions = [ + "", + "javascript:alert(1)", + "", + ] + + for description in xss_descriptions: + with pytest.raises(ValueError) as exc_info: + CreateAppPayload( + name="Valid Name", + mode="chat", + description=description, + ) + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_create_app_valid_descriptions(self): + """Test CreateAppPayload with valid descriptions.""" + valid_descriptions = [ + "A simple description", + "Description with < and > symbols", + "Description with & ampersand", + "Description with 'quotes' and \"double quotes\"", + "Description with / slashes", + "Description with \\ backslashes", + "Description with ; semicolons", + "Unicode: 这是一个描述", + "Emoji: 🎉🚀", + ] + + for description in valid_descriptions: + payload = CreateAppPayload( + name="Valid App Name", + mode="chat", + description=description, + ) + assert payload.description == description + + def test_create_app_none_description(self): + """Test CreateAppPayload with None description.""" + payload = CreateAppPayload( + name="Valid App Name", + mode="chat", + description=None, + ) + assert payload.description is None + + def test_update_app_xss_prevention(self): + """Test UpdateAppPayload also prevents XSS.""" + xss_names = [ + "", + "javascript:alert(1)", + "", + ] + + for name in xss_names: + with pytest.raises(ValueError) as exc_info: + UpdateAppPayload(name=name) + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_update_app_valid_names(self): + """Test UpdateAppPayload with valid names.""" + payload = UpdateAppPayload(name="Valid Updated Name") + assert payload.name == "Valid Updated Name" + + def test_copy_app_xss_prevention(self): + """Test CopyAppPayload also prevents XSS.""" + xss_names = [ + "", + "javascript:alert(1)", + "", + ] + + for name in xss_names: + with pytest.raises(ValueError) as exc_info: + CopyAppPayload(name=name) + assert "invalid characters or patterns" in str(exc_info.value).lower() + + def test_copy_app_valid_names(self): + """Test CopyAppPayload with valid names.""" + payload = CopyAppPayload(name="Valid Copy Name") + assert payload.name == "Valid Copy Name" + + def test_copy_app_none_name(self): + """Test CopyAppPayload with None name (should be allowed).""" + payload = CopyAppPayload(name=None) + assert payload.name is None + + def test_edge_case_angle_brackets_content(self): + """Test that angle brackets with actual content are rejected.""" + # Angle brackets without valid HTML-like patterns should be checked + # The regex 
+        # Angle brackets without valid HTML-like patterns should be checked
+        # The regex pattern <.*?on\w+\s*= should catch event handlers
+        # But let's verify other patterns too
+
+        # Valid: angle brackets used as symbols (not matched by our patterns)
+        # Our patterns specifically look for dangerous constructs
+
+        # Invalid: actual HTML tags with event handlers
+        invalid_names = [
+            "<div onclick=alert(1)>content</div>",
+            "<span onmouseover=alert(1)>hover</span>",
+        ]
+
+        for name in invalid_names:
+            with pytest.raises(ValueError) as exc_info:
+                CreateAppPayload(name=name, mode="chat")
+            assert "invalid characters or patterns" in str(exc_info.value).lower()
diff --git a/api/tests/unit_tests/controllers/console/auth/test_oauth.py b/api/tests/unit_tests/controllers/console/auth/test_oauth.py
index 399caf8c4d..3ddfcdb832 100644
--- a/api/tests/unit_tests/controllers/console/auth/test_oauth.py
+++ b/api/tests/unit_tests/controllers/console/auth/test_oauth.py
@@ -171,7 +171,7 @@ class TestOAuthCallback:
     ):
         mock_config.CONSOLE_WEB_URL = "http://localhost:3000"
         mock_get_providers.return_value = {"github": oauth_setup["provider"]}
-        mock_generate_account.return_value = oauth_setup["account"]
+        mock_generate_account.return_value = (oauth_setup["account"], True)
         mock_account_service.login.return_value = oauth_setup["token_pair"]

         with app.test_request_context("/auth/oauth/github/callback?code=test_code"):
@@ -179,7 +179,7 @@

         oauth_setup["provider"].get_access_token.assert_called_once_with("test_code")
         oauth_setup["provider"].get_user_info.assert_called_once_with("access_token")
-        mock_redirect.assert_called_once_with("http://localhost:3000")
+        mock_redirect.assert_called_once_with("http://localhost:3000?oauth_new_user=true")

     @pytest.mark.parametrize(
         ("exception", "expected_error"),
@@ -223,7 +223,7 @@
             # This documents actual behavior. See test_defensive_check_for_closed_account_status for details
             (
                 AccountStatus.CLOSED.value,
-                "http://localhost:3000",
+                "http://localhost:3000?oauth_new_user=false",
             ),
         ],
     )
@@ -260,7 +260,7 @@
         account = MagicMock()
         account.status = account_status
         account.id = "123"
-        mock_generate_account.return_value = account
+        mock_generate_account.return_value = (account, False)

         # Mock login for CLOSED status
         mock_token_pair = MagicMock()
@@ -296,7 +296,7 @@

         mock_account = MagicMock()
         mock_account.status = AccountStatus.PENDING
-        mock_generate_account.return_value = mock_account
+        mock_generate_account.return_value = (mock_account, False)

         mock_token_pair = MagicMock()
         mock_token_pair.access_token = "jwt_access_token"
@@ -360,7 +360,7 @@
         closed_account.status = AccountStatus.CLOSED
         closed_account.id = "123"
         closed_account.name = "Closed Account"
-        mock_generate_account.return_value = closed_account
+        mock_generate_account.return_value = (closed_account, False)

         # Mock successful login (current behavior)
         mock_token_pair = MagicMock()
@@ -374,7 +374,7 @@
             resource.get("github")

         # Verify current behavior: login succeeds (this is NOT ideal)
-        mock_redirect.assert_called_once_with("http://localhost:3000")
+        mock_redirect.assert_called_once_with("http://localhost:3000?oauth_new_user=false")
         mock_account_service.login.assert_called_once()

         # Document expected behavior in comments:
@@ -458,8 +458,9 @@
             with pytest.raises(AccountRegisterError):
                 _generate_account("github", user_info)
         else:
-            result = _generate_account("github", user_info)
+            result, oauth_new_user = _generate_account("github", user_info)
             assert result == mock_account
+            assert oauth_new_user == should_create

             if should_create:
                 mock_register_service.register.assert_called_once_with(
@@ -490,9 +491,10 @@
         mock_tenant_service.create_tenant.return_value = mock_new_tenant

         with app.test_request_context(headers={"Accept-Language": "en-US,en;q=0.9"}):
-            result = _generate_account("github", user_info)
+            result, oauth_new_user = _generate_account("github", user_info)
_generate_account("github", user_info) + result, oauth_new_user = _generate_account("github", user_info) assert result == mock_account + assert oauth_new_user is False mock_tenant_service.create_tenant.assert_called_once_with("Test User's Workspace") mock_tenant_service.create_tenant_member.assert_called_once_with( mock_new_tenant, mock_account, role="owner" diff --git a/api/tests/unit_tests/core/rag/cleaner/__init__.py b/api/tests/unit_tests/core/rag/cleaner/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/rag/cleaner/test_clean_processor.py b/api/tests/unit_tests/core/rag/cleaner/test_clean_processor.py new file mode 100644 index 0000000000..65ee62b8dd --- /dev/null +++ b/api/tests/unit_tests/core/rag/cleaner/test_clean_processor.py @@ -0,0 +1,213 @@ +from core.rag.cleaner.clean_processor import CleanProcessor + + +class TestCleanProcessor: + """Test cases for CleanProcessor.clean method.""" + + def test_clean_default_removal_of_invalid_symbols(self): + """Test default cleaning removes invalid symbols.""" + # Test <| replacement + assert CleanProcessor.clean("text<|with<|invalid", None) == "text replacement + assert CleanProcessor.clean("text|>with|>invalid", None) == "text>with>invalid" + + # Test removal of control characters + text_with_control = "normal\x00text\x1fwith\x07control\x7fchars" + expected = "normaltextwithcontrolchars" + assert CleanProcessor.clean(text_with_control, None) == expected + + # Test U+FFFE removal + text_with_ufffe = "normal\ufffepadding" + expected = "normalpadding" + assert CleanProcessor.clean(text_with_ufffe, None) == expected + + def test_clean_with_none_process_rule(self): + """Test cleaning with None process_rule - only default cleaning applied.""" + text = "Hello<|World\x00" + expected = "Hello becomes >, control chars and U+FFFE are removed + assert CleanProcessor.clean(text, None) == "<<>>" + + def test_clean_multiple_markdown_links_preserved(self): + """Test multiple markdown links are all preserved.""" + process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}} + + text = "[One](https://one.com) [Two](http://two.org) [Three](https://three.net)" + expected = "[One](https://one.com) [Two](http://two.org) [Three](https://three.net)" + assert CleanProcessor.clean(text, process_rule) == expected + + def test_clean_markdown_link_text_as_url(self): + """Test markdown link where the link text itself is a URL.""" + process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}} + + # Link text that looks like URL should be preserved + text = "[https://text-url.com](https://actual-url.com)" + expected = "[https://text-url.com](https://actual-url.com)" + assert CleanProcessor.clean(text, process_rule) == expected + + # Text URL without markdown should be removed + text = "https://text-url.com https://actual-url.com" + expected = " " + assert CleanProcessor.clean(text, process_rule) == expected + + def test_clean_complex_markdown_link_content(self): + """Test markdown links with complex content - known limitation with brackets in link text.""" + process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}} + + # Note: The regex pattern [^\]]* cannot handle ] within link text + # This is a known limitation - the pattern stops at the first ] + text = "[Text with [brackets] and (parens)](https://example.com)" + # Actual behavior: only matches up to first ], URL gets removed + expected = "[Text 
with [brackets] and (parens)](" + assert CleanProcessor.clean(text, process_rule) == expected + + # Test that properly formatted markdown links work + text = "[Text with (parens) and symbols](https://example.com)" + expected = "[Text with (parens) and symbols](https://example.com)" + assert CleanProcessor.clean(text, process_rule) == expected diff --git a/api/tests/unit_tests/core/rag/extractor/test_pdf_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_pdf_extractor.py new file mode 100644 index 0000000000..3167a9a301 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_pdf_extractor.py @@ -0,0 +1,186 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +import core.rag.extractor.pdf_extractor as pe + + +@pytest.fixture +def mock_dependencies(monkeypatch): + # Mock storage + saves = [] + + def save(key, data): + saves.append((key, data)) + + monkeypatch.setattr(pe, "storage", SimpleNamespace(save=save)) + + # Mock db + class DummySession: + def __init__(self): + self.added = [] + self.committed = False + + def add(self, obj): + self.added.append(obj) + + def add_all(self, objs): + self.added.extend(objs) + + def commit(self): + self.committed = True + + db_stub = SimpleNamespace(session=DummySession()) + monkeypatch.setattr(pe, "db", db_stub) + + # Mock UploadFile + class FakeUploadFile: + DEFAULT_ID = "test_file_id" + + def __init__(self, **kwargs): + # Assign id from DEFAULT_ID, allow override via kwargs if needed + self.id = self.DEFAULT_ID + for k, v in kwargs.items(): + setattr(self, k, v) + + monkeypatch.setattr(pe, "UploadFile", FakeUploadFile) + + # Mock config + monkeypatch.setattr(pe.dify_config, "FILES_URL", "http://files.local") + monkeypatch.setattr(pe.dify_config, "INTERNAL_FILES_URL", None) + monkeypatch.setattr(pe.dify_config, "STORAGE_TYPE", "local") + + return SimpleNamespace(saves=saves, db=db_stub, UploadFile=FakeUploadFile) + + +@pytest.mark.parametrize( + ("image_bytes", "expected_mime", "expected_ext", "file_id"), + [ + (b"\xff\xd8\xff some jpeg", "image/jpeg", "jpg", "test_file_id_jpeg"), + (b"\x89PNG\r\n\x1a\n some png", "image/png", "png", "test_file_id_png"), + ], +) +def test_extract_images_formats(mock_dependencies, monkeypatch, image_bytes, expected_mime, expected_ext, file_id): + saves = mock_dependencies.saves + db_stub = mock_dependencies.db + + # Customize FakeUploadFile id for this test case. + # Using monkeypatch ensures the class attribute is reset between parameter sets. 
+ monkeypatch.setattr(mock_dependencies.UploadFile, "DEFAULT_ID", file_id) + + # Mock page and image objects + mock_page = MagicMock() + mock_image_obj = MagicMock() + + def mock_extract(buf, fb_format=None): + buf.write(image_bytes) + + mock_image_obj.extract.side_effect = mock_extract + + mock_page.get_objects.return_value = [mock_image_obj] + + extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1") + + # We need to handle the import inside _extract_images + with patch("pypdfium2.raw") as mock_raw: + mock_raw.FPDF_PAGEOBJ_IMAGE = 1 + result = extractor._extract_images(mock_page) + + assert f"![image](http://files.local/files/{file_id}/file-preview)" in result + assert len(saves) == 1 + assert saves[0][1] == image_bytes + assert len(db_stub.session.added) == 1 + assert db_stub.session.added[0].tenant_id == "t1" + assert db_stub.session.added[0].size == len(image_bytes) + assert db_stub.session.added[0].mime_type == expected_mime + assert db_stub.session.added[0].extension == expected_ext + assert db_stub.session.committed is True + + +@pytest.mark.parametrize( + ("get_objects_side_effect", "get_objects_return_value"), + [ + (None, []), # Empty list + (None, None), # None returned + (Exception("Failed to get objects"), None), # Exception raised + ], +) +def test_extract_images_get_objects_scenarios(mock_dependencies, get_objects_side_effect, get_objects_return_value): + mock_page = MagicMock() + if get_objects_side_effect: + mock_page.get_objects.side_effect = get_objects_side_effect + else: + mock_page.get_objects.return_value = get_objects_return_value + + extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1") + + with patch("pypdfium2.raw") as mock_raw: + mock_raw.FPDF_PAGEOBJ_IMAGE = 1 + result = extractor._extract_images(mock_page) + + assert result == "" + + +def test_extract_calls_extract_images(mock_dependencies, monkeypatch): + # Mock pypdfium2 + mock_pdf_doc = MagicMock() + mock_page = MagicMock() + mock_pdf_doc.__iter__.return_value = [mock_page] + + # Mock text extraction + mock_text_page = MagicMock() + mock_text_page.get_text_range.return_value = "Page text content" + mock_page.get_textpage.return_value = mock_text_page + + with patch("pypdfium2.PdfDocument", return_value=mock_pdf_doc): + # Mock Blob + mock_blob = MagicMock() + mock_blob.source = "test.pdf" + with patch("core.rag.extractor.pdf_extractor.Blob.from_path", return_value=mock_blob): + extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1") + + # Mock _extract_images to return a known string + monkeypatch.setattr(extractor, "_extract_images", lambda p: "![image](img_url)") + + documents = list(extractor.extract()) + + assert len(documents) == 1 + assert "Page text content" in documents[0].page_content + assert "![image](img_url)" in documents[0].page_content + assert documents[0].metadata["page"] == 0 + + +def test_extract_images_failures(mock_dependencies): + saves = mock_dependencies.saves + db_stub = mock_dependencies.db + + # Mock page and image objects + mock_page = MagicMock() + mock_image_obj_fail = MagicMock() + mock_image_obj_ok = MagicMock() + + # First image raises exception + mock_image_obj_fail.extract.side_effect = Exception("Extraction failure") + + # Second image is OK (JPEG) + jpeg_bytes = b"\xff\xd8\xff some image data" + + def mock_extract(buf, fb_format=None): + buf.write(jpeg_bytes) + + mock_image_obj_ok.extract.side_effect = mock_extract + + mock_page.get_objects.return_value = [mock_image_obj_fail, mock_image_obj_ok] + 
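+    # jpeg_bytes starts with \xff\xd8\xff; by assumption (mirroring the
+    # parametrized cases in test_extract_images_formats above) this is the JPEG
+    # magic number the extractor sniffs to choose the mime type and extension.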
+ extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1") + + with patch("pypdfium2.raw") as mock_raw: + mock_raw.FPDF_PAGEOBJ_IMAGE = 1 + result = extractor._extract_images(mock_page) + + # Should have one success + assert "![image](http://files.local/files/test_file_id/file-preview)" in result + assert len(saves) == 1 + assert saves[0][1] == jpeg_bytes + assert db_stub.session.committed is True diff --git a/api/tests/unit_tests/core/test_provider_configuration.py b/api/tests/unit_tests/core/test_provider_configuration.py index 9060cf7b6c..636fac7a40 100644 --- a/api/tests/unit_tests/core/test_provider_configuration.py +++ b/api/tests/unit_tests/core/test_provider_configuration.py @@ -32,7 +32,6 @@ def mock_provider_entity(): label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), description=I18nObject(en_US="OpenAI provider", zh_Hans="OpenAI 提供商"), icon_small=I18nObject(en_US="icon.png", zh_Hans="icon.png"), - icon_large=I18nObject(en_US="icon.png", zh_Hans="icon.png"), background="background.png", help=None, supported_model_types=[ModelType.LLM], diff --git a/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/__init__.py b/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/__init__.py new file mode 100644 index 0000000000..cf8811dc2b --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/__init__.py @@ -0,0 +1 @@ +"""Tests for graph traversal components.""" diff --git a/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py b/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py new file mode 100644 index 0000000000..0019020ede --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py @@ -0,0 +1,307 @@ +"""Unit tests for skip propagator.""" + +from unittest.mock import MagicMock, create_autospec + +from core.workflow.graph import Edge, Graph +from core.workflow.graph_engine.graph_state_manager import GraphStateManager +from core.workflow.graph_engine.graph_traversal.skip_propagator import SkipPropagator + + +class TestSkipPropagator: + """Test suite for SkipPropagator.""" + + def test_propagate_skip_from_edge_with_unknown_edges_stops_processing(self) -> None: + """When there are unknown incoming edges, propagation should stop.""" + # Arrange + mock_graph = create_autospec(Graph) + mock_state_manager = create_autospec(GraphStateManager) + + # Create a mock edge + mock_edge = MagicMock(spec=Edge) + mock_edge.id = "edge_1" + mock_edge.head = "node_2" + + # Setup graph edges dict + mock_graph.edges = {"edge_1": mock_edge} + + # Setup incoming edges + incoming_edges = [MagicMock(spec=Edge), MagicMock(spec=Edge)] + mock_graph.get_incoming_edges.return_value = incoming_edges + + # Setup state manager to return has_unknown=True + mock_state_manager.analyze_edge_states.return_value = { + "has_unknown": True, + "has_taken": False, + "all_skipped": False, + } + + propagator = SkipPropagator(mock_graph, mock_state_manager) + + # Act + propagator.propagate_skip_from_edge("edge_1") + + # Assert + mock_graph.get_incoming_edges.assert_called_once_with("node_2") + mock_state_manager.analyze_edge_states.assert_called_once_with(incoming_edges) + # Should not call any other state manager methods + mock_state_manager.enqueue_node.assert_not_called() + mock_state_manager.start_execution.assert_not_called() + mock_state_manager.mark_node_skipped.assert_not_called() + + def 
test_propagate_skip_from_edge_with_taken_edge_enqueues_node(self) -> None:
+        """When there is at least one taken edge, node should be enqueued."""
+        # Arrange
+        mock_graph = create_autospec(Graph)
+        mock_state_manager = create_autospec(GraphStateManager)
+
+        # Create a mock edge
+        mock_edge = MagicMock(spec=Edge)
+        mock_edge.id = "edge_1"
+        mock_edge.head = "node_2"
+
+        mock_graph.edges = {"edge_1": mock_edge}
+        incoming_edges = [MagicMock(spec=Edge)]
+        mock_graph.get_incoming_edges.return_value = incoming_edges
+
+        # Setup state manager to return has_taken=True
+        mock_state_manager.analyze_edge_states.return_value = {
+            "has_unknown": False,
+            "has_taken": True,
+            "all_skipped": False,
+        }
+
+        propagator = SkipPropagator(mock_graph, mock_state_manager)
+
+        # Act
+        propagator.propagate_skip_from_edge("edge_1")
+
+        # Assert
+        mock_state_manager.enqueue_node.assert_called_once_with("node_2")
+        mock_state_manager.start_execution.assert_called_once_with("node_2")
+        mock_state_manager.mark_node_skipped.assert_not_called()
+
+    def test_propagate_skip_from_edge_with_all_skipped_propagates_to_node(self) -> None:
+        """When all incoming edges are skipped, should propagate skip to node."""
+        # Arrange
+        mock_graph = create_autospec(Graph)
+        mock_state_manager = create_autospec(GraphStateManager)
+
+        # Create a mock edge
+        mock_edge = MagicMock(spec=Edge)
+        mock_edge.id = "edge_1"
+        mock_edge.head = "node_2"
+
+        mock_graph.edges = {"edge_1": mock_edge}
+        incoming_edges = [MagicMock(spec=Edge)]
+        mock_graph.get_incoming_edges.return_value = incoming_edges
+
+        # Setup state manager to return all_skipped=True
+        mock_state_manager.analyze_edge_states.return_value = {
+            "has_unknown": False,
+            "has_taken": False,
+            "all_skipped": True,
+        }
+
+        propagator = SkipPropagator(mock_graph, mock_state_manager)
+
+        # Act
+        propagator.propagate_skip_from_edge("edge_1")
+
+        # Assert
+        mock_state_manager.mark_node_skipped.assert_called_once_with("node_2")
+        mock_state_manager.enqueue_node.assert_not_called()
+        mock_state_manager.start_execution.assert_not_called()
+
+    def test_propagate_skip_to_node_marks_node_and_outgoing_edges_skipped(self) -> None:
+        """_propagate_skip_to_node should mark node and all outgoing edges as skipped."""
+        # Arrange
+        mock_graph = create_autospec(Graph)
+        mock_state_manager = create_autospec(GraphStateManager)
+
+        # Create outgoing edges
+        edge1 = MagicMock(spec=Edge)
+        edge1.id = "edge_2"
+        edge1.head = "node_downstream_1"  # Set head for propagate_skip_from_edge
+
+        edge2 = MagicMock(spec=Edge)
+        edge2.id = "edge_3"
+        edge2.head = "node_downstream_2"
+
+        # Setup graph edges dict for propagate_skip_from_edge
+        mock_graph.edges = {"edge_2": edge1, "edge_3": edge2}
+        mock_graph.get_outgoing_edges.return_value = [edge1, edge2]
+
+        # Setup get_incoming_edges to return empty list to stop recursion
+        mock_graph.get_incoming_edges.return_value = []
+
+        propagator = SkipPropagator(mock_graph, mock_state_manager)
+
+        # Call the private method directly
+        # Act
+        propagator._propagate_skip_to_node("node_1")
+
+        # Assert
+        mock_state_manager.mark_node_skipped.assert_called_once_with("node_1")
+        mock_state_manager.mark_edge_skipped.assert_any_call("edge_2")
+        mock_state_manager.mark_edge_skipped.assert_any_call("edge_3")
+        assert mock_state_manager.mark_edge_skipped.call_count == 2
+        # Each skipped edge should then recursively propagate via
+        # propagate_skip_from_edge; the recursion is not asserted on directly
+        # here (see the sketch below).
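+        # A minimal sketch of how the recursion could be observed, assuming an
+        # instance-level wrapper (illustrative only, not part of this suite):
+        #
+        #     calls: list[str] = []
+        #     original = propagator.propagate_skip_from_edge
+        #     propagator.propagate_skip_from_edge = lambda edge_id: (
+        #         calls.append(edge_id), original(edge_id))
+        #     propagator._propagate_skip_to_node("node_1")
+        #     assert sorted(calls) == ["edge_2", "edge_3"]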
+
+    def test_skip_branch_paths_marks_unselected_edges_and_propagates(self) -> None:
+        """skip_branch_paths should mark all unselected edges as skipped and propagate."""
+        # Arrange
+        mock_graph = create_autospec(Graph)
+        mock_state_manager = create_autospec(GraphStateManager)
+
+        # Create unselected edges
+        edge1 = MagicMock(spec=Edge)
+        edge1.id = "edge_1"
+        edge1.head = "node_downstream_1"
+
+        edge2 = MagicMock(spec=Edge)
+        edge2.id = "edge_2"
+        edge2.head = "node_downstream_2"
+
+        unselected_edges = [edge1, edge2]
+
+        # Setup graph edges dict
+        mock_graph.edges = {"edge_1": edge1, "edge_2": edge2}
+        # Setup get_incoming_edges to return empty list to stop recursion
+        mock_graph.get_incoming_edges.return_value = []
+
+        propagator = SkipPropagator(mock_graph, mock_state_manager)
+
+        # Act
+        propagator.skip_branch_paths(unselected_edges)
+
+        # Assert
+        mock_state_manager.mark_edge_skipped.assert_any_call("edge_1")
+        mock_state_manager.mark_edge_skipped.assert_any_call("edge_2")
+        assert mock_state_manager.mark_edge_skipped.call_count == 2
+        # propagate_skip_from_edge also runs for each unselected edge; its
+        # downstream effects are asserted in the recursion test below.
+
+    def test_propagate_skip_from_edge_recursively_propagates_through_graph(self) -> None:
+        """Skip propagation should recursively propagate through the graph."""
+        # Arrange
+        mock_graph = create_autospec(Graph)
+        mock_state_manager = create_autospec(GraphStateManager)
+
+        # Create edge chain: edge_1 -> node_2 -> edge_3 -> node_4
+        edge1 = MagicMock(spec=Edge)
+        edge1.id = "edge_1"
+        edge1.head = "node_2"
+
+        edge3 = MagicMock(spec=Edge)
+        edge3.id = "edge_3"
+        edge3.head = "node_4"
+
+        mock_graph.edges = {"edge_1": edge1, "edge_3": edge3}
+
+        # Setup get_incoming_edges to return different values based on node
+        def get_incoming_edges_side_effect(node_id):
+            if node_id == "node_2":
+                return [edge1]
+            elif node_id == "node_4":
+                return [edge3]
+            return []
+
+        mock_graph.get_incoming_edges.side_effect = get_incoming_edges_side_effect
+
+        # Setup get_outgoing_edges to return different values based on node
+        def get_outgoing_edges_side_effect(node_id):
+            if node_id == "node_2":
+                return [edge3]
+            elif node_id == "node_4":
+                return []  # No outgoing edges, stops recursion
+            return []
+
+        mock_graph.get_outgoing_edges.side_effect = get_outgoing_edges_side_effect
+
+        # Setup state manager to return all_skipped for both nodes
+        mock_state_manager.analyze_edge_states.return_value = {
+            "has_unknown": False,
+            "has_taken": False,
+            "all_skipped": True,
+        }
+
+        propagator = SkipPropagator(mock_graph, mock_state_manager)
+
+        # Act
+        propagator.propagate_skip_from_edge("edge_1")
+
+        # Assert
+        # Should mark node_2 as skipped
+        mock_state_manager.mark_node_skipped.assert_any_call("node_2")
+        # Should mark edge_3 as skipped
+        mock_state_manager.mark_edge_skipped.assert_any_call("edge_3")
+        # Should propagate to node_4
+        mock_state_manager.mark_node_skipped.assert_any_call("node_4")
+        assert mock_state_manager.mark_node_skipped.call_count == 2
+
+    def test_propagate_skip_from_edge_with_mixed_edge_states_handles_correctly(self) -> None:
+        """Test with mixed edge states (some unknown, some taken, some skipped)."""
+        # Arrange
+        mock_graph = create_autospec(Graph)
+        mock_state_manager = create_autospec(GraphStateManager)
+
+        mock_edge = MagicMock(spec=Edge)
+        mock_edge.id = "edge_1"
+        mock_edge.head = "node_2"
+
+        mock_graph.edges = {"edge_1": mock_edge}
+        incoming_edges = [MagicMock(spec=Edge), MagicMock(spec=Edge), MagicMock(spec=Edge)]
+
mock_graph.get_incoming_edges.return_value = incoming_edges + + # Test 1: has_unknown=True, has_taken=False, all_skipped=False + mock_state_manager.analyze_edge_states.return_value = { + "has_unknown": True, + "has_taken": False, + "all_skipped": False, + } + + propagator = SkipPropagator(mock_graph, mock_state_manager) + + # Act + propagator.propagate_skip_from_edge("edge_1") + + # Assert - should stop processing + mock_state_manager.enqueue_node.assert_not_called() + mock_state_manager.mark_node_skipped.assert_not_called() + + # Reset mocks for next test + mock_state_manager.reset_mock() + mock_graph.reset_mock() + + # Test 2: has_unknown=False, has_taken=True, all_skipped=False + mock_state_manager.analyze_edge_states.return_value = { + "has_unknown": False, + "has_taken": True, + "all_skipped": False, + } + + # Act + propagator.propagate_skip_from_edge("edge_1") + + # Assert - should enqueue node + mock_state_manager.enqueue_node.assert_called_once_with("node_2") + mock_state_manager.start_execution.assert_called_once_with("node_2") + + # Reset mocks for next test + mock_state_manager.reset_mock() + mock_graph.reset_mock() + + # Test 3: has_unknown=False, has_taken=False, all_skipped=True + mock_state_manager.analyze_edge_states.return_value = { + "has_unknown": False, + "has_taken": False, + "all_skipped": True, + } + + # Act + propagator.propagate_skip_from_edge("edge_1") + + # Assert - should propagate skip + mock_state_manager.mark_node_skipped.assert_called_once_with("node_2") diff --git a/api/tests/unit_tests/extensions/test_celery_ssl.py b/api/tests/unit_tests/extensions/test_celery_ssl.py index fc7a090ef9..d3a4d69f07 100644 --- a/api/tests/unit_tests/extensions/test_celery_ssl.py +++ b/api/tests/unit_tests/extensions/test_celery_ssl.py @@ -8,11 +8,12 @@ class TestCelerySSLConfiguration: """Test suite for Celery SSL configuration.""" def test_get_celery_ssl_options_when_ssl_disabled(self): - """Test SSL options when REDIS_USE_SSL is False.""" - mock_config = MagicMock() - mock_config.REDIS_USE_SSL = False + """Test SSL options when BROKER_USE_SSL is False.""" + from configs import DifyConfig - with patch("extensions.ext_celery.dify_config", mock_config): + dify_config = DifyConfig(CELERY_BROKER_URL="redis://localhost:6379/0") + + with patch("extensions.ext_celery.dify_config", dify_config): from extensions.ext_celery import _get_celery_ssl_options result = _get_celery_ssl_options() @@ -21,7 +22,6 @@ class TestCelerySSLConfiguration: def test_get_celery_ssl_options_when_broker_not_redis(self): """Test SSL options when broker is not Redis.""" mock_config = MagicMock() - mock_config.REDIS_USE_SSL = True mock_config.CELERY_BROKER_URL = "amqp://localhost:5672" with patch("extensions.ext_celery.dify_config", mock_config): @@ -33,7 +33,6 @@ class TestCelerySSLConfiguration: def test_get_celery_ssl_options_with_cert_none(self): """Test SSL options with CERT_NONE requirement.""" mock_config = MagicMock() - mock_config.REDIS_USE_SSL = True mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0" mock_config.REDIS_SSL_CERT_REQS = "CERT_NONE" mock_config.REDIS_SSL_CA_CERTS = None @@ -53,7 +52,6 @@ class TestCelerySSLConfiguration: def test_get_celery_ssl_options_with_cert_required(self): """Test SSL options with CERT_REQUIRED and certificates.""" mock_config = MagicMock() - mock_config.REDIS_USE_SSL = True mock_config.CELERY_BROKER_URL = "rediss://localhost:6380/0" mock_config.REDIS_SSL_CERT_REQS = "CERT_REQUIRED" mock_config.REDIS_SSL_CA_CERTS = "/path/to/ca.crt" @@ -73,7 +71,6 @@ class 
TestCelerySSLConfiguration: def test_get_celery_ssl_options_with_cert_optional(self): """Test SSL options with CERT_OPTIONAL requirement.""" mock_config = MagicMock() - mock_config.REDIS_USE_SSL = True mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0" mock_config.REDIS_SSL_CERT_REQS = "CERT_OPTIONAL" mock_config.REDIS_SSL_CA_CERTS = "/path/to/ca.crt" @@ -91,7 +88,6 @@ class TestCelerySSLConfiguration: def test_get_celery_ssl_options_with_invalid_cert_reqs(self): """Test SSL options with invalid cert requirement defaults to CERT_NONE.""" mock_config = MagicMock() - mock_config.REDIS_USE_SSL = True mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0" mock_config.REDIS_SSL_CERT_REQS = "INVALID_VALUE" mock_config.REDIS_SSL_CA_CERTS = None @@ -108,7 +104,6 @@ class TestCelerySSLConfiguration: def test_celery_init_applies_ssl_to_broker_and_backend(self): """Test that SSL options are applied to both broker and backend when using Redis.""" mock_config = MagicMock() - mock_config.REDIS_USE_SSL = True mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0" mock_config.CELERY_BACKEND = "redis" mock_config.CELERY_RESULT_BACKEND = "redis://localhost:6379/0" diff --git a/api/tests/unit_tests/libs/test_archive_storage.py b/api/tests/unit_tests/libs/test_archive_storage.py new file mode 100644 index 0000000000..697760e33a --- /dev/null +++ b/api/tests/unit_tests/libs/test_archive_storage.py @@ -0,0 +1,272 @@ +import base64 +import hashlib +from datetime import datetime +from unittest.mock import ANY, MagicMock + +import pytest +from botocore.exceptions import ClientError + +from libs import archive_storage as storage_module +from libs.archive_storage import ( + ArchiveStorage, + ArchiveStorageError, + ArchiveStorageNotConfiguredError, +) + +BUCKET_NAME = "archive-bucket" + + +def _configure_storage(monkeypatch, **overrides): + defaults = { + "ARCHIVE_STORAGE_ENABLED": True, + "ARCHIVE_STORAGE_ENDPOINT": "https://storage.example.com", + "ARCHIVE_STORAGE_ARCHIVE_BUCKET": BUCKET_NAME, + "ARCHIVE_STORAGE_ACCESS_KEY": "access", + "ARCHIVE_STORAGE_SECRET_KEY": "secret", + "ARCHIVE_STORAGE_REGION": "auto", + } + defaults.update(overrides) + for key, value in defaults.items(): + monkeypatch.setattr(storage_module.dify_config, key, value, raising=False) + + +def _client_error(code: str) -> ClientError: + return ClientError({"Error": {"Code": code}}, "Operation") + + +def _mock_client(monkeypatch): + client = MagicMock() + client.head_bucket.return_value = None + boto_client = MagicMock(return_value=client) + monkeypatch.setattr(storage_module.boto3, "client", boto_client) + return client, boto_client + + +def test_init_disabled(monkeypatch): + _configure_storage(monkeypatch, ARCHIVE_STORAGE_ENABLED=False) + with pytest.raises(ArchiveStorageNotConfiguredError, match="not enabled"): + ArchiveStorage(bucket=BUCKET_NAME) + + +def test_init_missing_config(monkeypatch): + _configure_storage(monkeypatch, ARCHIVE_STORAGE_ENDPOINT=None) + with pytest.raises(ArchiveStorageNotConfiguredError, match="incomplete"): + ArchiveStorage(bucket=BUCKET_NAME) + + +def test_init_bucket_not_found(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.head_bucket.side_effect = _client_error("404") + + with pytest.raises(ArchiveStorageNotConfiguredError, match="does not exist"): + ArchiveStorage(bucket=BUCKET_NAME) + + +def test_init_bucket_access_denied(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.head_bucket.side_effect = 
_client_error("403") + + with pytest.raises(ArchiveStorageNotConfiguredError, match="Access denied"): + ArchiveStorage(bucket=BUCKET_NAME) + + +def test_init_bucket_other_error(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.head_bucket.side_effect = _client_error("500") + + with pytest.raises(ArchiveStorageError, match="Failed to access archive bucket"): + ArchiveStorage(bucket=BUCKET_NAME) + + +def test_init_sets_client(monkeypatch): + _configure_storage(monkeypatch) + client, boto_client = _mock_client(monkeypatch) + + storage = ArchiveStorage(bucket=BUCKET_NAME) + + boto_client.assert_called_once_with( + "s3", + endpoint_url="https://storage.example.com", + aws_access_key_id="access", + aws_secret_access_key="secret", + region_name="auto", + config=ANY, + ) + assert storage.client is client + assert storage.bucket == BUCKET_NAME + + +def test_put_object_returns_checksum(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + storage = ArchiveStorage(bucket=BUCKET_NAME) + + data = b"hello" + checksum = storage.put_object("key", data) + + expected_md5 = hashlib.md5(data).hexdigest() + expected_content_md5 = base64.b64encode(hashlib.md5(data).digest()).decode() + client.put_object.assert_called_once_with( + Bucket="archive-bucket", + Key="key", + Body=data, + ContentMD5=expected_content_md5, + ) + assert checksum == expected_md5 + + +def test_put_object_raises_on_error(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + storage = ArchiveStorage(bucket=BUCKET_NAME) + client.put_object.side_effect = _client_error("500") + + with pytest.raises(ArchiveStorageError, match="Failed to upload object"): + storage.put_object("key", b"data") + + +def test_get_object_returns_bytes(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + body = MagicMock() + body.read.return_value = b"payload" + client.get_object.return_value = {"Body": body} + storage = ArchiveStorage(bucket=BUCKET_NAME) + + assert storage.get_object("key") == b"payload" + + +def test_get_object_missing(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.get_object.side_effect = _client_error("NoSuchKey") + storage = ArchiveStorage(bucket=BUCKET_NAME) + + with pytest.raises(FileNotFoundError, match="Archive object not found"): + storage.get_object("missing") + + +def test_get_object_stream(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + body = MagicMock() + body.iter_chunks.return_value = [b"a", b"b"] + client.get_object.return_value = {"Body": body} + storage = ArchiveStorage(bucket=BUCKET_NAME) + + assert list(storage.get_object_stream("key")) == [b"a", b"b"] + + +def test_get_object_stream_missing(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.get_object.side_effect = _client_error("NoSuchKey") + storage = ArchiveStorage(bucket=BUCKET_NAME) + + with pytest.raises(FileNotFoundError, match="Archive object not found"): + list(storage.get_object_stream("missing")) + + +def test_object_exists(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + storage = ArchiveStorage(bucket=BUCKET_NAME) + + assert storage.object_exists("key") is True + client.head_object.side_effect = _client_error("404") + assert storage.object_exists("missing") is False + + +def test_delete_object_error(monkeypatch): + 
_configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.delete_object.side_effect = _client_error("500") + storage = ArchiveStorage(bucket=BUCKET_NAME) + + with pytest.raises(ArchiveStorageError, match="Failed to delete object"): + storage.delete_object("key") + + +def test_list_objects(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + paginator = MagicMock() + paginator.paginate.return_value = [ + {"Contents": [{"Key": "a"}, {"Key": "b"}]}, + {"Contents": [{"Key": "c"}]}, + ] + client.get_paginator.return_value = paginator + storage = ArchiveStorage(bucket=BUCKET_NAME) + + assert storage.list_objects("prefix") == ["a", "b", "c"] + paginator.paginate.assert_called_once_with(Bucket="archive-bucket", Prefix="prefix") + + +def test_list_objects_error(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + paginator = MagicMock() + paginator.paginate.side_effect = _client_error("500") + client.get_paginator.return_value = paginator + storage = ArchiveStorage(bucket=BUCKET_NAME) + + with pytest.raises(ArchiveStorageError, match="Failed to list objects"): + storage.list_objects("prefix") + + +def test_generate_presigned_url(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.generate_presigned_url.return_value = "http://signed-url" + storage = ArchiveStorage(bucket=BUCKET_NAME) + + url = storage.generate_presigned_url("key", expires_in=123) + + client.generate_presigned_url.assert_called_once_with( + ClientMethod="get_object", + Params={"Bucket": "archive-bucket", "Key": "key"}, + ExpiresIn=123, + ) + assert url == "http://signed-url" + + +def test_generate_presigned_url_error(monkeypatch): + _configure_storage(monkeypatch) + client, _ = _mock_client(monkeypatch) + client.generate_presigned_url.side_effect = _client_error("500") + storage = ArchiveStorage(bucket=BUCKET_NAME) + + with pytest.raises(ArchiveStorageError, match="Failed to generate pre-signed URL"): + storage.generate_presigned_url("key") + + +def test_serialization_roundtrip(): + records = [ + { + "id": "1", + "created_at": datetime(2024, 1, 1, 12, 0, 0), + "payload": {"nested": "value"}, + "items": [{"name": "a"}], + }, + {"id": "2", "value": 123}, + ] + + data = ArchiveStorage.serialize_to_jsonl_gz(records) + decoded = ArchiveStorage.deserialize_from_jsonl_gz(data) + + assert decoded[0]["id"] == "1" + assert decoded[0]["payload"]["nested"] == "value" + assert decoded[0]["items"][0]["name"] == "a" + assert "2024-01-01T12:00:00" in decoded[0]["created_at"] + assert decoded[1]["value"] == 123 + + +def test_content_md5_matches_checksum(): + data = b"checksum" + expected = base64.b64encode(hashlib.md5(data).digest()).decode() + + assert ArchiveStorage._content_md5(data) == expected + assert ArchiveStorage.compute_checksum(data) == hashlib.md5(data).hexdigest() diff --git a/api/tests/unit_tests/services/test_model_provider_service_sanitization.py b/api/tests/unit_tests/services/test_model_provider_service_sanitization.py index 9a107da1c7..e2360b116d 100644 --- a/api/tests/unit_tests/services/test_model_provider_service_sanitization.py +++ b/api/tests/unit_tests/services/test_model_provider_service_sanitization.py @@ -27,7 +27,6 @@ def service_with_fake_configurations(): description=None, icon_small=None, icon_small_dark=None, - icon_large=None, background=None, help=None, supported_model_types=[ModelType.LLM], diff --git a/api/tests/unit_tests/utils/test_text_processing.py 
b/api/tests/unit_tests/utils/test_text_processing.py index 11e017464a..bf61162a66 100644 --- a/api/tests/unit_tests/utils/test_text_processing.py +++ b/api/tests/unit_tests/utils/test_text_processing.py @@ -15,6 +15,11 @@ from core.tools.utils.text_processing_utils import remove_leading_symbols ("", ""), (" ", " "), ("【测试】", "【测试】"), + # Markdown link preservation - should be preserved if text starts with a markdown link + ("[Google](https://google.com) is a search engine", "[Google](https://google.com) is a search engine"), + ("[Example](http://example.com) some text", "[Example](http://example.com) some text"), + # Leading symbols before markdown link are removed, including the opening bracket [ + ("@[Test](https://example.com)", "Test](https://example.com)"), ], ) def test_remove_leading_symbols(input_text, expected_output): diff --git a/docker/.env.example b/docker/.env.example index 0e09d6869d..5c1089408c 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -447,6 +447,15 @@ S3_SECRET_KEY= # If set to false, the access key and secret key must be provided. S3_USE_AWS_MANAGED_IAM=false +# Workflow run and Conversation archive storage (S3-compatible) +ARCHIVE_STORAGE_ENABLED=false +ARCHIVE_STORAGE_ENDPOINT= +ARCHIVE_STORAGE_ARCHIVE_BUCKET= +ARCHIVE_STORAGE_EXPORT_BUCKET= +ARCHIVE_STORAGE_ACCESS_KEY= +ARCHIVE_STORAGE_SECRET_KEY= +ARCHIVE_STORAGE_REGION=auto + # Azure Blob Configuration # AZURE_BLOB_ACCOUNT_NAME=difyai diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 1c8d8d03e3..9910c95a84 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -122,6 +122,13 @@ x-shared-env: &shared-api-worker-env S3_ACCESS_KEY: ${S3_ACCESS_KEY:-} S3_SECRET_KEY: ${S3_SECRET_KEY:-} S3_USE_AWS_MANAGED_IAM: ${S3_USE_AWS_MANAGED_IAM:-false} + ARCHIVE_STORAGE_ENABLED: ${ARCHIVE_STORAGE_ENABLED:-false} + ARCHIVE_STORAGE_ENDPOINT: ${ARCHIVE_STORAGE_ENDPOINT:-} + ARCHIVE_STORAGE_ARCHIVE_BUCKET: ${ARCHIVE_STORAGE_ARCHIVE_BUCKET:-} + ARCHIVE_STORAGE_EXPORT_BUCKET: ${ARCHIVE_STORAGE_EXPORT_BUCKET:-} + ARCHIVE_STORAGE_ACCESS_KEY: ${ARCHIVE_STORAGE_ACCESS_KEY:-} + ARCHIVE_STORAGE_SECRET_KEY: ${ARCHIVE_STORAGE_SECRET_KEY:-} + ARCHIVE_STORAGE_REGION: ${ARCHIVE_STORAGE_REGION:-auto} AZURE_BLOB_ACCOUNT_NAME: ${AZURE_BLOB_ACCOUNT_NAME:-difyai} AZURE_BLOB_ACCOUNT_KEY: ${AZURE_BLOB_ACCOUNT_KEY:-difyai} AZURE_BLOB_CONTAINER_NAME: ${AZURE_BLOB_CONTAINER_NAME:-difyai-container} diff --git a/web/__mocks__/provider-context.ts b/web/__mocks__/provider-context.ts index c69a2ad1d2..373c2f86d3 100644 --- a/web/__mocks__/provider-context.ts +++ b/web/__mocks__/provider-context.ts @@ -1,6 +1,7 @@ import type { Plan, UsagePlanInfo } from '@/app/components/billing/type' import type { ProviderContextState } from '@/context/provider-context' -import { merge, noop } from 'es-toolkit/compat' +import { merge } from 'es-toolkit/compat' +import { noop } from 'es-toolkit/function' import { defaultPlan } from '@/app/components/billing/config' // Avoid being mocked in tests diff --git a/web/__tests__/workflow-parallel-limit.test.tsx b/web/__tests__/workflow-parallel-limit.test.tsx index 18657f4bd2..ba3840ac3e 100644 --- a/web/__tests__/workflow-parallel-limit.test.tsx +++ b/web/__tests__/workflow-parallel-limit.test.tsx @@ -64,7 +64,6 @@ vi.mock('i18next', () => ({ // Mock the useConfig hook vi.mock('@/app/components/workflow/nodes/iteration/use-config', () => ({ - __esModule: true, default: () => ({ inputs: { is_parallel: true, diff --git 
a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/date-picker.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/date-picker.tsx index 004f83afc5..368c3dcfc3 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/date-picker.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/date-picker.tsx @@ -4,11 +4,11 @@ import type { FC } from 'react' import type { TriggerProps } from '@/app/components/base/date-and-time-picker/types' import { RiCalendarLine } from '@remixicon/react' import dayjs from 'dayjs' -import { noop } from 'es-toolkit/compat' +import { noop } from 'es-toolkit/function' import * as React from 'react' import { useCallback } from 'react' import Picker from '@/app/components/base/date-and-time-picker/date-picker' -import { useI18N } from '@/context/i18n' +import { useLocale } from '@/context/i18n' import { cn } from '@/utils/classnames' import { formatToLocalTime } from '@/utils/format' @@ -26,7 +26,7 @@ const DatePicker: FC = ({ onStartChange, onEndChange, }) => { - const { locale } = useI18N() + const locale = useLocale() const renderDate = useCallback(({ value, handleClickTrigger, isOpen }: TriggerProps) => { return ( diff --git a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/index.tsx b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/index.tsx index 10209de97b..53794ad8db 100644 --- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/index.tsx +++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/time-range-picker/index.tsx @@ -7,7 +7,7 @@ import dayjs from 'dayjs' import * as React from 'react' import { useCallback, useState } from 'react' import { HourglassShape } from '@/app/components/base/icons/src/vender/other' -import { useI18N } from '@/context/i18n' +import { useLocale } from '@/context/i18n' import { formatToLocalTime } from '@/utils/format' import DatePicker from './date-picker' import RangeSelector from './range-selector' @@ -27,7 +27,7 @@ const TimeRangePicker: FC = ({ onSelect, queryDateFormat, }) => { - const { locale } = useI18N() + const locale = useLocale() const [isCustomRange, setIsCustomRange] = useState(false) const [start, setStart] = useState(today) diff --git a/web/app/(shareLayout)/webapp-reset-password/check-code/page.tsx b/web/app/(shareLayout)/webapp-reset-password/check-code/page.tsx index ac15f1df6d..fbf45259e5 100644 --- a/web/app/(shareLayout)/webapp-reset-password/check-code/page.tsx +++ b/web/app/(shareLayout)/webapp-reset-password/check-code/page.tsx @@ -3,12 +3,12 @@ import { RiArrowLeftLine, RiMailSendFill } from '@remixicon/react' import { useRouter, useSearchParams } from 'next/navigation' import { useState } from 'react' import { useTranslation } from 'react-i18next' -import { useContext } from 'use-context-selector' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import Toast from '@/app/components/base/toast' import Countdown from '@/app/components/signin/countdown' -import I18NContext from '@/context/i18n' + +import { useLocale } from '@/context/i18n' import { sendWebAppResetPasswordCode, verifyWebAppResetPasswordCode } from '@/service/common' export default function CheckCode() { @@ -19,7 +19,7 @@ export default function CheckCode() { const token = decodeURIComponent(searchParams.get('token') as string) const [code, setVerifyCode] = 
useState('') const [loading, setIsLoading] = useState(false) - const { locale } = useContext(I18NContext) + const locale = useLocale() const verify = async () => { try { diff --git a/web/app/(shareLayout)/webapp-reset-password/page.tsx b/web/app/(shareLayout)/webapp-reset-password/page.tsx index 6acd8d08f4..9b9a853cdd 100644 --- a/web/app/(shareLayout)/webapp-reset-password/page.tsx +++ b/web/app/(shareLayout)/webapp-reset-password/page.tsx @@ -1,17 +1,17 @@ 'use client' import { RiArrowLeftLine, RiLockPasswordLine } from '@remixicon/react' -import { noop } from 'es-toolkit/compat' +import { noop } from 'es-toolkit/function' import Link from 'next/link' import { useRouter, useSearchParams } from 'next/navigation' import { useState } from 'react' import { useTranslation } from 'react-i18next' -import { useContext } from 'use-context-selector' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import Toast from '@/app/components/base/toast' import { COUNT_DOWN_KEY, COUNT_DOWN_TIME_MS } from '@/app/components/signin/countdown' import { emailRegex } from '@/config' -import I18NContext from '@/context/i18n' + +import { useLocale } from '@/context/i18n' import useDocumentTitle from '@/hooks/use-document-title' import { sendResetPasswordCode } from '@/service/common' @@ -22,7 +22,7 @@ export default function CheckCode() { const router = useRouter() const [email, setEmail] = useState('') const [loading, setIsLoading] = useState(false) - const { locale } = useContext(I18NContext) + const locale = useLocale() const handleGetEMailVerificationCode = async () => { try { diff --git a/web/app/(shareLayout)/webapp-signin/check-code/page.tsx b/web/app/(shareLayout)/webapp-signin/check-code/page.tsx index 0ef63dcbd2..bda5484197 100644 --- a/web/app/(shareLayout)/webapp-signin/check-code/page.tsx +++ b/web/app/(shareLayout)/webapp-signin/check-code/page.tsx @@ -4,12 +4,12 @@ import { RiArrowLeftLine, RiMailSendFill } from '@remixicon/react' import { useRouter, useSearchParams } from 'next/navigation' import { useCallback, useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' -import { useContext } from 'use-context-selector' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import Toast from '@/app/components/base/toast' import Countdown from '@/app/components/signin/countdown' -import I18NContext from '@/context/i18n' + +import { useLocale } from '@/context/i18n' import { useWebAppStore } from '@/context/web-app-context' import { sendWebAppEMailLoginCode, webAppEmailLoginWithCode } from '@/service/common' import { fetchAccessToken } from '@/service/share' @@ -23,7 +23,7 @@ export default function CheckCode() { const token = decodeURIComponent(searchParams.get('token') as string) const [code, setVerifyCode] = useState('') const [loading, setIsLoading] = useState(false) - const { locale } = useContext(I18NContext) + const locale = useLocale() const codeInputRef = useRef(null) const redirectUrl = searchParams.get('redirect_url') const embeddedUserId = useWebAppStore(s => s.embeddedUserId) diff --git a/web/app/(shareLayout)/webapp-signin/components/mail-and-code-auth.tsx b/web/app/(shareLayout)/webapp-signin/components/mail-and-code-auth.tsx index f3e018a1fa..5aa9d9f141 100644 --- a/web/app/(shareLayout)/webapp-signin/components/mail-and-code-auth.tsx +++ b/web/app/(shareLayout)/webapp-signin/components/mail-and-code-auth.tsx @@ -1,14 +1,13 @@ -import { noop } from 
'es-toolkit/compat' +import { noop } from 'es-toolkit/function' import { useRouter, useSearchParams } from 'next/navigation' import { useState } from 'react' import { useTranslation } from 'react-i18next' -import { useContext } from 'use-context-selector' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import Toast from '@/app/components/base/toast' import { COUNT_DOWN_KEY, COUNT_DOWN_TIME_MS } from '@/app/components/signin/countdown' import { emailRegex } from '@/config' -import I18NContext from '@/context/i18n' +import { useLocale } from '@/context/i18n' import { sendWebAppEMailLoginCode } from '@/service/common' export default function MailAndCodeAuth() { @@ -18,7 +17,7 @@ export default function MailAndCodeAuth() { const emailFromLink = decodeURIComponent(searchParams.get('email') || '') const [email, setEmail] = useState(emailFromLink) const [loading, setIsLoading] = useState(false) - const { locale } = useContext(I18NContext) + const locale = useLocale() const handleGetEMailVerificationCode = async () => { try { diff --git a/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx b/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx index 7e76a87250..23ac83e76c 100644 --- a/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx +++ b/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx @@ -1,15 +1,14 @@ 'use client' -import { noop } from 'es-toolkit/compat' +import { noop } from 'es-toolkit/function' import Link from 'next/link' import { useRouter, useSearchParams } from 'next/navigation' import { useCallback, useState } from 'react' import { useTranslation } from 'react-i18next' -import { useContext } from 'use-context-selector' import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import Toast from '@/app/components/base/toast' import { emailRegex } from '@/config' -import I18NContext from '@/context/i18n' +import { useLocale } from '@/context/i18n' import { useWebAppStore } from '@/context/web-app-context' import { webAppLogin } from '@/service/common' import { fetchAccessToken } from '@/service/share' @@ -21,7 +20,7 @@ type MailAndPasswordAuthProps = { export default function MailAndPasswordAuth({ isEmailSetup }: MailAndPasswordAuthProps) { const { t } = useTranslation() - const { locale } = useContext(I18NContext) + const locale = useLocale() const router = useRouter() const searchParams = useSearchParams() const [showPassword, setShowPassword] = useState(false) diff --git a/web/app/account/(commonLayout)/account-page/email-change-modal.tsx b/web/app/account/(commonLayout)/account-page/email-change-modal.tsx index 6e702770f7..87ca6a689c 100644 --- a/web/app/account/(commonLayout)/account-page/email-change-modal.tsx +++ b/web/app/account/(commonLayout)/account-page/email-change-modal.tsx @@ -1,6 +1,6 @@ import type { ResponseError } from '@/service/fetch' import { RiCloseLine } from '@remixicon/react' -import { noop } from 'es-toolkit/compat' +import { noop } from 'es-toolkit/function' import { useRouter } from 'next/navigation' import * as React from 'react' import { useState } from 'react' @@ -214,7 +214,8 @@ const EmailChangeModal = ({ onClose, email, show }: Props) => {
{t('account.changeEmail.authTip', { ns: 'common' })}
}} values={{ email }} /> @@ -244,7 +245,8 @@ const EmailChangeModal = ({ onClose, email, show }: Props) => {
}} values={{ email }} /> @@ -333,7 +335,8 @@ const EmailChangeModal = ({ onClose, email, show }: Props) => {
}} values={{ email: mail }} /> diff --git a/web/app/components/app-initializer.tsx b/web/app/components/app-initializer.tsx index 0f710abf39..e30646eb3f 100644 --- a/web/app/components/app-initializer.tsx +++ b/web/app/components/app-initializer.tsx @@ -1,14 +1,18 @@ 'use client' import type { ReactNode } from 'react' +import Cookies from 'js-cookie' import { usePathname, useRouter, useSearchParams } from 'next/navigation' +import { parseAsString, useQueryState } from 'nuqs' import { useCallback, useEffect, useState } from 'react' import { EDUCATION_VERIFY_URL_SEARCHPARAMS_ACTION, EDUCATION_VERIFYING_LOCALSTORAGE_ITEM, } from '@/app/education-apply/constants' import { fetchSetupStatus } from '@/service/common' +import { sendGAEvent } from '@/utils/gtag' import { resolvePostLoginRedirect } from '../signin/utils/post-login-redirect' +import { trackEvent } from './base/amplitude' type AppInitializerProps = { children: ReactNode @@ -22,6 +26,10 @@ export const AppInitializer = ({ // Tokens are now stored in cookies, no need to check localStorage const pathname = usePathname() const [init, setInit] = useState(false) + const [oauthNewUser, setOauthNewUser] = useQueryState( + 'oauth_new_user', + parseAsString.withOptions({ history: 'replace' }), + ) const isSetupFinished = useCallback(async () => { try { @@ -45,6 +53,34 @@ export const AppInitializer = ({ (async () => { const action = searchParams.get('action') + if (oauthNewUser === 'true') { + let utmInfo = null + const utmInfoStr = Cookies.get('utm_info') + if (utmInfoStr) { + try { + utmInfo = JSON.parse(utmInfoStr) + } + catch (e) { + console.error('Failed to parse utm_info cookie:', e) + } + } + + // Track registration event with UTM params + trackEvent(utmInfo ? 'user_registration_success_with_utm' : 'user_registration_success', { + method: 'oauth', + ...utmInfo, + }) + + sendGAEvent(utmInfo ? 'user_registration_success_with_utm' : 'user_registration_success', { + method: 'oauth', + ...utmInfo, + }) + + // Clean up: remove utm_info cookie and URL params + Cookies.remove('utm_info') + setOauthNewUser(null) + } + if (action === EDUCATION_VERIFY_URL_SEARCHPARAMS_ACTION) localStorage.setItem(EDUCATION_VERIFYING_LOCALSTORAGE_ITEM, 'yes') @@ -67,7 +103,7 @@ export const AppInitializer = ({ router.replace('/signin') } })() - }, [isSetupFinished, router, pathname, searchParams]) + }, [isSetupFinished, router, pathname, searchParams, oauthNewUser, setOauthNewUser]) return init ? 
children : null } diff --git a/web/app/components/app-sidebar/dataset-info/index.spec.tsx b/web/app/components/app-sidebar/dataset-info/index.spec.tsx index da7eb6d7ff..9996ef2b4d 100644 --- a/web/app/components/app-sidebar/dataset-info/index.spec.tsx +++ b/web/app/components/app-sidebar/dataset-info/index.spec.tsx @@ -132,7 +132,6 @@ vi.mock('@/hooks/use-knowledge', () => ({ })) vi.mock('@/app/components/datasets/rename-modal', () => ({ - __esModule: true, default: ({ show, onClose, diff --git a/web/app/components/app-sidebar/text-squeeze-fix-verification.spec.tsx b/web/app/components/app-sidebar/text-squeeze-fix-verification.spec.tsx index 7c0c8b3aca..f7e91b3dea 100644 --- a/web/app/components/app-sidebar/text-squeeze-fix-verification.spec.tsx +++ b/web/app/components/app-sidebar/text-squeeze-fix-verification.spec.tsx @@ -13,7 +13,6 @@ vi.mock('next/navigation', () => ({ // Mock classnames utility vi.mock('@/utils/classnames', () => ({ - __esModule: true, default: (...classes: any[]) => classes.filter(Boolean).join(' '), })) diff --git a/web/app/components/app/annotation/add-annotation-modal/index.spec.tsx b/web/app/components/app/annotation/add-annotation-modal/index.spec.tsx index 6837516b3c..bad3ceefdf 100644 --- a/web/app/components/app/annotation/add-annotation-modal/index.spec.tsx +++ b/web/app/components/app/annotation/add-annotation-modal/index.spec.tsx @@ -10,7 +10,6 @@ vi.mock('@/context/provider-context', () => ({ const mockToastNotify = vi.fn() vi.mock('@/app/components/base/toast', () => ({ - __esModule: true, default: { notify: vi.fn(args => mockToastNotify(args)), }, diff --git a/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.spec.tsx b/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.spec.tsx index a3ab73b339..2ab0934fe2 100644 --- a/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.spec.tsx +++ b/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.spec.tsx @@ -1,7 +1,8 @@ +import type { Mock } from 'vitest' import type { Locale } from '@/i18n-config' import { render, screen } from '@testing-library/react' import * as React from 'react' -import I18nContext from '@/context/i18n' +import { useLocale } from '@/context/i18n' import { LanguagesSupported } from '@/i18n-config/language' import CSVDownload from './csv-downloader' @@ -17,17 +18,13 @@ vi.mock('react-papaparse', () => ({ })), })) +vi.mock('@/context/i18n', () => ({ + useLocale: vi.fn(() => 'en-US'), +})) + const renderWithLocale = (locale: Locale) => { - return render( - - - , - ) + ;(useLocale as Mock).mockReturnValue(locale) + return render() } describe('CSVDownload', () => { diff --git a/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.tsx b/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.tsx index a0c204062b..8db70104bc 100644 --- a/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.tsx +++ b/web/app/components/app/annotation/batch-add-annotation-modal/csv-downloader.tsx @@ -5,9 +5,9 @@ import { useTranslation } from 'react-i18next' import { useCSVDownloader, } from 'react-papaparse' -import { useContext } from 'use-context-selector' import { Download02 as DownloadIcon } from '@/app/components/base/icons/src/vender/solid/general' -import I18n from '@/context/i18n' + +import { useLocale } from '@/context/i18n' import { LanguagesSupported } from '@/i18n-config/language' const CSV_TEMPLATE_QA_EN = [ @@ -24,7 +24,7 @@ const 
CSV_TEMPLATE_QA_CN = [ const CSVDownload: FC = () => { const { t } = useTranslation() - const { locale } = useContext(I18n) + const locale = useLocale() const { CSVDownloader, Type } = useCSVDownloader() const getTemplate = () => { diff --git a/web/app/components/app/annotation/batch-add-annotation-modal/index.spec.tsx b/web/app/components/app/annotation/batch-add-annotation-modal/index.spec.tsx index d7458d6b90..7fdb99fbab 100644 --- a/web/app/components/app/annotation/batch-add-annotation-modal/index.spec.tsx +++ b/web/app/components/app/annotation/batch-add-annotation-modal/index.spec.tsx @@ -8,7 +8,6 @@ import { annotationBatchImport, checkAnnotationBatchImportProgress } from '@/ser import BatchModal, { ProcessStatus } from './index' vi.mock('@/app/components/base/toast', () => ({ - __esModule: true, default: { notify: vi.fn(), }, @@ -24,14 +23,12 @@ vi.mock('@/context/provider-context', () => ({ })) vi.mock('./csv-downloader', () => ({ - __esModule: true, default: () =>
, })) let lastUploadedFile: File | undefined vi.mock('./csv-uploader', () => ({ - __esModule: true, default: ({ file, updateFile }: { file?: File, updateFile: (file?: File) => void }) => (