From dd089573817205ff039debbbab05d20f4df491c2 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:40:26 +0800 Subject: [PATCH 01/14] fix full_text_search name (#26104) --- api/core/workflow/nodes/knowledge_index/entities.py | 2 +- .../entities/knowledge_entities/rag_pipeline_entities.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/api/core/workflow/nodes/knowledge_index/entities.py b/api/core/workflow/nodes/knowledge_index/entities.py index 2a2e983a0c..c79373afd5 100644 --- a/api/core/workflow/nodes/knowledge_index/entities.py +++ b/api/core/workflow/nodes/knowledge_index/entities.py @@ -63,7 +63,7 @@ class RetrievalSetting(BaseModel): Retrieval Setting. """ - search_method: Literal["semantic_search", "keyword_search", "fulltext_search", "hybrid_search"] + search_method: Literal["semantic_search", "keyword_search", "full_text_search", "hybrid_search"] top_k: int score_threshold: float | None = 0.5 score_threshold_enabled: bool = False diff --git a/api/services/entities/knowledge_entities/rag_pipeline_entities.py b/api/services/entities/knowledge_entities/rag_pipeline_entities.py index ac96b5c8ad..860bfde401 100644 --- a/api/services/entities/knowledge_entities/rag_pipeline_entities.py +++ b/api/services/entities/knowledge_entities/rag_pipeline_entities.py @@ -83,7 +83,7 @@ class RetrievalSetting(BaseModel): Retrieval Setting. """ - search_method: Literal["semantic_search", "fulltext_search", "keyword_search", "hybrid_search"] + search_method: Literal["semantic_search", "full_text_search", "keyword_search", "hybrid_search"] top_k: int score_threshold: float | None = 0.5 score_threshold_enabled: bool = False From 8b74ae683a0a242e70c58d0dee247e00127ed7ae Mon Sep 17 00:00:00 2001 From: 17hz <0x149527@gmail.com> Date: Tue, 23 Sep 2025 16:59:26 +0800 Subject: [PATCH 02/14] bump nextjs to 15.5 and turbopack for development mode (#24346) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: 非法操作 --- web/app/styles/globals.css | 14 ++-- web/next.config.js | 14 ++-- web/package.json | 4 +- web/pnpm-lock.yaml | 129 +++++++++++++++++++++---------------- web/tsconfig.json | 3 + 5 files changed, 96 insertions(+), 68 deletions(-) diff --git a/web/app/styles/globals.css b/web/app/styles/globals.css index 353cfa2fff..c1078b6eb6 100644 --- a/web/app/styles/globals.css +++ b/web/app/styles/globals.css @@ -1,12 +1,18 @@ @import "preflight.css"; -@tailwind base; -@tailwind components; + @import '../../themes/light.css'; @import '../../themes/dark.css'; @import "../../themes/manual-light.css"; @import "../../themes/manual-dark.css"; +@import "../components/base/button/index.css"; +@import "../components/base/action-button/index.css"; +@import "../components/base/modal/index.css"; + +@tailwind base; +@tailwind components; + html { color-scheme: light; } @@ -680,10 +686,6 @@ button:focus-within { display: none; } -@import "../components/base/button/index.css"; -@import "../components/base/action-button/index.css"; -@import "../components/base/modal/index.css"; - @tailwind utilities; @layer utilities { diff --git a/web/next.config.js b/web/next.config.js index 7e89c33e62..9c5e331f34 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -91,12 +91,10 @@ const remoteImageURLs = [hasSetWebPrefix ? 
new URL(`${process.env.NEXT_PUBLIC_WE /** @type {import('next').NextConfig} */ const nextConfig = { basePath: process.env.NEXT_PUBLIC_BASE_PATH || '', - webpack: (config, { dev, isServer }) => { - if (dev) { - config.plugins.push(codeInspectorPlugin({ bundler: 'webpack' })) - } - - return config + turbopack: { + rules: codeInspectorPlugin({ + bundler: 'turbopack' + }) }, productionBrowserSourceMaps: false, // enable browser source map generation during the production build // Configure pageExtensions to include md and mdx @@ -112,6 +110,10 @@ const nextConfig = { })), }, experimental: { + optimizePackageImports: [ + '@remixicon/react', + '@heroicons/react' + ], }, // fix all before production. Now it slow the develop speed. eslint: { diff --git a/web/package.json b/web/package.json index 57ab734eb2..78e62c9aa7 100644 --- a/web/package.json +++ b/web/package.json @@ -19,7 +19,7 @@ "and_qq >= 14.9" ], "scripts": { - "dev": "cross-env NODE_OPTIONS='--inspect' next dev", + "dev": "cross-env NODE_OPTIONS='--inspect' next dev --turbopack", "build": "next build", "build:docker": "next build && node scripts/optimize-standalone.js", "start": "cp -r .next/static .next/standalone/.next/static && cp -r public .next/standalone/public && cross-env PORT=$npm_config_port HOSTNAME=$npm_config_host node .next/standalone/server.js", @@ -203,7 +203,7 @@ "autoprefixer": "^10.4.20", "babel-loader": "^10.0.0", "bing-translate-api": "^4.0.2", - "code-inspector-plugin": "^0.18.1", + "code-inspector-plugin": "1.2.9", "cross-env": "^7.0.3", "eslint": "^9.35.0", "eslint-config-next": "15.5.0", diff --git a/web/pnpm-lock.yaml b/web/pnpm-lock.yaml index 5d4308288c..e47985fd71 100644 --- a/web/pnpm-lock.yaml +++ b/web/pnpm-lock.yaml @@ -519,8 +519,8 @@ importers: specifier: ^4.0.2 version: 4.1.0 code-inspector-plugin: - specifier: ^0.18.1 - version: 0.18.3 + specifier: 1.2.9 + version: 1.2.9 cross-env: specifier: ^7.0.3 version: 7.0.3 @@ -1372,6 +1372,24 @@ packages: '@clack/prompts@0.11.0': resolution: {integrity: sha512-pMN5FcrEw9hUkZA4f+zLlzivQSeQf5dRGJjSUbvVYDLvpKCdQx5OaknvKzgbtXOizhP+SJJJjqEbOe55uKKfAw==} + '@code-inspector/core@1.2.9': + resolution: {integrity: sha512-A1w+G73HlTB6S8X6sA6tT+ziWHTAcTyH+7FZ1Sgd3ZLXF/E/jT+hgRbKposjXMwxcbodRc6hBG6UyiV+VxwE6Q==} + + '@code-inspector/esbuild@1.2.9': + resolution: {integrity: sha512-DuyfxGupV43CN8YElIqynAniBtE86i037+3OVJYrm3jlJscXzbV98/kOzvu+VJQQvElcDgpgD6C/aGmPvFEiUg==} + + '@code-inspector/mako@1.2.9': + resolution: {integrity: sha512-8N+MHdr64AnthLB4v+YGe8/9bgog3BnkxIW/fqX5iVS0X06mF7X1pxfZOD2bABVtv1tW25lRtNs5AgvYJs0vpg==} + + '@code-inspector/turbopack@1.2.9': + resolution: {integrity: sha512-UVOUbqU6rpi5eOkrFamKrdeSWb0/OFFJQBaxbgs1RK5V5f4/iVwC5KjO2wkjv8cOGU4EppLfBVSBI1ysOo8S5A==} + + '@code-inspector/vite@1.2.9': + resolution: {integrity: sha512-saIokJ3o3SdrHEgTEg1fbbowbKfh7J4mYtu0i1mVfah1b1UfdCF/iFHTEJ6SADMiY47TeNZTg0TQWTlU1AWPww==} + + '@code-inspector/webpack@1.2.9': + resolution: {integrity: sha512-9YEykVrOIc0zMV7pyTyZhCprjScjn6gPPmxb4/OQXKCrP2fAm+NB188rg0s95e4sM7U3qRUpPA4NUH5F7Ogo+g==} + '@cspotcode/source-map-support@0.8.1': resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==} engines: {node: '>=12'} @@ -4425,11 +4443,8 @@ packages: resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} - code-inspector-core@0.18.3: - resolution: {integrity: 
sha512-60pT2cPoguMTUYdN1MMpjoPUnuF0ud/u7M2y+Vqit/bniLEit9dySEWAVxLU/Ukc5ILrDeLKEttc6fCMl9RUrA==} - - code-inspector-plugin@0.18.3: - resolution: {integrity: sha512-d9oJXZUsnvfTaQDwFmDNA2F+AR/TXIxWg1rr8KGcEskltR2prbZsfuu1z70EAn4khpx0smfi/PvIIwNJQ7FAMw==} + code-inspector-plugin@1.2.9: + resolution: {integrity: sha512-PGp/AQ03vaajimG9rn5+eQHGifrym5CSNLCViPtwzot7FM3MqEkGNqcvimH0FVuv3wDOcP5KvETAUSLf1BE3HA==} collapse-white-space@2.1.0: resolution: {integrity: sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw==} @@ -5055,9 +5070,6 @@ packages: esast-util-from-js@2.0.1: resolution: {integrity: sha512-8Ja+rNJ0Lt56Pcf3TAmpBZjmx8ZcK5Ts4cAzIOjsjevg9oSXJnl6SUQ2EevU8tv3h6ZLWmoKL5H4fgWvdvfETw==} - esbuild-code-inspector-plugin@0.18.3: - resolution: {integrity: sha512-FaPt5eFMtW1oXMWqAcqfAJByNagP1V/R9dwDDLQO29JmryMF35+frskTqy+G53whmTaVi19+TCrFqhNbMZH5ZQ==} - esbuild-register@3.6.0: resolution: {integrity: sha512-H2/S7Pm8a9CL1uhp9OvjwrBh5Pvx0H8qVOxNu8Wed9Y7qv56MPtq+GGM8RJpq6glYJn9Wspr8uw7l55uyinNeg==} peerDependencies: @@ -6413,8 +6425,8 @@ packages: resolution: {integrity: sha512-MbjN408fEndfiQXbFQ1vnd+1NoLDsnQW41410oQBXiyXDMYH5z505juWa4KUE1LqxRC7DgOgZDbKLxHIwm27hA==} engines: {node: '>=0.10'} - launch-ide@1.0.1: - resolution: {integrity: sha512-U7qBxSNk774PxWq4XbmRe0ThiIstPoa4sMH/OGSYxrFVvg8x3biXcF1fsH6wasDpEmEXMdINUrQhBdwsSgKyMg==} + launch-ide@1.2.0: + resolution: {integrity: sha512-7nXSPQOt3b2JT52Ge8jp4miFcY+nrUEZxNLWBzrEfjmByDTb9b5ytqMSwGhsNwY6Cntwop+6n7rWIFN0+S8PTw==} layout-base@1.0.2: resolution: {integrity: sha512-8h2oVEZNktL4BH2JCOI90iD1yXwL6iNW7KcCKT2QZgQJR2vbqDsldCTPRU9NifTCqHZci57XvQQ15YTu+sTYPg==} @@ -8693,9 +8705,6 @@ packages: vfile@6.0.3: resolution: {integrity: sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==} - vite-code-inspector-plugin@0.18.3: - resolution: {integrity: sha512-178H73vbDUHE+JpvfAfioUHlUr7qXCYIEa2YNXtzenFQGOjtae59P1jjcxGfa6pPHEnOoaitb13K+0qxwhi/WA==} - vm-browserify@1.1.2: resolution: {integrity: sha512-2ham8XPWTONajOR0ohOKOHXkm3+gaBmGut3SRuu75xLd/RRaY6vqgh8NBYYk7+RW3u5AtzPQZG8F10LHkl0lAQ==} @@ -8754,9 +8763,6 @@ packages: engines: {node: '>= 10.13.0'} hasBin: true - webpack-code-inspector-plugin@0.18.3: - resolution: {integrity: sha512-3782rsJhBnRiw0IpR6EqnyGDQoiSq0CcGeLJ52rZXlszYCe8igXtcujq7OhI0byaivWQ1LW7sXKyMEoVpBhq0w==} - webpack-dev-middleware@6.1.3: resolution: {integrity: sha512-A4ChP0Qj8oGociTs6UdlRUGANIGrCDL3y+pmQMc+dSsraXHCatFpmMey4mYELA+juqwUqwQsUgJJISXl1KWmiw==} engines: {node: '>= 14.15.0'} @@ -9993,6 +9999,48 @@ snapshots: picocolors: 1.1.1 sisteransi: 1.0.5 + '@code-inspector/core@1.2.9': + dependencies: + '@vue/compiler-dom': 3.5.17 + chalk: 4.1.2 + dotenv: 16.6.1 + launch-ide: 1.2.0 + portfinder: 1.0.37 + transitivePeerDependencies: + - supports-color + + '@code-inspector/esbuild@1.2.9': + dependencies: + '@code-inspector/core': 1.2.9 + transitivePeerDependencies: + - supports-color + + '@code-inspector/mako@1.2.9': + dependencies: + '@code-inspector/core': 1.2.9 + transitivePeerDependencies: + - supports-color + + '@code-inspector/turbopack@1.2.9': + dependencies: + '@code-inspector/core': 1.2.9 + '@code-inspector/webpack': 1.2.9 + transitivePeerDependencies: + - supports-color + + '@code-inspector/vite@1.2.9': + dependencies: + '@code-inspector/core': 1.2.9 + chalk: 4.1.1 + transitivePeerDependencies: + - supports-color + + '@code-inspector/webpack@1.2.9': + dependencies: + '@code-inspector/core': 1.2.9 + transitivePeerDependencies: + - supports-color + 
'@cspotcode/source-map-support@0.8.1': dependencies: '@jridgewell/trace-mapping': 0.3.9 @@ -12799,7 +12847,7 @@ snapshots: '@vue/compiler-core@3.5.17': dependencies: - '@babel/parser': 7.28.0 + '@babel/parser': 7.28.4 '@vue/shared': 3.5.17 entities: 4.5.0 estree-walker: 2.0.2 @@ -13503,24 +13551,15 @@ snapshots: co@4.6.0: {} - code-inspector-core@0.18.3: + code-inspector-plugin@1.2.9: dependencies: - '@vue/compiler-dom': 3.5.17 + '@code-inspector/core': 1.2.9 + '@code-inspector/esbuild': 1.2.9 + '@code-inspector/mako': 1.2.9 + '@code-inspector/turbopack': 1.2.9 + '@code-inspector/vite': 1.2.9 + '@code-inspector/webpack': 1.2.9 chalk: 4.1.1 - dotenv: 16.6.1 - launch-ide: 1.0.1 - portfinder: 1.0.37 - transitivePeerDependencies: - - supports-color - - code-inspector-plugin@0.18.3: - dependencies: - chalk: 4.1.1 - code-inspector-core: 0.18.3 - dotenv: 16.6.1 - esbuild-code-inspector-plugin: 0.18.3 - vite-code-inspector-plugin: 0.18.3 - webpack-code-inspector-plugin: 0.18.3 transitivePeerDependencies: - supports-color @@ -14160,12 +14199,6 @@ snapshots: esast-util-from-estree: 2.0.0 vfile-message: 4.0.2 - esbuild-code-inspector-plugin@0.18.3: - dependencies: - code-inspector-core: 0.18.3 - transitivePeerDependencies: - - supports-color - esbuild-register@3.6.0(esbuild@0.25.0): dependencies: debug: 4.4.1 @@ -16020,7 +16053,7 @@ snapshots: dependencies: language-subtag-registry: 0.3.23 - launch-ide@1.0.1: + launch-ide@1.2.0: dependencies: chalk: 4.1.2 dotenv: 16.6.1 @@ -18779,12 +18812,6 @@ snapshots: '@types/unist': 3.0.3 vfile-message: 4.0.2 - vite-code-inspector-plugin@0.18.3: - dependencies: - code-inspector-core: 0.18.3 - transitivePeerDependencies: - - supports-color - vm-browserify@1.1.2: {} void-elements@3.1.0: {} @@ -18855,12 +18882,6 @@ snapshots: - bufferutil - utf-8-validate - webpack-code-inspector-plugin@0.18.3: - dependencies: - code-inspector-core: 0.18.3 - transitivePeerDependencies: - - supports-color - webpack-dev-middleware@6.1.3(webpack@5.100.2(esbuild@0.25.0)(uglify-js@3.19.3)): dependencies: colorette: 2.0.20 diff --git a/web/tsconfig.json b/web/tsconfig.json index c3e0bca665..3b022e4708 100644 --- a/web/tsconfig.json +++ b/web/tsconfig.json @@ -26,6 +26,9 @@ "paths": { "@/*": [ "./*" + ], + "~@/*": [ + "./*" ] } }, From fb6ccccc3da48739c20ce0d8a4bee9fa8ad89908 Mon Sep 17 00:00:00 2001 From: GuanMu Date: Tue, 23 Sep 2025 17:04:56 +0800 Subject: [PATCH 03/14] chore: refactor component exports for consistency (#26033) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- web/app/components/app/log/list.tsx | 2 +- web/app/components/base/copy-icon/index.tsx | 2 +- web/app/components/base/markdown-blocks/think-block.tsx | 2 +- web/app/components/base/svg-gallery/index.tsx | 2 +- .../json-schema-config-modal/json-schema-generator/index.tsx | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/web/app/components/app/log/list.tsx b/web/app/components/app/log/list.tsx index b73d1f19de..8b3370b678 100644 --- a/web/app/components/app/log/list.tsx +++ b/web/app/components/app/log/list.tsx @@ -35,7 +35,7 @@ import { useStore as useAppStore } from '@/app/components/app/store' import { useAppContext } from '@/context/app-context' import useTimestamp from '@/hooks/use-timestamp' import Tooltip from '@/app/components/base/tooltip' -import { CopyIcon } from '@/app/components/base/copy-icon' +import CopyIcon from '@/app/components/base/copy-icon' import { buildChatItemTree, getThreadMessages } from 
'@/app/components/base/chat/utils' import { getProcessedFilesFromResponse } from '@/app/components/base/file-uploader/utils' import cn from '@/utils/classnames' diff --git a/web/app/components/base/copy-icon/index.tsx b/web/app/components/base/copy-icon/index.tsx index 196e256978..158eaa5fbe 100644 --- a/web/app/components/base/copy-icon/index.tsx +++ b/web/app/components/base/copy-icon/index.tsx @@ -15,7 +15,7 @@ type Props = { const prefixEmbedded = 'appOverview.overview.appInfo.embedded' -export const CopyIcon = ({ content }: Props) => { +const CopyIcon = ({ content }: Props) => { const { t } = useTranslation() const [isCopied, setIsCopied] = useState(false) diff --git a/web/app/components/base/markdown-blocks/think-block.tsx b/web/app/components/base/markdown-blocks/think-block.tsx index a5813266f1..acceecd433 100644 --- a/web/app/components/base/markdown-blocks/think-block.tsx +++ b/web/app/components/base/markdown-blocks/think-block.tsx @@ -63,7 +63,7 @@ const useThinkTimer = (children: any) => { return { elapsedTime, isComplete } } -export const ThinkBlock = ({ children, ...props }: any) => { +const ThinkBlock = ({ children, ...props }: React.ComponentProps<'details'>) => { const { elapsedTime, isComplete } = useThinkTimer(children) const displayContent = removeEndThink(children) const { t } = useTranslation() diff --git a/web/app/components/base/svg-gallery/index.tsx b/web/app/components/base/svg-gallery/index.tsx index 710a0107fb..b6b2fe9db4 100644 --- a/web/app/components/base/svg-gallery/index.tsx +++ b/web/app/components/base/svg-gallery/index.tsx @@ -3,7 +3,7 @@ import { SVG } from '@svgdotjs/svg.js' import DOMPurify from 'dompurify' import ImagePreview from '@/app/components/base/image-uploader/image-preview' -export const SVGRenderer = ({ content }: { content: string }) => { +const SVGRenderer = ({ content }: { content: string }) => { const svgRef = useRef(null) const [imagePreview, setImagePreview] = useState('') const [windowSize, setWindowSize] = useState({ diff --git a/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/json-schema-generator/index.tsx b/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/json-schema-generator/index.tsx index 64138b3cbd..1a4eb3cfdb 100644 --- a/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/json-schema-generator/index.tsx +++ b/web/app/components/workflow/nodes/llm/components/json-schema-config-modal/json-schema-generator/index.tsx @@ -30,7 +30,7 @@ enum GeneratorView { result = 'result', } -export const JsonSchemaGenerator: FC = ({ +const JsonSchemaGenerator: FC = ({ onApply, crossAxisOffset, }) => { From 24b4289d6cfb641e0619099a64d176b57b8d22da Mon Sep 17 00:00:00 2001 From: longbingljw Date: Tue, 23 Sep 2025 17:06:06 +0800 Subject: [PATCH 04/14] fix:add some explanation for oceanbase parser selection (#26071) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- api/configs/middleware/vdb/oceanbase_config.py | 8 ++++++-- api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py | 2 +- docker/.env.example | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/api/configs/middleware/vdb/oceanbase_config.py b/api/configs/middleware/vdb/oceanbase_config.py index 99f4c49407..7c9376f86b 100644 --- a/api/configs/middleware/vdb/oceanbase_config.py +++ b/api/configs/middleware/vdb/oceanbase_config.py @@ -40,8 +40,12 @@ class OceanBaseVectorConfig(BaseSettings): OCEANBASE_FULLTEXT_PARSER: str | None = Field( description=( - 
"Fulltext parser to use for text indexing. Options: 'japanese_ftparser' (Japanese), " - "'thai_ftparser' (Thai), 'ik' (Chinese). Default is 'ik'" + "Fulltext parser to use for text indexing. " + "Built-in options: 'ngram' (N-gram tokenizer for English/numbers), " + "'beng' (Basic English tokenizer), 'space' (Space-based tokenizer), " + "'ngram2' (Improved N-gram tokenizer), 'ik' (Chinese tokenizer). " + "External plugins (require installation): 'japanese_ftparser' (Japanese tokenizer), " + "'thai_ftparser' (Thai tokenizer). Default is 'ik'" ), default="ik", ) diff --git a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py index 49cf900126..b3db7332e8 100644 --- a/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py +++ b/api/core/rag/datasource/vdb/oceanbase/oceanbase_vector.py @@ -123,7 +123,7 @@ class OceanBaseVector(BaseVector): # Get parser from config or use default ik parser parser_name = dify_config.OCEANBASE_FULLTEXT_PARSER or "ik" - allowed_parsers = ["ik", "japanese_ftparser", "thai_ftparser"] + allowed_parsers = ["ngram", "beng", "space", "ngram2", "ik", "japanese_ftparser", "thai_ftparser"] if parser_name not in allowed_parsers: raise ValueError( f"Invalid OceanBase full-text parser: {parser_name}. " diff --git a/docker/.env.example b/docker/.env.example index d4e8ab3beb..07b4088470 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -655,6 +655,8 @@ LINDORM_USING_UGC=True LINDORM_QUERY_TIMEOUT=1 # OceanBase Vector configuration, only available when VECTOR_STORE is `oceanbase` +# Built-in fulltext parsers are `ngram`, `beng`, `space`, `ngram2`, `ik` +# External fulltext parsers (require plugin installation) are `japanese_ftparser`, `thai_ftparser` OCEANBASE_VECTOR_HOST=oceanbase OCEANBASE_VECTOR_PORT=2881 OCEANBASE_VECTOR_USER=root@test From d9e45a1abe01ca778b3e9440c0ffa7e045bbbeb0 Mon Sep 17 00:00:00 2001 From: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Date: Tue, 23 Sep 2025 18:18:22 +0800 Subject: [PATCH 05/14] feat(pipeline): add language support to built-in pipeline templates and update related components (#26124) --- .../list/built-in-pipeline-list.tsx | 11 ++++++++++- web/app/components/plugins/plugin-item/index.tsx | 7 +++---- web/app/components/tools/provider/detail.tsx | 3 +-- web/models/pipeline.ts | 1 + web/service/use-pipeline.ts | 6 ++++-- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx b/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx index 9d3d0e7717..6d22f2115a 100644 --- a/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx +++ b/web/app/components/datasets/create-from-pipeline/list/built-in-pipeline-list.tsx @@ -1,9 +1,18 @@ import { usePipelineTemplateList } from '@/service/use-pipeline' import TemplateCard from './template-card' import CreateCard from './create-card' +import { useI18N } from '@/context/i18n' +import { useMemo } from 'react' +import { LanguagesSupported } from '@/i18n-config/language' const BuiltInPipelineList = () => { - const { data: pipelineList, isLoading } = usePipelineTemplateList({ type: 'built-in' }) + const { locale } = useI18N() + const language = useMemo(() => { + if (['zh-Hans', 'ja-JP'].includes(locale)) + return locale + return LanguagesSupported[0] + }, [locale]) + const { data: pipelineList, isLoading } = usePipelineTemplateList({ type: 'built-in', language }) const list = 
pipelineList?.pipeline_templates || [] return ( diff --git a/web/app/components/plugins/plugin-item/index.tsx b/web/app/components/plugins/plugin-item/index.tsx index c228ca4db4..ed7cf47bb7 100644 --- a/web/app/components/plugins/plugin-item/index.tsx +++ b/web/app/components/plugins/plugin-item/index.tsx @@ -146,7 +146,6 @@ const PluginItem: FC = ({ {/* Organization & Name */}
= ({
       {category === PluginType.extension && (
         <>
           ·
-
-
+
+
= ({
         && (
           <>
             {t('plugin.from')} marketplace
-
+
} diff --git a/web/app/components/tools/provider/detail.tsx b/web/app/components/tools/provider/detail.tsx index 87d09bd527..dd2972a9d6 100644 --- a/web/app/components/tools/provider/detail.tsx +++ b/web/app/components/tools/provider/detail.tsx @@ -244,9 +244,8 @@ const ProviderDetail = ({
</div> - <div className='mb-1 flex h-4 items-center justify-between'> + <div className='mb-1 mt-0.5 flex h-4 items-center justify-between'> <OrgInfo - className="mt-0.5" packageNameClassName='w-auto' orgName={collection.author} packageName={collection.name} diff --git a/web/models/pipeline.ts b/web/models/pipeline.ts index d644d43bf3..1c2211b6d9 100644 --- a/web/models/pipeline.ts +++ b/web/models/pipeline.ts @@ -17,6 +17,7 @@ export enum DatasourceType { export type PipelineTemplateListParams = { type: 'built-in' | 'customized' + language?: string } export type PipelineTemplate = { diff --git a/web/service/use-pipeline.ts b/web/service/use-pipeline.ts index 870fb5bc84..a7b9c89410 100644 --- a/web/service/use-pipeline.ts +++ b/web/service/use-pipeline.ts @@ -40,8 +40,9 @@ const NAME_SPACE = 'pipeline' export const PipelineTemplateListQueryKeyPrefix = [NAME_SPACE, 'template-list'] export const usePipelineTemplateList = (params: PipelineTemplateListParams) => { + const { type, language } = params return useQuery<PipelineTemplateListResponse>({ - queryKey: [...PipelineTemplateListQueryKeyPrefix, params.type], + queryKey: [...PipelineTemplateListQueryKeyPrefix, type, language], queryFn: () => { return get<PipelineTemplateListResponse>('/rag/pipeline/templates', { params }) }, @@ -55,7 +56,7 @@ export const useInvalidCustomizedTemplateList = () => { export const usePipelineTemplateById = (params: PipelineTemplateByIdRequest, enabled: boolean) => { const { template_id, type } = params return useQuery<PipelineTemplateByIdResponse>({ - queryKey: [NAME_SPACE, 'template', template_id], + queryKey: [NAME_SPACE, 'template', type, template_id], queryFn: () => { return get<PipelineTemplateByIdResponse>(`/rag/pipeline/templates/${template_id}`, { params: { @@ -64,6 +65,7 @@ export const usePipelineTemplateById = (params: PipelineTemplateByIdRequest, ena }) }, enabled, + staleTime: 0, }) } From 2913d17fe23bda01c5d00d3c8e139845bf9203a2 Mon Sep 17 00:00:00 2001 From: QuantumGhost <obelisk.reg+git@gmail.com> Date: Tue, 23 Sep 2025 18:48:02 +0800 Subject: [PATCH 06/14] ci: Add hotfix/** branches to build-push workflow triggers (#26129) --- .github/workflows/build-push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml index 17af047267..24a9da4400 100644 --- a/.github/workflows/build-push.yml +++ b/.github/workflows/build-push.yml @@ -8,6 +8,7 @@ on: - "deploy/enterprise" - "build/**" - "release/e-*" + - "hotfix/**" tags: - "*" From 96a0b9991e274c8e61fa2f2aaf46b7df3eb52906 Mon Sep 17 00:00:00 2001 From: QuantumGhost <obelisk.reg+git@gmail.com> Date: Tue, 23 Sep 2025 21:30:46 +0800 Subject: [PATCH 07/14] fix(api): Fix variable truncation for `list[File]` value in output mapping (#26133) --- api/services/variable_truncator.py | 8 ++++++++ api/tests/unit_tests/services/test_variable_truncator.py | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/api/services/variable_truncator.py b/api/services/variable_truncator.py index 4362bb0291..d02508e4f3 100644 --- a/api/services/variable_truncator.py +++ b/api/services/variable_truncator.py @@ -262,6 +262,14 @@ class VariableTruncator: target_length = self._array_element_limit for i, item in enumerate(value): + # Dirty fix: + # The output of `Start` node may contain list of `File` elements, + # causing `AssertionError` while invoking `_truncate_json_primitives`. 
+ # + # This check ensures that `list[File]` are handled separately + if isinstance(item, File): + truncated_value.append(item) + continue if i >= target_length: return _PartResult(truncated_value, used_size, True) if i > 0: diff --git a/api/tests/unit_tests/services/test_variable_truncator.py b/api/tests/unit_tests/services/test_variable_truncator.py index 0ad056c985..6761f939e3 100644 --- a/api/tests/unit_tests/services/test_variable_truncator.py +++ b/api/tests/unit_tests/services/test_variable_truncator.py @@ -588,3 +588,11 @@ class TestIntegrationScenarios: if isinstance(result.result, ObjectSegment): result_size = truncator.calculate_json_size(result.result.value) assert result_size <= original_size + + def test_file_and_array_file_variable_mapping(self, file): + truncator = VariableTruncator(string_length_limit=30, array_element_limit=3, max_size_bytes=300) + + mapping = {"array_file": [file]} + truncated_mapping, truncated = truncator.truncate_variable_mapping(mapping) + assert truncated is False + assert truncated_mapping == mapping From 25c69ac54089e9676e8e926603e28ac252f6f231 Mon Sep 17 00:00:00 2001 From: Asuka Minato <i@asukaminato.eu.org> Date: Wed, 24 Sep 2025 04:32:48 +0900 Subject: [PATCH 08/14] one example of Session (#24135) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: -LAN- <laipz8200@outlook.com> --- api/commands.py | 152 +++++++++--------- api/controllers/console/app/conversation.py | 5 +- .../console/datasets/datasets_document.py | 5 +- api/models/dataset.py | 4 +- api/models/model.py | 6 +- api/services/app_service.py | 3 +- api/services/dataset_service.py | 12 +- api/services/plugin/plugin_migration.py | 2 +- api/services/tag_service.py | 3 +- api/tasks/document_indexing_sync_task.py | 3 +- 10 files changed, 100 insertions(+), 95 deletions(-) diff --git a/api/commands.py b/api/commands.py index cb8aa8430a..82efe34611 100644 --- a/api/commands.py +++ b/api/commands.py @@ -10,6 +10,7 @@ from flask import current_app from pydantic import TypeAdapter from sqlalchemy import select from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy.orm import sessionmaker from configs import dify_config from constants.languages import languages @@ -61,31 +62,30 @@ def reset_password(email, new_password, password_confirm): if str(new_password).strip() != str(password_confirm).strip(): click.echo(click.style("Passwords do not match.", fg="red")) return + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + account = session.query(Account).where(Account.email == email).one_or_none() - account = db.session.query(Account).where(Account.email == email).one_or_none() + if not account: + click.echo(click.style(f"Account not found for email: {email}", fg="red")) + return - if not account: - click.echo(click.style(f"Account not found for email: {email}", fg="red")) - return + try: + valid_password(new_password) + except: + click.echo(click.style(f"Invalid password. Must match {password_pattern}", fg="red")) + return - try: - valid_password(new_password) - except: - click.echo(click.style(f"Invalid password. 
Must match {password_pattern}", fg="red")) - return + # generate password salt + salt = secrets.token_bytes(16) + base64_salt = base64.b64encode(salt).decode() - # generate password salt - salt = secrets.token_bytes(16) - base64_salt = base64.b64encode(salt).decode() - - # encrypt password with salt - password_hashed = hash_password(new_password, salt) - base64_password_hashed = base64.b64encode(password_hashed).decode() - account.password = base64_password_hashed - account.password_salt = base64_salt - db.session.commit() - AccountService.reset_login_error_rate_limit(email) - click.echo(click.style("Password reset successfully.", fg="green")) + # encrypt password with salt + password_hashed = hash_password(new_password, salt) + base64_password_hashed = base64.b64encode(password_hashed).decode() + account.password = base64_password_hashed + account.password_salt = base64_salt + AccountService.reset_login_error_rate_limit(email) + click.echo(click.style("Password reset successfully.", fg="green")) @click.command("reset-email", help="Reset the account email.") @@ -100,22 +100,21 @@ def reset_email(email, new_email, email_confirm): if str(new_email).strip() != str(email_confirm).strip(): click.echo(click.style("New emails do not match.", fg="red")) return + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + account = session.query(Account).where(Account.email == email).one_or_none() - account = db.session.query(Account).where(Account.email == email).one_or_none() + if not account: + click.echo(click.style(f"Account not found for email: {email}", fg="red")) + return - if not account: - click.echo(click.style(f"Account not found for email: {email}", fg="red")) - return + try: + email_validate(new_email) + except: + click.echo(click.style(f"Invalid email: {new_email}", fg="red")) + return - try: - email_validate(new_email) - except: - click.echo(click.style(f"Invalid email: {new_email}", fg="red")) - return - - account.email = new_email - db.session.commit() - click.echo(click.style("Email updated successfully.", fg="green")) + account.email = new_email + click.echo(click.style("Email updated successfully.", fg="green")) @click.command( @@ -139,25 +138,24 @@ def reset_encrypt_key_pair(): if dify_config.EDITION != "SELF_HOSTED": click.echo(click.style("This command is only for SELF_HOSTED installations.", fg="red")) return + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + tenants = session.query(Tenant).all() + for tenant in tenants: + if not tenant: + click.echo(click.style("No workspaces found. Run /install first.", fg="red")) + return - tenants = db.session.query(Tenant).all() - for tenant in tenants: - if not tenant: - click.echo(click.style("No workspaces found. Run /install first.", fg="red")) - return + tenant.encrypt_public_key = generate_key_pair(tenant.id) - tenant.encrypt_public_key = generate_key_pair(tenant.id) + session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete() + session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete() - db.session.query(Provider).where(Provider.provider_type == "custom", Provider.tenant_id == tenant.id).delete() - db.session.query(ProviderModel).where(ProviderModel.tenant_id == tenant.id).delete() - db.session.commit() - - click.echo( - click.style( - f"Congratulations! The asymmetric key pair of workspace {tenant.id} has been reset.", - fg="green", + click.echo( + click.style( + f"Congratulations! 
The asymmetric key pair of workspace {tenant.id} has been reset.", + fg="green", + ) ) - ) @click.command("vdb-migrate", help="Migrate vector db.") @@ -182,14 +180,15 @@ def migrate_annotation_vector_database(): try: # get apps info per_page = 50 - apps = ( - db.session.query(App) - .where(App.status == "normal") - .order_by(App.created_at.desc()) - .limit(per_page) - .offset((page - 1) * per_page) - .all() - ) + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + apps = ( + session.query(App) + .where(App.status == "normal") + .order_by(App.created_at.desc()) + .limit(per_page) + .offset((page - 1) * per_page) + .all() + ) if not apps: break except SQLAlchemyError: @@ -203,26 +202,27 @@ def migrate_annotation_vector_database(): ) try: click.echo(f"Creating app annotation index: {app.id}") - app_annotation_setting = ( - db.session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first() - ) + with sessionmaker(db.engine, expire_on_commit=False).begin() as session: + app_annotation_setting = ( + session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first() + ) - if not app_annotation_setting: - skipped_count = skipped_count + 1 - click.echo(f"App annotation setting disabled: {app.id}") - continue - # get dataset_collection_binding info - dataset_collection_binding = ( - db.session.query(DatasetCollectionBinding) - .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id) - .first() - ) - if not dataset_collection_binding: - click.echo(f"App annotation collection binding not found: {app.id}") - continue - annotations = db.session.scalars( - select(MessageAnnotation).where(MessageAnnotation.app_id == app.id) - ).all() + if not app_annotation_setting: + skipped_count = skipped_count + 1 + click.echo(f"App annotation setting disabled: {app.id}") + continue + # get dataset_collection_binding info + dataset_collection_binding = ( + session.query(DatasetCollectionBinding) + .where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id) + .first() + ) + if not dataset_collection_binding: + click.echo(f"App annotation collection binding not found: {app.id}") + continue + annotations = session.scalars( + select(MessageAnnotation).where(MessageAnnotation.app_id == app.id) + ).all() dataset = Dataset( id=app.id, tenant_id=app.tenant_id, diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py index c0cbf6613e..f104ab5dee 100644 --- a/api/controllers/console/app/conversation.py +++ b/api/controllers/console/app/conversation.py @@ -1,6 +1,7 @@ from datetime import datetime import pytz # pip install pytz +import sqlalchemy as sa from flask_login import current_user from flask_restx import Resource, marshal_with, reqparse from flask_restx.inputs import int_range @@ -70,7 +71,7 @@ class CompletionConversationApi(Resource): parser.add_argument("limit", type=int_range(1, 100), default=20, location="args") args = parser.parse_args() - query = db.select(Conversation).where( + query = sa.select(Conversation).where( Conversation.app_id == app_model.id, Conversation.mode == "completion", Conversation.is_deleted.is_(False) ) @@ -236,7 +237,7 @@ class ChatConversationApi(Resource): .subquery() ) - query = db.select(Conversation).where(Conversation.app_id == app_model.id, Conversation.is_deleted.is_(False)) + query = sa.select(Conversation).where(Conversation.app_id == app_model.id, Conversation.is_deleted.is_(False)) if args["keyword"]: keyword_filter 
= f"%{args['keyword']}%" diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index 5de1f6c6ee..e6f5daa87b 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -4,6 +4,7 @@ from argparse import ArgumentTypeError from collections.abc import Sequence from typing import Literal, cast +import sqlalchemy as sa from flask import request from flask_login import current_user from flask_restx import Resource, fields, marshal, marshal_with, reqparse @@ -211,13 +212,13 @@ class DatasetDocumentListApi(Resource): if sort == "hit_count": sub_query = ( - db.select(DocumentSegment.document_id, db.func.sum(DocumentSegment.hit_count).label("total_hit_count")) + sa.select(DocumentSegment.document_id, sa.func.sum(DocumentSegment.hit_count).label("total_hit_count")) .group_by(DocumentSegment.document_id) .subquery() ) query = query.outerjoin(sub_query, sub_query.c.document_id == Document.id).order_by( - sort_logic(db.func.coalesce(sub_query.c.total_hit_count, 0)), + sort_logic(sa.func.coalesce(sub_query.c.total_hit_count, 0)), sort_logic(Document.position), ) elif sort == "created_at": diff --git a/api/models/dataset.py b/api/models/dataset.py index 2c4059f800..25ebe14738 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -910,7 +910,7 @@ class AppDatasetJoin(Base): id = mapped_column(StringUUID, primary_key=True, nullable=False, server_default=sa.text("uuid_generate_v4()")) app_id = mapped_column(StringUUID, nullable=False) dataset_id = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=db.func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp()) @property def app(self): @@ -931,7 +931,7 @@ class DatasetQuery(Base): source_app_id = mapped_column(StringUUID, nullable=True) created_by_role = mapped_column(String, nullable=False) created_by = mapped_column(StringUUID, nullable=False) - created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=db.func.current_timestamp()) + created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=sa.func.current_timestamp()) class DatasetKeywordTable(Base): diff --git a/api/models/model.py b/api/models/model.py index 9bcb81b41b..a8218c3a4e 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -1731,7 +1731,7 @@ class MessageChain(Base): type: Mapped[str] = mapped_column(String(255), nullable=False) input = mapped_column(sa.Text, nullable=True) output = mapped_column(sa.Text, nullable=True) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=db.func.current_timestamp()) + created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp()) class MessageAgentThought(Base): @@ -1769,7 +1769,7 @@ class MessageAgentThought(Base): latency: Mapped[float | None] = mapped_column(sa.Float, nullable=True) created_by_role = mapped_column(String, nullable=False) created_by = mapped_column(StringUUID, nullable=False) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=db.func.current_timestamp()) + created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp()) @property def files(self) -> list[Any]: @@ -1872,7 +1872,7 @@ class DatasetRetrieverResource(Base): index_node_hash = mapped_column(sa.Text, 
nullable=True) retriever_from = mapped_column(sa.Text, nullable=False) created_by = mapped_column(StringUUID, nullable=False) - created_at = mapped_column(sa.DateTime, nullable=False, server_default=db.func.current_timestamp()) + created_at = mapped_column(sa.DateTime, nullable=False, server_default=sa.func.current_timestamp()) class Tag(Base): diff --git a/api/services/app_service.py b/api/services/app_service.py index d524adbf3e..4fc6cf2494 100644 --- a/api/services/app_service.py +++ b/api/services/app_service.py @@ -2,6 +2,7 @@ import json import logging from typing import TypedDict, cast +import sqlalchemy as sa from flask_sqlalchemy.pagination import Pagination from configs import dify_config @@ -65,7 +66,7 @@ class AppService: return None app_models = db.paginate( - db.select(App).where(*filters).order_by(App.created_at.desc()), + sa.select(App).where(*filters).order_by(App.created_at.desc()), page=args["page"], per_page=args["limit"], error_out=False, diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index 8b3720026d..c9dd78ddd1 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -115,12 +115,12 @@ class DatasetService: # Check if permitted_dataset_ids is not empty to avoid WHERE false condition if permitted_dataset_ids and len(permitted_dataset_ids) > 0: query = query.where( - db.or_( + sa.or_( Dataset.permission == DatasetPermissionEnum.ALL_TEAM, - db.and_( + sa.and_( Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id ), - db.and_( + sa.and_( Dataset.permission == DatasetPermissionEnum.PARTIAL_TEAM, Dataset.id.in_(permitted_dataset_ids), ), @@ -128,9 +128,9 @@ class DatasetService: ) else: query = query.where( - db.or_( + sa.or_( Dataset.permission == DatasetPermissionEnum.ALL_TEAM, - db.and_( + sa.and_( Dataset.permission == DatasetPermissionEnum.ONLY_ME, Dataset.created_by == user.id ), ) @@ -1879,7 +1879,7 @@ class DocumentService: # for notion_info in notion_info_list: # workspace_id = notion_info.workspace_id # data_source_binding = DataSourceOauthBinding.query.filter( - # db.and_( + # sa.and_( # DataSourceOauthBinding.tenant_id == current_user.current_tenant_id, # DataSourceOauthBinding.provider == "notion", # DataSourceOauthBinding.disabled == False, diff --git a/api/services/plugin/plugin_migration.py b/api/services/plugin/plugin_migration.py index 5db19711e6..99946d8fa9 100644 --- a/api/services/plugin/plugin_migration.py +++ b/api/services/plugin/plugin_migration.py @@ -471,7 +471,7 @@ class PluginMigration: total_failed_tenant = 0 while True: # paginate - tenants = db.paginate(db.select(Tenant).order_by(Tenant.created_at.desc()), page=page, per_page=100) + tenants = db.paginate(sa.select(Tenant).order_by(Tenant.created_at.desc()), page=page, per_page=100) if tenants.items is None or len(tenants.items) == 0: break diff --git a/api/services/tag_service.py b/api/services/tag_service.py index 4674335ba8..db7ed3d5c3 100644 --- a/api/services/tag_service.py +++ b/api/services/tag_service.py @@ -1,5 +1,6 @@ import uuid +import sqlalchemy as sa from flask_login import current_user from sqlalchemy import func, select from werkzeug.exceptions import NotFound @@ -18,7 +19,7 @@ class TagService: .where(Tag.type == tag_type, Tag.tenant_id == current_tenant_id) ) if keyword: - query = query.where(db.and_(Tag.name.ilike(f"%{keyword}%"))) + query = query.where(sa.and_(Tag.name.ilike(f"%{keyword}%"))) query = query.group_by(Tag.id, Tag.type, Tag.name, Tag.created_at) results: list = 
query.order_by(Tag.created_at.desc()).all() return results diff --git a/api/tasks/document_indexing_sync_task.py b/api/tasks/document_indexing_sync_task.py index 10da9a9af4..4c1f38c3bb 100644 --- a/api/tasks/document_indexing_sync_task.py +++ b/api/tasks/document_indexing_sync_task.py @@ -2,6 +2,7 @@ import logging import time import click +import sqlalchemy as sa from celery import shared_task from sqlalchemy import select @@ -51,7 +52,7 @@ def document_indexing_sync_task(dataset_id: str, document_id: str): data_source_binding = ( db.session.query(DataSourceOauthBinding) .where( - db.and_( + sa.and_( DataSourceOauthBinding.tenant_id == document.tenant_id, DataSourceOauthBinding.provider == "notion", DataSourceOauthBinding.disabled == False, From a4acc64afd87d07fc1522a17f26bd287802ecc42 Mon Sep 17 00:00:00 2001 From: quicksand <quicksandzn@gmail.com> Date: Wed, 24 Sep 2025 10:09:35 +0800 Subject: [PATCH 09/14] fix(api):LLM node losing Flask context during parallel iterations (#26098) --- .../nodes/iteration/iteration_node.py | 36 +++++++++++-------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/api/core/workflow/nodes/iteration/iteration_node.py b/api/core/workflow/nodes/iteration/iteration_node.py index 593281c9b5..1a417b5739 100644 --- a/api/core/workflow/nodes/iteration/iteration_node.py +++ b/api/core/workflow/nodes/iteration/iteration_node.py @@ -1,9 +1,11 @@ +import contextvars import logging from collections.abc import Generator, Mapping, Sequence from concurrent.futures import Future, ThreadPoolExecutor, as_completed from datetime import UTC, datetime from typing import TYPE_CHECKING, Any, NewType, cast +from flask import Flask, current_app from typing_extensions import TypeIs from core.variables import IntegerVariable, NoneSegment @@ -35,6 +37,7 @@ from core.workflow.nodes.base.entities import BaseNodeData, RetryConfig from core.workflow.nodes.base.node import Node from core.workflow.nodes.iteration.entities import ErrorHandleMode, IterationNodeData from libs.datetime_utils import naive_utc_now +from libs.flask_utils import preserve_flask_contexts from .exc import ( InvalidIteratorValueError, @@ -239,6 +242,8 @@ class IterationNode(Node): self._execute_single_iteration_parallel, index=index, item=item, + flask_app=current_app._get_current_object(), # type: ignore + context_vars=contextvars.copy_context(), ) future_to_index[future] = index @@ -281,26 +286,29 @@ class IterationNode(Node): self, index: int, item: object, + flask_app: Flask, + context_vars: contextvars.Context, ) -> tuple[datetime, list[GraphNodeEventBase], object | None, int]: """Execute a single iteration in parallel mode and return results.""" - iter_start_at = datetime.now(UTC).replace(tzinfo=None) - events: list[GraphNodeEventBase] = [] - outputs_temp: list[object] = [] + with preserve_flask_contexts(flask_app=flask_app, context_vars=context_vars): + iter_start_at = datetime.now(UTC).replace(tzinfo=None) + events: list[GraphNodeEventBase] = [] + outputs_temp: list[object] = [] - graph_engine = self._create_graph_engine(index, item) + graph_engine = self._create_graph_engine(index, item) - # Collect events instead of yielding them directly - for event in self._run_single_iter( - variable_pool=graph_engine.graph_runtime_state.variable_pool, - outputs=outputs_temp, - graph_engine=graph_engine, - ): - events.append(event) + # Collect events instead of yielding them directly + for event in self._run_single_iter( + variable_pool=graph_engine.graph_runtime_state.variable_pool, + outputs=outputs_temp, + 
graph_engine=graph_engine, + ): + events.append(event) - # Get the output value from the temporary outputs list - output_value = outputs_temp[0] if outputs_temp else None + # Get the output value from the temporary outputs list + output_value = outputs_temp[0] if outputs_temp else None - return iter_start_at, events, output_value, graph_engine.graph_runtime_state.total_tokens + return iter_start_at, events, output_value, graph_engine.graph_runtime_state.total_tokens def _handle_iteration_success( self, From 9b360592920a69c1da8201be94211110ffa1e3de Mon Sep 17 00:00:00 2001 From: yangzheli <43645580+yangzheli@users.noreply.github.com> Date: Wed, 24 Sep 2025 15:53:59 +0800 Subject: [PATCH 10/14] fix(search-input): ensure proper value extraction in composition end handler (#26147) --- web/app/components/base/search-input/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/app/components/base/search-input/index.tsx b/web/app/components/base/search-input/index.tsx index cf9bc1c376..3330b55330 100644 --- a/web/app/components/base/search-input/index.tsx +++ b/web/app/components/base/search-input/index.tsx @@ -53,7 +53,7 @@ const SearchInput: FC<SearchInputProps> = ({ }} onCompositionEnd={(e) => { isComposing.current = false - onChange(e.data) + onChange(e.currentTarget.value) }} onFocus={() => setFocus(true)} onBlur={() => setFocus(false)} From 960bb8a9b4421232029a35a3c2ef8d9c09dc82c3 Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Wed, 24 Sep 2025 21:32:37 +0800 Subject: [PATCH 11/14] delete end_user check (#26187) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> --- api/controllers/service_api/dataset/document.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index d26c64fe36..e01bc8940c 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -30,7 +30,6 @@ from extensions.ext_database import db from fields.document_fields import document_fields, document_status_fields from libs.login import current_user from models.dataset import Dataset, Document, DocumentSegment -from models.model import EndUser from services.dataset_service import DatasetService, DocumentService from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig from services.file_service import FileService @@ -311,8 +310,6 @@ class DocumentAddByFileApi(DatasetApiResource): if not file.filename: raise FilenameNotExistsError - if not isinstance(current_user, EndUser): - raise ValueError("Invalid user account") if not current_user: raise ValueError("current_user is required") upload_file = FileService(db.engine).upload_file( @@ -406,9 +403,6 @@ class DocumentUpdateByFileApi(DatasetApiResource): if not current_user: raise ValueError("current_user is required") - if not isinstance(current_user, EndUser): - raise ValueError("Invalid user account") - try: upload_file = FileService(db.engine).upload_file( filename=file.filename, From e937c8c72e56ec8690c1790ff40cb4311bb63510 Mon Sep 17 00:00:00 2001 From: Blackoutta <37723456+Blackoutta@users.noreply.github.com> Date: Wed, 24 Sep 2025 22:14:50 +0800 Subject: [PATCH 12/14] improve: pooling httpx clients for requests to code sandbox and ssrf (#26052) --- api/.env.example | 7 ++ api/configs/feature/__init__.py | 35 ++++++++ .../helper/code_executor/code_executor.py | 39 ++++++--- 
api/core/helper/http_client_pooling.py | 59 +++++++++++++ api/core/helper/ssrf_proxy.py | 86 ++++++++++++------- docker/.env.example | 7 ++ docker/docker-compose.yaml | 7 ++ 7 files changed, 199 insertions(+), 41 deletions(-) create mode 100644 api/core/helper/http_client_pooling.py diff --git a/api/.env.example b/api/.env.example index 78a363e506..64e79bf0b8 100644 --- a/api/.env.example +++ b/api/.env.example @@ -408,6 +408,9 @@ SSRF_DEFAULT_TIME_OUT=5 SSRF_DEFAULT_CONNECT_TIME_OUT=5 SSRF_DEFAULT_READ_TIME_OUT=5 SSRF_DEFAULT_WRITE_TIME_OUT=5 +SSRF_POOL_MAX_CONNECTIONS=100 +SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS=20 +SSRF_POOL_KEEPALIVE_EXPIRY=5.0 BATCH_UPLOAD_LIMIT=10 KEYWORD_DATA_SOURCE_TYPE=database @@ -418,6 +421,10 @@ WORKFLOW_FILE_UPLOAD_LIMIT=10 # CODE EXECUTION CONFIGURATION CODE_EXECUTION_ENDPOINT=http://127.0.0.1:8194 CODE_EXECUTION_API_KEY=dify-sandbox +CODE_EXECUTION_SSL_VERIFY=True +CODE_EXECUTION_POOL_MAX_CONNECTIONS=100 +CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS=20 +CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY=5.0 CODE_MAX_NUMBER=9223372036854775807 CODE_MIN_NUMBER=-9223372036854775808 CODE_MAX_STRING_LENGTH=80000 diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py index b17f30210c..e836059ca6 100644 --- a/api/configs/feature/__init__.py +++ b/api/configs/feature/__init__.py @@ -113,6 +113,21 @@ class CodeExecutionSandboxConfig(BaseSettings): default=10.0, ) + CODE_EXECUTION_POOL_MAX_CONNECTIONS: PositiveInt = Field( + description="Maximum number of concurrent connections for the code execution HTTP client", + default=100, + ) + + CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS: PositiveInt = Field( + description="Maximum number of persistent keep-alive connections for the code execution HTTP client", + default=20, + ) + + CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY: PositiveFloat | None = Field( + description="Keep-alive expiry in seconds for idle connections (set to None to disable)", + default=5.0, + ) + CODE_MAX_NUMBER: PositiveInt = Field( description="Maximum allowed numeric value in code execution", default=9223372036854775807, @@ -153,6 +168,11 @@ class CodeExecutionSandboxConfig(BaseSettings): default=1000, ) + CODE_EXECUTION_SSL_VERIFY: bool = Field( + description="Enable or disable SSL verification for code execution requests", + default=True, + ) + class PluginConfig(BaseSettings): """ @@ -404,6 +424,21 @@ class HttpConfig(BaseSettings): default=5, ) + SSRF_POOL_MAX_CONNECTIONS: PositiveInt = Field( + description="Maximum number of concurrent connections for the SSRF HTTP client", + default=100, + ) + + SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS: PositiveInt = Field( + description="Maximum number of persistent keep-alive connections for the SSRF HTTP client", + default=20, + ) + + SSRF_POOL_KEEPALIVE_EXPIRY: PositiveFloat | None = Field( + description="Keep-alive expiry in seconds for idle SSRF connections (set to None to disable)", + default=5.0, + ) + RESPECT_XFORWARD_HEADERS_ENABLED: bool = Field( description="Enable handling of X-Forwarded-For, X-Forwarded-Proto, and X-Forwarded-Port headers" " when the app is behind a single trusted reverse proxy.", diff --git a/api/core/helper/code_executor/code_executor.py b/api/core/helper/code_executor/code_executor.py index c44a8e1840..0c1d03dc13 100644 --- a/api/core/helper/code_executor/code_executor.py +++ b/api/core/helper/code_executor/code_executor.py @@ -4,7 +4,7 @@ from enum import StrEnum from threading import Lock from typing import Any -from httpx import Timeout, post +import httpx from pydantic import 
BaseModel from yarl import URL @@ -13,9 +13,17 @@ from core.helper.code_executor.javascript.javascript_transformer import NodeJsTe from core.helper.code_executor.jinja2.jinja2_transformer import Jinja2TemplateTransformer from core.helper.code_executor.python3.python3_transformer import Python3TemplateTransformer from core.helper.code_executor.template_transformer import TemplateTransformer +from core.helper.http_client_pooling import get_pooled_http_client logger = logging.getLogger(__name__) code_execution_endpoint_url = URL(str(dify_config.CODE_EXECUTION_ENDPOINT)) +CODE_EXECUTION_SSL_VERIFY = dify_config.CODE_EXECUTION_SSL_VERIFY +_CODE_EXECUTOR_CLIENT_LIMITS = httpx.Limits( + max_connections=dify_config.CODE_EXECUTION_POOL_MAX_CONNECTIONS, + max_keepalive_connections=dify_config.CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS, + keepalive_expiry=dify_config.CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY, +) +_CODE_EXECUTOR_CLIENT_KEY = "code_executor:http_client" class CodeExecutionError(Exception): @@ -38,6 +46,13 @@ class CodeLanguage(StrEnum): JAVASCRIPT = "javascript" +def _build_code_executor_client() -> httpx.Client: + return httpx.Client( + verify=CODE_EXECUTION_SSL_VERIFY, + limits=_CODE_EXECUTOR_CLIENT_LIMITS, + ) + + class CodeExecutor: dependencies_cache: dict[str, str] = {} dependencies_cache_lock = Lock() @@ -76,17 +91,21 @@ class CodeExecutor: "enable_network": True, } + timeout = httpx.Timeout( + connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT, + read=dify_config.CODE_EXECUTION_READ_TIMEOUT, + write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT, + pool=None, + ) + + client = get_pooled_http_client(_CODE_EXECUTOR_CLIENT_KEY, _build_code_executor_client) + try: - response = post( + response = client.post( str(url), json=data, headers=headers, - timeout=Timeout( - connect=dify_config.CODE_EXECUTION_CONNECT_TIMEOUT, - read=dify_config.CODE_EXECUTION_READ_TIMEOUT, - write=dify_config.CODE_EXECUTION_WRITE_TIMEOUT, - pool=None, - ), + timeout=timeout, ) if response.status_code == 503: raise CodeExecutionError("Code execution service is unavailable") @@ -106,8 +125,8 @@ class CodeExecutor: try: response_data = response.json() - except: - raise CodeExecutionError("Failed to parse response") + except Exception as e: + raise CodeExecutionError("Failed to parse response") from e if (code := response_data.get("code")) != 0: raise CodeExecutionError(f"Got error code: {code}. 
Got error msg: {response_data.get('message')}") diff --git a/api/core/helper/http_client_pooling.py b/api/core/helper/http_client_pooling.py new file mode 100644 index 0000000000..f4c3ff0e8b --- /dev/null +++ b/api/core/helper/http_client_pooling.py @@ -0,0 +1,59 @@ +"""HTTP client pooling utilities.""" + +from __future__ import annotations + +import atexit +import threading +from collections.abc import Callable + +import httpx + +ClientBuilder = Callable[[], httpx.Client] + + +class HttpClientPoolFactory: + """Thread-safe factory that maintains reusable HTTP client instances.""" + + def __init__(self) -> None: + self._clients: dict[str, httpx.Client] = {} + self._lock = threading.Lock() + + def get_or_create(self, key: str, builder: ClientBuilder) -> httpx.Client: + """Return a pooled client associated with ``key`` creating it on demand.""" + client = self._clients.get(key) + if client is not None: + return client + + with self._lock: + client = self._clients.get(key) + if client is None: + client = builder() + self._clients[key] = client + return client + + def close_all(self) -> None: + """Close all pooled clients and clear the pool.""" + with self._lock: + for client in self._clients.values(): + client.close() + self._clients.clear() + + +_factory = HttpClientPoolFactory() + + +def get_pooled_http_client(key: str, builder: ClientBuilder) -> httpx.Client: + """Return a pooled client for the given ``key`` using ``builder`` when missing.""" + return _factory.get_or_create(key, builder) + + +def close_all_pooled_clients() -> None: + """Close every client created through the pooling factory.""" + _factory.close_all() + + +def _register_shutdown_hook() -> None: + atexit.register(close_all_pooled_clients) + + +_register_shutdown_hook() diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py index cbb78939d2..0de026f3c7 100644 --- a/api/core/helper/ssrf_proxy.py +++ b/api/core/helper/ssrf_proxy.py @@ -8,27 +8,23 @@ import time import httpx from configs import dify_config +from core.helper.http_client_pooling import get_pooled_http_client logger = logging.getLogger(__name__) SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES -http_request_node_ssl_verify = True # Default value for http_request_node_ssl_verify is True -try: - config_value = dify_config.HTTP_REQUEST_NODE_SSL_VERIFY - http_request_node_ssl_verify_lower = str(config_value).lower() - if http_request_node_ssl_verify_lower == "true": - http_request_node_ssl_verify = True - elif http_request_node_ssl_verify_lower == "false": - http_request_node_ssl_verify = False - else: - raise ValueError("Invalid value. 
diff --git a/api/core/helper/ssrf_proxy.py b/api/core/helper/ssrf_proxy.py
index cbb78939d2..0de026f3c7 100644
--- a/api/core/helper/ssrf_proxy.py
+++ b/api/core/helper/ssrf_proxy.py
@@ -8,27 +8,23 @@ import time
 import httpx

 from configs import dify_config
+from core.helper.http_client_pooling import get_pooled_http_client

 logger = logging.getLogger(__name__)

 SSRF_DEFAULT_MAX_RETRIES = dify_config.SSRF_DEFAULT_MAX_RETRIES

-http_request_node_ssl_verify = True  # Default value for http_request_node_ssl_verify is True
-try:
-    config_value = dify_config.HTTP_REQUEST_NODE_SSL_VERIFY
-    http_request_node_ssl_verify_lower = str(config_value).lower()
-    if http_request_node_ssl_verify_lower == "true":
-        http_request_node_ssl_verify = True
-    elif http_request_node_ssl_verify_lower == "false":
-        http_request_node_ssl_verify = False
-    else:
-        raise ValueError("Invalid value. HTTP_REQUEST_NODE_SSL_VERIFY should be 'True' or 'False'")
-except NameError:
-    http_request_node_ssl_verify = True
-
 BACKOFF_FACTOR = 0.5
 STATUS_FORCELIST = [429, 500, 502, 503, 504]

+_SSL_VERIFIED_POOL_KEY = "ssrf:verified"
+_SSL_UNVERIFIED_POOL_KEY = "ssrf:unverified"
+_SSRF_CLIENT_LIMITS = httpx.Limits(
+    max_connections=dify_config.SSRF_POOL_MAX_CONNECTIONS,
+    max_keepalive_connections=dify_config.SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS,
+    keepalive_expiry=dify_config.SSRF_POOL_KEEPALIVE_EXPIRY,
+)
+

 class MaxRetriesExceededError(ValueError):
     """Raised when the maximum number of retries is exceeded."""
@@ -36,6 +32,45 @@

     pass


+def _create_proxy_mounts() -> dict[str, httpx.HTTPTransport]:
+    return {
+        "http://": httpx.HTTPTransport(
+            proxy=dify_config.SSRF_PROXY_HTTP_URL,
+        ),
+        "https://": httpx.HTTPTransport(
+            proxy=dify_config.SSRF_PROXY_HTTPS_URL,
+        ),
+    }
+
+
+def _build_ssrf_client(verify: bool) -> httpx.Client:
+    if dify_config.SSRF_PROXY_ALL_URL:
+        return httpx.Client(
+            proxy=dify_config.SSRF_PROXY_ALL_URL,
+            verify=verify,
+            limits=_SSRF_CLIENT_LIMITS,
+        )
+
+    if dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
+        return httpx.Client(
+            mounts=_create_proxy_mounts(),
+            verify=verify,
+            limits=_SSRF_CLIENT_LIMITS,
+        )
+
+    return httpx.Client(verify=verify, limits=_SSRF_CLIENT_LIMITS)
+
+
+def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
+    if not isinstance(ssl_verify_enabled, bool):
+        raise ValueError("SSRF client verify flag must be a boolean")
+
+    return get_pooled_http_client(
+        _SSL_VERIFIED_POOL_KEY if ssl_verify_enabled else _SSL_UNVERIFIED_POOL_KEY,
+        lambda: _build_ssrf_client(verify=ssl_verify_enabled),
+    )
+
+
 def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
     if "allow_redirects" in kwargs:
         allow_redirects = kwargs.pop("allow_redirects")
@@ -50,33 +85,22 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
         write=dify_config.SSRF_DEFAULT_WRITE_TIME_OUT,
     )

-    if "ssl_verify" not in kwargs:
-        kwargs["ssl_verify"] = http_request_node_ssl_verify
-
-    ssl_verify = kwargs.pop("ssl_verify")
+    # prioritize per-call option, which can be switched on and off inside the HTTP node on the web UI
+    verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
+    client = _get_ssrf_client(verify_option)

     retries = 0
     while retries <= max_retries:
         try:
-            if dify_config.SSRF_PROXY_ALL_URL:
-                with httpx.Client(proxy=dify_config.SSRF_PROXY_ALL_URL, verify=ssl_verify) as client:
-                    response = client.request(method=method, url=url, **kwargs)
-            elif dify_config.SSRF_PROXY_HTTP_URL and dify_config.SSRF_PROXY_HTTPS_URL:
-                proxy_mounts = {
-                    "http://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTP_URL, verify=ssl_verify),
-                    "https://": httpx.HTTPTransport(proxy=dify_config.SSRF_PROXY_HTTPS_URL, verify=ssl_verify),
-                }
-                with httpx.Client(mounts=proxy_mounts, verify=ssl_verify) as client:
-                    response = client.request(method=method, url=url, **kwargs)
-            else:
-                with httpx.Client(verify=ssl_verify) as client:
-                    response = client.request(method=method, url=url, **kwargs)
+            response = client.request(method=method, url=url, **kwargs)

             if response.status_code not in STATUS_FORCELIST:
                 return response
             else:
                 logger.warning(
-                    "Received status code %s for URL %s which is in the force list", response.status_code, url
+                    "Received status code %s for URL %s which is in the force list",
+                    response.status_code,
+                    url,
                 )

         except httpx.RequestError as e:
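Because `verify` can only be set when an `httpx.Client` is constructed, the pool keys clients on the verify flag: each process holds at most two long-lived SSRF clients (verified and unverified) instead of building and discarding one per request. A hedged sketch of a call that would land on the unverified pool — the URL is illustrative:

    from core.helper import ssrf_proxy

    # Resolves to the "ssrf:unverified" pooled client; later calls with
    # ssl_verify=False reuse the same keep-alive connections.
    response = ssrf_proxy.make_request("GET", "https://self-signed.internal.example/health", ssl_verify=False)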
diff --git a/docker/.env.example b/docker/.env.example
index 07b4088470..eebc18118f 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -859,6 +859,10 @@ OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES=5
 # The sandbox service endpoint.
 CODE_EXECUTION_ENDPOINT=http://sandbox:8194
 CODE_EXECUTION_API_KEY=dify-sandbox
+CODE_EXECUTION_SSL_VERIFY=True
+CODE_EXECUTION_POOL_MAX_CONNECTIONS=100
+CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS=20
+CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY=5.0
 CODE_MAX_NUMBER=9223372036854775807
 CODE_MIN_NUMBER=-9223372036854775808
 CODE_MAX_DEPTH=5
@@ -1134,6 +1138,9 @@ SSRF_DEFAULT_TIME_OUT=5
 SSRF_DEFAULT_CONNECT_TIME_OUT=5
 SSRF_DEFAULT_READ_TIME_OUT=5
 SSRF_DEFAULT_WRITE_TIME_OUT=5
+SSRF_POOL_MAX_CONNECTIONS=100
+SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS=20
+SSRF_POOL_KEEPALIVE_EXPIRY=5.0

 # ------------------------------
 # docker env var for specifying vector db type at startup
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index dc94883b75..dd3d42c0f7 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -382,6 +382,10 @@ x-shared-env: &shared-api-worker-env
   OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES: ${OWNER_TRANSFER_TOKEN_EXPIRY_MINUTES:-5}
   CODE_EXECUTION_ENDPOINT: ${CODE_EXECUTION_ENDPOINT:-http://sandbox:8194}
   CODE_EXECUTION_API_KEY: ${CODE_EXECUTION_API_KEY:-dify-sandbox}
+  CODE_EXECUTION_SSL_VERIFY: ${CODE_EXECUTION_SSL_VERIFY:-True}
+  CODE_EXECUTION_POOL_MAX_CONNECTIONS: ${CODE_EXECUTION_POOL_MAX_CONNECTIONS:-100}
+  CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS: ${CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS:-20}
+  CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY: ${CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY:-5.0}
   CODE_MAX_NUMBER: ${CODE_MAX_NUMBER:-9223372036854775807}
   CODE_MIN_NUMBER: ${CODE_MIN_NUMBER:--9223372036854775808}
   CODE_MAX_DEPTH: ${CODE_MAX_DEPTH:-5}
@@ -497,6 +501,9 @@ x-shared-env: &shared-api-worker-env
   SSRF_DEFAULT_CONNECT_TIME_OUT: ${SSRF_DEFAULT_CONNECT_TIME_OUT:-5}
   SSRF_DEFAULT_READ_TIME_OUT: ${SSRF_DEFAULT_READ_TIME_OUT:-5}
   SSRF_DEFAULT_WRITE_TIME_OUT: ${SSRF_DEFAULT_WRITE_TIME_OUT:-5}
+  SSRF_POOL_MAX_CONNECTIONS: ${SSRF_POOL_MAX_CONNECTIONS:-100}
+  SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS: ${SSRF_POOL_MAX_KEEPALIVE_CONNECTIONS:-20}
+  SSRF_POOL_KEEPALIVE_EXPIRY: ${SSRF_POOL_KEEPALIVE_EXPIRY:-5.0}
   EXPOSE_NGINX_PORT: ${EXPOSE_NGINX_PORT:-80}
   EXPOSE_NGINX_SSL_PORT: ${EXPOSE_NGINX_SSL_PORT:-443}
   POSITION_TOOL_PINS: ${POSITION_TOOL_PINS:-}
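The pooling module registers an `atexit` hook, so clients are closed when the interpreter exits; code that needs a deterministic reset (tests, pre-fork workers) can also flush the pool explicitly via `close_all_pooled_clients`. A sketch of one way to do that in a pytest fixture — the fixture itself is an illustrative assumption, not part of the patch:

    import pytest

    from core.helper.http_client_pooling import close_all_pooled_clients

    @pytest.fixture(autouse=True)
    def _reset_http_pools():
        # Run the test, then close pooled clients so the next test
        # re-exercises client construction from a clean slate.
        yield
        close_all_pooled_clients()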
From 6841a09667d87dc1ad4c4bbfbc6bcc79dd3aafe7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=83=AA=E3=82=A4=E3=83=8E=20Lin?= <sorphwer@gmail.com>
Date: Thu, 25 Sep 2025 10:39:44 +0800
Subject: [PATCH 13/14] fix: remote filename will be 'inline' if Content-Disposition: inline (#25877)

Fixed an issue where the resolved filename became 'inline' when the response
header contained `Content-Disposition: inline` while retrieving a file by URL.

Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 api/factories/file_factory.py                 |  41 +++++--
 .../unit_tests/factories/test_file_factory.py | 115 ++++++++++++++++++
 2 files changed, 147 insertions(+), 9 deletions(-)
 create mode 100644 api/tests/unit_tests/factories/test_file_factory.py

diff --git a/api/factories/file_factory.py b/api/factories/file_factory.py
index 588168bd39..d66c757249 100644
--- a/api/factories/file_factory.py
+++ b/api/factories/file_factory.py
@@ -8,6 +8,7 @@ from typing import Any
 import httpx
 from sqlalchemy import select
 from sqlalchemy.orm import Session
+from werkzeug.http import parse_options_header

 from constants import AUDIO_EXTENSIONS, DOCUMENT_EXTENSIONS, IMAGE_EXTENSIONS, VIDEO_EXTENSIONS
 from core.file import File, FileBelongsTo, FileTransferMethod, FileType, FileUploadConfig, helpers
@@ -247,6 +248,25 @@ def _build_from_remote_url(
     )


+def _extract_filename(url_path: str, content_disposition: str | None) -> str | None:
+    filename = None
+    # Try to extract from the Content-Disposition header first
+    if content_disposition:
+        _, params = parse_options_header(content_disposition)
+        # RFC 5987 (https://datatracker.ietf.org/doc/html/rfc5987): filename* takes precedence over filename
+        filename = params.get("filename*") or params.get("filename")
+    # Fall back to the URL path if the header yields no filename
+    if not filename:
+        filename = os.path.basename(url_path)
+    return filename or None
+
+
+def _guess_mime_type(filename: str) -> str:
+    """Guess the MIME type from a filename, returning an empty string if the guess fails."""
+    guessed_mime, _ = mimetypes.guess_type(filename)
+    return guessed_mime or ""
+
+
 def _get_remote_file_info(url: str):
     file_size = -1
     parsed_url = urllib.parse.urlparse(url)
@@ -254,23 +274,26 @@ def _get_remote_file_info(url: str):
     filename = os.path.basename(url_path)

     # Initialize mime_type from filename as fallback
-    mime_type, _ = mimetypes.guess_type(filename)
-    if mime_type is None:
-        mime_type = ""
+    mime_type = _guess_mime_type(filename)

     resp = ssrf_proxy.head(url, follow_redirects=True)
     if resp.status_code == httpx.codes.OK:
-        if content_disposition := resp.headers.get("Content-Disposition"):
-            filename = str(content_disposition.split("filename=")[-1].strip('"'))
-            # Re-guess mime_type from updated filename
-            mime_type, _ = mimetypes.guess_type(filename)
-            if mime_type is None:
-                mime_type = ""
+        content_disposition = resp.headers.get("Content-Disposition")
+        extracted_filename = _extract_filename(url_path, content_disposition)
+        if extracted_filename:
+            filename = extracted_filename
+            mime_type = _guess_mime_type(filename)
         file_size = int(resp.headers.get("Content-Length", file_size))
         # Fallback to Content-Type header if mime_type is still empty
         if not mime_type:
             mime_type = resp.headers.get("Content-Type", "").split(";")[0].strip()

+    if not filename:
+        extension = mimetypes.guess_extension(mime_type) or ".bin"
+        filename = f"{uuid.uuid4().hex}{extension}"
+    if not mime_type:
+        mime_type = _guess_mime_type(filename)
+
     return mime_type, filename, file_size
diff --git a/api/tests/unit_tests/factories/test_file_factory.py b/api/tests/unit_tests/factories/test_file_factory.py
new file mode 100644
index 0000000000..777fe5a6e7
--- /dev/null
+++ b/api/tests/unit_tests/factories/test_file_factory.py
@@ -0,0 +1,115 @@
+import re
+
+import pytest
+
+from factories.file_factory import _get_remote_file_info
+
+
+class _FakeResponse:
+    def __init__(self, status_code: int, headers: dict[str, str]):
+        self.status_code = status_code
+        self.headers = headers
+
+
+def _mock_head(monkeypatch: pytest.MonkeyPatch, headers: dict[str, str], status_code: int = 200):
+    def _fake_head(url: str, follow_redirects: bool = True):
+        return _FakeResponse(status_code=status_code, headers=headers)
+
+    monkeypatch.setattr("factories.file_factory.ssrf_proxy.head", _fake_head)
+
+
+class TestGetRemoteFileInfo:
+    """Tests for _get_remote_file_info focusing on filename extraction rules."""
+
+    def test_inline_no_filename(self, monkeypatch: pytest.MonkeyPatch):
+        _mock_head(
+            monkeypatch,
+            {
+                "Content-Disposition": "inline",
+                "Content-Type": "application/pdf",
+                "Content-Length": "123",
+            },
+        )
+        mime_type, filename, size = _get_remote_file_info("http://example.com/some/path/file.pdf")
+        assert filename == "file.pdf"
+        assert mime_type == "application/pdf"
+        assert size == 123
+
+    def test_attachment_no_filename(self, monkeypatch: pytest.MonkeyPatch):
+        _mock_head(
+            monkeypatch,
+            {
+                "Content-Disposition": "attachment",
+                "Content-Type": "application/octet-stream",
+                "Content-Length": "456",
+            },
+        )
+        mime_type, filename, size = _get_remote_file_info("http://example.com/downloads/data.bin")
+        assert filename == "data.bin"
+        assert mime_type == "application/octet-stream"
+        assert size == 456
+
+    def test_attachment_quoted_space_filename(self, monkeypatch: pytest.MonkeyPatch):
+        _mock_head(
+            monkeypatch,
+            {
+                "Content-Disposition": 'attachment; filename="file name.jpg"',
+                "Content-Type": "image/jpeg",
+                "Content-Length": "789",
+            },
+        )
+        mime_type, filename, size = _get_remote_file_info("http://example.com/ignored")
+        assert filename == "file name.jpg"
+        assert mime_type == "image/jpeg"
+        assert size == 789
+
+    def test_attachment_filename_star_percent20(self, monkeypatch: pytest.MonkeyPatch):
+        _mock_head(
+            monkeypatch,
+            {
+                "Content-Disposition": "attachment; filename*=UTF-8''file%20name.jpg",
+                "Content-Type": "image/jpeg",
+            },
+        )
+        mime_type, filename, _ = _get_remote_file_info("http://example.com/ignored")
+        assert filename == "file name.jpg"
+        assert mime_type == "image/jpeg"
+
+    def test_attachment_filename_star_chinese(self, monkeypatch: pytest.MonkeyPatch):
+        _mock_head(
+            monkeypatch,
+            {
+                "Content-Disposition": "attachment; filename*=UTF-8''%E6%B5%8B%E8%AF%95%E6%96%87%E4%BB%B6.jpg",
+                "Content-Type": "image/jpeg",
+            },
+        )
+        mime_type, filename, _ = _get_remote_file_info("http://example.com/ignored")
+        assert filename == "测试文件.jpg"
+        assert mime_type == "image/jpeg"
+
+    def test_filename_from_url_when_no_header(self, monkeypatch: pytest.MonkeyPatch):
+        _mock_head(
+            monkeypatch,
+            {
+                # No Content-Disposition
+                "Content-Type": "text/plain",
+                "Content-Length": "12",
+            },
+        )
+        mime_type, filename, size = _get_remote_file_info("http://example.com/static/file.txt")
+        assert filename == "file.txt"
+        assert mime_type == "text/plain"
+        assert size == 12
+
+    def test_no_filename_in_url_or_header_generates_uuid_bin(self, monkeypatch: pytest.MonkeyPatch):
+        _mock_head(
+            monkeypatch,
+            {
+                "Content-Disposition": "inline",
+                "Content-Type": "application/octet-stream",
+            },
+        )
+        mime_type, filename, _ = _get_remote_file_info("http://example.com/test/")
+        # Should generate a random hex filename with .bin extension
+        assert re.match(r"^[0-9a-f]{32}\.bin$", filename) is not None
+        assert mime_type == "application/octet-stream"
From f104839672ccf111b2799fc31a85870e5e997b7d Mon Sep 17 00:00:00 2001
From: "Junyan Qin (Chin)" <rockchinq@gmail.com>
Date: Thu, 25 Sep 2025 15:33:27 +0800
Subject: [PATCH 14/14] perf: provide X-Dify-Version for marketplace api access (#26210)

---
 api/core/helper/marketplace.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/core/helper/marketplace.py b/api/core/helper/marketplace.py
index 89dae4808f..10f304c087 100644
--- a/api/core/helper/marketplace.py
+++ b/api/core/helper/marketplace.py
@@ -23,7 +23,7 @@ def batch_fetch_plugin_manifests(plugin_ids: list[str]) -> Sequence[MarketplaceP
         return []

     url = str(marketplace_api_url / "api/v1/plugins/batch")
-    response = httpx.post(url, json={"plugin_ids": plugin_ids})
+    response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
     response.raise_for_status()

     return [MarketplacePluginDeclaration(**plugin) for plugin in response.json()["data"]["plugins"]]
@@ -36,7 +36,7 @@ def batch_fetch_plugin_manifests_ignore_deserialization_error(
         return []

     url = str(marketplace_api_url / "api/v1/plugins/batch")
-    response = httpx.post(url, json={"plugin_ids": plugin_ids})
+    response = httpx.post(url, json={"plugin_ids": plugin_ids}, headers={"X-Dify-Version": dify_config.project.version})
     response.raise_for_status()

     result: list[MarketplacePluginDeclaration] = []
     for plugin in response.json()["data"]["plugins"]:
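Both marketplace calls now advertise the running Dify version, presumably so the marketplace can tailor plugin listings to the caller. A hedged sketch of verifying such a header with httpx's mock transport — the version string and handler here are illustrative assumptions:

    import httpx

    def handler(request: httpx.Request) -> httpx.Response:
        # Assert the outgoing request carries the version header.
        assert request.headers.get("X-Dify-Version") == "1.0.0"
        return httpx.Response(200, json={"data": {"plugins": []}})

    client = httpx.Client(transport=httpx.MockTransport(handler))
    response = client.post(
        "https://marketplace.dify.ai/api/v1/plugins/batch",
        json={"plugin_ids": []},
        headers={"X-Dify-Version": "1.0.0"},
    )
    response.raise_for_status()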