Merge branch 'main' into fix/loop-iteration-single-step-debug

-LAN- 2025-12-24 15:50:19 +08:00 committed by GitHub
commit 0383ee3a77
3659 changed files with 160523 additions and 90635 deletions


@ -1,13 +1,13 @@
---
name: Dify Frontend Testing
description: Generate Jest + React Testing Library tests for Dify frontend components, hooks, and utilities. Triggers on testing, spec files, coverage, Jest, RTL, unit tests, integration tests, or write/review test requests.
name: frontend-testing
description: Generate Vitest + React Testing Library tests for Dify frontend components, hooks, and utilities. Triggers on testing, spec files, coverage, Vitest, RTL, unit tests, integration tests, or write/review test requests.
---
# Dify Frontend Testing Skill
This skill enables Claude to generate high-quality, comprehensive frontend tests for the Dify project following established conventions and best practices.
> **⚠️ Authoritative Source**: This skill is derived from `web/testing/testing.md`. When in doubt, always refer to that document as the canonical specification.
> **⚠️ Authoritative Source**: This skill is derived from `web/testing/testing.md`. Use Vitest mock/timer APIs (`vi.*`).
## When to Apply This Skill
@ -15,7 +15,7 @@ Apply this skill when the user:
- Asks to **write tests** for a component, hook, or utility
- Asks to **review existing tests** for completeness
- Mentions **Jest**, **React Testing Library**, **RTL**, or **spec files**
- Mentions **Vitest**, **React Testing Library**, **RTL**, or **spec files**
- Requests **test coverage** improvement
- Uses `pnpm analyze-component` output as context
- Mentions **testing**, **unit tests**, or **integration tests** for frontend code
@ -33,9 +33,9 @@ Apply this skill when the user:
| Tool | Version | Purpose |
|------|---------|---------|
| Jest | 29.7 | Test runner |
| Vitest | 4.0.16 | Test runner |
| React Testing Library | 16.0 | Component testing |
| happy-dom | - | Test environment |
| jsdom | - | Test environment |
| nock | 14.0 | HTTP mocking |
| TypeScript | 5.x | Type safety |
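For orientation, here is a minimal sketch of how these tools are typically wired together in a Vitest config. This is illustrative only (plugin and option names are assumptions); `web/vitest.config.ts` is the source of truth and may differ.

```typescript
// Sketch of a Vitest config matching the table above (names are illustrative).
import { defineConfig } from 'vitest/config'
import react from '@vitejs/plugin-react'

export default defineConfig({
  plugins: [react()],
  test: {
    environment: 'jsdom', // DOM implementation for React Testing Library
    globals: true, // describe/it/expect available without imports
    setupFiles: ['./vitest.setup.ts'], // global mocks live here
    coverage: { provider: 'v8' },
  },
})
```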
@ -46,13 +46,13 @@ Apply this skill when the user:
pnpm test
# Watch mode
pnpm test -- --watch
pnpm test:watch
# Run specific file
pnpm test -- path/to/file.spec.tsx
pnpm test path/to/file.spec.tsx
# Generate coverage report
pnpm test -- --coverage
pnpm test:coverage
# Analyze component complexity
pnpm analyze-component <path>
@ -77,9 +77,9 @@ import Component from './index'
// import { ChildComponent } from './child-component'
// ✅ Mock external dependencies only
jest.mock('@/service/api')
jest.mock('next/navigation', () => ({
useRouter: () => ({ push: jest.fn() }),
vi.mock('@/service/api')
vi.mock('next/navigation', () => ({
useRouter: () => ({ push: vi.fn() }),
usePathname: () => '/test',
}))
@ -88,7 +88,7 @@ let mockSharedState = false
describe('ComponentName', () => {
beforeEach(() => {
jest.clearAllMocks() // ✅ Reset mocks BEFORE each test
vi.clearAllMocks() // ✅ Reset mocks BEFORE each test
mockSharedState = false // ✅ Reset shared state
})
@ -117,7 +117,7 @@ describe('ComponentName', () => {
// User Interactions
describe('User Interactions', () => {
it('should handle click events', () => {
const handleClick = jest.fn()
const handleClick = vi.fn()
render(<Component onClick={handleClick} />)
fireEvent.click(screen.getByRole('button'))
@ -155,7 +155,7 @@ describe('ComponentName', () => {
For each file:
┌────────────────────────────────────────┐
│ 1. Write test │
│ 2. Run: pnpm test -- <file>.spec.tsx │
│ 2. Run: pnpm test <file>.spec.tsx │
│ 3. PASS? → Mark complete, next file │
│ FAIL? → Fix first, then continue │
└────────────────────────────────────────┘
@ -178,7 +178,7 @@ Process in this order for multi-file testing:
- **500+ lines**: Consider splitting before testing
- **Many dependencies**: Extract logic into hooks first
> 📖 See `guides/workflow.md` for complete workflow details and todo list format.
> 📖 See `references/workflow.md` for complete workflow details and todo list format.
## Testing Strategy
@ -289,17 +289,18 @@ For each test file generated, aim for:
- ✅ **>95%** branch coverage
- ✅ **>95%** line coverage
> **Note**: For multi-file directories, process one file at a time with full coverage each. See `guides/workflow.md`.
> **Note**: For multi-file directories, process one file at a time with full coverage each. See `references/workflow.md`.
## Detailed Guides
For more detailed information, refer to:
- `guides/workflow.md` - **Incremental testing workflow** (MUST READ for multi-file testing)
- `guides/mocking.md` - Mock patterns and best practices
- `guides/async-testing.md` - Async operations and API calls
- `guides/domain-components.md` - Workflow, Dataset, Configuration testing
- `guides/common-patterns.md` - Frequently used testing patterns
- `references/workflow.md` - **Incremental testing workflow** (MUST READ for multi-file testing)
- `references/mocking.md` - Mock patterns and best practices
- `references/async-testing.md` - Async operations and API calls
- `references/domain-components.md` - Workflow, Dataset, Configuration testing
- `references/common-patterns.md` - Frequently used testing patterns
- `references/checklist.md` - Test generation checklist and validation steps
## Authoritative References
@ -315,7 +316,7 @@ For more detailed information, refer to:
### Project Configuration
- `web/jest.config.ts` - Jest configuration
- `web/jest.setup.ts` - Test environment setup
- `web/vitest.config.ts` - Vitest configuration
- `web/vitest.setup.ts` - Test environment setup
- `web/testing/analyze-component.js` - Component analysis tool
- `web/__mocks__/react-i18next.ts` - Shared i18n mock (auto-loaded by Jest, no explicit mock needed; override locally only for custom translations)
- Modules are not mocked automatically. Global mocks live in `web/vitest.setup.ts` (for example `react-i18next`, `next/image`); mock other modules like `ky` or `mime` locally in test files.
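For example, a local mock for `ky` might look like the following sketch. The mocked surface here is an assumption; shape it to whatever the code under test actually calls.

```typescript
// Hypothetical local mock for `ky` inside a single spec file.
import { vi } from 'vitest'

vi.mock('ky', () => ({
  default: {
    // ky's helpers return a promise-like object exposing .json()
    get: vi.fn(() => ({ json: vi.fn().mockResolvedValue({ ok: true }) })),
    post: vi.fn(() => ({ json: vi.fn().mockResolvedValue({ ok: true }) })),
  },
}))
```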


@ -23,14 +23,14 @@ import userEvent from '@testing-library/user-event'
// ============================================================================
// Mocks
// ============================================================================
// WHY: Mocks must be hoisted to top of file (Jest requirement).
// WHY: Mocks must be hoisted to top of file (Vitest requirement).
// They run BEFORE imports, so keep them before component imports.
// i18n (automatically mocked)
// WHY: Shared mock at web/__mocks__/react-i18next.ts is auto-loaded by Jest
// WHY: Global mock in web/vitest.setup.ts is auto-loaded by Vitest setup
// No explicit mock needed - it returns translation keys as-is
// Override only if custom translations are required:
// jest.mock('react-i18next', () => ({
// vi.mock('react-i18next', () => ({
// useTranslation: () => ({
// t: (key: string) => {
// const customTranslations: Record<string, string> = {
@ -43,17 +43,17 @@ import userEvent from '@testing-library/user-event'
// Router (if component uses useRouter, usePathname, useSearchParams)
// WHY: Isolates tests from Next.js routing, enables testing navigation behavior
// const mockPush = jest.fn()
// jest.mock('next/navigation', () => ({
// const mockPush = vi.fn()
// vi.mock('next/navigation', () => ({
// useRouter: () => ({ push: mockPush }),
// usePathname: () => '/test-path',
// }))
// API services (if component fetches data)
// WHY: Prevents real network calls, enables testing all states (loading/success/error)
// jest.mock('@/service/api')
// vi.mock('@/service/api')
// import * as api from '@/service/api'
// const mockedApi = api as jest.Mocked<typeof api>
// const mockedApi = vi.mocked(api)
// Shared mock state (for portal/dropdown components)
// WHY: Portal components like PortalToFollowElem need shared state between
@ -98,7 +98,7 @@ describe('ComponentName', () => {
// - Prevents mock call history from leaking between tests
// - MUST be beforeEach (not afterEach) to reset BEFORE assertions like toHaveBeenCalledTimes
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
// Reset shared mock state if used (CRITICAL for portal/dropdown tests)
// mockOpenState = false
})
@ -155,7 +155,7 @@ describe('ComponentName', () => {
// - userEvent simulates real user behavior (focus, hover, then click)
// - fireEvent is lower-level, doesn't trigger all browser events
// const user = userEvent.setup()
// const handleClick = jest.fn()
// const handleClick = vi.fn()
// render(<ComponentName onClick={handleClick} />)
//
// await user.click(screen.getByRole('button'))
@ -165,7 +165,7 @@ describe('ComponentName', () => {
it('should call onChange when value changes', async () => {
// const user = userEvent.setup()
// const handleChange = jest.fn()
// const handleChange = vi.fn()
// render(<ComponentName onChange={handleChange} />)
//
// await user.type(screen.getByRole('textbox'), 'new value')
@ -198,7 +198,7 @@ describe('ComponentName', () => {
})
// --------------------------------------------------------------------------
// Async Operations (if component fetches data - useSWR, useQuery, fetch)
// Async Operations (if component fetches data - useQuery, fetch)
// --------------------------------------------------------------------------
// WHY: Async operations have 3 states users experience: loading, success, error
describe('Async Operations', () => {


@ -15,9 +15,9 @@ import { renderHook, act, waitFor } from '@testing-library/react'
// ============================================================================
// API services (if hook fetches data)
// jest.mock('@/service/api')
// vi.mock('@/service/api')
// import * as api from '@/service/api'
// const mockedApi = api as jest.Mocked<typeof api>
// const mockedApi = vi.mocked(api)
// ============================================================================
// Test Helpers
@ -38,7 +38,7 @@ import { renderHook, act, waitFor } from '@testing-library/react'
describe('useHookName', () => {
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
})
// --------------------------------------------------------------------------
@ -145,7 +145,7 @@ describe('useHookName', () => {
// --------------------------------------------------------------------------
describe('Side Effects', () => {
it('should call callback when value changes', () => {
// const callback = jest.fn()
// const callback = vi.fn()
// const { result } = renderHook(() => useHookName({ onChange: callback }))
//
// act(() => {
@ -156,9 +156,9 @@ describe('useHookName', () => {
})
it('should cleanup on unmount', () => {
// const cleanup = jest.fn()
// jest.spyOn(window, 'addEventListener')
// jest.spyOn(window, 'removeEventListener')
// const cleanup = vi.fn()
// vi.spyOn(window, 'addEventListener')
// vi.spyOn(window, 'removeEventListener')
//
// const { unmount } = renderHook(() => useHookName())
//


@ -49,7 +49,7 @@ import userEvent from '@testing-library/user-event'
it('should submit form', async () => {
const user = userEvent.setup()
const onSubmit = jest.fn()
const onSubmit = vi.fn()
render(<Form onSubmit={onSubmit} />)
@ -77,15 +77,15 @@ it('should submit form', async () => {
```typescript
describe('Debounced Search', () => {
beforeEach(() => {
jest.useFakeTimers()
vi.useFakeTimers()
})
afterEach(() => {
jest.useRealTimers()
vi.useRealTimers()
})
it('should debounce search input', async () => {
const onSearch = jest.fn()
const onSearch = vi.fn()
render(<SearchInput onSearch={onSearch} debounceMs={300} />)
// Type in the input
@ -95,7 +95,7 @@ describe('Debounced Search', () => {
expect(onSearch).not.toHaveBeenCalled()
// Advance timers
jest.advanceTimersByTime(300)
vi.advanceTimersByTime(300)
// Now search is called
expect(onSearch).toHaveBeenCalledWith('query')
@ -107,8 +107,8 @@ describe('Debounced Search', () => {
```typescript
it('should retry on failure', async () => {
jest.useFakeTimers()
const fetchData = jest.fn()
vi.useFakeTimers()
const fetchData = vi.fn()
.mockRejectedValueOnce(new Error('Network error'))
.mockResolvedValueOnce({ data: 'success' })
@ -120,7 +120,7 @@ it('should retry on failure', async () => {
})
// Advance timer for retry
jest.advanceTimersByTime(1000)
vi.advanceTimersByTime(1000)
// Second call succeeds
await waitFor(() => {
@ -128,7 +128,7 @@ it('should retry on failure', async () => {
expect(screen.getByText('success')).toBeInTheDocument()
})
jest.useRealTimers()
vi.useRealTimers()
})
```
@ -136,19 +136,19 @@ it('should retry on failure', async () => {
```typescript
// Run all pending timers
jest.runAllTimers()
vi.runAllTimers()
// Run only pending timers (not new ones created during execution)
jest.runOnlyPendingTimers()
vi.runOnlyPendingTimers()
// Advance by specific time
jest.advanceTimersByTime(1000)
vi.advanceTimersByTime(1000)
// Get current fake time
jest.now()
Date.now()
// Clear all timers
jest.clearAllTimers()
vi.clearAllTimers()
```
## API Testing Patterns
@ -158,7 +158,7 @@ jest.clearAllTimers()
```typescript
describe('DataFetcher', () => {
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
})
it('should show loading state', () => {
@ -241,7 +241,7 @@ it('should submit form and show success', async () => {
```typescript
it('should fetch data on mount', async () => {
const fetchData = jest.fn().mockResolvedValue({ data: 'test' })
const fetchData = vi.fn().mockResolvedValue({ data: 'test' })
render(<ComponentWithEffect fetchData={fetchData} />)
@ -255,7 +255,7 @@ it('should fetch data on mount', async () => {
```typescript
it('should refetch when id changes', async () => {
const fetchData = jest.fn().mockResolvedValue({ data: 'test' })
const fetchData = vi.fn().mockResolvedValue({ data: 'test' })
const { rerender } = render(<ComponentWithEffect id="1" fetchData={fetchData} />)
@ -276,8 +276,8 @@ it('should refetch when id changes', async () => {
```typescript
it('should cleanup subscription on unmount', () => {
const subscribe = jest.fn()
const unsubscribe = jest.fn()
const subscribe = vi.fn()
const unsubscribe = vi.fn()
subscribe.mockReturnValue(unsubscribe)
const { unmount } = render(<SubscriptionComponent subscribe={subscribe} />)
@ -332,14 +332,14 @@ expect(description).toBeInTheDocument()
```typescript
// Bad - fake timers don't work well with real Promises
jest.useFakeTimers()
vi.useFakeTimers()
await waitFor(() => {
expect(screen.getByText('Data')).toBeInTheDocument()
}) // May timeout!
// Good - use runAllTimers or advanceTimersByTime
jest.useFakeTimers()
vi.useFakeTimers()
render(<Component />)
jest.runAllTimers()
vi.runAllTimers()
expect(screen.getByText('Data')).toBeInTheDocument()
```
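Vitest also ships async timer helpers that flush promises scheduled by timers, which is often the cleanest fix for this pitfall. A sketch (inside an `async` test):

```typescript
// Sketch: await the async variants so timer-scheduled promise callbacks settle.
vi.useFakeTimers()
render(<Component />)
await vi.runAllTimersAsync() // or: await vi.advanceTimersByTimeAsync(1000)
expect(screen.getByText('Data')).toBeInTheDocument()
vi.useRealTimers()
```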


@ -74,9 +74,9 @@ Use this checklist when generating or reviewing tests for Dify frontend componen
### Mocks
- [ ] **DO NOT mock base components** (`@/app/components/base/*`)
- [ ] `jest.clearAllMocks()` in `beforeEach` (not `afterEach`)
- [ ] `vi.clearAllMocks()` in `beforeEach` (not `afterEach`)
- [ ] Shared mock state reset in `beforeEach`
- [ ] i18n uses shared mock (auto-loaded); only override locally for custom translations
- [ ] i18n uses global mock (auto-loaded in `web/vitest.setup.ts`); only override locally for custom translations
- [ ] Router mocks match actual Next.js API
- [ ] Mocks reflect actual component conditional behavior
- [ ] Only mock: API services, complex context providers, third-party libs
@ -114,15 +114,15 @@ For the current file being tested:
**Run these checks after EACH test file, not just at the end:**
- [ ] Run `pnpm test -- path/to/file.spec.tsx` - **MUST PASS before next file**
- [ ] Run `pnpm test path/to/file.spec.tsx` - **MUST PASS before next file**
- [ ] Fix any failures immediately
- [ ] Mark file as complete in todo list
- [ ] Only then proceed to next file
### After All Files Complete
- [ ] Run full directory test: `pnpm test -- path/to/directory/`
- [ ] Check coverage report: `pnpm test -- --coverage`
- [ ] Run full directory test: `pnpm test path/to/directory/`
- [ ] Check coverage report: `pnpm test:coverage`
- [ ] Run `pnpm lint:fix` on all test files
- [ ] Run `pnpm type-check:tsgo`
@ -132,10 +132,10 @@ For the current file being tested:
```typescript
// ❌ Mock doesn't match actual behavior
jest.mock('./Component', () => () => <div>Mocked</div>)
vi.mock('./Component', () => () => <div>Mocked</div>)
// ✅ Mock matches actual conditional logic
jest.mock('./Component', () => ({ isOpen }: any) =>
vi.mock('./Component', () => ({ isOpen }: any) =>
isOpen ? <div>Content</div> : null
)
```
@ -145,7 +145,7 @@ jest.mock('./Component', () => ({ isOpen }: any) =>
```typescript
// ❌ Shared state not reset
let mockState = false
jest.mock('./useHook', () => () => mockState)
vi.mock('./useHook', () => () => mockState)
// ✅ Reset in beforeEach
beforeEach(() => {
@ -186,16 +186,16 @@ Always test these scenarios:
```bash
# Run specific test
pnpm test -- path/to/file.spec.tsx
pnpm test path/to/file.spec.tsx
# Run with coverage
pnpm test -- --coverage path/to/file.spec.tsx
pnpm test:coverage path/to/file.spec.tsx
# Watch mode
pnpm test -- --watch path/to/file.spec.tsx
pnpm test:watch path/to/file.spec.tsx
# Update snapshots (use sparingly)
pnpm test -- -u path/to/file.spec.tsx
pnpm test -u path/to/file.spec.tsx
# Analyze component
pnpm analyze-component path/to/component.tsx


@ -126,7 +126,7 @@ describe('Counter', () => {
describe('ControlledInput', () => {
it('should call onChange with new value', async () => {
const user = userEvent.setup()
const handleChange = jest.fn()
const handleChange = vi.fn()
render(<ControlledInput value="" onChange={handleChange} />)
@ -136,7 +136,7 @@ describe('ControlledInput', () => {
})
it('should display controlled value', () => {
render(<ControlledInput value="controlled" onChange={jest.fn()} />)
render(<ControlledInput value="controlled" onChange={vi.fn()} />)
expect(screen.getByRole('textbox')).toHaveValue('controlled')
})
@ -195,7 +195,7 @@ describe('ItemList', () => {
it('should handle item selection', async () => {
const user = userEvent.setup()
const onSelect = jest.fn()
const onSelect = vi.fn()
render(<ItemList items={items} onSelect={onSelect} />)
@ -217,20 +217,20 @@ describe('ItemList', () => {
```typescript
describe('Modal', () => {
it('should not render when closed', () => {
render(<Modal isOpen={false} onClose={jest.fn()} />)
render(<Modal isOpen={false} onClose={vi.fn()} />)
expect(screen.queryByRole('dialog')).not.toBeInTheDocument()
})
it('should render when open', () => {
render(<Modal isOpen={true} onClose={jest.fn()} />)
render(<Modal isOpen={true} onClose={vi.fn()} />)
expect(screen.getByRole('dialog')).toBeInTheDocument()
})
it('should call onClose when clicking overlay', async () => {
const user = userEvent.setup()
const handleClose = jest.fn()
const handleClose = vi.fn()
render(<Modal isOpen={true} onClose={handleClose} />)
@ -241,7 +241,7 @@ describe('Modal', () => {
it('should call onClose when pressing Escape', async () => {
const user = userEvent.setup()
const handleClose = jest.fn()
const handleClose = vi.fn()
render(<Modal isOpen={true} onClose={handleClose} />)
@ -254,7 +254,7 @@ describe('Modal', () => {
const user = userEvent.setup()
render(
<Modal isOpen={true} onClose={jest.fn()}>
<Modal isOpen={true} onClose={vi.fn()}>
<button>First</button>
<button>Second</button>
</Modal>
@ -279,7 +279,7 @@ describe('Modal', () => {
describe('LoginForm', () => {
it('should submit valid form', async () => {
const user = userEvent.setup()
const onSubmit = jest.fn()
const onSubmit = vi.fn()
render(<LoginForm onSubmit={onSubmit} />)
@ -296,7 +296,7 @@ describe('LoginForm', () => {
it('should show validation errors', async () => {
const user = userEvent.setup()
render(<LoginForm onSubmit={jest.fn()} />)
render(<LoginForm onSubmit={vi.fn()} />)
// Submit empty form
await user.click(screen.getByRole('button', { name: /sign in/i }))
@ -308,7 +308,7 @@ describe('LoginForm', () => {
it('should validate email format', async () => {
const user = userEvent.setup()
render(<LoginForm onSubmit={jest.fn()} />)
render(<LoginForm onSubmit={vi.fn()} />)
await user.type(screen.getByLabelText(/email/i), 'invalid-email')
await user.click(screen.getByRole('button', { name: /sign in/i }))
@ -318,7 +318,7 @@ describe('LoginForm', () => {
it('should disable submit button while submitting', async () => {
const user = userEvent.setup()
const onSubmit = jest.fn(() => new Promise(resolve => setTimeout(resolve, 100)))
const onSubmit = vi.fn(() => new Promise(resolve => setTimeout(resolve, 100)))
render(<LoginForm onSubmit={onSubmit} />)
@ -407,7 +407,7 @@ it('test 1', () => {
// Good - cleanup is automatic with RTL, but reset mocks
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
})
```


@ -23,7 +23,7 @@ import NodeConfigPanel from './node-config-panel'
import { createMockNode, createMockWorkflowContext } from '@/__mocks__/workflow'
// Mock workflow context
jest.mock('@/app/components/workflow/hooks', () => ({
vi.mock('@/app/components/workflow/hooks', () => ({
useWorkflowStore: () => mockWorkflowStore,
useNodesInteractions: () => mockNodesInteractions,
}))
@ -31,21 +31,21 @@ jest.mock('@/app/components/workflow/hooks', () => ({
let mockWorkflowStore = {
nodes: [],
edges: [],
updateNode: jest.fn(),
updateNode: vi.fn(),
}
let mockNodesInteractions = {
handleNodeSelect: jest.fn(),
handleNodeDelete: jest.fn(),
handleNodeSelect: vi.fn(),
handleNodeDelete: vi.fn(),
}
describe('NodeConfigPanel', () => {
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
mockWorkflowStore = {
nodes: [],
edges: [],
updateNode: jest.fn(),
updateNode: vi.fn(),
}
})
@ -161,23 +161,23 @@ import { render, screen, fireEvent, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import DocumentUploader from './document-uploader'
jest.mock('@/service/datasets', () => ({
uploadDocument: jest.fn(),
parseDocument: jest.fn(),
vi.mock('@/service/datasets', () => ({
uploadDocument: vi.fn(),
parseDocument: vi.fn(),
}))
import * as datasetService from '@/service/datasets'
const mockedService = datasetService as jest.Mocked<typeof datasetService>
const mockedService = vi.mocked(datasetService)
describe('DocumentUploader', () => {
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
})
describe('File Upload', () => {
it('should accept valid file types', async () => {
const user = userEvent.setup()
const onUpload = jest.fn()
const onUpload = vi.fn()
mockedService.uploadDocument.mockResolvedValue({ id: 'doc-1' })
render(<DocumentUploader onUpload={onUpload} />)
@ -326,14 +326,14 @@ describe('DocumentList', () => {
describe('Search & Filtering', () => {
it('should filter by search query', async () => {
const user = userEvent.setup()
jest.useFakeTimers()
vi.useFakeTimers()
render(<DocumentList datasetId="ds-1" />)
await user.type(screen.getByPlaceholderText(/search/i), 'test query')
// Debounce
jest.advanceTimersByTime(300)
vi.advanceTimersByTime(300)
await waitFor(() => {
expect(mockedService.getDocuments).toHaveBeenCalledWith(
@ -342,7 +342,7 @@ describe('DocumentList', () => {
)
})
jest.useRealTimers()
vi.useRealTimers()
})
})
})
@ -367,13 +367,13 @@ import { render, screen, fireEvent, waitFor } from '@testing-library/react'
import userEvent from '@testing-library/user-event'
import AppConfigForm from './app-config-form'
jest.mock('@/service/apps', () => ({
updateAppConfig: jest.fn(),
getAppConfig: jest.fn(),
vi.mock('@/service/apps', () => ({
updateAppConfig: vi.fn(),
getAppConfig: vi.fn(),
}))
import * as appService from '@/service/apps'
const mockedService = appService as jest.Mocked<typeof appService>
const mockedService = vi.mocked(appService)
describe('AppConfigForm', () => {
const defaultConfig = {
@ -384,7 +384,7 @@ describe('AppConfigForm', () => {
}
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
mockedService.getAppConfig.mockResolvedValue(defaultConfig)
})


@ -19,8 +19,8 @@
```typescript
// ❌ WRONG: Don't mock base components
jest.mock('@/app/components/base/loading', () => () => <div>Loading</div>)
jest.mock('@/app/components/base/button', () => ({ children }: any) => <button>{children}</button>)
vi.mock('@/app/components/base/loading', () => () => <div>Loading</div>)
vi.mock('@/app/components/base/button', () => ({ children }: any) => <button>{children}</button>)
// ✅ CORRECT: Import and use real base components
import Loading from '@/app/components/base/loading'
@ -41,20 +41,23 @@ Only mock these categories:
| Location | Purpose |
|----------|---------|
| `web/__mocks__/` | Reusable mocks shared across multiple test files |
| Test file | Test-specific mocks, inline with `jest.mock()` |
| `web/vitest.setup.ts` | Global mocks shared by all tests (for example `react-i18next`, `next/image`) |
| `web/__mocks__/` | Reusable mock factories shared across multiple test files |
| Test file | Test-specific mocks, inline with `vi.mock()` |
Modules are not mocked automatically. Use `vi.mock` in test files, or add global mocks in `web/vitest.setup.ts`.
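As an illustration, a global mock in the setup file can look like the sketch below; the actual `web/vitest.setup.ts` may differ.

```typescript
// web/vitest.setup.ts (sketch) - vi.mock here applies to every test file.
import { vi } from 'vitest'

vi.mock('react-i18next', () => ({
  useTranslation: () => ({
    t: (key: string) => key, // return translation keys as-is
    i18n: { language: 'en' },
  }),
}))
```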
## Essential Mocks
### 1. i18n (Auto-loaded via Shared Mock)
### 1. i18n (Auto-loaded via Global Mock)
A shared mock is available at `web/__mocks__/react-i18next.ts` and is auto-loaded by Jest.
A global mock is defined in `web/vitest.setup.ts` and is auto-loaded by Vitest setup.
**No explicit mock needed** for most tests - it returns translation keys as-is.
For tests requiring custom translations, override the mock:
```typescript
jest.mock('react-i18next', () => ({
vi.mock('react-i18next', () => ({
useTranslation: () => ({
t: (key: string) => {
const translations: Record<string, string> = {
@ -69,15 +72,15 @@ jest.mock('react-i18next', () => ({
### 2. Next.js Router
```typescript
const mockPush = jest.fn()
const mockReplace = jest.fn()
const mockPush = vi.fn()
const mockReplace = vi.fn()
jest.mock('next/navigation', () => ({
vi.mock('next/navigation', () => ({
useRouter: () => ({
push: mockPush,
replace: mockReplace,
back: jest.fn(),
prefetch: jest.fn(),
back: vi.fn(),
prefetch: vi.fn(),
}),
usePathname: () => '/current-path',
useSearchParams: () => new URLSearchParams('?key=value'),
@ -85,7 +88,7 @@ jest.mock('next/navigation', () => ({
describe('Component', () => {
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
})
it('should navigate on click', () => {
@ -102,7 +105,7 @@ describe('Component', () => {
// ⚠️ Important: Use shared state for components that depend on each other
let mockPortalOpenState = false
jest.mock('@/app/components/base/portal-to-follow-elem', () => ({
vi.mock('@/app/components/base/portal-to-follow-elem', () => ({
PortalToFollowElem: ({ children, open, ...props }: any) => {
mockPortalOpenState = open || false // Update shared state
return <div data-testid="portal" data-open={open}>{children}</div>
@ -119,7 +122,7 @@ jest.mock('@/app/components/base/portal-to-follow-elem', () => ({
describe('Component', () => {
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
mockPortalOpenState = false // ✅ Reset shared state
})
})
@ -130,13 +133,13 @@ describe('Component', () => {
```typescript
import * as api from '@/service/api'
jest.mock('@/service/api')
vi.mock('@/service/api')
const mockedApi = api as jest.Mocked<typeof api>
const mockedApi = vi.mocked(api)
describe('Component', () => {
beforeEach(() => {
jest.clearAllMocks()
vi.clearAllMocks()
// Setup default mock implementation
mockedApi.fetchData.mockResolvedValue({ data: [] })
@ -239,32 +242,9 @@ describe('Component with Context', () => {
})
```
### 7. SWR / React Query
### 7. React Query
```typescript
// SWR
jest.mock('swr', () => ({
__esModule: true,
default: jest.fn(),
}))
import useSWR from 'swr'
const mockedUseSWR = useSWR as jest.Mock
describe('Component with SWR', () => {
it('should show loading state', () => {
mockedUseSWR.mockReturnValue({
data: undefined,
error: undefined,
isLoading: true,
})
render(<Component />)
expect(screen.getByText(/loading/i)).toBeInTheDocument()
})
})
// React Query
import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
const createTestQueryClient = () => new QueryClient({
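// NOTE (sketch, assumptions only): a common pattern is a fresh QueryClient per
// test with retries disabled, so error states surface immediately instead of
// being retried away. Assuming @tanstack/react-query:
//
//   const createTestQueryClient = () => new QueryClient({
//     defaultOptions: { queries: { retry: false } },
//   })
//
//   const wrapper = ({ children }: { children: React.ReactNode }) => (
//     <QueryClientProvider client={createTestQueryClient()}>{children}</QueryClientProvider>
//   )
//
//   render(<Component />, { wrapper })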


@ -35,7 +35,7 @@ When testing a **single component, hook, or utility**:
2. Run `pnpm analyze-component <path>` (if available)
3. Check complexity score and features detected
4. Write the test file
5. Run test: `pnpm test -- <file>.spec.tsx`
5. Run test: `pnpm test <file>.spec.tsx`
6. Fix any failures
7. Verify coverage meets goals (100% function, >95% branch)
```
@ -80,7 +80,7 @@ Process files in this recommended order:
```
┌─────────────────────────────────────────────┐
│ 1. Write test file │
│ 2. Run: pnpm test -- <file>.spec.tsx │
│ 2. Run: pnpm test <file>.spec.tsx │
│ 3. If FAIL → Fix immediately, re-run │
│ 4. If PASS → Mark complete in todo list │
│ 5. ONLY THEN proceed to next file │
@ -95,10 +95,10 @@ After all individual tests pass:
```bash
# Run all tests in the directory together
pnpm test -- path/to/directory/
pnpm test path/to/directory/
# Check coverage
pnpm test -- --coverage path/to/directory/
pnpm test:coverage path/to/directory/
```
## Component Complexity Guidelines
@ -201,9 +201,9 @@ Run pnpm test ← Multiple failures, hard to debug
```
# GOOD: Incremental with verification
Write component-a.spec.tsx
Run pnpm test -- component-a.spec.tsx ✅
Run pnpm test component-a.spec.tsx ✅
Write component-b.spec.tsx
Run pnpm test -- component-b.spec.tsx ✅
Run pnpm test component-b.spec.tsx ✅
...continue...
```

.codex/skills (symbolic link)

@ -0,0 +1 @@
../.claude/skills


@ -6,6 +6,9 @@
"context": "..",
"dockerfile": "Dockerfile"
},
"mounts": [
"source=dify-dev-tmp,target=/tmp,type=volume"
],
"features": {
"ghcr.io/devcontainers/features/node:1": {
"nodeGypDependencies": true,
@ -34,19 +37,13 @@
},
"postStartCommand": "./.devcontainer/post_start_command.sh",
"postCreateCommand": "./.devcontainer/post_create_command.sh"
// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "python --version",
// Configure tool-specific properties.
// "customizations": {},
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "root"
}
}


@ -1,12 +1,13 @@
#!/bin/bash
WORKSPACE_ROOT=$(pwd)
export COREPACK_ENABLE_DOWNLOAD_PROMPT=0
corepack enable
cd web && pnpm install
pipx install uv
echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc
echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor\"" >> ~/.bashrc
echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention\"" >> ~/.bashrc
echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev\"" >> ~/.bashrc
echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc
echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc

.github/CODEOWNERS

@ -6,229 +6,244 @@
* @crazywoola @laipz8200 @Yeuoly
# CODEOWNERS file
/.github/CODEOWNERS @laipz8200 @crazywoola
# Docs
/docs/ @crazywoola
# Backend (default owner, more specific rules below will override)
api/ @QuantumGhost
/api/ @QuantumGhost
# Backend - MCP
api/core/mcp/ @Nov1c444
api/core/entities/mcp_provider.py @Nov1c444
api/services/tools/mcp_tools_manage_service.py @Nov1c444
api/controllers/mcp/ @Nov1c444
api/controllers/console/app/mcp_server.py @Nov1c444
api/tests/**/*mcp* @Nov1c444
/api/core/mcp/ @Nov1c444
/api/core/entities/mcp_provider.py @Nov1c444
/api/services/tools/mcp_tools_manage_service.py @Nov1c444
/api/controllers/mcp/ @Nov1c444
/api/controllers/console/app/mcp_server.py @Nov1c444
/api/tests/**/*mcp* @Nov1c444
# Backend - Workflow - Engine (Core graph execution engine)
api/core/workflow/graph_engine/ @laipz8200 @QuantumGhost
api/core/workflow/runtime/ @laipz8200 @QuantumGhost
api/core/workflow/graph/ @laipz8200 @QuantumGhost
api/core/workflow/graph_events/ @laipz8200 @QuantumGhost
api/core/workflow/node_events/ @laipz8200 @QuantumGhost
api/core/model_runtime/ @laipz8200 @QuantumGhost
/api/core/workflow/graph_engine/ @laipz8200 @QuantumGhost
/api/core/workflow/runtime/ @laipz8200 @QuantumGhost
/api/core/workflow/graph/ @laipz8200 @QuantumGhost
/api/core/workflow/graph_events/ @laipz8200 @QuantumGhost
/api/core/workflow/node_events/ @laipz8200 @QuantumGhost
/api/core/model_runtime/ @laipz8200 @QuantumGhost
# Backend - Workflow - Nodes (Agent, Iteration, Loop, LLM)
api/core/workflow/nodes/agent/ @Nov1c444
api/core/workflow/nodes/iteration/ @Nov1c444
api/core/workflow/nodes/loop/ @Nov1c444
api/core/workflow/nodes/llm/ @Nov1c444
/api/core/workflow/nodes/agent/ @Nov1c444
/api/core/workflow/nodes/iteration/ @Nov1c444
/api/core/workflow/nodes/loop/ @Nov1c444
/api/core/workflow/nodes/llm/ @Nov1c444
# Backend - RAG (Retrieval Augmented Generation)
api/core/rag/ @JohnJyong
api/services/rag_pipeline/ @JohnJyong
api/services/dataset_service.py @JohnJyong
api/services/knowledge_service.py @JohnJyong
api/services/external_knowledge_service.py @JohnJyong
api/services/hit_testing_service.py @JohnJyong
api/services/metadata_service.py @JohnJyong
api/services/vector_service.py @JohnJyong
api/services/entities/knowledge_entities/ @JohnJyong
api/services/entities/external_knowledge_entities/ @JohnJyong
api/controllers/console/datasets/ @JohnJyong
api/controllers/service_api/dataset/ @JohnJyong
api/models/dataset.py @JohnJyong
api/tasks/rag_pipeline/ @JohnJyong
api/tasks/add_document_to_index_task.py @JohnJyong
api/tasks/batch_clean_document_task.py @JohnJyong
api/tasks/clean_document_task.py @JohnJyong
api/tasks/clean_notion_document_task.py @JohnJyong
api/tasks/document_indexing_task.py @JohnJyong
api/tasks/document_indexing_sync_task.py @JohnJyong
api/tasks/document_indexing_update_task.py @JohnJyong
api/tasks/duplicate_document_indexing_task.py @JohnJyong
api/tasks/recover_document_indexing_task.py @JohnJyong
api/tasks/remove_document_from_index_task.py @JohnJyong
api/tasks/retry_document_indexing_task.py @JohnJyong
api/tasks/sync_website_document_indexing_task.py @JohnJyong
api/tasks/batch_create_segment_to_index_task.py @JohnJyong
api/tasks/create_segment_to_index_task.py @JohnJyong
api/tasks/delete_segment_from_index_task.py @JohnJyong
api/tasks/disable_segment_from_index_task.py @JohnJyong
api/tasks/disable_segments_from_index_task.py @JohnJyong
api/tasks/enable_segment_to_index_task.py @JohnJyong
api/tasks/enable_segments_to_index_task.py @JohnJyong
api/tasks/clean_dataset_task.py @JohnJyong
api/tasks/deal_dataset_index_update_task.py @JohnJyong
api/tasks/deal_dataset_vector_index_task.py @JohnJyong
/api/core/rag/ @JohnJyong
/api/services/rag_pipeline/ @JohnJyong
/api/services/dataset_service.py @JohnJyong
/api/services/knowledge_service.py @JohnJyong
/api/services/external_knowledge_service.py @JohnJyong
/api/services/hit_testing_service.py @JohnJyong
/api/services/metadata_service.py @JohnJyong
/api/services/vector_service.py @JohnJyong
/api/services/entities/knowledge_entities/ @JohnJyong
/api/services/entities/external_knowledge_entities/ @JohnJyong
/api/controllers/console/datasets/ @JohnJyong
/api/controllers/service_api/dataset/ @JohnJyong
/api/models/dataset.py @JohnJyong
/api/tasks/rag_pipeline/ @JohnJyong
/api/tasks/add_document_to_index_task.py @JohnJyong
/api/tasks/batch_clean_document_task.py @JohnJyong
/api/tasks/clean_document_task.py @JohnJyong
/api/tasks/clean_notion_document_task.py @JohnJyong
/api/tasks/document_indexing_task.py @JohnJyong
/api/tasks/document_indexing_sync_task.py @JohnJyong
/api/tasks/document_indexing_update_task.py @JohnJyong
/api/tasks/duplicate_document_indexing_task.py @JohnJyong
/api/tasks/recover_document_indexing_task.py @JohnJyong
/api/tasks/remove_document_from_index_task.py @JohnJyong
/api/tasks/retry_document_indexing_task.py @JohnJyong
/api/tasks/sync_website_document_indexing_task.py @JohnJyong
/api/tasks/batch_create_segment_to_index_task.py @JohnJyong
/api/tasks/create_segment_to_index_task.py @JohnJyong
/api/tasks/delete_segment_from_index_task.py @JohnJyong
/api/tasks/disable_segment_from_index_task.py @JohnJyong
/api/tasks/disable_segments_from_index_task.py @JohnJyong
/api/tasks/enable_segment_to_index_task.py @JohnJyong
/api/tasks/enable_segments_to_index_task.py @JohnJyong
/api/tasks/clean_dataset_task.py @JohnJyong
/api/tasks/deal_dataset_index_update_task.py @JohnJyong
/api/tasks/deal_dataset_vector_index_task.py @JohnJyong
# Backend - Plugins
api/core/plugin/ @Mairuis @Yeuoly @Stream29
api/services/plugin/ @Mairuis @Yeuoly @Stream29
api/controllers/console/workspace/plugin.py @Mairuis @Yeuoly @Stream29
api/controllers/inner_api/plugin/ @Mairuis @Yeuoly @Stream29
api/tasks/process_tenant_plugin_autoupgrade_check_task.py @Mairuis @Yeuoly @Stream29
/api/core/plugin/ @Mairuis @Yeuoly @Stream29
/api/services/plugin/ @Mairuis @Yeuoly @Stream29
/api/controllers/console/workspace/plugin.py @Mairuis @Yeuoly @Stream29
/api/controllers/inner_api/plugin/ @Mairuis @Yeuoly @Stream29
/api/tasks/process_tenant_plugin_autoupgrade_check_task.py @Mairuis @Yeuoly @Stream29
# Backend - Trigger/Schedule/Webhook
api/controllers/trigger/ @Mairuis @Yeuoly
api/controllers/console/app/workflow_trigger.py @Mairuis @Yeuoly
api/controllers/console/workspace/trigger_providers.py @Mairuis @Yeuoly
api/core/trigger/ @Mairuis @Yeuoly
api/core/app/layers/trigger_post_layer.py @Mairuis @Yeuoly
api/services/trigger/ @Mairuis @Yeuoly
api/models/trigger.py @Mairuis @Yeuoly
api/fields/workflow_trigger_fields.py @Mairuis @Yeuoly
api/repositories/workflow_trigger_log_repository.py @Mairuis @Yeuoly
api/repositories/sqlalchemy_workflow_trigger_log_repository.py @Mairuis @Yeuoly
api/libs/schedule_utils.py @Mairuis @Yeuoly
api/services/workflow/scheduler.py @Mairuis @Yeuoly
api/schedule/trigger_provider_refresh_task.py @Mairuis @Yeuoly
api/schedule/workflow_schedule_task.py @Mairuis @Yeuoly
api/tasks/trigger_processing_tasks.py @Mairuis @Yeuoly
api/tasks/trigger_subscription_refresh_tasks.py @Mairuis @Yeuoly
api/tasks/workflow_schedule_tasks.py @Mairuis @Yeuoly
api/tasks/workflow_cfs_scheduler/ @Mairuis @Yeuoly
api/events/event_handlers/sync_plugin_trigger_when_app_created.py @Mairuis @Yeuoly
api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py @Mairuis @Yeuoly
api/events/event_handlers/sync_workflow_schedule_when_app_published.py @Mairuis @Yeuoly
api/events/event_handlers/sync_webhook_when_app_created.py @Mairuis @Yeuoly
/api/controllers/trigger/ @Mairuis @Yeuoly
/api/controllers/console/app/workflow_trigger.py @Mairuis @Yeuoly
/api/controllers/console/workspace/trigger_providers.py @Mairuis @Yeuoly
/api/core/trigger/ @Mairuis @Yeuoly
/api/core/app/layers/trigger_post_layer.py @Mairuis @Yeuoly
/api/services/trigger/ @Mairuis @Yeuoly
/api/models/trigger.py @Mairuis @Yeuoly
/api/fields/workflow_trigger_fields.py @Mairuis @Yeuoly
/api/repositories/workflow_trigger_log_repository.py @Mairuis @Yeuoly
/api/repositories/sqlalchemy_workflow_trigger_log_repository.py @Mairuis @Yeuoly
/api/libs/schedule_utils.py @Mairuis @Yeuoly
/api/services/workflow/scheduler.py @Mairuis @Yeuoly
/api/schedule/trigger_provider_refresh_task.py @Mairuis @Yeuoly
/api/schedule/workflow_schedule_task.py @Mairuis @Yeuoly
/api/tasks/trigger_processing_tasks.py @Mairuis @Yeuoly
/api/tasks/trigger_subscription_refresh_tasks.py @Mairuis @Yeuoly
/api/tasks/workflow_schedule_tasks.py @Mairuis @Yeuoly
/api/tasks/workflow_cfs_scheduler/ @Mairuis @Yeuoly
/api/events/event_handlers/sync_plugin_trigger_when_app_created.py @Mairuis @Yeuoly
/api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py @Mairuis @Yeuoly
/api/events/event_handlers/sync_workflow_schedule_when_app_published.py @Mairuis @Yeuoly
/api/events/event_handlers/sync_webhook_when_app_created.py @Mairuis @Yeuoly
# Backend - Async Workflow
api/services/async_workflow_service.py @Mairuis @Yeuoly
api/tasks/async_workflow_tasks.py @Mairuis @Yeuoly
/api/services/async_workflow_service.py @Mairuis @Yeuoly
/api/tasks/async_workflow_tasks.py @Mairuis @Yeuoly
# Backend - Billing
api/services/billing_service.py @hj24 @zyssyz123
api/controllers/console/billing/ @hj24 @zyssyz123
/api/services/billing_service.py @hj24 @zyssyz123
/api/controllers/console/billing/ @hj24 @zyssyz123
# Backend - Enterprise
api/configs/enterprise/ @GarfieldDai @GareArc
api/services/enterprise/ @GarfieldDai @GareArc
api/services/feature_service.py @GarfieldDai @GareArc
api/controllers/console/feature.py @GarfieldDai @GareArc
api/controllers/web/feature.py @GarfieldDai @GareArc
/api/configs/enterprise/ @GarfieldDai @GareArc
/api/services/enterprise/ @GarfieldDai @GareArc
/api/services/feature_service.py @GarfieldDai @GareArc
/api/controllers/console/feature.py @GarfieldDai @GareArc
/api/controllers/web/feature.py @GarfieldDai @GareArc
# Backend - Database Migrations
api/migrations/ @snakevash @laipz8200
/api/migrations/ @snakevash @laipz8200 @MRZHUH
# Backend - Vector DB Middleware
/api/configs/middleware/vdb/* @JohnJyong
# Frontend
web/ @iamjoel
/web/ @iamjoel
# Frontend - Web Tests
/.github/workflows/web-tests.yml @iamjoel
# Frontend - App - Orchestration
web/app/components/workflow/ @iamjoel @zxhlyh
web/app/components/workflow-app/ @iamjoel @zxhlyh
web/app/components/app/configuration/ @iamjoel @zxhlyh
web/app/components/app/app-publisher/ @iamjoel @zxhlyh
/web/app/components/workflow/ @iamjoel @zxhlyh
/web/app/components/workflow-app/ @iamjoel @zxhlyh
/web/app/components/app/configuration/ @iamjoel @zxhlyh
/web/app/components/app/app-publisher/ @iamjoel @zxhlyh
# Frontend - WebApp - Chat
web/app/components/base/chat/ @iamjoel @zxhlyh
/web/app/components/base/chat/ @iamjoel @zxhlyh
# Frontend - WebApp - Completion
web/app/components/share/text-generation/ @iamjoel @zxhlyh
/web/app/components/share/text-generation/ @iamjoel @zxhlyh
# Frontend - App - List and Creation
web/app/components/apps/ @JzoNgKVO @iamjoel
web/app/components/app/create-app-dialog/ @JzoNgKVO @iamjoel
web/app/components/app/create-app-modal/ @JzoNgKVO @iamjoel
web/app/components/app/create-from-dsl-modal/ @JzoNgKVO @iamjoel
/web/app/components/apps/ @JzoNgKVO @iamjoel
/web/app/components/app/create-app-dialog/ @JzoNgKVO @iamjoel
/web/app/components/app/create-app-modal/ @JzoNgKVO @iamjoel
/web/app/components/app/create-from-dsl-modal/ @JzoNgKVO @iamjoel
# Frontend - App - API Documentation
web/app/components/develop/ @JzoNgKVO @iamjoel
/web/app/components/develop/ @JzoNgKVO @iamjoel
# Frontend - App - Logs and Annotations
web/app/components/app/workflow-log/ @JzoNgKVO @iamjoel
web/app/components/app/log/ @JzoNgKVO @iamjoel
web/app/components/app/log-annotation/ @JzoNgKVO @iamjoel
web/app/components/app/annotation/ @JzoNgKVO @iamjoel
/web/app/components/app/workflow-log/ @JzoNgKVO @iamjoel
/web/app/components/app/log/ @JzoNgKVO @iamjoel
/web/app/components/app/log-annotation/ @JzoNgKVO @iamjoel
/web/app/components/app/annotation/ @JzoNgKVO @iamjoel
# Frontend - App - Monitoring
web/app/(commonLayout)/app/(appDetailLayout)/\[appId\]/overview/ @JzoNgKVO @iamjoel
web/app/components/app/overview/ @JzoNgKVO @iamjoel
/web/app/(commonLayout)/app/(appDetailLayout)/\[appId\]/overview/ @JzoNgKVO @iamjoel
/web/app/components/app/overview/ @JzoNgKVO @iamjoel
# Frontend - App - Settings
web/app/components/app-sidebar/ @JzoNgKVO @iamjoel
/web/app/components/app-sidebar/ @JzoNgKVO @iamjoel
# Frontend - RAG - Hit Testing
web/app/components/datasets/hit-testing/ @JzoNgKVO @iamjoel
/web/app/components/datasets/hit-testing/ @JzoNgKVO @iamjoel
# Frontend - RAG - List and Creation
web/app/components/datasets/list/ @iamjoel @WTW0313
web/app/components/datasets/create/ @iamjoel @WTW0313
web/app/components/datasets/create-from-pipeline/ @iamjoel @WTW0313
web/app/components/datasets/external-knowledge-base/ @iamjoel @WTW0313
/web/app/components/datasets/list/ @iamjoel @WTW0313
/web/app/components/datasets/create/ @iamjoel @WTW0313
/web/app/components/datasets/create-from-pipeline/ @iamjoel @WTW0313
/web/app/components/datasets/external-knowledge-base/ @iamjoel @WTW0313
# Frontend - RAG - Orchestration (general rule first, specific rules below override)
web/app/components/rag-pipeline/ @iamjoel @WTW0313
web/app/components/rag-pipeline/components/rag-pipeline-main.tsx @iamjoel @zxhlyh
web/app/components/rag-pipeline/store/ @iamjoel @zxhlyh
/web/app/components/rag-pipeline/ @iamjoel @WTW0313
/web/app/components/rag-pipeline/components/rag-pipeline-main.tsx @iamjoel @zxhlyh
/web/app/components/rag-pipeline/store/ @iamjoel @zxhlyh
# Frontend - RAG - Documents List
web/app/components/datasets/documents/list.tsx @iamjoel @WTW0313
web/app/components/datasets/documents/create-from-pipeline/ @iamjoel @WTW0313
/web/app/components/datasets/documents/list.tsx @iamjoel @WTW0313
/web/app/components/datasets/documents/create-from-pipeline/ @iamjoel @WTW0313
# Frontend - RAG - Segments List
web/app/components/datasets/documents/detail/ @iamjoel @WTW0313
/web/app/components/datasets/documents/detail/ @iamjoel @WTW0313
# Frontend - RAG - Settings
web/app/components/datasets/settings/ @iamjoel @WTW0313
/web/app/components/datasets/settings/ @iamjoel @WTW0313
# Frontend - Ecosystem - Plugins
web/app/components/plugins/ @iamjoel @zhsama
/web/app/components/plugins/ @iamjoel @zhsama
# Frontend - Ecosystem - Tools
web/app/components/tools/ @iamjoel @Yessenia-d
/web/app/components/tools/ @iamjoel @Yessenia-d
# Frontend - Ecosystem - MarketPlace
web/app/components/plugins/marketplace/ @iamjoel @Yessenia-d
/web/app/components/plugins/marketplace/ @iamjoel @Yessenia-d
# Frontend - Login and Registration
web/app/signin/ @douxc @iamjoel
web/app/signup/ @douxc @iamjoel
web/app/reset-password/ @douxc @iamjoel
web/app/install/ @douxc @iamjoel
web/app/init/ @douxc @iamjoel
web/app/forgot-password/ @douxc @iamjoel
web/app/account/ @douxc @iamjoel
/web/app/signin/ @douxc @iamjoel
/web/app/signup/ @douxc @iamjoel
/web/app/reset-password/ @douxc @iamjoel
/web/app/install/ @douxc @iamjoel
/web/app/init/ @douxc @iamjoel
/web/app/forgot-password/ @douxc @iamjoel
/web/app/account/ @douxc @iamjoel
# Frontend - Service Authentication
web/service/base.ts @douxc @iamjoel
/web/service/base.ts @douxc @iamjoel
# Frontend - WebApp Authentication and Access Control
web/app/(shareLayout)/components/ @douxc @iamjoel
web/app/(shareLayout)/webapp-signin/ @douxc @iamjoel
web/app/(shareLayout)/webapp-reset-password/ @douxc @iamjoel
web/app/components/app/app-access-control/ @douxc @iamjoel
/web/app/(shareLayout)/components/ @douxc @iamjoel
/web/app/(shareLayout)/webapp-signin/ @douxc @iamjoel
/web/app/(shareLayout)/webapp-reset-password/ @douxc @iamjoel
/web/app/components/app/app-access-control/ @douxc @iamjoel
# Frontend - Explore Page
web/app/components/explore/ @CodingOnStar @iamjoel
/web/app/components/explore/ @CodingOnStar @iamjoel
# Frontend - Personal Settings
web/app/components/header/account-setting/ @CodingOnStar @iamjoel
web/app/components/header/account-dropdown/ @CodingOnStar @iamjoel
/web/app/components/header/account-setting/ @CodingOnStar @iamjoel
/web/app/components/header/account-dropdown/ @CodingOnStar @iamjoel
# Frontend - Analytics
web/app/components/base/ga/ @CodingOnStar @iamjoel
/web/app/components/base/ga/ @CodingOnStar @iamjoel
# Frontend - Base Components
web/app/components/base/ @iamjoel @zxhlyh
/web/app/components/base/ @iamjoel @zxhlyh
# Frontend - Utils and Hooks
web/utils/classnames.ts @iamjoel @zxhlyh
web/utils/time.ts @iamjoel @zxhlyh
web/utils/format.ts @iamjoel @zxhlyh
web/utils/clipboard.ts @iamjoel @zxhlyh
web/hooks/use-document-title.ts @iamjoel @zxhlyh
/web/utils/classnames.ts @iamjoel @zxhlyh
/web/utils/time.ts @iamjoel @zxhlyh
/web/utils/format.ts @iamjoel @zxhlyh
/web/utils/clipboard.ts @iamjoel @zxhlyh
/web/hooks/use-document-title.ts @iamjoel @zxhlyh
# Frontend - Billing and Education
web/app/components/billing/ @iamjoel @zxhlyh
web/app/education-apply/ @iamjoel @zxhlyh
/web/app/components/billing/ @iamjoel @zxhlyh
/web/app/education-apply/ @iamjoel @zxhlyh
# Frontend - Workspace
web/app/components/header/account-dropdown/workplace-selector/ @iamjoel @zxhlyh
/web/app/components/header/account-dropdown/workplace-selector/ @iamjoel @zxhlyh
# Docker
/docker/* @laipz8200


@ -66,27 +66,6 @@ jobs:
# mdformat breaks YAML front matter in markdown files. Add --exclude for directories containing YAML front matter.
- name: mdformat
run: |
uvx --python 3.13 mdformat . --exclude ".claude/skills/**"
- name: Install pnpm
uses: pnpm/action-setup@v4
with:
package_json_file: web/package.json
run_install: false
- name: Setup NodeJS
uses: actions/setup-node@v4
with:
node-version: 22
cache: pnpm
cache-dependency-path: ./web/package.json
- name: Web dependencies
working-directory: ./web
run: pnpm install --frozen-lockfile
- name: oxlint
working-directory: ./web
run: pnpm exec oxlint --config .oxlintrc.json --fix .
uvx --python 3.13 mdformat . --exclude ".claude/skills/**/SKILL.md"
- uses: autofix-ci/action@635ffb0c9798bd160680f18fd73371e355b85f27


@ -90,7 +90,7 @@ jobs:
with:
node-version: 22
cache: pnpm
cache-dependency-path: ./web/package.json
cache-dependency-path: ./web/pnpm-lock.yaml
- name: Web dependencies
if: steps.changed-files.outputs.any_changed == 'true'


@ -55,7 +55,7 @@ jobs:
with:
node-version: 'lts/*'
cache: pnpm
cache-dependency-path: ./web/package.json
cache-dependency-path: ./web/pnpm-lock.yaml
- name: Install dependencies
if: env.FILES_CHANGED == 'true'


@ -13,6 +13,7 @@ jobs:
runs-on: ubuntu-latest
defaults:
run:
shell: bash
working-directory: ./web
steps:
@ -21,14 +22,7 @@ jobs:
with:
persist-credentials: false
- name: Check changed files
id: changed-files
uses: tj-actions/changed-files@v46
with:
files: web/**
- name: Install pnpm
if: steps.changed-files.outputs.any_changed == 'true'
uses: pnpm/action-setup@v4
with:
package_json_file: web/package.json
@ -36,23 +30,342 @@ jobs:
- name: Setup Node.js
uses: actions/setup-node@v4
if: steps.changed-files.outputs.any_changed == 'true'
with:
node-version: 22
cache: pnpm
cache-dependency-path: ./web/package.json
cache-dependency-path: ./web/pnpm-lock.yaml
- name: Install dependencies
if: steps.changed-files.outputs.any_changed == 'true'
working-directory: ./web
run: pnpm install --frozen-lockfile
- name: Check i18n types synchronization
if: steps.changed-files.outputs.any_changed == 'true'
working-directory: ./web
run: pnpm run check:i18n-types
- name: Run tests
if: steps.changed-files.outputs.any_changed == 'true'
working-directory: ./web
run: pnpm test
run: pnpm test:coverage
- name: Coverage Summary
if: always()
id: coverage-summary
run: |
set -eo pipefail
COVERAGE_FILE="coverage/coverage-final.json"
COVERAGE_SUMMARY_FILE="coverage/coverage-summary.json"
if [ ! -f "$COVERAGE_FILE" ] && [ ! -f "$COVERAGE_SUMMARY_FILE" ]; then
echo "has_coverage=false" >> "$GITHUB_OUTPUT"
echo "### 🚨 Test Coverage Report :test_tube:" >> "$GITHUB_STEP_SUMMARY"
echo "Coverage data not found. Ensure Vitest runs with coverage enabled." >> "$GITHUB_STEP_SUMMARY"
exit 0
fi
echo "has_coverage=true" >> "$GITHUB_OUTPUT"
node <<'NODE' >> "$GITHUB_STEP_SUMMARY"
const fs = require('fs');
const path = require('path');
let libCoverage = null;
try {
libCoverage = require('istanbul-lib-coverage');
} catch (error) {
libCoverage = null;
}
const summaryPath = path.join('coverage', 'coverage-summary.json');
const finalPath = path.join('coverage', 'coverage-final.json');
const hasSummary = fs.existsSync(summaryPath);
const hasFinal = fs.existsSync(finalPath);
if (!hasSummary && !hasFinal) {
console.log('### Test Coverage Summary :test_tube:');
console.log('');
console.log('No coverage data found.');
process.exit(0);
}
const summary = hasSummary
? JSON.parse(fs.readFileSync(summaryPath, 'utf8'))
: null;
const coverage = hasFinal
? JSON.parse(fs.readFileSync(finalPath, 'utf8'))
: null;
const getLineCoverageFromStatements = (statementMap, statementHits) => {
const lineHits = {};
if (!statementMap || !statementHits) {
return lineHits;
}
Object.entries(statementMap).forEach(([key, statement]) => {
const line = statement?.start?.line;
if (!line) {
return;
}
const hits = statementHits[key] ?? 0;
const previous = lineHits[line];
lineHits[line] = previous === undefined ? hits : Math.max(previous, hits);
});
return lineHits;
};
const getFileCoverage = (entry) => (
libCoverage ? libCoverage.createFileCoverage(entry) : null
);
const getLineHits = (entry, fileCoverage) => {
const lineHits = entry.l ?? {};
if (Object.keys(lineHits).length > 0) {
return lineHits;
}
if (fileCoverage) {
return fileCoverage.getLineCoverage();
}
return getLineCoverageFromStatements(entry.statementMap ?? {}, entry.s ?? {});
};
const getUncoveredLines = (entry, fileCoverage, lineHits) => {
if (lineHits && Object.keys(lineHits).length > 0) {
return Object.entries(lineHits)
.filter(([, count]) => count === 0)
.map(([line]) => Number(line))
.sort((a, b) => a - b);
}
if (fileCoverage) {
return fileCoverage.getUncoveredLines();
}
return [];
};
const totals = {
lines: { covered: 0, total: 0 },
statements: { covered: 0, total: 0 },
branches: { covered: 0, total: 0 },
functions: { covered: 0, total: 0 },
};
const fileSummaries = [];
if (summary) {
const totalEntry = summary.total ?? {};
['lines', 'statements', 'branches', 'functions'].forEach((key) => {
if (totalEntry[key]) {
totals[key].covered = totalEntry[key].covered ?? 0;
totals[key].total = totalEntry[key].total ?? 0;
}
});
Object.entries(summary)
.filter(([file]) => file !== 'total')
.forEach(([file, data]) => {
fileSummaries.push({
file,
pct: data.lines?.pct ?? data.statements?.pct ?? 0,
lines: {
covered: data.lines?.covered ?? 0,
total: data.lines?.total ?? 0,
},
});
});
} else if (coverage) {
Object.entries(coverage).forEach(([file, entry]) => {
const fileCoverage = getFileCoverage(entry);
const lineHits = getLineHits(entry, fileCoverage);
const statementHits = entry.s ?? {};
const branchHits = entry.b ?? {};
const functionHits = entry.f ?? {};
const lineTotal = Object.keys(lineHits).length;
const lineCovered = Object.values(lineHits).filter((n) => n > 0).length;
const statementTotal = Object.keys(statementHits).length;
const statementCovered = Object.values(statementHits).filter((n) => n > 0).length;
const branchTotal = Object.values(branchHits).reduce((acc, branches) => acc + branches.length, 0);
const branchCovered = Object.values(branchHits).reduce(
(acc, branches) => acc + branches.filter((n) => n > 0).length,
0,
);
const functionTotal = Object.keys(functionHits).length;
const functionCovered = Object.values(functionHits).filter((n) => n > 0).length;
totals.lines.total += lineTotal;
totals.lines.covered += lineCovered;
totals.statements.total += statementTotal;
totals.statements.covered += statementCovered;
totals.branches.total += branchTotal;
totals.branches.covered += branchCovered;
totals.functions.total += functionTotal;
totals.functions.covered += functionCovered;
const pct = (covered, tot) => (tot > 0 ? (covered / tot) * 100 : 0);
fileSummaries.push({
file,
pct: pct(lineCovered || statementCovered, lineTotal || statementTotal),
lines: {
covered: lineCovered || statementCovered,
total: lineTotal || statementTotal,
},
});
});
}
const pct = (covered, tot) => (tot > 0 ? ((covered / tot) * 100).toFixed(2) : '0.00');
console.log('### Test Coverage Summary :test_tube:');
console.log('');
console.log('| Metric | Coverage | Covered / Total |');
console.log('|--------|----------|-----------------|');
console.log(`| Lines | ${pct(totals.lines.covered, totals.lines.total)}% | ${totals.lines.covered} / ${totals.lines.total} |`);
console.log(`| Statements | ${pct(totals.statements.covered, totals.statements.total)}% | ${totals.statements.covered} / ${totals.statements.total} |`);
console.log(`| Branches | ${pct(totals.branches.covered, totals.branches.total)}% | ${totals.branches.covered} / ${totals.branches.total} |`);
console.log(`| Functions | ${pct(totals.functions.covered, totals.functions.total)}% | ${totals.functions.covered} / ${totals.functions.total} |`);
console.log('');
console.log('<details><summary>File coverage (lowest lines first)</summary>');
console.log('');
console.log('```');
fileSummaries
.sort((a, b) => (a.pct - b.pct) || (b.lines.total - a.lines.total))
.slice(0, 25)
.forEach(({ file, pct, lines }) => {
console.log(`${pct.toFixed(2)}%\t${lines.covered}/${lines.total}\t${file}`);
});
console.log('```');
console.log('</details>');
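// When raw istanbul data is available, also emit a Vitest-style per-file coverage table.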
if (coverage) {
const pctValue = (covered, tot) => {
if (tot === 0) {
return '0';
}
return ((covered / tot) * 100)
.toFixed(2)
.replace(/\.?0+$/, '');
};
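// Collapse a sorted list of line numbers into compact ranges, e.g. [3, 4, 5, 9] -> '3-5,9'.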
const formatLineRanges = (lines) => {
if (lines.length === 0) {
return '';
}
const ranges = [];
let start = lines[0];
let end = lines[0];
for (let i = 1; i < lines.length; i += 1) {
const current = lines[i];
if (current === end + 1) {
end = current;
continue;
}
ranges.push(start === end ? `${start}` : `${start}-${end}`);
start = current;
end = current;
}
ranges.push(start === end ? `${start}` : `${start}-${end}`);
return ranges.join(',');
};
const tableTotals = {
statements: { covered: 0, total: 0 },
branches: { covered: 0, total: 0 },
functions: { covered: 0, total: 0 },
lines: { covered: 0, total: 0 },
};
const tableRows = Object.entries(coverage)
.map(([file, entry]) => {
const fileCoverage = getFileCoverage(entry);
const lineHits = getLineHits(entry, fileCoverage);
const statementHits = entry.s ?? {};
const branchHits = entry.b ?? {};
const functionHits = entry.f ?? {};
const lineTotal = Object.keys(lineHits).length;
const lineCovered = Object.values(lineHits).filter((n) => n > 0).length;
const statementTotal = Object.keys(statementHits).length;
const statementCovered = Object.values(statementHits).filter((n) => n > 0).length;
const branchTotal = Object.values(branchHits).reduce((acc, branches) => acc + branches.length, 0);
const branchCovered = Object.values(branchHits).reduce(
(acc, branches) => acc + branches.filter((n) => n > 0).length,
0,
);
const functionTotal = Object.keys(functionHits).length;
const functionCovered = Object.values(functionHits).filter((n) => n > 0).length;
tableTotals.lines.total += lineTotal;
tableTotals.lines.covered += lineCovered;
tableTotals.statements.total += statementTotal;
tableTotals.statements.covered += statementCovered;
tableTotals.branches.total += branchTotal;
tableTotals.branches.covered += branchCovered;
tableTotals.functions.total += functionTotal;
tableTotals.functions.covered += functionCovered;
const uncoveredLines = getUncoveredLines(entry, fileCoverage, lineHits);
const filePath = entry.path ?? file;
const relativePath = path.isAbsolute(filePath)
? path.relative(process.cwd(), filePath)
: filePath;
return {
file: relativePath || file,
statements: pctValue(statementCovered, statementTotal),
branches: pctValue(branchCovered, branchTotal),
functions: pctValue(functionCovered, functionTotal),
lines: pctValue(lineCovered, lineTotal),
uncovered: formatLineRanges(uncoveredLines),
};
})
.sort((a, b) => a.file.localeCompare(b.file));
const columns = [
{ key: 'file', header: 'File', align: 'left' },
{ key: 'statements', header: '% Stmts', align: 'right' },
{ key: 'branches', header: '% Branch', align: 'right' },
{ key: 'functions', header: '% Funcs', align: 'right' },
{ key: 'lines', header: '% Lines', align: 'right' },
{ key: 'uncovered', header: 'Uncovered Line #s', align: 'left' },
];
const allFilesRow = {
file: 'All files',
statements: pctValue(tableTotals.statements.covered, tableTotals.statements.total),
branches: pctValue(tableTotals.branches.covered, tableTotals.branches.total),
functions: pctValue(tableTotals.functions.covered, tableTotals.functions.total),
lines: pctValue(tableTotals.lines.covered, tableTotals.lines.total),
uncovered: '',
};
const rowsForOutput = [allFilesRow, ...tableRows];
const formatRow = (row) => `| ${columns
.map(({ key }) => String(row[key] ?? ''))
.join(' | ')} |`;
const headerRow = `| ${columns.map(({ header }) => header).join(' | ')} |`;
const dividerRow = `| ${columns
.map(({ align }) => (align === 'right' ? '---:' : ':---'))
.join(' | ')} |`;
console.log('');
console.log('<details><summary>Vitest coverage table</summary>');
console.log('');
console.log(headerRow);
console.log(dividerRow);
rowsForOutput.forEach((row) => console.log(formatRow(row)));
console.log('</details>');
}
NODE
- name: Upload Coverage Artifact
if: steps.coverage-summary.outputs.has_coverage == 'true'
uses: actions/upload-artifact@v4
with:
name: web-coverage-report
path: web/coverage
retention-days: 30
if-no-files-found: error

.gitignore
View File

@ -139,7 +139,6 @@ pyrightconfig.json
.idea/'
.DS_Store
web/.vscode/settings.json
# Intellij IDEA Files
.idea/*
@ -196,6 +195,7 @@ docker/nginx/ssl/*
!docker/nginx/ssl/.gitkeep
docker/middleware.env
docker/docker-compose.override.yaml
docker/env-backup/*
sdks/python-client/build
sdks/python-client/dist
@ -205,7 +205,6 @@ sdks/python-client/dify_client.egg-info
!.vscode/launch.json.template
!.vscode/README.md
api/.vscode
web/.vscode
# vscode Code History Extension
.history
@ -220,15 +219,6 @@ plugins.jsonl
# mise
mise.toml
# Next.js build output
.next/
# PWA generated files
web/public/sw.js
web/public/sw.js.map
web/public/workbox-*.js
web/public/workbox-*.js.map
web/public/fallback-*.js
# AI Assistant
.roo/

View File

@ -37,7 +37,7 @@
"-c",
"1",
"-Q",
"dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor",
"dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention",
"--loglevel",
"INFO"
],

View File

@ -116,6 +116,7 @@ ALIYUN_OSS_AUTH_VERSION=v1
ALIYUN_OSS_REGION=your-region
# Don't start with '/'. OSS doesn't support leading slash in object names.
ALIYUN_OSS_PATH=your-path
ALIYUN_CLOUDBOX_ID=your-cloudbox-id
# Google Storage configuration
GOOGLE_STORAGE_BUCKET_NAME=your-bucket-name
@ -133,6 +134,7 @@ HUAWEI_OBS_BUCKET_NAME=your-bucket-name
HUAWEI_OBS_SECRET_KEY=your-secret-key
HUAWEI_OBS_ACCESS_KEY=your-access-key
HUAWEI_OBS_SERVER=your-server-url
HUAWEI_OBS_PATH_STYLE=false
# Baidu OBS Storage Configuration
BAIDU_OBS_BUCKET_NAME=your-bucket-name
@ -543,6 +545,25 @@ APP_MAX_EXECUTION_TIME=1200
APP_DEFAULT_ACTIVE_REQUESTS=0
APP_MAX_ACTIVE_REQUESTS=0
# Aliyun SLS Logstore Configuration
# Aliyun Access Key ID
ALIYUN_SLS_ACCESS_KEY_ID=
# Aliyun Access Key Secret
ALIYUN_SLS_ACCESS_KEY_SECRET=
# Aliyun SLS Endpoint (e.g., cn-hangzhou.log.aliyuncs.com)
ALIYUN_SLS_ENDPOINT=
# Aliyun SLS Region (e.g., cn-hangzhou)
ALIYUN_SLS_REGION=
# Aliyun SLS Project Name
ALIYUN_SLS_PROJECT_NAME=
# Number of days to retain workflow run logs (default: 365; set to 3650 for permanent storage)
ALIYUN_SLS_LOGSTORE_TTL=365
# Enable dual-write to both SLS LogStore and SQL database (default: false)
LOGSTORE_DUAL_WRITE_ENABLED=false
# Enable dual-read fallback to SQL database when LogStore returns no results (default: true)
# Useful for migration scenarios where historical data exists only in SQL database
LOGSTORE_DUAL_READ_ENABLED=true
# Celery beat configuration
CELERY_BEAT_SCHEDULER_TIME=1
@ -671,3 +692,7 @@ ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5
ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
# Maximum number of concurrent annotation import tasks per tenant
ANNOTATION_IMPORT_MAX_CONCURRENT=5
# Sandbox expired-records cleanup configuration
SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000
SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=30
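
The two LOGSTORE flags above encode a migration pattern: dual-write keeps the SQL database in sync while SLS becomes the primary store, and dual-read falls back to SQL when SLS returns nothing. A minimal sketch of that pattern — the repository shape and store interfaces here are illustrative, not Dify's actual classes:

```python
# Illustrative sketch of the dual-write / dual-read migration pattern
# controlled by LOGSTORE_DUAL_WRITE_ENABLED / LOGSTORE_DUAL_READ_ENABLED.
# The store interfaces below are hypothetical stand-ins.


class WorkflowRunLogRepository:
    def __init__(self, logstore, sql_store, dual_write: bool, dual_read: bool):
        self.logstore = logstore
        self.sql_store = sql_store
        self.dual_write = dual_write
        self.dual_read = dual_read

    def save(self, record: dict) -> None:
        self.logstore.write(record)
        if self.dual_write:
            # Keep SQL in sync during migration so rollback stays possible.
            self.sql_store.write(record)

    def find(self, run_id: str) -> list[dict]:
        results = self.logstore.query(run_id)
        if not results and self.dual_read:
            # Historical rows may exist only in SQL; fall back on empty reads.
            results = self.sql_store.query(run_id)
        return results
```

Once historical rows have been backfilled into SLS, both flags can be turned off and the SQL path retired.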

View File

@ -84,7 +84,7 @@
1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service.
```bash
uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor
uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention
```
Additionally, if you want to debug the celery scheduled tasks, you can run the following command in another terminal to start the beat service:

View File

@ -75,6 +75,7 @@ def initialize_extensions(app: DifyApp):
ext_import_modules,
ext_logging,
ext_login,
ext_logstore,
ext_mail,
ext_migrate,
ext_orjson,
@ -105,6 +106,7 @@ def initialize_extensions(app: DifyApp):
ext_migrate,
ext_redis,
ext_storage,
ext_logstore, # Initialize logstore after storage, before celery
ext_celery,
ext_login,
ext_mail,

View File

@ -218,7 +218,7 @@ class PluginConfig(BaseSettings):
PLUGIN_DAEMON_TIMEOUT: PositiveFloat | None = Field(
description="Timeout in seconds for requests to the plugin daemon (set to None to disable)",
default=300.0,
default=600.0,
)
INNER_API_KEY_FOR_PLUGIN: str = Field(description="Inner api key for plugin", default="inner-api-key")
@ -1270,6 +1270,21 @@ class TenantIsolatedTaskQueueConfig(BaseSettings):
)
class SandboxExpiredRecordsCleanConfig(BaseSettings):
SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD: NonNegativeInt = Field(
description="Graceful period in days for sandbox records clean after subscription expiration",
default=21,
)
SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE: PositiveInt = Field(
description="Maximum number of records to process in each batch",
default=1000,
)
SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS: PositiveInt = Field(
description="Retention days for sandbox expired workflow_run records and message records",
default=30,
)
class FeatureConfig(
# place the configs in alphabet order
AppExecutionConfig,
@ -1295,6 +1310,7 @@ class FeatureConfig(
PositionConfig,
RagEtlConfig,
RepositoryConfig,
SandboxExpiredRecordsCleanConfig,
SecurityConfig,
TenantIsolatedTaskQueueConfig,
ToolConfig,
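
The SandboxExpiredRecordsCleanConfig settings registered above bound a batched cleanup job: wait out the grace period after subscription expiry, then delete expired records in fixed-size batches. A rough sketch of how a task might consume them — the data-access callables are assumptions, not Dify's API:

```python
from datetime import datetime, timedelta

# Defaults mirror SandboxExpiredRecordsCleanConfig; fetch_batch and
# delete_batch are hypothetical stand-ins for the real data-access layer.
GRACE_PERIOD_DAYS = 21
BATCH_SIZE = 1000
RETENTION_DAYS = 30


def clean_expired_sandbox_records(fetch_batch, delete_batch):
    # Only touch tenants whose subscription expired before the grace cutoff.
    grace_cutoff = datetime.utcnow() - timedelta(days=GRACE_PERIOD_DAYS)
    # Within those tenants, drop records older than the retention window.
    retention_cutoff = datetime.utcnow() - timedelta(days=RETENTION_DAYS)
    while True:
        batch = fetch_batch(grace_cutoff, retention_cutoff, limit=BATCH_SIZE)
        if not batch:
            break
        # Fixed-size batches keep each delete transaction short.
        delete_batch(batch)
```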

View File

@ -41,3 +41,8 @@ class AliyunOSSStorageConfig(BaseSettings):
description="Base path within the bucket to store objects (e.g., 'my-app-data/')",
default=None,
)
ALIYUN_CLOUDBOX_ID: str | None = Field(
description="Cloudbox id for aliyun cloudbox service",
default=None,
)

View File

@ -26,3 +26,8 @@ class HuaweiCloudOBSStorageConfig(BaseSettings):
description="Endpoint URL for Huawei Cloud OBS (e.g., 'https://obs.cn-north-4.myhuaweicloud.com')",
default=None,
)
HUAWEI_OBS_PATH_STYLE: bool = Field(
description="Flag to indicate whether to use path-style URLs for OBS requests",
default=False,
)

View File

@ -7,9 +7,9 @@ from controllers.console import console_ns
from controllers.console.error import AlreadyActivateError
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from libs.helper import EmailStr, extract_remote_ip, timezone
from libs.helper import EmailStr, timezone
from models import AccountStatus
from services.account_service import AccountService, RegisterService
from services.account_service import RegisterService
DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
@ -93,7 +93,6 @@ class ActivateApi(Resource):
"ActivationResponse",
{
"result": fields.String(description="Operation result"),
"data": fields.Raw(description="Login token data"),
},
),
)
@ -117,6 +116,4 @@ class ActivateApi(Resource):
account.initialized_at = naive_utc_now()
db.session.commit()
token_pair = AccountService.login(account, ip_address=extract_remote_ip(request))
return {"result": "success", "data": token_pair.model_dump()}
return {"result": "success"}

View File

@ -146,7 +146,7 @@ class DatasetUpdatePayload(BaseModel):
embedding_model: str | None = None
embedding_model_provider: str | None = None
retrieval_model: dict[str, Any] | None = None
partial_member_list: list[str] | None = None
partial_member_list: list[dict[str, str]] | None = None
external_retrieval_model: dict[str, Any] | None = None
external_knowledge_id: str | None = None
external_knowledge_api_id: str | None = None

View File

@ -572,7 +572,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
datasource_type=DatasourceType.NOTION,
notion_info=NotionInfo.model_validate(
{
"credential_id": data_source_info["credential_id"],
"credential_id": data_source_info.get("credential_id"),
"notion_workspace_id": data_source_info["notion_workspace_id"],
"notion_obj_id": data_source_info["notion_page_id"],
"notion_page_type": data_source_info["type"],

View File

@ -40,7 +40,7 @@ from .. import console_ns
logger = logging.getLogger(__name__)
class CompletionMessagePayload(BaseModel):
class CompletionMessageExplorePayload(BaseModel):
inputs: dict[str, Any]
query: str = ""
files: list[dict[str, Any]] | None = None
@ -71,7 +71,7 @@ class ChatMessagePayload(BaseModel):
raise ValueError("must be a valid UUID") from exc
register_schema_models(console_ns, CompletionMessagePayload, ChatMessagePayload)
register_schema_models(console_ns, CompletionMessageExplorePayload, ChatMessagePayload)
# define completion api for user
@ -80,13 +80,13 @@ register_schema_models(console_ns, CompletionMessagePayload, ChatMessagePayload)
endpoint="installed_app_completion",
)
class CompletionApi(InstalledAppResource):
@console_ns.expect(console_ns.models[CompletionMessagePayload.__name__])
@console_ns.expect(console_ns.models[CompletionMessageExplorePayload.__name__])
def post(self, installed_app):
app_model = installed_app.app
if app_model.mode != AppMode.COMPLETION:
raise NotCompletionAppError()
payload = CompletionMessagePayload.model_validate(console_ns.payload or {})
payload = CompletionMessageExplorePayload.model_validate(console_ns.payload or {})
args = payload.model_dump(exclude_none=True)
streaming = payload.response_mode == "streaming"

View File

@ -1,5 +1,4 @@
from typing import Any
from uuid import UUID
from flask import request
from flask_restx import marshal_with
@ -13,6 +12,7 @@ from controllers.console.explore.wraps import InstalledAppResource
from core.app.entities.app_invoke_entities import InvokeFrom
from extensions.ext_database import db
from fields.conversation_fields import conversation_infinite_scroll_pagination_fields, simple_conversation_fields
from libs.helper import UUIDStrOrEmpty
from libs.login import current_user
from models import Account
from models.model import AppMode
@ -24,7 +24,7 @@ from .. import console_ns
class ConversationListQuery(BaseModel):
last_id: UUID | None = None
last_id: UUIDStrOrEmpty | None = None
limit: int = Field(default=20, ge=1, le=100)
pinned: bool | None = None

View File

@ -2,7 +2,8 @@ import logging
from typing import Any
from flask import request
from flask_restx import Resource, inputs, marshal_with, reqparse
from flask_restx import Resource, marshal_with
from pydantic import BaseModel
from sqlalchemy import and_, select
from werkzeug.exceptions import BadRequest, Forbidden, NotFound
@ -18,6 +19,15 @@ from services.account_service import TenantService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
class InstalledAppCreatePayload(BaseModel):
app_id: str
class InstalledAppUpdatePayload(BaseModel):
is_pinned: bool | None = None
logger = logging.getLogger(__name__)
@ -105,26 +115,25 @@ class InstalledAppsListApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check("apps")
def post(self):
parser = reqparse.RequestParser().add_argument("app_id", type=str, required=True, help="Invalid app_id")
args = parser.parse_args()
payload = InstalledAppCreatePayload.model_validate(console_ns.payload or {})
recommended_app = db.session.query(RecommendedApp).where(RecommendedApp.app_id == args["app_id"]).first()
recommended_app = db.session.query(RecommendedApp).where(RecommendedApp.app_id == payload.app_id).first()
if recommended_app is None:
raise NotFound("App not found")
raise NotFound("Recommended app not found")
_, current_tenant_id = current_account_with_tenant()
app = db.session.query(App).where(App.id == args["app_id"]).first()
app = db.session.query(App).where(App.id == payload.app_id).first()
if app is None:
raise NotFound("App not found")
raise NotFound("App entity not found")
if not app.is_public:
raise Forbidden("You can't install a non-public app")
installed_app = (
db.session.query(InstalledApp)
.where(and_(InstalledApp.app_id == args["app_id"], InstalledApp.tenant_id == current_tenant_id))
.where(and_(InstalledApp.app_id == payload.app_id, InstalledApp.tenant_id == current_tenant_id))
.first()
)
@ -133,7 +142,7 @@ class InstalledAppsListApi(Resource):
recommended_app.install_count += 1
new_installed_app = InstalledApp(
app_id=args["app_id"],
app_id=payload.app_id,
tenant_id=current_tenant_id,
app_owner_tenant_id=app.tenant_id,
is_pinned=False,
@ -163,12 +172,11 @@ class InstalledAppApi(InstalledAppResource):
return {"result": "success", "message": "App uninstalled successfully"}, 204
def patch(self, installed_app):
parser = reqparse.RequestParser().add_argument("is_pinned", type=inputs.boolean)
args = parser.parse_args()
payload = InstalledAppUpdatePayload.model_validate(console_ns.payload or {})
commit_args = False
if "is_pinned" in args:
installed_app.is_pinned = args["is_pinned"]
if payload.is_pinned is not None:
installed_app.is_pinned = payload.is_pinned
commit_args = True
if commit_args:

View File

@ -1,14 +1,32 @@
from flask_restx import Resource, fields, marshal_with, reqparse
from flask import request
from flask_restx import Resource, fields, marshal_with
from pydantic import BaseModel, Field
from constants import HIDDEN_VALUE
from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, setup_required
from fields.api_based_extension_fields import api_based_extension_fields
from libs.login import current_account_with_tenant, login_required
from models.api_based_extension import APIBasedExtension
from services.api_based_extension_service import APIBasedExtensionService
from services.code_based_extension_service import CodeBasedExtensionService
from ..common.schema import register_schema_models
from . import console_ns
from .wraps import account_initialization_required, setup_required
class CodeBasedExtensionQuery(BaseModel):
module: str
class APIBasedExtensionPayload(BaseModel):
name: str = Field(description="Extension name")
api_endpoint: str = Field(description="API endpoint URL")
api_key: str = Field(description="API key for authentication")
register_schema_models(console_ns, APIBasedExtensionPayload)
api_based_extension_model = console_ns.model("ApiBasedExtensionModel", api_based_extension_fields)
api_based_extension_list_model = fields.List(fields.Nested(api_based_extension_model))
@ -18,11 +36,7 @@ api_based_extension_list_model = fields.List(fields.Nested(api_based_extension_m
class CodeBasedExtensionAPI(Resource):
@console_ns.doc("get_code_based_extension")
@console_ns.doc(description="Get code-based extension data by module name")
@console_ns.expect(
console_ns.parser().add_argument(
"module", type=str, required=True, location="args", help="Extension module name"
)
)
@console_ns.doc(params={"module": "Extension module name"})
@console_ns.response(
200,
"Success",
@ -35,10 +49,9 @@ class CodeBasedExtensionAPI(Resource):
@login_required
@account_initialization_required
def get(self):
parser = reqparse.RequestParser().add_argument("module", type=str, required=True, location="args")
args = parser.parse_args()
query = CodeBasedExtensionQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
return {"module": args["module"], "data": CodeBasedExtensionService.get_code_based_extension(args["module"])}
return {"module": query.module, "data": CodeBasedExtensionService.get_code_based_extension(query.module)}
@console_ns.route("/api-based-extension")
@ -56,30 +69,21 @@ class APIBasedExtensionAPI(Resource):
@console_ns.doc("create_api_based_extension")
@console_ns.doc(description="Create a new API-based extension")
@console_ns.expect(
console_ns.model(
"CreateAPIBasedExtensionRequest",
{
"name": fields.String(required=True, description="Extension name"),
"api_endpoint": fields.String(required=True, description="API endpoint URL"),
"api_key": fields.String(required=True, description="API key for authentication"),
},
)
)
@console_ns.expect(console_ns.models[APIBasedExtensionPayload.__name__])
@console_ns.response(201, "Extension created successfully", api_based_extension_model)
@setup_required
@login_required
@account_initialization_required
@marshal_with(api_based_extension_model)
def post(self):
args = console_ns.payload
payload = APIBasedExtensionPayload.model_validate(console_ns.payload or {})
_, current_tenant_id = current_account_with_tenant()
extension_data = APIBasedExtension(
tenant_id=current_tenant_id,
name=args["name"],
api_endpoint=args["api_endpoint"],
api_key=args["api_key"],
name=payload.name,
api_endpoint=payload.api_endpoint,
api_key=payload.api_key,
)
return APIBasedExtensionService.save(extension_data)
@ -104,16 +108,7 @@ class APIBasedExtensionDetailAPI(Resource):
@console_ns.doc("update_api_based_extension")
@console_ns.doc(description="Update API-based extension")
@console_ns.doc(params={"id": "Extension ID"})
@console_ns.expect(
console_ns.model(
"UpdateAPIBasedExtensionRequest",
{
"name": fields.String(required=True, description="Extension name"),
"api_endpoint": fields.String(required=True, description="API endpoint URL"),
"api_key": fields.String(required=True, description="API key for authentication"),
},
)
)
@console_ns.expect(console_ns.models[APIBasedExtensionPayload.__name__])
@console_ns.response(200, "Extension updated successfully", api_based_extension_model)
@setup_required
@login_required
@ -125,13 +120,13 @@ class APIBasedExtensionDetailAPI(Resource):
extension_data_from_db = APIBasedExtensionService.get_with_tenant_id(current_tenant_id, api_based_extension_id)
args = console_ns.payload
payload = APIBasedExtensionPayload.model_validate(console_ns.payload or {})
extension_data_from_db.name = args["name"]
extension_data_from_db.api_endpoint = args["api_endpoint"]
extension_data_from_db.name = payload.name
extension_data_from_db.api_endpoint = payload.api_endpoint
if args["api_key"] != HIDDEN_VALUE:
extension_data_from_db.api_key = args["api_key"]
if payload.api_key != HIDDEN_VALUE:
extension_data_from_db.api_key = payload.api_key
return APIBasedExtensionService.save(extension_data_from_db)

View File

@ -1,31 +1,40 @@
from typing import Literal
from flask import request
from flask_restx import Resource, marshal_with, reqparse
from flask_restx import Resource, marshal_with
from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden
from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from fields.tag_fields import dataset_tag_fields
from libs.login import current_account_with_tenant, login_required
from models.model import Tag
from services.tag_service import TagService
def _validate_name(name):
if not name or len(name) < 1 or len(name) > 50:
raise ValueError("Name must be between 1 to 50 characters.")
return name
class TagBasePayload(BaseModel):
name: str = Field(description="Tag name", min_length=1, max_length=50)
type: Literal["knowledge", "app"] | None = Field(default=None, description="Tag type")
parser_tags = (
reqparse.RequestParser()
.add_argument(
"name",
nullable=False,
required=True,
help="Name must be between 1 to 50 characters.",
type=_validate_name,
)
.add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.")
class TagBindingPayload(BaseModel):
tag_ids: list[str] = Field(description="Tag IDs to bind")
target_id: str = Field(description="Target ID to bind tags to")
type: Literal["knowledge", "app"] | None = Field(default=None, description="Tag type")
class TagBindingRemovePayload(BaseModel):
tag_id: str = Field(description="Tag ID to remove")
target_id: str = Field(description="Target ID to unbind tag from")
type: Literal["knowledge", "app"] | None = Field(default=None, description="Tag type")
register_schema_models(
console_ns,
TagBasePayload,
TagBindingPayload,
TagBindingRemovePayload,
)
@ -43,7 +52,7 @@ class TagListApi(Resource):
return tags, 200
@console_ns.expect(parser_tags)
@console_ns.expect(console_ns.models[TagBasePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@ -53,22 +62,17 @@ class TagListApi(Resource):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = parser_tags.parse_args()
tag = TagService.save_tags(args)
payload = TagBasePayload.model_validate(console_ns.payload or {})
tag = TagService.save_tags(payload.model_dump())
response = {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0}
return response, 200
parser_tag_id = reqparse.RequestParser().add_argument(
"name", nullable=False, required=True, help="Name must be between 1 to 50 characters.", type=_validate_name
)
@console_ns.route("/tags/<uuid:tag_id>")
class TagUpdateDeleteApi(Resource):
@console_ns.expect(parser_tag_id)
@console_ns.expect(console_ns.models[TagBasePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@ -79,8 +83,8 @@ class TagUpdateDeleteApi(Resource):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = parser_tag_id.parse_args()
tag = TagService.update_tags(args, tag_id)
payload = TagBasePayload.model_validate(console_ns.payload or {})
tag = TagService.update_tags(payload.model_dump(), tag_id)
binding_count = TagService.get_tag_binding_count(tag_id)
@ -100,17 +104,9 @@ class TagUpdateDeleteApi(Resource):
return 204
parser_create = (
reqparse.RequestParser()
.add_argument("tag_ids", type=list, nullable=False, required=True, location="json", help="Tag IDs is required.")
.add_argument("target_id", type=str, nullable=False, required=True, location="json", help="Target ID is required.")
.add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.")
)
@console_ns.route("/tag-bindings/create")
class TagBindingCreateApi(Resource):
@console_ns.expect(parser_create)
@console_ns.expect(console_ns.models[TagBindingPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@ -120,23 +116,15 @@ class TagBindingCreateApi(Resource):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = parser_create.parse_args()
TagService.save_tag_binding(args)
payload = TagBindingPayload.model_validate(console_ns.payload or {})
TagService.save_tag_binding(payload.model_dump())
return {"result": "success"}, 200
parser_remove = (
reqparse.RequestParser()
.add_argument("tag_id", type=str, nullable=False, required=True, help="Tag ID is required.")
.add_argument("target_id", type=str, nullable=False, required=True, help="Target ID is required.")
.add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.")
)
@console_ns.route("/tag-bindings/remove")
class TagBindingDeleteApi(Resource):
@console_ns.expect(parser_remove)
@console_ns.expect(console_ns.models[TagBindingRemovePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@ -146,7 +134,7 @@ class TagBindingDeleteApi(Resource):
if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
args = parser_remove.parse_args()
TagService.delete_tag_binding(args)
payload = TagBindingRemovePayload.model_validate(console_ns.payload or {})
TagService.delete_tag_binding(payload.model_dump())
return {"result": "success"}, 200

View File

@ -18,6 +18,7 @@ from controllers.console.wraps import (
setup_required,
)
from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration
from core.helper.tool_provider_cache import ToolProviderListCache
from core.mcp.auth.auth_flow import auth, handle_callback
from core.mcp.error import MCPAuthError, MCPError, MCPRefreshTokenError
from core.mcp.mcp_client import MCPClient
@ -944,7 +945,7 @@ class ToolProviderMCPApi(Resource):
configuration = MCPConfiguration.model_validate(args["configuration"])
authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None
# Create provider
# Create provider in transaction
with Session(db.engine) as session, session.begin():
service = MCPToolManageService(session=session)
result = service.create_provider(
@ -960,7 +961,11 @@ class ToolProviderMCPApi(Resource):
configuration=configuration,
authentication=authentication,
)
return jsonable_encoder(result)
# Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations
ToolProviderListCache.invalidate_cache(tenant_id)
return jsonable_encoder(result)
@console_ns.expect(parser_mcp_put)
@setup_required
@ -972,17 +977,23 @@ class ToolProviderMCPApi(Resource):
authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None
_, current_tenant_id = current_account_with_tenant()
# Step 1: Validate server URL change if needed (includes URL format validation and network operation)
validation_result = None
# Step 1: Get provider data for URL validation (short-lived session, no network I/O)
validation_data = None
with Session(db.engine) as session:
service = MCPToolManageService(session=session)
validation_result = service.validate_server_url_change(
tenant_id=current_tenant_id, provider_id=args["provider_id"], new_server_url=args["server_url"]
validation_data = service.get_provider_for_url_validation(
tenant_id=current_tenant_id, provider_id=args["provider_id"]
)
# No need to check for errors here, exceptions will be raised directly
# Step 2: Perform URL validation with network I/O OUTSIDE of any database session
# This prevents holding database locks during potentially slow network operations
validation_result = MCPToolManageService.validate_server_url_standalone(
tenant_id=current_tenant_id,
new_server_url=args["server_url"],
validation_data=validation_data,
)
# Step 2: Perform database update in a transaction
# Step 3: Perform database update in a transaction
with Session(db.engine) as session, session.begin():
service = MCPToolManageService(session=session)
service.update_provider(
@ -999,7 +1010,11 @@ class ToolProviderMCPApi(Resource):
authentication=authentication,
validation_result=validation_result,
)
return {"result": "success"}
# Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations
ToolProviderListCache.invalidate_cache(current_tenant_id)
return {"result": "success"}
@console_ns.expect(parser_mcp_delete)
@setup_required
@ -1012,7 +1027,11 @@ class ToolProviderMCPApi(Resource):
with Session(db.engine) as session, session.begin():
service = MCPToolManageService(session=session)
service.delete_provider(tenant_id=current_tenant_id, provider_id=args["provider_id"])
return {"result": "success"}
# Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations
ToolProviderListCache.invalidate_cache(current_tenant_id)
return {"result": "success"}
parser_auth = (
@ -1062,6 +1081,8 @@ class ToolMCPAuthApi(Resource):
credentials=provider_entity.credentials,
authed=True,
)
# Invalidate cache after updating credentials
ToolProviderListCache.invalidate_cache(tenant_id)
return {"result": "success"}
except MCPAuthError as e:
try:
@ -1075,16 +1096,22 @@ class ToolMCPAuthApi(Resource):
with Session(db.engine) as session, session.begin():
service = MCPToolManageService(session=session)
response = service.execute_auth_actions(auth_result)
# Invalidate cache after auth actions may have updated provider state
ToolProviderListCache.invalidate_cache(tenant_id)
return response
except MCPRefreshTokenError as e:
with Session(db.engine) as session, session.begin():
service = MCPToolManageService(session=session)
service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id)
# Invalidate cache after clearing credentials
ToolProviderListCache.invalidate_cache(tenant_id)
raise ValueError(f"Failed to refresh token, please try to authorize again: {e}") from e
except (MCPError, ValueError) as e:
with Session(db.engine) as session, session.begin():
service = MCPToolManageService(session=session)
service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id)
# Invalidate cache after clearing credentials
ToolProviderListCache.invalidate_cache(tenant_id)
raise ValueError(f"Failed to connect to MCP server: {e}") from e

View File

@ -4,7 +4,7 @@ from uuid import UUID
from flask import request
from flask_restx import Resource
from flask_restx._http import HTTPStatus
from pydantic import BaseModel, Field, model_validator
from pydantic import BaseModel, Field, field_validator, model_validator
from sqlalchemy.orm import Session
from werkzeug.exceptions import BadRequest, NotFound
@ -51,6 +51,32 @@ class ConversationRenamePayload(BaseModel):
class ConversationVariablesQuery(BaseModel):
last_id: UUID | None = Field(default=None, description="Last variable ID for pagination")
limit: int = Field(default=20, ge=1, le=100, description="Number of variables to return")
variable_name: str | None = Field(
default=None, description="Filter variables by name", min_length=1, max_length=255
)
@field_validator("variable_name", mode="before")
@classmethod
def validate_variable_name(cls, v: str | None) -> str | None:
"""
Validate variable_name to prevent injection attacks.
"""
if v is None:
return v
# Only allow safe characters: alphanumeric, underscore, hyphen, period
if not v.replace("-", "").replace("_", "").replace(".", "").isalnum():
raise ValueError(
"Variable name can only contain letters, numbers, hyphens (-), underscores (_), and periods (.)"
)
# Prevent SQL injection patterns
dangerous_patterns = ["'", '"', ";", "--", "/*", "*/", "xp_", "sp_"]
for pattern in dangerous_patterns:
if pattern in v.lower():
raise ValueError(f"Variable name contains invalid characters: {pattern}")
return v
class ConversationVariableUpdatePayload(BaseModel):
@ -199,7 +225,7 @@ class ConversationVariablesApi(Resource):
try:
return ConversationService.get_conversational_variable(
app_model, conversation_id, end_user, query_args.limit, last_id
app_model, conversation_id, end_user, query_args.limit, last_id, query_args.variable_name
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")

View File

@ -49,7 +49,7 @@ class DatasetUpdatePayload(BaseModel):
embedding_model: str | None = None
embedding_model_provider: str | None = None
retrieval_model: RetrievalModel | None = None
partial_member_list: list[str] | None = None
partial_member_list: list[dict[str, str]] | None = None
external_retrieval_model: dict[str, Any] | None = None
external_knowledge_id: str | None = None
external_knowledge_api_id: str | None = None

View File

@ -1,14 +1,13 @@
import logging
from flask import request
from flask_restx import Resource, marshal_with, reqparse
from flask_restx import Resource, marshal_with
from pydantic import BaseModel, ConfigDict, Field
from werkzeug.exceptions import Unauthorized
from constants import HEADER_NAME_APP_CODE
from controllers.common import fields
from controllers.web import web_ns
from controllers.web.error import AppUnavailableError
from controllers.web.wraps import WebApiResource
from controllers.common.schema import register_schema_models
from core.app.app_config.common.parameters_mapping import get_parameters_from_feature_dict
from libs.passport import PassportService
from libs.token import extract_webapp_passport
@ -18,9 +17,23 @@ from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
from services.webapp_auth_service import WebAppAuthService
from . import web_ns
from .error import AppUnavailableError
from .wraps import WebApiResource
logger = logging.getLogger(__name__)
class AppAccessModeQuery(BaseModel):
model_config = ConfigDict(populate_by_name=True)
app_id: str | None = Field(default=None, alias="appId", description="Application ID")
app_code: str | None = Field(default=None, alias="appCode", description="Application code")
register_schema_models(web_ns, AppAccessModeQuery)
@web_ns.route("/parameters")
class AppParameterApi(WebApiResource):
"""Resource for app variables."""
@ -96,21 +109,16 @@ class AppAccessMode(Resource):
}
)
def get(self):
parser = (
reqparse.RequestParser()
.add_argument("appId", type=str, required=False, location="args")
.add_argument("appCode", type=str, required=False, location="args")
)
args = parser.parse_args()
raw_args = request.args.to_dict()
args = AppAccessModeQuery.model_validate(raw_args)
features = FeatureService.get_system_features()
if not features.webapp_auth.enabled:
return {"accessMode": "public"}
app_id = args.get("appId")
if args.get("appCode"):
app_code = args["appCode"]
app_id = AppService.get_app_id_by_code(app_code)
app_id = args.app_id
if args.app_code:
app_id = AppService.get_app_id_by_code(args.app_code)
if not app_id:
raise ValueError("appId or appCode must be provided")

View File

@ -1,7 +1,8 @@
import logging
from flask import request
from flask_restx import fields, marshal_with, reqparse
from flask_restx import fields, marshal_with
from pydantic import BaseModel, field_validator
from werkzeug.exceptions import InternalServerError
import services
@ -20,6 +21,7 @@ from controllers.web.error import (
from controllers.web.wraps import WebApiResource
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from libs.helper import uuid_value
from models.model import App
from services.audio_service import AudioService
from services.errors.audio import (
@ -29,6 +31,25 @@ from services.errors.audio import (
UnsupportedAudioTypeServiceError,
)
from ..common.schema import register_schema_models
class TextToAudioPayload(BaseModel):
message_id: str | None = None
voice: str | None = None
text: str | None = None
streaming: bool | None = None
@field_validator("message_id")
@classmethod
def validate_message_id(cls, value: str | None) -> str | None:
if value is None:
return value
return uuid_value(value)
register_schema_models(web_ns, TextToAudioPayload)
logger = logging.getLogger(__name__)
@ -88,6 +109,7 @@ class AudioApi(WebApiResource):
@web_ns.route("/text-to-audio")
class TextApi(WebApiResource):
@web_ns.expect(web_ns.models[TextToAudioPayload.__name__])
@web_ns.doc("Text to Audio")
@web_ns.doc(description="Convert text to audio using text-to-speech service.")
@web_ns.doc(
@ -102,18 +124,11 @@ class TextApi(WebApiResource):
def post(self, app_model: App, end_user):
"""Convert text to audio"""
try:
parser = (
reqparse.RequestParser()
.add_argument("message_id", type=str, required=False, location="json")
.add_argument("voice", type=str, location="json")
.add_argument("text", type=str, location="json")
.add_argument("streaming", type=bool, location="json")
)
args = parser.parse_args()
payload = TextToAudioPayload.model_validate(web_ns.payload or {})
message_id = args.get("message_id", None)
text = args.get("text", None)
voice = args.get("voice", None)
message_id = payload.message_id
text = payload.text
voice = payload.voice
response = AudioService.transcript_tts(
app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id
)

View File

@ -1,9 +1,11 @@
import logging
from typing import Any, Literal
from flask_restx import reqparse
from pydantic import BaseModel, Field, field_validator
from werkzeug.exceptions import InternalServerError, NotFound
import services
from controllers.common.schema import register_schema_models
from controllers.web import web_ns
from controllers.web.error import (
AppUnavailableError,
@ -34,25 +36,44 @@ from services.errors.llm import InvokeRateLimitError
logger = logging.getLogger(__name__)
class CompletionMessagePayload(BaseModel):
inputs: dict[str, Any] = Field(description="Input variables for the completion")
query: str = Field(default="", description="Query text for completion")
files: list[dict[str, Any]] | None = Field(default=None, description="Files to be processed")
response_mode: Literal["blocking", "streaming"] | None = Field(
default=None, description="Response mode: blocking or streaming"
)
retriever_from: str = Field(default="web_app", description="Source of retriever")
class ChatMessagePayload(BaseModel):
inputs: dict[str, Any] = Field(description="Input variables for the chat")
query: str = Field(description="User query/message")
files: list[dict[str, Any]] | None = Field(default=None, description="Files to be processed")
response_mode: Literal["blocking", "streaming"] | None = Field(
default=None, description="Response mode: blocking or streaming"
)
conversation_id: str | None = Field(default=None, description="Conversation ID")
parent_message_id: str | None = Field(default=None, description="Parent message ID")
retriever_from: str = Field(default="web_app", description="Source of retriever")
@field_validator("conversation_id", "parent_message_id")
@classmethod
def validate_uuid(cls, value: str | None) -> str | None:
if value is None:
return value
return uuid_value(value)
register_schema_models(web_ns, CompletionMessagePayload, ChatMessagePayload)
# define completion api for user
@web_ns.route("/completion-messages")
class CompletionApi(WebApiResource):
@web_ns.doc("Create Completion Message")
@web_ns.doc(description="Create a completion message for text generation applications.")
@web_ns.doc(
params={
"inputs": {"description": "Input variables for the completion", "type": "object", "required": True},
"query": {"description": "Query text for completion", "type": "string", "required": False},
"files": {"description": "Files to be processed", "type": "array", "required": False},
"response_mode": {
"description": "Response mode: blocking or streaming",
"type": "string",
"enum": ["blocking", "streaming"],
"required": False,
},
"retriever_from": {"description": "Source of retriever", "type": "string", "required": False},
}
)
@web_ns.expect(web_ns.models[CompletionMessagePayload.__name__])
@web_ns.doc(
responses={
200: "Success",
@ -67,18 +88,10 @@ class CompletionApi(WebApiResource):
if app_model.mode != AppMode.COMPLETION:
raise NotCompletionAppError()
parser = (
reqparse.RequestParser()
.add_argument("inputs", type=dict, required=True, location="json")
.add_argument("query", type=str, location="json", default="")
.add_argument("files", type=list, required=False, location="json")
.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
.add_argument("retriever_from", type=str, required=False, default="web_app", location="json")
)
payload = CompletionMessagePayload.model_validate(web_ns.payload or {})
args = payload.model_dump(exclude_none=True)
args = parser.parse_args()
streaming = args["response_mode"] == "streaming"
streaming = payload.response_mode == "streaming"
args["auto_generate_name"] = False
try:
@ -142,22 +155,7 @@ class CompletionStopApi(WebApiResource):
class ChatApi(WebApiResource):
@web_ns.doc("Create Chat Message")
@web_ns.doc(description="Create a chat message for conversational applications.")
@web_ns.doc(
params={
"inputs": {"description": "Input variables for the chat", "type": "object", "required": True},
"query": {"description": "User query/message", "type": "string", "required": True},
"files": {"description": "Files to be processed", "type": "array", "required": False},
"response_mode": {
"description": "Response mode: blocking or streaming",
"type": "string",
"enum": ["blocking", "streaming"],
"required": False,
},
"conversation_id": {"description": "Conversation UUID", "type": "string", "required": False},
"parent_message_id": {"description": "Parent message UUID", "type": "string", "required": False},
"retriever_from": {"description": "Source of retriever", "type": "string", "required": False},
}
)
@web_ns.expect(web_ns.models[ChatMessagePayload.__name__])
@web_ns.doc(
responses={
200: "Success",
@ -173,20 +171,10 @@ class ChatApi(WebApiResource):
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
parser = (
reqparse.RequestParser()
.add_argument("inputs", type=dict, required=True, location="json")
.add_argument("query", type=str, required=True, location="json")
.add_argument("files", type=list, required=False, location="json")
.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
.add_argument("conversation_id", type=uuid_value, location="json")
.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
.add_argument("retriever_from", type=str, required=False, default="web_app", location="json")
)
payload = ChatMessagePayload.model_validate(web_ns.payload or {})
args = payload.model_dump(exclude_none=True)
args = parser.parse_args()
streaming = args["response_mode"] == "streaming"
streaming = payload.response_mode == "streaming"
args["auto_generate_name"] = False
try:

View File

@ -2,10 +2,12 @@ import base64
import secrets
from flask import request
from flask_restx import Resource, reqparse
from flask_restx import Resource
from pydantic import BaseModel, Field, field_validator
from sqlalchemy import select
from sqlalchemy.orm import Session
from controllers.common.schema import register_schema_models
from controllers.console.auth.error import (
AuthenticationFailedError,
EmailCodeError,
@ -18,14 +20,40 @@ from controllers.console.error import EmailSendIpLimitError
from controllers.console.wraps import email_password_login_enabled, only_edition_enterprise, setup_required
from controllers.web import web_ns
from extensions.ext_database import db
from libs.helper import email, extract_remote_ip
from libs.helper import EmailStr, extract_remote_ip
from libs.password import hash_password, valid_password
from models import Account
from services.account_service import AccountService
class ForgotPasswordSendPayload(BaseModel):
email: EmailStr
language: str | None = None
class ForgotPasswordCheckPayload(BaseModel):
email: EmailStr
code: str
token: str = Field(min_length=1)
class ForgotPasswordResetPayload(BaseModel):
token: str = Field(min_length=1)
new_password: str
password_confirm: str
@field_validator("new_password", "password_confirm")
@classmethod
def validate_password(cls, value: str) -> str:
return valid_password(value)
register_schema_models(web_ns, ForgotPasswordSendPayload, ForgotPasswordCheckPayload, ForgotPasswordResetPayload)
@web_ns.route("/forgot-password")
class ForgotPasswordSendEmailApi(Resource):
@web_ns.expect(web_ns.models[ForgotPasswordSendPayload.__name__])
@only_edition_enterprise
@setup_required
@email_password_login_enabled
@ -40,35 +68,31 @@ class ForgotPasswordSendEmailApi(Resource):
}
)
def post(self):
parser = (
reqparse.RequestParser()
.add_argument("email", type=email, required=True, location="json")
.add_argument("language", type=str, required=False, location="json")
)
args = parser.parse_args()
payload = ForgotPasswordSendPayload.model_validate(web_ns.payload or {})
ip_address = extract_remote_ip(request)
if AccountService.is_email_send_ip_limit(ip_address):
raise EmailSendIpLimitError()
if args["language"] is not None and args["language"] == "zh-Hans":
if payload.language == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
with Session(db.engine) as session:
account = session.execute(select(Account).filter_by(email=args["email"])).scalar_one_or_none()
account = session.execute(select(Account).filter_by(email=payload.email)).scalar_one_or_none()
token = None
if account is None:
raise AuthenticationFailedError()
else:
token = AccountService.send_reset_password_email(account=account, email=args["email"], language=language)
token = AccountService.send_reset_password_email(account=account, email=payload.email, language=language)
return {"result": "success", "data": token}
@web_ns.route("/forgot-password/validity")
class ForgotPasswordCheckApi(Resource):
@web_ns.expect(web_ns.models[ForgotPasswordCheckPayload.__name__])
@only_edition_enterprise
@setup_required
@email_password_login_enabled
@ -78,45 +102,40 @@ class ForgotPasswordCheckApi(Resource):
responses={200: "Token is valid", 400: "Bad request - invalid token format", 401: "Invalid or expired token"}
)
def post(self):
parser = (
reqparse.RequestParser()
.add_argument("email", type=str, required=True, location="json")
.add_argument("code", type=str, required=True, location="json")
.add_argument("token", type=str, required=True, nullable=False, location="json")
)
args = parser.parse_args()
payload = ForgotPasswordCheckPayload.model_validate(web_ns.payload or {})
user_email = args["email"]
user_email = payload.email
is_forgot_password_error_rate_limit = AccountService.is_forgot_password_error_rate_limit(args["email"])
is_forgot_password_error_rate_limit = AccountService.is_forgot_password_error_rate_limit(payload.email)
if is_forgot_password_error_rate_limit:
raise EmailPasswordResetLimitError()
token_data = AccountService.get_reset_password_data(args["token"])
token_data = AccountService.get_reset_password_data(payload.token)
if token_data is None:
raise InvalidTokenError()
if user_email != token_data.get("email"):
raise InvalidEmailError()
if args["code"] != token_data.get("code"):
AccountService.add_forgot_password_error_rate_limit(args["email"])
if payload.code != token_data.get("code"):
AccountService.add_forgot_password_error_rate_limit(payload.email)
raise EmailCodeError()
# Verified, revoke the first token
AccountService.revoke_reset_password_token(args["token"])
AccountService.revoke_reset_password_token(payload.token)
# Refresh token data by generating a new token
_, new_token = AccountService.generate_reset_password_token(
user_email, code=args["code"], additional_data={"phase": "reset"}
user_email, code=payload.code, additional_data={"phase": "reset"}
)
AccountService.reset_forgot_password_error_rate_limit(args["email"])
AccountService.reset_forgot_password_error_rate_limit(payload.email)
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
@web_ns.route("/forgot-password/resets")
class ForgotPasswordResetApi(Resource):
@web_ns.expect(web_ns.models[ForgotPasswordResetPayload.__name__])
@only_edition_enterprise
@setup_required
@email_password_login_enabled
@ -131,20 +150,14 @@ class ForgotPasswordResetApi(Resource):
}
)
def post(self):
parser = (
reqparse.RequestParser()
.add_argument("token", type=str, required=True, nullable=False, location="json")
.add_argument("new_password", type=valid_password, required=True, nullable=False, location="json")
.add_argument("password_confirm", type=valid_password, required=True, nullable=False, location="json")
)
args = parser.parse_args()
payload = ForgotPasswordResetPayload.model_validate(web_ns.payload or {})
# Validate passwords match
if args["new_password"] != args["password_confirm"]:
if payload.new_password != payload.password_confirm:
raise PasswordMismatchError()
# Validate token and get reset data
reset_data = AccountService.get_reset_password_data(args["token"])
reset_data = AccountService.get_reset_password_data(payload.token)
if not reset_data:
raise InvalidTokenError()
# Must use token in reset phase
@ -152,11 +165,11 @@ class ForgotPasswordResetApi(Resource):
raise InvalidTokenError()
# Revoke token to prevent reuse
AccountService.revoke_reset_password_token(args["token"])
AccountService.revoke_reset_password_token(payload.token)
# Generate secure salt and hash password
salt = secrets.token_bytes(16)
password_hashed = hash_password(args["new_password"], salt)
password_hashed = hash_password(payload.new_password, salt)
email = reset_data.get("email", "")
@ -170,7 +183,7 @@ class ForgotPasswordResetApi(Resource):
return {"result": "success"}
def _update_existing_account(self, account, password_hashed, salt, session):
def _update_existing_account(self, account: Account, password_hashed, salt, session):
# Update existing account credentials
account.password = base64.b64encode(password_hashed).decode()
account.password_salt = base64.b64encode(salt).decode()

View File

@ -1,9 +1,12 @@
import logging
from typing import Literal
from flask_restx import fields, marshal_with, reqparse
from flask_restx.inputs import int_range
from flask import request
from flask_restx import fields, marshal_with
from pydantic import BaseModel, Field, field_validator
from werkzeug.exceptions import InternalServerError, NotFound
from controllers.common.schema import register_schema_models
from controllers.web import web_ns
from controllers.web.error import (
AppMoreLikeThisDisabledError,
@ -38,6 +41,33 @@ from services.message_service import MessageService
logger = logging.getLogger(__name__)
class MessageListQuery(BaseModel):
conversation_id: str = Field(description="Conversation UUID")
first_id: str | None = Field(default=None, description="First message ID for pagination")
limit: int = Field(default=20, ge=1, le=100, description="Number of messages to return (1-100)")
@field_validator("conversation_id", "first_id")
@classmethod
def validate_uuid(cls, value: str | None) -> str | None:
if value is None:
return value
return uuid_value(value)
class MessageFeedbackPayload(BaseModel):
rating: Literal["like", "dislike"] | None = Field(default=None, description="Feedback rating")
content: str | None = Field(default=None, description="Feedback content")
class MessageMoreLikeThisQuery(BaseModel):
response_mode: Literal["blocking", "streaming"] = Field(
description="Response mode",
)
register_schema_models(web_ns, MessageListQuery, MessageFeedbackPayload, MessageMoreLikeThisQuery)
@web_ns.route("/messages")
class MessageListApi(WebApiResource):
message_fields = {
@ -68,7 +98,11 @@ class MessageListApi(WebApiResource):
@web_ns.doc(
params={
"conversation_id": {"description": "Conversation UUID", "type": "string", "required": True},
"first_id": {"description": "First message ID for pagination", "type": "string", "required": False},
"first_id": {
"description": "First message ID for pagination",
"type": "string",
"required": False,
},
"limit": {
"description": "Number of messages to return (1-100)",
"type": "integer",
@ -93,17 +127,12 @@ class MessageListApi(WebApiResource):
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
parser = (
reqparse.RequestParser()
.add_argument("conversation_id", required=True, type=uuid_value, location="args")
.add_argument("first_id", type=uuid_value, location="args")
.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
)
args = parser.parse_args()
raw_args = request.args.to_dict()
query = MessageListQuery.model_validate(raw_args)
try:
return MessageService.pagination_by_first_id(
app_model, end_user, args["conversation_id"], args["first_id"], args["limit"]
app_model, end_user, query.conversation_id, query.first_id, query.limit
)
except ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
@ -128,7 +157,7 @@ class MessageFeedbackApi(WebApiResource):
"enum": ["like", "dislike"],
"required": False,
},
"content": {"description": "Feedback content/comment", "type": "string", "required": False},
"content": {"description": "Feedback content", "type": "string", "required": False},
}
)
@web_ns.doc(
@ -145,20 +174,15 @@ class MessageFeedbackApi(WebApiResource):
def post(self, app_model, end_user, message_id):
message_id = str(message_id)
parser = (
reqparse.RequestParser()
.add_argument("rating", type=str, choices=["like", "dislike", None], location="json")
.add_argument("content", type=str, location="json", default=None)
)
args = parser.parse_args()
payload = MessageFeedbackPayload.model_validate(web_ns.payload or {})
try:
MessageService.create_feedback(
app_model=app_model,
message_id=message_id,
user=end_user,
rating=args.get("rating"),
content=args.get("content"),
rating=payload.rating,
content=payload.content,
)
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
@ -170,17 +194,7 @@ class MessageFeedbackApi(WebApiResource):
class MessageMoreLikeThisApi(WebApiResource):
@web_ns.doc("Generate More Like This")
@web_ns.doc(description="Generate a new completion similar to an existing message (completion apps only).")
@web_ns.doc(
params={
"message_id": {"description": "Message UUID", "type": "string", "required": True},
"response_mode": {
"description": "Response mode",
"type": "string",
"enum": ["blocking", "streaming"],
"required": True,
},
}
)
@web_ns.expect(web_ns.models[MessageMoreLikeThisQuery.__name__])
@web_ns.doc(
responses={
200: "Success",
@ -197,12 +211,10 @@ class MessageMoreLikeThisApi(WebApiResource):
message_id = str(message_id)
parser = reqparse.RequestParser().add_argument(
"response_mode", type=str, required=True, choices=["blocking", "streaming"], location="args"
)
args = parser.parse_args()
raw_args = request.args.to_dict()
query = MessageMoreLikeThisQuery.model_validate(raw_args)
streaming = args["response_mode"] == "streaming"
streaming = query.response_mode == "streaming"
try:
response = AppGenerateService.generate_more_like_this(

View File

@ -1,7 +1,8 @@
import urllib.parse
import httpx
from flask_restx import marshal_with, reqparse
from flask_restx import marshal_with
from pydantic import BaseModel, Field, HttpUrl
import services
from controllers.common import helpers
@ -10,14 +11,23 @@ from controllers.common.errors import (
RemoteFileUploadError,
UnsupportedFileTypeError,
)
from controllers.web import web_ns
from controllers.web.wraps import WebApiResource
from core.file import helpers as file_helpers
from core.helper import ssrf_proxy
from extensions.ext_database import db
from fields.file_fields import build_file_with_signed_url_model, build_remote_file_info_model
from services.file_service import FileService
from ..common.schema import register_schema_models
from . import web_ns
from .wraps import WebApiResource
class RemoteFileUploadPayload(BaseModel):
url: HttpUrl = Field(description="Remote file URL")
register_schema_models(web_ns, RemoteFileUploadPayload)
@web_ns.route("/remote-files/<path:url>")
class RemoteFileInfoApi(WebApiResource):
@ -97,10 +107,8 @@ class RemoteFileUploadApi(WebApiResource):
FileTooLargeError: File exceeds size limit
UnsupportedFileTypeError: File type not supported
"""
parser = reqparse.RequestParser().add_argument("url", type=str, required=True, help="URL is required")
args = parser.parse_args()
url = args["url"]
payload = RemoteFileUploadPayload.model_validate(web_ns.payload or {})
url = str(payload.url)
try:
resp = ssrf_proxy.head(url=url)

View File

@ -1,3 +1,4 @@
import json
from collections.abc import Sequence
from enum import StrEnum, auto
from typing import Any, Literal
@ -120,7 +121,7 @@ class VariableEntity(BaseModel):
allowed_file_types: Sequence[FileType] | None = Field(default_factory=list)
allowed_file_extensions: Sequence[str] | None = Field(default_factory=list)
allowed_file_upload_methods: Sequence[FileTransferMethod] | None = Field(default_factory=list)
json_schema: dict[str, Any] | None = Field(default=None)
json_schema: str | None = Field(default=None)
@field_validator("description", mode="before")
@classmethod
@ -134,11 +135,17 @@ class VariableEntity(BaseModel):
@field_validator("json_schema")
@classmethod
def validate_json_schema(cls, schema: dict[str, Any] | None) -> dict[str, Any] | None:
def validate_json_schema(cls, schema: str | None) -> str | None:
if schema is None:
return None
try:
Draft7Validator.check_schema(schema)
json_schema = json.loads(schema)
except json.JSONDecodeError:
raise ValueError(f"invalid json_schema value {schema}")
try:
Draft7Validator.check_schema(json_schema)
except SchemaError as e:
raise ValueError(f"Invalid JSON schema: {e.message}")
return schema
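For reference, a small sketch of what the string-typed validator above accepts, assuming the `jsonschema` package used in this module; the value stays a string, but it must parse as JSON and be a structurally valid Draft 7 schema:
import json
from jsonschema import Draft7Validator

def check(schema_str: str) -> str:
    parsed = json.loads(schema_str)  # raises json.JSONDecodeError on malformed JSON
    Draft7Validator.check_schema(parsed)  # raises SchemaError on an invalid schema
    return schema_str  # the original string is preserved, mirroring the validator

check('{"type": "object", "properties": {"name": {"type": "string"}}}')  # passes
# check('{"type": 42}') would raise SchemaError: 42 is not a valid value for "type"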

View File

@ -1,3 +1,4 @@
import json
from collections.abc import Generator, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Union, final
@ -104,8 +105,9 @@ class BaseAppGenerator:
variable_entity.type in {VariableEntityType.FILE, VariableEntityType.FILE_LIST}
and not variable_entity.required
):
# Treat empty string (frontend default) or empty list as unset
if not value and isinstance(value, (str, list)):
# Treat empty string (frontend default) as unset
# For FILE_LIST, allow empty list [] to pass through
if isinstance(value, str) and not value:
return None
if variable_entity.type in {
@ -175,6 +177,13 @@ class BaseAppGenerator:
value = True
elif value == 0:
value = False
case VariableEntityType.JSON_OBJECT:
if not isinstance(value, str):
raise ValueError(f"{variable_entity.variable} in input form must be a string")
try:
json.loads(value)
except json.JSONDecodeError:
raise ValueError(f"{variable_entity.variable} in input form must be a valid JSON object")
case _:
raise AssertionError("this statement should be unreachable.")

View File

@ -342,9 +342,11 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline):
self._task_state.llm_result.message.content = current_content
if isinstance(event, QueueLLMChunkEvent):
event_type = self._message_cycle_manager.get_message_event_type(message_id=self._message_id)
yield self._message_cycle_manager.message_to_stream_response(
answer=cast(str, delta_text),
message_id=self._message_id,
event_type=event_type,
)
else:
yield self._agent_message_to_stream_response(

View File

@ -5,7 +5,7 @@ from threading import Thread
from typing import Union
from flask import Flask, current_app
from sqlalchemy import select
from sqlalchemy import exists, select
from sqlalchemy.orm import Session
from configs import dify_config
@ -54,6 +54,20 @@ class MessageCycleManager:
):
self._application_generate_entity = application_generate_entity
self._task_state = task_state
self._message_has_file: set[str] = set()
def get_message_event_type(self, message_id: str) -> StreamEvent:
if message_id in self._message_has_file:
return StreamEvent.MESSAGE_FILE
with Session(db.engine, expire_on_commit=False) as session:
has_file = session.query(exists().where(MessageFile.message_id == message_id)).scalar()
if has_file:
self._message_has_file.add(message_id)
return StreamEvent.MESSAGE_FILE
return StreamEvent.MESSAGE
def generate_conversation_name(self, *, conversation_id: str, query: str) -> Thread | None:
"""
@ -214,7 +228,11 @@ class MessageCycleManager:
return None
def message_to_stream_response(
self, answer: str, message_id: str, from_variable_selector: list[str] | None = None
self,
answer: str,
message_id: str,
from_variable_selector: list[str] | None = None,
event_type: StreamEvent | None = None,
) -> MessageStreamResponse:
"""
Message to stream response.
@ -222,16 +240,12 @@ class MessageCycleManager:
:param message_id: message id
:return:
"""
with Session(db.engine, expire_on_commit=False) as session:
message_file = session.scalar(select(MessageFile).where(MessageFile.id == message_id))
event_type = StreamEvent.MESSAGE_FILE if message_file else StreamEvent.MESSAGE
return MessageStreamResponse(
task_id=self._application_generate_entity.task_id,
id=message_id,
answer=answer,
from_variable_selector=from_variable_selector,
event=event_type,
event=event_type or StreamEvent.MESSAGE,
)
def message_replace_to_stream_response(self, answer: str, reason: str = "") -> MessageReplaceStreamResponse:

View File

@ -72,6 +72,22 @@ def _get_ssrf_client(ssl_verify_enabled: bool) -> httpx.Client:
)
def _get_user_provided_host_header(headers: dict | None) -> str | None:
"""
Extract the user-provided Host header from the headers dict.
This is needed because when using a forward proxy, httpx may override the Host header.
We preserve the user's explicit Host header to support virtual hosting and other use cases.
"""
if not headers:
return None
# Case-insensitive lookup for Host header
for key, value in headers.items():
if key.lower() == "host":
return value
return None
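A brief sketch of why the change below pairs this helper with `client.build_request()`: building the request first exposes its headers, so an explicitly set Host survives proxy handling (the target URL and host name here are hypothetical):
import httpx

client = httpx.Client()
request = client.build_request("GET", "http://10.0.0.5/health", headers={"host": "app.internal.example"})
request.headers["Host"] = "app.internal.example"  # explicit override applied before sending
# response = client.send(request)  # not executed here; the endpoint is illustrative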
def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
if "allow_redirects" in kwargs:
allow_redirects = kwargs.pop("allow_redirects")
@ -90,10 +106,26 @@ def make_request(method, url, max_retries=SSRF_DEFAULT_MAX_RETRIES, **kwargs):
verify_option = kwargs.pop("ssl_verify", dify_config.HTTP_REQUEST_NODE_SSL_VERIFY)
client = _get_ssrf_client(verify_option)
# Preserve user-provided Host header
# When using a forward proxy, httpx may override the Host header based on the URL.
# We extract and preserve any explicitly set Host header to support virtual hosting.
headers = kwargs.get("headers", {})
user_provided_host = _get_user_provided_host_header(headers)
retries = 0
while retries <= max_retries:
try:
response = client.request(method=method, url=url, **kwargs)
# Build the request manually to preserve the Host header
# httpx may override the Host header when using a proxy, so we use
# the request API to explicitly set headers before sending
request = client.build_request(method=method, url=url, **kwargs)
# If user explicitly provided a Host header, ensure it's preserved
if user_provided_host is not None:
request.headers["Host"] = user_provided_host
response = client.send(request)
# Check for SSRF protection by Squid proxy
if response.status_code in (401, 403):
# Check if this is a Squid SSRF rejection

View File

@ -396,7 +396,7 @@ class IndexingRunner:
datasource_type=DatasourceType.NOTION,
notion_info=NotionInfo.model_validate(
{
"credential_id": data_source_info["credential_id"],
"credential_id": data_source_info.get("credential_id"),
"notion_workspace_id": data_source_info["notion_workspace_id"],
"notion_obj_id": data_source_info["notion_page_id"],
"notion_page_type": data_source_info["type"],

View File

@ -47,7 +47,11 @@ def build_protected_resource_metadata_discovery_urls(
"""
Build a list of URLs to try for Protected Resource Metadata discovery.
Per SEP-985, supports fallback when discovery fails at one URL.
Per RFC 9728 Section 5.1, supports fallback when discovery fails at one URL.
Priority order:
1. URL from WWW-Authenticate header (if provided)
2. Well-known URI with path: https://example.com/.well-known/oauth-protected-resource/public/mcp
3. Well-known URI at root: https://example.com/.well-known/oauth-protected-resource
"""
urls = []
@ -58,9 +62,18 @@ def build_protected_resource_metadata_discovery_urls(
# Fallback: construct from server URL
parsed = urlparse(server_url)
base_url = f"{parsed.scheme}://{parsed.netloc}"
fallback_url = urljoin(base_url, "/.well-known/oauth-protected-resource")
if fallback_url not in urls:
urls.append(fallback_url)
path = parsed.path.rstrip("/")
# Priority 2: With path insertion (e.g., /.well-known/oauth-protected-resource/public/mcp)
if path:
path_url = f"{base_url}/.well-known/oauth-protected-resource{path}"
if path_url not in urls:
urls.append(path_url)
# Priority 3: At root (e.g., /.well-known/oauth-protected-resource)
root_url = f"{base_url}/.well-known/oauth-protected-resource"
if root_url not in urls:
urls.append(root_url)
return urls
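A worked example of the ordering, with a hypothetical endpoint and no WWW-Authenticate hint:
from urllib.parse import urlparse

server_url = "https://example.com/public/mcp"  # hypothetical MCP endpoint
parsed = urlparse(server_url)
base_url = f"{parsed.scheme}://{parsed.netloc}"
path = parsed.path.rstrip("/")
assert [
    f"{base_url}/.well-known/oauth-protected-resource{path}",
    f"{base_url}/.well-known/oauth-protected-resource",
] == [
    "https://example.com/.well-known/oauth-protected-resource/public/mcp",
    "https://example.com/.well-known/oauth-protected-resource",
]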
@ -71,30 +84,34 @@ def build_oauth_authorization_server_metadata_discovery_urls(auth_server_url: st
Supports both OAuth 2.0 (RFC 8414) and OpenID Connect discovery.
Per RFC 8414 section 3:
- If issuer has no path: https://example.com/.well-known/oauth-authorization-server
- If issuer has path: https://example.com/.well-known/oauth-authorization-server{path}
Example:
- issuer: https://example.com/oauth
- metadata: https://example.com/.well-known/oauth-authorization-server/oauth
Per RFC 8414 section 3.1 and section 5, try all possible endpoints:
- OAuth 2.0 with path insertion: https://example.com/.well-known/oauth-authorization-server/tenant1
- OpenID Connect with path insertion: https://example.com/.well-known/openid-configuration/tenant1
- OpenID Connect path appending: https://example.com/tenant1/.well-known/openid-configuration
- OAuth 2.0 at root: https://example.com/.well-known/oauth-authorization-server
- OpenID Connect at root: https://example.com/.well-known/openid-configuration
"""
urls = []
base_url = auth_server_url or server_url
parsed = urlparse(base_url)
base = f"{parsed.scheme}://{parsed.netloc}"
path = parsed.path.rstrip("/") # Remove trailing slash
path = parsed.path.rstrip("/")
# OAuth 2.0 Authorization Server Metadata at root (MCP-03-26)
urls.append(f"{base}/.well-known/oauth-authorization-server")
# Try OpenID Connect discovery first (more common)
urls.append(urljoin(base + "/", ".well-known/openid-configuration"))
# OpenID Connect Discovery at root
urls.append(f"{base}/.well-known/openid-configuration")
# OAuth 2.0 Authorization Server Metadata (RFC 8414)
# Include the path component if present in the issuer URL
if path:
urls.append(urljoin(base, f".well-known/oauth-authorization-server{path}"))
else:
urls.append(urljoin(base, ".well-known/oauth-authorization-server"))
# OpenID Connect Discovery with path insertion
urls.append(f"{base}/.well-known/openid-configuration{path}")
# OpenID Connect Discovery path appending
urls.append(f"{base}{path}/.well-known/openid-configuration")
# OAuth 2.0 Authorization Server Metadata with path insertion
urls.append(f"{base}/.well-known/oauth-authorization-server{path}")
return urls
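With a hypothetical issuer that carries a tenant path, the appends above produce five candidates in this order:
# auth_server_url = "https://auth.example.com/tenant1" (illustrative) would yield:
expected_urls = [
    "https://auth.example.com/.well-known/oauth-authorization-server",
    "https://auth.example.com/.well-known/openid-configuration",
    "https://auth.example.com/.well-known/openid-configuration/tenant1",
    "https://auth.example.com/tenant1/.well-known/openid-configuration",
    "https://auth.example.com/.well-known/oauth-authorization-server/tenant1",
]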

View File

@ -61,6 +61,7 @@ class SSETransport:
self.timeout = timeout
self.sse_read_timeout = sse_read_timeout
self.endpoint_url: str | None = None
self.event_source: EventSource | None = None
def _validate_endpoint_url(self, endpoint_url: str) -> bool:
"""Validate that the endpoint URL matches the connection origin.
@ -237,6 +238,9 @@ class SSETransport:
write_queue: WriteQueue = queue.Queue()
status_queue: StatusQueue = queue.Queue()
# Store event_source for graceful shutdown
self.event_source = event_source
# Start SSE reader thread
executor.submit(self.sse_reader, event_source, read_queue, status_queue)
@ -296,6 +300,13 @@ def sse_client(
logger.exception("Error connecting to SSE endpoint")
raise
finally:
# Close the SSE connection to unblock the reader thread
if transport.event_source is not None:
try:
transport.event_source.response.close()
except RuntimeError:
pass
# Clean up queues
if read_queue:
read_queue.put(None)

View File

@ -8,6 +8,7 @@ and session management.
import logging
import queue
import threading
from collections.abc import Callable, Generator
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager
@ -103,6 +104,9 @@ class StreamableHTTPTransport:
CONTENT_TYPE: JSON,
**self.headers,
}
self.stop_event = threading.Event()
self._active_responses: list[httpx.Response] = []
self._lock = threading.Lock()
def _update_headers_with_session(self, base_headers: dict[str, str]) -> dict[str, str]:
"""Update headers with session ID if available."""
@ -111,6 +115,30 @@ class StreamableHTTPTransport:
headers[MCP_SESSION_ID] = self.session_id
return headers
def _register_response(self, response: httpx.Response):
"""Register a response for cleanup on shutdown."""
with self._lock:
self._active_responses.append(response)
def _unregister_response(self, response: httpx.Response):
"""Unregister a response after it's closed."""
with self._lock:
try:
self._active_responses.remove(response)
except ValueError as e:
logger.debug("Ignoring error during response unregister: %s", e)
def close_active_responses(self):
"""Close all active SSE connections to unblock threads."""
with self._lock:
responses_to_close = list(self._active_responses)
self._active_responses.clear()
for response in responses_to_close:
try:
response.close()
except RuntimeError as e:
logger.debug("Ignoring error during active response close: %s", e)
def _is_initialization_request(self, message: JSONRPCMessage) -> bool:
"""Check if the message is an initialization request."""
return isinstance(message.root, JSONRPCRequest) and message.root.method == "initialize"
@ -195,11 +223,21 @@ class StreamableHTTPTransport:
event_source.response.raise_for_status()
logger.debug("GET SSE connection established")
for sse in event_source.iter_sse():
self._handle_sse_event(sse, server_to_client_queue)
# Register response for cleanup
self._register_response(event_source.response)
try:
for sse in event_source.iter_sse():
if self.stop_event.is_set():
logger.debug("GET stream received stop signal")
break
self._handle_sse_event(sse, server_to_client_queue)
finally:
self._unregister_response(event_source.response)
except Exception as exc:
logger.debug("GET stream error (non-fatal): %s", exc)
if not self.stop_event.is_set():
logger.debug("GET stream error (non-fatal): %s", exc)
def _handle_resumption_request(self, ctx: RequestContext):
"""Handle a resumption request using GET with SSE."""
@ -224,15 +262,24 @@ class StreamableHTTPTransport:
event_source.response.raise_for_status()
logger.debug("Resumption GET SSE connection established")
for sse in event_source.iter_sse():
is_complete = self._handle_sse_event(
sse,
ctx.server_to_client_queue,
original_request_id,
ctx.metadata.on_resumption_token_update if ctx.metadata else None,
)
if is_complete:
break
# Register response for cleanup
self._register_response(event_source.response)
try:
for sse in event_source.iter_sse():
if self.stop_event.is_set():
logger.debug("Resumption stream received stop signal")
break
is_complete = self._handle_sse_event(
sse,
ctx.server_to_client_queue,
original_request_id,
ctx.metadata.on_resumption_token_update if ctx.metadata else None,
)
if is_complete:
break
finally:
self._unregister_response(event_source.response)
def _handle_post_request(self, ctx: RequestContext):
"""Handle a POST request with response processing."""
@ -295,17 +342,27 @@ class StreamableHTTPTransport:
def _handle_sse_response(self, response: httpx.Response, ctx: RequestContext):
"""Handle SSE response from the server."""
try:
# Register response for cleanup
self._register_response(response)
event_source = EventSource(response)
for sse in event_source.iter_sse():
is_complete = self._handle_sse_event(
sse,
ctx.server_to_client_queue,
resumption_callback=(ctx.metadata.on_resumption_token_update if ctx.metadata else None),
)
if is_complete:
break
try:
for sse in event_source.iter_sse():
if self.stop_event.is_set():
logger.debug("SSE response stream received stop signal")
break
is_complete = self._handle_sse_event(
sse,
ctx.server_to_client_queue,
resumption_callback=(ctx.metadata.on_resumption_token_update if ctx.metadata else None),
)
if is_complete:
break
finally:
self._unregister_response(response)
except Exception as e:
ctx.server_to_client_queue.put(e)
if not self.stop_event.is_set():
ctx.server_to_client_queue.put(e)
def _handle_unexpected_content_type(
self,
@ -345,6 +402,11 @@ class StreamableHTTPTransport:
"""
while True:
try:
# Check if we should stop
if self.stop_event.is_set():
logger.debug("Post writer received stop signal")
break
# Read message from client queue with timeout to check stop_event periodically
session_message = client_to_server_queue.get(timeout=DEFAULT_QUEUE_READ_TIMEOUT)
if session_message is None:
@ -381,7 +443,8 @@ class StreamableHTTPTransport:
except queue.Empty:
continue
except Exception as exc:
server_to_client_queue.put(exc)
if not self.stop_event.is_set():
server_to_client_queue.put(exc)
def terminate_session(self, client: httpx.Client):
"""Terminate the session by sending a DELETE request."""
@ -465,6 +528,12 @@ def streamablehttp_client(
transport.get_session_id,
)
finally:
# Set stop event to signal all threads to stop
transport.stop_event.set()
# Close all active SSE connections to unblock threads
transport.close_active_responses()
if transport.session_id and terminate_on_close:
transport.terminate_session(client)

View File

@ -59,7 +59,7 @@ class MCPClient:
try:
logger.debug("Not supported method %s found in URL path, trying default 'mcp' method.", method_name)
self.connect_server(sse_client, "sse")
except MCPConnectionError:
except (MCPConnectionError, ValueError):
logger.debug("MCP connection failed with 'sse', falling back to 'mcp' method.")
self.connect_server(streamablehttp_client, "mcp")

View File

@ -18,34 +18,20 @@ This module provides the interface for invoking and authenticating various model
- Model provider display
![image-20231210143654461](./docs/en_US/images/index/image-20231210143654461.png)
Displays a list of all supported providers, including provider names, icons, supported model type lists, predefined model lists, configuration methods, and credential form rules. For detailed rule design, see: [Schema](./docs/en_US/schema.md).
Displays a list of all supported providers, including provider names, icons, supported model type lists, predefined model lists, configuration methods, and credential form rules.
- Selectable model list display
![image-20231210144229650](./docs/en_US/images/index/image-20231210144229650.png)
After configuring provider/model credentials, the dropdown (application orchestration interface/default model) allows viewing of the available LLM list. Greyed out items represent predefined model lists from providers without configured credentials, facilitating user review of supported models.
In addition, this list also returns configurable parameter information and rules for the LLM, as shown below:
![image-20231210144814617](./docs/en_US/images/index/image-20231210144814617.png)
These parameters are all defined in the backend, allowing different settings for the various parameters supported by different models, as detailed in: [Schema](./docs/en_US/schema.md#ParameterRule).
In addition, this list also returns configurable parameter information and rules for the LLM. These parameters are all defined in the backend, allowing different settings for the various parameters supported by different models.
- Provider/model credential authentication
![image-20231210151548521](./docs/en_US/images/index/image-20231210151548521.png)
![image-20231210151628992](./docs/en_US/images/index/image-20231210151628992.png)
The provider list returns configuration information for the credentials form, which can be authenticated through Runtime's interface. The first image above is a provider credential DEMO, and the second is a model credential DEMO.
The provider list returns configuration information for the credentials form, which can be authenticated through Runtime's interface.
## Structure
![](./docs/en_US/images/index/image-20231210165243632.png)
Model Runtime is divided into three layers:
- The outermost layer is the factory method
@ -60,9 +46,6 @@ Model Runtime is divided into three layers:
It offers direct invocation of various model types, predefined model configuration information, retrieval of predefined/remote model lists, and model credential authentication methods. Different models provide additional special methods, such as LLM's token pre-computation and cost-estimation methods, **allowing horizontal expansion** for different models under the same provider (within supported model types).
## Next Steps
## Documentation
- Add new provider configuration: [Link](./docs/en_US/provider_scale_out.md)
- Add new models for existing providers: [Link](./docs/en_US/provider_scale_out.md#AddModel)
- View YAML configuration rules: [Link](./docs/en_US/schema.md)
- Implement interface methods: [Link](./docs/en_US/interfaces.md)
For detailed documentation on how to add new providers or models, please refer to the [Dify documentation](https://docs.dify.ai/).

View File

@ -18,34 +18,20 @@
- Model provider display
![image-20231210143654461](./docs/zh_Hans/images/index/image-20231210143654461.png)
Displays a list of all supported providers, including provider names, icons, supported model type lists, predefined model lists, configuration methods, and credential form rules. For detailed rule design, see: [Schema](./docs/zh_Hans/schema.md).
Displays a list of all supported providers, including provider names, icons, supported model type lists, predefined model lists, configuration methods, and credential form rules.
- Selectable model list display
![image-20231210144229650](./docs/zh_Hans/images/index/image-20231210144229650.png)
After configuring provider/model credentials, the dropdown (application orchestration interface / default model) shows the available LLM list; greyed-out items are predefined models from providers without configured credentials, making it easy to see which models are supported.
After configuring provider/model credentials, the dropdown (application orchestration interface / default model) shows the available LLM list; greyed-out items are predefined models from providers without configured credentials, making it easy to see which models are supported.
In addition, this list also returns the LLM's configurable parameter information and rules, as shown below:
![image-20231210144814617](./docs/zh_Hans/images/index/image-20231210144814617.png)
These parameters are all defined in the backend; compared with the previous five fixed parameters, the various parameters supported by each model can be configured here. See: [Schema](./docs/zh_Hans/schema.md#ParameterRule).
In addition, this list also returns the LLM's configurable parameter information and rules. These parameters are all defined in the backend; compared with the previous five fixed parameters, the various parameters supported by each model can be configured here.
- Provider/model credential authentication
![image-20231210151548521](./docs/zh_Hans/images/index/image-20231210151548521.png)
![image-20231210151628992](./docs/zh_Hans/images/index/image-20231210151628992.png)
The provider list returns the credential form configuration, which can be authenticated through the interface provided by Runtime. Image 1 above is a provider credential DEMO; image 2 is a model credential DEMO.
The provider list returns the credential form configuration, which can be authenticated through the interface provided by Runtime.
## Structure
![](./docs/zh_Hans/images/index/image-20231210165243632.png)
Model Runtime is divided into three layers:
- The outermost layer is the factory method
@ -59,8 +45,7 @@ Model Runtime is divided into three layers:
For provider/model credentials, there are two cases
- Centralized providers such as OpenAI require authentication credentials like **api_key**
- Locally deployed providers such as [**Xinference**](https://github.com/xorbitsai/inference) require address credentials like **server_url**, and sometimes model-type credentials like **model_uid**, as shown below. Once these credentials are defined at the provider layer, they can be displayed directly on the frontend without modifying frontend logic.
![Alt text](docs/zh_Hans/images/index/image.png)
- Locally deployed providers such as [**Xinference**](https://github.com/xorbitsai/inference) require address credentials like **server_url**, and sometimes model-type credentials like **model_uid**. Once these credentials are defined at the provider layer, they can be displayed directly on the frontend without modifying frontend logic.
Once the credentials are configured, the **Schema** (credential form rules) required by the corresponding provider can be obtained directly through DifyRuntime's external interface, enabling support for new providers/models without modifying frontend logic.
@ -74,20 +59,6 @@ Model Runtime is divided into three layers:
- Model credentials (**defined at the provider layer**): parameters that rarely change once configured, such as **api_key**, **server_url**, etc. In DifyRuntime, their parameter name is generally **credentials: dict[str, any]**; the Provider layer's credentials are passed directly to this layer and do not need to be defined separately.
## Next Steps
## Documentation
### [Add a new provider configuration 👈🏻](./docs/zh_Hans/provider_scale_out.md)
Once added, a new provider will appear here
![Alt text](docs/zh_Hans/images/index/image-1.png)
### [Add new models to an existing provider 👈🏻](./docs/zh_Hans/provider_scale_out.md#%E5%A2%9E%E5%8A%A0%E6%A8%A1%E5%9E%8B)
Once added, a new predefined model will appear in the corresponding provider's model list for users to choose, such as GPT-3.5, GPT-4, or ChatGLM3-6b; providers that support custom models do not need new models added.
![Alt text](docs/zh_Hans/images/index/image-2.png)
### [Concrete interface implementations 👈🏻](./docs/zh_Hans/interfaces.md)
Here you can find the concrete implementation of the interface you want to inspect, along with the exact meaning of its parameters and return values.
For detailed documentation on how to add new providers or models, please refer to the [Dify documentation](https://docs.dify.ai/).

View File

@ -54,7 +54,7 @@ def generate_dotted_order(run_id: str, start_time: Union[str, datetime], parent_
generate dotted_order for langsmith
"""
start_time = datetime.fromisoformat(start_time) if isinstance(start_time, str) else start_time
timestamp = start_time.strftime("%Y%m%dT%H%M%S%f")[:-3] + "Z"
timestamp = start_time.strftime("%Y%m%dT%H%M%S%f") + "Z"
current_segment = f"{timestamp}{run_id}"
if parent_dotted_order is None:

View File

@ -39,7 +39,7 @@ from core.trigger.errors import (
plugin_daemon_inner_api_baseurl = URL(str(dify_config.PLUGIN_DAEMON_URL))
_plugin_daemon_timeout_config = cast(
float | httpx.Timeout | None,
getattr(dify_config, "PLUGIN_DAEMON_TIMEOUT", 300.0),
getattr(dify_config, "PLUGIN_DAEMON_TIMEOUT", 600.0),
)
plugin_daemon_request_timeout: httpx.Timeout | None
if _plugin_daemon_timeout_config is None:

View File

@ -90,13 +90,17 @@ class Jieba(BaseKeyword):
sorted_chunk_indices = self._retrieve_ids_by_query(keyword_table or {}, query, k)
documents = []
segment_query_stmt = db.session.query(DocumentSegment).where(
DocumentSegment.dataset_id == self.dataset.id, DocumentSegment.index_node_id.in_(sorted_chunk_indices)
)
if document_ids_filter:
segment_query_stmt = segment_query_stmt.where(DocumentSegment.document_id.in_(document_ids_filter))
segments = db.session.execute(segment_query_stmt).scalars().all()
segment_map = {segment.index_node_id: segment for segment in segments}
for chunk_index in sorted_chunk_indices:
segment_query = db.session.query(DocumentSegment).where(
DocumentSegment.dataset_id == self.dataset.id, DocumentSegment.index_node_id == chunk_index
)
if document_ids_filter:
segment_query = segment_query.where(DocumentSegment.document_id.in_(document_ids_filter))
segment = segment_query.first()
segment = segment_map.get(chunk_index)
if segment:
documents.append(

View File

@ -7,6 +7,7 @@ from sqlalchemy import select
from sqlalchemy.orm import Session, load_only
from configs import dify_config
from core.db.session_factory import session_factory
from core.model_manager import ModelManager
from core.model_runtime.entities.model_entities import ModelType
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
@ -138,37 +139,47 @@ class RetrievalService:
@classmethod
def _deduplicate_documents(cls, documents: list[Document]) -> list[Document]:
"""Deduplicate documents based on doc_id to avoid duplicate chunks in hybrid search."""
"""Deduplicate documents in O(n) while preserving first-seen order.
Rules:
- For provider == "dify" and metadata["doc_id"] exists: keep the doc with the highest
metadata["score"] among duplicates; if a later duplicate has no score, ignore it.
- For non-dify documents (or dify without doc_id): deduplicate by content key
(provider, page_content), keeping the first occurrence.
"""
if not documents:
return documents
unique_documents = []
seen_doc_ids = set()
# Map of dedup key -> chosen Document
chosen: dict[tuple, Document] = {}
# Preserve the order of first appearance of each dedup key
order: list[tuple] = []
for document in documents:
# For dify provider documents, use doc_id for deduplication
if document.provider == "dify" and document.metadata is not None and "doc_id" in document.metadata:
doc_id = document.metadata["doc_id"]
if doc_id not in seen_doc_ids:
seen_doc_ids.add(doc_id)
unique_documents.append(document)
# If duplicate, keep the one with higher score
elif "score" in document.metadata:
# Find existing document with same doc_id and compare scores
for i, existing_doc in enumerate(unique_documents):
if (
existing_doc.metadata
and existing_doc.metadata.get("doc_id") == doc_id
and existing_doc.metadata.get("score", 0) < document.metadata.get("score", 0)
):
unique_documents[i] = document
break
for doc in documents:
is_dify = doc.provider == "dify"
doc_id = (doc.metadata or {}).get("doc_id") if is_dify else None
if is_dify and doc_id:
key = ("dify", doc_id)
if key not in chosen:
chosen[key] = doc
order.append(key)
else:
# Only replace if the new one has a score and it's strictly higher
if "score" in doc.metadata:
new_score = float(doc.metadata.get("score", 0.0))
old_score = float(chosen[key].metadata.get("score", 0.0)) if chosen[key].metadata else 0.0
if new_score > old_score:
chosen[key] = doc
else:
# For non-dify documents, use content-based deduplication
if document not in unique_documents:
unique_documents.append(document)
# Content-based dedup for non-dify or dify without doc_id
content_key = (doc.provider or "dify", doc.page_content)
if content_key not in chosen:
chosen[content_key] = doc
order.append(content_key)
# If duplicate content appears, we keep the first occurrence (no score comparison)
return unique_documents
return [chosen[k] for k in order]
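A sketch of the intended behavior with illustrative documents (`Document` fields as used above; the ids and scores are hypothetical):
docs = [
    Document(page_content="x", provider="dify", metadata={"doc_id": "a", "score": 0.3}),
    Document(page_content="y", provider="dify", metadata={"doc_id": "b", "score": 0.9}),
    Document(page_content="x2", provider="dify", metadata={"doc_id": "a", "score": 0.8}),
]
deduped = RetrievalService._deduplicate_documents(docs)
# Result keeps first-seen order but the best-scoring duplicate per doc_id:
# [doc_id "a" (score 0.8), doc_id "b" (score 0.9)]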
@classmethod
def _get_dataset(cls, dataset_id: str) -> Dataset | None:
@ -371,58 +382,96 @@ class RetrievalService:
include_segment_ids = set()
segment_child_map = {}
segment_file_map = {}
with Session(bind=db.engine, expire_on_commit=False) as session:
# Process documents
for document in documents:
segment_id = None
attachment_info = None
child_chunk = None
document_id = document.metadata.get("document_id")
if document_id not in dataset_documents:
continue
dataset_document = dataset_documents[document_id]
if not dataset_document:
continue
valid_dataset_documents = {}
image_doc_ids = []
child_index_node_ids = []
index_node_ids = []
doc_to_document_map = {}
for document in documents:
document_id = document.metadata.get("document_id")
if document_id not in dataset_documents:
continue
if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
# Handle parent-child documents
if document.metadata.get("doc_type") == DocType.IMAGE:
attachment_info_dict = cls.get_segment_attachment_info(
dataset_document.dataset_id,
dataset_document.tenant_id,
document.metadata.get("doc_id") or "",
session,
)
if attachment_info_dict:
attachment_info = attachment_info_dict["attachment_info"]
segment_id = attachment_info_dict["segment_id"]
else:
child_index_node_id = document.metadata.get("doc_id")
child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id == child_index_node_id)
child_chunk = session.scalar(child_chunk_stmt)
dataset_document = dataset_documents[document_id]
if not dataset_document:
continue
valid_dataset_documents[document_id] = dataset_document
if not child_chunk:
continue
segment_id = child_chunk.segment_id
if dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
doc_id = document.metadata.get("doc_id") or ""
doc_to_document_map[doc_id] = document
if document.metadata.get("doc_type") == DocType.IMAGE:
image_doc_ids.append(doc_id)
else:
child_index_node_ids.append(doc_id)
else:
doc_id = document.metadata.get("doc_id") or ""
doc_to_document_map[doc_id] = document
if document.metadata.get("doc_type") == DocType.IMAGE:
image_doc_ids.append(doc_id)
else:
index_node_ids.append(doc_id)
if not segment_id:
continue
image_doc_ids = [i for i in image_doc_ids if i]
child_index_node_ids = [i for i in child_index_node_ids if i]
index_node_ids = [i for i in index_node_ids if i]
segment = (
session.query(DocumentSegment)
.where(
DocumentSegment.dataset_id == dataset_document.dataset_id,
DocumentSegment.enabled == True,
DocumentSegment.status == "completed",
DocumentSegment.id == segment_id,
)
.first()
)
segment_ids = []
index_node_segments: list[DocumentSegment] = []
segments: list[DocumentSegment] = []
attachment_map = {}
child_chunk_map = {}
doc_segment_map = {}
if not segment:
continue
with session_factory.create_session() as session:
attachments = cls.get_segment_attachment_infos(image_doc_ids, session)
for attachment in attachments:
segment_ids.append(attachment["segment_id"])
attachment_map[attachment["segment_id"]] = attachment
doc_segment_map[attachment["segment_id"]] = attachment["attachment_id"]
child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id.in_(child_index_node_ids))
child_index_nodes = session.execute(child_chunk_stmt).scalars().all()
for i in child_index_nodes:
segment_ids.append(i.segment_id)
child_chunk_map[i.segment_id] = i
doc_segment_map[i.segment_id] = i.index_node_id
if index_node_ids:
document_segment_stmt = select(DocumentSegment).where(
DocumentSegment.enabled == True,
DocumentSegment.status == "completed",
DocumentSegment.index_node_id.in_(index_node_ids),
)
index_node_segments = session.execute(document_segment_stmt).scalars().all() # type: ignore
for index_node_segment in index_node_segments:
doc_segment_map[index_node_segment.id] = index_node_segment.index_node_id
if segment_ids:
document_segment_stmt = select(DocumentSegment).where(
DocumentSegment.enabled == True,
DocumentSegment.status == "completed",
DocumentSegment.id.in_(segment_ids),
)
segments = session.execute(document_segment_stmt).scalars().all() # type: ignore
if index_node_segments:
segments.extend(index_node_segments)
for segment in segments:
doc_id = doc_segment_map.get(segment.id)
child_chunk = child_chunk_map.get(segment.id)
attachment_info = attachment_map.get(segment.id)
if doc_id:
document = doc_to_document_map[doc_id]
ds_dataset_document: DatasetDocument | None = valid_dataset_documents.get(
document.metadata.get("document_id")
)
if ds_dataset_document and ds_dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
if segment.id not in include_segment_ids:
include_segment_ids.add(segment.id)
if child_chunk:
@ -430,10 +479,10 @@ class RetrievalService:
"id": child_chunk.id,
"content": child_chunk.content,
"position": child_chunk.position,
"score": document.metadata.get("score", 0.0),
"score": document.metadata.get("score", 0.0) if document else 0.0,
}
map_detail = {
"max_score": document.metadata.get("score", 0.0),
"max_score": document.metadata.get("score", 0.0) if document else 0.0,
"child_chunks": [child_chunk_detail],
}
segment_child_map[segment.id] = map_detail
@ -452,13 +501,14 @@ class RetrievalService:
"score": document.metadata.get("score", 0.0),
}
if segment.id in segment_child_map:
segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail)
segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail) # type: ignore
segment_child_map[segment.id]["max_score"] = max(
segment_child_map[segment.id]["max_score"], document.metadata.get("score", 0.0)
segment_child_map[segment.id]["max_score"],
document.metadata.get("score", 0.0) if document else 0.0,
)
else:
segment_child_map[segment.id] = {
"max_score": document.metadata.get("score", 0.0),
"max_score": document.metadata.get("score", 0.0) if document else 0.0,
"child_chunks": [child_chunk_detail],
}
if attachment_info:
@ -467,46 +517,11 @@ class RetrievalService:
else:
segment_file_map[segment.id] = [attachment_info]
else:
# Handle normal documents
segment = None
if document.metadata.get("doc_type") == DocType.IMAGE:
attachment_info_dict = cls.get_segment_attachment_info(
dataset_document.dataset_id,
dataset_document.tenant_id,
document.metadata.get("doc_id") or "",
session,
)
if attachment_info_dict:
attachment_info = attachment_info_dict["attachment_info"]
segment_id = attachment_info_dict["segment_id"]
document_segment_stmt = select(DocumentSegment).where(
DocumentSegment.dataset_id == dataset_document.dataset_id,
DocumentSegment.enabled == True,
DocumentSegment.status == "completed",
DocumentSegment.id == segment_id,
)
segment = session.scalar(document_segment_stmt)
if segment:
segment_file_map[segment.id] = [attachment_info]
else:
index_node_id = document.metadata.get("doc_id")
if not index_node_id:
continue
document_segment_stmt = select(DocumentSegment).where(
DocumentSegment.dataset_id == dataset_document.dataset_id,
DocumentSegment.enabled == True,
DocumentSegment.status == "completed",
DocumentSegment.index_node_id == index_node_id,
)
segment = session.scalar(document_segment_stmt)
if not segment:
continue
if segment.id not in include_segment_ids:
include_segment_ids.add(segment.id)
record = {
"segment": segment,
"score": document.metadata.get("score"), # type: ignore
"score": document.metadata.get("score", 0.0), # type: ignore
}
if attachment_info:
segment_file_map[segment.id] = [attachment_info]
@ -522,7 +537,7 @@ class RetrievalService:
for record in records:
if record["segment"].id in segment_child_map:
record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks") # type: ignore
record["score"] = segment_child_map[record["segment"].id]["max_score"]
record["score"] = segment_child_map[record["segment"].id]["max_score"] # type: ignore
if record["segment"].id in segment_file_map:
record["files"] = segment_file_map[record["segment"].id] # type: ignore[assignment]
@ -565,6 +580,8 @@ class RetrievalService:
flask_app: Flask,
retrieval_method: RetrievalMethod,
dataset: Dataset,
all_documents: list[Document],
exceptions: list[str],
query: str | None = None,
top_k: int = 4,
score_threshold: float | None = 0.0,
@ -573,8 +590,6 @@ class RetrievalService:
weights: dict | None = None,
document_ids_filter: list[str] | None = None,
attachment_id: str | None = None,
all_documents: list[Document] = [],
exceptions: list[str] = [],
):
if not query and not attachment_id:
return
@ -696,3 +711,37 @@ class RetrievalService:
}
return {"attachment_info": attachment_info, "segment_id": attachment_binding.segment_id}
return None
@classmethod
def get_segment_attachment_infos(cls, attachment_ids: list[str], session: Session) -> list[dict[str, Any]]:
attachment_infos = []
upload_files = session.query(UploadFile).where(UploadFile.id.in_(attachment_ids)).all()
if upload_files:
upload_file_ids = [upload_file.id for upload_file in upload_files]
attachment_bindings = (
session.query(SegmentAttachmentBinding)
.where(SegmentAttachmentBinding.attachment_id.in_(upload_file_ids))
.all()
)
attachment_binding_map = {binding.attachment_id: binding for binding in attachment_bindings}
if attachment_bindings:
for upload_file in upload_files:
attachment_binding = attachment_binding_map.get(upload_file.id)
attachment_info = {
"id": upload_file.id,
"name": upload_file.name,
"extension": "." + upload_file.extension,
"mime_type": upload_file.mime_type,
"source_url": sign_upload_file(upload_file.id, upload_file.extension),
"size": upload_file.size,
}
if attachment_binding:
attachment_infos.append(
{
"attachment_id": attachment_binding.attachment_id,
"attachment_info": attachment_info,
"segment_id": attachment_binding.segment_id,
}
)
return attachment_infos

View File

@ -289,7 +289,8 @@ class OracleVector(BaseVector):
words = pseg.cut(query)
current_entity = ""
for word, pos in words:
if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}: # nr: 人名ns: 地名nt: 机构名
# `nr`: Person, `ns`: Location, `nt`: Organization
if pos in {"nr", "Ng", "eng", "nz", "n", "ORG", "v"}:
current_entity += word
else:
if current_entity:

View File

@ -213,7 +213,7 @@ class VastbaseVector(BaseVector):
with self._get_cursor() as cur:
cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension))
# Vastbase supports vector dimensions in the range [1,16000]
# Vastbase supports vector dimensions in the range [1, 16,000]
if dimension <= 16000:
cur.execute(SQL_CREATE_INDEX.format(table_name=self.table_name))
redis_client.set(collection_exist_cache_key, 1, ex=3600)

View File

@ -25,7 +25,7 @@ class FirecrawlApp:
}
if params:
json_data.update(params)
response = self._post_request(f"{self.base_url}/v2/scrape", json_data, headers)
response = self._post_request(self._build_url("v2/scrape"), json_data, headers)
if response.status_code == 200:
response_data = response.json()
data = response_data["data"]
@ -42,7 +42,7 @@ class FirecrawlApp:
json_data = {"url": url}
if params:
json_data.update(params)
response = self._post_request(f"{self.base_url}/v2/crawl", json_data, headers)
response = self._post_request(self._build_url("v2/crawl"), json_data, headers)
if response.status_code == 200:
# There's also another two fields in the response: "success" (bool) and "url" (str)
job_id = response.json().get("id")
@ -58,7 +58,7 @@ class FirecrawlApp:
if params:
# Pass through provided params, including optional "sitemap": "only" | "include" | "skip"
json_data.update(params)
response = self._post_request(f"{self.base_url}/v2/map", json_data, headers)
response = self._post_request(self._build_url("v2/map"), json_data, headers)
if response.status_code == 200:
return cast(dict[str, Any], response.json())
elif response.status_code in {402, 409, 500, 429, 408}:
@ -69,7 +69,7 @@ class FirecrawlApp:
def check_crawl_status(self, job_id) -> dict[str, Any]:
headers = self._prepare_headers()
response = self._get_request(f"{self.base_url}/v2/crawl/{job_id}", headers)
response = self._get_request(self._build_url(f"v2/crawl/{job_id}"), headers)
if response.status_code == 200:
crawl_status_response = response.json()
if crawl_status_response.get("status") == "completed":
@ -120,6 +120,10 @@ class FirecrawlApp:
def _prepare_headers(self) -> dict[str, Any]:
return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
def _build_url(self, path: str) -> str:
# ensure exactly one slash between base and path, regardless of user-provided base_url
return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5) -> httpx.Response:
for attempt in range(retries):
response = httpx.post(url, headers=headers, json=data)
@ -139,7 +143,11 @@ class FirecrawlApp:
return response
def _handle_error(self, response, action):
error_message = response.json().get("error", "Unknown error occurred")
try:
payload = response.json()
error_message = payload.get("error") or payload.get("message") or response.text or "Unknown error occurred"
except json.JSONDecodeError:
error_message = response.text or "Unknown error occurred"
raise Exception(f"Failed to {action}. Status code: {response.status_code}. Error: {error_message}") # type: ignore[return]
def search(self, query: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
@ -160,7 +168,7 @@ class FirecrawlApp:
}
if params:
json_data.update(params)
response = self._post_request(f"{self.base_url}/v2/search", json_data, headers)
response = self._post_request(self._build_url("v2/search"), json_data, headers)
if response.status_code == 200:
response_data = response.json()
if not response_data.get("success"):

View File

@ -45,6 +45,6 @@ def detect_file_encodings(file_path: str, timeout: int = 5, sample_size: int = 1
except concurrent.futures.TimeoutError:
raise TimeoutError(f"Timeout reached while detecting encoding for {file_path}")
if all(encoding["encoding"] is None for encoding in encodings):
if all(encoding.encoding is None for encoding in encodings):
raise RuntimeError(f"Could not detect encoding for {file_path}")
return [FileEncoding(**enc) for enc in encodings if enc["encoding"] is not None]
return [enc for enc in encodings if enc.encoding is not None]

View File

@ -48,13 +48,21 @@ class NotionExtractor(BaseExtractor):
if notion_access_token:
self._notion_access_token = notion_access_token
else:
self._notion_access_token = self._get_access_token(tenant_id, self._credential_id)
if not self._notion_access_token:
try:
self._notion_access_token = self._get_access_token(tenant_id, self._credential_id)
except Exception as e:
logger.warning(
(
"Failed to get Notion access token from datasource credentials: %s, "
"falling back to environment variable NOTION_INTEGRATION_TOKEN"
),
e,
)
integration_token = dify_config.NOTION_INTEGRATION_TOKEN
if integration_token is None:
raise ValueError(
"Must specify `integration_token` or set environment variable `NOTION_INTEGRATION_TOKEN`."
)
) from e
self._notion_access_token = integration_token

View File

@ -83,6 +83,7 @@ class WordExtractor(BaseExtractor):
def _extract_images_from_docx(self, doc):
image_count = 0
image_map = {}
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
for r_id, rel in doc.part.rels.items():
if "image" in rel.target_ref:
@ -121,8 +122,7 @@ class WordExtractor(BaseExtractor):
used_at=naive_utc_now(),
)
db.session.add(upload_file)
# Use r_id as key for external images since target_part is undefined
image_map[r_id] = f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)"
image_map[r_id] = f"![image]({base_url}/files/{upload_file.id}/file-preview)"
else:
image_ext = rel.target_ref.split(".")[-1]
if image_ext is None:
@ -150,10 +150,7 @@ class WordExtractor(BaseExtractor):
used_at=naive_utc_now(),
)
db.session.add(upload_file)
# Use target_part as key for internal images
image_map[rel.target_part] = (
f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)"
)
image_map[rel.target_part] = f"![image]({base_url}/files/{upload_file.id}/file-preview)"
db.session.commit()
return image_map

View File

@ -231,7 +231,7 @@ class BaseIndexProcessor(ABC):
if not filename:
parsed_url = urlparse(image_url)
# unquote handles Chinese characters in the URL
# Decode percent-encoded characters in the URL path.
path = unquote(parsed_url.path)
filename = os.path.basename(path)

View File

@ -151,20 +151,14 @@ class DatasetRetrieval:
if ModelFeature.TOOL_CALL in features or ModelFeature.MULTI_TOOL_CALL in features:
planning_strategy = PlanningStrategy.ROUTER
available_datasets = []
for dataset_id in dataset_ids:
# get dataset from dataset id
dataset_stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id)
dataset = db.session.scalar(dataset_stmt)
# pass if dataset is not available
if not dataset:
dataset_stmt = select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id.in_(dataset_ids))
datasets: list[Dataset] = db.session.execute(dataset_stmt).scalars().all() # type: ignore
for dataset in datasets:
if dataset.available_document_count == 0 and dataset.provider != "external":
continue
# pass if dataset is not available
if dataset and dataset.available_document_count == 0 and dataset.provider != "external":
continue
available_datasets.append(dataset)
if inputs:
inputs = {key: str(value) for key, value in inputs.items()}
else:
@ -282,26 +276,35 @@ class DatasetRetrieval:
)
context_files.append(attachment_info)
if show_retrieve_source:
dataset_ids = [record.segment.dataset_id for record in records]
document_ids = [record.segment.document_id for record in records]
dataset_document_stmt = select(DatasetDocument).where(
DatasetDocument.id.in_(document_ids),
DatasetDocument.enabled == True,
DatasetDocument.archived == False,
)
documents = db.session.execute(dataset_document_stmt).scalars().all() # type: ignore
dataset_stmt = select(Dataset).where(
Dataset.id.in_(dataset_ids),
)
datasets = db.session.execute(dataset_stmt).scalars().all() # type: ignore
dataset_map = {i.id: i for i in datasets}
document_map = {i.id: i for i in documents}
for record in records:
segment = record.segment
dataset = db.session.query(Dataset).filter_by(id=segment.dataset_id).first()
dataset_document_stmt = select(DatasetDocument).where(
DatasetDocument.id == segment.document_id,
DatasetDocument.enabled == True,
DatasetDocument.archived == False,
)
document = db.session.scalar(dataset_document_stmt)
if dataset and document:
dataset_item = dataset_map.get(segment.dataset_id)
document_item = document_map.get(segment.document_id)
if dataset_item and document_item:
source = RetrievalSourceMetadata(
dataset_id=dataset.id,
dataset_name=dataset.name,
document_id=document.id,
document_name=document.name,
data_source_type=document.data_source_type,
dataset_id=dataset_item.id,
dataset_name=dataset_item.name,
document_id=document_item.id,
document_name=document_item.name,
data_source_type=document_item.data_source_type,
segment_id=segment.id,
retriever_from=invoke_from.to_source(),
score=record.score or 0.0,
doc_metadata=document.doc_metadata,
doc_metadata=document_item.doc_metadata,
)
if invoke_from.to_source() == "dev":

View File

@ -2,6 +2,7 @@
from __future__ import annotations
import codecs
import re
from typing import Any
@ -52,7 +53,7 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter)
def __init__(self, fixed_separator: str = "\n\n", separators: list[str] | None = None, **kwargs: Any):
"""Create a new TextSplitter."""
super().__init__(**kwargs)
self._fixed_separator = fixed_separator
self._fixed_separator = codecs.decode(fixed_separator, "unicode_escape")
self._separators = separators or ["\n\n", "\n", "", ". ", " ", ""]
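The codecs.decode(..., "unicode_escape") call means a separator typed literally as \n\n (two characters, backslash and n) becomes a real newline pair before splitting; a minimal sketch:
import codecs

assert codecs.decode("\\n\\n", "unicode_escape") == "\n\n"  # escape sequence becomes real newlines
assert codecs.decode("***", "unicode_escape") == "***"  # separators without escapes are unchanged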
def split_text(self, text: str) -> list[str]:
@ -94,7 +95,8 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter)
splits = re.split(r" +", text)
else:
splits = text.split(separator)
splits = [item + separator if i < len(splits) else item for i, item in enumerate(splits)]
if self._keep_separator:
splits = [s + separator for s in splits[:-1]] + splits[-1:]
else:
splits = list(text)
if separator == "\n":
@ -103,7 +105,7 @@ class FixedRecursiveCharacterTextSplitter(EnhanceRecursiveCharacterTextSplitter)
splits = [s for s in splits if (s not in {"", "\n"})]
_good_splits = []
_good_splits_lengths = [] # cache the lengths of the splits
_separator = separator if self._keep_separator else ""
_separator = "" if self._keep_separator else separator
s_lens = self._length_function(splits)
if separator != "":
for s, s_len in zip(splits, s_lens):

View File

@ -247,6 +247,7 @@ class WorkflowNodeExecutionMetadataKey(StrEnum):
ERROR_STRATEGY = "error_strategy" # node in continue on error mode return the field
LOOP_VARIABLE_MAP = "loop_variable_map" # single loop variable output
DATASOURCE_INFO = "datasource_info"
COMPLETED_REASON = "completed_reason" # completed reason for loop node
class WorkflowNodeExecutionStatus(StrEnum):

View File

@ -86,6 +86,11 @@ class Executor:
node_data.authorization.config.api_key = variable_pool.convert_template(
node_data.authorization.config.api_key
).text
# Validate that API key is not empty after template conversion
if not node_data.authorization.config.api_key or not node_data.authorization.config.api_key.strip():
raise AuthorizationConfigError(
"API key is required for authorization but was empty. Please provide a valid API key."
)
self.url = node_data.url
self.method = node_data.method

View File

@ -1,3 +1,4 @@
from enum import StrEnum
from typing import Annotated, Any, Literal
from pydantic import AfterValidator, BaseModel, Field, field_validator
@ -96,3 +97,8 @@ class LoopState(BaseLoopState):
Get current output.
"""
return self.current_output
class LoopCompletedReason(StrEnum):
LOOP_BREAK = "loop_break"
LOOP_COMPLETED = "loop_completed"

View File

@ -29,7 +29,7 @@ from core.workflow.node_events import (
)
from core.workflow.nodes.base import LLMUsageTrackingMixin
from core.workflow.nodes.base.node import Node
from core.workflow.nodes.loop.entities import LoopNodeData, LoopVariableData
from core.workflow.nodes.loop.entities import LoopCompletedReason, LoopNodeData, LoopVariableData
from core.workflow.utils.condition.processor import ConditionProcessor
from factories.variable_factory import TypeMismatchError, build_segment_with_type, segment_to_variable
from libs.datetime_utils import naive_utc_now
@ -96,6 +96,7 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
loop_duration_map: dict[str, float] = {}
single_loop_variable_map: dict[str, dict[str, Any]] = {} # single loop variable output
loop_usage = LLMUsage.empty_usage()
loop_node_ids = self._extract_loop_node_ids_from_config(self.graph_config, self._node_id)
# Start Loop event
yield LoopStartedEvent(
@ -118,6 +119,8 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
loop_count = 0
for i in range(loop_count):
# Clear stale variables from previous loop iterations to avoid streaming old values
self._clear_loop_subgraph_variables(loop_node_ids)
graph_engine = self._create_graph_engine(start_at=start_at, root_node_id=root_node_id)
loop_start_time = naive_utc_now()
@ -177,7 +180,11 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: loop_usage.total_tokens,
WorkflowNodeExecutionMetadataKey.TOTAL_PRICE: loop_usage.total_price,
WorkflowNodeExecutionMetadataKey.CURRENCY: loop_usage.currency,
"completed_reason": "loop_break" if reach_break_condition else "loop_completed",
WorkflowNodeExecutionMetadataKey.COMPLETED_REASON: (
LoopCompletedReason.LOOP_BREAK
if reach_break_condition
else LoopCompletedReason.LOOP_COMPLETED
),
WorkflowNodeExecutionMetadataKey.LOOP_DURATION_MAP: loop_duration_map,
WorkflowNodeExecutionMetadataKey.LOOP_VARIABLE_MAP: single_loop_variable_map,
},
@ -274,6 +281,17 @@ class LoopNode(LLMUsageTrackingMixin, Node[LoopNodeData]):
if WorkflowNodeExecutionMetadataKey.LOOP_ID not in current_metadata:
event.node_run_result.metadata = {**current_metadata, **loop_metadata}
def _clear_loop_subgraph_variables(self, loop_node_ids: set[str]) -> None:
"""
Remove variables produced by loop sub-graph nodes from previous iterations.
Keeping stale variables causes a freshly created response coordinator in the
next iteration to fall back to outdated values when no stream chunks exist.
"""
variable_pool = self.graph_runtime_state.variable_pool
for node_id in loop_node_ids:
variable_pool.remove([node_id])
@classmethod
def _extract_variable_selector_to_variable_mapping(
cls,

View File

@ -1,3 +1,4 @@
import json
from typing import Any
from jsonschema import Draft7Validator, ValidationError
@ -42,15 +43,25 @@ class StartNode(Node[StartNodeData]):
if value is None and variable.required:
raise ValueError(f"{key} is required in input form")
if not isinstance(value, dict):
raise ValueError(f"{key} must be a JSON object")
schema = variable.json_schema
if not schema:
continue
if not value:
continue
try:
Draft7Validator(schema).validate(value)
json_schema = json.loads(schema)
except json.JSONDecodeError as e:
raise ValueError(f"{schema} must be a valid JSON object")
try:
json_value = json.loads(value)
except json.JSONDecodeError as e:
raise ValueError(f"{value} must be a valid JSON object")
try:
Draft7Validator(json_schema).validate(json_value)
except ValidationError as e:
raise ValueError(f"JSON object for '{key}' does not match schema: {e.message}")
node_inputs[key] = value
node_inputs[key] = json_value
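A compact sketch of the reworked flow for a JSON-object input: the stored schema and the submitted value are both strings, each must parse, and the parsed value must satisfy the parsed schema (the schema and value below are hypothetical):
import json
from jsonschema import Draft7Validator

schema_str = '{"type": "object", "required": ["name"]}'  # variable.json_schema, stored as a string
value_str = '{"name": "Ada"}'  # user-submitted input, also a string

json_schema = json.loads(schema_str)
json_value = json.loads(value_str)
Draft7Validator(json_schema).validate(json_value)  # raises ValidationError on mismatch
# node_inputs[key] now receives the parsed dict, not the raw string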

View File

@ -34,10 +34,10 @@ if [[ "${MODE}" == "worker" ]]; then
if [[ -z "${CELERY_QUEUES}" ]]; then
if [[ "${EDITION}" == "CLOUD" ]]; then
# Cloud edition: separate queues for dataset and trigger tasks
DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow_professional,workflow_team,workflow_sandbox,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention"
else
# Community edition (SELF_HOSTED): dataset, pipeline and workflow have separate queues
DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
DEFAULT_QUEUES="dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor,retention"
fi
else
DEFAULT_QUEUES="${CELERY_QUEUES}"
@ -69,6 +69,53 @@ if [[ "${MODE}" == "worker" ]]; then
elif [[ "${MODE}" == "beat" ]]; then
exec celery -A app.celery beat --loglevel ${LOG_LEVEL:-INFO}
elif [[ "${MODE}" == "job" ]]; then
# Job mode: Run a one-time Flask command and exit
# Pass Flask command and arguments via container args
# Example K8s usage:
# args:
# - create-tenant
# - --email
# - admin@example.com
#
# Example Docker usage:
# docker run -e MODE=job dify-api:latest create-tenant --email admin@example.com
if [[ $# -eq 0 ]]; then
echo "Error: No command specified for job mode."
echo ""
echo "Usage examples:"
echo " Kubernetes:"
echo " args: [create-tenant, --email, admin@example.com]"
echo ""
echo " Docker:"
echo " docker run -e MODE=job dify-api create-tenant --email admin@example.com"
echo ""
echo "Available commands:"
echo " create-tenant, reset-password, reset-email, upgrade-db,"
echo " vdb-migrate, install-plugins, and more..."
echo ""
echo "Run 'flask --help' to see all available commands."
exit 1
fi
echo "Running Flask job command: flask $*"
# Temporarily disable exit on error to capture exit code
set +e
flask "$@"
JOB_EXIT_CODE=$?
set -e
if [[ ${JOB_EXIT_CODE} -eq 0 ]]; then
echo "Job completed successfully."
else
echo "Job failed with exit code ${JOB_EXIT_CODE}."
fi
exit ${JOB_EXIT_CODE}
else
if [[ "${DEBUG}" == "true" ]]; then
exec flask run --host=${DIFY_BIND_ADDRESS:-0.0.0.0} --port=${DIFY_PORT:-5001} --debug

View File

@@ -0,0 +1,74 @@
"""
Logstore extension for Dify application.
This extension initializes the logstore (Aliyun SLS) on application startup,
creating necessary projects, logstores, and indexes if they don't exist.
"""
import logging
import os
from dotenv import load_dotenv
from dify_app import DifyApp
logger = logging.getLogger(__name__)
def is_enabled() -> bool:
"""
Check if logstore extension is enabled.
Returns:
True if all required Aliyun SLS environment variables are set, False otherwise
"""
# Load environment variables from .env file
load_dotenv()
required_vars = [
"ALIYUN_SLS_ACCESS_KEY_ID",
"ALIYUN_SLS_ACCESS_KEY_SECRET",
"ALIYUN_SLS_ENDPOINT",
"ALIYUN_SLS_REGION",
"ALIYUN_SLS_PROJECT_NAME",
]
all_set = all(os.environ.get(var) for var in required_vars)
if not all_set:
logger.info("Logstore extension disabled: required Aliyun SLS environment variables not set")
return all_set
def init_app(app: DifyApp):
"""
Initialize logstore on application startup.
This function:
1. Creates Aliyun SLS project if it doesn't exist
2. Creates logstores (workflow_execution, workflow_node_execution) if they don't exist
3. Creates indexes with field configurations based on PostgreSQL table structures
This operation is idempotent and only executes once during application startup.
Args:
app: The Dify application instance
"""
try:
from extensions.logstore.aliyun_logstore import AliyunLogStore
logger.info("Initializing logstore...")
# Create logstore client and initialize project/logstores/indexes
logstore_client = AliyunLogStore()
logstore_client.init_project_logstore()
# Attach to app for potential later use
app.extensions["logstore"] = logstore_client
logger.info("Logstore initialized successfully")
except Exception:
logger.exception("Failed to initialize logstore")
# Don't raise - allow application to continue even if logstore init fails
# This ensures that the application can still run if logstore is misconfigured
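A hedged sketch of how an extension with an `is_enabled()` gate is typically wired at startup (the loader below is illustrative; Dify's actual extension loading may differ):

```python
from types import SimpleNamespace

def load_extension(app, ext_module) -> None:
    # Extensions may opt out via an is_enabled() predicate.
    if hasattr(ext_module, "is_enabled") and not ext_module.is_enabled():
        return
    ext_module.init_app(app)

# Dummy stand-ins to demonstrate the call order.
app = SimpleNamespace(extensions={})
ext = SimpleNamespace(
    is_enabled=lambda: True,
    init_app=lambda a: a.extensions.setdefault("logstore", object()),
)
load_extension(app, ext)
assert "logstore" in app.extensions
```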

View File

@@ -0,0 +1,890 @@
import logging
import os
import threading
import time
from collections.abc import Sequence
from typing import Any
import sqlalchemy as sa
from aliyun.log import ( # type: ignore[import-untyped]
GetLogsRequest,
IndexConfig,
IndexKeyConfig,
IndexLineConfig,
LogClient,
LogItem,
PutLogsRequest,
)
from aliyun.log.auth import AUTH_VERSION_4 # type: ignore[import-untyped]
from aliyun.log.logexception import LogException # type: ignore[import-untyped]
from dotenv import load_dotenv
from sqlalchemy.orm import DeclarativeBase
from configs import dify_config
from extensions.logstore.aliyun_logstore_pg import AliyunLogStorePG
logger = logging.getLogger(__name__)
class AliyunLogStore:
"""
Singleton class for Aliyun SLS LogStore operations.
Ensures only one instance exists to prevent multiple PG connection pools.
"""
_instance: "AliyunLogStore | None" = None
_initialized: bool = False
# Track delayed PG connection for newly created projects
_pg_connection_timer: threading.Timer | None = None
_pg_connection_delay: int = 90 # delay seconds
# Default tokenizer for text/json fields and full-text index
# Common delimiters: comma, space, quotes, punctuation, operators, brackets, special chars
DEFAULT_TOKEN_LIST = [
",",
" ",
'"',
'"',
";",
"=",
"(",
")",
"[",
"]",
"{",
"}",
"?",
"@",
"&",
"<",
">",
"/",
":",
"\n",
"\t",
]
def __new__(cls) -> "AliyunLogStore":
"""Implement singleton pattern."""
if cls._instance is None:
cls._instance = super().__new__(cls)
return cls._instance
project_des = "dify"
workflow_execution_logstore = "workflow_execution"
workflow_node_execution_logstore = "workflow_node_execution"
@staticmethod
def _sqlalchemy_type_to_logstore_type(column: Any) -> str:
"""
Map SQLAlchemy column type to Aliyun LogStore index type.
Args:
column: SQLAlchemy column object
Returns:
LogStore index type: 'text', 'long', 'double', or 'json'
"""
column_type = column.type
# Integer types -> long
if isinstance(column_type, (sa.Integer, sa.BigInteger, sa.SmallInteger)):
return "long"
# Float types -> double
if isinstance(column_type, (sa.Float, sa.Numeric)):
return "double"
# String and Text types -> text
if isinstance(column_type, (sa.String, sa.Text)):
return "text"
# DateTime -> text (stored as ISO format string in logstore)
if isinstance(column_type, sa.DateTime):
return "text"
# Boolean -> long (stored as 0/1)
if isinstance(column_type, sa.Boolean):
return "long"
# JSON -> json
if isinstance(column_type, sa.JSON):
return "json"
# Default to text for unknown types
return "text"
@staticmethod
def _generate_index_keys_from_model(model_class: type[DeclarativeBase]) -> dict[str, IndexKeyConfig]:
"""
Automatically generate LogStore field index configuration from SQLAlchemy model.
This method introspects the SQLAlchemy model's column definitions and creates
corresponding LogStore index configurations. When the PG schema is updated via
Flask-Migrate, this method will automatically pick up the new fields on next startup.
Args:
model_class: SQLAlchemy model class (e.g., WorkflowRun, WorkflowNodeExecutionModel)
Returns:
Dictionary mapping field names to IndexKeyConfig objects
"""
index_keys = {}
# Iterate over all mapped columns in the model
if hasattr(model_class, "__mapper__"):
for column_name, column_property in model_class.__mapper__.columns.items():
# Skip relationship properties and other non-column attributes
if not hasattr(column_property, "type"):
continue
# Map SQLAlchemy type to LogStore type
logstore_type = AliyunLogStore._sqlalchemy_type_to_logstore_type(column_property)
# Create index configuration
# - text fields: case_insensitive for better search, with tokenizer and Chinese support
# - all fields: doc_value=True for analytics
if logstore_type == "text":
index_keys[column_name] = IndexKeyConfig(
index_type="text",
case_sensitive=False,
doc_value=True,
token_list=AliyunLogStore.DEFAULT_TOKEN_LIST,
chinese=True,
)
else:
index_keys[column_name] = IndexKeyConfig(index_type=logstore_type, doc_value=True)
# Add log_version field (not in PG model, but used in logstore for versioning)
index_keys["log_version"] = IndexKeyConfig(index_type="long", doc_value=True)
return index_keys
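To make the introspection concrete, a self-contained sketch with a toy SQLAlchemy model — the same `__mapper__.columns` walk plus the type-mapping rules yields one index type per column (the model and helper names are illustrative):

```python
import sqlalchemy as sa
from sqlalchemy.orm import DeclarativeBase

class Base(DeclarativeBase):
    pass

class ToyRun(Base):
    __tablename__ = "toy_run"
    id = sa.Column(sa.String, primary_key=True)
    total_tokens = sa.Column(sa.Integer)
    elapsed_time = sa.Column(sa.Float)
    finished = sa.Column(sa.Boolean)

def column_to_logstore_type(column) -> str:
    # Integers and booleans -> long, floats -> double, everything else -> text,
    # mirroring the mapping rules above.
    if isinstance(column.type, (sa.Integer, sa.BigInteger, sa.SmallInteger, sa.Boolean)):
        return "long"
    if isinstance(column.type, (sa.Float, sa.Numeric)):
        return "double"
    return "text"

index_types = {
    name: column_to_logstore_type(col)
    for name, col in ToyRun.__mapper__.columns.items()
}
print(index_types)
# {'id': 'text', 'total_tokens': 'long', 'elapsed_time': 'double', 'finished': 'long'}
```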
def __init__(self) -> None:
# Skip initialization if already initialized (singleton pattern)
if self.__class__._initialized:
return
load_dotenv()
self.access_key_id: str = os.environ.get("ALIYUN_SLS_ACCESS_KEY_ID", "")
self.access_key_secret: str = os.environ.get("ALIYUN_SLS_ACCESS_KEY_SECRET", "")
self.endpoint: str = os.environ.get("ALIYUN_SLS_ENDPOINT", "")
self.region: str = os.environ.get("ALIYUN_SLS_REGION", "")
self.project_name: str = os.environ.get("ALIYUN_SLS_PROJECT_NAME", "")
self.logstore_ttl: int = int(os.environ.get("ALIYUN_SLS_LOGSTORE_TTL", 365))
self.log_enabled: bool = os.environ.get("SQLALCHEMY_ECHO", "false").lower() == "true"
self.pg_mode_enabled: bool = os.environ.get("LOGSTORE_PG_MODE_ENABLED", "true").lower() == "true"
# Initialize SDK client
self.client = LogClient(
self.endpoint, self.access_key_id, self.access_key_secret, auth_version=AUTH_VERSION_4, region=self.region
)
# Append Dify identification to the existing user agent
original_user_agent = self.client._user_agent # pyright: ignore[reportPrivateUsage]
dify_version = dify_config.project.version
enhanced_user_agent = f"Dify,Dify-{dify_version},{original_user_agent}"
self.client.set_user_agent(enhanced_user_agent)
# PG client will be initialized in init_project_logstore
self._pg_client: AliyunLogStorePG | None = None
self._use_pg_protocol: bool = False
self.__class__._initialized = True
@property
def supports_pg_protocol(self) -> bool:
"""Check if PG protocol is supported and enabled."""
return self._use_pg_protocol
def _attempt_pg_connection_init(self) -> bool:
"""
Attempt to initialize PG connection.
This method tries to establish PG connection and performs necessary checks.
It's used both for immediate connection (existing projects) and delayed connection (new projects).
Returns:
True if PG connection was successfully established, False otherwise.
"""
if not self.pg_mode_enabled or not self._pg_client:
return False
try:
self._use_pg_protocol = self._pg_client.init_connection()
if self._use_pg_protocol:
logger.info("Successfully connected to project %s using PG protocol", self.project_name)
# Check if scan_index is enabled for all logstores
self._check_and_disable_pg_if_scan_index_disabled()
return True
else:
logger.info("PG connection failed for project %s. Will use SDK mode.", self.project_name)
return False
except Exception as e:
logger.warning(
"Failed to establish PG connection for project %s: %s. Will use SDK mode.",
self.project_name,
str(e),
)
self._use_pg_protocol = False
return False
def _delayed_pg_connection_init(self) -> None:
"""
Delayed initialization of PG connection for newly created projects.
This method is called by a background timer after project creation
(after _pg_connection_delay seconds, 90 by default).
"""
# Double check conditions in case state changed
if self._use_pg_protocol:
return
logger.info(
"Attempting delayed PG connection for newly created project %s ...",
self.project_name,
)
self._attempt_pg_connection_init()
self.__class__._pg_connection_timer = None
def init_project_logstore(self):
"""
Initialize project, logstore, index, and PG connection.
This method should be called once during application startup to ensure
all required resources exist and connections are established.
"""
# Step 1: Ensure project and logstore exist
project_is_new = False
if not self.is_project_exist():
self.create_project()
project_is_new = True
self.create_logstore_if_not_exist()
# Step 2: Initialize PG client and connection (if enabled)
if not self.pg_mode_enabled:
logger.info("PG mode is disabled. Will use SDK mode.")
return
# Create PG client if not already created
if self._pg_client is None:
logger.info("Initializing PG client for project %s...", self.project_name)
self._pg_client = AliyunLogStorePG(
self.access_key_id, self.access_key_secret, self.endpoint, self.project_name
)
# Step 3: Establish PG connection based on project status
if project_is_new:
# For newly created projects, schedule delayed PG connection
self._use_pg_protocol = False
logger.info(
"Project %s is newly created. Will use SDK mode and schedule PG connection attempt in %d seconds.",
self.project_name,
self.__class__._pg_connection_delay,
)
if self.__class__._pg_connection_timer is not None:
self.__class__._pg_connection_timer.cancel()
self.__class__._pg_connection_timer = threading.Timer(
self.__class__._pg_connection_delay,
self._delayed_pg_connection_init,
)
self.__class__._pg_connection_timer.daemon = True # Don't block app shutdown
self.__class__._pg_connection_timer.start()
else:
# For existing projects, attempt PG connection immediately
logger.info("Project %s already exists. Attempting PG connection...", self.project_name)
self._attempt_pg_connection_init()
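The delayed connect for new projects is a one-shot daemon `threading.Timer`; a standalone sketch of the pattern (delay shortened here for demonstration; the real code uses 90 seconds):

```python
import threading
import time

def delayed_connect() -> None:
    print("attempting PG connection after delay")

timer = threading.Timer(0.1, delayed_connect)  # 90s in the real code
timer.daemon = True  # don't block interpreter shutdown
timer.start()
time.sleep(0.2)  # keep the main thread alive long enough for the demo
```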
def _check_and_disable_pg_if_scan_index_disabled(self) -> None:
"""
Check if scan_index is enabled for all logstores.
If any logstore has scan_index=false, disable PG protocol.
This is necessary because PG protocol requires scan_index to be enabled.
"""
logstore_name_list = [
AliyunLogStore.workflow_execution_logstore,
AliyunLogStore.workflow_node_execution_logstore,
]
for logstore_name in logstore_name_list:
existing_config = self.get_existing_index_config(logstore_name)
if existing_config and not existing_config.scan_index:
logger.info(
"Logstore %s has scan_index=false, USE SDK mode for read/write operations. "
"PG protocol requires scan_index to be enabled.",
logstore_name,
)
self._use_pg_protocol = False
# Close PG connection if it was initialized
if self._pg_client:
self._pg_client.close()
self._pg_client = None
return
def is_project_exist(self) -> bool:
try:
self.client.get_project(self.project_name)
return True
except Exception as e:
if e.args[0] == "ProjectNotExist":
return False
else:
raise e
def create_project(self):
try:
self.client.create_project(self.project_name, AliyunLogStore.project_des)
logger.info("Project %s created successfully", self.project_name)
except LogException as e:
logger.exception(
"Failed to create project %s: errorCode=%s, errorMessage=%s, requestId=%s",
self.project_name,
e.get_error_code(),
e.get_error_message(),
e.get_request_id(),
)
raise
def is_logstore_exist(self, logstore_name: str) -> bool:
try:
_ = self.client.get_logstore(self.project_name, logstore_name)
return True
except Exception as e:
if e.args[0] == "LogStoreNotExist":
return False
else:
raise e
def create_logstore_if_not_exist(self) -> None:
logstore_name_list = [
AliyunLogStore.workflow_execution_logstore,
AliyunLogStore.workflow_node_execution_logstore,
]
for logstore_name in logstore_name_list:
if not self.is_logstore_exist(logstore_name):
try:
self.client.create_logstore(
project_name=self.project_name, logstore_name=logstore_name, ttl=self.logstore_ttl
)
logger.info("logstore %s created successfully", logstore_name)
except LogException as e:
logger.exception(
"Failed to create logstore %s: errorCode=%s, errorMessage=%s, requestId=%s",
logstore_name,
e.get_error_code(),
e.get_error_message(),
e.get_request_id(),
)
raise
# Ensure index contains all Dify-required fields
# This intelligently merges with existing config, preserving custom indexes
self.ensure_index_config(logstore_name)
def is_index_exist(self, logstore_name: str) -> bool:
try:
_ = self.client.get_index_config(self.project_name, logstore_name)
return True
except Exception as e:
if e.args[0] == "IndexConfigNotExist":
return False
else:
raise e
def get_existing_index_config(self, logstore_name: str) -> IndexConfig | None:
"""
Get existing index configuration from logstore.
Args:
logstore_name: Name of the logstore
Returns:
IndexConfig object if index exists, None otherwise
"""
try:
response = self.client.get_index_config(self.project_name, logstore_name)
return response.get_index_config()
except Exception as e:
if e.args[0] == "IndexConfigNotExist":
return None
else:
logger.exception("Failed to get index config for logstore %s", logstore_name)
raise e
def _get_workflow_execution_index_keys(self) -> dict[str, IndexKeyConfig]:
"""
Get field index configuration for workflow_execution logstore.
This method automatically generates index configuration from the WorkflowRun SQLAlchemy model.
When the PG schema is updated via Flask-Migrate, the index configuration will be automatically
updated on next application startup.
"""
from models.workflow import WorkflowRun
index_keys = self._generate_index_keys_from_model(WorkflowRun)
# Add custom fields that are in logstore but not in PG model
# These fields are added by the repository layer
index_keys["error_message"] = IndexKeyConfig(
index_type="text",
case_sensitive=False,
doc_value=True,
token_list=self.DEFAULT_TOKEN_LIST,
chinese=True,
) # Maps to 'error' in PG
index_keys["started_at"] = IndexKeyConfig(
index_type="text",
case_sensitive=False,
doc_value=True,
token_list=self.DEFAULT_TOKEN_LIST,
chinese=True,
) # Maps to 'created_at' in PG
logger.info("Generated %d index keys for workflow_execution from WorkflowRun model", len(index_keys))
return index_keys
def _get_workflow_node_execution_index_keys(self) -> dict[str, IndexKeyConfig]:
"""
Get field index configuration for workflow_node_execution logstore.
This method automatically generates index configuration from the WorkflowNodeExecutionModel.
When the PG schema is updated via Flask-Migrate, the index configuration will be automatically
updated on next application startup.
"""
from models.workflow import WorkflowNodeExecutionModel
index_keys = self._generate_index_keys_from_model(WorkflowNodeExecutionModel)
logger.debug(
"Generated %d index keys for workflow_node_execution from WorkflowNodeExecutionModel", len(index_keys)
)
return index_keys
def _get_index_config(self, logstore_name: str) -> IndexConfig:
"""
Get index configuration for the specified logstore.
Args:
logstore_name: Name of the logstore
Returns:
IndexConfig object with line and field indexes
"""
# Create full-text index (line config) with tokenizer
line_config = IndexLineConfig(token_list=self.DEFAULT_TOKEN_LIST, case_sensitive=False, chinese=True)
# Get field index configuration based on logstore name
field_keys = {}
if logstore_name == AliyunLogStore.workflow_execution_logstore:
field_keys = self._get_workflow_execution_index_keys()
elif logstore_name == AliyunLogStore.workflow_node_execution_logstore:
field_keys = self._get_workflow_node_execution_index_keys()
# key_config_list should be a dict, not a list
# Create index config with both line and field indexes
return IndexConfig(line_config=line_config, key_config_list=field_keys, scan_index=True)
def create_index(self, logstore_name: str) -> None:
"""
Create index for the specified logstore with both full-text and field indexes.
Field indexes are automatically generated from the corresponding SQLAlchemy model.
"""
index_config = self._get_index_config(logstore_name)
try:
self.client.create_index(self.project_name, logstore_name, index_config)
logger.info(
"index for %s created successfully with %d field indexes",
logstore_name,
len(index_config.key_config_list or {}),
)
except LogException as e:
logger.exception(
"Failed to create index for logstore %s: errorCode=%s, errorMessage=%s, requestId=%s",
logstore_name,
e.get_error_code(),
e.get_error_message(),
e.get_request_id(),
)
raise
def _merge_index_configs(
self, existing_config: IndexConfig, required_keys: dict[str, IndexKeyConfig], logstore_name: str
) -> tuple[IndexConfig, bool]:
"""
Intelligently merge existing index config with Dify's required field indexes.
This method:
1. Preserves all existing field indexes in logstore (including custom fields)
2. Adds missing Dify-required fields
3. Updates fields where type doesn't match (with json/text compatibility)
4. Corrects case mismatches (e.g., if Dify needs 'status' but logstore has 'Status')
Type compatibility rules:
- json and text types are considered compatible (users can manually choose either)
- All other type mismatches will be corrected to match Dify requirements
Note: Logstore is case-sensitive and doesn't allow duplicate fields with different cases.
Case mismatch means: existing field name differs from required name only in case.
Args:
existing_config: Current index configuration from logstore
required_keys: Dify's required field index configurations
logstore_name: Name of the logstore (for logging)
Returns:
Tuple of (merged_config, needs_update)
"""
# key_config_list is already a dict in the SDK
# Make a copy to avoid modifying the original
existing_keys = dict(existing_config.key_config_list) if existing_config.key_config_list else {}
# Track changes
needs_update = False
case_corrections = [] # Fields that need case correction (e.g., 'Status' -> 'status')
missing_fields = []
type_mismatches = []
# First pass: Check for and resolve case mismatches with required fields
# Note: Logstore itself doesn't allow duplicate fields with different cases,
# so we only need to check if the existing case matches the required case
for required_name in required_keys:
lower_name = required_name.lower()
# Find key that matches case-insensitively but not exactly
wrong_case_key = None
for existing_key in existing_keys:
if existing_key.lower() == lower_name and existing_key != required_name:
wrong_case_key = existing_key
break
if wrong_case_key:
# Field exists but with wrong case (e.g., 'Status' when we need 'status')
# Remove the wrong-case key, will be added back with correct case later
case_corrections.append((wrong_case_key, required_name))
del existing_keys[wrong_case_key]
needs_update = True
# Second pass: Check each required field
for required_name, required_config in required_keys.items():
# Check for exact match (case-sensitive)
if required_name in existing_keys:
existing_type = existing_keys[required_name].index_type
required_type = required_config.index_type
# Check if type matches
# Special case: json and text are interchangeable for JSON content fields
# Allow users to manually configure text instead of json (or vice versa) without forcing updates
is_compatible = existing_type == required_type or ({existing_type, required_type} == {"json", "text"})
if not is_compatible:
type_mismatches.append((required_name, existing_type, required_type))
# Update with correct type
existing_keys[required_name] = required_config
needs_update = True
# else: field exists with compatible type, no action needed
else:
# Field doesn't exist (may have been removed in first pass due to case conflict)
missing_fields.append(required_name)
existing_keys[required_name] = required_config
needs_update = True
# Log changes
if missing_fields:
logger.info(
"Logstore %s: Adding %d missing Dify-required fields: %s",
logstore_name,
len(missing_fields),
", ".join(missing_fields[:10]) + ("..." if len(missing_fields) > 10 else ""),
)
if type_mismatches:
logger.info(
"Logstore %s: Fixing %d type mismatches: %s",
logstore_name,
len(type_mismatches),
", ".join([f"{name}({old}->{new})" for name, old, new in type_mismatches[:5]])
+ ("..." if len(type_mismatches) > 5 else ""),
)
if case_corrections:
logger.info(
"Logstore %s: Correcting %d field name cases: %s",
logstore_name,
len(case_corrections),
", ".join([f"'{old}' -> '{new}'" for old, new in case_corrections[:5]])
+ ("..." if len(case_corrections) > 5 else ""),
)
# Create merged config
# key_config_list should be a dict, not a list
# Preserve the original scan_index value - don't force it to True
merged_config = IndexConfig(
line_config=existing_config.line_config
or IndexLineConfig(token_list=self.DEFAULT_TOKEN_LIST, case_sensitive=False, chinese=True),
key_config_list=existing_keys,
scan_index=existing_config.scan_index,
)
return merged_config, needs_update
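The type-compatibility rule above isolates to a one-line predicate — json and text are interchangeable, anything else must match exactly:

```python
def index_types_compatible(existing: str, required: str) -> bool:
    # json/text are interchangeable; any other mismatch forces an update.
    return existing == required or {existing, required} == {"json", "text"}

assert index_types_compatible("text", "text")
assert index_types_compatible("json", "text")
assert index_types_compatible("text", "json")
assert not index_types_compatible("long", "text")
```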
def ensure_index_config(self, logstore_name: str) -> None:
"""
Ensure index configuration includes all Dify-required fields.
This method intelligently manages index configuration:
1. If index doesn't exist, create it with Dify's required fields
2. If index exists:
- Check if all Dify-required fields are present
- Check if field types match requirements
- Only update if fields are missing or types are incorrect
- Preserve any additional custom index configurations
This approach allows users to add their own custom indexes without being overwritten.
"""
# Get Dify's required field indexes
required_keys = {}
if logstore_name == AliyunLogStore.workflow_execution_logstore:
required_keys = self._get_workflow_execution_index_keys()
elif logstore_name == AliyunLogStore.workflow_node_execution_logstore:
required_keys = self._get_workflow_node_execution_index_keys()
# Check if index exists
existing_config = self.get_existing_index_config(logstore_name)
if existing_config is None:
# Index doesn't exist, create it
logger.info(
"Logstore %s: Index doesn't exist, creating with %d required fields",
logstore_name,
len(required_keys),
)
self.create_index(logstore_name)
else:
merged_config, needs_update = self._merge_index_configs(existing_config, required_keys, logstore_name)
if needs_update:
logger.info("Logstore %s: Updating index to include Dify-required fields", logstore_name)
try:
self.client.update_index(self.project_name, logstore_name, merged_config)
logger.info(
"Logstore %s: Index updated successfully, now has %d total field indexes",
logstore_name,
len(merged_config.key_config_list or {}),
)
except LogException as e:
logger.exception(
"Failed to update index for logstore %s: errorCode=%s, errorMessage=%s, requestId=%s",
logstore_name,
e.get_error_code(),
e.get_error_message(),
e.get_request_id(),
)
raise
else:
logger.info(
"Logstore %s: Index already contains all %d Dify-required fields with correct types, "
"no update needed",
logstore_name,
len(required_keys),
)
def put_log(self, logstore: str, contents: Sequence[tuple[str, str]]) -> None:
# Route to PG or SDK based on protocol availability
if self._use_pg_protocol and self._pg_client:
self._pg_client.put_log(logstore, contents, self.log_enabled)
else:
log_item = LogItem(contents=contents)
request = PutLogsRequest(project=self.project_name, logstore=logstore, logitems=[log_item])
if self.log_enabled:
logger.info(
"[LogStore-SDK] PUT_LOG | logstore=%s | project=%s | items_count=%d",
logstore,
self.project_name,
len(contents),
)
try:
self.client.put_logs(request)
except LogException as e:
logger.exception(
"Failed to put logs to logstore %s: errorCode=%s, errorMessage=%s, requestId=%s",
logstore,
e.get_error_code(),
e.get_error_message(),
e.get_request_id(),
)
raise
def get_logs(
self,
logstore: str,
from_time: int,
to_time: int,
topic: str = "",
query: str = "",
line: int = 100,
offset: int = 0,
reverse: bool = True,
) -> list[dict]:
request = GetLogsRequest(
project=self.project_name,
logstore=logstore,
fromTime=from_time,
toTime=to_time,
topic=topic,
query=query,
line=line,
offset=offset,
reverse=reverse,
)
# Log query info if SQLALCHEMY_ECHO is enabled
if self.log_enabled:
logger.info(
"[LogStore] GET_LOGS | logstore=%s | project=%s | query=%s | "
"from_time=%d | to_time=%d | line=%d | offset=%d | reverse=%s",
logstore,
self.project_name,
query,
from_time,
to_time,
line,
offset,
reverse,
)
try:
response = self.client.get_logs(request)
result = []
logs = response.get_logs() if response else []
for log in logs:
result.append(log.get_contents())
# Log result count if SQLALCHEMY_ECHO is enabled
if self.log_enabled:
logger.info(
"[LogStore] GET_LOGS RESULT | logstore=%s | returned_count=%d",
logstore,
len(result),
)
return result
except LogException as e:
logger.exception(
"Failed to get logs from logstore %s with query '%s': errorCode=%s, errorMessage=%s, requestId=%s",
logstore,
query,
e.get_error_code(),
e.get_error_message(),
e.get_request_id(),
)
raise
def execute_sql(
self,
sql: str,
logstore: str | None = None,
query: str = "*",
from_time: int | None = None,
to_time: int | None = None,
power_sql: bool = False,
) -> list[dict]:
"""
Execute SQL query for aggregation and analysis.
Args:
sql: SQL query string (SELECT statement)
logstore: Name of the logstore (required)
query: Search/filter query for SDK mode (default: "*" for all logs).
Only used in SDK mode. PG mode ignores this parameter.
from_time: Start time (Unix timestamp) - only used in SDK mode
to_time: End time (Unix timestamp) - only used in SDK mode
power_sql: Whether to use enhanced SQL mode (default: False)
Returns:
List of result rows as dictionaries
Note:
- PG mode: Only executes the SQL directly
- SDK mode: Combines query and sql as "query | sql"
"""
# Logstore is required
if not logstore:
raise ValueError("logstore parameter is required for execute_sql")
# Route to PG or SDK based on protocol availability
if self._use_pg_protocol and self._pg_client:
# PG mode: execute SQL directly (ignore query parameter)
return self._pg_client.execute_sql(sql, logstore, self.log_enabled)
else:
# SDK mode: combine query and sql as "query | sql"
full_query = f"{query} | {sql}"
# Provide default time range if not specified
if from_time is None:
from_time = 0
if to_time is None:
to_time = int(time.time()) # now
request = GetLogsRequest(
project=self.project_name,
logstore=logstore,
fromTime=from_time,
toTime=to_time,
query=full_query,
)
# Log query info if SQLALCHEMY_ECHO is enabled
if self.log_enabled:
logger.info(
"[LogStore-SDK] EXECUTE_SQL | logstore=%s | project=%s | from_time=%d | to_time=%d | full_query=%s",
logstore,
self.project_name,
from_time,
to_time,
full_query,
)
try:
response = self.client.get_logs(request)
result = []
logs = response.get_logs() if response else []
for log in logs:
result.append(log.get_contents())
# Log result count if SQLALCHEMY_ECHO is enabled
if self.log_enabled:
logger.info(
"[LogStore-SDK] EXECUTE_SQL RESULT | logstore=%s | returned_count=%d",
logstore,
len(result),
)
return result
except LogException as e:
logger.exception(
"Failed to execute SQL, logstore %s: errorCode=%s, errorMessage=%s, requestId=%s, full_query=%s",
logstore,
e.get_error_code(),
e.get_error_message(),
e.get_request_id(),
full_query,
)
raise
if __name__ == "__main__":
aliyun_logstore = AliyunLogStore()
# aliyun_logstore.init_project_logstore()
aliyun_logstore.put_log(AliyunLogStore.workflow_execution_logstore, [("key1", "value1")])

View File

@@ -0,0 +1,407 @@
import logging
import os
import socket
import time
from collections.abc import Sequence
from contextlib import contextmanager
from typing import Any
import psycopg2
import psycopg2.pool
from psycopg2 import InterfaceError, OperationalError
from configs import dify_config
logger = logging.getLogger(__name__)
class AliyunLogStorePG:
"""
PostgreSQL protocol support for Aliyun SLS LogStore.
Handles PG connection pooling and operations for regions that support PG protocol.
"""
def __init__(self, access_key_id: str, access_key_secret: str, endpoint: str, project_name: str):
"""
Initialize PG connection for SLS.
Args:
access_key_id: Aliyun access key ID
access_key_secret: Aliyun access key secret
endpoint: SLS endpoint
project_name: SLS project name
"""
self._access_key_id = access_key_id
self._access_key_secret = access_key_secret
self._endpoint = endpoint
self.project_name = project_name
self._pg_pool: psycopg2.pool.SimpleConnectionPool | None = None
self._use_pg_protocol = False
def _check_port_connectivity(self, host: str, port: int, timeout: float = 2.0) -> bool:
"""
Check if a TCP port is reachable using socket connection.
This provides a fast check before attempting full database connection,
preventing long waits when connecting to unsupported regions.
Args:
host: Hostname or IP address
port: Port number
timeout: Connection timeout in seconds (default: 2.0)
Returns:
True if port is reachable, False otherwise
"""
try:
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.settimeout(timeout)
result = sock.connect_ex((host, port))
sock.close()
return result == 0
except Exception as e:
logger.debug("Port connectivity check failed for %s:%d: %s", host, port, str(e))
return False
def init_connection(self) -> bool:
"""
Initialize PostgreSQL connection pool for SLS PG protocol support.
Attempts to connect to SLS using PostgreSQL protocol. If successful, sets
_use_pg_protocol to True and creates a connection pool. If connection fails
(region doesn't support PG protocol or other errors), returns False.
Returns:
True if PG protocol is supported and initialized, False otherwise
"""
try:
# Extract hostname from endpoint (remove protocol if present)
pg_host = self._endpoint.replace("http://", "").replace("https://", "")
# Get pool configuration
pg_max_connections = int(os.environ.get("ALIYUN_SLS_PG_MAX_CONNECTIONS", 10))
logger.debug(
"Check PG protocol connection to SLS: host=%s, project=%s",
pg_host,
self.project_name,
)
# Fast port connectivity check before attempting full connection
# This prevents long waits when connecting to unsupported regions
if not self._check_port_connectivity(pg_host, 5432, timeout=1.0):
logger.info(
"USE SDK mode for read/write operations, host=%s",
pg_host,
)
return False
# Create connection pool
self._pg_pool = psycopg2.pool.SimpleConnectionPool(
minconn=1,
maxconn=pg_max_connections,
host=pg_host,
port=5432,
database=self.project_name,
user=self._access_key_id,
password=self._access_key_secret,
sslmode="require",
connect_timeout=5,
application_name=f"Dify-{dify_config.project.version}",
)
# Note: Skip test query because SLS PG protocol only supports SELECT/INSERT on actual tables
# Connection pool creation success already indicates connectivity
self._use_pg_protocol = True
logger.info(
"PG protocol initialized successfully for SLS project=%s. Will use PG for read/write operations.",
self.project_name,
)
return True
except Exception as e:
# PG connection failed - fallback to SDK mode
self._use_pg_protocol = False
if self._pg_pool:
try:
self._pg_pool.closeall()
except Exception:
logger.debug("Failed to close PG connection pool during cleanup, ignoring")
self._pg_pool = None
logger.info(
"PG protocol connection failed (region may not support PG protocol): %s. "
"Falling back to SDK mode for read/write operations.",
str(e),
)
return False
def _is_connection_valid(self, conn: Any) -> bool:
"""
Check if a connection is still valid.
Args:
conn: psycopg2 connection object
Returns:
True if connection is valid, False otherwise
"""
try:
# Check if connection is closed
if conn.closed:
return False
# Quick ping test - execute a lightweight query
# A plain SELECT 1 serves as a lightweight liveness probe here;
# if the connection is stale this raises and the pool replaces it
with conn.cursor() as cursor:
cursor.execute("SELECT 1")
cursor.fetchone()
return True
except Exception:
return False
@contextmanager
def _get_connection(self):
"""
Context manager to get a PostgreSQL connection from the pool.
Automatically validates and refreshes stale connections.
Note: Aliyun SLS PG protocol does not support transactions, so we always
use autocommit mode.
Yields:
psycopg2 connection object
Raises:
RuntimeError: If PG pool is not initialized
"""
if not self._pg_pool:
raise RuntimeError("PG connection pool is not initialized")
conn = self._pg_pool.getconn()
try:
# Validate connection and get a fresh one if needed
if not self._is_connection_valid(conn):
logger.debug("Connection is stale, marking as bad and getting a new one")
# Mark connection as bad and get a new one
self._pg_pool.putconn(conn, close=True)
conn = self._pg_pool.getconn()
# Aliyun SLS PG protocol does not support transactions, always use autocommit
conn.autocommit = True
yield conn
finally:
# Return connection to pool (or close if it's bad)
if self._is_connection_valid(conn):
self._pg_pool.putconn(conn)
else:
self._pg_pool.putconn(conn, close=True)
def close(self) -> None:
"""Close the PostgreSQL connection pool."""
if self._pg_pool:
try:
self._pg_pool.closeall()
logger.info("PG connection pool closed")
except Exception:
logger.exception("Failed to close PG connection pool")
def _is_retriable_error(self, error: Exception) -> bool:
"""
Check if an error is retriable (connection-related issues).
Args:
error: Exception to check
Returns:
True if the error is retriable, False otherwise
"""
# Retry on connection-related errors
if isinstance(error, (OperationalError, InterfaceError)):
return True
# Check error message for specific connection issues
error_msg = str(error).lower()
retriable_patterns = [
"connection",
"timeout",
"closed",
"broken pipe",
"reset by peer",
"no route to host",
"network",
]
return any(pattern in error_msg for pattern in retriable_patterns)
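Both `put_log` and `execute_sql` below reuse the same retry shape: classify the error, retry with exponential backoff, re-raise on the last attempt. Distilled as a standalone sketch (names are illustrative):

```python
import time

def with_retries(op, is_retriable, max_retries: int = 3, first_delay: float = 0.1):
    delay = first_delay
    for attempt in range(max_retries):
        try:
            return op()
        except Exception as e:
            # Give up on non-retriable errors or after the final attempt.
            if not is_retriable(e) or attempt == max_retries - 1:
                raise
            time.sleep(delay)
            delay *= 2  # exponential backoff: 0.1s, 0.2s, ...

# Example: succeeds on the third attempt.
calls = {"n": 0}
def flaky():
    calls["n"] += 1
    if calls["n"] < 3:
        raise ConnectionError("reset by peer")
    return "ok"

print(with_retries(flaky, lambda e: isinstance(e, ConnectionError)))  # ok
```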
def put_log(self, logstore: str, contents: Sequence[tuple[str, str]], log_enabled: bool = False) -> None:
"""
Write log to SLS using PostgreSQL protocol with automatic retry.
Note: SLS PG protocol only supports INSERT (not UPDATE). This uses append-only
writes with log_version field for versioning, same as SDK implementation.
Args:
logstore: Name of the logstore table
contents: List of (field_name, value) tuples
log_enabled: Whether to enable logging
Raises:
psycopg2.Error: If database operation fails after all retries
"""
if not contents:
return
# Extract field names and values from contents
fields = [field_name for field_name, _ in contents]
values = [value for _, value in contents]
# Build INSERT statement with literal values
# Note: Aliyun SLS PG protocol doesn't support parameterized queries,
# so we need to use mogrify to safely create literal values
field_list = ", ".join([f'"{field}"' for field in fields])
if log_enabled:
logger.info(
"[LogStore-PG] PUT_LOG | logstore=%s | project=%s | items_count=%d",
logstore,
self.project_name,
len(contents),
)
# Retry configuration
max_retries = 3
retry_delay = 0.1 # Start with 100ms
for attempt in range(max_retries):
try:
with self._get_connection() as conn:
with conn.cursor() as cursor:
# Use mogrify to safely convert values to SQL literals
placeholders = ", ".join(["%s"] * len(fields))
values_literal = cursor.mogrify(f"({placeholders})", values).decode("utf-8")
insert_sql = f'INSERT INTO "{logstore}" ({field_list}) VALUES {values_literal}'
cursor.execute(insert_sql)
# Success - exit retry loop
return
except psycopg2.Error as e:
# Check if error is retriable
if not self._is_retriable_error(e):
# Not a retriable error (e.g., data validation error), fail immediately
logger.exception(
"Failed to put logs to logstore %s via PG protocol (non-retriable error)",
logstore,
)
raise
# Retriable error - log and retry if we have attempts left
if attempt < max_retries - 1:
logger.warning(
"Failed to put logs to logstore %s via PG protocol (attempt %d/%d): %s. Retrying...",
logstore,
attempt + 1,
max_retries,
str(e),
)
time.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
else:
# Last attempt failed
logger.exception(
"Failed to put logs to logstore %s via PG protocol after %d attempts",
logstore,
max_retries,
)
raise
def execute_sql(self, sql: str, logstore: str, log_enabled: bool = False) -> list[dict[str, Any]]:
"""
Execute SQL query using PostgreSQL protocol with automatic retry.
Args:
sql: SQL query string
logstore: Name of the logstore (for logging purposes)
log_enabled: Whether to enable logging
Returns:
List of result rows as dictionaries
Raises:
psycopg2.Error: If database operation fails after all retries
"""
if log_enabled:
logger.info(
"[LogStore-PG] EXECUTE_SQL | logstore=%s | project=%s | sql=%s",
logstore,
self.project_name,
sql,
)
# Retry configuration
max_retries = 3
retry_delay = 0.1 # Start with 100ms
for attempt in range(max_retries):
try:
with self._get_connection() as conn:
with conn.cursor() as cursor:
cursor.execute(sql)
# Get column names from cursor description
columns = [desc[0] for desc in cursor.description]
# Fetch all results and convert to list of dicts
result = []
for row in cursor.fetchall():
row_dict = {}
for col, val in zip(columns, row):
row_dict[col] = "" if val is None else str(val)
result.append(row_dict)
if log_enabled:
logger.info(
"[LogStore-PG] EXECUTE_SQL RESULT | logstore=%s | returned_count=%d",
logstore,
len(result),
)
return result
except psycopg2.Error as e:
# Check if error is retriable
if not self._is_retriable_error(e):
# Not a retriable error (e.g., SQL syntax error), fail immediately
logger.exception(
"Failed to execute SQL query on logstore %s via PG protocol (non-retriable error): sql=%s",
logstore,
sql,
)
raise
# Retriable error - log and retry if we have attempts left
if attempt < max_retries - 1:
logger.warning(
"Failed to execute SQL query on logstore %s via PG protocol (attempt %d/%d): %s. Retrying...",
logstore,
attempt + 1,
max_retries,
str(e),
)
time.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
else:
# Last attempt failed
logger.exception(
"Failed to execute SQL query on logstore %s via PG protocol after %d attempts: sql=%s",
logstore,
max_retries,
sql,
)
raise
# This line should never be reached due to raise above, but makes type checker happy
return []

View File

@@ -0,0 +1,365 @@
"""
LogStore implementation of DifyAPIWorkflowNodeExecutionRepository.
This module provides the LogStore-based implementation for service-layer
WorkflowNodeExecutionModel operations using Aliyun SLS LogStore.
"""
import logging
import time
from collections.abc import Sequence
from datetime import datetime
from typing import Any
from sqlalchemy.orm import sessionmaker
from extensions.logstore.aliyun_logstore import AliyunLogStore
from models.workflow import WorkflowNodeExecutionModel
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
logger = logging.getLogger(__name__)
def _dict_to_workflow_node_execution_model(data: dict[str, Any]) -> WorkflowNodeExecutionModel:
"""
Convert LogStore result dictionary to WorkflowNodeExecutionModel instance.
Args:
data: Dictionary from LogStore query result
Returns:
WorkflowNodeExecutionModel instance (detached from session)
Note:
The returned model is not attached to any SQLAlchemy session.
Relationship fields (like offload_data) are not loaded from LogStore.
"""
logger.debug("_dict_to_workflow_node_execution_model: data keys=%s", list(data.keys())[:5])
# Create model instance without session
model = WorkflowNodeExecutionModel()
# Map all required fields with validation
# Critical fields - must not be None
model.id = data.get("id") or ""
model.tenant_id = data.get("tenant_id") or ""
model.app_id = data.get("app_id") or ""
model.workflow_id = data.get("workflow_id") or ""
model.triggered_from = data.get("triggered_from") or ""
model.node_id = data.get("node_id") or ""
model.node_type = data.get("node_type") or ""
model.status = data.get("status") or "running" # Default status if missing
model.title = data.get("title") or ""
model.created_by_role = data.get("created_by_role") or ""
model.created_by = data.get("created_by") or ""
# Numeric fields with defaults
model.index = int(data.get("index", 0))
model.elapsed_time = float(data.get("elapsed_time", 0))
# Optional fields
model.workflow_run_id = data.get("workflow_run_id")
model.predecessor_node_id = data.get("predecessor_node_id")
model.node_execution_id = data.get("node_execution_id")
model.inputs = data.get("inputs")
model.process_data = data.get("process_data")
model.outputs = data.get("outputs")
model.error = data.get("error")
model.execution_metadata = data.get("execution_metadata")
# Handle datetime fields
created_at = data.get("created_at")
if created_at:
if isinstance(created_at, str):
model.created_at = datetime.fromisoformat(created_at)
elif isinstance(created_at, (int, float)):
model.created_at = datetime.fromtimestamp(created_at)
else:
model.created_at = created_at
else:
# Provide default created_at if missing
model.created_at = datetime.now()
finished_at = data.get("finished_at")
if finished_at:
if isinstance(finished_at, str):
model.finished_at = datetime.fromisoformat(finished_at)
elif isinstance(finished_at, (int, float)):
model.finished_at = datetime.fromtimestamp(finished_at)
else:
model.finished_at = finished_at
return model
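The datetime branches above accept ISO-8601 strings, epoch numbers, or ready-made datetimes; the coercion reads as a small standalone helper (a sketch mirroring those branches):

```python
from datetime import datetime

def coerce_datetime(value) -> datetime | None:
    if not value:
        return None
    if isinstance(value, str):
        return datetime.fromisoformat(value)
    if isinstance(value, (int, float)):
        return datetime.fromtimestamp(value)
    return value  # already a datetime

print(coerce_datetime("2025-01-01T12:00:00"))
print(coerce_datetime(1735732800))
```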
class LogstoreAPIWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecutionRepository):
"""
LogStore implementation of DifyAPIWorkflowNodeExecutionRepository.
Provides service-layer database operations for WorkflowNodeExecutionModel
using LogStore SQL queries with optimized deduplication strategies.
"""
def __init__(self, session_maker: sessionmaker | None = None):
"""
Initialize the repository with LogStore client.
Args:
session_maker: SQLAlchemy sessionmaker (unused, for compatibility with factory pattern)
"""
logger.debug("LogstoreAPIWorkflowNodeExecutionRepository.__init__: initializing")
self.logstore_client = AliyunLogStore()
def get_node_last_execution(
self,
tenant_id: str,
app_id: str,
workflow_id: str,
node_id: str,
) -> WorkflowNodeExecutionModel | None:
"""
Get the most recent execution for a specific node.
Uses query syntax to get raw logs and selects the one with max log_version.
Returns the most recent execution ordered by created_at.
"""
logger.debug(
"get_node_last_execution: tenant_id=%s, app_id=%s, workflow_id=%s, node_id=%s",
tenant_id,
app_id,
workflow_id,
node_id,
)
try:
# Check if PG protocol is supported
if self.logstore_client.supports_pg_protocol:
# Use PG protocol with SQL query (get latest version of each record)
sql_query = f"""
SELECT * FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) as rn
FROM "{AliyunLogStore.workflow_node_execution_logstore}"
WHERE tenant_id = '{tenant_id}'
AND app_id = '{app_id}'
AND workflow_id = '{workflow_id}'
AND node_id = '{node_id}'
AND __time__ > 0
) AS subquery WHERE rn = 1
LIMIT 100
"""
results = self.logstore_client.execute_sql(
sql=sql_query,
logstore=AliyunLogStore.workflow_node_execution_logstore,
)
else:
# Use SDK with LogStore query syntax
query = (
f"tenant_id: {tenant_id} and app_id: {app_id} and workflow_id: {workflow_id} and node_id: {node_id}"
)
from_time = 0
to_time = int(time.time()) # now
results = self.logstore_client.get_logs(
logstore=AliyunLogStore.workflow_node_execution_logstore,
from_time=from_time,
to_time=to_time,
query=query,
line=100,
reverse=False,
)
if not results:
return None
# For SDK mode, group by id and select the one with max log_version for each group
# For PG mode, this is already done by the SQL query
if not self.logstore_client.supports_pg_protocol:
id_to_results: dict[str, list[dict[str, Any]]] = {}
for row in results:
row_id = row.get("id")
if row_id:
if row_id not in id_to_results:
id_to_results[row_id] = []
id_to_results[row_id].append(row)
# For each id, select the row with max log_version
deduplicated_results = []
for rows in id_to_results.values():
if len(rows) > 1:
max_row = max(rows, key=lambda x: int(x.get("log_version", 0)))
else:
max_row = rows[0]
deduplicated_results.append(max_row)
else:
# For PG mode, results are already deduplicated by the SQL query
deduplicated_results = results
# Sort by created_at DESC and return the most recent one
deduplicated_results.sort(
key=lambda x: x.get("created_at", 0) if isinstance(x.get("created_at"), (int, float)) else 0,
reverse=True,
)
if deduplicated_results:
return _dict_to_workflow_node_execution_model(deduplicated_results[0])
return None
except Exception:
logger.exception("Failed to get node last execution from LogStore")
raise
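The SDK-mode deduplication used above (keep the row with the highest `log_version` for each `id`) reduces to a small group-by; as a standalone sketch with made-up rows:

```python
from collections import defaultdict

rows = [
    {"id": "a", "log_version": "1", "status": "running"},
    {"id": "a", "log_version": "3", "status": "succeeded"},
    {"id": "b", "log_version": "1", "status": "failed"},
]

grouped: dict[str, list[dict]] = defaultdict(list)
for row in rows:
    grouped[row["id"]].append(row)

# Keep only the latest version of each record.
latest = [max(group, key=lambda r: int(r.get("log_version", 0))) for group in grouped.values()]
print([(r["id"], r["status"]) for r in latest])  # [('a', 'succeeded'), ('b', 'failed')]
```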
def get_executions_by_workflow_run(
self,
tenant_id: str,
app_id: str,
workflow_run_id: str,
) -> Sequence[WorkflowNodeExecutionModel]:
"""
Get all node executions for a specific workflow run.
Uses query syntax to get raw logs and selects the one with max log_version for each node execution.
Ordered by index DESC for trace visualization.
"""
logger.debug(
"[LogStore] get_executions_by_workflow_run: tenant_id=%s, app_id=%s, workflow_run_id=%s",
tenant_id,
app_id,
workflow_run_id,
)
try:
# Check if PG protocol is supported
if self.logstore_client.supports_pg_protocol:
# Use PG protocol with SQL query (get latest version of each record)
sql_query = f"""
SELECT * FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) as rn
FROM "{AliyunLogStore.workflow_node_execution_logstore}"
WHERE tenant_id = '{tenant_id}'
AND app_id = '{app_id}'
AND workflow_run_id = '{workflow_run_id}'
AND __time__ > 0
) AS subquery WHERE rn = 1
LIMIT 1000
"""
results = self.logstore_client.execute_sql(
sql=sql_query,
logstore=AliyunLogStore.workflow_node_execution_logstore,
)
else:
# Use SDK with LogStore query syntax
query = f"tenant_id: {tenant_id} and app_id: {app_id} and workflow_run_id: {workflow_run_id}"
from_time = 0
to_time = int(time.time()) # now
results = self.logstore_client.get_logs(
logstore=AliyunLogStore.workflow_node_execution_logstore,
from_time=from_time,
to_time=to_time,
query=query,
line=1000, # Get more results for node executions
reverse=False,
)
if not results:
return []
# For SDK mode, group by id and select the one with max log_version for each group
# For PG mode, this is already done by the SQL query
models = []
if not self.logstore_client.supports_pg_protocol:
id_to_results: dict[str, list[dict[str, Any]]] = {}
for row in results:
row_id = row.get("id")
if row_id:
if row_id not in id_to_results:
id_to_results[row_id] = []
id_to_results[row_id].append(row)
# For each id, select the row with max log_version
for rows in id_to_results.values():
if len(rows) > 1:
max_row = max(rows, key=lambda x: int(x.get("log_version", 0)))
else:
max_row = rows[0]
model = _dict_to_workflow_node_execution_model(max_row)
if model and model.id: # Ensure model is valid
models.append(model)
else:
# For PG mode, results are already deduplicated by the SQL query
for row in results:
model = _dict_to_workflow_node_execution_model(row)
if model and model.id: # Ensure model is valid
models.append(model)
# Sort by index DESC for trace visualization
models.sort(key=lambda x: x.index, reverse=True)
return models
except Exception:
logger.exception("Failed to get executions by workflow run from LogStore")
raise
def get_execution_by_id(
self,
execution_id: str,
tenant_id: str | None = None,
) -> WorkflowNodeExecutionModel | None:
"""
Get a workflow node execution by its ID.
Uses query syntax to get raw logs and selects the one with max log_version.
"""
logger.debug("get_execution_by_id: execution_id=%s, tenant_id=%s", execution_id, tenant_id)
try:
# Check if PG protocol is supported
if self.logstore_client.supports_pg_protocol:
# Use PG protocol with SQL query (get latest version of record)
tenant_filter = f"AND tenant_id = '{tenant_id}'" if tenant_id else ""
sql_query = f"""
SELECT * FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) as rn
FROM "{AliyunLogStore.workflow_node_execution_logstore}"
WHERE id = '{execution_id}' {tenant_filter} AND __time__ > 0
) AS subquery WHERE rn = 1
LIMIT 1
"""
results = self.logstore_client.execute_sql(
sql=sql_query,
logstore=AliyunLogStore.workflow_node_execution_logstore,
)
else:
# Use SDK with LogStore query syntax
if tenant_id:
query = f"id: {execution_id} and tenant_id: {tenant_id}"
else:
query = f"id: {execution_id}"
from_time = 0
to_time = int(time.time()) # now
results = self.logstore_client.get_logs(
logstore=AliyunLogStore.workflow_node_execution_logstore,
from_time=from_time,
to_time=to_time,
query=query,
line=100,
reverse=False,
)
if not results:
return None
# For PG mode, result is already the latest version
# For SDK mode, if multiple results, select the one with max log_version
if self.logstore_client.supports_pg_protocol or len(results) == 1:
return _dict_to_workflow_node_execution_model(results[0])
else:
max_result = max(results, key=lambda x: int(x.get("log_version", 0)))
return _dict_to_workflow_node_execution_model(max_result)
except Exception:
logger.exception("Failed to get execution by ID from LogStore: execution_id=%s", execution_id)
raise

View File

@@ -0,0 +1,757 @@
"""
LogStore API WorkflowRun Repository Implementation
This module provides the LogStore-based implementation of the APIWorkflowRunRepository
protocol. It handles service-layer WorkflowRun database operations using Aliyun SLS LogStore
with optimized queries for statistics and pagination.
Key Features:
- LogStore SQL queries for aggregation and statistics
- Optimized deduplication using finished_at IS NOT NULL filter
- Window functions only when necessary (running status queries)
- Multi-tenant data isolation and security
"""
import logging
import os
import time
from collections.abc import Sequence
from datetime import datetime
from typing import Any, cast
from sqlalchemy.orm import sessionmaker
from extensions.logstore.aliyun_logstore import AliyunLogStore
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowRun
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.types import (
AverageInteractionStats,
DailyRunsStats,
DailyTerminalsStats,
DailyTokenCostStats,
)
logger = logging.getLogger(__name__)
def _dict_to_workflow_run(data: dict[str, Any]) -> WorkflowRun:
"""
Convert LogStore result dictionary to WorkflowRun instance.
Args:
data: Dictionary from LogStore query result
Returns:
WorkflowRun instance
"""
logger.debug("_dict_to_workflow_run: data keys=%s", list(data.keys())[:5])
# Create model instance without session
model = WorkflowRun()
# Map all required fields with validation
# Critical fields - must not be None
model.id = data.get("id") or ""
model.tenant_id = data.get("tenant_id") or ""
model.app_id = data.get("app_id") or ""
model.workflow_id = data.get("workflow_id") or ""
model.type = data.get("type") or ""
model.triggered_from = data.get("triggered_from") or ""
model.version = data.get("version") or ""
model.status = data.get("status") or "running" # Default status if missing
model.created_by_role = data.get("created_by_role") or ""
model.created_by = data.get("created_by") or ""
# Numeric fields with defaults
model.total_tokens = int(data.get("total_tokens", 0))
model.total_steps = int(data.get("total_steps", 0))
model.exceptions_count = int(data.get("exceptions_count", 0))
# Optional fields
model.graph = data.get("graph")
model.inputs = data.get("inputs")
model.outputs = data.get("outputs")
model.error = data.get("error_message") or data.get("error")
# Handle datetime fields
started_at = data.get("started_at") or data.get("created_at")
if started_at:
if isinstance(started_at, str):
model.created_at = datetime.fromisoformat(started_at)
elif isinstance(started_at, (int, float)):
model.created_at = datetime.fromtimestamp(started_at)
else:
model.created_at = started_at
else:
# Provide default created_at if missing
model.created_at = datetime.now()
finished_at = data.get("finished_at")
if finished_at:
if isinstance(finished_at, str):
model.finished_at = datetime.fromisoformat(finished_at)
elif isinstance(finished_at, (int, float)):
model.finished_at = datetime.fromtimestamp(finished_at)
else:
model.finished_at = finished_at
# Compute elapsed_time from started_at and finished_at
# LogStore doesn't store elapsed_time, it's computed in WorkflowExecution domain entity
if model.finished_at and model.created_at:
model.elapsed_time = (model.finished_at - model.created_at).total_seconds()
else:
model.elapsed_time = float(data.get("elapsed_time", 0))
return model
class LogstoreAPIWorkflowRunRepository(APIWorkflowRunRepository):
"""
LogStore implementation of APIWorkflowRunRepository.
Provides service-layer WorkflowRun database operations using LogStore SQL
with optimized query strategies:
- Use finished_at IS NOT NULL for deduplication (10-100x faster)
- Use window functions only when running status is required
- Proper time range filtering for LogStore queries
"""
def __init__(self, session_maker: sessionmaker | None = None):
"""
Initialize the repository with LogStore client.
Args:
session_maker: SQLAlchemy sessionmaker (unused, for compatibility with factory pattern)
"""
logger.debug("LogstoreAPIWorkflowRunRepository.__init__: initializing")
self.logstore_client = AliyunLogStore()
# Control flag for dual-read (fallback to PostgreSQL when LogStore returns no results)
# Set to True to enable fallback for safe migration from PostgreSQL to LogStore
# Set to False for new deployments without legacy data in PostgreSQL
self._enable_dual_read = os.environ.get("LOGSTORE_DUAL_READ_ENABLED", "true").lower() == "true"
def get_paginated_workflow_runs(
self,
tenant_id: str,
app_id: str,
triggered_from: WorkflowRunTriggeredFrom | Sequence[WorkflowRunTriggeredFrom],
limit: int = 20,
last_id: str | None = None,
status: str | None = None,
) -> InfiniteScrollPagination:
"""
Get paginated workflow runs with filtering.
Uses window function for deduplication to support both running and finished states.
Args:
tenant_id: Tenant identifier for multi-tenant isolation
app_id: Application identifier
triggered_from: Filter by trigger source(s)
limit: Maximum number of records to return (default: 20)
last_id: Cursor for pagination - ID of the last record from previous page
status: Optional filter by status
Returns:
InfiniteScrollPagination object
"""
logger.debug(
"get_paginated_workflow_runs: tenant_id=%s, app_id=%s, limit=%d, status=%s",
tenant_id,
app_id,
limit,
status,
)
# Convert triggered_from to list if needed
if isinstance(triggered_from, WorkflowRunTriggeredFrom):
triggered_from_list = [triggered_from]
else:
triggered_from_list = list(triggered_from)
# Build triggered_from filter
triggered_from_filter = " OR ".join([f"triggered_from='{tf.value}'" for tf in triggered_from_list])
# Build status filter
status_filter = f"AND status='{status}'" if status else ""
# Build last_id filter for pagination
# Note: This is simplified. In production, you'd need to track created_at from last record
last_id_filter = ""
if last_id:
# TODO: Implement proper cursor-based pagination with created_at
logger.warning("last_id pagination not fully implemented for LogStore")
# Use window function to get latest log_version of each workflow run
sql = f"""
SELECT * FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) AS rn
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND ({triggered_from_filter})
{status_filter}
{last_id_filter}
) t
WHERE rn = 1
ORDER BY created_at DESC
LIMIT {limit + 1}
"""
try:
results = self.logstore_client.execute_sql(
sql=sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore, from_time=None, to_time=None
)
# Check if there are more records
has_more = len(results) > limit
if has_more:
results = results[:limit]
# Convert results to WorkflowRun models
workflow_runs = [_dict_to_workflow_run(row) for row in results]
return InfiniteScrollPagination(data=workflow_runs, limit=limit, has_more=has_more)
except Exception:
logger.exception("Failed to get paginated workflow runs from LogStore")
raise
def get_workflow_run_by_id(
self,
tenant_id: str,
app_id: str,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID with tenant and app isolation.
Uses query syntax to get raw logs and selects the one with max log_version in code.
Falls back to PostgreSQL if not found in LogStore (for data consistency during migration).
"""
logger.debug("get_workflow_run_by_id: tenant_id=%s, app_id=%s, run_id=%s", tenant_id, app_id, run_id)
try:
# Check if PG protocol is supported
if self.logstore_client.supports_pg_protocol:
# Use PG protocol with SQL query (get latest version of record)
sql_query = f"""
SELECT * FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) as rn
FROM "{AliyunLogStore.workflow_execution_logstore}"
WHERE id = '{run_id}' AND tenant_id = '{tenant_id}' AND app_id = '{app_id}' AND __time__ > 0
) AS subquery WHERE rn = 1
LIMIT 100
"""
results = self.logstore_client.execute_sql(
sql=sql_query,
logstore=AliyunLogStore.workflow_execution_logstore,
)
else:
# Use SDK with LogStore query syntax
query = f"id: {run_id} and tenant_id: {tenant_id} and app_id: {app_id}"
from_time = 0
to_time = int(time.time()) # now
results = self.logstore_client.get_logs(
logstore=AliyunLogStore.workflow_execution_logstore,
from_time=from_time,
to_time=to_time,
query=query,
line=100,
reverse=False,
)
if not results:
# Fallback to PostgreSQL for records created before LogStore migration
if self._enable_dual_read:
logger.debug(
"WorkflowRun not found in LogStore, falling back to PostgreSQL: "
"run_id=%s, tenant_id=%s, app_id=%s",
run_id,
tenant_id,
app_id,
)
return self._fallback_get_workflow_run_by_id_with_tenant(run_id, tenant_id, app_id)
return None
# For PG mode, results are already deduplicated by the SQL query
# For SDK mode, if multiple results, select the one with max log_version
if self.logstore_client.supports_pg_protocol or len(results) == 1:
return _dict_to_workflow_run(results[0])
else:
max_result = max(results, key=lambda x: int(x.get("log_version", 0)))
return _dict_to_workflow_run(max_result)
except Exception:
logger.exception("Failed to get workflow run by ID from LogStore: run_id=%s", run_id)
# Try PostgreSQL fallback on any error (only if dual-read is enabled)
if self._enable_dual_read:
try:
return self._fallback_get_workflow_run_by_id_with_tenant(run_id, tenant_id, app_id)
except Exception:
logger.exception(
"PostgreSQL fallback also failed: run_id=%s, tenant_id=%s, app_id=%s", run_id, tenant_id, app_id
)
raise
def _fallback_get_workflow_run_by_id_with_tenant(
self, run_id: str, tenant_id: str, app_id: str
) -> WorkflowRun | None:
"""Fallback to PostgreSQL query for records not in LogStore (with tenant isolation)."""
from sqlalchemy import select
from sqlalchemy.orm import Session
from extensions.ext_database import db
with Session(db.engine) as session:
stmt = select(WorkflowRun).where(
WorkflowRun.id == run_id, WorkflowRun.tenant_id == tenant_id, WorkflowRun.app_id == app_id
)
return session.scalar(stmt)
def get_workflow_run_by_id_without_tenant(
self,
run_id: str,
) -> WorkflowRun | None:
"""
Get a specific workflow run by ID without tenant/app context.
Uses query syntax to get raw logs and selects the one with max log_version.
Falls back to PostgreSQL if not found in LogStore (controlled by LOGSTORE_DUAL_READ_ENABLED).
"""
logger.debug("get_workflow_run_by_id_without_tenant: run_id=%s", run_id)
try:
# Check if PG protocol is supported
if self.logstore_client.supports_pg_protocol:
# Use PG protocol with SQL query (get latest version of record)
sql_query = f"""
SELECT * FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) as rn
FROM "{AliyunLogStore.workflow_execution_logstore}"
WHERE id = '{run_id}' AND __time__ > 0
) AS subquery WHERE rn = 1
LIMIT 100
"""
results = self.logstore_client.execute_sql(
sql=sql_query,
logstore=AliyunLogStore.workflow_execution_logstore,
)
else:
# Use SDK with LogStore query syntax
query = f"id: {run_id}"
from_time = 0
to_time = int(time.time()) # now
results = self.logstore_client.get_logs(
logstore=AliyunLogStore.workflow_execution_logstore,
from_time=from_time,
to_time=to_time,
query=query,
line=100,
reverse=False,
)
if not results:
# Fallback to PostgreSQL for records created before LogStore migration
if self._enable_dual_read:
logger.debug("WorkflowRun not found in LogStore, falling back to PostgreSQL: run_id=%s", run_id)
return self._fallback_get_workflow_run_by_id(run_id)
return None
# For PG mode, results are already deduplicated by the SQL query
# For SDK mode, if multiple results, select the one with max log_version
if self.logstore_client.supports_pg_protocol or len(results) == 1:
return _dict_to_workflow_run(results[0])
else:
max_result = max(results, key=lambda x: int(x.get("log_version", 0)))
return _dict_to_workflow_run(max_result)
except Exception:
logger.exception("Failed to get workflow run without tenant: run_id=%s", run_id)
# Try PostgreSQL fallback on any error (only if dual-read is enabled)
if self._enable_dual_read:
try:
return self._fallback_get_workflow_run_by_id(run_id)
except Exception:
logger.exception("PostgreSQL fallback also failed: run_id=%s", run_id)
raise
def _fallback_get_workflow_run_by_id(self, run_id: str) -> WorkflowRun | None:
"""Fallback to PostgreSQL query for records not in LogStore."""
from sqlalchemy import select
from sqlalchemy.orm import Session
from extensions.ext_database import db
with Session(db.engine) as session:
stmt = select(WorkflowRun).where(WorkflowRun.id == run_id)
return session.scalar(stmt)
def get_workflow_runs_count(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
status: str | None = None,
time_range: str | None = None,
) -> dict[str, int]:
"""
Get workflow runs count statistics grouped by status.
Optimization: Use finished_at IS NOT NULL for completed runs (10-50x faster)
"""
logger.debug(
"get_workflow_runs_count: tenant_id=%s, app_id=%s, triggered_from=%s, status=%s",
tenant_id,
app_id,
triggered_from,
status,
)
# Build time range filter
time_filter = ""
if time_range:
# TODO: Parse time_range and convert to from_time/to_time
logger.warning("time_range filter not implemented")
# If status is provided, simple count
if status:
if status == "running":
# Running status requires window function
sql = f"""
SELECT COUNT(*) as count
FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) AS rn
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND status='running'
{time_filter}
) t
WHERE rn = 1
"""
else:
# Finished status uses optimized filter
sql = f"""
SELECT COUNT(DISTINCT id) as count
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND status='{status}'
AND finished_at IS NOT NULL
{time_filter}
"""
try:
results = self.logstore_client.execute_sql(
sql=sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore
)
count = results[0]["count"] if results else 0
return {
"total": count,
"running": count if status == "running" else 0,
"succeeded": count if status == "succeeded" else 0,
"failed": count if status == "failed" else 0,
"stopped": count if status == "stopped" else 0,
"partial-succeeded": count if status == "partial-succeeded" else 0,
}
except Exception:
logger.exception("Failed to get workflow runs count")
raise
# No status filter - get counts grouped by status
# Use optimized query for finished runs, separate query for running
try:
# Count finished runs grouped by status
finished_sql = f"""
SELECT status, COUNT(DISTINCT id) as count
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND finished_at IS NOT NULL
{time_filter}
GROUP BY status
"""
# Count running runs
running_sql = f"""
SELECT COUNT(*) as count
FROM (
SELECT *, ROW_NUMBER() OVER (PARTITION BY id ORDER BY log_version DESC) AS rn
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND status='running'
{time_filter}
) t
WHERE rn = 1
"""
finished_results = self.logstore_client.execute_sql(
sql=finished_sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore
)
running_results = self.logstore_client.execute_sql(
sql=running_sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore
)
# Build response
status_counts = {
"running": 0,
"succeeded": 0,
"failed": 0,
"stopped": 0,
"partial-succeeded": 0,
}
total = 0
for result in finished_results:
status_val = result.get("status")
count = result.get("count", 0)
if status_val in status_counts:
status_counts[status_val] = count
total += count
# Add running count
running_count = running_results[0]["count"] if running_results else 0
status_counts["running"] = running_count
total += running_count
return {"total": total} | status_counts
except Exception:
logger.exception("Failed to get workflow runs count")
raise
def get_daily_runs_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyRunsStats]:
"""
Get daily runs statistics using optimized query.
Optimization: Use finished_at IS NOT NULL + COUNT(DISTINCT id) (20-100x faster)
"""
logger.debug(
"get_daily_runs_statistics: tenant_id=%s, app_id=%s, triggered_from=%s", tenant_id, app_id, triggered_from
)
# Build time range filter
time_filter = ""
if start_date:
time_filter += f" AND __time__ >= to_unixtime(from_iso8601_timestamp('{start_date.isoformat()}'))"
if end_date:
time_filter += f" AND __time__ < to_unixtime(from_iso8601_timestamp('{end_date.isoformat()}'))"
# Optimized query: Use finished_at filter to avoid window function
sql = f"""
SELECT DATE(from_unixtime(__time__)) as date, COUNT(DISTINCT id) as runs
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND finished_at IS NOT NULL
{time_filter}
GROUP BY date
ORDER BY date
"""
try:
results = self.logstore_client.execute_sql(
sql=sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore
)
response_data = []
for row in results:
response_data.append({"date": str(row.get("date", "")), "runs": row.get("runs", 0)})
return cast(list[DailyRunsStats], response_data)
except Exception:
logger.exception("Failed to get daily runs statistics")
raise
def get_daily_terminals_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTerminalsStats]:
"""
Get daily terminals statistics using optimized query.
Optimization: Use finished_at IS NOT NULL + COUNT(DISTINCT created_by) (20-100x faster)
"""
logger.debug(
"get_daily_terminals_statistics: tenant_id=%s, app_id=%s, triggered_from=%s",
tenant_id,
app_id,
triggered_from,
)
# Build time range filter
time_filter = ""
if start_date:
time_filter += f" AND __time__ >= to_unixtime(from_iso8601_timestamp('{start_date.isoformat()}'))"
if end_date:
time_filter += f" AND __time__ < to_unixtime(from_iso8601_timestamp('{end_date.isoformat()}'))"
sql = f"""
SELECT DATE(from_unixtime(__time__)) as date, COUNT(DISTINCT created_by) as terminal_count
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND finished_at IS NOT NULL
{time_filter}
GROUP BY date
ORDER BY date
"""
try:
results = self.logstore_client.execute_sql(
sql=sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore
)
response_data = []
for row in results:
response_data.append({"date": str(row.get("date", "")), "terminal_count": row.get("terminal_count", 0)})
return cast(list[DailyTerminalsStats], response_data)
except Exception:
logger.exception("Failed to get daily terminals statistics")
raise
def get_daily_token_cost_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[DailyTokenCostStats]:
"""
Get daily token cost statistics using optimized query.
Optimization: Use finished_at IS NOT NULL + SUM(total_tokens) (20-100x faster)
"""
logger.debug(
"get_daily_token_cost_statistics: tenant_id=%s, app_id=%s, triggered_from=%s",
tenant_id,
app_id,
triggered_from,
)
# Build time range filter
time_filter = ""
if start_date:
time_filter += f" AND __time__ >= to_unixtime(from_iso8601_timestamp('{start_date.isoformat()}'))"
if end_date:
time_filter += f" AND __time__ < to_unixtime(from_iso8601_timestamp('{end_date.isoformat()}'))"
sql = f"""
SELECT DATE(from_unixtime(__time__)) as date, SUM(total_tokens) as token_count
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND finished_at IS NOT NULL
{time_filter}
GROUP BY date
ORDER BY date
"""
try:
results = self.logstore_client.execute_sql(
sql=sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore
)
response_data = []
for row in results:
response_data.append({"date": str(row.get("date", "")), "token_count": row.get("token_count", 0)})
return cast(list[DailyTokenCostStats], response_data)
except Exception:
logger.exception("Failed to get daily token cost statistics")
raise
def get_average_app_interaction_statistics(
self,
tenant_id: str,
app_id: str,
triggered_from: str,
start_date: datetime | None = None,
end_date: datetime | None = None,
timezone: str = "UTC",
) -> list[AverageInteractionStats]:
"""
Get average app interaction statistics using optimized query.
Optimization: Use finished_at IS NOT NULL + AVG (20-100x faster)
"""
logger.debug(
"get_average_app_interaction_statistics: tenant_id=%s, app_id=%s, triggered_from=%s",
tenant_id,
app_id,
triggered_from,
)
# Build time range filter
time_filter = ""
if start_date:
time_filter += f" AND __time__ >= to_unixtime(from_iso8601_timestamp('{start_date.isoformat()}'))"
if end_date:
time_filter += f" AND __time__ < to_unixtime(from_iso8601_timestamp('{end_date.isoformat()}'))"
sql = f"""
SELECT
AVG(sub.interactions) AS interactions,
sub.date
FROM (
SELECT
DATE(from_unixtime(__time__)) AS date,
created_by,
COUNT(DISTINCT id) AS interactions
FROM {AliyunLogStore.workflow_execution_logstore}
WHERE tenant_id='{tenant_id}'
AND app_id='{app_id}'
AND triggered_from='{triggered_from}'
AND finished_at IS NOT NULL
{time_filter}
GROUP BY date, created_by
) sub
GROUP BY sub.date
"""
try:
results = self.logstore_client.execute_sql(
sql=sql, query="*", logstore=AliyunLogStore.workflow_execution_logstore
)
response_data = []
for row in results:
response_data.append(
{
"date": str(row.get("date", "")),
"interactions": float(row.get("interactions", 0)),
}
)
return cast(list[AverageInteractionStats], response_data)
except Exception:
logger.exception("Failed to get average app interaction statistics")
raise
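# Usage sketch (hypothetical identifiers; not part of the original change):
# page through an app's recent runs; dual-read falls back to PostgreSQL
# transparently while LOGSTORE_DUAL_READ_ENABLED=true.
def _example_list_recent_runs(tenant_id: str, app_id: str) -> InfiniteScrollPagination:
    repo = LogstoreAPIWorkflowRunRepository()
    return repo.get_paginated_workflow_runs(
        tenant_id=tenant_id,
        app_id=app_id,
        triggered_from=WorkflowRunTriggeredFrom.APP_RUN,
        limit=20,
    )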

View File

@ -0,0 +1,164 @@
import json
import logging
import os
import time
from typing import Union
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker
from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
from core.workflow.entities import WorkflowExecution
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from extensions.logstore.aliyun_logstore import AliyunLogStore
from libs.helper import extract_tenant_id
from models import (
Account,
CreatorUserRole,
EndUser,
)
from models.enums import WorkflowRunTriggeredFrom
logger = logging.getLogger(__name__)
class LogstoreWorkflowExecutionRepository(WorkflowExecutionRepository):
def __init__(
self,
session_factory: sessionmaker | Engine,
user: Union[Account, EndUser],
app_id: str | None,
triggered_from: WorkflowRunTriggeredFrom | None,
):
"""
Initialize the repository with a SQLAlchemy sessionmaker or engine and context information.
Args:
session_factory: SQLAlchemy sessionmaker or engine for creating sessions
user: Account or EndUser object containing tenant_id, user ID, and role information
app_id: App ID for filtering by application (can be None)
triggered_from: Source of the execution trigger (DEBUGGING or APP_RUN)
"""
logger.debug(
"LogstoreWorkflowExecutionRepository.__init__: app_id=%s, triggered_from=%s", app_id, triggered_from
)
# Initialize LogStore client
# Note: Project/logstore/index initialization is done at app startup via ext_logstore
self.logstore_client = AliyunLogStore()
# Extract tenant_id from user
tenant_id = extract_tenant_id(user)
if not tenant_id:
raise ValueError("User must have a tenant_id or current_tenant_id")
self._tenant_id = tenant_id
# Store app context
self._app_id = app_id
# Extract user context
self._triggered_from = triggered_from
self._creator_user_id = user.id
# Determine user role based on user type
self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER
# Initialize SQL repository for dual-write support
self.sql_repository = SQLAlchemyWorkflowExecutionRepository(session_factory, user, app_id, triggered_from)
# Control flag for dual-write (write to both LogStore and SQL database)
# Set to True to enable dual-write for safe migration, False to use LogStore only
self._enable_dual_write = os.environ.get("LOGSTORE_DUAL_WRITE_ENABLED", "true").lower() == "true"
def _to_logstore_model(self, domain_model: WorkflowExecution) -> list[tuple[str, str]]:
"""
Convert a domain model to a logstore model (List[Tuple[str, str]]).
Args:
domain_model: The domain model to convert
Returns:
The logstore model as a list of key-value tuples
"""
logger.debug(
"_to_logstore_model: id=%s, workflow_id=%s, status=%s",
domain_model.id_,
domain_model.workflow_id,
domain_model.status.value,
)
# Use values from constructor if provided
if not self._triggered_from:
raise ValueError("triggered_from is required in repository constructor")
if not self._creator_user_id:
raise ValueError("created_by is required in repository constructor")
if not self._creator_user_role:
raise ValueError("created_by_role is required in repository constructor")
# Generate log_version as nanosecond timestamp for record versioning
log_version = str(time.time_ns())
logstore_model = [
("id", domain_model.id_),
("log_version", log_version), # Add log_version field for append-only writes
("tenant_id", self._tenant_id),
("app_id", self._app_id or ""),
("workflow_id", domain_model.workflow_id),
(
"triggered_from",
self._triggered_from.value if hasattr(self._triggered_from, "value") else str(self._triggered_from),
),
("type", domain_model.workflow_type.value),
("version", domain_model.workflow_version),
("graph", json.dumps(domain_model.graph, ensure_ascii=False) if domain_model.graph else "{}"),
("inputs", json.dumps(domain_model.inputs, ensure_ascii=False) if domain_model.inputs else "{}"),
("outputs", json.dumps(domain_model.outputs, ensure_ascii=False) if domain_model.outputs else "{}"),
("status", domain_model.status.value),
("error_message", domain_model.error_message or ""),
("total_tokens", str(domain_model.total_tokens)),
("total_steps", str(domain_model.total_steps)),
("exceptions_count", str(domain_model.exceptions_count)),
(
"created_by_role",
self._creator_user_role.value
if hasattr(self._creator_user_role, "value")
else str(self._creator_user_role),
),
("created_by", self._creator_user_id),
("started_at", domain_model.started_at.isoformat() if domain_model.started_at else ""),
("finished_at", domain_model.finished_at.isoformat() if domain_model.finished_at else ""),
]
return logstore_model
def save(self, execution: WorkflowExecution) -> None:
"""
Save or update a WorkflowExecution domain entity to the logstore.
This method serves as a domain-to-logstore adapter that:
1. Converts the domain entity to its logstore representation
2. Persists the logstore model using Aliyun SLS
3. Maintains proper multi-tenancy by including tenant context during conversion
4. Optionally writes to SQL database for dual-write support (controlled by LOGSTORE_DUAL_WRITE_ENABLED)
Args:
execution: The WorkflowExecution domain entity to persist
"""
logger.debug(
"save: id=%s, workflow_id=%s, status=%s", execution.id_, execution.workflow_id, execution.status.value
)
try:
logstore_model = self._to_logstore_model(execution)
self.logstore_client.put_log(AliyunLogStore.workflow_execution_logstore, logstore_model)
logger.debug("Saved workflow execution to logstore: id=%s", execution.id_)
except Exception:
logger.exception("Failed to save workflow execution to logstore: id=%s", execution.id_)
raise
# Dual-write to SQL database if enabled (for safe migration)
if self._enable_dual_write:
try:
self.sql_repository.save(execution)
logger.debug("Dual-write: saved workflow execution to SQL database: id=%s", execution.id_)
except Exception:
logger.exception("Failed to dual-write workflow execution to SQL database: id=%s", execution.id_)
# Don't raise - LogStore write succeeded, SQL is just a backup
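# Illustrative sketch (not part of the original change): log_version values
# come from time.time_ns(), so resolving an append-only "update" reduces to
# keeping the record with the largest version per id.
def _example_log_version_wins() -> bool:
    earlier = str(time.time_ns())
    later = str(time.time_ns())
    return int(later) >= int(earlier)  # later appends always win deduplication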

View File

@ -0,0 +1,366 @@
"""
LogStore implementation of the WorkflowNodeExecutionRepository.
This module provides a LogStore-based repository for WorkflowNodeExecution entities,
using Aliyun SLS LogStore with append-only writes and version control.
"""
import json
import logging
import os
import time
from collections.abc import Sequence
from datetime import datetime
from typing import Any, Union
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker
from core.model_runtime.utils.encoders import jsonable_encoder
from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
from core.workflow.entities import WorkflowNodeExecution
from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus
from core.workflow.enums import NodeType
from core.workflow.repositories.workflow_node_execution_repository import OrderConfig, WorkflowNodeExecutionRepository
from core.workflow.workflow_type_encoder import WorkflowRuntimeTypeConverter
from extensions.logstore.aliyun_logstore import AliyunLogStore
from libs.helper import extract_tenant_id
from models import (
Account,
CreatorUserRole,
EndUser,
WorkflowNodeExecutionTriggeredFrom,
)
logger = logging.getLogger(__name__)
def _dict_to_workflow_node_execution(data: dict[str, Any]) -> WorkflowNodeExecution:
"""
Convert LogStore result dictionary to WorkflowNodeExecution domain model.
Args:
data: Dictionary from LogStore query result
Returns:
WorkflowNodeExecution domain model instance
"""
logger.debug("_dict_to_workflow_node_execution: data keys=%s", list(data.keys())[:5])
# Parse JSON fields
inputs = json.loads(data.get("inputs", "{}"))
process_data = json.loads(data.get("process_data", "{}"))
outputs = json.loads(data.get("outputs", "{}"))
metadata = json.loads(data.get("execution_metadata", "{}"))
# Convert metadata to domain enum keys
domain_metadata = {}
for k, v in metadata.items():
try:
domain_metadata[WorkflowNodeExecutionMetadataKey(k)] = v
except ValueError:
# Skip invalid metadata keys
continue
# Convert status to domain enum
status = WorkflowNodeExecutionStatus(data.get("status", "running"))
# Parse datetime fields
created_at = datetime.fromisoformat(data.get("created_at", "")) if data.get("created_at") else datetime.now()
finished_at = datetime.fromisoformat(data.get("finished_at", "")) if data.get("finished_at") else None
return WorkflowNodeExecution(
id=data.get("id", ""),
node_execution_id=data.get("node_execution_id"),
workflow_id=data.get("workflow_id", ""),
workflow_execution_id=data.get("workflow_run_id"),
index=int(data.get("index", 0)),
predecessor_node_id=data.get("predecessor_node_id"),
node_id=data.get("node_id", ""),
node_type=NodeType(data.get("node_type", "start")),
title=data.get("title", ""),
inputs=inputs,
process_data=process_data,
outputs=outputs,
status=status,
error=data.get("error"),
elapsed_time=float(data.get("elapsed_time", 0.0)),
metadata=domain_metadata,
created_at=created_at,
finished_at=finished_at,
)
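# Illustrative sketch (hypothetical rows; not part of the original change):
# with append-only writes, the current state of a record is the row with the
# max log_version per id, mirroring how the run repository picks its result.
def _example_pick_latest_version(rows: list[dict[str, Any]]) -> dict[str, Any]:
    return max(rows, key=lambda r: int(r.get("log_version", 0)))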
class LogstoreWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository):
"""
LogStore implementation of the WorkflowNodeExecutionRepository interface.
This implementation uses Aliyun SLS LogStore with an append-only write strategy:
- Each save() operation appends a new record with a version timestamp
- Updates are simulated by writing new records with higher version numbers
- Queries retrieve the latest version using finished_at IS NOT NULL filter
- Multi-tenancy is maintained through tenant_id filtering
Version Strategy:
version = time.time_ns() # Nanosecond timestamp for unique ordering
"""
def __init__(
self,
session_factory: sessionmaker | Engine,
user: Union[Account, EndUser],
app_id: str | None,
triggered_from: WorkflowNodeExecutionTriggeredFrom | None,
):
"""
Initialize the repository with a SQLAlchemy sessionmaker or engine and context information.
Args:
session_factory: SQLAlchemy sessionmaker or engine for creating sessions
user: Account or EndUser object containing tenant_id, user ID, and role information
app_id: App ID for filtering by application (can be None)
triggered_from: Source of the execution trigger (SINGLE_STEP or WORKFLOW_RUN)
"""
logger.debug(
"LogstoreWorkflowNodeExecutionRepository.__init__: app_id=%s, triggered_from=%s", app_id, triggered_from
)
# Initialize LogStore client
self.logstore_client = AliyunLogStore()
# Extract tenant_id from user
tenant_id = extract_tenant_id(user)
if not tenant_id:
raise ValueError("User must have a tenant_id or current_tenant_id")
self._tenant_id = tenant_id
# Store app context
self._app_id = app_id
# Extract user context
self._triggered_from = triggered_from
self._creator_user_id = user.id
# Determine user role based on user type
self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER
# Initialize SQL repository for dual-write support
self.sql_repository = SQLAlchemyWorkflowNodeExecutionRepository(session_factory, user, app_id, triggered_from)
# Control flag for dual-write (write to both LogStore and SQL database)
# Set to True to enable dual-write for safe migration, False to use LogStore only
self._enable_dual_write = os.environ.get("LOGSTORE_DUAL_WRITE_ENABLED", "true").lower() == "true"
def _to_logstore_model(self, domain_model: WorkflowNodeExecution) -> Sequence[tuple[str, str]]:
logger.debug(
"_to_logstore_model: id=%s, node_id=%s, status=%s",
domain_model.id,
domain_model.node_id,
domain_model.status.value,
)
if not self._triggered_from:
raise ValueError("triggered_from is required in repository constructor")
if not self._creator_user_id:
raise ValueError("created_by is required in repository constructor")
if not self._creator_user_role:
raise ValueError("created_by_role is required in repository constructor")
# Generate log_version as nanosecond timestamp for record versioning
log_version = str(time.time_ns())
json_converter = WorkflowRuntimeTypeConverter()
logstore_model = [
("id", domain_model.id),
("log_version", log_version), # Add log_version field for append-only writes
("tenant_id", self._tenant_id),
("app_id", self._app_id or ""),
("workflow_id", domain_model.workflow_id),
(
"triggered_from",
self._triggered_from.value if hasattr(self._triggered_from, "value") else str(self._triggered_from),
),
("workflow_run_id", domain_model.workflow_execution_id or ""),
("index", str(domain_model.index)),
("predecessor_node_id", domain_model.predecessor_node_id or ""),
("node_execution_id", domain_model.node_execution_id or ""),
("node_id", domain_model.node_id),
("node_type", domain_model.node_type.value),
("title", domain_model.title),
(
"inputs",
json.dumps(json_converter.to_json_encodable(domain_model.inputs), ensure_ascii=False)
if domain_model.inputs
else "{}",
),
(
"process_data",
json.dumps(json_converter.to_json_encodable(domain_model.process_data), ensure_ascii=False)
if domain_model.process_data
else "{}",
),
(
"outputs",
json.dumps(json_converter.to_json_encodable(domain_model.outputs), ensure_ascii=False)
if domain_model.outputs
else "{}",
),
("status", domain_model.status.value),
("error", domain_model.error or ""),
("elapsed_time", str(domain_model.elapsed_time)),
(
"execution_metadata",
json.dumps(jsonable_encoder(domain_model.metadata), ensure_ascii=False)
if domain_model.metadata
else "{}",
),
("created_at", domain_model.created_at.isoformat() if domain_model.created_at else ""),
("created_by_role", self._creator_user_role.value),
("created_by", self._creator_user_id),
("finished_at", domain_model.finished_at.isoformat() if domain_model.finished_at else ""),
]
return logstore_model
def save(self, execution: WorkflowNodeExecution) -> None:
"""
Save or update a NodeExecution domain entity to LogStore.
This method serves as a domain-to-logstore adapter that:
1. Converts the domain entity to its logstore representation
2. Appends a new record with a log_version timestamp
3. Maintains proper multi-tenancy by including tenant context during conversion
4. Optionally writes to SQL database for dual-write support (controlled by LOGSTORE_DUAL_WRITE_ENABLED)
Each save operation creates a new record. Updates are simulated by writing
new records with higher log_version numbers.
Args:
execution: The NodeExecution domain entity to persist
"""
logger.debug(
"save: id=%s, node_execution_id=%s, status=%s",
execution.id,
execution.node_execution_id,
execution.status.value,
)
try:
logstore_model = self._to_logstore_model(execution)
self.logstore_client.put_log(AliyunLogStore.workflow_node_execution_logstore, logstore_model)
logger.debug(
"Saved node execution to LogStore: id=%s, node_execution_id=%s, status=%s",
execution.id,
execution.node_execution_id,
execution.status.value,
)
except Exception:
logger.exception(
"Failed to save node execution to LogStore: id=%s, node_execution_id=%s",
execution.id,
execution.node_execution_id,
)
raise
# Dual-write to SQL database if enabled (for safe migration)
if self._enable_dual_write:
try:
self.sql_repository.save(execution)
logger.debug("Dual-write: saved node execution to SQL database: id=%s", execution.id)
except Exception:
logger.exception("Failed to dual-write node execution to SQL database: id=%s", execution.id)
# Don't raise - LogStore write succeeded, SQL is just a backup
def save_execution_data(self, execution: WorkflowNodeExecution) -> None:
"""
Save or update the inputs, process_data, or outputs associated with a specific
node_execution record.
For LogStore implementation, this is similar to save() since we always write
complete records. We append a new record with updated data fields.
Args:
execution: The NodeExecution instance with data to save
"""
logger.debug("save_execution_data: id=%s, node_execution_id=%s", execution.id, execution.node_execution_id)
# In LogStore, we simply write a new complete record with the data
# The log_version timestamp will ensure this is treated as the latest version
self.save(execution)
def get_by_workflow_run(
self,
workflow_run_id: str,
order_config: OrderConfig | None = None,
) -> Sequence[WorkflowNodeExecution]:
"""
Retrieve all NodeExecution instances for a specific workflow run.
Uses LogStore SQL query with finished_at IS NOT NULL filter for deduplication.
This ensures we only get the final version of each node execution.
Args:
workflow_run_id: The workflow run ID
order_config: Optional configuration for ordering results
order_config.order_by: List of fields to order by (e.g., ["index", "created_at"])
order_config.order_direction: Direction to order ("asc" or "desc")
Returns:
A list of NodeExecution instances
Note:
This method filters by finished_at IS NOT NULL to avoid duplicates from
version updates. For complete history including intermediate states,
a different query strategy would be needed.
"""
logger.debug("get_by_workflow_run: workflow_run_id=%s, order_config=%s", workflow_run_id, order_config)
# Build SQL query with deduplication using finished_at IS NOT NULL
# This optimization avoids window functions for common case where we only
# want the final state of each node execution
# Build ORDER BY clause
order_clause = ""
if order_config and order_config.order_by:
order_fields = []
for field in order_config.order_by:
# Map domain field names to logstore field names if needed
field_name = field
if order_config.order_direction == "desc":
order_fields.append(f"{field_name} DESC")
else:
order_fields.append(f"{field_name} ASC")
if order_fields:
order_clause = "ORDER BY " + ", ".join(order_fields)
sql = f"""
SELECT *
FROM {AliyunLogStore.workflow_node_execution_logstore}
WHERE workflow_run_id='{workflow_run_id}'
AND tenant_id='{self._tenant_id}'
AND finished_at IS NOT NULL
"""
if self._app_id:
sql += f" AND app_id='{self._app_id}'"
if order_clause:
sql += f" {order_clause}"
try:
# Execute SQL query
results = self.logstore_client.execute_sql(
sql=sql,
query="*",
logstore=AliyunLogStore.workflow_node_execution_logstore,
)
# Convert LogStore results to WorkflowNodeExecution domain models
executions = []
for row in results:
try:
execution = _dict_to_workflow_node_execution(row)
executions.append(execution)
except Exception as e:
logger.warning("Failed to convert row to WorkflowNodeExecution: %s, row=%s", e, row)
continue
return executions
except Exception:
logger.exception("Failed to retrieve node executions from LogStore: workflow_run_id=%s", workflow_run_id)
raise
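# Usage sketch (hypothetical identifiers; OrderConfig field names taken from
# the docstring above; not part of the original change): fetch the final state
# of every node in a run, ordered as the log view displays them.
def _example_nodes_for_run(
    repo: LogstoreWorkflowNodeExecutionRepository, run_id: str
) -> Sequence[WorkflowNodeExecution]:
    order = OrderConfig(order_by=["index"], order_direction="asc")
    return repo.get_by_workflow_run(workflow_run_id=run_id, order_config=order)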

View File

@ -26,6 +26,7 @@ class AliyunOssStorage(BaseStorage):
self.bucket_name,
connect_timeout=30,
region=region,
cloudbox_id=dify_config.ALIYUN_CLOUDBOX_ID,
)
def save(self, filename, data):

View File

@ -17,6 +17,7 @@ class HuaweiObsStorage(BaseStorage):
access_key_id=dify_config.HUAWEI_OBS_ACCESS_KEY,
secret_access_key=dify_config.HUAWEI_OBS_SECRET_KEY,
server=dify_config.HUAWEI_OBS_SERVER,
path_style=dify_config.HUAWEI_OBS_PATH_STYLE,
)
def save(self, filename, data):

View File

@ -87,15 +87,16 @@ class OpenDALStorage(BaseStorage):
if not self.exists(path):
raise FileNotFoundError("Path not found")
all_files = self.op.scan(path=path)
# Use the new OpenDAL 0.46.0+ API with recursive listing
lister = self.op.list(path, recursive=True)
if files and directories:
logger.debug("files and directories on %s scanned", path)
return [f.path for f in all_files]
return [entry.path for entry in lister]
if files:
logger.debug("files on %s scanned", path)
return [f.path for f in all_files if not f.path.endswith("/")]
return [entry.path for entry in lister if not entry.metadata.is_dir]
elif directories:
logger.debug("directories on %s scanned", path)
return [f.path for f in all_files if f.path.endswith("/")]
return [entry.path for entry in lister if entry.metadata.is_dir]
else:
raise ValueError("At least one of files or directories must be True")

View File

@ -11,6 +11,7 @@ from collections.abc import Generator, Mapping
from datetime import datetime
from hashlib import sha256
from typing import TYPE_CHECKING, Annotated, Any, Optional, Union, cast
from uuid import UUID
from zoneinfo import available_timezones
from flask import Response, stream_with_context
@ -119,6 +120,19 @@ def uuid_value(value: Any) -> str:
raise ValueError(error)
def normalize_uuid(value: str | UUID) -> str:
if not value:
return ""
try:
return uuid_value(value)
except ValueError as exc:
raise ValueError("must be a valid UUID") from exc
UUIDStrOrEmpty = Annotated[str, AfterValidator(normalize_uuid)]
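# Usage sketch (hypothetical model; not part of the original change):
# UUIDStrOrEmpty keeps "" as-is and normalizes everything else through
# uuid_value, raising "must be a valid UUID" for malformed input.
def _example_uuid_str_or_empty() -> str:
    from pydantic import BaseModel  # local import keeps the sketch self-contained

    class _Query(BaseModel):
        conversation_id: UUIDStrOrEmpty = ""

    return _Query(conversation_id="123e4567-e89b-12d3-a456-426614174000").conversation_id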
def alphanumeric(value: str):
# check if the value is alphanumeric and underlined
if re.match(r"^[a-zA-Z0-9_]+$", value):

View File

@ -4,6 +4,7 @@ version = "1.11.1"
requires-python = ">=3.11,<3.13"
dependencies = [
"aliyun-log-python-sdk~=0.9.37",
"arize-phoenix-otel~=0.9.2",
"azure-identity==1.16.1",
"beautifulsoup4==4.12.2",
@ -31,6 +32,7 @@ dependencies = [
"httpx[socks]~=0.27.0",
"jieba==0.42.1",
"json-repair>=0.41.1",
"jsonschema>=4.25.1",
"langfuse~=2.51.3",
"langsmith~=0.1.77",
"markdown~=3.5.1",
@ -67,7 +69,7 @@ dependencies = [
"pydantic-extra-types~=2.10.3",
"pydantic-settings~=2.11.0",
"pyjwt~=2.10.1",
"pypdfium2==4.30.0",
"pypdfium2==5.2.0",
"python-docx~=1.1.0",
"python-dotenv==1.0.1",
"pyyaml~=6.0.1",
@ -91,7 +93,6 @@ dependencies = [
"weaviate-client==4.17.0",
"apscheduler>=3.11.0",
"weave>=0.52.16",
"jsonschema>=4.25.1",
]
# Before adding a new dependency, consider placing it in
# alphabetical order (a-z) and a suitable group.

View File

@ -155,6 +155,7 @@ class AppDslService:
parsed_url.scheme == "https"
and parsed_url.netloc == "github.com"
and parsed_url.path.endswith((".yml", ".yaml"))
and "/blob/" in parsed_url.path
):
yaml_url = yaml_url.replace("https://github.com", "https://raw.githubusercontent.com")
yaml_url = yaml_url.replace("/blob/", "/")

View File

@ -26,7 +26,7 @@ class FirecrawlAuth(ApiKeyAuthBase):
"limit": 1,
"scrapeOptions": {"onlyMainContent": True},
}
response = self._post_request(f"{self.base_url}/v1/crawl", options, headers)
response = self._post_request(self._build_url("v1/crawl"), options, headers)
if response.status_code == 200:
return True
else:
@ -35,15 +35,17 @@ class FirecrawlAuth(ApiKeyAuthBase):
def _prepare_headers(self):
return {"Content-Type": "application/json", "Authorization": f"Bearer {self.api_key}"}
def _build_url(self, path: str) -> str:
# ensure exactly one slash between base and path, regardless of user-provided base_url
return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"
def _post_request(self, url, data, headers):
return httpx.post(url, headers=headers, json=data)
def _handle_error(self, response):
if response.status_code in {402, 409, 500}:
error_message = response.json().get("error", "Unknown error occurred")
raise Exception(f"Failed to authorize. Status code: {response.status_code}. Error: {error_message}")
else:
if response.text:
error_message = json.loads(response.text).get("error", "Unknown error occurred")
raise Exception(f"Failed to authorize. Status code: {response.status_code}. Error: {error_message}")
raise Exception(f"Unexpected error occurred while trying to authorize. Status code: {response.status_code}")
try:
payload = response.json()
except json.JSONDecodeError:
payload = {}
error_message = payload.get("error") or payload.get("message") or (response.text or "Unknown error occurred")
raise Exception(f"Failed to authorize. Status code: {response.status_code}. Error: {error_message}")

View File

@ -1,8 +1,12 @@
import logging
import os
from collections.abc import Sequence
from typing import Literal
import httpx
from pydantic import TypeAdapter
from tenacity import retry, retry_if_exception_type, stop_before_delay, wait_fixed
from typing_extensions import TypedDict
from werkzeug.exceptions import InternalServerError
from enums.cloud_plan import CloudPlan
@ -11,6 +15,15 @@ from extensions.ext_redis import redis_client
from libs.helper import RateLimiter
from models import Account, TenantAccountJoin, TenantAccountRole
logger = logging.getLogger(__name__)
class SubscriptionPlan(TypedDict):
"""Tenant subscriptionplan information."""
plan: str
expiration_date: int
class BillingService:
base_url = os.environ.get("BILLING_API_URL", "BILLING_API_URL")
@ -239,3 +252,39 @@ class BillingService:
def sync_partner_tenants_bindings(cls, account_id: str, partner_key: str, click_id: str):
payload = {"account_id": account_id, "click_id": click_id}
return cls._send_request("PUT", f"/partners/{partner_key}/tenants", json=payload)
@classmethod
def get_plan_bulk(cls, tenant_ids: Sequence[str]) -> dict[str, SubscriptionPlan]:
"""
Bulk fetch billing subscription plan via billing API.
Payload: {"tenant_ids": ["t1", "t2", ...]} (max 200 per request)
Returns:
Mapping of tenant_id -> {plan: str, expiration_date: int}
"""
results: dict[str, SubscriptionPlan] = {}
subscription_adapter = TypeAdapter(SubscriptionPlan)
chunk_size = 200
for i in range(0, len(tenant_ids), chunk_size):
chunk = tenant_ids[i : i + chunk_size]
try:
resp = cls._send_request("POST", "/subscription/plan/batch", json={"tenant_ids": chunk})
data = resp.get("data", {})
for tenant_id, plan in data.items():
subscription_plan = subscription_adapter.validate_python(plan)
results[tenant_id] = subscription_plan
except Exception:
logger.exception("Failed to fetch billing info batch for tenants: %s", chunk)
continue
return results
@classmethod
def get_expired_subscription_cleanup_whitelist(cls) -> Sequence[str]:
resp = cls._send_request("GET", "/subscription/cleanup/whitelist")
data = resp.get("data", [])
tenant_whitelist = []
for item in data:
tenant_whitelist.append(item["tenant_id"])
return tenant_whitelist
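# Usage sketch (hypothetical tenant ids and handler; not part of the original change):
#   plans = BillingService.get_plan_bulk(["tenant-a", "tenant-b"])
#   info = plans.get("tenant-a")  # missing if its chunk's request failed
#   if info:
#       handle(info["plan"], info["expiration_date"])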

View File

@ -6,7 +6,9 @@ from typing import Any, Union
from sqlalchemy import asc, desc, func, or_, select
from sqlalchemy.orm import Session
from configs import dify_config
from core.app.entities.app_invoke_entities import InvokeFrom
from core.db.session_factory import session_factory
from core.llm_generator.llm_generator import LLMGenerator
from core.variables.types import SegmentType
from core.workflow.nodes.variable_assigner.common.impl import conversation_variable_updater_factory
@ -202,6 +204,7 @@ class ConversationService:
user: Union[Account, EndUser] | None,
limit: int,
last_id: str | None,
variable_name: str | None = None,
) -> InfiniteScrollPagination:
conversation = cls.get_conversation(app_model, conversation_id, user)
@ -212,7 +215,25 @@ class ConversationService:
.order_by(ConversationVariable.created_at)
)
with Session(db.engine) as session:
# Apply variable_name filter if provided
if variable_name:
# Filter using JSON extraction to match variable names case-insensitively
escaped_variable_name = variable_name.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
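# e.g. "my_var" becomes "my\_var" so ILIKE matches the underscore literally
# instead of treating it as a single-character wildcard.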
if dify_config.DB_TYPE in ["mysql", "oceanbase", "seekdb"]:
stmt = stmt.where(
func.json_extract(ConversationVariable.data, "$.name").ilike(
f"%{escaped_variable_name}%", escape="\\"
)
)
elif dify_config.DB_TYPE == "postgresql":
stmt = stmt.where(
func.json_extract_path_text(ConversationVariable.data, "name").ilike(
f"%{escaped_variable_name}%", escape="\\"
)
)
with session_factory.create_session() as session:
if last_id:
last_variable = session.scalar(stmt.where(ConversationVariable.id == last_id))
if not last_variable:
@ -279,7 +300,7 @@ class ConversationService:
.where(ConversationVariable.id == variable_id)
)
with Session(db.engine) as session:
with session_factory.create_session() as session:
existing_variable = session.scalar(stmt)
if not existing_variable:
raise ConversationVariableNotExistsError()

View File

@ -23,7 +23,7 @@ class RagPipelineDatasetCreateEntity(BaseModel):
description: str
icon_info: IconInfo
permission: str
partial_member_list: list[str] | None = None
partial_member_list: list[dict[str, str]] | None = None
yaml_content: str | None = None

View File

@ -15,7 +15,6 @@ from sqlalchemy.orm import Session
from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration, MCPProviderEntity
from core.helper import encrypter
from core.helper.provider_cache import NoOpProviderCredentialCache
from core.helper.tool_provider_cache import ToolProviderListCache
from core.mcp.auth.auth_flow import auth
from core.mcp.auth_client import MCPClientWithAuthRetry
from core.mcp.error import MCPAuthError, MCPError
@ -65,6 +64,15 @@ class ServerUrlValidationResult(BaseModel):
return self.needs_validation and self.validation_passed and self.reconnect_result is not None
class ProviderUrlValidationData(BaseModel):
"""Data required for URL validation, extracted from database to perform network operations outside of session"""
current_server_url_hash: str
headers: dict[str, str]
timeout: float | None
sse_read_timeout: float | None
class MCPToolManageService:
"""Service class for managing MCP tools and providers."""
@ -166,9 +174,6 @@ class MCPToolManageService:
self._session.add(mcp_tool)
self._session.flush()
# Invalidate tool providers cache
ToolProviderListCache.invalidate_cache(tenant_id)
mcp_providers = ToolTransformService.mcp_provider_to_user_provider(mcp_tool, for_list=True)
return mcp_providers
@ -192,7 +197,7 @@ class MCPToolManageService:
Update an MCP provider.
Args:
validation_result: Pre-validation result from validate_server_url_change.
validation_result: Pre-validation result from validate_server_url_standalone.
If provided and contains reconnect_result, it will be used
instead of performing network operations.
"""
@ -251,8 +256,6 @@ class MCPToolManageService:
# Flush changes to database
self._session.flush()
# Invalidate tool providers cache
ToolProviderListCache.invalidate_cache(tenant_id)
except IntegrityError as e:
self._handle_integrity_error(e, name, server_url, server_identifier)
@ -261,9 +264,6 @@ class MCPToolManageService:
mcp_tool = self.get_provider(provider_id=provider_id, tenant_id=tenant_id)
self._session.delete(mcp_tool)
# Invalidate tool providers cache
ToolProviderListCache.invalidate_cache(tenant_id)
def list_providers(
self, *, tenant_id: str, for_list: bool = False, include_sensitive: bool = True
) -> list[ToolProviderApiEntity]:
@ -546,30 +546,39 @@ class MCPToolManageService:
)
return self.execute_auth_actions(auth_result)
def _reconnect_provider(self, *, server_url: str, provider: MCPToolProvider) -> ReconnectResult:
"""Attempt to reconnect to MCP provider with new server URL."""
def get_provider_for_url_validation(self, *, tenant_id: str, provider_id: str) -> ProviderUrlValidationData:
"""
Get provider data required for URL validation.
This method performs database read and should be called within a session.
Returns:
ProviderUrlValidationData: Data needed for standalone URL validation
"""
provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id)
provider_entity = provider.to_entity()
headers = provider_entity.headers
return ProviderUrlValidationData(
current_server_url_hash=provider.server_url_hash,
headers=provider_entity.headers,
timeout=provider_entity.timeout,
sse_read_timeout=provider_entity.sse_read_timeout,
)
try:
tools = self._retrieve_remote_mcp_tools(server_url, headers, provider_entity)
return ReconnectResult(
authed=True,
tools=json.dumps([tool.model_dump() for tool in tools]),
encrypted_credentials=EMPTY_CREDENTIALS_JSON,
)
except MCPAuthError:
return ReconnectResult(authed=False, tools=EMPTY_TOOLS_JSON, encrypted_credentials=EMPTY_CREDENTIALS_JSON)
except MCPError as e:
raise ValueError(f"Failed to re-connect MCP server: {e}") from e
def validate_server_url_change(
self, *, tenant_id: str, provider_id: str, new_server_url: str
@staticmethod
def validate_server_url_standalone(
*,
tenant_id: str,
new_server_url: str,
validation_data: ProviderUrlValidationData,
) -> ServerUrlValidationResult:
"""
Validate server URL change by attempting to connect to the new server.
This method should be called BEFORE update_provider to perform network operations
outside of the database transaction.
This method performs network operations and MUST be called OUTSIDE of any database session
to avoid holding locks during network I/O.
Args:
tenant_id: Tenant ID for encryption
new_server_url: The new server URL to validate
validation_data: Provider data obtained from get_provider_for_url_validation
Returns:
ServerUrlValidationResult: Validation result with connection status and tools if successful
@ -579,25 +588,30 @@ class MCPToolManageService:
return ServerUrlValidationResult(needs_validation=False)
# Validate URL format
if not self._is_valid_url(new_server_url):
parsed = urlparse(new_server_url)
if not all([parsed.scheme, parsed.netloc]) or parsed.scheme not in ["http", "https"]:
raise ValueError("Server URL is not valid.")
# Always encrypt and hash the URL
encrypted_server_url = encrypter.encrypt_token(tenant_id, new_server_url)
new_server_url_hash = hashlib.sha256(new_server_url.encode()).hexdigest()
# Get current provider
provider = self.get_provider(provider_id=provider_id, tenant_id=tenant_id)
# Check if URL is actually different
if new_server_url_hash == provider.server_url_hash:
if new_server_url_hash == validation_data.current_server_url_hash:
# URL hasn't changed, but still return the encrypted data
return ServerUrlValidationResult(
needs_validation=False, encrypted_server_url=encrypted_server_url, server_url_hash=new_server_url_hash
needs_validation=False,
encrypted_server_url=encrypted_server_url,
server_url_hash=new_server_url_hash,
)
# Perform validation by attempting to connect
reconnect_result = self._reconnect_provider(server_url=new_server_url, provider=provider)
# Perform network validation - this is the expensive operation that should be outside session
reconnect_result = MCPToolManageService._reconnect_with_url(
server_url=new_server_url,
headers=validation_data.headers,
timeout=validation_data.timeout,
sse_read_timeout=validation_data.sse_read_timeout,
)
return ServerUrlValidationResult(
needs_validation=True,
validation_passed=True,
@ -606,6 +620,38 @@ class MCPToolManageService:
server_url_hash=new_server_url_hash,
)
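# Intended call pattern (sketch; names as defined above): read inside the DB
# session, then validate outside it so no locks are held during network I/O:
#   data = service.get_provider_for_url_validation(tenant_id=tid, provider_id=pid)
#   result = MCPToolManageService.validate_server_url_standalone(
#       tenant_id=tid, new_server_url=url, validation_data=data)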
@staticmethod
def _reconnect_with_url(
*,
server_url: str,
headers: dict[str, str],
timeout: float | None,
sse_read_timeout: float | None,
) -> ReconnectResult:
"""
Attempt to connect to MCP server with given URL.
This is a static method that performs network I/O without database access.
"""
from core.mcp.mcp_client import MCPClient
try:
with MCPClient(
server_url=server_url,
headers=headers,
timeout=timeout,
sse_read_timeout=sse_read_timeout,
) as mcp_client:
tools = mcp_client.list_tools()
return ReconnectResult(
authed=True,
tools=json.dumps([tool.model_dump() for tool in tools]),
encrypted_credentials=EMPTY_CREDENTIALS_JSON,
)
except MCPAuthError:
return ReconnectResult(authed=False, tools=EMPTY_TOOLS_JSON, encrypted_credentials=EMPTY_CREDENTIALS_JSON)
except MCPError as e:
raise ValueError(f"Failed to re-connect MCP server: {e}") from e
def _build_tool_provider_response(
self, db_provider: MCPToolProvider, provider_entity: MCPProviderEntity, tools: list
) -> ToolProviderApiEntity:

View File

@ -2,7 +2,6 @@ import logging
import time
import click
import sqlalchemy as sa
from celery import shared_task
from sqlalchemy import select
@ -12,7 +11,7 @@ from core.rag.index_processor.index_processor_factory import IndexProcessorFacto
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from models.dataset import Dataset, Document, DocumentSegment
from models.source import DataSourceOauthBinding
from services.datasource_provider_service import DatasourceProviderService
logger = logging.getLogger(__name__)
@ -48,27 +47,36 @@ def document_indexing_sync_task(dataset_id: str, document_id: str):
page_id = data_source_info["notion_page_id"]
page_type = data_source_info["type"]
page_edited_time = data_source_info["last_edited_time"]
credential_id = data_source_info.get("credential_id")
data_source_binding = (
db.session.query(DataSourceOauthBinding)
.where(
sa.and_(
DataSourceOauthBinding.tenant_id == document.tenant_id,
DataSourceOauthBinding.provider == "notion",
DataSourceOauthBinding.disabled == False,
DataSourceOauthBinding.source_info["workspace_id"] == f'"{workspace_id}"',
)
)
.first()
# Get credentials from datasource provider
datasource_provider_service = DatasourceProviderService()
credential = datasource_provider_service.get_datasource_credentials(
tenant_id=document.tenant_id,
credential_id=credential_id,
provider="notion_datasource",
plugin_id="langgenius/notion_datasource",
)
if not data_source_binding:
raise ValueError("Data source binding not found.")
if not credential:
logger.error(
"Datasource credential not found for document %s, tenant_id: %s, credential_id: %s",
document_id,
document.tenant_id,
credential_id,
)
document.indexing_status = "error"
document.error = "Datasource credential not found. Please reconnect your Notion workspace."
document.stopped_at = naive_utc_now()
db.session.commit()
db.session.close()
return
loader = NotionExtractor(
notion_workspace_id=workspace_id,
notion_obj_id=page_id,
notion_page_type=page_type,
notion_access_token=data_source_binding.access_token,
notion_access_token=credential.get("integration_secret"),
tenant_id=document.tenant_id,
)

View File

@ -6,6 +6,7 @@ import pytest
from core.app.entities.app_invoke_entities import InvokeFrom
from core.workflow.entities import GraphInitParams
from core.workflow.enums import WorkflowNodeExecutionStatus
from core.workflow.graph import Graph
from core.workflow.nodes.http_request.node import HttpRequestNode
from core.workflow.nodes.node_factory import DifyNodeFactory
@ -169,13 +170,14 @@ def test_custom_authorization_header(setup_http_mock):
@pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True)
def test_custom_auth_with_empty_api_key_does_not_set_header(setup_http_mock):
"""Test: In custom authentication mode, when the api_key is empty, no header should be set."""
def test_custom_auth_with_empty_api_key_raises_error(setup_http_mock):
"""Test: In custom authentication mode, when the api_key is empty, AuthorizationConfigError should be raised."""
from core.workflow.nodes.http_request.entities import (
HttpRequestNodeAuthorization,
HttpRequestNodeData,
HttpRequestNodeTimeout,
)
from core.workflow.nodes.http_request.exc import AuthorizationConfigError
from core.workflow.nodes.http_request.executor import Executor
from core.workflow.runtime import VariablePool
from core.workflow.system_variable import SystemVariable
@ -208,16 +210,13 @@ def test_custom_auth_with_empty_api_key_does_not_set_header(setup_http_mock):
ssl_verify=True,
)
# Create executor
executor = Executor(
node_data=node_data, timeout=HttpRequestNodeTimeout(connect=10, read=30, write=10), variable_pool=variable_pool
)
# Get assembled headers
headers = executor._assembling_headers()
# When api_key is empty, the custom header should NOT be set
assert "X-Custom-Auth" not in headers
# Create executor should raise AuthorizationConfigError
with pytest.raises(AuthorizationConfigError, match="API key is required"):
Executor(
node_data=node_data,
timeout=HttpRequestNodeTimeout(connect=10, read=30, write=10),
variable_pool=variable_pool,
)
@pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True)
@ -305,9 +304,10 @@ def test_basic_authorization_with_custom_header_ignored(setup_http_mock):
@pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True)
def test_custom_authorization_with_empty_api_key(setup_http_mock):
"""
Test that custom authorization doesn't set header when api_key is empty.
This test verifies the fix for issue #23554.
Test that custom authorization raises error when api_key is empty.
This test verifies the fix for issue #21830.
"""
node = init_http_node(
config={
"id": "1",
@ -333,11 +333,10 @@ def test_custom_authorization_with_empty_api_key(setup_http_mock):
)
result = node._run()
assert result.process_data is not None
data = result.process_data.get("request", "")
# Custom header should NOT be set when api_key is empty
assert "X-Custom-Auth:" not in data
# Should fail with AuthorizationConfigError
assert result.status == WorkflowNodeExecutionStatus.FAILED
assert "API key is required" in result.error
assert result.error_type == "AuthorizationConfigError"
@pytest.mark.parametrize("setup_http_mock", [["none"]], indirect=True)

View File

@ -2,7 +2,9 @@ from unittest.mock import patch
import pytest
from faker import Faker
from pydantic import TypeAdapter, ValidationError
from core.tools.entities.tool_entities import ApiProviderSchemaType
from models import Account, Tenant
from models.tools import ApiToolProvider
from services.tools.api_tools_manage_service import ApiToolManageService
@ -298,7 +300,7 @@ class TestApiToolManageService:
provider_name = fake.company()
icon = {"type": "emoji", "value": "🔧"}
credentials = {"auth_type": "none", "api_key_header": "X-API-Key", "api_key_value": ""}
schema_type = "openapi"
schema_type = ApiProviderSchemaType.OPENAPI
schema = self._create_test_openapi_schema()
privacy_policy = "https://example.com/privacy"
custom_disclaimer = "Custom disclaimer text"
@ -364,7 +366,7 @@ class TestApiToolManageService:
provider_name = fake.company()
icon = {"type": "emoji", "value": "🔧"}
credentials = {"auth_type": "none"}
schema_type = "openapi"
schema_type = ApiProviderSchemaType.OPENAPI
schema = self._create_test_openapi_schema()
privacy_policy = "https://example.com/privacy"
custom_disclaimer = "Custom disclaimer text"
@ -428,21 +430,10 @@ class TestApiToolManageService:
labels = ["test"]
# Act & Assert: Try to create provider with invalid schema type
with pytest.raises(ValueError) as exc_info:
ApiToolManageService.create_api_tool_provider(
user_id=account.id,
tenant_id=tenant.id,
provider_name=provider_name,
icon=icon,
credentials=credentials,
schema_type=schema_type,
schema=schema,
privacy_policy=privacy_policy,
custom_disclaimer=custom_disclaimer,
labels=labels,
)
with pytest.raises(ValidationError) as exc_info:
TypeAdapter(ApiProviderSchemaType).validate_python(schema_type)
assert "invalid schema type" in str(exc_info.value)
assert "validation error" in str(exc_info.value)
def test_create_api_tool_provider_missing_auth_type(
self, flask_req_ctx_with_containers, db_session_with_containers, mock_external_service_dependencies
@ -464,7 +455,7 @@ class TestApiToolManageService:
provider_name = fake.company()
icon = {"type": "emoji", "value": "🔧"}
credentials = {} # Missing auth_type
schema_type = "openapi"
schema_type = ApiProviderSchemaType.OPENAPI
schema = self._create_test_openapi_schema()
privacy_policy = "https://example.com/privacy"
custom_disclaimer = "Custom disclaimer text"
@ -507,7 +498,7 @@ class TestApiToolManageService:
provider_name = fake.company()
icon = {"type": "emoji", "value": "🔑"}
credentials = {"auth_type": "api_key", "api_key_header": "X-API-Key", "api_key_value": fake.uuid4()}
schema_type = "openapi"
schema_type = ApiProviderSchemaType.OPENAPI
schema = self._create_test_openapi_schema()
privacy_policy = "https://example.com/privacy"
custom_disclaimer = "Custom disclaimer text"

View File

@ -1308,18 +1308,17 @@ class TestMCPToolManageService:
type("MockTool", (), {"model_dump": lambda self: {"name": "test_tool_2", "description": "Test tool 2"}})(),
]
with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client:
with patch("core.mcp.mcp_client.MCPClient") as mock_mcp_client:
# Setup mock client
mock_client_instance = mock_mcp_client.return_value.__enter__.return_value
mock_client_instance.list_tools.return_value = mock_tools
# Act: Execute the method under test
from extensions.ext_database import db
service = MCPToolManageService(db.session())
result = service._reconnect_provider(
result = MCPToolManageService._reconnect_with_url(
server_url="https://example.com/mcp",
provider=mcp_provider,
headers={"X-Test": "1"},
timeout=mcp_provider.timeout,
sse_read_timeout=mcp_provider.sse_read_timeout,
)
# Assert: Verify the expected outcomes
@ -1337,8 +1336,12 @@ class TestMCPToolManageService:
assert tools_data[1]["name"] == "test_tool_2"
# Verify mock interactions
provider_entity = mcp_provider.to_entity()
mock_mcp_client.assert_called_once()
mock_mcp_client.assert_called_once_with(
server_url="https://example.com/mcp",
headers={"X-Test": "1"},
timeout=mcp_provider.timeout,
sse_read_timeout=mcp_provider.sse_read_timeout,
)
def test_re_connect_mcp_provider_auth_error(self, db_session_with_containers, mock_external_service_dependencies):
"""
@ -1361,19 +1364,18 @@ class TestMCPToolManageService:
)
# Mock MCPClient to raise authentication error
with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client:
with patch("core.mcp.mcp_client.MCPClient") as mock_mcp_client:
from core.mcp.error import MCPAuthError
mock_client_instance = mock_mcp_client.return_value.__enter__.return_value
mock_client_instance.list_tools.side_effect = MCPAuthError("Authentication required")
# Act: Execute the method under test
from extensions.ext_database import db
service = MCPToolManageService(db.session())
result = service._reconnect_provider(
result = MCPToolManageService._reconnect_with_url(
server_url="https://example.com/mcp",
provider=mcp_provider,
headers={},
timeout=mcp_provider.timeout,
sse_read_timeout=mcp_provider.sse_read_timeout,
)
# Assert: Verify the expected outcomes
@ -1404,18 +1406,17 @@ class TestMCPToolManageService:
)
# Mock MCPClient to raise connection error
with patch("services.tools.mcp_tools_manage_service.MCPClientWithAuthRetry") as mock_mcp_client:
with patch("core.mcp.mcp_client.MCPClient") as mock_mcp_client:
from core.mcp.error import MCPError
mock_client_instance = mock_mcp_client.return_value.__enter__.return_value
mock_client_instance.list_tools.side_effect = MCPError("Connection failed")
# Act & Assert: Verify proper error handling
from extensions.ext_database import db
service = MCPToolManageService(db.session())
with pytest.raises(ValueError, match="Failed to re-connect MCP server: Connection failed"):
service._reconnect_provider(
MCPToolManageService._reconnect_with_url(
server_url="https://example.com/mcp",
provider=mcp_provider,
headers={"X-Test": "1"},
timeout=mcp_provider.timeout,
sse_read_timeout=mcp_provider.sse_read_timeout,
)
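All three rewrites patch `MCPClient` at its defining module (`core.mcp.mcp_client`) and program the object yielded by the `with` block through `return_value.__enter__.return_value`. That `unittest.mock` idiom in isolation:

```python
from unittest.mock import MagicMock

# Shape of the mock that patch("core.mcp.mcp_client.MCPClient") would hand back.
mock_client_cls = MagicMock()
client_instance = mock_client_cls.return_value.__enter__.return_value
client_instance.list_tools.return_value = [{"name": "test_tool"}]

# Calling the class returns .return_value; entering it returns .__enter__.return_value.
with mock_client_cls("https://example.com/mcp") as client:
    assert client.list_tools() == [{"name": "test_tool"}]
```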

View File

@ -163,34 +163,17 @@ class TestActivateApi:
"account": mock_account,
}
@pytest.fixture
def mock_token_pair(self):
"""Create mock token pair object."""
token_pair = MagicMock()
token_pair.access_token = "access_token"
token_pair.refresh_token = "refresh_token"
token_pair.csrf_token = "csrf_token"
token_pair.model_dump.return_value = {
"access_token": "access_token",
"refresh_token": "refresh_token",
"csrf_token": "csrf_token",
}
return token_pair
@patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
@patch("controllers.console.auth.activate.RegisterService.revoke_token")
@patch("controllers.console.auth.activate.db")
@patch("controllers.console.auth.activate.AccountService.login")
def test_successful_account_activation(
self,
mock_login,
mock_db,
mock_revoke_token,
mock_get_invitation,
app,
mock_invitation,
mock_account,
mock_token_pair,
):
"""
Test successful account activation.
@ -198,12 +181,10 @@ class TestActivateApi:
Verifies that:
- Account is activated with user preferences
- Account status is set to ACTIVE
- User is logged in after activation
- Invitation token is revoked
"""
# Arrange
mock_get_invitation.return_value = mock_invitation
mock_login.return_value = mock_token_pair
# Act
with app.test_request_context(
@ -230,7 +211,6 @@ class TestActivateApi:
assert mock_account.initialized_at is not None
mock_revoke_token.assert_called_once_with("workspace-123", "invitee@example.com", "valid_token")
mock_db.session.commit.assert_called_once()
mock_login.assert_called_once()
@patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
def test_activation_with_invalid_token(self, mock_get_invitation, app):
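Dropping the bottom-most `@patch(...AccountService.login)` decorator is why `mock_login` disappears as the first mock parameter in every signature: stacked `@patch` decorators apply bottom-up, so the decorator closest to the function supplies the first argument. This is standard `unittest.mock` behavior, shown in isolation:

```python
import math
import os
from unittest.mock import patch


@patch("os.getcwd")   # outermost decorator -> last mock parameter
@patch("math.floor")  # innermost decorator -> first mock parameter
def show_order(mock_floor, mock_getcwd):
    assert math.floor is mock_floor
    assert os.getcwd is mock_getcwd


show_order()
```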
@ -264,17 +244,14 @@ class TestActivateApi:
@patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
@patch("controllers.console.auth.activate.RegisterService.revoke_token")
@patch("controllers.console.auth.activate.db")
@patch("controllers.console.auth.activate.AccountService.login")
def test_activation_sets_interface_theme(
self,
mock_login,
mock_db,
mock_revoke_token,
mock_get_invitation,
app,
mock_invitation,
mock_account,
mock_token_pair,
):
"""
Test that activation sets default interface theme.
@ -284,7 +261,6 @@ class TestActivateApi:
"""
# Arrange
mock_get_invitation.return_value = mock_invitation
mock_login.return_value = mock_token_pair
# Act
with app.test_request_context(
@ -317,17 +293,14 @@ class TestActivateApi:
@patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
@patch("controllers.console.auth.activate.RegisterService.revoke_token")
@patch("controllers.console.auth.activate.db")
@patch("controllers.console.auth.activate.AccountService.login")
def test_activation_with_different_locales(
self,
mock_login,
mock_db,
mock_revoke_token,
mock_get_invitation,
app,
mock_invitation,
mock_account,
mock_token_pair,
language,
timezone,
):
@ -341,7 +314,6 @@ class TestActivateApi:
"""
# Arrange
mock_get_invitation.return_value = mock_invitation
mock_login.return_value = mock_token_pair
# Act
with app.test_request_context(
@ -367,27 +339,23 @@ class TestActivateApi:
@patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
@patch("controllers.console.auth.activate.RegisterService.revoke_token")
@patch("controllers.console.auth.activate.db")
@patch("controllers.console.auth.activate.AccountService.login")
def test_activation_returns_token_data(
def test_activation_returns_success_response(
self,
mock_login,
mock_db,
mock_revoke_token,
mock_get_invitation,
app,
mock_invitation,
mock_token_pair,
):
"""
Test that activation returns authentication tokens.
Test that activation returns a success response without authentication tokens.
Verifies that:
- Token pair is returned in response
- All token types are included (access, refresh, csrf)
- Response contains a success result
- No token data is returned
"""
# Arrange
mock_get_invitation.return_value = mock_invitation
mock_login.return_value = mock_token_pair
# Act
with app.test_request_context(
@ -406,24 +374,18 @@ class TestActivateApi:
response = api.post()
# Assert
assert "data" in response
assert response["data"]["access_token"] == "access_token"
assert response["data"]["refresh_token"] == "refresh_token"
assert response["data"]["csrf_token"] == "csrf_token"
assert response == {"result": "success"}
@patch("controllers.console.auth.activate.RegisterService.get_invitation_if_token_valid")
@patch("controllers.console.auth.activate.RegisterService.revoke_token")
@patch("controllers.console.auth.activate.db")
@patch("controllers.console.auth.activate.AccountService.login")
def test_activation_without_workspace_id(
self,
mock_login,
mock_db,
mock_revoke_token,
mock_get_invitation,
app,
mock_invitation,
mock_token_pair,
):
"""
Test account activation without workspace_id.
@ -434,7 +396,6 @@ class TestActivateApi:
"""
# Arrange
mock_get_invitation.return_value = mock_invitation
mock_login.return_value = mock_token_pair
# Act
with app.test_request_context(

View File

@ -0,0 +1,236 @@
from __future__ import annotations
import builtins
import uuid
from datetime import UTC, datetime
from unittest.mock import MagicMock
import pytest
from flask import Flask
from flask.views import MethodView as FlaskMethodView
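# The console extension controllers resolve `MethodView` as a bare builtins name at
# import time; temporarily inject Flask's MethodView so the import below succeeds in
# isolation, then remove the shim again right after.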
_NEEDS_METHOD_VIEW_CLEANUP = False
if not hasattr(builtins, "MethodView"):
builtins.MethodView = FlaskMethodView
_NEEDS_METHOD_VIEW_CLEANUP = True
from constants import HIDDEN_VALUE
from controllers.console.extension import (
APIBasedExtensionAPI,
APIBasedExtensionDetailAPI,
CodeBasedExtensionAPI,
)
if _NEEDS_METHOD_VIEW_CLEANUP:
delattr(builtins, "MethodView")
from models.account import AccountStatus
from models.api_based_extension import APIBasedExtension
def _make_extension(
*,
name: str = "Sample Extension",
api_endpoint: str = "https://example.com/api",
api_key: str = "super-secret-key",
) -> APIBasedExtension:
extension = APIBasedExtension(
tenant_id="tenant-123",
name=name,
api_endpoint=api_endpoint,
api_key=api_key,
)
extension.id = f"{uuid.uuid4()}"
extension.created_at = datetime.now(tz=UTC)
return extension
@pytest.fixture(autouse=True)
def _mock_console_guards(monkeypatch: pytest.MonkeyPatch) -> MagicMock:
"""Bypass console decorators so handlers can run in isolation."""
import controllers.console.extension as extension_module
from controllers.console import wraps as wraps_module
account = MagicMock()
account.status = AccountStatus.ACTIVE
account.current_tenant_id = "tenant-123"
account.id = "account-123"
account.is_authenticated = True
monkeypatch.setattr(wraps_module.dify_config, "EDITION", "CLOUD")
monkeypatch.setattr("libs.login.dify_config.LOGIN_DISABLED", True)
monkeypatch.delenv("INIT_PASSWORD", raising=False)
monkeypatch.setattr(extension_module, "current_account_with_tenant", lambda: (account, "tenant-123"))
monkeypatch.setattr(wraps_module, "current_account_with_tenant", lambda: (account, "tenant-123"))
# The login_required decorator consults the shared LocalProxy in libs.login.
monkeypatch.setattr("libs.login.current_user", account)
monkeypatch.setattr("libs.login.check_csrf_token", lambda *_, **__: None)
return account
@pytest.fixture(autouse=True)
def _restx_mask_defaults(app: Flask):
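    # flask-restx field masking reads these config keys during marshalling; supply
    # defaults because the test app is not configured through the full Dify setup.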
app.config.setdefault("RESTX_MASK_HEADER", "X-Fields")
app.config.setdefault("RESTX_MASK_SWAGGER", False)
def test_code_based_extension_get_returns_service_data(app: Flask, monkeypatch: pytest.MonkeyPatch):
service_result = {"entrypoint": "main:agent"}
service_mock = MagicMock(return_value=service_result)
monkeypatch.setattr(
"controllers.console.extension.CodeBasedExtensionService.get_code_based_extension",
service_mock,
)
with app.test_request_context(
"/console/api/code-based-extension",
method="GET",
query_string={"module": "workflow.tools"},
):
response = CodeBasedExtensionAPI().get()
assert response == {"module": "workflow.tools", "data": service_result}
service_mock.assert_called_once_with("workflow.tools")
def test_api_based_extension_get_returns_tenant_extensions(app: Flask, monkeypatch: pytest.MonkeyPatch):
extension = _make_extension(name="Weather API", api_key="abcdefghi123")
service_mock = MagicMock(return_value=[extension])
monkeypatch.setattr(
"controllers.console.extension.APIBasedExtensionService.get_all_by_tenant_id",
service_mock,
)
with app.test_request_context("/console/api/api-based-extension", method="GET"):
response = APIBasedExtensionAPI().get()
assert response[0]["id"] == extension.id
assert response[0]["name"] == "Weather API"
assert response[0]["api_endpoint"] == extension.api_endpoint
assert response[0]["api_key"].startswith(extension.api_key[:3])
service_mock.assert_called_once_with("tenant-123")
def test_api_based_extension_post_creates_extension(app: Flask, monkeypatch: pytest.MonkeyPatch):
saved_extension = _make_extension(name="Docs API", api_key="saved-secret")
save_mock = MagicMock(return_value=saved_extension)
monkeypatch.setattr("controllers.console.extension.APIBasedExtensionService.save", save_mock)
payload = {
"name": "Docs API",
"api_endpoint": "https://docs.example.com/hook",
"api_key": "plain-secret",
}
with app.test_request_context("/console/api/api-based-extension", method="POST", json=payload):
response = APIBasedExtensionAPI().post()
args, _ = save_mock.call_args
created_extension: APIBasedExtension = args[0]
assert created_extension.tenant_id == "tenant-123"
assert created_extension.name == payload["name"]
assert created_extension.api_endpoint == payload["api_endpoint"]
assert created_extension.api_key == payload["api_key"]
assert response["name"] == saved_extension.name
save_mock.assert_called_once()
def test_api_based_extension_detail_get_fetches_extension(app: Flask, monkeypatch: pytest.MonkeyPatch):
extension = _make_extension(name="Docs API", api_key="abcdefg12345")
service_mock = MagicMock(return_value=extension)
monkeypatch.setattr(
"controllers.console.extension.APIBasedExtensionService.get_with_tenant_id",
service_mock,
)
extension_id = uuid.uuid4()
with app.test_request_context(f"/console/api/api-based-extension/{extension_id}", method="GET"):
response = APIBasedExtensionDetailAPI().get(extension_id)
assert response["id"] == extension.id
assert response["name"] == extension.name
service_mock.assert_called_once_with("tenant-123", str(extension_id))
def test_api_based_extension_detail_post_keeps_hidden_api_key(app: Flask, monkeypatch: pytest.MonkeyPatch):
existing_extension = _make_extension(name="Docs API", api_key="keep-me")
get_mock = MagicMock(return_value=existing_extension)
save_mock = MagicMock(return_value=existing_extension)
monkeypatch.setattr(
"controllers.console.extension.APIBasedExtensionService.get_with_tenant_id",
get_mock,
)
monkeypatch.setattr("controllers.console.extension.APIBasedExtensionService.save", save_mock)
payload = {
"name": "Docs API Updated",
"api_endpoint": "https://docs.example.com/v2",
"api_key": HIDDEN_VALUE,
}
extension_id = uuid.uuid4()
with app.test_request_context(
f"/console/api/api-based-extension/{extension_id}",
method="POST",
json=payload,
):
response = APIBasedExtensionDetailAPI().post(extension_id)
assert existing_extension.name == payload["name"]
assert existing_extension.api_endpoint == payload["api_endpoint"]
assert existing_extension.api_key == "keep-me"
save_mock.assert_called_once_with(existing_extension)
assert response["name"] == payload["name"]
def test_api_based_extension_detail_post_updates_api_key_when_provided(app: Flask, monkeypatch: pytest.MonkeyPatch):
existing_extension = _make_extension(name="Docs API", api_key="old-secret")
get_mock = MagicMock(return_value=existing_extension)
save_mock = MagicMock(return_value=existing_extension)
monkeypatch.setattr(
"controllers.console.extension.APIBasedExtensionService.get_with_tenant_id",
get_mock,
)
monkeypatch.setattr("controllers.console.extension.APIBasedExtensionService.save", save_mock)
payload = {
"name": "Docs API Updated",
"api_endpoint": "https://docs.example.com/v2",
"api_key": "new-secret",
}
extension_id = uuid.uuid4()
with app.test_request_context(
f"/console/api/api-based-extension/{extension_id}",
method="POST",
json=payload,
):
response = APIBasedExtensionDetailAPI().post(extension_id)
assert existing_extension.api_key == "new-secret"
save_mock.assert_called_once_with(existing_extension)
assert response["name"] == payload["name"]
def test_api_based_extension_detail_delete_removes_extension(app: Flask, monkeypatch: pytest.MonkeyPatch):
existing_extension = _make_extension()
get_mock = MagicMock(return_value=existing_extension)
delete_mock = MagicMock()
monkeypatch.setattr(
"controllers.console.extension.APIBasedExtensionService.get_with_tenant_id",
get_mock,
)
monkeypatch.setattr("controllers.console.extension.APIBasedExtensionService.delete", delete_mock)
extension_id = uuid.uuid4()
with app.test_request_context(
f"/console/api/api-based-extension/{extension_id}",
method="DELETE",
):
response, status = APIBasedExtensionDetailAPI().delete(extension_id)
delete_mock.assert_called_once_with(existing_extension)
assert response == {"result": "success"}
assert status == 204

Some files were not shown because too many files have changed in this diff.