diff --git a/.claude/skills/frontend-testing/CHECKLIST.md b/.claude/skills/frontend-testing/CHECKLIST.md
new file mode 100644
index 0000000000..b960067264
--- /dev/null
+++ b/.claude/skills/frontend-testing/CHECKLIST.md
@@ -0,0 +1,205 @@
+# Test Generation Checklist
+
+Use this checklist when generating or reviewing tests for Dify frontend components.
+
+## Pre-Generation
+
+- [ ] Read the component source code completely
+- [ ] Identify component type (component, hook, utility, page)
+- [ ] Run `pnpm analyze-component <path>` if available
+- [ ] Note complexity score and features detected
+- [ ] Check for existing tests in the same directory
+- [ ] **Identify ALL files in the directory** that need testing (not just index)
+
+## Testing Strategy
+
+### ⚠️ Incremental Workflow (CRITICAL for Multi-File)
+
+- [ ] **NEVER generate all tests at once** - process one file at a time
+- [ ] Order files by complexity: utilities → hooks → simple → complex → integration
+- [ ] Create a todo list to track progress before starting
+- [ ] For EACH file: write → run test → verify pass → then next
+- [ ] **DO NOT proceed** to next file until current one passes
+
+### Path-Level Coverage
+
+- [ ] **Test ALL files** in the assigned directory/path
+- [ ] List all components, hooks, utilities that need coverage
+- [ ] Decide: single spec file (integration) or multiple spec files (unit)
+
+### Complexity Assessment
+
+- [ ] Run `pnpm analyze-component <path>` for complexity score
+- [ ] **Complexity > 50**: Consider refactoring before testing
+- [ ] **500+ lines**: Consider splitting before testing
+- [ ] **30-50 complexity**: Use multiple describe blocks, organized structure
+
+### Integration vs Mocking
+
+- [ ] **DO NOT mock base components** (`Loading`, `Button`, `Tooltip`, etc.)
+- [ ] Import real project components instead of mocking
+- [ ] Only mock: API calls, complex context providers, third-party libs with side effects
+- [ ] Prefer integration testing when using single spec file
+
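+For example (a sketch; the import paths are illustrative):
+
+```typescript
+// ✅ Use the real base component, mock only the service layer
+import Loading from '@/app/components/base/loading'
+
+jest.mock('@/service/datasets')
+```
+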
+## Required Test Sections
+
+### All Components MUST Have
+
+- [ ] **Rendering tests** - Component renders without crashing
+- [ ] **Props tests** - Required props, optional props, default values
+- [ ] **Edge cases** - null, undefined, empty values, boundaries
+
+### Conditional Sections (Add When Feature Present)
+
+| Feature | Add Tests For |
+|---------|---------------|
+| `useState` | Initial state, transitions, cleanup |
+| `useEffect` | Execution, dependencies, cleanup |
+| Event handlers | onClick, onChange, onSubmit, keyboard |
+| API calls | Loading, success, error states |
+| Routing | Navigation, params, query strings |
+| `useCallback`/`useMemo` | Referential equality |
+| Context | Provider values, consumer behavior |
+| Forms | Validation, submission, error display |
+
+## Code Quality Checklist
+
+### Structure
+
+- [ ] Uses `describe` blocks to group related tests
+- [ ] Test names follow `should <expected> when <condition>` pattern
+- [ ] AAA pattern (Arrange-Act-Assert) is clear
+- [ ] Comments explain complex test scenarios
+
+### Mocks
+
+- [ ] **DO NOT mock base components** (`@/app/components/base/*`)
+- [ ] `jest.clearAllMocks()` in `beforeEach` (not `afterEach`)
+- [ ] Shared mock state reset in `beforeEach`
+- [ ] i18n uses shared mock (auto-loaded); only override locally for custom translations
+- [ ] Router mocks match actual Next.js API
+- [ ] Mocks reflect actual component conditional behavior
+- [ ] Only mock: API services, complex context providers, third-party libs
+
+### Queries
+
+- [ ] Prefer semantic queries (`getByRole`, `getByLabelText`)
+- [ ] Use `queryBy*` for absence assertions
+- [ ] Use `findBy*` for async elements
+- [ ] `getByTestId` only as last resort
+
+### Async
+
+- [ ] All async tests use `async/await`
+- [ ] `waitFor` wraps async assertions
+- [ ] Fake timers properly setup/teardown
+- [ ] No floating promises
+
+### TypeScript
+
+- [ ] No `any` types without justification
+- [ ] Mock data uses actual types from source
+- [ ] Factory functions have proper return types
+
+## Coverage Goals (Per File)
+
+For the current file being tested:
+
+- [ ] 100% function coverage
+- [ ] 100% statement coverage
+- [ ] >95% branch coverage
+- [ ] >95% line coverage
+
+## Post-Generation (Per File)
+
+**Run these checks after EACH test file, not just at the end:**
+
+- [ ] Run `pnpm test -- path/to/file.spec.tsx` - **MUST PASS before next file**
+- [ ] Fix any failures immediately
+- [ ] Mark file as complete in todo list
+- [ ] Only then proceed to next file
+
+### After All Files Complete
+
+- [ ] Run full directory test: `pnpm test -- path/to/directory/`
+- [ ] Check coverage report: `pnpm test -- --coverage`
+- [ ] Run `pnpm lint:fix` on all test files
+- [ ] Run `pnpm type-check:tsgo`
+
+## Common Issues to Watch
+
+### False Positives
+
+```typescript
+// ❌ Mock doesn't match actual behavior
+jest.mock('./Component', () => () => <div>Mocked</div>)
+
+// ✅ Mock matches actual conditional logic
+jest.mock('./Component', () => ({ isOpen }: any) =>
+  isOpen ? <div>Content</div> : null
+)
+```
+
+### State Leakage
+
+```typescript
+// ❌ Shared state not reset
+let mockState = false
+jest.mock('./useHook', () => () => mockState)
+
+// ✅ Reset in beforeEach
+beforeEach(() => {
+  mockState = false
+})
+```
+
+### Async Race Conditions
+
+```typescript
+// ❌ Not awaited
+it('loads data', () => {
+  render(<Component />)
+  expect(screen.getByText('Data')).toBeInTheDocument()
+})
+
+// ✅ Properly awaited
+it('loads data', async () => {
+  render(<Component />)
+  await waitFor(() => {
+    expect(screen.getByText('Data')).toBeInTheDocument()
+  })
+})
+```
+
+### Missing Edge Cases
+
+Always test these scenarios:
+
+- `null` / `undefined` inputs
+- Empty strings / arrays / objects
+- Boundary values (0, -1, MAX_INT)
+- Error states
+- Loading states
+- Disabled states
+
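+A compact way to cover several of these at once is a data-driven sketch (the `Label` component and its fallback text are hypothetical):
+
+```typescript
+test.each([
+  { input: null },
+  { input: undefined },
+  { input: '' },
+])('should render a fallback for $input', ({ input }) => {
+  render(<Label value={input} />)
+  expect(screen.getByText(/unknown/i)).toBeInTheDocument()
+})
+```
+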
+## Quick Commands
+
+```bash
+# Run specific test
+pnpm test -- path/to/file.spec.tsx
+
+# Run with coverage
+pnpm test -- --coverage path/to/file.spec.tsx
+
+# Watch mode
+pnpm test -- --watch path/to/file.spec.tsx
+
+# Update snapshots (use sparingly)
+pnpm test -- -u path/to/file.spec.tsx
+
+# Analyze component
+pnpm analyze-component path/to/component.tsx
+
+# Review existing test
+pnpm analyze-component path/to/component.tsx --review
+```
diff --git a/.claude/skills/frontend-testing/SKILL.md b/.claude/skills/frontend-testing/SKILL.md
new file mode 100644
index 0000000000..06cb672141
--- /dev/null
+++ b/.claude/skills/frontend-testing/SKILL.md
@@ -0,0 +1,321 @@
+---
+name: Dify Frontend Testing
+description: Generate Jest + React Testing Library tests for Dify frontend components, hooks, and utilities. Triggers on testing, spec files, coverage, Jest, RTL, unit tests, integration tests, or write/review test requests.
+---
+
+# Dify Frontend Testing Skill
+
+This skill enables Claude to generate high-quality, comprehensive frontend tests for the Dify project following established conventions and best practices.
+
+> **⚠️ Authoritative Source**: This skill is derived from `web/testing/testing.md`. When in doubt, always refer to that document as the canonical specification.
+
+## When to Apply This Skill
+
+Apply this skill when the user:
+
+- Asks to **write tests** for a component, hook, or utility
+- Asks to **review existing tests** for completeness
+- Mentions **Jest**, **React Testing Library**, **RTL**, or **spec files**
+- Requests **test coverage** improvement
+- Uses `pnpm analyze-component` output as context
+- Mentions **testing**, **unit tests**, or **integration tests** for frontend code
+- Wants to understand **testing patterns** in the Dify codebase
+
+**Do NOT apply** when:
+
+- User is asking about backend/API tests (Python/pytest)
+- User is asking about E2E tests (Playwright/Cypress)
+- User is only asking conceptual questions without code context
+
+## Quick Reference
+
+### Tech Stack
+
+| Tool | Version | Purpose |
+|------|---------|---------|
+| Jest | 29.7 | Test runner |
+| React Testing Library | 16.0 | Component testing |
+| happy-dom | - | Test environment |
+| nock | 14.0 | HTTP mocking |
+| TypeScript | 5.x | Type safety |
+
+### Key Commands
+
+```bash
+# Run all tests
+pnpm test
+
+# Watch mode
+pnpm test -- --watch
+
+# Run specific file
+pnpm test -- path/to/file.spec.tsx
+
+# Generate coverage report
+pnpm test -- --coverage
+
+# Analyze component complexity
+pnpm analyze-component <path>
+
+# Review existing test
+pnpm analyze-component <path> --review
+```
+
+### File Naming
+
+- Test files: `ComponentName.spec.tsx` (same directory as component)
+- Integration tests: `web/__tests__/` directory
+
+## Test Structure Template
+
+```typescript
+import { render, screen, fireEvent, waitFor } from '@testing-library/react'
+import Component from './index'
+
+// ✅ Import real project components (DO NOT mock these)
+// import Loading from '@/app/components/base/loading'
+// import { ChildComponent } from './child-component'
+
+// ✅ Mock external dependencies only
+jest.mock('@/service/api')
+jest.mock('next/navigation', () => ({
+  useRouter: () => ({ push: jest.fn() }),
+  usePathname: () => '/test',
+}))
+
+// Shared state for mocks (if needed)
+let mockSharedState = false
+
+describe('ComponentName', () => {
+  beforeEach(() => {
+    jest.clearAllMocks() // ✅ Reset mocks BEFORE each test
+    mockSharedState = false // ✅ Reset shared state
+  })
+
+  // Rendering tests (REQUIRED)
+  describe('Rendering', () => {
+    it('should render without crashing', () => {
+      // Arrange
+      const props = { title: 'Test' }
+
+      // Act
+      render(<Component {...props} />)
+
+      // Assert
+      expect(screen.getByText('Test')).toBeInTheDocument()
+    })
+  })
+
+  // Props tests (REQUIRED)
+  describe('Props', () => {
+    it('should apply custom className', () => {
+      render(<Component className="custom" />)
+      expect(screen.getByRole('button')).toHaveClass('custom')
+    })
+  })
+
+  // User Interactions
+  describe('User Interactions', () => {
+    it('should handle click events', () => {
+      const handleClick = jest.fn()
+      render(<Component onClick={handleClick} />)
+
+      fireEvent.click(screen.getByRole('button'))
+
+      expect(handleClick).toHaveBeenCalledTimes(1)
+    })
+  })
+
+  // Edge Cases (REQUIRED)
+  describe('Edge Cases', () => {
+    it('should handle null data', () => {
+      render(<Component data={null} />)
+      expect(screen.getByText(/no data/i)).toBeInTheDocument()
+    })
+
+    it('should handle empty array', () => {
+      render(<Component items={[]} />)
+      expect(screen.getByText(/empty/i)).toBeInTheDocument()
+    })
+  })
+})
+```
+
+## Testing Workflow (CRITICAL)
+
+### ⚠️ Incremental Approach Required
+
+**NEVER generate all test files at once.** For complex components or multi-file directories:
+
+1. **Analyze & Plan**: List all files, order by complexity (simple → complex)
+1. **Process ONE at a time**: Write test → Run test → Fix if needed → Next
+1. **Verify before proceeding**: Do NOT continue to next file until current passes
+
+```
+For each file:
+  ┌────────────────────────────────────────┐
+  │ 1. Write test                          │
+  │ 2. Run: pnpm test -- <file>.spec.tsx   │
+  │ 3. PASS? → Mark complete, next file    │
+  │    FAIL? → Fix first, then continue    │
+  └────────────────────────────────────────┘
+```
+
+### Complexity-Based Order
+
+Process in this order for multi-file testing:
+
+1. 🟢 Utility functions (simplest)
+1. 🟢 Custom hooks
+1. 🟡 Simple components (presentational)
+1. 🟡 Medium components (state, effects)
+1. 🔴 Complex components (API, routing)
+1. 🔴 Integration tests (index files - last)
+
+### When to Refactor First
+
+- **Complexity > 50**: Break into smaller pieces before testing
+- **500+ lines**: Consider splitting before testing
+- **Many dependencies**: Extract logic into hooks first
+
+> 📖 See `guides/workflow.md` for complete workflow details and todo list format.
+
+## Testing Strategy
+
+### Path-Level Testing (Directory Testing)
+
+When assigned to test a directory/path, test **ALL content** within that path:
+
+- Test all components, hooks, utilities in the directory (not just `index` file)
+- Use incremental approach: one file at a time, verify each before proceeding
+- Goal: 100% coverage of ALL files in the directory
+
+### Integration Testing First
+
+**Prefer integration testing** when writing tests for a directory:
+
+- ✅ **Import real project components** directly (including base components and siblings)
+- ✅ **Only mock**: API services (`@/service/*`), `next/navigation`, complex context providers
+- ❌ **DO NOT mock** base components (`@/app/components/base/*`)
+- ❌ **DO NOT mock** sibling/child components in the same directory
+
+> See [Test Structure Template](#test-structure-template) for correct import/mock patterns.
+
+## Core Principles
+
+### 1. AAA Pattern (Arrange-Act-Assert)
+
+Every test should clearly separate:
+
+- **Arrange**: Setup test data and render component
+- **Act**: Perform user actions
+- **Assert**: Verify expected outcomes
+
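+A minimal sketch of the pattern, assuming a hypothetical `Greeting` component:
+
+```typescript
+it('should greet the user by name', () => {
+  // Arrange: set up props and render
+  render(<Greeting name="Ada" />)
+
+  // Act: perform the user action
+  fireEvent.click(screen.getByRole('button', { name: /greet/i }))
+
+  // Assert: verify the observable outcome
+  expect(screen.getByText(/hello, ada/i)).toBeInTheDocument()
+})
+```
+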
+### 2. Black-Box Testing
+
+- Test observable behavior, not implementation details
+- Use semantic queries (getByRole, getByLabelText)
+- Avoid testing internal state directly
+- **Prefer pattern matching over hardcoded strings** in assertions:
+
+```typescript
+// ❌ Avoid: hardcoded text assertions
+expect(screen.getByText('Loading...')).toBeInTheDocument()
+
+// ✅ Better: role-based queries
+expect(screen.getByRole('status')).toBeInTheDocument()
+
+// ✅ Better: pattern matching
+expect(screen.getByText(/loading/i)).toBeInTheDocument()
+```
+
+### 3. Single Behavior Per Test
+
+Each test verifies ONE user-observable behavior:
+
+```typescript
+// ✅ Good: One behavior
+it('should disable button when loading', () => {
+  render(<Button loading />)
+  expect(screen.getByRole('button')).toBeDisabled()
+})
+
+// ❌ Bad: Multiple behaviors
+it('should handle loading state', () => {
+  render(<Button loading />)
+  expect(screen.getByRole('button')).toBeDisabled()
+  expect(screen.getByText('Loading...')).toBeInTheDocument()
+  expect(screen.getByRole('button')).toHaveClass('loading')
+})
+```
+
+### 4. Semantic Naming
+
+Use `should <expected> when <condition>`:
+
+```typescript
+it('should show error message when validation fails')
+it('should call onSubmit when form is valid')
+it('should disable input when isReadOnly is true')
+```
+
+## Required Test Scenarios
+
+### Always Required (All Components)
+
+1. **Rendering**: Component renders without crashing
+1. **Props**: Required props, optional props, default values
+1. **Edge Cases**: null, undefined, empty values, boundary conditions
+
+### Conditional (When Present)
+
+| Feature | Test Focus |
+|---------|-----------|
+| `useState` | Initial state, transitions, cleanup |
+| `useEffect` | Execution, dependencies, cleanup |
+| Event handlers | All onClick, onChange, onSubmit, keyboard |
+| API calls | Loading, success, error states |
+| Routing | Navigation, params, query strings |
+| `useCallback`/`useMemo` | Referential equality (see the sketch below) |
+| Context | Provider values, consumer behavior |
+| Forms | Validation, submission, error display |
+
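+For the `useCallback`/`useMemo` row, referential equality can be checked with `renderHook`. A minimal sketch, assuming a hypothetical `useItemHandlers` hook that memoizes its callback:
+
+```typescript
+import { renderHook } from '@testing-library/react'
+import { useItemHandlers } from './use-item-handlers' // hypothetical hook
+
+it('should keep the same callback reference across re-renders', () => {
+  const { result, rerender } = renderHook(() => useItemHandlers())
+  const firstReference = result.current.handleSelect
+
+  rerender()
+
+  // useCallback should return the referentially equal function
+  expect(result.current.handleSelect).toBe(firstReference)
+})
+```
+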
+## Coverage Goals (Per File)
+
+For each test file generated, aim for:
+
+- ✅ **100%** function coverage
+- ✅ **100%** statement coverage
+- ✅ **>95%** branch coverage
+- ✅ **>95%** line coverage
+
+> **Note**: For multi-file directories, process one file at a time with full coverage each. See `guides/workflow.md`.
+
+## Detailed Guides
+
+For more detailed information, refer to:
+
+- `guides/workflow.md` - **Incremental testing workflow** (MUST READ for multi-file testing)
+- `guides/mocking.md` - Mock patterns and best practices
+- `guides/async-testing.md` - Async operations and API calls
+- `guides/domain-components.md` - Workflow, Dataset, Configuration testing
+- `guides/common-patterns.md` - Frequently used testing patterns
+
+## Authoritative References
+
+### Primary Specification (MUST follow)
+
+- **`web/testing/testing.md`** - The canonical testing specification. This skill is derived from this document.
+
+### Reference Examples in Codebase
+
+- `web/utils/classnames.spec.ts` - Utility function tests
+- `web/app/components/base/button/index.spec.tsx` - Component tests
+- `web/__mocks__/provider-context.ts` - Mock factory example
+
+### Project Configuration
+
+- `web/jest.config.ts` - Jest configuration
+- `web/jest.setup.ts` - Test environment setup
+- `web/testing/analyze-component.js` - Component analysis tool
+- `web/__mocks__/react-i18next.ts` - Shared i18n mock (auto-loaded by Jest, no explicit mock needed; override locally only for custom translations)
diff --git a/.claude/skills/frontend-testing/guides/async-testing.md b/.claude/skills/frontend-testing/guides/async-testing.md
new file mode 100644
index 0000000000..f9912debbf
--- /dev/null
+++ b/.claude/skills/frontend-testing/guides/async-testing.md
@@ -0,0 +1,345 @@
+# Async Testing Guide
+
+## Core Async Patterns
+
+### 1. waitFor - Wait for Condition
+
+```typescript
+import { render, screen, waitFor } from '@testing-library/react'
+
+it('should load and display data', async () => {
+  render(<Component />)
+
+  // Wait for element to appear
+  await waitFor(() => {
+    expect(screen.getByText('Loaded Data')).toBeInTheDocument()
+  })
+})
+
+it('should hide loading spinner after load', async () => {
+  render(<Component />)
+
+  // Wait for element to disappear
+  await waitFor(() => {
+    expect(screen.queryByText('Loading...')).not.toBeInTheDocument()
+  })
+})
+```
+
+### 2. findBy\* - Async Queries
+
+```typescript
+it('should show user name after fetch', async () => {
+  render(<Component />)
+
+  // findBy returns a promise, auto-waits up to 1000ms
+  const userName = await screen.findByText('John Doe')
+  expect(userName).toBeInTheDocument()
+
+  // findByRole with options
+  const button = await screen.findByRole('button', { name: /submit/i })
+  expect(button).toBeEnabled()
+})
+```
+
+### 3. userEvent for Async Interactions
+
+```typescript
+import userEvent from '@testing-library/user-event'
+
+it('should submit form', async () => {
+  const user = userEvent.setup()
+  const onSubmit = jest.fn()
+
+  render(<Form onSubmit={onSubmit} />)
+
+  // userEvent methods are async
+  await user.type(screen.getByLabelText('Email'), 'test@example.com')
+  await user.click(screen.getByRole('button', { name: /submit/i }))
+
+  await waitFor(() => {
+    expect(onSubmit).toHaveBeenCalledWith({ email: 'test@example.com' })
+  })
+})
+```
+
+## Fake Timers
+
+### When to Use Fake Timers
+
+- Testing components with `setTimeout`/`setInterval`
+- Testing debounce/throttle behavior
+- Testing animations or delayed transitions
+- Testing polling or retry logic
+
+### Basic Fake Timer Setup
+
+```typescript
+describe('Debounced Search', () => {
+  beforeEach(() => {
+    jest.useFakeTimers()
+  })
+
+  afterEach(() => {
+    jest.useRealTimers()
+  })
+
+  it('should debounce search input', () => {
+    const onSearch = jest.fn()
+    render(<SearchInput onSearch={onSearch} />)
+
+    // Type in the input
+    fireEvent.change(screen.getByRole('textbox'), { target: { value: 'query' } })
+
+    // Search not called immediately
+    expect(onSearch).not.toHaveBeenCalled()
+
+    // Advance timers
+    jest.advanceTimersByTime(300)
+
+    // Now search is called
+    expect(onSearch).toHaveBeenCalledWith('query')
+  })
+})
+```
+
+### Fake Timers with Async Code
+
+```typescript
+it('should retry on failure', async () => {
+  jest.useFakeTimers()
+  const fetchData = jest.fn()
+    .mockRejectedValueOnce(new Error('Network error'))
+    .mockResolvedValueOnce({ data: 'success' })
+
+  render(<Component fetchData={fetchData} />)
+
+  // First call fails
+  await waitFor(() => {
+    expect(fetchData).toHaveBeenCalledTimes(1)
+  })
+
+  // Advance timer for retry
+  jest.advanceTimersByTime(1000)
+
+  // Second call succeeds
+  await waitFor(() => {
+    expect(fetchData).toHaveBeenCalledTimes(2)
+  })
+  expect(screen.getByText('success')).toBeInTheDocument()
+
+  jest.useRealTimers()
+})
+```
+
+### Common Fake Timer Utilities
+
+```typescript
+// Run all pending timers
+jest.runAllTimers()
+
+// Run only pending timers (not new ones created during execution)
+jest.runOnlyPendingTimers()
+
+// Advance by specific time
+jest.advanceTimersByTime(1000)
+
+// Get current fake time
+jest.now()
+
+// Clear all timers
+jest.clearAllTimers()
+```
+
+## API Testing Patterns
+
+### Loading → Success → Error States
+
+```typescript
+describe('DataFetcher', () => {
+  beforeEach(() => {
+    jest.clearAllMocks()
+  })
+
+  it('should show loading state', () => {
+    mockedApi.fetchData.mockImplementation(() => new Promise(() => {})) // Never resolves
+
+    render(<DataFetcher />)
+
+    expect(screen.getByTestId('loading-spinner')).toBeInTheDocument()
+  })
+
+  it('should show data on success', async () => {
+    mockedApi.fetchData.mockResolvedValue({ items: ['Item 1', 'Item 2'] })
+
+    render(<DataFetcher />)
+
+    // Use findBy* for multiple async elements (better error messages than waitFor with multiple assertions)
+    const item1 = await screen.findByText('Item 1')
+    const item2 = await screen.findByText('Item 2')
+    expect(item1).toBeInTheDocument()
+    expect(item2).toBeInTheDocument()
+
+    expect(screen.queryByTestId('loading-spinner')).not.toBeInTheDocument()
+  })
+
+  it('should show error on failure', async () => {
+    mockedApi.fetchData.mockRejectedValue(new Error('Failed to fetch'))
+
+    render(<DataFetcher />)
+
+    await waitFor(() => {
+      expect(screen.getByText(/failed to fetch/i)).toBeInTheDocument()
+    })
+  })
+
+  it('should retry on error', async () => {
+    mockedApi.fetchData.mockRejectedValue(new Error('Network error'))
+
+    render(<DataFetcher />)
+
+    await waitFor(() => {
+      expect(screen.getByRole('button', { name: /retry/i })).toBeInTheDocument()
+    })
+
+    mockedApi.fetchData.mockResolvedValue({ items: ['Item 1'] })
+    fireEvent.click(screen.getByRole('button', { name: /retry/i }))
+
+    await waitFor(() => {
+      expect(screen.getByText('Item 1')).toBeInTheDocument()
+    })
+  })
+})
+```
+
+### Testing Mutations
+
+```typescript
+it('should submit form and show success', async () => {
+  const user = userEvent.setup()
+  mockedApi.createItem.mockResolvedValue({ id: '1', name: 'New Item' })
+
+  render(<ItemForm />)
+
+  await user.type(screen.getByLabelText('Name'), 'New Item')
+  await user.click(screen.getByRole('button', { name: /create/i }))
+
+  // Button should be disabled during submission
+  expect(screen.getByRole('button', { name: /creating/i })).toBeDisabled()
+
+  await waitFor(() => {
+    expect(screen.getByText(/created successfully/i)).toBeInTheDocument()
+  })
+
+  expect(mockedApi.createItem).toHaveBeenCalledWith({ name: 'New Item' })
+})
+```
+
+## useEffect Testing
+
+### Testing Effect Execution
+
+```typescript
+it('should fetch data on mount', async () => {
+  const fetchData = jest.fn().mockResolvedValue({ data: 'test' })
+
+  render(<Component fetchData={fetchData} />)
+
+  await waitFor(() => {
+    expect(fetchData).toHaveBeenCalledTimes(1)
+  })
+})
+```
+
+### Testing Effect Dependencies
+
+```typescript
+it('should refetch when id changes', async () => {
+  const fetchData = jest.fn().mockResolvedValue({ data: 'test' })
+
+  const { rerender } = render(<Component id="1" fetchData={fetchData} />)
+
+  await waitFor(() => {
+    expect(fetchData).toHaveBeenCalledWith('1')
+  })
+
+  rerender(<Component id="2" fetchData={fetchData} />)
+
+  await waitFor(() => {
+    expect(fetchData).toHaveBeenCalledTimes(2)
+  })
+  expect(fetchData).toHaveBeenCalledWith('2')
+})
+```
+
+### Testing Effect Cleanup
+
+```typescript
+it('should cleanup subscription on unmount', () => {
+  const subscribe = jest.fn()
+  const unsubscribe = jest.fn()
+  subscribe.mockReturnValue(unsubscribe)
+
+  const { unmount } = render(<Component subscribe={subscribe} />)
+
+  expect(subscribe).toHaveBeenCalledTimes(1)
+
+  unmount()
+
+  expect(unsubscribe).toHaveBeenCalledTimes(1)
+})
+```
+
+## Common Async Pitfalls
+
+### ❌ Don't: Forget to await
+
+```typescript
+// Bad - test may pass even if assertion fails
+it('loads data', () => {
+  render(<Component />)
+  waitFor(() => {
+    expect(screen.getByText('Data')).toBeInTheDocument()
+  })
+})
+
+// Good - properly awaited
+it('loads data', async () => {
+  render(<Component />)
+  await waitFor(() => {
+    expect(screen.getByText('Data')).toBeInTheDocument()
+  })
+})
+```
+
+### ❌ Don't: Use multiple assertions in single waitFor
+
+```typescript
+// Bad - if first assertion fails, won't know about second
+await waitFor(() => {
+  expect(screen.getByText('Title')).toBeInTheDocument()
+  expect(screen.getByText('Description')).toBeInTheDocument()
+})
+
+// Good - separate waitFor or use findBy
+const title = await screen.findByText('Title')
+const description = await screen.findByText('Description')
+expect(title).toBeInTheDocument()
+expect(description).toBeInTheDocument()
+```
+
+### ❌ Don't: Mix fake timers with real async
+
+```typescript
+// Bad - fake timers don't work well with real Promises
+jest.useFakeTimers()
+await waitFor(() => {
+  expect(screen.getByText('Data')).toBeInTheDocument()
+}) // May timeout!
+
+// Good - use runAllTimers or advanceTimersByTime
+jest.useFakeTimers()
+render(<Component />)
+jest.runAllTimers()
+expect(screen.getByText('Data')).toBeInTheDocument()
+```
diff --git a/.claude/skills/frontend-testing/guides/common-patterns.md b/.claude/skills/frontend-testing/guides/common-patterns.md
new file mode 100644
index 0000000000..84a6045b04
--- /dev/null
+++ b/.claude/skills/frontend-testing/guides/common-patterns.md
@@ -0,0 +1,449 @@
+# Common Testing Patterns
+
+## Query Priority
+
+Use queries in this order (most to least preferred):
+
+```typescript
+// 1. getByRole - Most recommended (accessibility)
+screen.getByRole('button', { name: /submit/i })
+screen.getByRole('textbox', { name: /email/i })
+screen.getByRole('heading', { level: 1 })
+
+// 2. getByLabelText - Form fields
+screen.getByLabelText('Email address')
+screen.getByLabelText(/password/i)
+
+// 3. getByPlaceholderText - When no label
+screen.getByPlaceholderText('Search...')
+
+// 4. getByText - Non-interactive elements
+screen.getByText('Welcome to Dify')
+screen.getByText(/loading/i)
+
+// 5. getByDisplayValue - Current input value
+screen.getByDisplayValue('current value')
+
+// 6. getByAltText - Images
+screen.getByAltText('Company logo')
+
+// 7. getByTitle - Tooltip elements
+screen.getByTitle('Close')
+
+// 8. getByTestId - Last resort only!
+screen.getByTestId('custom-element')
+```
+
+## Event Handling Patterns
+
+### Click Events
+
+```typescript
+// Basic click
+fireEvent.click(screen.getByRole('button'))
+
+// With userEvent (preferred for realistic interaction)
+const user = userEvent.setup()
+await user.click(screen.getByRole('button'))
+
+// Double click
+await user.dblClick(screen.getByRole('button'))
+
+// Right click
+await user.pointer({ keys: '[MouseRight]', target: screen.getByRole('button') })
+```
+
+### Form Input
+
+```typescript
+const user = userEvent.setup()
+
+// Type in input
+await user.type(screen.getByRole('textbox'), 'Hello World')
+
+// Clear and type
+await user.clear(screen.getByRole('textbox'))
+await user.type(screen.getByRole('textbox'), 'New value')
+
+// Select option
+await user.selectOptions(screen.getByRole('combobox'), 'option-value')
+
+// Check checkbox
+await user.click(screen.getByRole('checkbox'))
+
+// Upload file
+const file = new File(['content'], 'test.pdf', { type: 'application/pdf' })
+await user.upload(screen.getByLabelText(/upload/i), file)
+```
+
+### Keyboard Events
+
+```typescript
+const user = userEvent.setup()
+
+// Press Enter
+await user.keyboard('{Enter}')
+
+// Press Escape
+await user.keyboard('{Escape}')
+
+// Keyboard shortcut
+await user.keyboard('{Control>}a{/Control}') // Ctrl+A
+
+// Tab navigation
+await user.tab()
+
+// Arrow keys
+await user.keyboard('{ArrowDown}')
+await user.keyboard('{ArrowUp}')
+```
+
+## Component State Testing
+
+### Testing State Transitions
+
+```typescript
+describe('Counter', () => {
+  it('should increment count', async () => {
+    const user = userEvent.setup()
+    render(<Counter />)
+
+    // Initial state
+    expect(screen.getByText('Count: 0')).toBeInTheDocument()
+
+    // Trigger transition
+    await user.click(screen.getByRole('button', { name: /increment/i }))
+
+    // New state
+    expect(screen.getByText('Count: 1')).toBeInTheDocument()
+  })
+})
+```
+
+### Testing Controlled Components
+
+```typescript
+describe('ControlledInput', () => {
+  it('should call onChange with new value', async () => {
+    const user = userEvent.setup()
+    const handleChange = jest.fn()
+
+    render(<ControlledInput value="" onChange={handleChange} />)
+
+    await user.type(screen.getByRole('textbox'), 'a')
+
+    expect(handleChange).toHaveBeenCalledWith('a')
+  })
+
+  it('should display controlled value', () => {
+    render(<ControlledInput value="controlled" onChange={jest.fn()} />)
+
+    expect(screen.getByRole('textbox')).toHaveValue('controlled')
+  })
+})
+```
+
+## Conditional Rendering Testing
+
+```typescript
+describe('ConditionalComponent', () => {
+  it('should show loading state', () => {
+    render(<ConditionalComponent isLoading />)
+
+    expect(screen.getByText(/loading/i)).toBeInTheDocument()
+    expect(screen.queryByTestId('data-content')).not.toBeInTheDocument()
+  })
+
+  it('should show error state', () => {
+    render(<ConditionalComponent error="Failed to load" />)
+
+    expect(screen.getByText(/failed to load/i)).toBeInTheDocument()
+  })
+
+  it('should show data when loaded', () => {
+    render(<ConditionalComponent data={{ name: 'Test' }} />)
+
+    expect(screen.getByText('Test')).toBeInTheDocument()
+  })
+
+  it('should show empty state when no data', () => {
+    render(<ConditionalComponent data={null} />)
+
+    expect(screen.getByText(/no data/i)).toBeInTheDocument()
+  })
+})
+```
+
+## List Rendering Testing
+
+```typescript
+describe('ItemList', () => {
+  const items = [
+    { id: '1', name: 'Item 1' },
+    { id: '2', name: 'Item 2' },
+    { id: '3', name: 'Item 3' },
+  ]
+
+  it('should render all items', () => {
+    render(<ItemList items={items} />)
+
+    expect(screen.getAllByRole('listitem')).toHaveLength(3)
+    items.forEach(item => {
+      expect(screen.getByText(item.name)).toBeInTheDocument()
+    })
+  })
+
+  it('should handle item selection', async () => {
+    const user = userEvent.setup()
+    const onSelect = jest.fn()
+
+    render(<ItemList items={items} onSelect={onSelect} />)
+
+    await user.click(screen.getByText('Item 2'))
+
+    expect(onSelect).toHaveBeenCalledWith(items[1])
+  })
+
+  it('should handle empty list', () => {
+    render(<ItemList items={[]} />)
+
+    expect(screen.getByText(/no items/i)).toBeInTheDocument()
+  })
+})
+```
+
+## Modal/Dialog Testing
+
+```typescript
+describe('Modal', () => {
+  it('should not render when closed', () => {
+    render(<Modal isOpen={false} onClose={jest.fn()} />)
+
+    expect(screen.queryByRole('dialog')).not.toBeInTheDocument()
+  })
+
+  it('should render when open', () => {
+    render(<Modal isOpen onClose={jest.fn()} />)
+
+    expect(screen.getByRole('dialog')).toBeInTheDocument()
+  })
+
+  it('should call onClose when clicking overlay', async () => {
+    const user = userEvent.setup()
+    const handleClose = jest.fn()
+
+    render(<Modal isOpen onClose={handleClose} />)
+
+    await user.click(screen.getByTestId('modal-overlay'))
+
+    expect(handleClose).toHaveBeenCalled()
+  })
+
+  it('should call onClose when pressing Escape', async () => {
+    const user = userEvent.setup()
+    const handleClose = jest.fn()
+
+    render(<Modal isOpen onClose={handleClose} />)
+
+    await user.keyboard('{Escape}')
+
+    expect(handleClose).toHaveBeenCalled()
+  })
+
+  it('should trap focus inside modal', async () => {
+    const user = userEvent.setup()
+
+    render(
+      <Modal isOpen onClose={jest.fn()}>
+        <button>First</button>
+        <button>Second</button>
+      </Modal>
+    )
+
+    // Focus should cycle within modal
+    await user.tab()
+    expect(screen.getByText('First')).toHaveFocus()
+
+    await user.tab()
+    expect(screen.getByText('Second')).toHaveFocus()
+
+    await user.tab()
+    expect(screen.getByText('First')).toHaveFocus() // Cycles back
+  })
+})
+```
+
+## Form Testing
+
+```typescript
+describe('LoginForm', () => {
+  it('should submit valid form', async () => {
+    const user = userEvent.setup()
+    const onSubmit = jest.fn()
+
+    render(<LoginForm onSubmit={onSubmit} />)
+
+    await user.type(screen.getByLabelText(/email/i), 'test@example.com')
+    await user.type(screen.getByLabelText(/password/i), 'password123')
+    await user.click(screen.getByRole('button', { name: /sign in/i }))
+
+    expect(onSubmit).toHaveBeenCalledWith({
+      email: 'test@example.com',
+      password: 'password123',
+    })
+  })
+
+  it('should show validation errors', async () => {
+    const user = userEvent.setup()
+
+    render(<LoginForm onSubmit={jest.fn()} />)
+
+    // Submit empty form
+    await user.click(screen.getByRole('button', { name: /sign in/i }))
+
+    expect(screen.getByText(/email is required/i)).toBeInTheDocument()
+    expect(screen.getByText(/password is required/i)).toBeInTheDocument()
+  })
+
+  it('should validate email format', async () => {
+    const user = userEvent.setup()
+
+    render(<LoginForm onSubmit={jest.fn()} />)
+
+    await user.type(screen.getByLabelText(/email/i), 'invalid-email')
+    await user.click(screen.getByRole('button', { name: /sign in/i }))
+
+    expect(screen.getByText(/invalid email/i)).toBeInTheDocument()
+  })
+
+  it('should disable submit button while submitting', async () => {
+    const user = userEvent.setup()
+    const onSubmit = jest.fn(() => new Promise(resolve => setTimeout(resolve, 100)))
+
+    render(<LoginForm onSubmit={onSubmit} />)
+
+    await user.type(screen.getByLabelText(/email/i), 'test@example.com')
+    await user.type(screen.getByLabelText(/password/i), 'password123')
+    await user.click(screen.getByRole('button', { name: /sign in/i }))
+
+    expect(screen.getByRole('button', { name: /signing in/i })).toBeDisabled()
+
+    await waitFor(() => {
+      expect(screen.getByRole('button', { name: /sign in/i })).toBeEnabled()
+    })
+  })
+})
+```
+
+## Data-Driven Tests with test.each
+
+```typescript
+describe('StatusBadge', () => {
+  test.each([
+    ['success', 'bg-green-500'],
+    ['warning', 'bg-yellow-500'],
+    ['error', 'bg-red-500'],
+    ['info', 'bg-blue-500'],
+  ])('should apply correct class for %s status', (status, expectedClass) => {
+    render(<StatusBadge status={status} />)
+
+    expect(screen.getByTestId('status-badge')).toHaveClass(expectedClass)
+  })
+
+  test.each([
+    { input: null, expected: 'Unknown' },
+    { input: undefined, expected: 'Unknown' },
+    { input: '', expected: 'Unknown' },
+    { input: 'invalid', expected: 'Unknown' },
+  ])('should show "Unknown" for invalid input: $input', ({ input, expected }) => {
+    render(<StatusBadge status={input} />)
+
+    expect(screen.getByText(expected)).toBeInTheDocument()
+  })
+})
+```
+
+## Debugging Tips
+
+```typescript
+// Print entire DOM
+screen.debug()
+
+// Print specific element
+screen.debug(screen.getByRole('button'))
+
+// Log testing playground URL
+screen.logTestingPlaygroundURL()
+
+// Pretty print DOM
+import { prettyDOM } from '@testing-library/react'
+console.log(prettyDOM(screen.getByRole('dialog')))
+
+// Check available roles
+import { getRoles } from '@testing-library/react'
+console.log(getRoles(container))
+```
+
+## Common Mistakes to Avoid
+
+### ❌ Don't Use Implementation Details
+
+```typescript
+// Bad - testing implementation
+expect(component.state.isOpen).toBe(true)
+expect(wrapper.find('.internal-class').length).toBe(1)
+
+// Good - testing behavior
+expect(screen.getByRole('dialog')).toBeInTheDocument()
+```
+
+### ❌ Don't Forget Cleanup
+
+```typescript
+// Bad - may leak state between tests
+it('test 1', () => {
+  render(<Component />)
+})
+
+// Good - cleanup is automatic with RTL, but reset mocks
+beforeEach(() => {
+  jest.clearAllMocks()
+})
+```
+
+### ❌ Don't Use Exact String Matching (Prefer Black-Box Assertions)
+
+```typescript
+// ❌ Bad - hardcoded strings are brittle
+expect(screen.getByText('Submit Form')).toBeInTheDocument()
+expect(screen.getByText('Loading...')).toBeInTheDocument()
+
+// ✅ Good - role-based queries (most semantic)
+expect(screen.getByRole('button', { name: /submit/i })).toBeInTheDocument()
+expect(screen.getByRole('status')).toBeInTheDocument()
+
+// ✅ Good - pattern matching (flexible)
+expect(screen.getByText(/submit/i)).toBeInTheDocument()
+expect(screen.getByText(/loading/i)).toBeInTheDocument()
+
+// ✅ Good - test behavior, not exact UI text
+expect(screen.getByRole('button')).toBeDisabled()
+expect(screen.getByRole('alert')).toBeInTheDocument()
+```
+
+**Why prefer black-box assertions?**
+
+- Text content may change (i18n, copy updates)
+- Role-based queries test accessibility
+- Pattern matching is resilient to minor changes
+- Tests focus on behavior, not implementation details
+
+### ❌ Don't Assert on Absence Without Query
+
+```typescript
+// Bad - throws if not found
+expect(screen.getByText('Error')).not.toBeInTheDocument() // Error!
+
+// Good - use queryBy for absence assertions
+expect(screen.queryByText('Error')).not.toBeInTheDocument()
+```
diff --git a/.claude/skills/frontend-testing/guides/domain-components.md b/.claude/skills/frontend-testing/guides/domain-components.md
new file mode 100644
index 0000000000..ed2cc6eb8a
--- /dev/null
+++ b/.claude/skills/frontend-testing/guides/domain-components.md
@@ -0,0 +1,523 @@
+# Domain-Specific Component Testing
+
+This guide covers testing patterns for Dify's domain-specific components.
+
+## Workflow Components (`workflow/`)
+
+Workflow components handle node configuration, data flow, and graph operations.
+
+### Key Test Areas
+
+1. **Node Configuration**
+1. **Data Validation**
+1. **Variable Passing**
+1. **Edge Connections**
+1. **Error Handling**
+
+### Example: Node Configuration Panel
+
+```typescript
+import { render, screen, fireEvent, waitFor } from '@testing-library/react'
+import userEvent from '@testing-library/user-event'
+import NodeConfigPanel from './node-config-panel'
+import { createMockNode, createMockWorkflowContext } from '@/__mocks__/workflow'
+
+// Mock workflow context
+jest.mock('@/app/components/workflow/hooks', () => ({
+  useWorkflowStore: () => mockWorkflowStore,
+  useNodesInteractions: () => mockNodesInteractions,
+}))
+
+let mockWorkflowStore = {
+  nodes: [],
+  edges: [],
+  updateNode: jest.fn(),
+}
+
+let mockNodesInteractions = {
+  handleNodeSelect: jest.fn(),
+  handleNodeDelete: jest.fn(),
+}
+
+describe('NodeConfigPanel', () => {
+  beforeEach(() => {
+    jest.clearAllMocks()
+    mockWorkflowStore = {
+      nodes: [],
+      edges: [],
+      updateNode: jest.fn(),
+    }
+  })
+
+  describe('Node Configuration', () => {
+    it('should render node type selector', () => {
+      const node = createMockNode({ type: 'llm' })
+      render(<NodeConfigPanel node={node} />)
+
+      expect(screen.getByLabelText(/model/i)).toBeInTheDocument()
+    })
+
+    it('should update node config on change', async () => {
+      const user = userEvent.setup()
+      const node = createMockNode({ type: 'llm' })
+
+      render(<NodeConfigPanel node={node} />)
+
+      await user.selectOptions(screen.getByLabelText(/model/i), 'gpt-4')
+
+      expect(mockWorkflowStore.updateNode).toHaveBeenCalledWith(
+        node.id,
+        expect.objectContaining({ model: 'gpt-4' })
+      )
+    })
+  })
+
+  describe('Data Validation', () => {
+    it('should show error for invalid input', async () => {
+      const user = userEvent.setup()
+      const node = createMockNode({ type: 'code' })
+
+      render(<NodeConfigPanel node={node} />)
+
+      // Enter invalid code
+      const codeInput = screen.getByLabelText(/code/i)
+      await user.clear(codeInput)
+      await user.type(codeInput, 'invalid syntax {{{')
+
+      await waitFor(() => {
+        expect(screen.getByText(/syntax error/i)).toBeInTheDocument()
+      })
+    })
+
+    it('should validate required fields', async () => {
+      const node = createMockNode({ type: 'http', data: { url: '' } })
+
+      render(<NodeConfigPanel node={node} />)
+
+      fireEvent.click(screen.getByRole('button', { name: /save/i }))
+
+      await waitFor(() => {
+        expect(screen.getByText(/url is required/i)).toBeInTheDocument()
+      })
+    })
+  })
+
+  describe('Variable Passing', () => {
+    it('should display available variables from upstream nodes', () => {
+      const upstreamNode = createMockNode({
+        id: 'node-1',
+        type: 'start',
+        data: { outputs: [{ name: 'user_input', type: 'string' }] },
+      })
+      const currentNode = createMockNode({
+        id: 'node-2',
+        type: 'llm',
+      })
+
+      mockWorkflowStore.nodes = [upstreamNode, currentNode]
+      mockWorkflowStore.edges = [{ source: 'node-1', target: 'node-2' }]
+
+      render(<NodeConfigPanel node={currentNode} />)
+
+      // Variable selector should show upstream variables
+      fireEvent.click(screen.getByRole('button', { name: /add variable/i }))
+
+      expect(screen.getByText('user_input')).toBeInTheDocument()
+    })
+
+    it('should insert variable into prompt template', async () => {
+      const user = userEvent.setup()
+      const node = createMockNode({ type: 'llm' })
+
+      render(<NodeConfigPanel node={node} />)
+
+      // Click variable button
+      await user.click(screen.getByRole('button', { name: /insert variable/i }))
+      await user.click(screen.getByText('user_input'))
+
+      const promptInput = screen.getByLabelText(/prompt/i)
+      expect(promptInput).toHaveValue(expect.stringContaining('{{user_input}}'))
+    })
+  })
+})
+```
+
+## Dataset Components (`dataset/`)
+
+Dataset components handle file uploads, data display, and search/filter operations.
+
+### Key Test Areas
+
+1. **File Upload**
+1. **File Type Validation**
+1. **Pagination**
+1. **Search & Filtering**
+1. **Data Format Handling**
+
+### Example: Document Uploader
+
+```typescript
+import { render, screen, fireEvent, waitFor } from '@testing-library/react'
+import userEvent from '@testing-library/user-event'
+import DocumentUploader from './document-uploader'
+
+jest.mock('@/service/datasets', () => ({
+  uploadDocument: jest.fn(),
+  parseDocument: jest.fn(),
+}))
+
+import * as datasetService from '@/service/datasets'
+const mockedService = datasetService as jest.Mocked<typeof datasetService>
+
+describe('DocumentUploader', () => {
+  beforeEach(() => {
+    jest.clearAllMocks()
+  })
+
+  describe('File Upload', () => {
+    it('should accept valid file types', async () => {
+      const user = userEvent.setup()
+      const onUpload = jest.fn()
+      mockedService.uploadDocument.mockResolvedValue({ id: 'doc-1' })
+
+      render(<DocumentUploader onUpload={onUpload} />)
+
+      const file = new File(['content'], 'test.pdf', { type: 'application/pdf' })
+      const input = screen.getByLabelText(/upload/i)
+
+      await user.upload(input, file)
+
+      await waitFor(() => {
+        expect(mockedService.uploadDocument).toHaveBeenCalledWith(
+          expect.any(FormData)
+        )
+      })
+    })
+
+    it('should reject invalid file types', async () => {
+      const user = userEvent.setup()
+
+      render(<DocumentUploader />)
+
+      const file = new File(['content'], 'test.exe', { type: 'application/x-msdownload' })
+      const input = screen.getByLabelText(/upload/i)
+
+      await user.upload(input, file)
+
+      expect(screen.getByText(/unsupported file type/i)).toBeInTheDocument()
+      expect(mockedService.uploadDocument).not.toHaveBeenCalled()
+    })
+
+    it('should show upload progress', async () => {
+      const user = userEvent.setup()
+
+      // Mock upload with progress
+      mockedService.uploadDocument.mockImplementation(() => {
+        return new Promise((resolve) => {
+          setTimeout(() => resolve({ id: 'doc-1' }), 100)
+        })
+      })
+
+      render(<DocumentUploader />)
+
+      const file = new File(['content'], 'test.pdf', { type: 'application/pdf' })
+      await user.upload(screen.getByLabelText(/upload/i), file)
+
+      expect(screen.getByRole('progressbar')).toBeInTheDocument()
+
+      await waitFor(() => {
+        expect(screen.queryByRole('progressbar')).not.toBeInTheDocument()
+      })
+    })
+  })
+
+  describe('Error Handling', () => {
+    it('should handle upload failure', async () => {
+      const user = userEvent.setup()
+      mockedService.uploadDocument.mockRejectedValue(new Error('Upload failed'))
+
+      render(<DocumentUploader />)
+
+      const file = new File(['content'], 'test.pdf', { type: 'application/pdf' })
+      await user.upload(screen.getByLabelText(/upload/i), file)
+
+      await waitFor(() => {
+        expect(screen.getByText(/upload failed/i)).toBeInTheDocument()
+      })
+    })
+
+    it('should allow retry after failure', async () => {
+      const user = userEvent.setup()
+      mockedService.uploadDocument
+        .mockRejectedValueOnce(new Error('Network error'))
+        .mockResolvedValueOnce({ id: 'doc-1' })
+
+      render(<DocumentUploader />)
+
+      const file = new File(['content'], 'test.pdf', { type: 'application/pdf' })
+      await user.upload(screen.getByLabelText(/upload/i), file)
+
+      await waitFor(() => {
+        expect(screen.getByRole('button', { name: /retry/i })).toBeInTheDocument()
+      })
+
+      await user.click(screen.getByRole('button', { name: /retry/i }))
+
+      await waitFor(() => {
+        expect(screen.getByText(/uploaded successfully/i)).toBeInTheDocument()
+      })
+    })
+  })
+})
+```
+
+### Example: Document List with Pagination
+
+```typescript
+describe('DocumentList', () => {
+  describe('Pagination', () => {
+    it('should load first page on mount', async () => {
+      mockedService.getDocuments.mockResolvedValue({
+        data: [{ id: '1', name: 'Doc 1' }],
+        total: 50,
+        page: 1,
+        pageSize: 10,
+      })
+
+      render(<DocumentList datasetId="ds-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByText('Doc 1')).toBeInTheDocument()
+      })
+
+      expect(mockedService.getDocuments).toHaveBeenCalledWith('ds-1', { page: 1 })
+    })
+
+    it('should navigate to next page', async () => {
+      const user = userEvent.setup()
+      mockedService.getDocuments.mockResolvedValue({
+        data: [{ id: '1', name: 'Doc 1' }],
+        total: 50,
+        page: 1,
+        pageSize: 10,
+      })
+
+      render(<DocumentList datasetId="ds-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByText('Doc 1')).toBeInTheDocument()
+      })
+
+      mockedService.getDocuments.mockResolvedValue({
+        data: [{ id: '11', name: 'Doc 11' }],
+        total: 50,
+        page: 2,
+        pageSize: 10,
+      })
+
+      await user.click(screen.getByRole('button', { name: /next/i }))
+
+      await waitFor(() => {
+        expect(screen.getByText('Doc 11')).toBeInTheDocument()
+      })
+    })
+  })
+
+  describe('Search & Filtering', () => {
+    it('should filter by search query', async () => {
+      jest.useFakeTimers()
+      // userEvent must be told to advance fake timers, otherwise typing hangs
+      const user = userEvent.setup({ advanceTimers: jest.advanceTimersByTime })
+
+      render(<DocumentList datasetId="ds-1" />)
+
+      await user.type(screen.getByPlaceholderText(/search/i), 'test query')
+
+      // Debounce
+      jest.advanceTimersByTime(300)
+
+      await waitFor(() => {
+        expect(mockedService.getDocuments).toHaveBeenCalledWith(
+          'ds-1',
+          expect.objectContaining({ search: 'test query' })
+        )
+      })
+
+      jest.useRealTimers()
+    })
+  })
+})
+```
+
+## Configuration Components (`app/configuration/`, `config/`)
+
+Configuration components handle forms, validation, and data persistence.
+
+### Key Test Areas
+
+1. **Form Validation**
+1. **Save/Reset**
+1. **Required vs Optional Fields**
+1. **Configuration Persistence**
+1. **Error Feedback**
+
+### Example: App Configuration Form
+
+```typescript
+import { render, screen, fireEvent, waitFor } from '@testing-library/react'
+import userEvent from '@testing-library/user-event'
+import AppConfigForm from './app-config-form'
+
+jest.mock('@/service/apps', () => ({
+  updateAppConfig: jest.fn(),
+  getAppConfig: jest.fn(),
+}))
+
+import * as appService from '@/service/apps'
+const mockedService = appService as jest.Mocked<typeof appService>
+
+describe('AppConfigForm', () => {
+  const defaultConfig = {
+    name: 'My App',
+    description: '',
+    icon: 'default',
+    openingStatement: '',
+  }
+
+  beforeEach(() => {
+    jest.clearAllMocks()
+    mockedService.getAppConfig.mockResolvedValue(defaultConfig)
+  })
+
+  describe('Form Validation', () => {
+    it('should require app name', async () => {
+      const user = userEvent.setup()
+
+      render(<AppConfigForm appId="app-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByLabelText(/name/i)).toHaveValue('My App')
+      })
+
+      // Clear name field
+      await user.clear(screen.getByLabelText(/name/i))
+      await user.click(screen.getByRole('button', { name: /save/i }))
+
+      expect(screen.getByText(/name is required/i)).toBeInTheDocument()
+      expect(mockedService.updateAppConfig).not.toHaveBeenCalled()
+    })
+
+    it('should validate name length', async () => {
+      const user = userEvent.setup()
+
+      render(<AppConfigForm appId="app-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByLabelText(/name/i)).toBeInTheDocument()
+      })
+
+      // Enter very long name
+      await user.clear(screen.getByLabelText(/name/i))
+      await user.type(screen.getByLabelText(/name/i), 'a'.repeat(101))
+
+      expect(screen.getByText(/name must be less than 100 characters/i)).toBeInTheDocument()
+    })
+
+    it('should allow empty optional fields', async () => {
+      const user = userEvent.setup()
+      mockedService.updateAppConfig.mockResolvedValue({ success: true })
+
+      render(<AppConfigForm appId="app-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByLabelText(/name/i)).toHaveValue('My App')
+      })
+
+      // Leave description empty (optional)
+      await user.click(screen.getByRole('button', { name: /save/i }))
+
+      await waitFor(() => {
+        expect(mockedService.updateAppConfig).toHaveBeenCalled()
+      })
+    })
+  })
+
+  describe('Save/Reset Functionality', () => {
+    it('should save configuration', async () => {
+      const user = userEvent.setup()
+      mockedService.updateAppConfig.mockResolvedValue({ success: true })
+
+      render(<AppConfigForm appId="app-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByLabelText(/name/i)).toHaveValue('My App')
+      })
+
+      await user.clear(screen.getByLabelText(/name/i))
+      await user.type(screen.getByLabelText(/name/i), 'Updated App')
+      await user.click(screen.getByRole('button', { name: /save/i }))
+
+      await waitFor(() => {
+        expect(mockedService.updateAppConfig).toHaveBeenCalledWith(
+          'app-1',
+          expect.objectContaining({ name: 'Updated App' })
+        )
+      })
+
+      expect(screen.getByText(/saved successfully/i)).toBeInTheDocument()
+    })
+
+    it('should reset to default values', async () => {
+      const user = userEvent.setup()
+
+      render(<AppConfigForm appId="app-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByLabelText(/name/i)).toHaveValue('My App')
+      })
+
+      // Make changes
+      await user.clear(screen.getByLabelText(/name/i))
+      await user.type(screen.getByLabelText(/name/i), 'Changed Name')
+
+      // Reset
+      await user.click(screen.getByRole('button', { name: /reset/i }))
+
+      expect(screen.getByLabelText(/name/i)).toHaveValue('My App')
+    })
+
+    it('should show unsaved changes warning', async () => {
+      const user = userEvent.setup()
+
+      render(<AppConfigForm appId="app-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByLabelText(/name/i)).toHaveValue('My App')
+      })
+
+      // Make changes
+      await user.type(screen.getByLabelText(/name/i), ' Updated')
+
+      expect(screen.getByText(/unsaved changes/i)).toBeInTheDocument()
+    })
+  })
+
+  describe('Error Handling', () => {
+    it('should show error on save failure', async () => {
+      const user = userEvent.setup()
+      mockedService.updateAppConfig.mockRejectedValue(new Error('Server error'))
+
+      render(<AppConfigForm appId="app-1" />)
+
+      await waitFor(() => {
+        expect(screen.getByLabelText(/name/i)).toHaveValue('My App')
+      })
+
+      await user.click(screen.getByRole('button', { name: /save/i }))
+
+      await waitFor(() => {
+        expect(screen.getByText(/failed to save/i)).toBeInTheDocument()
+      })
+    })
+  })
+})
+```
diff --git a/.claude/skills/frontend-testing/guides/mocking.md b/.claude/skills/frontend-testing/guides/mocking.md
new file mode 100644
index 0000000000..bf0bd79690
--- /dev/null
+++ b/.claude/skills/frontend-testing/guides/mocking.md
@@ -0,0 +1,363 @@
+# Mocking Guide for Dify Frontend Tests
+
+## ⚠️ Important: What NOT to Mock
+
+### DO NOT Mock Base Components
+
+**Never mock components from `@/app/components/base/`** such as:
+
+- `Loading`, `Spinner`
+- `Button`, `Input`, `Select`
+- `Tooltip`, `Modal`, `Dropdown`
+- `Icon`, `Badge`, `Tag`
+
+**Why?**
+
+- Base components will have their own dedicated tests
+- Mocking them creates false positives (tests pass but real integration fails)
+- Using real components tests actual integration behavior
+
+```typescript
+// ❌ WRONG: Don't mock base components
+jest.mock('@/app/components/base/loading', () => () => <div>Loading</div>)
+jest.mock('@/app/components/base/button', () => ({ children }: any) => <button>{children}</button>)
+
+// ✅ CORRECT: Import and use real base components
+import Loading from '@/app/components/base/loading'
+import Button from '@/app/components/base/button'
+// They will render normally in tests
+```
+
+### What TO Mock
+
+Only mock these categories:
+
+1. **API services** (`@/service/*`) - Network calls
+1. **Complex context providers** - When setup is too difficult
+1. **Third-party libraries with side effects** - `next/navigation`, external SDKs
+1. **i18n** - Covered by the shared auto-loaded mock; override locally only for custom translations
+
+## Mock Placement
+
+| Location | Purpose |
+|----------|---------|
+| `web/__mocks__/` | Reusable mocks shared across multiple test files |
+| Test file | Test-specific mocks, inline with `jest.mock()` |
+
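+As an illustration of the split (file and factory names are hypothetical), a reusable factory lives under `web/__mocks__/` and is imported by many spec files, while one-off mocks stay inline in the spec:
+
+```typescript
+// web/__mocks__/toast.ts — hypothetical shared factory
+export const createMockToast = () => ({ notify: jest.fn(), close: jest.fn() })
+
+// some-component.spec.tsx — test-specific mock stays inline
+jest.mock('@/service/api')
+```
+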
+## Essential Mocks
+
+### 1. i18n (Auto-loaded via Shared Mock)
+
+A shared mock is available at `web/__mocks__/react-i18next.ts` and is auto-loaded by Jest.
+**No explicit mock needed** for most tests - it returns translation keys as-is.
+
+For tests requiring custom translations, override the mock:
+
+```typescript
+jest.mock('react-i18next', () => ({
+  useTranslation: () => ({
+    t: (key: string) => {
+      const translations: Record<string, string> = {
+        'my.custom.key': 'Custom translation',
+      }
+      return translations[key] || key
+    },
+  }),
+}))
+```
+
+### 2. Next.js Router
+
+```typescript
+const mockPush = jest.fn()
+const mockReplace = jest.fn()
+
+jest.mock('next/navigation', () => ({
+  useRouter: () => ({
+    push: mockPush,
+    replace: mockReplace,
+    back: jest.fn(),
+    prefetch: jest.fn(),
+  }),
+  usePathname: () => '/current-path',
+  useSearchParams: () => new URLSearchParams('?key=value'),
+}))
+
+describe('Component', () => {
+  beforeEach(() => {
+    jest.clearAllMocks()
+  })
+
+  it('should navigate on click', () => {
+    render(<Component />)
+    fireEvent.click(screen.getByRole('button'))
+    expect(mockPush).toHaveBeenCalledWith('/expected-path')
+  })
+})
+```
+
+### 3. Portal Components (with Shared State)
+
+```typescript
+// ⚠️ Important: Use shared state for components that depend on each other
+let mockPortalOpenState = false
+
+jest.mock('@/app/components/base/portal-to-follow-elem', () => ({
+ PortalToFollowElem: ({ children, open, ...props }: any) => {
+ mockPortalOpenState = open || false // Update shared state
+ return {children}
+ },
+ PortalToFollowElemContent: ({ children }: any) => {
+ // ✅ Matches actual: returns null when portal is closed
+ if (!mockPortalOpenState) return null
+ return {children}
+ },
+ PortalToFollowElemTrigger: ({ children }: any) => (
+ {children}
+ ),
+}))
+
+describe('Component', () => {
+ beforeEach(() => {
+ jest.clearAllMocks()
+ mockPortalOpenState = false // ✅ Reset shared state
+ })
+})
+```
+
+### 4. API Service Mocks
+
+```typescript
+import * as api from '@/service/api'
+
+jest.mock('@/service/api')
+
+const mockedApi = api as jest.Mocked<typeof api>
+
+describe('Component', () => {
+  beforeEach(() => {
+    jest.clearAllMocks()
+
+    // Setup default mock implementation
+    mockedApi.fetchData.mockResolvedValue({ data: [] })
+  })
+
+  it('should show data on success', async () => {
+    mockedApi.fetchData.mockResolvedValue({ data: [{ id: 1 }] })
+
+    render(<Component />)
+
+    await waitFor(() => {
+      expect(screen.getByText('1')).toBeInTheDocument()
+    })
+  })
+
+  it('should show error on failure', async () => {
+    mockedApi.fetchData.mockRejectedValue(new Error('Network error'))
+
+    render(<Component />)
+
+    await waitFor(() => {
+      expect(screen.getByText(/error/i)).toBeInTheDocument()
+    })
+  })
+})
+```
+
+### 5. HTTP Mocking with Nock
+
+```typescript
+import nock from 'nock'
+
+const GITHUB_HOST = 'https://api.github.com'
+const GITHUB_PATH = '/repos/owner/repo'
+
+const mockGithubApi = (status: number, body: Record<string, unknown>, delayMs = 0) => {
+  return nock(GITHUB_HOST)
+    .get(GITHUB_PATH)
+    .delay(delayMs)
+    .reply(status, body)
+}
+
+describe('GithubComponent', () => {
+  afterEach(() => {
+    nock.cleanAll()
+  })
+
+  it('should display repo info', async () => {
+    mockGithubApi(200, { name: 'dify', stars: 1000 })
+
+    render(<GithubComponent />)
+
+    await waitFor(() => {
+      expect(screen.getByText('dify')).toBeInTheDocument()
+    })
+  })
+
+  it('should handle API error', async () => {
+    mockGithubApi(500, { message: 'Server error' })
+
+    render(<GithubComponent />)
+
+    await waitFor(() => {
+      expect(screen.getByText(/error/i)).toBeInTheDocument()
+    })
+  })
+})
+```
+
+### 6. Context Providers
+
+```typescript
+import { ProviderContext } from '@/context/provider-context'
+import { createMockProviderContextValue, createMockPlan } from '@/__mocks__/provider-context'
+
+describe('Component with Context', () => {
+  it('should render for free plan', () => {
+    const mockContext = createMockPlan('sandbox')
+
+    render(
+      <ProviderContext.Provider value={mockContext}>
+        <Component />
+      </ProviderContext.Provider>
+    )
+
+    expect(screen.getByText('Upgrade')).toBeInTheDocument()
+  })
+
+  it('should render for pro plan', () => {
+    const mockContext = createMockPlan('professional')
+
+    render(
+      <ProviderContext.Provider value={mockContext}>
+        <Component />
+      </ProviderContext.Provider>
+    )
+
+    expect(screen.queryByText('Upgrade')).not.toBeInTheDocument()
+  })
+})
+```
+
+### 7. SWR / React Query
+
+```typescript
+// SWR
+jest.mock('swr', () => ({
+  __esModule: true,
+  default: jest.fn(),
+}))
+
+import useSWR from 'swr'
+const mockedUseSWR = useSWR as jest.Mock
+
+describe('Component with SWR', () => {
+  it('should show loading state', () => {
+    mockedUseSWR.mockReturnValue({
+      data: undefined,
+      error: undefined,
+      isLoading: true,
+    })
+
+    render(<Component />)
+    expect(screen.getByText(/loading/i)).toBeInTheDocument()
+  })
+})
+
+// React Query
+import { QueryClient, QueryClientProvider } from '@tanstack/react-query'
+
+const createTestQueryClient = () => new QueryClient({
+  defaultOptions: {
+    queries: { retry: false },
+    mutations: { retry: false },
+  },
+})
+
+const renderWithQueryClient = (ui: React.ReactElement) => {
+  const queryClient = createTestQueryClient()
+  return render(
+    <QueryClientProvider client={queryClient}>
+      {ui}
+    </QueryClientProvider>
+  )
+}
+```
+
+## Mock Best Practices
+
+### ✅ DO
+
+1. **Use real base components** - Import from `@/app/components/base/` directly
+1. **Use real project components** - Prefer importing over mocking
+1. **Reset mocks in `beforeEach`**, not `afterEach`
+1. **Match actual component behavior** in mocks (when mocking is necessary)
+1. **Use factory functions** for complex mock data
+1. **Import actual types** for type safety
+1. **Reset shared mock state** in `beforeEach`
+
+### ❌ DON'T
+
+1. **Don't mock base components** (`Loading`, `Button`, `Tooltip`, etc.)
+1. Don't mock components you can import directly
+1. Don't create overly simplified mocks that miss conditional logic
+1. Don't forget to clean up nock after each test
+1. Don't use `any` types in mocks without necessity
+
+### Mock Decision Tree
+
+```
+Need to use a component in test?
+│
+├─ Is it from @/app/components/base/*?
+│ └─ YES → Import real component, DO NOT mock
+│
+├─ Is it a project component?
+│ └─ YES → Prefer importing real component
+│ Only mock if setup is extremely complex
+│
+├─ Is it an API service (@/service/*)?
+│ └─ YES → Mock it
+│
+├─ Is it a third-party lib with side effects?
+│ └─ YES → Mock it (next/navigation, external SDKs)
+│
+└─ Is it i18n?
+ └─ YES → Uses shared mock (auto-loaded). Override only for custom translations
+```
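+
+Applied in a spec file, the tree typically yields a mock section like this
+sketch (the service path is illustrative):
+
+```typescript
+// Base component: import the real thing, never mock it
+import Button from '@/app/components/base/button'
+
+// API service: mock it (illustrative path)
+jest.mock('@/service/apps')
+
+// Third-party lib with side effects: mock it
+jest.mock('next/navigation', () => ({
+  useRouter: () => ({ push: jest.fn() }),
+  usePathname: () => '/test-path',
+}))
+
+// i18n: nothing to do here; the shared mock is auto-loaded
+```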
+
+## Factory Function Pattern
+
+```typescript
+// __mocks__/data-factories.ts
+import type { User, Project } from '@/types'
+
+export const createMockUser = (overrides: Partial<User> = {}): User => ({
+ id: 'user-1',
+ name: 'Test User',
+ email: 'test@example.com',
+ role: 'member',
+ createdAt: new Date().toISOString(),
+ ...overrides,
+})
+
+export const createMockProject = (overrides: Partial<Project> = {}): Project => ({
+ id: 'project-1',
+ name: 'Test Project',
+ description: 'A test project',
+ owner: createMockUser(),
+ members: [],
+ createdAt: new Date().toISOString(),
+ ...overrides,
+})
+
+// Usage in tests
+it('should display project owner', () => {
+ const project = createMockProject({
+ owner: createMockUser({ name: 'John Doe' }),
+ })
+
+ render(<ProjectCard project={project} />) // ProjectCard is illustrative
+ expect(screen.getByText('John Doe')).toBeInTheDocument()
+})
+```
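+
+Note that factories compose (`createMockProject` builds its default `owner`
+via `createMockUser`), so a test can override deeply nested data without
+re-declaring the whole object.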
diff --git a/.claude/skills/frontend-testing/guides/workflow.md b/.claude/skills/frontend-testing/guides/workflow.md
new file mode 100644
index 0000000000..b0f2994bde
--- /dev/null
+++ b/.claude/skills/frontend-testing/guides/workflow.md
@@ -0,0 +1,269 @@
+# Testing Workflow Guide
+
+This guide defines the workflow for generating tests, especially for complex components or directories with multiple files.
+
+## Scope Clarification
+
+This guide addresses **multi-file workflow** (how to process multiple test files). For coverage requirements within a single test file, see `web/testing/testing.md` § Coverage Goals.
+
+| Scope | Rule |
+|-------|------|
+| **Single file** | Complete coverage in one generation (100% function, >95% branch) |
+| **Multi-file directory** | Process one file at a time, verify each before proceeding |
+
+## ⚠️ Critical Rule: Incremental Approach for Multi-File Testing
+
+When testing a **directory with multiple files**, **NEVER generate all test files at once.** Use an incremental, verify-as-you-go approach.
+
+### Why Incremental?
+
+| Batch Approach (❌) | Incremental Approach (✅) |
+|---------------------|---------------------------|
+| Generate 5+ tests at once | Generate 1 test at a time |
+| Run tests only at the end | Run test immediately after each file |
+| Multiple failures compound | Single point of failure, easy to debug |
+| Hard to identify root cause | Clear cause-effect relationship |
+| Mock issues affect many files | Mock issues caught early |
+| Messy git history | Clean, atomic commits possible |
+
+## Single File Workflow
+
+When testing a **single component, hook, or utility**:
+
+```
+1. Read source code completely
+2. Run `pnpm analyze-component <path>` (if available)
+3. Check complexity score and features detected
+4. Write the test file
+5. Run test: `pnpm test -- <file>.spec.tsx`
+6. Fix any failures
+7. Verify coverage meets goals (100% function, >95% branch)
+```
+
+## Directory/Multi-File Workflow (MUST FOLLOW)
+
+When testing a **directory or multiple files**, follow this strict workflow:
+
+### Step 1: Analyze and Plan
+
+1. **List all files** that need tests in the directory
+1. **Categorize by complexity**:
+ - 🟢 **Simple**: Utility functions, simple hooks, presentational components
+ - 🟡 **Medium**: Components with state, effects, or event handlers
+ - 🔴 **Complex**: Components with API calls, routing, or many dependencies
+1. **Order by dependency**: Test dependencies before dependents
+1. **Create a todo list** to track progress
+
+### Step 2: Determine Processing Order
+
+Process files in this recommended order:
+
+```
+1. Utility functions (simplest, no React)
+2. Custom hooks (isolated logic)
+3. Simple presentational components (few/no props)
+4. Medium complexity components (state, effects)
+5. Complex components (API, routing, many deps)
+6. Container/index components (integration tests - last)
+```
+
+**Rationale**:
+
+- Simpler files help establish mock patterns
+- Hooks used by components should be tested first
+- Integration tests (index files) depend on child components working
+
+### Step 3: Process Each File Incrementally
+
+**For EACH file in the ordered list:**
+
+```
+┌─────────────────────────────────────────────┐
+│ 1. Write test file │
+│ 2. Run: pnpm test -- <file>.spec.tsx │
+│ 3. If FAIL → Fix immediately, re-run │
+│ 4. If PASS → Mark complete in todo list │
+│ 5. ONLY THEN proceed to next file │
+└─────────────────────────────────────────────┘
+```
+
+**DO NOT proceed to the next file until the current one passes.**
+
+### Step 4: Final Verification
+
+After all individual tests pass:
+
+```bash
+# Run all tests in the directory together
+pnpm test -- path/to/directory/
+
+# Check coverage
+pnpm test -- --coverage path/to/directory/
+```
+
+## Component Complexity Guidelines
+
+Use `pnpm analyze-component <path>` to assess complexity before testing.
+
+### 🔴 Very Complex Components (Complexity > 50)
+
+**Consider refactoring BEFORE testing:**
+
+- Break component into smaller, testable pieces
+- Extract complex logic into custom hooks
+- Separate container and presentational layers
+
+**If testing as-is:**
+
+- Use integration tests for complex workflows
+- Use `test.each()` for data-driven testing (see the sketch below)
+- Multiple `describe` blocks for organization
+- Consider testing major sections separately
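+
+A data-driven sketch of that approach (`StatusBadge` and its class names are
+illustrative):
+
+```typescript
+import { render, screen } from '@testing-library/react'
+import StatusBadge from './status-badge' // illustrative component under test
+
+test.each([
+  ['active', 'text-green-600'],
+  ['failed', 'text-red-600'],
+])('should render %s status with class %s', (status, className) => {
+  render(<StatusBadge status={status} />)
+  expect(screen.getByText(status)).toHaveClass(className)
+})
+```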
+
+### 🟡 Medium Complexity (Complexity 30-50)
+
+- Group related tests in `describe` blocks
+- Test integration scenarios between internal parts
+- Focus on state transitions and side effects
+- Use helper functions to reduce test complexity
+
+### 🟢 Simple Components (Complexity < 30)
+
+- Standard test structure
+- Focus on props, rendering, and edge cases
+- Usually straightforward to test
+
+### 📏 Large Files (500+ lines)
+
+Regardless of complexity score:
+
+- **Strongly consider refactoring** before testing
+- If testing as-is, test major sections separately
+- Create helper functions for test setup
+- May need multiple test files
+
+## Todo List Format
+
+When testing multiple files, use a todo list like this:
+
+```
+Testing: path/to/directory/
+
+Ordered by complexity (simple → complex):
+
+☐ utils/helper.ts [utility, simple]
+☐ hooks/use-custom-hook.ts [hook, simple]
+☐ empty-state.tsx [component, simple]
+☐ item-card.tsx [component, medium]
+☐ list.tsx [component, complex]
+☐ index.tsx [integration]
+
+Progress: 0/6 complete
+```
+
+Update status as you complete each:
+
+- ☐ → ⏳ (in progress)
+- ⏳ → ✅ (complete and verified)
+- ⏳ → ❌ (blocked, needs attention)
+
+## When to Stop and Verify
+
+**Always run tests after:**
+
+- Completing a test file
+- Making changes to fix a failure
+- Modifying shared mocks
+- Updating test utilities or helpers
+
+**Signs you should pause:**
+
+- More than 2 consecutive test failures
+- Mock-related errors appearing
+- Unclear why a test is failing
+- Test passing but coverage unexpectedly low
+
+## Common Pitfalls to Avoid
+
+### ❌ Don't: Generate Everything First
+
+```
+# BAD: Writing all files then testing
+Write component-a.spec.tsx
+Write component-b.spec.tsx
+Write component-c.spec.tsx
+Write component-d.spec.tsx
+Run pnpm test ← Multiple failures, hard to debug
+```
+
+### ✅ Do: Verify Each Step
+
+```
+# GOOD: Incremental with verification
+Write component-a.spec.tsx
+Run pnpm test -- component-a.spec.tsx ✅
+Write component-b.spec.tsx
+Run pnpm test -- component-b.spec.tsx ✅
+...continue...
+```
+
+### ❌ Don't: Skip Verification for "Simple" Components
+
+Even simple components can have:
+
+- Import errors
+- Missing mock setup
+- Incorrect assumptions about props
+
+**Always verify, regardless of perceived simplicity.**
+
+### ❌ Don't: Continue When Tests Fail
+
+Failing tests compound:
+
+- A mock issue in file A affects files B, C, D
+- Fixing A later requires revisiting all dependent tests
+- Time wasted on debugging cascading failures
+
+**Fix failures immediately before proceeding.**
+
+## Integration with Claude's Todo Feature
+
+When using Claude for multi-file testing:
+
+1. **Ask Claude to create a todo list** before starting
+1. **Request one file at a time** or ensure Claude processes incrementally
+1. **Verify each test passes** before asking for the next
+1. **Mark todos complete** as you progress
+
+Example prompt:
+
+```
+Test all components in `path/to/directory/`.
+First, analyze the directory and create a todo list ordered by complexity.
+Then, process ONE file at a time, waiting for my confirmation that tests pass
+before proceeding to the next.
+```
+
+## Summary Checklist
+
+Before starting multi-file testing:
+
+- [ ] Listed all files needing tests
+- [ ] Ordered by complexity (simple → complex)
+- [ ] Created todo list for tracking
+- [ ] Understand dependencies between files
+
+During testing:
+
+- [ ] Processing ONE file at a time
+- [ ] Running tests after EACH file
+- [ ] Fixing failures BEFORE proceeding
+- [ ] Updating todo list progress
+
+After completion:
+
+- [ ] All individual tests pass
+- [ ] Full directory test run passes
+- [ ] Coverage goals met
+- [ ] Todo list shows all complete
diff --git a/.claude/skills/frontend-testing/templates/component-test.template.tsx b/.claude/skills/frontend-testing/templates/component-test.template.tsx
new file mode 100644
index 0000000000..9b1542b676
--- /dev/null
+++ b/.claude/skills/frontend-testing/templates/component-test.template.tsx
@@ -0,0 +1,289 @@
+/**
+ * Test Template for React Components
+ *
+ * WHY THIS STRUCTURE?
+ * - Organized sections make tests easy to navigate and maintain
+ * - Mocks at top ensure consistent test isolation
+ * - Factory functions reduce duplication and improve readability
+ * - describe blocks group related scenarios for better debugging
+ *
+ * INSTRUCTIONS:
+ * 1. Replace `ComponentName` with your component name
+ * 2. Update import path
+ * 3. Add/remove test sections based on component features (use analyze-component)
+ * 4. Follow AAA pattern: Arrange → Act → Assert
+ *
+ * RUN FIRST: pnpm analyze-component <path> to identify required test scenarios
+ */
+
+import { render, screen, fireEvent, waitFor } from '@testing-library/react'
+import userEvent from '@testing-library/user-event'
+// import ComponentName from './index'
+
+// ============================================================================
+// Mocks
+// ============================================================================
+// WHY: Mocks must be hoisted to top of file (Jest requirement).
+// They run BEFORE imports, so keep them before component imports.
+
+// i18n (Dify auto-loads a shared mock; a local override like this is only
+// needed when a test requires custom translation behavior)
+// WHY: Returns key instead of translation so tests don't depend on i18n files
+jest.mock('react-i18next', () => ({
+ useTranslation: () => ({
+ t: (key: string) => key,
+ }),
+}))
+
+// Router (if component uses useRouter, usePathname, useSearchParams)
+// WHY: Isolates tests from Next.js routing, enables testing navigation behavior
+// const mockPush = jest.fn()
+// jest.mock('next/navigation', () => ({
+// useRouter: () => ({ push: mockPush }),
+// usePathname: () => '/test-path',
+// }))
+
+// API services (if component fetches data)
+// WHY: Prevents real network calls, enables testing all states (loading/success/error)
+// jest.mock('@/service/api')
+// import * as api from '@/service/api'
+// const mockedApi = api as jest.Mocked<typeof api>
+
+// Shared mock state (for portal/dropdown components)
+// WHY: Portal components like PortalToFollowElem need shared state between
+// parent and child mocks to correctly simulate open/close behavior
+// let mockOpenState = false
+
+// ============================================================================
+// Test Data Factories
+// ============================================================================
+// WHY FACTORIES?
+// - Avoid hard-coded test data scattered across tests
+// - Easy to create variations with overrides
+// - Type-safe when using actual types from source
+// - Single source of truth for default test values
+
+// const createMockProps = (overrides = {}) => ({
+// // Default props that make component render successfully
+// ...overrides,
+// })
+
+// const createMockItem = (overrides = {}) => ({
+// id: 'item-1',
+// name: 'Test Item',
+// ...overrides,
+// })
+
+// ============================================================================
+// Test Helpers
+// ============================================================================
+
+// const renderComponent = (props = {}) => {
+//   return render(<ComponentName {...props} />)
+// }
+
+// ============================================================================
+// Tests
+// ============================================================================
+
+describe('ComponentName', () => {
+ // WHY beforeEach with clearAllMocks?
+ // - Ensures each test starts with clean slate
+ // - Prevents mock call history from leaking between tests
+ // - MUST be beforeEach (not afterEach) to reset BEFORE assertions like toHaveBeenCalledTimes
+ beforeEach(() => {
+ jest.clearAllMocks()
+ // Reset shared mock state if used (CRITICAL for portal/dropdown tests)
+ // mockOpenState = false
+ })
+
+ // --------------------------------------------------------------------------
+ // Rendering Tests (REQUIRED - Every component MUST have these)
+ // --------------------------------------------------------------------------
+ // WHY: Catches import errors, missing providers, and basic render issues
+ describe('Rendering', () => {
+ it('should render without crashing', () => {
+ // Arrange - Setup data and mocks
+ // const props = createMockProps()
+
+ // Act - Render the component
+ // render(<ComponentName {...props} />)
+
+ // Assert - Verify expected output
+ // Prefer getByRole for accessibility; it's what users "see"
+ // expect(screen.getByRole('...')).toBeInTheDocument()
+ })
+
+ it('should render with default props', () => {
+ // WHY: Verifies component works without optional props
+ // render(<ComponentName />)
+ // expect(screen.getByText('...')).toBeInTheDocument()
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Props Tests (REQUIRED - Every component MUST test prop behavior)
+ // --------------------------------------------------------------------------
+ // WHY: Props are the component's API contract. Test them thoroughly.
+ describe('Props', () => {
+ it('should apply custom className', () => {
+ // WHY: Common pattern in Dify - components should merge custom classes
+ // render(<ComponentName className="custom-class" />)
+ // expect(screen.getByTestId('component')).toHaveClass('custom-class')
+ })
+
+ it('should use default values for optional props', () => {
+ // WHY: Verifies TypeScript defaults work at runtime
+ // render(<ComponentName />)
+ // expect(screen.getByRole('...')).toHaveAttribute('...', 'default-value')
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // User Interactions (if component has event handlers - on*, handle*)
+ // --------------------------------------------------------------------------
+ // WHY: Event handlers are core functionality. Test from user's perspective.
+ describe('User Interactions', () => {
+ it('should call onClick when clicked', async () => {
+ // WHY userEvent over fireEvent?
+ // - userEvent simulates real user behavior (focus, hover, then click)
+ // - fireEvent is lower-level, doesn't trigger all browser events
+ // const user = userEvent.setup()
+ // const handleClick = jest.fn()
+ // render(<ComponentName onClick={handleClick} />)
+ //
+ // await user.click(screen.getByRole('button'))
+ //
+ // expect(handleClick).toHaveBeenCalledTimes(1)
+ })
+
+ it('should call onChange when value changes', async () => {
+ // const user = userEvent.setup()
+ // const handleChange = jest.fn()
+ // render(<ComponentName onChange={handleChange} />)
+ //
+ // await user.type(screen.getByRole('textbox'), 'new value')
+ //
+ // expect(handleChange).toHaveBeenCalled()
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // State Management (if component uses useState/useReducer)
+ // --------------------------------------------------------------------------
+ // WHY: Test state through observable UI changes, not internal state values
+ describe('State Management', () => {
+ it('should update state on interaction', async () => {
+ // WHY test via UI, not state?
+ // - State is implementation detail; UI is what users see
+ // - If UI works correctly, state must be correct
+ // const user = userEvent.setup()
+ // render(<ComponentName />)
+ //
+ // // Initial state - verify what user sees
+ // expect(screen.getByText('Initial')).toBeInTheDocument()
+ //
+ // // Trigger state change via user action
+ // await user.click(screen.getByRole('button'))
+ //
+ // // New state - verify UI updated
+ // expect(screen.getByText('Updated')).toBeInTheDocument()
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Async Operations (if component fetches data - useSWR, useQuery, fetch)
+ // --------------------------------------------------------------------------
+ // WHY: Async operations have 3 states users experience: loading, success, error
+ describe('Async Operations', () => {
+ it('should show loading state', () => {
+ // WHY never-resolving promise?
+ // - Keeps component in loading state for assertion
+ // - Alternative: use fake timers
+ // mockedApi.fetchData.mockImplementation(() => new Promise(() => {}))
+ // render(<ComponentName />)
+ //
+ // expect(screen.getByText(/loading/i)).toBeInTheDocument()
+ })
+
+ it('should show data on success', async () => {
+ // WHY waitFor?
+ // - Component updates asynchronously after fetch resolves
+ // - waitFor retries assertion until it passes or times out
+ // mockedApi.fetchData.mockResolvedValue({ items: ['Item 1'] })
+ // render(<ComponentName />)
+ //
+ // await waitFor(() => {
+ // expect(screen.getByText('Item 1')).toBeInTheDocument()
+ // })
+ })
+
+ it('should show error on failure', async () => {
+ // mockedApi.fetchData.mockRejectedValue(new Error('Network error'))
+ // render(<ComponentName />)
+ //
+ // await waitFor(() => {
+ // expect(screen.getByText(/error/i)).toBeInTheDocument()
+ // })
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Edge Cases (REQUIRED - Every component MUST handle edge cases)
+ // --------------------------------------------------------------------------
+ // WHY: Real-world data is messy. Components must handle:
+ // - Null/undefined from API failures or optional fields
+ // - Empty arrays/strings from user clearing data
+ // - Boundary values (0, MAX_INT, special characters)
+ describe('Edge Cases', () => {
+ it('should handle null value', () => {
+ // WHY test null specifically?
+ // - API might return null for missing data
+ // - Prevents "Cannot read property of null" in production
+ // render(<ComponentName value={null} />)
+ // expect(screen.getByText(/no data/i)).toBeInTheDocument()
+ })
+
+ it('should handle undefined value', () => {
+ // WHY test undefined separately from null?
+ // - TypeScript treats them differently
+ // - Optional props are undefined, not null
+ // render(<ComponentName value={undefined} />)
+ // expect(screen.getByText(/no data/i)).toBeInTheDocument()
+ })
+
+ it('should handle empty array', () => {
+ // WHY: Empty state often needs special UI (e.g., "No items yet")
+ // render(<ComponentName items={[]} />)
+ // expect(screen.getByText(/empty/i)).toBeInTheDocument()
+ })
+
+ it('should handle empty string', () => {
+ // WHY: Empty strings are truthy in JS but visually empty
+ // render(<ComponentName value="" />)
+ // expect(screen.getByText(/placeholder/i)).toBeInTheDocument()
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Accessibility (optional but recommended for Dify's enterprise users)
+ // --------------------------------------------------------------------------
+ // WHY: Dify has enterprise customers who may require accessibility compliance
+ describe('Accessibility', () => {
+ it('should have accessible name', () => {
+ // WHY getByRole with name?
+ // - Tests that screen readers can identify the element
+ // - Enforces proper labeling practices
+ // render(<ComponentName />)
+ // expect(screen.getByRole('button', { name: /test label/i })).toBeInTheDocument()
+ })
+
+ it('should support keyboard navigation', async () => {
+ // WHY: Some users can't use a mouse
+ // const user = userEvent.setup()
+ // render(<ComponentName />)
+ //
+ // await user.tab()
+ // expect(screen.getByRole('button')).toHaveFocus()
+ })
+ })
+})
diff --git a/.claude/skills/frontend-testing/templates/hook-test.template.ts b/.claude/skills/frontend-testing/templates/hook-test.template.ts
new file mode 100644
index 0000000000..4fb7fd21ec
--- /dev/null
+++ b/.claude/skills/frontend-testing/templates/hook-test.template.ts
@@ -0,0 +1,207 @@
+/**
+ * Test Template for Custom Hooks
+ *
+ * Instructions:
+ * 1. Replace `useHookName` with your hook name
+ * 2. Update import path
+ * 3. Add/remove test sections based on hook features
+ */
+
+import { renderHook, act, waitFor } from '@testing-library/react'
+// import { useHookName } from './use-hook-name'
+
+// ============================================================================
+// Mocks
+// ============================================================================
+
+// API services (if hook fetches data)
+// jest.mock('@/service/api')
+// import * as api from '@/service/api'
+// const mockedApi = api as jest.Mocked<typeof api>
+
+// ============================================================================
+// Test Helpers
+// ============================================================================
+
+// Wrapper for hooks that need context (SomeContext is a placeholder for
+// whatever context your hook consumes)
+// const createWrapper = (contextValue = {}) => {
+//   return ({ children }: { children: React.ReactNode }) => (
+//     <SomeContext.Provider value={contextValue}>
+//       {children}
+//     </SomeContext.Provider>
+//   )
+// }
+
+// ============================================================================
+// Tests
+// ============================================================================
+
+describe('useHookName', () => {
+ beforeEach(() => {
+ jest.clearAllMocks()
+ })
+
+ // --------------------------------------------------------------------------
+ // Initial State
+ // --------------------------------------------------------------------------
+ describe('Initial State', () => {
+ it('should return initial state', () => {
+ // const { result } = renderHook(() => useHookName())
+ //
+ // expect(result.current.value).toBe(initialValue)
+ // expect(result.current.isLoading).toBe(false)
+ })
+
+ it('should accept initial value from props', () => {
+ // const { result } = renderHook(() => useHookName({ initialValue: 'custom' }))
+ //
+ // expect(result.current.value).toBe('custom')
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // State Updates
+ // --------------------------------------------------------------------------
+ describe('State Updates', () => {
+ it('should update value when setValue is called', () => {
+ // const { result } = renderHook(() => useHookName())
+ //
+ // act(() => {
+ // result.current.setValue('new value')
+ // })
+ //
+ // expect(result.current.value).toBe('new value')
+ })
+
+ it('should reset to initial value', () => {
+ // const { result } = renderHook(() => useHookName({ initialValue: 'initial' }))
+ //
+ // act(() => {
+ // result.current.setValue('changed')
+ // })
+ // expect(result.current.value).toBe('changed')
+ //
+ // act(() => {
+ // result.current.reset()
+ // })
+ // expect(result.current.value).toBe('initial')
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Async Operations
+ // --------------------------------------------------------------------------
+ describe('Async Operations', () => {
+ it('should fetch data on mount', async () => {
+ // mockedApi.fetchData.mockResolvedValue({ data: 'test' })
+ //
+ // const { result } = renderHook(() => useHookName())
+ //
+ // // Initially loading
+ // expect(result.current.isLoading).toBe(true)
+ //
+ // // Wait for data
+ // await waitFor(() => {
+ // expect(result.current.isLoading).toBe(false)
+ // })
+ //
+ // expect(result.current.data).toEqual({ data: 'test' })
+ })
+
+ it('should handle fetch error', async () => {
+ // mockedApi.fetchData.mockRejectedValue(new Error('Network error'))
+ //
+ // const { result } = renderHook(() => useHookName())
+ //
+ // await waitFor(() => {
+ // expect(result.current.error).toBeTruthy()
+ // })
+ //
+ // expect(result.current.error?.message).toBe('Network error')
+ })
+
+ it('should refetch when dependency changes', async () => {
+ // mockedApi.fetchData.mockResolvedValue({ data: 'test' })
+ //
+ // const { result, rerender } = renderHook(
+ // ({ id }) => useHookName(id),
+ // { initialProps: { id: '1' } }
+ // )
+ //
+ // await waitFor(() => {
+ // expect(mockedApi.fetchData).toHaveBeenCalledWith('1')
+ // })
+ //
+ // rerender({ id: '2' })
+ //
+ // await waitFor(() => {
+ // expect(mockedApi.fetchData).toHaveBeenCalledWith('2')
+ // })
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Side Effects
+ // --------------------------------------------------------------------------
+ describe('Side Effects', () => {
+ it('should call callback when value changes', () => {
+ // const callback = jest.fn()
+ // const { result } = renderHook(() => useHookName({ onChange: callback }))
+ //
+ // act(() => {
+ // result.current.setValue('new value')
+ // })
+ //
+ // expect(callback).toHaveBeenCalledWith('new value')
+ })
+
+ it('should cleanup on unmount', () => {
+ // const cleanup = jest.fn()
+ // jest.spyOn(window, 'addEventListener')
+ // jest.spyOn(window, 'removeEventListener')
+ //
+ // const { unmount } = renderHook(() => useHookName())
+ //
+ // expect(window.addEventListener).toHaveBeenCalled()
+ //
+ // unmount()
+ //
+ // expect(window.removeEventListener).toHaveBeenCalled()
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Edge Cases
+ // --------------------------------------------------------------------------
+ describe('Edge Cases', () => {
+ it('should handle null input', () => {
+ // const { result } = renderHook(() => useHookName(null))
+ //
+ // expect(result.current.value).toBeNull()
+ })
+
+ it('should handle rapid updates', () => {
+ // const { result } = renderHook(() => useHookName())
+ //
+ // act(() => {
+ // result.current.setValue('1')
+ // result.current.setValue('2')
+ // result.current.setValue('3')
+ // })
+ //
+ // expect(result.current.value).toBe('3')
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // With Context (if hook uses context)
+ // --------------------------------------------------------------------------
+ describe('With Context', () => {
+ it('should use context value', () => {
+ // const wrapper = createWrapper({ someValue: 'context-value' })
+ // const { result } = renderHook(() => useHookName(), { wrapper })
+ //
+ // expect(result.current.contextValue).toBe('context-value')
+ })
+ })
+})
diff --git a/.claude/skills/frontend-testing/templates/utility-test.template.ts b/.claude/skills/frontend-testing/templates/utility-test.template.ts
new file mode 100644
index 0000000000..ec13b5f5bd
--- /dev/null
+++ b/.claude/skills/frontend-testing/templates/utility-test.template.ts
@@ -0,0 +1,154 @@
+/**
+ * Test Template for Utility Functions
+ *
+ * Instructions:
+ * 1. Replace `utilityFunction` with your function name
+ * 2. Update import path
+ * 3. Use test.each for data-driven tests
+ */
+
+// import { utilityFunction } from './utility'
+
+// ============================================================================
+// Tests
+// ============================================================================
+
+describe('utilityFunction', () => {
+ // --------------------------------------------------------------------------
+ // Basic Functionality
+ // --------------------------------------------------------------------------
+ describe('Basic Functionality', () => {
+ it('should return expected result for valid input', () => {
+ // expect(utilityFunction('input')).toBe('expected-output')
+ })
+
+ it('should handle multiple arguments', () => {
+ // expect(utilityFunction('a', 'b', 'c')).toBe('abc')
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Data-Driven Tests
+ // --------------------------------------------------------------------------
+ describe('Input/Output Mapping', () => {
+ test.each([
+ // [input, expected]
+ ['input1', 'output1'],
+ ['input2', 'output2'],
+ ['input3', 'output3'],
+ ])('should map input %s to %s', (input, expected) => {
+ // expect(utilityFunction(input)).toBe(expected)
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Edge Cases
+ // --------------------------------------------------------------------------
+ describe('Edge Cases', () => {
+ it('should handle empty string', () => {
+ // expect(utilityFunction('')).toBe('')
+ })
+
+ it('should handle null', () => {
+ // expect(utilityFunction(null)).toBe(null)
+ // or
+ // expect(() => utilityFunction(null)).toThrow()
+ })
+
+ it('should handle undefined', () => {
+ // expect(utilityFunction(undefined)).toBe(undefined)
+ // or
+ // expect(() => utilityFunction(undefined)).toThrow()
+ })
+
+ it('should handle empty array', () => {
+ // expect(utilityFunction([])).toEqual([])
+ })
+
+ it('should handle empty object', () => {
+ // expect(utilityFunction({})).toEqual({})
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Boundary Conditions
+ // --------------------------------------------------------------------------
+ describe('Boundary Conditions', () => {
+ it('should handle minimum value', () => {
+ // expect(utilityFunction(0)).toBe(0)
+ })
+
+ it('should handle maximum value', () => {
+ // expect(utilityFunction(Number.MAX_SAFE_INTEGER)).toBe(...)
+ })
+
+ it('should handle negative numbers', () => {
+ // expect(utilityFunction(-1)).toBe(...)
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Type Coercion (if applicable)
+ // --------------------------------------------------------------------------
+ describe('Type Handling', () => {
+ it('should handle numeric string', () => {
+ // expect(utilityFunction('123')).toBe(123)
+ })
+
+ it('should handle boolean', () => {
+ // expect(utilityFunction(true)).toBe(...)
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Error Cases
+ // --------------------------------------------------------------------------
+ describe('Error Handling', () => {
+ it('should throw for invalid input', () => {
+ // expect(() => utilityFunction('invalid')).toThrow('Error message')
+ })
+
+ it('should throw with specific error type', () => {
+ // expect(() => utilityFunction('invalid')).toThrow(ValidationError)
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Complex Objects (if applicable)
+ // --------------------------------------------------------------------------
+ describe('Object Handling', () => {
+ it('should preserve object structure', () => {
+ // const input = { a: 1, b: 2 }
+ // expect(utilityFunction(input)).toEqual({ a: 1, b: 2 })
+ })
+
+ it('should handle nested objects', () => {
+ // const input = { nested: { deep: 'value' } }
+ // expect(utilityFunction(input)).toEqual({ nested: { deep: 'transformed' } })
+ })
+
+ it('should not mutate input', () => {
+ // const input = { a: 1 }
+ // const inputCopy = { ...input }
+ // utilityFunction(input)
+ // expect(input).toEqual(inputCopy)
+ })
+ })
+
+ // --------------------------------------------------------------------------
+ // Array Handling (if applicable)
+ // --------------------------------------------------------------------------
+ describe('Array Handling', () => {
+ it('should process all elements', () => {
+ // expect(utilityFunction([1, 2, 3])).toEqual([2, 4, 6])
+ })
+
+ it('should handle single element array', () => {
+ // expect(utilityFunction([1])).toEqual([2])
+ })
+
+ it('should preserve order', () => {
+ // expect(utilityFunction(['c', 'a', 'b'])).toEqual(['c', 'a', 'b'])
+ })
+ })
+})
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000000..190c0c185b
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,5 @@
+[run]
+omit =
+ api/tests/*
+ api/migrations/*
+ api/core/rag/datasource/vdb/*
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 8246544061..ddec42e0ee 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -11,7 +11,7 @@
"nodeGypDependencies": true,
"version": "lts"
},
- "ghcr.io/devcontainers-contrib/features/npm-package:1": {
+ "ghcr.io/devcontainers-extra/features/npm-package:1": {
"package": "typescript",
"version": "latest"
},
diff --git a/.devcontainer/post_create_command.sh b/.devcontainer/post_create_command.sh
index 2e787ab855..a26fd076ed 100755
--- a/.devcontainer/post_create_command.sh
+++ b/.devcontainer/post_create_command.sh
@@ -6,11 +6,10 @@ cd web && pnpm install
pipx install uv
echo "alias start-api=\"cd $WORKSPACE_ROOT/api && uv run python -m flask run --host 0.0.0.0 --port=5001 --debug\"" >> ~/.bashrc
-echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage\"" >> ~/.bashrc
+echo "alias start-worker=\"cd $WORKSPACE_ROOT/api && uv run python -m celery -A app.celery worker -P threads -c 1 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor\"" >> ~/.bashrc
echo "alias start-web=\"cd $WORKSPACE_ROOT/web && pnpm dev\"" >> ~/.bashrc
echo "alias start-web-prod=\"cd $WORKSPACE_ROOT/web && pnpm build && pnpm start\"" >> ~/.bashrc
echo "alias start-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env up -d\"" >> ~/.bashrc
echo "alias stop-containers=\"cd $WORKSPACE_ROOT/docker && docker-compose -f docker-compose.middleware.yaml -p dify --env-file middleware.env down\"" >> ~/.bashrc
source /home/vscode/.bashrc
-
diff --git a/.editorconfig b/.editorconfig
index 374da0b5d2..be14939ddb 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -29,7 +29,7 @@ trim_trailing_whitespace = false
# Matches multiple files with brace expansion notation
# Set default charset
-[*.{js,tsx}]
+[*.{js,jsx,ts,tsx,mjs}]
indent_style = space
indent_size = 2
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000000..d6f326d4dc
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,234 @@
+# CODEOWNERS
+# This file defines code ownership for the Dify project.
+# Each line is a file pattern followed by one or more owners.
+# Owners can be @username, @org/team-name, or email addresses.
+# For more information, see: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners
+
+* @crazywoola @laipz8200 @Yeuoly
+
+# Backend (default owner, more specific rules below will override)
+api/ @QuantumGhost
+
+# Backend - MCP
+api/core/mcp/ @Nov1c444
+api/core/entities/mcp_provider.py @Nov1c444
+api/services/tools/mcp_tools_manage_service.py @Nov1c444
+api/controllers/mcp/ @Nov1c444
+api/controllers/console/app/mcp_server.py @Nov1c444
+api/tests/**/*mcp* @Nov1c444
+
+# Backend - Workflow - Engine (Core graph execution engine)
+api/core/workflow/graph_engine/ @laipz8200 @QuantumGhost
+api/core/workflow/runtime/ @laipz8200 @QuantumGhost
+api/core/workflow/graph/ @laipz8200 @QuantumGhost
+api/core/workflow/graph_events/ @laipz8200 @QuantumGhost
+api/core/workflow/node_events/ @laipz8200 @QuantumGhost
+api/core/model_runtime/ @laipz8200 @QuantumGhost
+
+# Backend - Workflow - Nodes (Agent, Iteration, Loop, LLM)
+api/core/workflow/nodes/agent/ @Nov1c444
+api/core/workflow/nodes/iteration/ @Nov1c444
+api/core/workflow/nodes/loop/ @Nov1c444
+api/core/workflow/nodes/llm/ @Nov1c444
+
+# Backend - RAG (Retrieval Augmented Generation)
+api/core/rag/ @JohnJyong
+api/services/rag_pipeline/ @JohnJyong
+api/services/dataset_service.py @JohnJyong
+api/services/knowledge_service.py @JohnJyong
+api/services/external_knowledge_service.py @JohnJyong
+api/services/hit_testing_service.py @JohnJyong
+api/services/metadata_service.py @JohnJyong
+api/services/vector_service.py @JohnJyong
+api/services/entities/knowledge_entities/ @JohnJyong
+api/services/entities/external_knowledge_entities/ @JohnJyong
+api/controllers/console/datasets/ @JohnJyong
+api/controllers/service_api/dataset/ @JohnJyong
+api/models/dataset.py @JohnJyong
+api/tasks/rag_pipeline/ @JohnJyong
+api/tasks/add_document_to_index_task.py @JohnJyong
+api/tasks/batch_clean_document_task.py @JohnJyong
+api/tasks/clean_document_task.py @JohnJyong
+api/tasks/clean_notion_document_task.py @JohnJyong
+api/tasks/document_indexing_task.py @JohnJyong
+api/tasks/document_indexing_sync_task.py @JohnJyong
+api/tasks/document_indexing_update_task.py @JohnJyong
+api/tasks/duplicate_document_indexing_task.py @JohnJyong
+api/tasks/recover_document_indexing_task.py @JohnJyong
+api/tasks/remove_document_from_index_task.py @JohnJyong
+api/tasks/retry_document_indexing_task.py @JohnJyong
+api/tasks/sync_website_document_indexing_task.py @JohnJyong
+api/tasks/batch_create_segment_to_index_task.py @JohnJyong
+api/tasks/create_segment_to_index_task.py @JohnJyong
+api/tasks/delete_segment_from_index_task.py @JohnJyong
+api/tasks/disable_segment_from_index_task.py @JohnJyong
+api/tasks/disable_segments_from_index_task.py @JohnJyong
+api/tasks/enable_segment_to_index_task.py @JohnJyong
+api/tasks/enable_segments_to_index_task.py @JohnJyong
+api/tasks/clean_dataset_task.py @JohnJyong
+api/tasks/deal_dataset_index_update_task.py @JohnJyong
+api/tasks/deal_dataset_vector_index_task.py @JohnJyong
+
+# Backend - Plugins
+api/core/plugin/ @Mairuis @Yeuoly @Stream29
+api/services/plugin/ @Mairuis @Yeuoly @Stream29
+api/controllers/console/workspace/plugin.py @Mairuis @Yeuoly @Stream29
+api/controllers/inner_api/plugin/ @Mairuis @Yeuoly @Stream29
+api/tasks/process_tenant_plugin_autoupgrade_check_task.py @Mairuis @Yeuoly @Stream29
+
+# Backend - Trigger/Schedule/Webhook
+api/controllers/trigger/ @Mairuis @Yeuoly
+api/controllers/console/app/workflow_trigger.py @Mairuis @Yeuoly
+api/controllers/console/workspace/trigger_providers.py @Mairuis @Yeuoly
+api/core/trigger/ @Mairuis @Yeuoly
+api/core/app/layers/trigger_post_layer.py @Mairuis @Yeuoly
+api/services/trigger/ @Mairuis @Yeuoly
+api/models/trigger.py @Mairuis @Yeuoly
+api/fields/workflow_trigger_fields.py @Mairuis @Yeuoly
+api/repositories/workflow_trigger_log_repository.py @Mairuis @Yeuoly
+api/repositories/sqlalchemy_workflow_trigger_log_repository.py @Mairuis @Yeuoly
+api/libs/schedule_utils.py @Mairuis @Yeuoly
+api/services/workflow/scheduler.py @Mairuis @Yeuoly
+api/schedule/trigger_provider_refresh_task.py @Mairuis @Yeuoly
+api/schedule/workflow_schedule_task.py @Mairuis @Yeuoly
+api/tasks/trigger_processing_tasks.py @Mairuis @Yeuoly
+api/tasks/trigger_subscription_refresh_tasks.py @Mairuis @Yeuoly
+api/tasks/workflow_schedule_tasks.py @Mairuis @Yeuoly
+api/tasks/workflow_cfs_scheduler/ @Mairuis @Yeuoly
+api/events/event_handlers/sync_plugin_trigger_when_app_created.py @Mairuis @Yeuoly
+api/events/event_handlers/update_app_triggers_when_app_published_workflow_updated.py @Mairuis @Yeuoly
+api/events/event_handlers/sync_workflow_schedule_when_app_published.py @Mairuis @Yeuoly
+api/events/event_handlers/sync_webhook_when_app_created.py @Mairuis @Yeuoly
+
+# Backend - Async Workflow
+api/services/async_workflow_service.py @Mairuis @Yeuoly
+api/tasks/async_workflow_tasks.py @Mairuis @Yeuoly
+
+# Backend - Billing
+api/services/billing_service.py @hj24 @zyssyz123
+api/controllers/console/billing/ @hj24 @zyssyz123
+
+# Backend - Enterprise
+api/configs/enterprise/ @GarfieldDai @GareArc
+api/services/enterprise/ @GarfieldDai @GareArc
+api/services/feature_service.py @GarfieldDai @GareArc
+api/controllers/console/feature.py @GarfieldDai @GareArc
+api/controllers/web/feature.py @GarfieldDai @GareArc
+
+# Backend - Database Migrations
+api/migrations/ @snakevash @laipz8200
+
+# Frontend
+web/ @iamjoel
+
+# Frontend - App - Orchestration
+web/app/components/workflow/ @iamjoel @zxhlyh
+web/app/components/workflow-app/ @iamjoel @zxhlyh
+web/app/components/app/configuration/ @iamjoel @zxhlyh
+web/app/components/app/app-publisher/ @iamjoel @zxhlyh
+
+# Frontend - WebApp - Chat
+web/app/components/base/chat/ @iamjoel @zxhlyh
+
+# Frontend - WebApp - Completion
+web/app/components/share/text-generation/ @iamjoel @zxhlyh
+
+# Frontend - App - List and Creation
+web/app/components/apps/ @JzoNgKVO @iamjoel
+web/app/components/app/create-app-dialog/ @JzoNgKVO @iamjoel
+web/app/components/app/create-app-modal/ @JzoNgKVO @iamjoel
+web/app/components/app/create-from-dsl-modal/ @JzoNgKVO @iamjoel
+
+# Frontend - App - API Documentation
+web/app/components/develop/ @JzoNgKVO @iamjoel
+
+# Frontend - App - Logs and Annotations
+web/app/components/app/workflow-log/ @JzoNgKVO @iamjoel
+web/app/components/app/log/ @JzoNgKVO @iamjoel
+web/app/components/app/log-annotation/ @JzoNgKVO @iamjoel
+web/app/components/app/annotation/ @JzoNgKVO @iamjoel
+
+# Frontend - App - Monitoring
+web/app/(commonLayout)/app/(appDetailLayout)/\[appId\]/overview/ @JzoNgKVO @iamjoel
+web/app/components/app/overview/ @JzoNgKVO @iamjoel
+
+# Frontend - App - Settings
+web/app/components/app-sidebar/ @JzoNgKVO @iamjoel
+
+# Frontend - RAG - Hit Testing
+web/app/components/datasets/hit-testing/ @JzoNgKVO @iamjoel
+
+# Frontend - RAG - List and Creation
+web/app/components/datasets/list/ @iamjoel @WTW0313
+web/app/components/datasets/create/ @iamjoel @WTW0313
+web/app/components/datasets/create-from-pipeline/ @iamjoel @WTW0313
+web/app/components/datasets/external-knowledge-base/ @iamjoel @WTW0313
+
+# Frontend - RAG - Orchestration (general rule first, specific rules below override)
+web/app/components/rag-pipeline/ @iamjoel @WTW0313
+web/app/components/rag-pipeline/components/rag-pipeline-main.tsx @iamjoel @zxhlyh
+web/app/components/rag-pipeline/store/ @iamjoel @zxhlyh
+
+# Frontend - RAG - Documents List
+web/app/components/datasets/documents/list.tsx @iamjoel @WTW0313
+web/app/components/datasets/documents/create-from-pipeline/ @iamjoel @WTW0313
+
+# Frontend - RAG - Segments List
+web/app/components/datasets/documents/detail/ @iamjoel @WTW0313
+
+# Frontend - RAG - Settings
+web/app/components/datasets/settings/ @iamjoel @WTW0313
+
+# Frontend - Ecosystem - Plugins
+web/app/components/plugins/ @iamjoel @zhsama
+
+# Frontend - Ecosystem - Tools
+web/app/components/tools/ @iamjoel @Yessenia-d
+
+# Frontend - Ecosystem - MarketPlace
+web/app/components/plugins/marketplace/ @iamjoel @Yessenia-d
+
+# Frontend - Login and Registration
+web/app/signin/ @douxc @iamjoel
+web/app/signup/ @douxc @iamjoel
+web/app/reset-password/ @douxc @iamjoel
+web/app/install/ @douxc @iamjoel
+web/app/init/ @douxc @iamjoel
+web/app/forgot-password/ @douxc @iamjoel
+web/app/account/ @douxc @iamjoel
+
+# Frontend - Service Authentication
+web/service/base.ts @douxc @iamjoel
+
+# Frontend - WebApp Authentication and Access Control
+web/app/(shareLayout)/components/ @douxc @iamjoel
+web/app/(shareLayout)/webapp-signin/ @douxc @iamjoel
+web/app/(shareLayout)/webapp-reset-password/ @douxc @iamjoel
+web/app/components/app/app-access-control/ @douxc @iamjoel
+
+# Frontend - Explore Page
+web/app/components/explore/ @CodingOnStar @iamjoel
+
+# Frontend - Personal Settings
+web/app/components/header/account-setting/ @CodingOnStar @iamjoel
+web/app/components/header/account-dropdown/ @CodingOnStar @iamjoel
+
+# Frontend - Analytics
+web/app/components/base/ga/ @CodingOnStar @iamjoel
+
+# Frontend - Base Components
+web/app/components/base/ @iamjoel @zxhlyh
+
+# Frontend - Utils and Hooks
+web/utils/classnames.ts @iamjoel @zxhlyh
+web/utils/time.ts @iamjoel @zxhlyh
+web/utils/format.ts @iamjoel @zxhlyh
+web/utils/clipboard.ts @iamjoel @zxhlyh
+web/hooks/use-document-title.ts @iamjoel @zxhlyh
+
+# Frontend - Billing and Education
+web/app/components/billing/ @iamjoel @zxhlyh
+web/app/education-apply/ @iamjoel @zxhlyh
+
+# Frontend - Workspace
+web/app/components/header/account-dropdown/workplace-selector/ @iamjoel @zxhlyh
diff --git a/.github/ISSUE_TEMPLATE/refactor.yml b/.github/ISSUE_TEMPLATE/refactor.yml
index cf74dcc546..dbe8cbb602 100644
--- a/.github/ISSUE_TEMPLATE/refactor.yml
+++ b/.github/ISSUE_TEMPLATE/refactor.yml
@@ -1,8 +1,6 @@
-name: "✨ Refactor"
-description: Refactor existing code for improved readability and maintainability.
-title: "[Chore/Refactor] "
-labels:
- - refactor
+name: "✨ Refactor or Chore"
+description: Refactor existing code or perform maintenance chores to improve readability and reliability.
+title: "[Refactor/Chore] "
body:
- type: checkboxes
attributes:
@@ -11,7 +9,7 @@ body:
options:
- label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
required: true
- - label: This is only for refactoring, if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
+ - label: This is only for refactors or chores; if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
required: true
- label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
required: true
@@ -25,14 +23,14 @@ body:
id: description
attributes:
label: Description
- placeholder: "Describe the refactor you are proposing."
+ placeholder: "Describe the refactor or chore you are proposing."
validations:
required: true
- type: textarea
id: motivation
attributes:
label: Motivation
- placeholder: "Explain why this refactor is necessary."
+ placeholder: "Explain why this refactor or chore is necessary."
validations:
required: false
- type: textarea
diff --git a/.github/ISSUE_TEMPLATE/tracker.yml b/.github/ISSUE_TEMPLATE/tracker.yml
deleted file mode 100644
index 35fedefc75..0000000000
--- a/.github/ISSUE_TEMPLATE/tracker.yml
+++ /dev/null
@@ -1,13 +0,0 @@
-name: "👾 Tracker"
-description: For inner usages, please do not use this template.
-title: "[Tracker] "
-labels:
- - tracker
-body:
- - type: textarea
- id: content
- attributes:
- label: Blockers
- placeholder: "- [ ] ..."
- validations:
- required: true
diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml
index 116fc59ee8..ab7878dc64 100644
--- a/.github/workflows/api-tests.yml
+++ b/.github/workflows/api-tests.yml
@@ -39,25 +39,11 @@ jobs:
- name: Install dependencies
run: uv sync --project api --dev
- - name: Run Unit tests
- run: |
- uv run --project api bash dev/pytest/pytest_unit_tests.sh
-
- name: Run pyrefly check
run: |
cd api
uv add --dev pyrefly
uv run pyrefly check || true
- - name: Coverage Summary
- run: |
- set -x
- # Extract coverage percentage and create a summary
- TOTAL_COVERAGE=$(python -c 'import json; print(json.load(open("coverage.json"))["totals"]["percent_covered_display"])')
-
- # Create a detailed coverage summary
- echo "### Test Coverage Summary :test_tube:" >> $GITHUB_STEP_SUMMARY
- echo "Total Coverage: ${TOTAL_COVERAGE}%" >> $GITHUB_STEP_SUMMARY
- uv run --project api coverage report --format=markdown >> $GITHUB_STEP_SUMMARY
- name: Run dify config tests
run: uv run --project api dev/pytest/pytest_config_tests.py
@@ -76,7 +62,7 @@ jobs:
compose-file: |
docker/docker-compose.middleware.yaml
services: |
- db
+ db_postgres
redis
sandbox
ssrf_proxy
@@ -85,11 +71,26 @@ jobs:
run: |
cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env
- - name: Run Workflow
- run: uv run --project api bash dev/pytest/pytest_workflow.sh
+ - name: Run API Tests
+ env:
+ STORAGE_TYPE: opendal
+ OPENDAL_SCHEME: fs
+ OPENDAL_FS_ROOT: /tmp/dify-storage
+ run: |
+ uv run --project api pytest \
+ --timeout "${PYTEST_TIMEOUT:-180}" \
+ api/tests/integration_tests/workflow \
+ api/tests/integration_tests/tools \
+ api/tests/test_containers_integration_tests \
+ api/tests/unit_tests
- - name: Run Tool
- run: uv run --project api bash dev/pytest/pytest_tools.sh
+ - name: Coverage Summary
+ run: |
+ set -x
+ # Extract coverage percentage and create a summary
+ TOTAL_COVERAGE=$(python -c 'import json; print(json.load(open("coverage.json"))["totals"]["percent_covered_display"])')
- - name: Run TestContainers
- run: uv run --project api bash dev/pytest/pytest_testcontainers.sh
+ # Create a detailed coverage summary
+ echo "### Test Coverage Summary :test_tube:" >> $GITHUB_STEP_SUMMARY
+ echo "Total Coverage: ${TOTAL_COVERAGE}%" >> $GITHUB_STEP_SUMMARY
+ uv run --project api coverage report --format=markdown >> $GITHUB_STEP_SUMMARY
diff --git a/.github/workflows/autofix.yml b/.github/workflows/autofix.yml
index 0cae2ef552..d7a58ce93d 100644
--- a/.github/workflows/autofix.yml
+++ b/.github/workflows/autofix.yml
@@ -2,6 +2,8 @@ name: autofix.ci
on:
pull_request:
branches: ["main"]
+ push:
+ branches: ["main"]
permissions:
contents: read
@@ -11,11 +13,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
-
- # Use uv to ensure we have the same ruff version in CI and locally.
- - uses: astral-sh/setup-uv@v6
+ - uses: actions/setup-python@v5
with:
python-version: "3.11"
+
+ - uses: astral-sh/setup-uv@v6
+
- run: |
cd api
uv sync --dev
@@ -26,12 +29,18 @@ jobs:
# Format code
uv run ruff format ..
+ - name: count migration progress
+ run: |
+ cd api
+ ./cnt_base.sh
+
- name: ast-grep
run: |
- uvx --from ast-grep-cli sg --pattern 'db.session.query($WHATEVER).filter($HERE)' --rewrite 'db.session.query($WHATEVER).where($HERE)' -l py --update-all
- uvx --from ast-grep-cli sg --pattern 'session.query($WHATEVER).filter($HERE)' --rewrite 'session.query($WHATEVER).where($HERE)' -l py --update-all
- uvx --from ast-grep-cli sg -p '$A = db.Column($$$B)' -r '$A = mapped_column($$$B)' -l py --update-all
- uvx --from ast-grep-cli sg -p '$A : $T = db.Column($$$B)' -r '$A : $T = mapped_column($$$B)' -l py --update-all
+ # ast-grep exits 1 if no matches are found; allow idempotent runs.
+ uvx --from ast-grep-cli ast-grep --pattern 'db.session.query($WHATEVER).filter($HERE)' --rewrite 'db.session.query($WHATEVER).where($HERE)' -l py --update-all || true
+ uvx --from ast-grep-cli ast-grep --pattern 'session.query($WHATEVER).filter($HERE)' --rewrite 'session.query($WHATEVER).where($HERE)' -l py --update-all || true
+ uvx --from ast-grep-cli ast-grep -p '$A = db.Column($$$B)' -r '$A = mapped_column($$$B)' -l py --update-all || true
+ uvx --from ast-grep-cli ast-grep -p '$A : $T = db.Column($$$B)' -r '$A : $T = mapped_column($$$B)' -l py --update-all || true
# Convert Optional[T] to T | None (ignoring quoted types)
cat > /tmp/optional-rule.yml << 'EOF'
id: convert-optional-to-union
@@ -49,14 +58,15 @@ jobs:
pattern: $T
fix: $T | None
EOF
- uvx --from ast-grep-cli sg scan --inline-rules "$(cat /tmp/optional-rule.yml)" --update-all
+ uvx --from ast-grep-cli ast-grep scan . --inline-rules "$(cat /tmp/optional-rule.yml)" --update-all
# Fix forward references that were incorrectly converted (Python doesn't support "Type" | None syntax)
find . -name "*.py" -type f -exec sed -i.bak -E 's/"([^"]+)" \| None/Optional["\1"]/g; s/'"'"'([^'"'"']+)'"'"' \| None/Optional['"'"'\1'"'"']/g' {} \;
find . -name "*.py.bak" -type f -delete
+ # mdformat breaks YAML front matter in markdown files. Add --exclude for directories containing YAML front matter.
- name: mdformat
run: |
- uvx mdformat .
+ uvx --python 3.13 mdformat . --exclude ".claude/skills/**"
- name: Install pnpm
uses: pnpm/action-setup@v4
@@ -77,7 +87,6 @@ jobs:
- name: oxlint
working-directory: ./web
- run: |
- pnpx oxlint --fix
+ run: pnpm exec oxlint --config .oxlintrc.json --fix .
- uses: autofix-ci/action@635ffb0c9798bd160680f18fd73371e355b85f27
diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml
index 24a9da4400..f7f464a601 100644
--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -4,8 +4,7 @@ on:
push:
branches:
- "main"
- - "deploy/dev"
- - "deploy/enterprise"
+ - "deploy/**"
- "build/**"
- "release/e-*"
- "hotfix/**"
diff --git a/.github/workflows/db-migration-test.yml b/.github/workflows/db-migration-test.yml
index b9961a4714..101d973466 100644
--- a/.github/workflows/db-migration-test.yml
+++ b/.github/workflows/db-migration-test.yml
@@ -8,7 +8,7 @@ concurrency:
cancel-in-progress: true
jobs:
- db-migration-test:
+ db-migration-test-postgres:
runs-on: ubuntu-latest
steps:
@@ -45,7 +45,7 @@ jobs:
compose-file: |
docker/docker-compose.middleware.yaml
services: |
- db
+ db_postgres
redis
- name: Prepare configs
@@ -57,3 +57,60 @@ jobs:
env:
DEBUG: true
run: uv run --directory api flask upgrade-db
+
+ db-migration-test-mysql:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ persist-credentials: false
+
+ - name: Setup UV and Python
+ uses: astral-sh/setup-uv@v6
+ with:
+ enable-cache: true
+ python-version: "3.12"
+ cache-dependency-glob: api/uv.lock
+
+ - name: Install dependencies
+ run: uv sync --project api
+ - name: Ensure Offline migration are supported
+ run: |
+ # upgrade
+ uv run --directory api flask db upgrade 'base:head' --sql
+ # downgrade
+ uv run --directory api flask db downgrade 'head:base' --sql
+
+ - name: Prepare middleware env for MySQL
+ run: |
+ cd docker
+ cp middleware.env.example middleware.env
+ sed -i 's/DB_TYPE=postgresql/DB_TYPE=mysql/' middleware.env
+ sed -i 's/DB_HOST=db_postgres/DB_HOST=db_mysql/' middleware.env
+ sed -i 's/DB_PORT=5432/DB_PORT=3306/' middleware.env
+ sed -i 's/DB_USERNAME=postgres/DB_USERNAME=mysql/' middleware.env
+
+ - name: Set up Middlewares
+ uses: hoverkraft-tech/compose-action@v2.0.2
+ with:
+ compose-file: |
+ docker/docker-compose.middleware.yaml
+ services: |
+ db_mysql
+ redis
+
+ - name: Prepare configs for MySQL
+ run: |
+ cd api
+ cp .env.example .env
+ sed -i 's/DB_TYPE=postgresql/DB_TYPE=mysql/' .env
+ sed -i 's/DB_PORT=5432/DB_PORT=3306/' .env
+ sed -i 's/DB_USERNAME=postgres/DB_USERNAME=root/' .env
+
+ - name: Run DB Migration
+ env:
+ DEBUG: true
+ run: uv run --directory api flask upgrade-db
diff --git a/.github/workflows/deploy-rag-dev.yml b/.github/workflows/deploy-trigger-dev.yml
similarity index 75%
rename from .github/workflows/deploy-rag-dev.yml
rename to .github/workflows/deploy-trigger-dev.yml
index 86265aad6d..2d9a904fc5 100644
--- a/.github/workflows/deploy-rag-dev.yml
+++ b/.github/workflows/deploy-trigger-dev.yml
@@ -1,4 +1,4 @@
-name: Deploy RAG Dev
+name: Deploy Trigger Dev
permissions:
contents: read
@@ -7,7 +7,7 @@ on:
workflow_run:
workflows: ["Build and Push API & Web"]
branches:
- - "deploy/rag-dev"
+ - "deploy/trigger-dev"
types:
- completed
@@ -16,12 +16,12 @@ jobs:
runs-on: ubuntu-latest
if: |
github.event.workflow_run.conclusion == 'success' &&
- github.event.workflow_run.head_branch == 'deploy/rag-dev'
+ github.event.workflow_run.head_branch == 'deploy/trigger-dev'
steps:
- name: Deploy to server
uses: appleboy/ssh-action@v0.1.8
with:
- host: ${{ secrets.RAG_SSH_HOST }}
+ host: ${{ secrets.TRIGGER_SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |
diff --git a/.github/workflows/expose_service_ports.sh b/.github/workflows/expose_service_ports.sh
index 01772ccf9f..e7d5f60288 100755
--- a/.github/workflows/expose_service_ports.sh
+++ b/.github/workflows/expose_service_ports.sh
@@ -1,6 +1,7 @@
#!/bin/bash
yq eval '.services.weaviate.ports += ["8080:8080"]' -i docker/docker-compose.yaml
+yq eval '.services.weaviate.ports += ["50051:50051"]' -i docker/docker-compose.yaml
yq eval '.services.qdrant.ports += ["6333:6333"]' -i docker/docker-compose.yaml
yq eval '.services.chroma.ports += ["8000:8000"]' -i docker/docker-compose.yaml
yq eval '.services["milvus-standalone"].ports += ["19530:19530"]' -i docker/docker-compose.yaml
@@ -13,4 +14,4 @@ yq eval '.services.tidb.ports += ["4000:4000"]' -i docker/tidb/docker-compose.ya
yq eval '.services.oceanbase.ports += ["2881:2881"]' -i docker/docker-compose.yaml
yq eval '.services.opengauss.ports += ["6600:6600"]' -i docker/docker-compose.yaml
-echo "Ports exposed for sandbox, weaviate, tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase, opengauss"
+echo "Ports exposed for sandbox, weaviate (HTTP 8080, gRPC 50051), tidb, qdrant, chroma, milvus, pgvector, pgvecto-rs, elasticsearch, couchbase, opengauss"
diff --git a/.github/workflows/semantic-pull-request.yml b/.github/workflows/semantic-pull-request.yml
new file mode 100644
index 0000000000..b15c26a096
--- /dev/null
+++ b/.github/workflows/semantic-pull-request.yml
@@ -0,0 +1,21 @@
+name: Semantic Pull Request
+
+on:
+ pull_request:
+ types:
+ - opened
+ - edited
+ - reopened
+ - synchronize
+
+jobs:
+ lint:
+ name: Validate PR title
+ permissions:
+ pull-requests: read
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check title
+ uses: amannn/action-semantic-pull-request@v6.1.1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
index 06584c1b78..5a8a34be79 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -103,6 +103,11 @@ jobs:
run: |
pnpm run lint
+ - name: Web type check
+ if: steps.changed-files.outputs.any_changed == 'true'
+ working-directory: ./web
+ run: pnpm run type-check:tsgo
+
docker-compose-template:
name: Docker Compose Template
runs-on: ubuntu-latest
diff --git a/.github/workflows/translate-i18n-base-on-english.yml b/.github/workflows/translate-i18n-base-on-english.yml
index 836c3e0b02..fe8e2ebc2b 100644
--- a/.github/workflows/translate-i18n-base-on-english.yml
+++ b/.github/workflows/translate-i18n-base-on-english.yml
@@ -20,22 +20,22 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
- fetch-depth: 2
+ fetch-depth: 0
token: ${{ secrets.GITHUB_TOKEN }}
- name: Check for file changes in i18n/en-US
id: check_files
run: |
- recent_commit_sha=$(git rev-parse HEAD)
- second_recent_commit_sha=$(git rev-parse HEAD~1)
- changed_files=$(git diff --name-only $recent_commit_sha $second_recent_commit_sha -- 'i18n/en-US/*.ts')
+ git fetch origin "${{ github.event.before }}" || true
+ git fetch origin "${{ github.sha }}" || true
+ changed_files=$(git diff --name-only "${{ github.event.before }}" "${{ github.sha }}" -- 'i18n/en-US/*.ts')
echo "Changed files: $changed_files"
if [ -n "$changed_files" ]; then
echo "FILES_CHANGED=true" >> $GITHUB_ENV
file_args=""
for file in $changed_files; do
filename=$(basename "$file" .ts)
- file_args="$file_args --file=$filename"
+ file_args="$file_args --file $filename"
done
echo "FILE_ARGS=$file_args" >> $GITHUB_ENV
echo "File arguments: $file_args"
@@ -77,12 +77,15 @@ jobs:
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.GITHUB_TOKEN }}
- commit-message: Update i18n files and type definitions based on en-US changes
- title: 'chore: translate i18n files and update type definitions'
+ commit-message: 'chore(i18n): update translations based on en-US changes'
+ title: 'chore(i18n): translate i18n files and update type definitions'
body: |
This PR was automatically created to update i18n files and TypeScript type definitions based on changes in en-US locale.
-
+
+ **Triggered by:** ${{ github.sha }}
+
**Changes included:**
- Updated translation files for all locales
- Regenerated TypeScript type definitions for type safety
- branch: chore/automated-i18n-updates
+ branch: chore/automated-i18n-updates-${{ github.sha }}
+ delete-branch: true
diff --git a/.github/workflows/vdb-tests.yml b/.github/workflows/vdb-tests.yml
index f54f5d6c64..291171e5c7 100644
--- a/.github/workflows/vdb-tests.yml
+++ b/.github/workflows/vdb-tests.yml
@@ -51,13 +51,13 @@ jobs:
- name: Expose Service Ports
run: sh .github/workflows/expose_service_ports.sh
- - name: Set up Vector Store (TiDB)
- uses: hoverkraft-tech/compose-action@v2.0.2
- with:
- compose-file: docker/tidb/docker-compose.yaml
- services: |
- tidb
- tiflash
+# - name: Set up Vector Store (TiDB)
+# uses: hoverkraft-tech/compose-action@v2.0.2
+# with:
+# compose-file: docker/tidb/docker-compose.yaml
+# services: |
+# tidb
+# tiflash
- name: Set up Vector Stores (Weaviate, Qdrant, PGVector, Milvus, PgVecto-RS, Chroma, MyScale, ElasticSearch, Couchbase, OceanBase)
uses: hoverkraft-tech/compose-action@v2.0.2
@@ -83,8 +83,8 @@ jobs:
ls -lah .
cp api/tests/integration_tests/.env.example api/tests/integration_tests/.env
- - name: Check VDB Ready (TiDB)
- run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py
+# - name: Check VDB Ready (TiDB)
+# run: uv run --project api python api/tests/integration_tests/vdb/tidb_vector/check_tiflash_ready.py
- name: Test Vector Stores
run: uv run --project api bash dev/pytest/pytest_vdb.sh
diff --git a/.gitignore b/.gitignore
index 22a2c42566..5ad728c3da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,9 @@ __pycache__/
# C extensions
*.so
+# *db files
+*.db
+
# Distribution / packaging
.Python
build/
@@ -97,6 +100,7 @@ __pypackages__/
# Celery stuff
celerybeat-schedule
+celerybeat-schedule.db
celerybeat.pid
# SageMath parsed files
@@ -182,7 +186,10 @@ docker/volumes/couchbase/*
docker/volumes/oceanbase/*
docker/volumes/plugin_daemon/*
docker/volumes/matrixone/*
+docker/volumes/mysql/*
+docker/volumes/seekdb/*
!docker/volumes/oceanbase/init.d
+docker/volumes/iris/*
docker/nginx/conf.d/default.conf
docker/nginx/ssl/*
@@ -234,4 +241,7 @@ scripts/stress-test/reports/
# mcp
.playwright-mcp/
-.serena/
\ No newline at end of file
+.serena/
+
+# settings
+*.local.json
diff --git a/.nvmrc b/.nvmrc
new file mode 100644
index 0000000000..7af24b7ddb
--- /dev/null
+++ b/.nvmrc
@@ -0,0 +1 @@
+22.11.0
diff --git a/.vscode/launch.json.template b/.vscode/launch.json.template
index f5a7f0893b..cb934d01b5 100644
--- a/.vscode/launch.json.template
+++ b/.vscode/launch.json.template
@@ -8,8 +8,7 @@
"module": "flask",
"env": {
"FLASK_APP": "app.py",
- "FLASK_ENV": "development",
- "GEVENT_SUPPORT": "True"
+ "FLASK_ENV": "development"
},
"args": [
"run",
@@ -28,9 +27,7 @@
"type": "debugpy",
"request": "launch",
"module": "celery",
- "env": {
- "GEVENT_SUPPORT": "True"
- },
+ "env": {},
"args": [
"-A",
"app.celery",
@@ -40,7 +37,7 @@
"-c",
"1",
"-Q",
- "dataset,generation,mail,ops_trace",
+ "dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor",
"--loglevel",
"INFO"
],
diff --git a/AGENTS.md b/AGENTS.md
index 5859cd1bd9..782861ad36 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -14,7 +14,7 @@ The codebase is split into:
- Run backend CLI commands through `uv run --project api `.
-- Backend QA gate requires passing `make lint`, `make type-check`, and `uv run --project api --dev dev/pytest/pytest_unit_tests.sh` before review.
+- Before submission, all backend modifications must pass local checks: `make lint`, `make type-check`, and `uv run --project api --dev dev/pytest/pytest_unit_tests.sh`.
- Use Makefile targets for linting and formatting; `make lint` and `make type-check` cover the required checks.
@@ -24,8 +24,8 @@ The codebase is split into:
```bash
cd web
-pnpm lint
pnpm lint:fix
+pnpm type-check:tsgo
pnpm test
```
@@ -39,7 +39,7 @@ pnpm test
## Language Style
- **Python**: Keep type hints on functions and attributes, and implement relevant special methods (e.g., `__repr__`, `__str__`).
-- **TypeScript**: Use the strict config, lean on ESLint + Prettier workflows, and avoid `any` types.
+- **TypeScript**: Use the strict config, rely on ESLint (`pnpm lint:fix` preferred) plus `pnpm type-check:tsgo`, and avoid `any` types.
## General Practices
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index fdc414b047..20a7d6c6f6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -77,6 +77,8 @@ How we prioritize:
For setting up the frontend service, please refer to our comprehensive [guide](https://github.com/langgenius/dify/blob/main/web/README.md) in the `web/README.md` file. This document provides detailed instructions to help you set up the frontend environment properly.
+**Testing**: All React components must have comprehensive test coverage. See [web/testing/testing.md](https://github.com/langgenius/dify/blob/main/web/testing/testing.md) for the canonical frontend testing guidelines and follow every requirement described there.
+
#### Backend
For setting up the backend service, kindly refer to our detailed [instructions](https://github.com/langgenius/dify/blob/main/api/README.md) in the `api/README.md` file. This document contains step-by-step guidance to help you get the backend up and running smoothly.
diff --git a/Makefile b/Makefile
index 19c398ec82..07afd8187e 100644
--- a/Makefile
+++ b/Makefile
@@ -70,6 +70,11 @@ type-check:
@uv run --directory api --dev basedpyright
@echo "✅ Type check complete"
+test:
+ @echo "🧪 Running backend unit tests..."
+ @uv run --project api --dev dev/pytest/pytest_unit_tests.sh
+ @echo "✅ Tests complete"
+
# Build Docker images
build-web:
@echo "Building web Docker image: $(WEB_IMAGE):$(VERSION)..."
@@ -119,6 +124,7 @@ help:
@echo " make check - Check code with ruff"
@echo " make lint - Format and fix code with ruff"
@echo " make type-check - Run type checking with basedpyright"
+ @echo " make test - Run backend unit tests"
@echo ""
@echo "Docker Build Targets:"
@echo " make build-web - Build web Docker image"
@@ -128,4 +134,4 @@ help:
@echo " make build-push-all - Build and push all Docker images"
# Phony targets
-.PHONY: build-web build-api push-web push-api build-all push-all build-push-all dev-setup prepare-docker prepare-web prepare-api dev-clean help format check lint type-check
+.PHONY: build-web build-api push-web push-api build-all push-all build-push-all dev-setup prepare-docker prepare-web prepare-api dev-clean help format check lint type-check test
diff --git a/README.md b/README.md
index aadced582d..b71764a214 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,12 @@
+
+
+
+
+
+
@@ -63,7 +69,7 @@ Dify is an open-source platform for developing LLM applications. Its intuitive i
> - CPU >= 2 Core
> - RAM >= 4 GiB
-
+
The easiest way to start the Dify server is through [Docker Compose](docker/docker-compose.yaml). Before running Dify with the following commands, make sure that [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your machine:
@@ -109,15 +115,15 @@ All of Dify's offerings come with corresponding APIs, so you could effortlessly
## Using Dify
-- **Cloud **
+- **Cloud**
We host a [Dify Cloud](https://dify.ai) service for anyone to try with zero setup. It provides all the capabilities of the self-deployed version, and includes 200 free GPT-4 calls in the sandbox plan.
-- **Self-hosting Dify Community Edition**
+- **Self-hosting Dify Community Edition**
Quickly get Dify running in your environment with this [starter guide](#quick-start).
Use our [documentation](https://docs.dify.ai) for further references and more in-depth instructions.
-- **Dify for enterprise / organizations**
- We provide additional enterprise-centric features. [Log your questions for us through this chatbot](https://udify.app/chat/22L1zSxg6yW1cWQg) or [send us an email](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) to discuss enterprise needs.
+- **Dify for enterprise / organizations**
+ We provide additional enterprise-centric features. [Send us an email](mailto:business@dify.ai?subject=%5BGitHub%5DBusiness%20License%20Inquiry) to discuss your enterprise needs.
> For startups and small businesses using AWS, check out [Dify Premium on AWS Marketplace](https://aws.amazon.com/marketplace/pp/prodview-t22mebxzwjhu6) and deploy it to your own AWS VPC with one click. It's an affordable AMI offering with the option to create apps with custom logo and branding.
@@ -129,8 +135,31 @@ Star Dify on GitHub and be instantly notified of new releases.
## Advanced Setup
+### Custom configurations
+
If you need to customize the configuration, please refer to the comments in our [.env.example](docker/.env.example) file and update the corresponding values in your `.env` file. Additionally, you might need to make adjustments to the `docker-compose.yaml` file itself, such as changing image versions, port mappings, or volume mounts, based on your specific deployment environment and requirements. After making any changes, please re-run `docker-compose up -d`. You can find the full list of available environment variables [here](https://docs.dify.ai/getting-started/install-self-hosted/environments).
+#### Customizing Suggested Questions
+
+You can now customize the "Suggested Questions After Answer" feature to better fit your use case. For example, to generate longer, more technical questions:
+
+```bash
+# In your .env file
+SUGGESTED_QUESTIONS_PROMPT='Please help me predict the five most likely technical follow-up questions a developer would ask. Focus on implementation details, best practices, and architecture considerations. Keep each question between 40-60 characters. Output must be JSON array: ["question1","question2","question3","question4","question5"]'
+SUGGESTED_QUESTIONS_MAX_TOKENS=512
+SUGGESTED_QUESTIONS_TEMPERATURE=0.3
+```
+
+See the [Suggested Questions Configuration Guide](docs/suggested-questions-configuration.md) for detailed examples and usage instructions.
+
+### Metrics Monitoring with Grafana
+
+Import the dashboard into Grafana, using Dify's PostgreSQL database as the data source, to monitor metrics at the granularity of apps, tenants, messages, and more.
+
+- [Grafana Dashboard by @bowenliang123](https://github.com/bowenliang123/dify-grafana-dashboard)
+
+### Deployment with Kubernetes
+
If you'd like to configure a highly-available setup, there are community-contributed [Helm Charts](https://helm.sh/) and YAML files which allow Dify to be deployed on Kubernetes.
- [Helm Chart by @LeoQuote](https://github.com/douban/charts/tree/master/charts/dify)
diff --git a/api/.env.example b/api/.env.example
index a462bfdbec..ace4c4ea1b 100644
--- a/api/.env.example
+++ b/api/.env.example
@@ -27,6 +27,9 @@ FILES_URL=http://localhost:5001
# Example: INTERNAL_FILES_URL=http://api:5001
INTERNAL_FILES_URL=http://127.0.0.1:5001
+# TRIGGER URL
+TRIGGER_URL=http://localhost:5001
+
# The time in seconds after the signature is rejected
FILES_ACCESS_TIMEOUT=300
@@ -69,12 +72,15 @@ REDIS_CLUSTERS_PASSWORD=
# celery configuration
CELERY_BROKER_URL=redis://:difyai123456@localhost:${REDIS_PORT}/1
CELERY_BACKEND=redis
-# PostgreSQL database configuration
+
+# Database configuration
+DB_TYPE=postgresql
DB_USERNAME=postgres
DB_PASSWORD=difyai123456
DB_HOST=localhost
DB_PORT=5432
DB_DATABASE=dify
+
SQLALCHEMY_POOL_PRE_PING=true
SQLALCHEMY_POOL_TIMEOUT=30
@@ -156,9 +162,11 @@ SUPABASE_URL=your-server-url
# CORS configuration
WEB_API_CORS_ALLOW_ORIGINS=http://localhost:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://localhost:3000,*
+# When the frontend and backend run on different subdomains, set COOKIE_DOMAIN to the site’s top-level domain (e.g., `example.com`). Leading dots are optional.
+COOKIE_DOMAIN=
# Vector database configuration
-# Supported values are `weaviate`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `oceanbase`, `opengauss`, `tablestore`,`vastbase`,`tidb`,`tidb_on_qdrant`,`baidu`,`lindorm`,`huawei_cloud`,`upstash`, `matrixone`.
+# Supported values are `weaviate`, `oceanbase`, `qdrant`, `milvus`, `myscale`, `relyt`, `pgvector`, `pgvecto-rs`, `chroma`, `opensearch`, `oracle`, `tencent`, `elasticsearch`, `elasticsearch-ja`, `analyticdb`, `couchbase`, `vikingdb`, `opengauss`, `tablestore`, `vastbase`, `tidb`, `tidb_on_qdrant`, `baidu`, `lindorm`, `huawei_cloud`, `upstash`, `matrixone`.
VECTOR_STORE=weaviate
# Prefix used to create collection name in vector database
VECTOR_INDEX_NAME_PREFIX=Vector_index
@@ -168,6 +176,18 @@ WEAVIATE_ENDPOINT=http://localhost:8080
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
WEAVIATE_GRPC_ENABLED=false
WEAVIATE_BATCH_SIZE=100
+WEAVIATE_TOKENIZATION=word
+
+# OceanBase Vector configuration
+OCEANBASE_VECTOR_HOST=127.0.0.1
+OCEANBASE_VECTOR_PORT=2881
+OCEANBASE_VECTOR_USER=root@test
+OCEANBASE_VECTOR_PASSWORD=difyai123456
+OCEANBASE_VECTOR_DATABASE=test
+OCEANBASE_MEMORY_LIMIT=6G
+OCEANBASE_ENABLE_HYBRID_SEARCH=false
+OCEANBASE_FULLTEXT_PARSER=ik
+SEEKDB_MEMORY_LIMIT=2G
# Qdrant configuration, use `http://localhost:6333` for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode
QDRANT_URL=http://localhost:6333
@@ -334,14 +354,14 @@ LINDORM_PASSWORD=admin
LINDORM_USING_UGC=True
LINDORM_QUERY_TIMEOUT=1
-# OceanBase Vector configuration
-OCEANBASE_VECTOR_HOST=127.0.0.1
-OCEANBASE_VECTOR_PORT=2881
-OCEANBASE_VECTOR_USER=root@test
-OCEANBASE_VECTOR_PASSWORD=difyai123456
-OCEANBASE_VECTOR_DATABASE=test
-OCEANBASE_MEMORY_LIMIT=6G
-OCEANBASE_ENABLE_HYBRID_SEARCH=false
+# AlibabaCloud MySQL Vector configuration
+ALIBABACLOUD_MYSQL_HOST=127.0.0.1
+ALIBABACLOUD_MYSQL_PORT=3306
+ALIBABACLOUD_MYSQL_USER=root
+ALIBABACLOUD_MYSQL_PASSWORD=root
+ALIBABACLOUD_MYSQL_DATABASE=dify
+ALIBABACLOUD_MYSQL_MAX_CONNECTION=5
+ALIBABACLOUD_MYSQL_HNSW_M=6
# openGauss configuration
OPENGAUSS_HOST=127.0.0.1
@@ -359,6 +379,12 @@ UPLOAD_IMAGE_FILE_SIZE_LIMIT=10
UPLOAD_VIDEO_FILE_SIZE_LIMIT=100
UPLOAD_AUDIO_FILE_SIZE_LIMIT=50
+# Comma-separated list of file extensions blocked from upload for security reasons.
+# Extensions should be lowercase without dots (e.g., exe,bat,sh,dll).
+# Empty by default to allow all file types.
+# Recommended: exe,bat,cmd,com,scr,vbs,ps1,msi,dll
+UPLOAD_FILE_EXTENSION_BLACKLIST=
+
# Model configuration
MULTIMODAL_SEND_FORMAT=base64
PROMPT_GENERATION_MAX_TOKENS=512
@@ -425,6 +451,9 @@ CODE_EXECUTION_SSL_VERIFY=True
CODE_EXECUTION_POOL_MAX_CONNECTIONS=100
CODE_EXECUTION_POOL_MAX_KEEPALIVE_CONNECTIONS=20
CODE_EXECUTION_POOL_KEEPALIVE_EXPIRY=5.0
+CODE_EXECUTION_CONNECT_TIMEOUT=10
+CODE_EXECUTION_READ_TIMEOUT=60
+CODE_EXECUTION_WRITE_TIMEOUT=10
CODE_MAX_NUMBER=9223372036854775807
CODE_MIN_NUMBER=-9223372036854775808
CODE_MAX_STRING_LENGTH=400000
@@ -445,6 +474,9 @@ HTTP_REQUEST_NODE_MAX_BINARY_SIZE=10485760
HTTP_REQUEST_NODE_MAX_TEXT_SIZE=1048576
HTTP_REQUEST_NODE_SSL_VERIFY=True
+# Webhook request configuration
+WEBHOOK_REQUEST_BODY_MAX_SIZE=10485760
+
# Respect X-* headers to redirect clients
RESPECT_XFORWARD_HEADERS_ENABLED=false
@@ -500,7 +532,7 @@ API_WORKFLOW_NODE_EXECUTION_REPOSITORY=repositories.sqlalchemy_api_workflow_node
API_WORKFLOW_RUN_REPOSITORY=repositories.sqlalchemy_api_workflow_run_repository.DifyAPISQLAlchemyWorkflowRunRepository
# Workflow log cleanup configuration
# Enable automatic cleanup of workflow run logs to manage database size
-WORKFLOW_LOG_CLEANUP_ENABLED=true
+WORKFLOW_LOG_CLEANUP_ENABLED=false
# Number of days to retain workflow run logs (default: 30 days)
WORKFLOW_LOG_RETENTION_DAYS=30
# Batch size for workflow log cleanup operations (default: 100)
@@ -508,6 +540,7 @@ WORKFLOW_LOG_CLEANUP_BATCH_SIZE=100
# App configuration
APP_MAX_EXECUTION_TIME=1200
+APP_DEFAULT_ACTIVE_REQUESTS=0
APP_MAX_ACTIVE_REQUESTS=0
# Celery beat configuration
@@ -522,6 +555,12 @@ ENABLE_CLEAN_MESSAGES=false
ENABLE_MAIL_CLEAN_DOCUMENT_NOTIFY_TASK=false
ENABLE_DATASETS_QUEUE_MONITOR=false
ENABLE_CHECK_UPGRADABLE_PLUGIN_TASK=true
+ENABLE_WORKFLOW_SCHEDULE_POLLER_TASK=true
+# Interval time in minutes for polling scheduled workflows (default: 1 min)
+WORKFLOW_SCHEDULE_POLLER_INTERVAL=1
+WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE=100
+# Maximum number of scheduled workflows to dispatch per tick (0 for unlimited)
+WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK=0
# Position configuration
POSITION_TOOL_PINS=
@@ -593,3 +632,42 @@ SWAGGER_UI_PATH=/swagger-ui.html
# Whether to encrypt dataset IDs when exporting DSL files (default: true)
# Set to false to export dataset IDs as plain text for easier cross-environment import
DSL_EXPORT_ENCRYPT_DATASET_ID=true
+
+# Suggested Questions After Answer Configuration
+# These environment variables allow customization of the suggested questions feature
+#
+# Custom prompt for generating suggested questions (optional)
+# If not set, uses the default prompt that generates 3 questions under 20 characters each
+# Example: "Please help me predict the five most likely technical follow-up questions a developer would ask. Focus on implementation details, best practices, and architecture considerations. Keep each question between 40-60 characters. Output must be JSON array: [\"question1\",\"question2\",\"question3\",\"question4\",\"question5\"]"
+# SUGGESTED_QUESTIONS_PROMPT=
+
+# Maximum number of tokens for suggested questions generation (default: 256)
+# Adjust this value for longer questions or more questions
+# SUGGESTED_QUESTIONS_MAX_TOKENS=256
+
+# Temperature for suggested questions generation (default: 0.0)
+# Higher values (0.5-1.0) produce more creative questions, lower values (0.0-0.3) produce more focused questions
+# SUGGESTED_QUESTIONS_TEMPERATURE=0
+
+# Tenant isolated task queue configuration
+TENANT_ISOLATED_TASK_CONCURRENCY=1
+
+# Maximum number of segments for dataset segments API (0 for unlimited)
+DATASET_MAX_SEGMENTS_PER_REQUEST=0
+
+# Multimodal knowledgebase limit
+SINGLE_CHUNK_ATTACHMENT_LIMIT=10
+ATTACHMENT_IMAGE_FILE_SIZE_LIMIT=2
+ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT=60
+IMAGE_FILE_BATCH_LIMIT=10
+
+# Maximum allowed CSV file size for annotation import in megabytes
+ANNOTATION_IMPORT_FILE_SIZE_LIMIT=2
+# Maximum number of annotation records allowed in a single import
+ANNOTATION_IMPORT_MAX_RECORDS=10000
+# Minimum number of annotation records required in a single import
+ANNOTATION_IMPORT_MIN_RECORDS=1
+ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE=5
+ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR=20
+# Maximum number of concurrent annotation import tasks per tenant
+ANNOTATION_IMPORT_MAX_CONCURRENT=5
\ No newline at end of file
diff --git a/api/.importlinter b/api/.importlinter
index 98fe5f50bb..24ece72b30 100644
--- a/api/.importlinter
+++ b/api/.importlinter
@@ -16,6 +16,7 @@ layers =
graph
nodes
node_events
+ runtime
entities
containers =
core.workflow
diff --git a/api/.ruff.toml b/api/.ruff.toml
index 5a29e1d8fa..7206f7fa0f 100644
--- a/api/.ruff.toml
+++ b/api/.ruff.toml
@@ -36,17 +36,20 @@ select = [
"UP", # pyupgrade rules
"W191", # tab-indentation
"W605", # invalid-escape-sequence
+ "G001", # don't use str format to logging messages
+ "G003", # don't use + in logging messages
+ "G004", # don't use f-strings to format logging messages
+ "UP042", # use StrEnum,
+ "S110", # disallow the try-except-pass pattern.
+
# security related linting rules
# RCE proctection (sort of)
"S102", # exec-builtin, disallow use of `exec`
"S307", # suspicious-eval-usage, disallow use of `eval` and `ast.literal_eval`
"S301", # suspicious-pickle-usage, disallow use of `pickle` and its wrappers.
"S302", # suspicious-marshal-usage, disallow use of `marshal` module
- "S311", # suspicious-non-cryptographic-random-usage
- "G001", # don't use str format to logging messages
- "G003", # don't use + in logging messages
- "G004", # don't use f-strings to format logging messages
- "UP042", # use StrEnum
+ "S311", # suspicious-non-cryptographic-random-usage,
+
]
ignore = [
@@ -91,18 +94,16 @@ ignore = [
"configs/*" = [
"N802", # invalid-function-name
]
-"core/model_runtime/callbacks/base_callback.py" = [
- "T201",
-]
-"core/workflow/callbacks/workflow_logging_callback.py" = [
- "T201",
-]
+"core/model_runtime/callbacks/base_callback.py" = ["T201"]
+"core/workflow/callbacks/workflow_logging_callback.py" = ["T201"]
"libs/gmpy2_pkcs10aep_cipher.py" = [
"N803", # invalid-argument-name
]
"tests/*" = [
"F811", # redefined-while-unused
- "T201", # allow print in tests
+ "T201", # allow print in tests,
+ "S110", # allow ignoring exceptions in tests code (currently)
+
]
[lint.pyflakes]
diff --git a/api/.vscode/launch.json.example b/api/.vscode/launch.json.example
index b9e32e2511..092c66e798 100644
--- a/api/.vscode/launch.json.example
+++ b/api/.vscode/launch.json.example
@@ -54,7 +54,7 @@
"--loglevel",
"DEBUG",
"-Q",
- "dataset,generation,mail,ops_trace,app_deletion"
+ "dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor"
]
}
]
diff --git a/api/AGENTS.md b/api/AGENTS.md
new file mode 100644
index 0000000000..17398ec4b8
--- /dev/null
+++ b/api/AGENTS.md
@@ -0,0 +1,62 @@
+# Agent Skill Index
+
+Start with the section that best matches your need. Each entry lists the problems it solves plus key files/concepts so you know what to expect before opening it.
+
+______________________________________________________________________
+
+## Platform Foundations
+
+- **[Infrastructure Overview](agent_skills/infra.md)**\
+ When to read this:
+
+ - You need to understand where a feature belongs in the architecture.
+ - You’re wiring storage, Redis, vector stores, or OTEL.
+ - You’re about to add CLI commands or async jobs.\
+ What it covers: configuration stack (`configs/app_config.py`, remote settings), storage entry points (`extensions/ext_storage.py`, `core/file/file_manager.py`), Redis conventions (`extensions/ext_redis.py`), plugin runtime topology, vector-store factory (`core/rag/datasource/vdb/*`), observability hooks, SSRF proxy usage, and core CLI commands.
+
+- **[Coding Style](agent_skills/coding_style.md)**\
+ When to read this:
+
+ - You’re writing or reviewing backend code and need the authoritative checklist.
+ - You’re unsure about Pydantic validators, SQLAlchemy session usage, or logging patterns.
+ - You want the exact lint/type/test commands used in PRs.\
+ Includes: Ruff & BasedPyright commands, no-annotation policy, session examples (`with Session(db.engine, ...)`), `@field_validator` usage, logging expectations, and the rule set for file size, helpers, and package management.
+
+______________________________________________________________________
+
+## Plugin & Extension Development
+
+- **[Plugin Systems](agent_skills/plugin.md)**\
+ When to read this:
+
+ - You’re building or debugging a marketplace plugin.
+ - You need to know how manifests, providers, daemons, and migrations fit together.\
+ What it covers: plugin manifests (`core/plugin/entities/plugin.py`), installation/upgrade flows (`services/plugin/plugin_service.py`, CLI commands), runtime adapters (`core/plugin/impl/*` for tool/model/datasource/trigger/endpoint/agent), daemon coordination (`core/plugin/entities/plugin_daemon.py`), and how provider registries surface capabilities to the rest of the platform.
+
+- **[Plugin OAuth](agent_skills/plugin_oauth.md)**\
+ When to read this:
+
+ - You must integrate OAuth for a plugin or datasource.
+ - You’re handling credential encryption or refresh flows.\
+ Topics: credential storage, encryption helpers (`core/helper/provider_encryption.py`), OAuth client bootstrap (`services/plugin/oauth_service.py`, `services/plugin/plugin_parameter_service.py`), and how console/API layers expose the flows.
+
+______________________________________________________________________
+
+## Workflow Entry & Execution
+
+- **[Trigger Concepts](agent_skills/trigger.md)**\
+ When to read this:
+ - You’re debugging why a workflow didn’t start.
+ - You’re adding a new trigger type or hook.
+ - You need to trace async execution, draft debugging, or webhook/schedule pipelines.\
+ Details: Start-node taxonomy, webhook & schedule internals (`core/workflow/nodes/trigger_*`, `services/trigger/*`), async orchestration (`services/async_workflow_service.py`, Celery queues), debug event bus, and storage/logging interactions.
+
+______________________________________________________________________
+
+## Additional Notes for Agents
+
+- All skill docs assume you follow the coding style guide—run Ruff/BasedPyright/tests listed there before submitting changes.
+- When you cannot find an answer in these briefs, search the codebase using the paths referenced (e.g., `core/plugin/impl/tool.py`, `services/dataset_service.py`).
+- If you run into cross-cutting concerns (tenancy, configuration, storage), check the infrastructure guide first; it links to most supporting modules.
+- Keep multi-tenancy and configuration central: everything flows through `configs.dify_config` and `tenant_id`.
+- When touching plugins or triggers, consult both the system overview and the specialised doc to ensure you adjust lifecycle, storage, and observability consistently.
diff --git a/api/Dockerfile b/api/Dockerfile
index 79a4892768..02df91bfc1 100644
--- a/api/Dockerfile
+++ b/api/Dockerfile
@@ -15,7 +15,11 @@ FROM base AS packages
# RUN sed -i 's@deb.debian.org@mirrors.aliyun.com@g' /etc/apt/sources.list.d/debian.sources
RUN apt-get update \
- && apt-get install -y --no-install-recommends gcc g++ libc-dev libffi-dev libgmp-dev libmpfr-dev libmpc-dev
+ && apt-get install -y --no-install-recommends \
+ # basic environment
+ g++ \
+ # for building gmpy2
+ libmpfr-dev libmpc-dev
# Install Python dependencies
COPY pyproject.toml uv.lock ./
@@ -44,14 +48,22 @@ ENV PYTHONIOENCODING=utf-8
WORKDIR /app/api
+# Create non-root user
+ARG dify_uid=1001
+RUN groupadd -r -g ${dify_uid} dify && \
+ useradd -r -u ${dify_uid} -g ${dify_uid} -s /bin/bash dify && \
+ chown -R dify:dify /app
+
RUN \
apt-get update \
# Install dependencies
&& apt-get install -y --no-install-recommends \
# basic environment
- curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
+ curl nodejs \
+ # for gmpy2 \
+ libgmp-dev libmpfr-dev libmpc-dev \
# For Security
- expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
+ expat libldap-2.5-0=2.5.13+dfsg-5 perl libsqlite3-0=3.40.1-2+deb12u2 zlib1g=1:1.2.13.dfsg-1 \
# install fonts to support the use of tools like pypdfium2
fonts-noto-cjk \
# install a package to improve the accuracy of guessing mime type and file extension
@@ -63,24 +75,29 @@ RUN \
# Copy Python environment and packages
ENV VIRTUAL_ENV=/app/api/.venv
-COPY --from=packages ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+COPY --from=packages --chown=dify:dify ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Download nltk data
-RUN python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')"
+RUN mkdir -p /usr/local/share/nltk_data && NLTK_DATA=/usr/local/share/nltk_data python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger'); nltk.download('stopwords')" \
+ && chmod -R 755 /usr/local/share/nltk_data
ENV TIKTOKEN_CACHE_DIR=/app/api/.tiktoken_cache
-RUN python -c "import tiktoken; tiktoken.encoding_for_model('gpt2')"
+RUN python -c "import tiktoken; tiktoken.encoding_for_model('gpt2')" \
+ && chown -R dify:dify ${TIKTOKEN_CACHE_DIR}
# Copy source code
-COPY . /app/api/
+COPY --chown=dify:dify . /app/api/
+
+# Prepare entrypoint script
+COPY --chown=dify:dify --chmod=755 docker/entrypoint.sh /entrypoint.sh
-# Copy entrypoint
-COPY docker/entrypoint.sh /entrypoint.sh
-RUN chmod +x /entrypoint.sh
ARG COMMIT_SHA
ENV COMMIT_SHA=${COMMIT_SHA}
+ENV NLTK_DATA=/usr/local/share/nltk_data
+
+USER dify
ENTRYPOINT ["/bin/bash", "/entrypoint.sh"]
diff --git a/api/README.md b/api/README.md
index e75ea3d354..2dab2ec6e6 100644
--- a/api/README.md
+++ b/api/README.md
@@ -15,8 +15,8 @@
```bash
cd ../docker
cp middleware.env.example middleware.env
- # change the profile to other vector database if you are not using weaviate
- docker compose -f docker-compose.middleware.yaml --profile weaviate -p dify up -d
+ # change the profile to mysql if you are not using postgres; change the profile to another vector database if you are not using weaviate
+ docker compose -f docker-compose.middleware.yaml --profile postgresql --profile weaviate -p dify up -d
cd ../api
```
@@ -26,6 +26,10 @@
cp .env.example .env
```
+> [!IMPORTANT]
+>
+> When the frontend and backend run on different subdomains, set COOKIE_DOMAIN to the site’s top-level domain (e.g., `example.com`). The frontend and backend must be under the same top-level domain in order to share authentication cookies.
+
1. Generate a `SECRET_KEY` in the `.env` file.
bash for Linux
@@ -80,7 +84,7 @@
1. If you need to handle and debug the async tasks (e.g. dataset importing and documents indexing), please start the worker service.
```bash
-uv run celery -A app.celery worker -P gevent -c 2 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation
+uv run celery -A app.celery worker -P threads -c 2 --loglevel INFO -Q dataset,priority_dataset,priority_pipeline,pipeline,mail,ops_trace,app_deletion,plugin,workflow_storage,conversation,workflow,schedule_poller,schedule_executor,triggered_workflow_dispatcher,trigger_refresh_executor
```
Additionally, if you want to debug the celery scheduled tasks, you can run the following command in another terminal to start the beat service:
diff --git a/api/agent_skills/coding_style.md b/api/agent_skills/coding_style.md
new file mode 100644
index 0000000000..a2b66f0bd5
--- /dev/null
+++ b/api/agent_skills/coding_style.md
@@ -0,0 +1,115 @@
+## Linter
+
+- Always follow `.ruff.toml`.
+- Run `uv run ruff check --fix --unsafe-fixes`.
+- Keep each line under 100 characters (including spaces).
+
+## Code Style
+
+- `snake_case` for variables and functions.
+- `PascalCase` for classes.
+- `UPPER_CASE` for constants.
+
+## Rules
+
+- Use Pydantic v2 standard.
+- Use `uv` for package management.
+- Do not override dunder methods like `__init__`, `__iadd__`, etc.
+- Never launch services (`uv run app.py`, `flask run`, etc.); running tests under `tests/` is allowed.
+- Prefer simple functions over classes for lightweight helpers.
+- Keep files below 800 lines; split when necessary.
+- Keep code readable—no clever hacks.
+- Never use `print`; log with `logger = logging.getLogger(__name__)`.
+
+## Guiding Principles
+
+- Mirror the project’s layered architecture: controller → service → core/domain.
+- Reuse existing helpers in `core/`, `services/`, and `libs/` before creating new abstractions.
+- Optimise for observability: deterministic control flow, clear logging, actionable errors.
+
+## SQLAlchemy Patterns
+
+- Models inherit from `models.base.Base`; never create ad-hoc metadata or engines.
+
+- Open sessions with context managers:
+
+ ```python
+ from sqlalchemy import select
+ from sqlalchemy.orm import Session
+
+ from extensions.ext_database import db
+ from models import Workflow  # re-exported via models/__init__.py
+
+ with Session(db.engine, expire_on_commit=False) as session:
+ stmt = select(Workflow).where(
+ Workflow.id == workflow_id,
+ Workflow.tenant_id == tenant_id,
+ )
+ workflow = session.execute(stmt).scalar_one_or_none()
+ ```
+
+- Use SQLAlchemy expressions; avoid raw SQL unless necessary.
+
+- Introduce repository abstractions only for very large tables (e.g., workflow executions) to support alternative storage strategies.
+
+- Always scope queries by `tenant_id` and protect write paths with safeguards (`FOR UPDATE`, row counts, etc.).
+
+## Storage & External IO
+
+- Access storage via `extensions.ext_storage.storage`.
+- Use `core.helper.ssrf_proxy` for outbound HTTP fetches.
+- Background tasks that touch storage must be idempotent and log the relevant object identifiers.
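+
+A minimal sketch combining the two helpers above (the function name is hypothetical, and the exact `ssrf_proxy.get`/`storage.save` signatures are assumptions based on the modules referenced here):
+
+```python
+import logging
+
+from core.helper import ssrf_proxy
+from extensions.ext_storage import storage
+
+logger = logging.getLogger(__name__)
+
+def archive_remote_file(url: str, key: str) -> None:
+    # Fetch through the SSRF-safe client instead of plain httpx.
+    response = ssrf_proxy.get(url)
+    response.raise_for_status()
+    # Persist via the shared storage facade; safe to re-run with the same key.
+    storage.save(key, response.content)
+    logger.info("archived remote file to %s", key)
+```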
+
+## Pydantic Usage
+
+- Define DTOs with Pydantic v2 models and forbid extras by default.
+
+- Use `@field_validator` / `@model_validator` for domain rules.
+
+- Example:
+
+ ```python
+ from pydantic import BaseModel, ConfigDict, HttpUrl, field_validator
+
+ class TriggerConfig(BaseModel):
+ endpoint: HttpUrl
+ secret: str
+
+ model_config = ConfigDict(extra="forbid")
+
+ @field_validator("secret")
+ def ensure_secret_prefix(cls, value: str) -> str:
+ if not value.startswith("dify_"):
+ raise ValueError("secret must start with dify_")
+ return value
+ ```
+
+## Generics & Protocols
+
+- Use `typing.Protocol` to define behavioural contracts (e.g., cache interfaces).
+- Apply generics (`TypeVar`, `Generic`) for reusable utilities like caches or providers.
+- Validate dynamic inputs at runtime when generics cannot enforce safety alone.
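+
+A tiny illustration of both ideas (all names are hypothetical):
+
+```python
+from typing import Generic, Protocol, TypeVar
+
+T = TypeVar("T")
+
+class Cache(Protocol):
+    # Behavioural contract: anything with matching get/set shapes qualifies.
+    def get(self, key: str) -> str | None: ...
+    def set(self, key: str, value: str) -> None: ...
+
+class InMemoryStore(Generic[T]):
+    # Reusable generic utility; T is fixed by the caller, e.g. InMemoryStore[bytes]().
+    def __init__(self) -> None:
+        self._items: dict[str, T] = {}
+
+    def put(self, key: str, value: T) -> None:
+        self._items[key] = value
+```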
+
+## Error Handling & Logging
+
+- Raise domain-specific exceptions (`services/errors`, `core/errors`) and translate to HTTP responses in controllers.
+- Declare `logger = logging.getLogger(__name__)` at module top.
+- Include tenant/app/workflow identifiers in log context.
+- Log retryable events at `warning`, terminal failures at `error`.
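+
+A short sketch of the pattern (the task function and identifiers are hypothetical):
+
+```python
+import logging
+
+logger = logging.getLogger(__name__)
+
+def dispatch_trigger(tenant_id: str, app_id: str) -> None:
+    try:
+        ...  # call into the service layer
+    except TimeoutError:
+        # Retryable condition: log at warning with identifiers for correlation.
+        logger.warning("trigger dispatch timed out (tenant=%s, app=%s)", tenant_id, app_id)
+        raise
+    except Exception:
+        # Terminal failure: log at error (with traceback) before translating it.
+        logger.exception("trigger dispatch failed (tenant=%s, app=%s)", tenant_id, app_id)
+        raise
+```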
+
+## Tooling & Checks
+
+- Format/lint: `uv run --project api --dev ruff format ./api` and `uv run --project api --dev ruff check --fix --unsafe-fixes ./api`.
+- Type checks: `uv run --directory api --dev basedpyright`.
+- Tests: `uv run --project api --dev dev/pytest/pytest_unit_tests.sh`.
+- Run all of the above before submitting your work.
+
+## Controllers & Services
+
+- Controllers: parse input via Pydantic, invoke services, return serialised responses; no business logic.
+- Services: coordinate repositories, providers, background tasks; keep side effects explicit.
+- Avoid repositories unless necessary; direct SQLAlchemy usage is preferred for typical tables.
+- Document non-obvious behaviour with concise comments.
+
+## Miscellaneous
+
+- Use `configs.dify_config` for configuration—never read environment variables directly.
+- Maintain tenant awareness end-to-end; `tenant_id` must flow through every layer touching shared resources.
+- Queue async work through `services/async_workflow_service`; implement tasks under `tasks/` with explicit queue selection.
+- Keep experimental scripts under `dev/`; do not ship them in production builds.
diff --git a/api/agent_skills/infra.md b/api/agent_skills/infra.md
new file mode 100644
index 0000000000..bc36c7bf64
--- /dev/null
+++ b/api/agent_skills/infra.md
@@ -0,0 +1,96 @@
+## Configuration
+
+- Import `configs.dify_config` for every runtime toggle. Do not read environment variables directly.
+- Add new settings to the proper mixin inside `configs/` (deployment, feature, middleware, etc.) so they load through `DifyConfig`.
+- Remote overrides come from the optional providers in `configs/remote_settings_sources`; keep defaults in code safe when the value is missing.
+- Example: logging pulls targets from `extensions/ext_logging.py`, and model provider URLs are assembled in `services/entities/model_provider_entities.py`.
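+
+A minimal sketch of the pattern (`WORKFLOW_LOG_CLEANUP_ENABLED` is a real flag from `.env.example`; the surrounding logic is illustrative):
+
+```python
+from configs import dify_config
+
+# Read the toggle through the typed config object, never os.environ.
+if dify_config.WORKFLOW_LOG_CLEANUP_ENABLED:
+    schedule_cleanup()  # hypothetical helper
+```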
+
+## Dependencies
+
+- Runtime dependencies live in `[project].dependencies` inside `pyproject.toml`. Optional clients go into the `storage`, `tools`, or `vdb` groups under `[dependency-groups]`.
+- Always pin versions and keep the list alphabetised. Shared tooling (lint, typing, pytest) belongs in the `dev` group.
+- When code needs a new package, explain why in the PR and run `uv lock` so the lockfile stays current.
+
+## Storage & Files
+
+- Use `extensions.ext_storage.storage` for all blob IO; it already respects the configured backend.
+- Convert files for workflows with helpers in `core/file/file_manager.py`; they handle signed URLs and multimodal payloads.
+- When writing controller logic, delegate upload quotas and metadata to `services/file_service.py` instead of touching storage directly.
+- All outbound HTTP fetches (webhooks, remote files) must go through the SSRF-safe client in `core/helper/ssrf_proxy.py`; it wraps `httpx` with the allow/deny rules configured for the platform.
+
+## Redis & Shared State
+
+- Access Redis through `extensions.ext_redis.redis_client`. For locking, reuse `redis_client.lock`.
+- Prefer higher-level helpers when available: rate limits use `libs.helper.RateLimiter`, provider metadata uses caches in `core/helper/provider_cache.py`.
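+
+For example, a lock around a critical section (the key name is illustrative; `lock()` follows redis-py's `Lock` API):
+
+```python
+from extensions.ext_redis import redis_client
+
+# Give the lock a timeout so a crashed worker cannot hold it forever.
+with redis_client.lock("workflow:schedule:sync", timeout=30, blocking_timeout=5):
+    ...  # mutate the shared state
+```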
+
+## Models
+
+- SQLAlchemy models sit in `models/` and inherit from the shared declarative `Base` defined in `models/base.py` (metadata configured via `models/engine.py`).
+- `models/__init__.py` exposes grouped aggregates: account/tenant models, app and conversation tables, datasets, providers, workflow runs, triggers, etc. Import from there to avoid deep path churn.
+- Follow the DDD boundary: persistence objects live in `models/`, repositories under `repositories/` translate them into domain entities, and services consume those repositories.
+- When adding a table, create the model class, register it in `models/__init__.py`, wire a repository if needed, and generate an Alembic migration as described below.
+
+## Vector Stores
+
+- Vector client implementations live in `core/rag/datasource/vdb/`, with a common factory in `core/rag/datasource/vdb/vector_factory.py` and enums in `core/rag/datasource/vdb/vector_type.py`.
+- Retrieval pipelines call these providers through `core/rag/datasource/retrieval_service.py` and dataset ingestion flows in `services/dataset_service.py`.
+- The CLI helper `flask vdb-migrate` orchestrates bulk migrations using routines in `commands.py`; reuse that pattern when adding new backend transitions.
+- To add another store, mirror the provider layout, register it with the factory, and include any schema changes in Alembic migrations.
+
+## Observability & OTEL
+
+- OpenTelemetry settings live under the observability mixin in `configs/observability`. Toggle exporters and sampling via `dify_config`, not ad-hoc env reads.
+- HTTP, Celery, Redis, SQLAlchemy, and httpx instrumentation is initialised in `extensions/ext_app_metrics.py` and `extensions/ext_request_logging.py`; reuse these hooks when adding new workers or entrypoints.
+- When creating background tasks or external calls, propagate tracing context with helpers in the existing instrumented clients (e.g. use the shared `httpx` session from `core/helper/http_client_pooling.py`).
+- If you add a new external integration, ensure spans and metrics are emitted by wiring the appropriate OTEL instrumentation package in `pyproject.toml` and configuring it in `extensions/`.
+
+## Ops Integrations
+
+- Langfuse support and other tracing bridges live under `core/ops/opik_trace`. Config toggles sit in `configs/observability`, while exporters are initialised in the OTEL extensions mentioned above.
+- External monitoring services should follow this pattern: keep client code in `core/ops`, expose switches via `dify_config`, and hook initialisation in `extensions/ext_app_metrics.py` or sibling modules.
+- Before instrumenting new code paths, check whether existing context helpers (e.g. `extensions/ext_request_logging.py`) already capture the necessary metadata.
+
+## Controllers, Services, Core
+
+- Controllers only parse HTTP input and call a service method. Keep business rules in `services/`.
+- Services enforce tenant rules, quotas, and orchestration, then call into `core/` engines (workflow execution, tools, LLMs).
+- When adding a new endpoint, search for an existing service to extend before introducing a new layer. Example: workflow APIs pipe through `services/workflow_service.py` into `core/workflow`.
+
+## Plugins, Tools, Providers
+
+- In Dify a plugin is a tenant-installable bundle that declares one or more providers (tool, model, datasource, trigger, endpoint, agent strategy) plus its resource needs and version metadata. The manifest (`core/plugin/entities/plugin.py`) mirrors what you see in the marketplace documentation.
+- Installation, upgrades, and migrations are orchestrated by `services/plugin/plugin_service.py` together with helpers such as `services/plugin/plugin_migration.py`.
+- Runtime loading happens through the implementations under `core/plugin/impl/*` (tool/model/datasource/trigger/endpoint/agent). These modules normalise plugin providers so that downstream systems (`core/tools/tool_manager.py`, `services/model_provider_service.py`, `services/trigger/*`) can treat builtin and plugin capabilities the same way.
+- For remote execution, plugin daemons (`core/plugin/entities/plugin_daemon.py`, `core/plugin/impl/plugin.py`) manage lifecycle hooks, credential forwarding, and background workers that keep plugin processes in sync with the main application.
+- Acquire tool implementations through `core/tools/tool_manager.py`; it resolves builtin, plugin, and workflow-as-tool providers uniformly, injecting the right context (tenant, credentials, runtime config).
+- To add a new plugin capability, extend the relevant `core/plugin/entities` schema and register the implementation in the matching `core/plugin/impl` module rather than importing the provider directly.
+
+## Async Workloads
+
+See `agent_skills/trigger.md` for more detailed documentation.
+
+- Enqueue background work through `services/async_workflow_service.py`. It routes jobs to the tiered Celery queues defined in `tasks/`.
+- Workers boot from `celery_entrypoint.py` and execute functions in `tasks/workflow_execution_tasks.py`, `tasks/trigger_processing_tasks.py`, etc.
+- Scheduled workflows poll from `schedule/workflow_schedule_tasks.py`. Follow the same pattern if you need new periodic jobs.
+
+## Database & Migrations
+
+- SQLAlchemy models live under `models/` and map directly to migration files in `migrations/versions`.
+- Generate migrations with `uv run --project api flask db revision --autogenerate -m ""`, then review the diff; never hand-edit the database outside Alembic.
+- Apply migrations locally using `uv run --project api flask db upgrade`; production deploys expect the same history.
+- If you add tenant-scoped data, confirm the upgrade includes tenant filters or defaults consistent with the service logic touching those tables.
+
+## CLI Commands
+
+- Maintenance commands from `commands.py` are registered on the Flask CLI. Run them via `uv run --project api flask `.
+- Use the built-in `db` commands from Flask-Migrate for schema operations (`flask db upgrade`, `flask db stamp`, etc.). Only fall back to custom helpers if you need their extra behaviour.
+- Custom entries such as `flask reset-password`, `flask reset-email`, and `flask vdb-migrate` handle self-hosted account recovery and vector database migrations.
+- Before adding a new command, check whether an existing service can be reused and ensure the command guards edition-specific behaviour (many enforce `SELF_HOSTED`). Document any additions in the PR.
+- Ruff helpers are run directly with `uv`: `uv run --project api --dev ruff format ./api` for formatting and `uv run --project api --dev ruff check ./api` (add `--fix` if you want automatic fixes).
+
+## When You Add Features
+
+- Check for an existing helper or service before writing a new util.
+- Uphold tenancy: every service method should receive the tenant ID from controller wrappers such as `controllers/console/wraps.py`.
+- Update or create tests alongside behaviour changes (`tests/unit_tests` for fast coverage, `tests/integration_tests` when touching orchestrations).
+- Run `uv run --project api --dev ruff check ./api`, `uv run --directory api --dev basedpyright`, and `uv run --project api --dev dev/pytest/pytest_unit_tests.sh` before submitting changes.
diff --git a/api/agent_skills/plugin.md b/api/agent_skills/plugin.md
new file mode 100644
index 0000000000..954ddd236b
--- /dev/null
+++ b/api/agent_skills/plugin.md
@@ -0,0 +1 @@
+// TBD
diff --git a/api/agent_skills/plugin_oauth.md b/api/agent_skills/plugin_oauth.md
new file mode 100644
index 0000000000..954ddd236b
--- /dev/null
+++ b/api/agent_skills/plugin_oauth.md
@@ -0,0 +1 @@
+// TBD
diff --git a/api/agent_skills/trigger.md b/api/agent_skills/trigger.md
new file mode 100644
index 0000000000..f4b076332c
--- /dev/null
+++ b/api/agent_skills/trigger.md
@@ -0,0 +1,53 @@
+## Overview
+
+Trigger is a collection of nodes that we call `Start` nodes. The concept of `Start` is the same as `RootNode` in the workflow engine (`core/workflow/graph_engine`): a `Start` node is the entry point of a workflow, and every workflow run starts from a `Start` node.
+
+## Trigger nodes
+
+- `UserInput`
+- `Trigger Webhook`
+- `Trigger Schedule`
+- `Trigger Plugin`
+
+### UserInput
+
+Before the `Trigger` concept was introduced, this was what we called the `Start` node; it has since been renamed to `UserInput` to avoid confusion. It has a strong relationship with the `ServiceAPI` in `controllers/service_api/app`:
+
+1. The `UserInput` node declares a list of arguments that must be provided by the user; they are ultimately converted into variables in the workflow variable pool.
+1. The `ServiceAPI` accepts those arguments and passes them through to the `UserInput` node.
+1. For the detailed implementation, refer to `core/workflow/nodes/start`.
+
+### Trigger Webhook
+
+Inside the Webhook node, Dify provides a UI panel that lets the user define an HTTP manifest (`WebhookData` in `core/workflow/nodes/trigger_webhook/entities.py`). Dify also generates a random webhook id for each `Trigger Webhook` node; this is implemented in `core/trigger/utils/endpoint.py`. As you can see there, `webhook-debug` is a debug mode for webhooks, handled in `controllers/trigger/webhook.py`.
+
+Finally, requests to the `webhook` endpoint are converted into variables in the workflow variable pool during workflow execution.
+
+### Trigger Schedule
+
+The `Trigger Schedule` node lets the user define a schedule that triggers the workflow; the detailed manifest is in `core/workflow/nodes/trigger_schedule/entities.py`. A poller and an executor handle millions of schedules; see `docker/entrypoint.sh` and `schedule/workflow_schedule_task.py`.
+
+To achieve this, a `WorkflowSchedulePlan` model was introduced in `models/trigger.py`, and `events/event_handlers/sync_workflow_schedule_when_app_published.py` syncs workflow schedule plans when an app is published.
+
+### Trigger Plugin
+
+The `Trigger Plugin` node lets users define their own distributed trigger plugins: whenever a request is received, Dify forwards it to the plugin and waits for the parsed variables.
+
+1. Requests are saved in storage by `services/trigger/trigger_request_service.py`, referenced by `TriggerService.process_endpoint` in `services/trigger/trigger_service.py`.
+1. Plugins accept those requests and parse variables from them; see `core/plugin/impl/trigger.py` for details.
+
+Dify also introduces a `subscription` concept: an endpoint address from Dify is bound to a third-party webhook service such as `Github`, `Slack`, `Linear`, `GoogleDrive`, or `Gmail`. Once a subscription is created, Dify continually receives requests from those platforms and handles them one by one.
+
+## Worker Pool / Async Task
+
+Every event that triggers a new workflow run is handled asynchronously; the unified entrypoint is `AsyncWorkflowService.trigger_workflow_async` in `services/async_workflow_service.py`.
+
+The underlying infrastructure is `celery`, already configured in `docker/entrypoint.sh`; the consumers live in `tasks/async_workflow_tasks.py`. Three queues handle different user tiers: `PROFESSIONAL_QUEUE`, `TEAM_QUEUE`, and `SANDBOX_QUEUE`.
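+
+Conceptually, queue selection is just a tier-to-queue mapping (the queue names below mirror the constants mentioned above; the helper itself is illustrative):
+
+```python
+QUEUE_BY_TIER = {
+    "professional": "PROFESSIONAL_QUEUE",
+    "team": "TEAM_QUEUE",
+    "sandbox": "SANDBOX_QUEUE",
+}
+
+def pick_queue(tier: str) -> str:
+    # Unknown tiers fall back to the sandbox queue.
+    return QUEUE_BY_TIER.get(tier, "SANDBOX_QUEUE")
+```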
+
+## Debug Strategy
+
+Dify divides users into two groups: builders and end users.
+
+Builders are the users who create workflows, so debugging a workflow is a critical part of the workflow development process. As the start nodes of workflows, trigger nodes can `listen` for events from `WebhookDebug`, `Schedule`, and `Plugin`; the debugging process is created in `DraftWorkflowTriggerNodeApi` in `controllers/console/app/workflow.py`.
+
+A polling process can be viewed as a sequence of single `poll` operations; each `poll` fetches events cached in `Redis` and returns `None` if no event is found. In more detail, `core/trigger/debug/event_bus.py` handles the polling process, and `core/trigger/debug/event_selectors.py` selects the event poller based on the trigger type.
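+
+A sketch of what a polling loop over such a poller could look like (the `poller.poll()` shape is assumed from the description above):
+
+```python
+import time
+
+def wait_for_debug_event(poller, timeout_seconds: float = 30.0):
+    deadline = time.monotonic() + timeout_seconds
+    while time.monotonic() < deadline:
+        event = poller.poll()  # returns a cached event, or None when empty
+        if event is not None:
+            return event
+        time.sleep(0.5)  # back off between polls instead of hammering Redis
+    return None
+```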
diff --git a/api/app.py b/api/app.py
index e0a903b10d..99f70f32d5 100644
--- a/api/app.py
+++ b/api/app.py
@@ -1,7 +1,7 @@
import sys
-def is_db_command():
+def is_db_command() -> bool:
if len(sys.argv) > 1 and sys.argv[0].endswith("flask") and sys.argv[1] == "db":
return True
return False
@@ -13,23 +13,12 @@ if is_db_command():
app = create_migrations_app()
else:
- # It seems that JetBrains Python debugger does not work well with gevent,
- # so we need to disable gevent in debug mode.
- # If you are using debugpy and set GEVENT_SUPPORT=True, you can debug with gevent.
- # if (flask_debug := os.environ.get("FLASK_DEBUG", "0")) and flask_debug.lower() in {"false", "0", "no"}:
- # from gevent import monkey
+ # Gunicorn and Celery handle monkey patching automatically in production by
+ # specifying the `gevent` worker class. Manual monkey patching is not required here.
#
- # # gevent
- # monkey.patch_all()
+ # See `api/docker/entrypoint.sh` (lines 33 and 47) for details.
#
- # from grpc.experimental import gevent as grpc_gevent # type: ignore
- #
- # # grpc gevent
- # grpc_gevent.init_gevent()
-
- # import psycogreen.gevent # type: ignore
- #
- # psycogreen.gevent.patch_psycopg()
+ # For third-party library patching, refer to `gunicorn.conf.py` and `celery_entrypoint.py`.
from app_factory import create_app
diff --git a/api/app_factory.py b/api/app_factory.py
index 17c376de77..026310a8aa 100644
--- a/api/app_factory.py
+++ b/api/app_factory.py
@@ -1,6 +1,8 @@
import logging
import time
+from opentelemetry.trace import get_current_span
+
from configs import dify_config
from contexts.wrapper import RecyclableContextVar
from dify_app import DifyApp
@@ -18,6 +20,7 @@ def create_flask_app_with_configs() -> DifyApp:
"""
dify_app = DifyApp(__name__)
dify_app.config.from_mapping(dify_config.model_dump())
+ dify_app.config["RESTX_INCLUDE_ALL_MODELS"] = True
# add before request hook
@dify_app.before_request
@@ -25,8 +28,25 @@ def create_flask_app_with_configs() -> DifyApp:
# add an unique identifier to each request
RecyclableContextVar.increment_thread_recycles()
+ # add after request hook for injecting X-Trace-Id header from OpenTelemetry span context
+ @dify_app.after_request
+ def add_trace_id_header(response):
+ try:
+ span = get_current_span()
+ ctx = span.get_span_context() if span else None
+ if ctx and ctx.is_valid:
+ trace_id_hex = format(ctx.trace_id, "032x")
+ # Avoid duplicates if some middleware added it
+ if "X-Trace-Id" not in response.headers:
+ response.headers["X-Trace-Id"] = trace_id_hex
+ except Exception:
+ # Never break the response due to tracing header injection
+ logger.warning("Failed to add trace ID to response header", exc_info=True)
+ return response
+
# Capture the decorator's return value to avoid pyright reportUnusedFunction
_ = before_request
+ _ = add_trace_id_header
return dify_app
@@ -50,6 +70,7 @@ def initialize_extensions(app: DifyApp):
ext_commands,
ext_compress,
ext_database,
+ ext_forward_refs,
ext_hosting_provider,
ext_import_modules,
ext_logging,
@@ -62,6 +83,7 @@ def initialize_extensions(app: DifyApp):
ext_redis,
ext_request_logging,
ext_sentry,
+ ext_session_factory,
ext_set_secretkey,
ext_storage,
ext_timezone,
@@ -74,6 +96,7 @@ def initialize_extensions(app: DifyApp):
ext_warnings,
ext_import_modules,
ext_orjson,
+ ext_forward_refs,
ext_set_secretkey,
ext_compress,
ext_code_based_extension,
@@ -92,6 +115,7 @@ def initialize_extensions(app: DifyApp):
ext_commands,
ext_otel,
ext_request_logging,
+ ext_session_factory,
]
for ext in extensions:
short_name = ext.__name__.split(".")[-1]
diff --git a/api/cnt_base.sh b/api/cnt_base.sh
new file mode 100755
index 0000000000..9e407f3584
--- /dev/null
+++ b/api/cnt_base.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+set -euxo pipefail
+
+for pattern in "Base" "TypeBase"; do
+ printf "%s " "$pattern"
+ grep "($pattern):" -r --include='*.py' --exclude-dir=".venv" --exclude-dir="tests" . | wc -l
+done
diff --git a/api/commands.py b/api/commands.py
index 82efe34611..a8d89ac200 100644
--- a/api/commands.py
+++ b/api/commands.py
@@ -15,12 +15,12 @@ from sqlalchemy.orm import sessionmaker
from configs import dify_config
from constants.languages import languages
from core.helper import encrypter
+from core.plugin.entities.plugin_daemon import CredentialType
from core.plugin.impl.plugin import PluginInstaller
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.index_processor.constant.built_in_field import BuiltInField
from core.rag.models.document import Document
-from core.tools.entities.tool_entities import CredentialType
from core.tools.utils.system_oauth_encryption import encrypt_system_oauth_params
from events.app_event import app_was_created
from extensions.ext_database import db
@@ -321,6 +321,8 @@ def migrate_knowledge_vector_database():
)
datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
+ if not datasets.items:
+ break
except SQLAlchemyError:
raise
@@ -1137,6 +1139,7 @@ def remove_orphaned_files_on_storage(force: bool):
click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))
except Exception as e:
click.echo(click.style(f"Error fetching keys: {str(e)}", fg="red"))
+ return
all_files_on_storage = []
for storage_path in storage_paths:
@@ -1227,6 +1230,55 @@ def setup_system_tool_oauth_client(provider, client_params):
click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green"))
+@click.command("setup-system-trigger-oauth-client", help="Setup system trigger oauth client.")
+@click.option("--provider", prompt=True, help="Provider name")
+@click.option("--client-params", prompt=True, help="Client Params")
+def setup_system_trigger_oauth_client(provider, client_params):
+ """
+ Setup system trigger oauth client
+ """
+ from models.provider_ids import TriggerProviderID
+ from models.trigger import TriggerOAuthSystemClient
+
+ provider_id = TriggerProviderID(provider)
+ provider_name = provider_id.provider_name
+ plugin_id = provider_id.plugin_id
+
+ try:
+ # json validate
+ click.echo(click.style(f"Validating client params: {client_params}", fg="yellow"))
+ client_params_dict = TypeAdapter(dict[str, Any]).validate_json(client_params)
+ click.echo(click.style("Client params validated successfully.", fg="green"))
+
+ click.echo(click.style(f"Encrypting client params: {client_params}", fg="yellow"))
+ click.echo(click.style(f"Using SECRET_KEY: `{dify_config.SECRET_KEY}`", fg="yellow"))
+ oauth_client_params = encrypt_system_oauth_params(client_params_dict)
+ click.echo(click.style("Client params encrypted successfully.", fg="green"))
+ except Exception as e:
+ click.echo(click.style(f"Error parsing client params: {str(e)}", fg="red"))
+ return
+
+ deleted_count = (
+ db.session.query(TriggerOAuthSystemClient)
+ .filter_by(
+ provider=provider_name,
+ plugin_id=plugin_id,
+ )
+ .delete()
+ )
+ if deleted_count > 0:
+ click.echo(click.style(f"Deleted {deleted_count} existing oauth client params.", fg="yellow"))
+
+ oauth_client = TriggerOAuthSystemClient(
+ provider=provider_name,
+ plugin_id=plugin_id,
+ encrypted_oauth_params=oauth_client_params,
+ )
+ db.session.add(oauth_client)
+ db.session.commit()
+ click.echo(click.style(f"OAuth client params setup successfully. id: {oauth_client.id}", fg="green"))
+
+
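A standalone sketch of the validation step this command relies on, assuming pydantic v2's `TypeAdapter`; the credential values are made up:

```python
# TypeAdapter parses a raw JSON string into dict[str, Any] and raises
# pydantic.ValidationError on malformed input, which the command catches.
from typing import Any

from pydantic import TypeAdapter

params = TypeAdapter(dict[str, Any]).validate_json('{"client_id": "abc", "client_secret": "xyz"}')
assert params["client_id"] == "abc"
```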
def _find_orphaned_draft_variables(batch_size: int = 1000) -> list[str]:
"""
Find draft variables that reference non-existent apps.
@@ -1420,7 +1472,10 @@ def setup_datasource_oauth_client(provider, client_params):
@click.command("transform-datasource-credentials", help="Transform datasource credentials.")
-def transform_datasource_credentials():
+@click.option(
+ "--environment", prompt=True, help="the environment to transform datasource credentials", default="online"
+)
+def transform_datasource_credentials(environment: str):
"""
Transform datasource credentials
"""
@@ -1431,9 +1486,14 @@ def transform_datasource_credentials():
notion_plugin_id = "langgenius/notion_datasource"
firecrawl_plugin_id = "langgenius/firecrawl_datasource"
jina_plugin_id = "langgenius/jina_datasource"
- notion_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(notion_plugin_id) # pyright: ignore[reportPrivateUsage]
- firecrawl_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(firecrawl_plugin_id) # pyright: ignore[reportPrivateUsage]
- jina_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(jina_plugin_id) # pyright: ignore[reportPrivateUsage]
+ if environment == "online":
+ notion_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(notion_plugin_id) # pyright: ignore[reportPrivateUsage]
+ firecrawl_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(firecrawl_plugin_id) # pyright: ignore[reportPrivateUsage]
+ jina_plugin_unique_identifier = plugin_migration._fetch_plugin_unique_identifier(jina_plugin_id) # pyright: ignore[reportPrivateUsage]
+ else:
+ notion_plugin_unique_identifier = None
+ firecrawl_plugin_unique_identifier = None
+ jina_plugin_unique_identifier = None
oauth_credential_type = CredentialType.OAUTH2
api_key_credential_type = CredentialType.API_KEY
@@ -1521,6 +1581,14 @@ def transform_datasource_credentials():
auth_count = 0
for firecrawl_tenant_credential in firecrawl_tenant_credentials:
auth_count += 1
+ if not firecrawl_tenant_credential.credentials:
+ click.echo(
+ click.style(
+ f"Skipping firecrawl credential for tenant {tenant_id} due to missing credentials.",
+ fg="yellow",
+ )
+ )
+ continue
# get credential api key
credentials_json = json.loads(firecrawl_tenant_credential.credentials)
api_key = credentials_json.get("config", {}).get("api_key")
@@ -1576,6 +1644,14 @@ def transform_datasource_credentials():
auth_count = 0
for jina_tenant_credential in jina_tenant_credentials:
auth_count += 1
+ if not jina_tenant_credential.credentials:
+ click.echo(
+ click.style(
+ f"Skipping jina credential for tenant {tenant_id} due to missing credentials.",
+ fg="yellow",
+ )
+ )
+ continue
# get credential api key
credentials_json = json.loads(jina_tenant_credential.credentials)
api_key = credentials_json.get("config", {}).get("api_key")
@@ -1583,7 +1659,7 @@ def transform_datasource_credentials():
"integration_secret": api_key,
}
datasource_provider = DatasourceProvider(
- provider="jina",
+ provider="jinareader",
tenant_id=tenant_id,
plugin_id=jina_plugin_id,
auth_type=api_key_credential_type.value,
diff --git a/api/configs/feature/__init__.py b/api/configs/feature/__init__.py
index 5b871f69f9..e16ca52f46 100644
--- a/api/configs/feature/__init__.py
+++ b/api/configs/feature/__init__.py
@@ -73,14 +73,14 @@ class AppExecutionConfig(BaseSettings):
description="Maximum allowed execution time for the application in seconds",
default=1200,
)
+ APP_DEFAULT_ACTIVE_REQUESTS: NonNegativeInt = Field(
+ description="Default number of concurrent active requests per app (0 for unlimited)",
+ default=0,
+ )
APP_MAX_ACTIVE_REQUESTS: NonNegativeInt = Field(
description="Maximum number of concurrent active requests per app (0 for unlimited)",
default=0,
)
- APP_DAILY_RATE_LIMIT: NonNegativeInt = Field(
- description="Maximum number of requests per app per day",
- default=5000,
- )
class CodeExecutionSandboxConfig(BaseSettings):
@@ -174,6 +174,33 @@ class CodeExecutionSandboxConfig(BaseSettings):
)
+class TriggerConfig(BaseSettings):
+ """
+ Configuration for trigger
+ """
+
+ WEBHOOK_REQUEST_BODY_MAX_SIZE: PositiveInt = Field(
+ description="Maximum allowed size for webhook request bodies in bytes",
+ default=10485760,
+ )
+
+
+class AsyncWorkflowConfig(BaseSettings):
+ """
+ Configuration for async workflow
+ """
+
+ ASYNC_WORKFLOW_SCHEDULER_GRANULARITY: int = Field(
+ description="Granularity for async workflow scheduler, "
+ "sometime, few users could block the queue due to some time-consuming tasks, "
+ "to avoid this, workflow can be suspended if needed, to achieve"
+ "this, a time-based checker is required, every granularity seconds, "
+ "the checker will check the workflow queue and suspend the workflow",
+ default=120,
+ ge=1,
+ )
+
+
class PluginConfig(BaseSettings):
"""
Plugin configs
@@ -189,6 +216,11 @@ class PluginConfig(BaseSettings):
default="plugin-api-key",
)
+ PLUGIN_DAEMON_TIMEOUT: PositiveFloat | None = Field(
+ description="Timeout in seconds for requests to the plugin daemon (set to None to disable)",
+ default=300.0,
+ )
+
INNER_API_KEY_FOR_PLUGIN: str = Field(description="Inner api key for plugin", default="inner-api-key")
PLUGIN_REMOTE_INSTALL_HOST: str = Field(
@@ -258,6 +290,8 @@ class EndpointConfig(BaseSettings):
description="Template url for endpoint plugin", default="http://localhost:5002/e/{hook_id}"
)
+ TRIGGER_URL: str = Field(description="Template url for triggers", default="http://localhost:5001")
+
class FileAccessConfig(BaseSettings):
"""
@@ -326,12 +360,93 @@ class FileUploadConfig(BaseSettings):
default=10,
)
+ IMAGE_FILE_BATCH_LIMIT: PositiveInt = Field(
+ description="Maximum number of files allowed in a image batch upload operation",
+ default=10,
+ )
+
+ SINGLE_CHUNK_ATTACHMENT_LIMIT: PositiveInt = Field(
+ description="Maximum number of files allowed in a single chunk attachment",
+ default=10,
+ )
+
+ ATTACHMENT_IMAGE_FILE_SIZE_LIMIT: NonNegativeInt = Field(
+ description="Maximum allowed image file size for attachments in megabytes",
+ default=2,
+ )
+
+ ATTACHMENT_IMAGE_DOWNLOAD_TIMEOUT: NonNegativeInt = Field(
+ description="Timeout for downloading image attachments in seconds",
+ default=60,
+ )
+
+ # Annotation Import Security Configurations
+ ANNOTATION_IMPORT_FILE_SIZE_LIMIT: NonNegativeInt = Field(
+ description="Maximum allowed CSV file size for annotation import in megabytes",
+ default=2,
+ )
+
+ ANNOTATION_IMPORT_MAX_RECORDS: PositiveInt = Field(
+ description="Maximum number of annotation records allowed in a single import",
+ default=10000,
+ )
+
+ ANNOTATION_IMPORT_MIN_RECORDS: PositiveInt = Field(
+ description="Minimum number of annotation records required in a single import",
+ default=1,
+ )
+
+ ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE: PositiveInt = Field(
+ description="Maximum number of annotation import requests per minute per tenant",
+ default=5,
+ )
+
+ ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR: PositiveInt = Field(
+ description="Maximum number of annotation import requests per hour per tenant",
+ default=20,
+ )
+
+ ANNOTATION_IMPORT_MAX_CONCURRENT: PositiveInt = Field(
+ description="Maximum number of concurrent annotation import tasks per tenant",
+ default=2,
+ )
+
+ inner_UPLOAD_FILE_EXTENSION_BLACKLIST: str = Field(
+ description=(
+ "Comma-separated list of file extensions that are blocked from upload. "
+ "Extensions should be lowercase without dots (e.g., 'exe,bat,sh,dll'). "
+ "Empty by default to allow all file types."
+ ),
+ validation_alias=AliasChoices("UPLOAD_FILE_EXTENSION_BLACKLIST"),
+ default="",
+ )
+
+ @computed_field # type: ignore[misc]
+ @property
+ def UPLOAD_FILE_EXTENSION_BLACKLIST(self) -> set[str]:
+ """
+ Parse and return the blacklist as a set of lowercase extensions.
+ Returns an empty set if no blacklist is configured.
+ """
+ if not self.inner_UPLOAD_FILE_EXTENSION_BLACKLIST:
+ return set()
+ return {
+ ext.strip().lower().strip(".")
+ for ext in self.inner_UPLOAD_FILE_EXTENSION_BLACKLIST.split(",")
+ if ext.strip()
+ }
+
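An illustrative re-run of the parsing rule above (same expression, hypothetical input), showing that entries are trimmed, lowercased, dot-stripped, and empties dropped:

```python
# Mirrors the computed_field body: "EXE" -> "exe", " .bat " -> "bat",
# and the trailing empty entry is filtered out.
raw = "EXE, .bat , sh,"
parsed = {ext.strip().lower().strip(".") for ext in raw.split(",") if ext.strip()}
assert parsed == {"exe", "bat", "sh"}
```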
class HttpConfig(BaseSettings):
"""
HTTP-related configurations for the application
"""
+ COOKIE_DOMAIN: str = Field(
+ description="Explicit cookie domain for console/service cookies when sharing across subdomains",
+ default="",
+ )
+
API_COMPRESSION_ENABLED: bool = Field(
description="Enable or disable gzip compression for HTTP responses",
default=False,
@@ -489,7 +604,10 @@ class LoggingConfig(BaseSettings):
LOG_FORMAT: str = Field(
description="Format string for log messages",
- default="%(asctime)s.%(msecs)03d %(levelname)s [%(threadName)s] [%(filename)s:%(lineno)d] - %(message)s",
+ default=(
+ "%(asctime)s.%(msecs)03d %(levelname)s [%(threadName)s] "
+ "[%(filename)s:%(lineno)d] %(trace_id)s - %(message)s"
+ ),
)
LOG_DATEFORMAT: str | None = Field(
@@ -543,7 +661,7 @@ class UpdateConfig(BaseSettings):
class WorkflowVariableTruncationConfig(BaseSettings):
WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE: PositiveInt = Field(
- # 100KB
+ # 1000 KiB
1024_000,
description="Maximum size for variable to trigger final truncation.",
)
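For reference, `1024_000` bytes is 1,024,000 bytes = 1000 × 1024 bytes, i.e. exactly 1000 KiB (just under 1 MiB), which is what the corrected comment states.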
@@ -910,6 +1028,11 @@ class DataSetConfig(BaseSettings):
default=True,
)
+ DATASET_MAX_SEGMENTS_PER_REQUEST: NonNegativeInt = Field(
+ description="Maximum number of segments for dataset segments API (0 for unlimited)",
+ default=0,
+ )
+
class WorkspaceConfig(BaseSettings):
"""
@@ -985,6 +1108,44 @@ class CeleryScheduleTasksConfig(BaseSettings):
description="Enable check upgradable plugin task",
default=True,
)
+ ENABLE_WORKFLOW_SCHEDULE_POLLER_TASK: bool = Field(
+ description="Enable workflow schedule poller task",
+ default=True,
+ )
+ WORKFLOW_SCHEDULE_POLLER_INTERVAL: int = Field(
+ description="Workflow schedule poller interval in minutes",
+ default=1,
+ )
+ WORKFLOW_SCHEDULE_POLLER_BATCH_SIZE: int = Field(
+ description="Maximum number of schedules to process in each poll batch",
+ default=100,
+ )
+ WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK: int = Field(
+ description="Maximum schedules to dispatch per tick (0=unlimited, circuit breaker)",
+ default=0,
+ )
+
+ # Trigger provider refresh (simple version)
+ ENABLE_TRIGGER_PROVIDER_REFRESH_TASK: bool = Field(
+ description="Enable trigger provider refresh poller",
+ default=True,
+ )
+ TRIGGER_PROVIDER_REFRESH_INTERVAL: int = Field(
+ description="Trigger provider refresh poller interval in minutes",
+ default=1,
+ )
+ TRIGGER_PROVIDER_REFRESH_BATCH_SIZE: int = Field(
+ description="Max trigger subscriptions to process per tick",
+ default=200,
+ )
+ TRIGGER_PROVIDER_CREDENTIAL_THRESHOLD_SECONDS: int = Field(
+ description="Proactive credential refresh threshold in seconds",
+ default=60 * 60,
+ )
+ TRIGGER_PROVIDER_SUBSCRIPTION_THRESHOLD_SECONDS: int = Field(
+ description="Proactive subscription refresh threshold in seconds",
+ default=60 * 60,
+ )
class PositionConfig(BaseSettings):
@@ -1083,7 +1244,7 @@ class AccountConfig(BaseSettings):
class WorkflowLogConfig(BaseSettings):
- WORKFLOW_LOG_CLEANUP_ENABLED: bool = Field(default=True, description="Enable workflow run log cleanup")
+ WORKFLOW_LOG_CLEANUP_ENABLED: bool = Field(default=False, description="Enable workflow run log cleanup")
WORKFLOW_LOG_RETENTION_DAYS: int = Field(default=30, description="Retention days for workflow run logs")
WORKFLOW_LOG_CLEANUP_BATCH_SIZE: int = Field(
default=100, description="Batch size for workflow run log cleanup operations"
@@ -1102,12 +1263,21 @@ class SwaggerUIConfig(BaseSettings):
)
+class TenantIsolatedTaskQueueConfig(BaseSettings):
+ TENANT_ISOLATED_TASK_CONCURRENCY: int = Field(
+ description="Number of tasks allowed to be delivered concurrently from isolated queue per tenant",
+ default=1,
+ )
+
+
class FeatureConfig(
# place the configs in alphabet order
AppExecutionConfig,
AuthConfig, # Changed from OAuthConfig to AuthConfig
BillingConfig,
CodeExecutionSandboxConfig,
+ TriggerConfig,
+ AsyncWorkflowConfig,
PluginConfig,
MarketplaceConfig,
DataSetConfig,
@@ -1126,6 +1296,7 @@ class FeatureConfig(
RagEtlConfig,
RepositoryConfig,
SecurityConfig,
+ TenantIsolatedTaskQueueConfig,
ToolConfig,
UpdateConfig,
WorkflowConfig,
diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py
index 62b3cc9842..63f75924bf 100644
--- a/api/configs/middleware/__init__.py
+++ b/api/configs/middleware/__init__.py
@@ -18,6 +18,7 @@ from .storage.opendal_storage_config import OpenDALStorageConfig
from .storage.supabase_storage_config import SupabaseStorageConfig
from .storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig
from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
+from .vdb.alibabacloud_mysql_config import AlibabaCloudMySQLConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
@@ -25,6 +26,7 @@ from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.huawei_cloud_config import HuaweiCloudConfig
+from .vdb.iris_config import IrisVectorConfig
from .vdb.lindorm_config import LindormConfig
from .vdb.matrixone_config import MatrixoneConfig
from .vdb.milvus_config import MilvusConfig
@@ -104,6 +106,12 @@ class KeywordStoreConfig(BaseSettings):
class DatabaseConfig(BaseSettings):
+ # Database type selector
+ DB_TYPE: Literal["postgresql", "mysql", "oceanbase", "seekdb"] = Field(
+ description="Database type to use. OceanBase is MySQL-compatible.",
+ default="postgresql",
+ )
+
DB_HOST: str = Field(
description="Hostname or IP address of the database server.",
default="localhost",
@@ -139,12 +147,12 @@ class DatabaseConfig(BaseSettings):
default="",
)
- SQLALCHEMY_DATABASE_URI_SCHEME: str = Field(
- description="Database URI scheme for SQLAlchemy connection.",
- default="postgresql",
- )
+ @computed_field # type: ignore[prop-decorator]
+ @property
+ def SQLALCHEMY_DATABASE_URI_SCHEME(self) -> str:
+ return "postgresql" if self.DB_TYPE == "postgresql" else "mysql+pymysql"
- @computed_field # type: ignore[misc]
+ @computed_field # type: ignore[prop-decorator]
@property
def SQLALCHEMY_DATABASE_URI(self) -> str:
db_extras = (
@@ -197,21 +205,21 @@ class DatabaseConfig(BaseSettings):
default=os.cpu_count() or 1,
)
- @computed_field # type: ignore[misc]
+ @computed_field # type: ignore[prop-decorator]
@property
def SQLALCHEMY_ENGINE_OPTIONS(self) -> dict[str, Any]:
# Parse DB_EXTRAS for 'options'
db_extras_dict = dict(parse_qsl(self.DB_EXTRAS))
options = db_extras_dict.get("options", "")
- # Always include timezone
- timezone_opt = "-c timezone=UTC"
- if options:
- # Merge user options and timezone
- merged_options = f"{options} {timezone_opt}"
- else:
- merged_options = timezone_opt
-
- connect_args = {"options": merged_options}
+ connect_args = {}
+ # Use the dynamic SQLALCHEMY_DATABASE_URI_SCHEME property
+ if self.SQLALCHEMY_DATABASE_URI_SCHEME.startswith("postgresql"):
+ timezone_opt = "-c timezone=UTC"
+ if options:
+ merged_options = f"{options} {timezone_opt}"
+ else:
+ merged_options = timezone_opt
+ connect_args = {"options": merged_options}
return {
"pool_size": self.SQLALCHEMY_POOL_SIZE,
@@ -329,7 +337,9 @@ class MiddlewareConfig(
ChromaConfig,
ClickzettaConfig,
HuaweiCloudConfig,
+ IrisVectorConfig,
MilvusConfig,
+ AlibabaCloudMySQLConfig,
MyScaleConfig,
OpenSearchConfig,
OracleConfig,
diff --git a/api/configs/middleware/vdb/alibabacloud_mysql_config.py b/api/configs/middleware/vdb/alibabacloud_mysql_config.py
new file mode 100644
index 0000000000..a76400ed1c
--- /dev/null
+++ b/api/configs/middleware/vdb/alibabacloud_mysql_config.py
@@ -0,0 +1,54 @@
+from pydantic import Field, PositiveInt
+from pydantic_settings import BaseSettings
+
+
+class AlibabaCloudMySQLConfig(BaseSettings):
+ """
+ Configuration settings for AlibabaCloud MySQL vector database
+ """
+
+ ALIBABACLOUD_MYSQL_HOST: str = Field(
+ description="Hostname or IP address of the AlibabaCloud MySQL server (e.g., 'localhost' or 'mysql.aliyun.com')",
+ default="localhost",
+ )
+
+ ALIBABACLOUD_MYSQL_PORT: PositiveInt = Field(
+ description="Port number on which the AlibabaCloud MySQL server is listening (default is 3306)",
+ default=3306,
+ )
+
+ ALIBABACLOUD_MYSQL_USER: str = Field(
+ description="Username for authenticating with AlibabaCloud MySQL (default is 'root')",
+ default="root",
+ )
+
+ ALIBABACLOUD_MYSQL_PASSWORD: str = Field(
+ description="Password for authenticating with AlibabaCloud MySQL (default is an empty string)",
+ default="",
+ )
+
+ ALIBABACLOUD_MYSQL_DATABASE: str = Field(
+ description="Name of the AlibabaCloud MySQL database to connect to (default is 'dify')",
+ default="dify",
+ )
+
+ ALIBABACLOUD_MYSQL_MAX_CONNECTION: PositiveInt = Field(
+ description="Maximum number of connections in the connection pool",
+ default=5,
+ )
+
+ ALIBABACLOUD_MYSQL_CHARSET: str = Field(
+ description="Character set for AlibabaCloud MySQL connection (default is 'utf8mb4')",
+ default="utf8mb4",
+ )
+
+ ALIBABACLOUD_MYSQL_DISTANCE_FUNCTION: str = Field(
+ description="Distance function used for vector similarity search in AlibabaCloud MySQL "
+ "(e.g., 'cosine', 'euclidean')",
+ default="cosine",
+ )
+
+ ALIBABACLOUD_MYSQL_HNSW_M: PositiveInt = Field(
+ description="Maximum number of connections per layer for HNSW vector index (default is 6, range: 3-200)",
+ default=6,
+ )
diff --git a/api/configs/middleware/vdb/iris_config.py b/api/configs/middleware/vdb/iris_config.py
new file mode 100644
index 0000000000..c532d191c3
--- /dev/null
+++ b/api/configs/middleware/vdb/iris_config.py
@@ -0,0 +1,91 @@
+"""Configuration for InterSystems IRIS vector database."""
+
+from pydantic import Field, PositiveInt, model_validator
+from pydantic_settings import BaseSettings
+
+
+class IrisVectorConfig(BaseSettings):
+ """Configuration settings for IRIS vector database connection and pooling."""
+
+ IRIS_HOST: str | None = Field(
+ description="Hostname or IP address of the IRIS server.",
+ default="localhost",
+ )
+
+ IRIS_SUPER_SERVER_PORT: PositiveInt | None = Field(
+ description="Port number for IRIS connection.",
+ default=1972,
+ )
+
+ IRIS_USER: str | None = Field(
+ description="Username for IRIS authentication.",
+ default="_SYSTEM",
+ )
+
+ IRIS_PASSWORD: str | None = Field(
+ description="Password for IRIS authentication.",
+ default="Dify@1234",
+ )
+
+ IRIS_SCHEMA: str | None = Field(
+ description="Schema name for IRIS tables.",
+ default="dify",
+ )
+
+ IRIS_DATABASE: str | None = Field(
+ description="Database namespace for IRIS connection.",
+ default="USER",
+ )
+
+ IRIS_CONNECTION_URL: str | None = Field(
+ description="Full connection URL for IRIS (overrides individual fields if provided).",
+ default=None,
+ )
+
+ IRIS_MIN_CONNECTION: PositiveInt = Field(
+ description="Minimum number of connections in the pool.",
+ default=1,
+ )
+
+ IRIS_MAX_CONNECTION: PositiveInt = Field(
+ description="Maximum number of connections in the pool.",
+ default=3,
+ )
+
+ IRIS_TEXT_INDEX: bool = Field(
+ description="Enable full-text search index using %iFind.Index.Basic.",
+ default=True,
+ )
+
+ IRIS_TEXT_INDEX_LANGUAGE: str = Field(
+ description="Language for full-text search index (e.g., 'en', 'ja', 'zh', 'de').",
+ default="en",
+ )
+
+ @model_validator(mode="before")
+ @classmethod
+ def validate_config(cls, values: dict) -> dict:
+ """Validate IRIS configuration values.
+
+ Args:
+ values: Configuration dictionary
+
+ Returns:
+ Validated configuration dictionary
+
+ Raises:
+ ValueError: If the connection-pool settings are invalid (min > max)
+ """
+ # Required-field existence checks are intentionally omitted: Pydantic
+ # defaults cover values missing from the environment, so this config loads
+ # even when IRIS is not the active vector store. Only the connection-pool
+ # bounds are validated here.
+
+ min_conn = values.get("IRIS_MIN_CONNECTION", 1)
+ max_conn = values.get("IRIS_MAX_CONNECTION", 3)
+ if min_conn > max_conn:
+ raise ValueError("IRIS_MIN_CONNECTION must be less than or equal to IRIS_MAX_CONNECTION")
+
+ return values
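A hypothetical usage sketch, assuming the import path matches this new module, showing that misconfigured pool bounds fail fast at construction time:

```python
# pydantic's ValidationError subclasses ValueError, so the except works
# whether the error is raised directly or wrapped by the validator machinery.
from configs.middleware.vdb.iris_config import IrisVectorConfig

IrisVectorConfig(IRIS_MIN_CONNECTION=2, IRIS_MAX_CONNECTION=5)  # ok
try:
    IrisVectorConfig(IRIS_MIN_CONNECTION=5, IRIS_MAX_CONNECTION=2)
except ValueError:
    pass  # rejected: min > max
```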
diff --git a/api/configs/middleware/vdb/weaviate_config.py b/api/configs/middleware/vdb/weaviate_config.py
index 6a79412ab8..6f4fccaa7f 100644
--- a/api/configs/middleware/vdb/weaviate_config.py
+++ b/api/configs/middleware/vdb/weaviate_config.py
@@ -22,7 +22,17 @@ class WeaviateConfig(BaseSettings):
default=True,
)
+ WEAVIATE_GRPC_ENDPOINT: str | None = Field(
+ description="URL of the Weaviate gRPC server (e.g., 'grpc://localhost:50051' or 'grpcs://weaviate.example.com:443')",
+ default=None,
+ )
+
WEAVIATE_BATCH_SIZE: PositiveInt = Field(
description="Number of objects to be processed in a single batch operation (default is 100)",
default=100,
)
+
+ WEAVIATE_TOKENIZATION: str | None = Field(
+ description="Tokenization for Weaviate (default is word)",
+ default="word",
+ )
diff --git a/api/constants/__init__.py b/api/constants/__init__.py
index 9141fbea95..e441395afc 100644
--- a/api/constants/__init__.py
+++ b/api/constants/__init__.py
@@ -55,3 +55,16 @@ else:
"properties",
}
DOCUMENT_EXTENSIONS: set[str] = convert_to_lower_and_upper_set(_doc_extensions)
+
+# console
+COOKIE_NAME_ACCESS_TOKEN = "access_token"
+COOKIE_NAME_REFRESH_TOKEN = "refresh_token"
+COOKIE_NAME_CSRF_TOKEN = "csrf_token"
+
+# webapp
+COOKIE_NAME_WEBAPP_ACCESS_TOKEN = "webapp_access_token"
+COOKIE_NAME_PASSPORT = "passport"
+
+HEADER_NAME_CSRF_TOKEN = "X-CSRF-Token"
+HEADER_NAME_APP_CODE = "X-App-Code"
+HEADER_NAME_PASSPORT = "X-App-Passport"
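A hypothetical consumer sketch showing the intended pairing of cookie and header names via the shared constants rather than string literals; `csrf_headers` is illustrative, not part of this PR:

```python
# Echo the CSRF cookie back as the matching request header.
from constants import COOKIE_NAME_CSRF_TOKEN, HEADER_NAME_CSRF_TOKEN

def csrf_headers(cookies: dict[str, str]) -> dict[str, str]:
    token = cookies.get(COOKIE_NAME_CSRF_TOKEN, "")
    return {HEADER_NAME_CSRF_TOKEN: token} if token else {}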
diff --git a/api/constants/languages.py b/api/constants/languages.py
index a509ddcf5d..8c1ce368ac 100644
--- a/api/constants/languages.py
+++ b/api/constants/languages.py
@@ -20,6 +20,7 @@ language_timezone_mapping = {
"sl-SI": "Europe/Ljubljana",
"th-TH": "Asia/Bangkok",
"id-ID": "Asia/Jakarta",
+ "ar-TN": "Africa/Tunis",
}
languages = list(language_timezone_mapping.keys())
@@ -31,3 +32,9 @@ def supported_language(lang):
error = f"{lang} is not a valid language."
raise ValueError(error)
+
+
+def get_valid_language(lang: str | None) -> str:
+ if lang and lang in languages:
+ return lang
+ return languages[0]
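A short behavior sketch for the new helper, assuming the module import path below: valid codes pass through, anything else falls back to the first configured language.

```python
from constants.languages import get_valid_language, languages

assert get_valid_language("ar-TN") == "ar-TN"
assert get_valid_language("xx-XX") == languages[0]
assert get_valid_language(None) == languages[0]
```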
diff --git a/api/constants/pipeline_templates.json b/api/constants/pipeline_templates.json
new file mode 100644
index 0000000000..32b42769e3
--- /dev/null
+++ b/api/constants/pipeline_templates.json
@@ -0,0 +1,7343 @@
+{
+ "pipeline_templates": {
+ "en-US": {
+ "pipeline_templates": [
+ {
+ "id": "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3",
+ "name": "General Mode-ECO",
+ "description": "In this template, the document content is divided into smaller paragraphs, known as general chunks, which are directly used for matching user queries and retrieval in Economical indexing mode.",
+ "icon": {
+ "icon_type": "image",
+ "icon": "52064ff0-26b6-47d0-902f-e331f94d959b",
+ "icon_background": null,
+ "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAT1klEQVR4Ae1dzXPcRBbvlsZ2xo6dcbwXinyMC+IDW5WY08IJh2NyIFRxJLvhHyDxaWv3kuS0e4v5CwjLHqmCHMgxMbVbBZxIOEAVCWXnq7hsMiaJPf4aad9Pmh5rNBqPPmdamtdVdkutVuv1r396\/fX0RgpNwspvterurqjatqiatlWxhKgYUhyHeLaQFYrwh5OqE3v+SSkqtrruSS\/yoRRijbBa89bRSZN7aVLYq7hu2eKBgfzSWLXpeqkkVmdfmXau4fogA8nc37CyUqs0TLEghfUOEatKhJoXspNU\/ZVqOJ8mbXGHCLlq2\/ZdKY07ZkMsz85Ot5E6a2T6QsB7j2oL9Aa+QxVdoArhryMYhiEMUnmmaQpJKg1\/SEMgcJxzHJumm4ZjFVR+dT4MMWEp8OcNOLdI3algWQ3KQ52GbTl5LcuNGw2L8lEfExBASiHt5YZhfDZ3ZPpOQJZUkzIjIDSdZVgXbCnfI4kXlNQgS6lkOkQD2UZGRlqEU3k47g8CjUZDgIy7uzsUN8TOzm7bg4kcq0Tpq68f+8P1tgspnqROQId4JXGRXrlLalwG0o2NjRLZRh3y4ZyDngiAhNvbWw4ZlZYEEUlLXH\/t6PTVtKVOlQn3H\/7vnLSNazSuqELQkZGSOHCg7MRpC87lZY\/A1tZ2i4x4GoiYtkZMhYCk9aoN0\/6UZFyAoEw8oFCcAK24vr7uHTd+ZY7IxTRm0okJuPKodtGy7SvobtG1lstl0npjxUGfa9JCABqxXq8rItJs2VpMOj6MTUBnrGeKyzQXuwQJR0dHxMTERGu22pKaDwqFAMaFICHIiEDtv3Ti2Mxi3ErGIiC6XMuwv6Sx3jxrvbjQ5\/u+zc0th4hY+sHSjTEq34\/TJUcmYJN8tzHRwDrd1NRka70u35Cy9FERgDZ8\/vyF0yUTkVaNEXk6KgkjEdBLPqzhTU4eZPJFbbWC5QcJX7x46awjxiFhaAL6yQfNx+t5BWNTzOqgG4YmxGJ2VBKGIiCTL2bLDNFtcUnYubEaAFpzwlFFt8uaLwAgTnJ6Q3ADHKEluaq1bX9JiqvSC5qeBPz1YQ07G\/OYcGDMx91uL0iH9zq4oeYF4MyuaV3uhca+XTBtrV0QwvgUBR86NMUTjl5o8nUHAUxMfv\/9uWOBQ13z4onjM0vdoOlKQGfcZ9o\/YIdjfHycdze6IcjpgQhgnXBjYwPX1mjb7s1uyzNdu2Da270G8sGKhbfWAjHmxH0QAGewO0ah0thx7AQCcwcS0O16xTmM+7C3y4ERiIOAZ2t24f7D2rmgMgIJSCZVzuAR5FNWyUE3cxojsB8CmDsoBUbfp1wLmhV3EPDXR7XLapsN3S8HRiAJAuiKYZ5Hw7nqrmE5hive8joISJ9QXUAGqE8OjEAaCMAoGYE04kW\/FmwjIMZ+0H5gLP44MAJpIODhU4W04AVvmW0EVGO\/0VE2KPWCxMfJEfBoQXyk1gotAq48rs3z2K+FCx+kjAC0ICYlFBbwma4qvkVA+jzvAhK561XQcJw2Aq1JrWUtqLJbBJSGfAeJ3P0qaDhOGwF8lotAmtDhGo4dAmJmQiZd80hgDQgUOGSBABwSqG5YzYYdAjbMxgIeyOTLAnYuUyEA8oGECPAPhNghoG1LR\/sZhnsRFzgwAlkgAHtBJ9juONAhIDHzFBLhp4UDI5AlAoqAjmc0elCTgKKKhwZ5nkI6B0YgLQSUkqPe2FF6zS7YnYAodqb1MC6HEfAj0JyEILmKfyWajVTJixxbvQCNnISNDUvcvl0X9+7tiKfPGuLp04Yj+fi4IY68WhKnTo2KkyfHxMyMfmN6EBAWVrCahldciVVpadu3MQOenJzMSRMMp5gg2uefvxC\/3HPdYvRC4a23DoizZya0IyLM9fEJJ\/mOPF2SdqOCoaBHNfaqV9+v443\/\/vtN8csvO+Lxk93WG3\/kSEnMHDbpjR8TADvrMEg5bt3eEDdvbpCZe7Bn06C6f\/fdprh7d8sh4bvvjgdlGUgalmKcb4jtRlX++uDpJWLitbGxMTLB0kdIhQwA\/PzfL3oCj+4Gb3tWRBykHF\/fXBdff72uIIkVA5uzZ\/UwscO3IvhmBB8sleCNHlvE8M+sW\/jii5cCb36YgO7pX58\/d7Rj2kAPUg7UP4h8cydonEdjvVOesd7jx7viEf3dvPmScGjXlCBxuSyFDprQ09tWSrBUBfU8iWHaO\/M8ACws+bzC4L563RIffJDOeHaQcuClQrfrDePjUpwhbfbu6c7eCkMS\/L1Nw5FbNEm5SVpzg7BQAXXBcGXQkxP1mYchjePOMgwE1ImAGLsEvfUKyF4xwEeXmTQMWg4QxjvmA\/kuXZwOJJ+\/ru+eLotLlypivNxqYnoxbZrEPPdnHeg59bzyOCTQaRsOwCcN6I69b3+c8gYpB7QfXgBvgOaDhgsbkPeMb9z3Cy3dJMUl7PO75VPKjjzrTu+9Ht1y9zkdoAP8pAFv+3fftjdglDIHLcfdH9s1+MyMEUrz+esITTh3on2L9fatuj9bX8\/xuy8ItCR4SDsC3kmh61Rohl0vU\/m98aDl+PFu+1rfmTMHveJFOj5J4z5vuBdyHdF7T1bH1AO7v8Gmyyy4Riv7aYUnT+KXNWg5MKP1BuxwxA2YKXvD02d7ExNver+OPTYHVYN+xYkWovWZhGAZIa2QpCsftBz+cdrRo\/EJ6J\/1JsElrbZR5WjXBSvBOB4OBLQjoP9tTdIMRyPMGP3PGbQc\/ucn0Vp+bY4FaV2CdgR8NcFYxw\/q9OH41Ru0HDM+2ZOsaz7xDWuOHmmfFftx6+d5axKi1mb6+fCgZ83NpQfOqVPxDRQGLceJuXa\/PD\/6lmWCsOuW5l\/PPHmyvexu92WV7uFaxaCtOK0mIW+\/VW5bvY8LAtbNsCUVNwxaDv9WGxaQb91q35YLUzdsZ\/q7b2zHDTK0EXCQggQ9G+OT839Ovo+bZN0Mcg1aDjzfv4AMTeYfzwVhqNKwlOPfS4a1kH98qfIPIo4\/SMpQWqxbJbHagOlREu2nqjZoOc6fn2rrDbC7s7RUC6UJofmWPlnr2EsGNjoF8+PFv16BQMqRoC7CvfEGjVNosgaz8yjhNFmJnDsXf9fA\/6xBygET+9KIFD\/9tLcrskvLpD\/9vC2+IwNdZWgwNeXqEXS1MNy9cWNd\/Oe\/dfrRaRpgecJ77x0Uf3xjsN2vEqded7dJ5f2HzxwpDx+eVte0ir+lveEg+za\/kLAU+fDDKTGf0fhmkHKg601iHQSsdDJIhTzPntUQCe0J6EhJ\/0CAH2mf+Blt1alxEMYy2
KI6QTPnt\/50QEBjZB0GJUeQfV+Yuu5nPxjm\/qzy5I6AWQGRp3LRxUIb+s20utUBVtPnz09qNelQsjIBFRI5jEFEmGvBYubxE7Lv23DHeugR8JEWeoTTC7Sc1YceIS58TMC4yPF9qSCgCJj9oCkVcbmQoiLABCxqy+akXkzAnDRUUcVkAha1ZXNSLyZgThqqqGIyAYvasjmpFxMwJw1VVDGZgEVt2ZzUiwmYk4Yqqpjxv\/UrKiL71At+WnTwTKqLHPtAFfpSbqxhQtcog4zYe9XBM6kucqQBsdqKywUB8cYHeUhV5lhZekiFZXFUz6RoIJjUwwYviWW3t6F1kcMrU5Lj3BCQPZMKxwSrqAapWo8B2TOpcJx0BpEvzx5SvZpT2y44iRk6XJIl8ZCKsdY\/\/lnr+KCnm2dSL6BBlsvojv\/+t8ORDUN1kcNbv7SOVRes5TIMLH6D3vqwlU\/qIRXk18EzqS5yhMU9Tj4tCQjgk4a4HlKhdfwm74PwTKqLHEnbodf92hGQPZO6TVZkD6leUmpHQPZM6jbP0HhI9bJRh2P2TOq2QpE9pHp5pp0GVN\/8eoWMe4xxVNSgi2dSXeSIil\/U\/NoRMGoFOH++EdCOgGl6borjIdX\/\/DhaVFHCr82xHhg26CJHWHnj5tOOgOyZ1G3KofGQGpe5Wd3HnkldZIvsIdXLHe00IHsmdZunyB5StSYgxkmD9JCK5+vgmVQXObxkyeJYOw2ISrJnUrep2UNqFpQPWSZ7JhWOdyv2kBqSMFllY8+kxTZI1dYe0E\/oYfdMGmRn6Mco6Jw9pAahkrM0LEbDRMxvptWtGll5JtVFjm71jpKuDFJzowGjVC6rvCCADp5JdZEjCc5MwCTo8b2JEVAE1HIZJnHtuIDcIMAEzE1TFVNQJmAx2zU3tWIC5qapiikoE7CY7ZqbWjEBc9NUxRSUCVjMds1NrZiAuWmqYgrKBCxmu+amVlp7x1Io6uIRlOVQLZJerPVeMPY82TPpXmPrgseeRPGP1FactgTUxSMoyxGfZPvdqQhofrz41yvIWC6X98vf12swfbpxY13s7Li\/gxvl4bu7Qvz087Zzy9zcaJRbO\/KyHB2QpJZQr286ZWk3BoTGCfIN2G+PoCxHalzbtyCtumCMcdgz6V576YLHnkTpHakuWKtlGHR57Jl0r5F1wWNPovSPtCEg3na\/yfsweybVBY\/0KddeokHuctaQZNvRB\/ztRSU708UjKMuRrB3D3O3h2ppBvNOCgLp4BGU5wlAoWZ42AiYrKr27dfEIynKk16ZhStJmDKiLR1CWIwxt0sujDQHTqxKXlCcEtCGgLh5BWY7s6WtZ7oRX0vzDEFKs4pGNhpX9k\/d5gi4eQVmOfRoppUtqEmJLEFCToItHUJajv4QAAbVYhtHFIyjL0WcCWrb9Ox5p24PtgnXxCMpyZE9Ay3J\/v0UKuapNF4xq6+IRlOXIloTeTTfYA85LKRdKJVOMjIxk++QepY+PG0IHj6AsR4+GSnh5Z2dH7JLhJk1GbshfHzy9ZEt5bWxsTExMjCcsOp3bYQUSZBMYpfSzZybE2bMTUW7pyMtydECSSsLGxobY3NwCARdLDWk7azE0Ckyl8DQKAXnKZUPc\/JrMs+rRxqZpegRlOdJozc4yLMttUymNVXnvUW1B2vZt0zTFoUNTnbkHmAKTJGghv5lWN5GK7plUFzy64R82\/cWLF\/S5BXXBUp6WKyu1asO0VwzDEJXKobBl9DUfgGfPpHuQ64LHnkTRjtbWfhfQguaInHV+Pe\/+w2dO\/zs9XRE0IYlWGudmBCIioMzxXz92WLrLMLa7Hae2SCKWx9kZgdAI7O421wBtcQc3uQSU7gmmxxwYgSwRUIvQNA15gOc0NaDtnCh2ZikAlz3cCGD9zw22VwPay0hU7HQz8H9GIH0EGo1mFyyNPQKaDXMZj4IG5HFg+qBziXsIYPkFwWyIZcROFzw7Ow2LmGWQj7thwMIhCwQU+cgQ9U6Tc80xID2NyPcNHrq97fpVyUIALnO4Edje3nIAsIXLNZy4kxDnyFhGxAQEChyyQEBpQMsyrqvyWwQ8cXR6mRKdblhlVJk4ZgSSIrC1teXsftA2x+rc7LQzAUGZLQLihPaEbyDe3Kwj4sAIpIaA6lltIa96C20joEGqkRi6Bg3IWtALEx8nQUDxCdrv9WPT171ltREQMxMy0f8EGVgLemHi4yQIrK+vO7cTtz7zl0OkbA9kHVOxDPsH+mSuOj5eFgcOHGjPwGeMQAQEMPZbX9+gr3\/F6mvHDs\/6b23TgLgILUh2Wos4hhtVXpgGEhziIIBvzZUrXv\/YT5XXQUBcoH76K4qcGfHLl676VDdwzAiERQDDuKb181f+sZ8qI5CAuGg25EekNmlCskPjQdehtLqJY0agFwL45mNraxtd7xoZnjo9atA9XQlIXfEq2UxfxU1Qo4N23REkPKfpiYDb9bpLedT1Ls6+QlzqEroSEPlfOz69RIPATzAOhB0\/k7ALipzcQgAcAVecuQNxp1vXq24gDbl\/aM6Kb9OseB4fLk1NTbLZ\/v6QDe1VkO75cyiqBm1qiDuvHT\/8Zi8w9tWAuBmzYsOS71OBqygYD+CZcS9Yh+96G\/loycUYle+HQaGnBlSF4Os5Wh+EJqyyJlSocAwEOsg3Ik\/vN+7zohaagLjJT8KDBw8K0+ypRL3P4+OCIYAx38uXL91uF5ovAvkARSQC4gYvCfEt8eTkJJMQwAxhUBMOrPURkSKTD5BFJiBuapLwS0xM8B1xuXyAt+wAzBAFrPPV63Wn+8WEA2O+sN2uF6ZYBFQF3H\/wdImmxBdxPjY2SiQsszZU4BQ0xngPxgXb281PeGmpxbSMK5isxqlyIgLigfcf1i5IYV8j1woVdMnQhvC0xaF4CLRpPdrhIOuWqyeOzywlqWliAuLh6JIbprhG86FzOAcRJyYmyN+gdr8GC\/E4REQA9nzY1\/XYiC7T9tpHcbpc\/6NTIaAq1NGGtn0ZSzVIAwFHR0dZIyqAchb7iUdkWcXWWtNYJZXapEpAJdG9B0+v0O8\/\/EURERrRJeMYa0UFkoYxxnf4LHdnZ9sxJMA5ApHEMVQuWcZS3LFet+pmQkD1ML9GVOkgIxazS6USddeITXWJ4z4hAHLhD9ZO2OHCX4BjgmVpyxuGJa6nTTxVzUwJqB6y8rg2T2tGNFmR72DpRqV7Y2hJLGpjWQfHiNUfSKqCe71dbJVP5RmGWBHIX1eszSHgVw+UBsM6ncqvSNa00\/PfjvNlyvsNNcJy80vJoDyppbW3ZGrFdi+IJiwVmrAsEEBYQzxFa0jVbqTsXgpfSQUBuOWDZzSbnFNJYxnuMrLSdN3k7TsBuwmy8lutSo6TqkTICkhpCatCv6Z9HPlp4FulyAm4jiUfdY6YlGVHmvd6EY+p4daoB13rqFvzp9cofY2Wx5zr9NNsDwxhrDXop7EIq1Ua+aymMYPteHaMhP8DKleEJHlBQFwAAAAASUVORK5CYII="
+ },
+ "copyright": "Copyright 2023 Dify",
+ "privacy_policy": "https:\/\/dify.ai\n",
+ "position": 1,
+ "chunk_structure": "text_model",
+ "language": "en-US"
+ },
+ {
+ "id": "9553b1e0-0c26-445b-9e18-063ad7eca0b4",
+ "name": "Parent-child-HQ",
+ "description": "This template uses an advanced chunking strategy that organizes document text into a hierarchical structure of larger \"parent\" chunks and smaller \"child\" chunks to balance retrieval precision and contextual richness.",
+ "icon": {
+ "icon_type": "image",
+ "icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005",
+ "icon_background": null,
+ "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAYkklEQVR4Ae2dz28cx5XHq2f4m5JIyo4R2+t46B+H1S5gGUiwa1\/EAFmvkUtsIHGOq6y9Z1vJHyDpD0iknG2vneMmBmxfFo5twPTFzmIDRAYS7cFKSMU\/FCS2RVKiSIpk975PNWtYU9M9nB\/dM8PueoLY3TXVVV2vv\/N+1auaQA0JLV27XpNHqe3K\/yAIZ1WkZitK3c\/jhUEwG8g150I1\/df+E8hn+5\/bnxT3PFArMuaVhgFyTfkeBSpa5jRU6irlUVhZrsafL8\/fPac\/4\/NBUtDvzpeWrs\/ujquFqgpPhZWgJsA6Kc9Q6\/dz+P6EA5G6FFXUsoqij6Kocqm6pRbn5+fqAO4Hj\/oCQJFuCzKYU5GKOPK\/iSqViqoEgaqOVFUgR\/5TBgVy5Bqq7pXpi70\/pr5dVvTzKBJuyn+buA6tsnB3V+oIzqJQ1w1DOYaR2pUj54kkoBTJuahGKr+Yv2vuUmKdDAtzAyCSLpwMTwdR8D153gXzzIBlpFrVQKvKcXR0tA44U8cf+8OBXQEoYNzZ3la7O7tqe2fH7XhZoHr+obvvfNX9IKvrzAEI8NSEej4KoheMXQboxsfH1OjYmAafkWZZDcK3kx0HAOHtrS21vb1jS8ll0Umvit14Prue4pYyBeCVz794qhJULkjTNZofHRlRE1OT+si1p8PFga2t2zEY9yVj5hIxEwDiwYpF8oqwdwEWe+DBheIQUnH95npdIkaBeqMSBWey8KR7BuDVv1x\/Xkzdc6hbVOvk5KSamBgvDvf9SOocQCJubGzEQJRwThiFZ3q1D7sGoLb1JtVZ8bxe4AnHxkbV9PR03VutP7U\/KRQH8J4BIWCExNa\/+ODX7zjT7SC7AqBWuVH0ugQ3T3qp1y3rD\/d9m5tbGog6FEToJgie7kYldwzAPXvvPWFfjTjdsWNH6\/G6w81S\/\/SdcgBpuLZ2w9iGeMrf7hSEHQHQBh8xvKNHj3jwdfrWClYfEN64cVMRUxTqGIRtA9AFH5LPx\/MKhqYuh4MaRhJ2A8K2AOjB1+WbKdFt3YIwnmw9gFHS+OtSpYba9ZLvAGaV9GO0IdgAI2AFzOhIyQH8OBCAS3+5fkGJt4vDgc3n1e4BHC3xx2Cj7hcIZiQX4OxB7Gipgq9c++K05Ki8QsMzM8e8w3EQN\/3nmgM4JqurazoDRyThmQfvueNiGmtSAajtviD6HTMcU1NTfnYjjYO+PJEDxAlv3boluXRqRTKiHk0Lz6Sr4CC6APjIYvFTa4k89oUtOABmmB0DQ3t5Aom1EwGI6hXP+insPuZ2PXkOdMMBa2p24crn159KaiMRgGL3aeMR8Jms5KSbfZnnQCsO4DsYAVYRjZrkFTcBUGw\/wFcDeKhfT54DvXAAVUx6nlAtnAh14ordXhMARV+fpsL0kWm7nj\/3HOiaAyQlQyIRn3elYAMAsf2kXg3E7qGW+zx5DvTEgTqexCEJx8PTdmMNADS239i4Tyi1meTPe+eAJQVZpFanOgCXPr1+Ukq97VdnjT\/JkgNIQZwSoQXxMxZM23UAhpVYNI6OaoPRfO6PngOZccA4tbLUc8E0WgegJBOeotCrX8Maf8yaAyzLhQzWONcA1J6JTB5T4J0PuOApDw6wIUFdDbN+XEgDcHd8d4ELDz644CkvDgA+QKhpSi1w1ACUD7T0q8i+LJ48B\/LkAHv\/QOFubAdqAMraukcoHB2RyWNPngM5cmAvYRU7sEY32uUV51hfVKsxHvnA0z4H1rYj9dZnW+ry6q7683qoLq\/sqFUpo9zQfVMV9XfTVfWPs1V1YmZEPXbXqKLMUyMH2IxKU6C00ItjLnsOiEFn4y3lvAJcL368qT7827b+fxAXPrkVKv5T39A\/CBife2jSg9EwRI57TgglNf4EewuOlkg+mJ2doazUZID30scbDRKuV6Y8UxtXPz4x5aWiMHJlZVWvJRY1PI8ErMHcpI0fKS8T\/fTyhsoaeIZ\/v1zeUvwHhD85Ue4cS1sKVnajXR2PCSpiCZaUUJ1PvLuifnb5VqrUe\/xro+o\/Hp5Q\/\/n4UYU0S6L7pqoaXNRNI\/r45\/++rtV1Wp2il4\/secKyPWZtpFoJZAmd6GJRwWUkpNLZj9YTgXdsNNCge+7hScU59FMBEPe49OQ9Y+rcyem6itX24F+3E9vWgH9nRV381hH1r3Jf2chIQFkrMjsiWwbPwlr2Zy4bAaafidp1CbChJgGeIUDz7Ac31B\/EA3bpJ6JWf5ygVl+6spkIbO7H1vx3aa+MKtkAUGIxsyMCuxoMqRdyUQJKAx9qFlAYiQcrfv35bXX20nqT2kTlPvfweANQW9WnTTt0Q11UMlQmu9As85D0v\/vrqS9lAiCASpJ85x+ZagJTGlAB368WjtVVrkaR\/Dmo\/q8\/EzCLyrcJEBIzTLMt7bpFOxfXI7ifQVXMHF3RRuiMB1X6wv\/ebChFMr126lgD+Kh39qNkFY2954Kv3frPiYR9+zuzDRKWhwGUtFEGMsJOFq3P1SVgGQbOGH+wuNqkBl87NaMIGhsCCNRLAkSSvddp\/WNjstOEo45Rzc9+sKbBaZ6jqMe6wytsKBUAUY8uqFC7Nvio85LMgLi2Gir35cePSN1GlmVVH7D9YWVXmwZJDk1RwViREEycl1VwLxjguXYfNpft6Rr7LQl8qNwk8NFmr\/VtcL2oZ2CKrYqtSY+aJOrHADR62WZGkc6Nt2nGhETD24UAZ6sQC3ab7RVnWR+v+78krmhAzPGlj5kx2Q8BmWcu4rEU0WcA4waPecF4nnyGvdcqvueCL8v65x6ZlhBM\/EUwACuDFDRjbTRoTGnBjh\/KjIRNSD\/Ub1b2W6\/2IRKWZymjFCyFBHz5SuNsxzO1sXqIxbx0A1ATYrHtPaSkCcnkVd\/uj2f5wErrMs9WxGNsAzIXLP+KSIDn9+Jd2kTWSxJlEWIxKp2jS520T17h2nYotmfxZETd3xD\/o8L+bTCqqNkwrvp1QcE1KpRwjGv4M2OSFA\/Mu755xrdk1qSIVAegYK\/wNuDl1ebkAfulAiZ3VoPPTUjGrst53vXt\/lgCUHQqPABd9Wu\/UFRiUoiFQDSJqS7lXf8xySO0U\/pZf1J0KjwAP11PliKd2GOAoB\/1fyCeOcmqhlj8VHQqPABdZwAVmueUWi\/tux42K++KToUHoPsCh8nec+1JO+DNc7uAdMdShOvSAdBeq4t0HNQUXJo9WQRQdTKGwgMQqWJLEhNbyyrLGSnWSVb0QfU7eXlFqFt4ALp5d6syK\/fix8mJpq5KNC94UCEZW1qbZynas
fAAZIrrk1v7Ad0zkg1thzrMC3VXtVGOik4LyeRdn\/7vk60+ik6FB+B9041TWUng60eIxZ1lAdxJsyw24OxEWbu8SOeFB+CJmXQpgspNCsm0sg\/zrO8Ci02Oik6FH+GT946rM79tXIXGSx02ey8JaOywVXQqPADxgt0pLnYjYFcCO+426JAMz2Iv18R29U5IQb5+j39tpMHxwA50wZdmj\/XLPrSn4GD7cw9NFIT7rYdReAmoX6ZsscFefyYeyJFr1mMMQ1Y0ywWQwDaVQf0y3lIAEGkXg20\/w4VFSp\/qMMt+mQFA3iEWu32A5y6YYrlAGdRvaQDIQFl+6UrBtJSrTkImvapowOdKP7Naz3whinxsDJIVeKRGCqYNEa+431nRfCHc1XoAuizSj3dRChVsQIdkeevz7aYlmIMIybALwjlnkyKew5W+5tmLeiyNBDQv8GXZ4dT2gClflcU\/a7f3nQBUolkFZ+4zR+w3N6Wr0\/p44d9\/f9U0qY88E+2WjUolAXm5qLfzshj8zG\/3d8jCK37i3VXFIvEn7x1LnSLr1d6jf9SuK\/kop98yqV7GDAV\/uvaVTrs9fnwuLinJXwDo2l8MHUlkwjWGFajGpCm4TkI4tGk2QTftukdMhLJsVPnVV\/HSg9JJQF46KjNtuWYS+FyVSxudpGgh9fB23bZpxybqHOQs2fWLcF46AAK+tFkP94UCBpJNbeL+drKoARvAS\/vZBwM06tjARD2Tw1iW3VJLpYLTwEeQ+q3PtkUyJq+gA4DMJzOllzRrAZgADD\/PgIPBUtCktC8DZOZ5cYaw+WKHZM18VD9e+OaRQoPQqOBDA0CkBL\/X9uEXOzqM8omsmTWSAwCQ98eLfezOUW3QU2YTdfE8CX\/YZDsWqMC0bTvse7o9N1LPDTQDatspMu3bIOx1\/KbNYTkeGgAitV6WReL2HnrtMBGJxIs2nuX3319rkkrU4SXbRH8AMclBset1cm6AZ\/\/eiHt\/GggZww0JE\/U6fre\/QV8PPQD5xh\/kNbbDRHY+oC0XUEjLt7+T\/tt4ABFH5WX5rY\/fd7lAHJX8mKjtVsCzx5AGQrtOp+eMH8962DY5GmoAptlqnTI\/rT7gY1d8V02n1TdgZJ8ZVPgnstsCZYZoB8eBdjEFyMImEbbd9k07HPMAIVrgVwszdW1g9zeocwPAofOCecHsFm+\/YMMko8pwCPhtXqNekXDscEoq\/UHORBzTa54NMX0kHennPlHXSu17xPe+9mW9Kv3\/3\/eO1697OQHEjJM2Xep2\/OYLjeND+8NEQ+WEGEa54AM0F741rT3RdpiHFGHz8CSvFskHgHslG4C09dn37+i1Sf2lSwoRZTX+YZKERgIOzVww3\/gk5hMieftfZjoCDc4F93CvSyzLZHH6sFE\/xm++4MM0\/qEBIA6HK\/kIkTA\/240txT3xBuCNu83TR56hlm6BXdbxDwUAAYWbHIr0yiI1iTCGKwlZbO6CvVvgZHFfmcc\/FAAk7mYTNo8brLU\/7\/Q8jgc2rg8mtjgsVObxDxyA2D5ujA7J143aTQMUbeHE2BQHdgdvC5Z9\/AMHoLsRN9IPJyJrwvO1Qc2Ld\/vOus922nOfoWzjHzgAP\/yi8Udknry39xBJ2ot3bUHmlQdNZR\/\/wAHo7oPMrgV5kRv\/cxMT8uq3VbtlH\/\/AAejuBJ\/njlDMntjElNqgqezjHzgAscVsynPS3Ezdmf7cvk15P4\/uM5Rt\/AMHYD9ftu9r+DgwcADaninsyTNA3CxtGpNWB\/F6yj7+gQPwG84Opmk\/LJMFONzfBB6GLXDLPv6BA\/CEkx704d\/yC42QrmVTng6P3U+r87KPf+AAfOzOxvw0fi08L3KDvqwfaZdQ379c3tRrN554d6XpNsrMWmNX1TdVtgoOy\/itR870dOAAdDOHeXmtVpR1O3qm+1z7sp2gN\/ewVPKf5Dfc2OqXdpLih5TxGSD8+ze\/0ke3v6RnH\/bxJz1zlmUDByBG+A+dqbesc\/YAtTvhz3Rfq5AH97A\/DDuXumt323kBgJF72Xa3Vf7dsI6\/nTFmUWfgAGQQz8refTYhObLM2UvKtWuVbUP\/T7yz0pQiZj9ju+ekfj3xzmqT9LXvH7bx28+W93mjAZZ3byntEyBmnhZJY4gXh4Tqda+UeP+WRruSvtygtOk3jzUpAJps77Q1GcM0fsOHfh2HZk0IKi+WFI3TY90uK6Q9JJ+b6Eq2Cen6bvwNhhugcLSJe7JYkwLQ0lanDcP47THnfW7WhAwNABlwDABWxDWCkBeHymw3TQsnBjsyCUhJGw3RdwyAlaZ7kJb0nQRY7ksj2sPutKU6dRlL\/AVotn4GOf60ceRVPpQAZLCxCrzRBEI+4+Wxjx4ZM2b5IuW8OALYH0gMMW0zIKRYrAIbExK4H8LhcKWlvW1HXKvzv4DQtWeR6uxRmESDGn\/Ss+RZNrQAZNBpkqBbhgC+NMln+nN\/pwPJx6KmLIgwjisJf\/PduVQ7tN\/jz2KMnbZhANisBzptKYf6Rk0Bgl6JNlB5tJlGbogGwLbyktPaSSunLdq0qdWalH6P336ufp8PlQ2YNHikAQAhrtYumdga4Y1WwKM9bDUCxzbZu1LZ5b2cu9uw8Yz\/893ZlrFI+st7\/L2MqZd7jQQcegCaQQIUptJIYb8ssw5\/FpuPMoiX+Q1JNj0xW5Xt2UY62pfFzF6YfpBUvxFg5EEA3Twz7V\/45rQ4Vu1J+bzGn8c422nTAHAo4oDtPDAgwwtu1xNup03q9HtNhu2QsCblmVp7T5rX+NvrPb9a6YZRfn0OVctlX5Mx6JdRUYHSqR1R2JgaP+gH61f\/ZV+T0S8+2\/1E0R7WBHsVFe0BUE7KSLZNxvhbJSj0yh\/XIXL77rX9w3J\/HYCCvdKr4MPy0or6nKUHIMa9TYQ98iJX4rl959XvMLdbegCWfU3GoMFZegCWfU3GIAAY2k6IKKBlHmI3zE\/1DGKQ7fZZ9jUZ7fIpy3reCbG4WfY1GRYrBnJakfBfqeOAOALDuCZlIGgYQKeVIIj0LydHUTlVMDwv85qMAWBOhbtxwnGgguXSOyG8AALEbuoXa1LsedtuX1Sna1K67ecw3Wd8EJ65IvMfy5yEJXVCGDuUlLNHGthByyrju5v\/EvMjy5rfK7Ep61xDu+3Dcm60bajCq5XK3lxw3TU+LKPI+DmxBeOs6cbEUbOsspN8RHL\/kpZ1Aj76KHsA2vaCgyvXvjhdUZVXxsfH1PR0NinoGWOjr82VZU1GX5nqdHbzxk11e3tbBZXg6WDp2vWFSEXvVatVNTNzzKlazssyrMkY5Ju9sXZDbe\/sSCJW8G2ckGUepi4WuSg5lWlNxiBetTXpsaxn4v907SudizU3O4tYHMQzDW2fRV2TMUiGm3T8B+4+HhgALskD1WZnZ1Sl4iMzSS8HrzaPNSlJfRW5bEdigGura0r076UHvn78Ub0mROIylwSKtW0xDMfHs\/+RmCIw
FM81jzUpReBNJ2MwQWgVqqvctyfuIn0BOj15DuTJgR1xPqAoiC5x1AAUL3iRi3DHAxA+eMqPA7t7GBNTbx+A1a3qIl0iAcu6OCk\/lvuWbQ4QftF0Sy1y1BJwfn5uRbyRRUIxO6GXgppB\/k\/mHKiDTxwQMEcHdZc3VNH7FNy+3biTPGWePAey4MDtzXh7FdGyGmu0WQegTMctUnB7ywMQPnjKngNGAlZGKq+a1usAnL97btGoYVPRVPJHz4FeObC1tWUyrpbn75rTDght1gGoOwiiNzlu3mpMIdKf+T+eAz1wwGhWmf89bzfTCMANEY2SnoUE9FLQZpM\/74UDFp6WRdO+arfVAEA8E\/GEf04FLwVtNvnzXjiwfnNd3y7x5l+47YjZ10hLS9dno4nod1Jam5qaVBMT7e1f19iKv\/IciDmA7be+fouLZUk+mHf50iAB+VDHBKPgDOcbG5s+MA0jPHXFAdKuwBDk2n6mwSYA8sH8PXNviGjUgemb67H4NDf4o+dAuxzAjGOtURSoN1zbz7SRCMD4w+BH2iGRDJnNzf1fMDI3+qPnQCsObErQeYtJDfYA3NOoSfVTASiIXQ7C2GVGjFpZrEnt+DLPgToHYtUbh\/ICAR9Yqn\/onKQCkHqiii\/iFTNHTB6\/B6HDPX\/ZxAEwAlbADNhJU73mxiYv2HxgjtorHo\/eE1F6koVLx44e9Wn7hjn+2MABQLeGoCKvVJKcH7jn+KMNFRIuWkpA6muvOAieltNlGl67Iegu6X7SCfzzRXscaACfYCWIMXMgfw6UgKYFWb5ZY\/mmXNe8JDRc8Uc40AQ+WW7Zyu6zudY2ALnJBeGRo0dU1S9isvlZunNsPhaaa7WL5OsAfDCrIwBygw1CVtAdPXbUgxDGlJCMw7G3r1DH4INlHQOQmzQIo+h1ufuk6Ho1OTnhp+xgTImION\/GxoZWvzgc2Hztql2bTV0B0DTwx8+\/vCgdP8\/1+NiYmpC5Y6+SDXeKecTeI7mAvV0guf55ZatyzqTYdzrqngBIZyINT8sSuwvyLZhFJSMN\/driTl\/D4ajfIPVkhkOiIecfvOeOi708fc8ApHNUsqjjC\/JteIprgDh9ZFqNjhya30LksT2lcIB8PuZ1rRzRRXE2ftSNynW7yASAplEtDVV0Vq5rlAHAMdn2zUtEuHH4KAF4y3pqTZJVshpNpgA0D\/XHa1+ek2\/Iv8l1jTIkogbjxLiXijBkSAn7jrXh25JEsCWL07jWhLrF1tusXOzW1ksbci4ANJ25EtGUA8bqSFWNyLEi03sj8t9TfzkAuPjPfkDE8NixQG9MYEAXP86iOJlvqg31atbAM6PNFYCmk6W\/Xj8Z7oSnRSqeUhK6MeX2ESmJB01Yp1KNj5zH1\/sA1ddSbpOpZ5cV\/dwAyB2nSRiJyMPbA5POydsD3I4AjfIWe4IvCjTfZ5mu2HiLbvtZXze+yaxbT2iP5AY1rhbCIDwpvxHxiPw6BA5MIigTbvdF2XJA5mzVpTCMrup14VtqMS9Jl\/bYfQdg2oNoTxqbUcI5sli0FkbhrGRK3B\/XD2rmPvnyyi6a8t8mrikvE4ldJmNecYcsL3RZl+nPI\/25\/ALM1UpQWdmV+qJL+JzVaXE9XXlwf\/4f1AC7LPmFaqYAAAAASUVORK5CYII="
+ },
+ "copyright": "Copyright 2023 Dify",
+ "privacy_policy": "https:\/\/dify.ai\n",
+ "position": 2,
+ "chunk_structure": "hierarchical_model",
+ "language": "en-US"
+ },
+ {
+ "id": "9ef3e66a-11c7-4227-897c-3b0f9a42da1a",
+ "name": "Simple Q&A",
+ "description": "This template generates structured Q&A pairs by extracting selected columns from a table. These pairs are indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
+ "icon": {
+ "icon_type": "image",
+ "icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b",
+ "icon_background": null,
+ "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAUPklEQVR4Ae1dW4wcxRWt6pl92rseQ7xgYocdIALFeRglkSBEYkkkwF\/YEoT8RDiKwkd+wEryG+P8JpHNTySEQuwkHzEgYX6C4AM2UghISYTzMMrDySzYeION4\/Wu7X3NdOWe6qnempru3Znpefbca427uroe3afP3lv3Vk2NFF0ihdnZSZEVkyUpJqWSOSFUzlPezbg9X6qcFILySOi6Plb8R+WVCq5X5Kf4RMo5wog+liiB8zCPcJzBVV\/67xFwc0r6MxlF9YpiJr99u76G650Ueq\/tlcKlQq5UGprKKO9eXxDZpNgtVBSp2ntffdrbSSXEDBH5z0qqk5nM8nR+az4kcDswaQsBCxdmp4Tw7lVC0VHgUyWe5wmP2JjJZoSkIz7Ig0g64hySKefpk\/J\/prydl\/a0UoQmfWzBuW\/l+aUSlSF6KV+X9X06+kqU6Ih0jJwkpKeF8o7lJyZOxpRpWnbLCAhN5xdH9lMHD9HdTpk7BlmymYwmWoaOAwMDIeFMGT62B4ESERRkLK6uilKxJFaLxcqOpZjxfXXotontRysvNO+s6QQE8URx9AklxZP0Z5fDrYJ0Q0ODYmBwUJPPaLPmPQa31CwEQMKV5WWxulpc05JERBpPHs1vu+FQs\/ox7TSVgKc\/PLfXy3iHzZhuIJsVw6MjAkeW3kNgeXklIKPRjC3QiE0hYOHS7KQqyp8TxFOAmYkHFNIj0IpXr1wNNSINK094WXUgvzW5J52YgO9dPP9ESamnYG5hWkdGRsTw8FB60OcnCRGARlxcXDREnCOH50DS8WHDBAzGeiMH6a\/hSdzh4OCA2LRpU+ithnfNiVQhAO8ZJAQZIUp4R27dNnGg0YdsiIBlk\/sSdbqbtV6j0Pd2vaWlZU3EcijopMyqfY2Y5LoJqMlXkm\/A0UCcbnx8LIzX9TakfPf1IgBtOD+\/EJhkeMoZdV+9JKyLgDb5EMMbG9vM5Kv3raWsPEi4sHBFIKZI06R1k7BmArrkg+bjeF7K2NTg48AMQxM2QsKaCMjka\/DN9FG1RkkYTLZuABTF+F7CmA9mlzXfBmD16WVYQ3ADHAFXwBkdKdkAjw0JWLjw38PUxm44HBjzsdndANE+vgxuWH7Bbr+46eBGcKxrgk+fn91PK1R+joa3bBlnh2MjNPm6RgCOyeXL83oFjiqJA7feeOOROGhiCRiM+7x3MMMxOjrKsxtxCHJ+JAKIE167dg3X5ihGeGdceCbeBBexqEDlsIqFp9YiMebMdRAAZzA7RpIrrxOILB1JQJheWu64F+M+zO2yMAKNIGBNzU6d\/ujc3qg2IgnoeVIPHkE+syo5qjLnMQLrIQDfwSgwWu9+OMorriJg4eKHB800G8wvCyOQBAGYYr0elEIz\/sqwXrhit1dFQAoo7keBTZs32eU4zQg0jAAWJUOkJ59wtWAFATH2g\/YDY3kVc8N4c0UHAYtP+ntC9uUKApqx3+AQLyi1QeJ0cgRCLRh8SS1sMCRg4fxZ\/f1cOB089gvx4USTEIAWLM+iTQVf0w0aDgnoe95+ZA0M8BeIAmj4\/2YjYBQbTZRMmbZDAkqVuReZbH4NNHxsNgL4Wi6EnBHNNaQ1AQuXLuVoCcNuZLDzARRYWoEANiQIzTC+P06iCVgqrUzhhMkHFFhahQDIBxJqKY1O4agJKJWvtZ9H+7KwMAKtRAB7\/0B8vzSFY3kMKD+Hk4GsnjxGkoURaAkCesEqtSwp3owOAg0o5CSlaTVrmY84YWEEWoAANqPSkvG00iszLnBADDtb0C83yQhoBMpOiF62jwxP70yKBAWgWRiBViMAAhqugXsetsVFp1EbP7b6Zrj9\/kQg1ILEPa8kPR2PoeBgf6LBT912BLJlTxj7gXsZpSZxB9gGl4URaAcCRgNiM3qPdg0OItJkm1kYgXYgYAhInkjOM\/GYtcx23AL30c8IGCfEk97Nod1lAvYzJTr37PS9c3kzuvfMHF3n7oV77hMEjLJTpdLWUAP2ybPzY3YBAqHD63lbmIBd8EL6+RaySujfZdO\/UtQNQHzipz\/qhttI7T28\/53vd\/zZwkkPxAFpWUIQiOYwTMdfTD\/eAJvgfnzrXfTMTMAuehn9eCtMwH586130zJ7QPw5Nc8H0j4URaAcCJg5Iu3DkSAOWnRBeDdMO7LkPQiAkIO0dyCaYKdFRBJiAHYWfO2cCMgc6igATsKPwc+dMQOZARxFgAnYUfu6cCcgc6CgCTMCOws+dMwGZAx1FgAnYUfi5cyYgc6CjCDABOwo\/d84EZA50FIGu3xK\/G77D0NE3lLDzbv+ODWvAhC+YqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgS6fi64kcd769z74t2PLoiz85fF\/Mqy2DE2LsaHhsVdN+0Uuz420UiTus788rJ4tfBPcXZhPmzjro\/vFHff9InwPEkC9+3Krusn9L27+Wk5Tw0BQY6f\/eWP9PmTQDpOdoxtEQe++CXxyB2fjisSmY92D\/\/hzeq2\/yCI4FvE8Ye+LnaOj0fWrSUT5Hv0xPGqorjXA1+8pyo\/LRmpMMGnPjov9jx\/jAjy+2qCOG\/q7MJl8d3XX6GX\/WtxZn5NkznFKk5BvEO\/ez22bbT56Mu1t1fRePnkxb+fisoWrxVOR+anJbPnCQjy6ZdPJKhH3jp3pibSwNyC2LaMDw2JnWTWbQEJv\/f6b+ysutKvFv4VWR7P99YHZyKvpSGzp00wyPH4KyeqNBNMIkzsp2i8B7JAXvz738Tb9CLPWEQ1pDm+9+ux7xLaz5Zvffbz2oRjTKk1H5lN0yZIPb+8VPeY7dX\/nK56BrvPt8k8301jzTRKT2tAkMO8fPNyQJDff+NxTZIH8reRgwAnYaf4yVf2iON7HxUP5D9piuojSIOxY5zAkTECMh\/88ldCgoHoT9IYzRbbQbHz10u\/+I+\/VVx2HSWMP9MqPUtAvOgXSKvZAvKBIHECwjy7Z2+VJxyMHZfiqoX544PDYdokovLMtVqOgWddaX4Pfvm+UHOjDZRJqxnuWQK6phHkgsdYi\/zgnkqSBiSIHuzD1BqByXUdlx+++bq5rL1hmP16xB374TnuorAOtLctr8WMEe0yvZjuWQJicG4Lxkg2WexrbhplYZZteZtMcZQgzmeLcTSggbUnbY0p6w3toF2MTW0xxHv49s\/Y2eIFMtMYX6ZNepKA0FjvOgR8uM643v23OGPBGE\/zkds\/TR7
vlvC9Y8z47VdeEg8+f1QgbQQB41o1sKkDEtttIN+QOPiDChwo5OOZT1FwPW3SkwQ8dfHDqvew6\/ptVXnrZezYvEYqlIN5jRI4Hj8mB8aWVyk2B0IYgTaFg1OvvPXB+xVVYH5tEw7y2\/LcX+OdJbtcL6V7koBRANdqfk3dXduqCXvG8nhNORyhjVzv2VyH04MwTr39o36c+TVt3+967KSl02aGU0NA89JaccQsiOssoB9ox\/snK015rf2vZ35NG1FmGNo3TdK3BLy8vFL1HreUg9bmAszsnuPH9PyyybOPuP44jQdtrQRTji+Dm48bKjL1XUK75teUc82wqzVNuV499iQBbafAAB9nPs1192gHmM0114weohDLqYuV3jYWBtj94\/qh371hmqgKjJuZmLBAOfHcnyuDy9B2CKq7H3tMiKpwWmzCu+322nlPTsVFBX\/fJSLsHK90LNZ7Ge86jow7+4DpMVd7YawHh+ORO3aRF3wsdEQQItlBK2FATiwDs8UlNa7Bm3VncNCX25\/djp1Gf9\/67BfsrJ5N96QGhFapiuNFhFG+S4sD7vnlM\/oDU2oHkd3VJ66mcafHEB4xfcJcYvmVLZhNwZSeq9mivPPn1pn6s9uMS79GfxxpkZ4kIMB3A8TQCjbBUAYa6TItSD1D8TaYSozXINA0rgZy44iumXOvQ2NiftkWmGK73QduuS3SO8aiiCSSJjPckyYYLw8myF58ahwCxOOM2YOmevbBfXrZFeqAhFgL6BIA5Yx2Q7ko0WNGZ\/YEWhHerDstaOpHechYeGqTFGWf3bNPe9SmXtQRwW879ohnT8NC1Z7VgDDDWHxgCwiGVcW2JsTg3n5RUdovagbDNckwra5WRN+oGxUjxJSamdWw79E1\/dCk9qod\/CFEfVxv2P0jsvvopXTPEhAgg1iu8wAS3vOrZ\/Q8LTQTPiBOnDcKEkcRxQ0Co90Hn\/8FeaHva00EbYQ0NKobUsG9naXV1lGEdYnzMDk0tYh7PzDDaVgh07Mm2Lw0LK\/SWs+ZStMvyJqrNeXtIzRX3PItaM7AzK9Nf5kFqHYbcWkQFmPCn3x1bZwIz9o1v1FmOqpNE5S2zXAaFqr2tAbEi8L47ZWvPRapxaJepJ0XFQu0r2NdXj3hDmhTO0YIx8geH742U7nuD9q7ntCRa4bTsFC15wkIwsC8wiPFSmiY0zhzi3x7vBZoqbX1fDb5TBokRNuuqTfX0SbGbIgRBvPCcILWVrEgPINxJzSXG+er1fyavlwzrIcBCT1q03anjvI\/F\/6r0Pl1123t1D1U9OvuadzoHtEgF14QtNwOClBDU5ovEmEdH0y0kVo1HcZ0py4G3zdG3U9tIw22OfjOsWmr247NwrPZz\/W\/\/13STfb8GDAOGKzP0+KETpCHsAe+xmnGY9BSWIUcp+WChqBph4NwTUSbpgwf60MgtQRcDwaYyDfJXLN0HoFUjAE7DyPfQaMIMAEbRY7rNQUBJmBTYORGGkWACdgoclyvKQgwAZsCIzfSKAJMwEaR43pNQYAJ2BQYuZFGESACyjlUVr6eEGm0Ha7HCNSMgFIh1+Y8IVVAQBFm1twQF2QEGkEgJKAUc10\/E+LOZTbywFynexHgMWD3vpu+uDMmYF+85u59SCZg976b1N6Zb5wQJeeyUokZcj8mS74vPK\/zfGx0\/V9q31YKHyx0QoQiL5iFEeggAp4vBMcBO\/gC+r1rTyqld2ZUiqjIwgi0AQG\/VNK9SCln2AS3AXDuohIB44Mg11NSzCDhkxPCwgi0AwFjbX3lv0d+bzAXHLrG7bgD7qOvEVjzguWcVyrPBQtbL\/Y1PPzwrUbALwXW1sMYMENxQHRYLAYDw1Z3zu0zAqEGVD7FAYsBAcNMxocRaDECmPTQQtzz8tu3z+AETgivCdSw8H8tRsA4vOBeEIYpe8KK1wS2GHpuvliOAdJC6JNAQxOQ\/A99srq6yggxAi1FwAShhV96Dx2VNaCvT9bY2dJ74Mb7GIFisaifXnm2BhSZaeT67AlrcPi\/1iFQKnPMk96aCc5kBqfRJTQgOyJAgqVVCKyWNaDIXJtGH9oE57dunZNCTCMUU\/Q5Htgq8Pu93ZB85IDkt+bnQgIiQUGY3+K4slL9G2rIZ2EEkiKwshT8xK1SJc01tBc4IUFiGhkrET\/ih3wWRiApAkYDeiJ71LQVEjC\/bfu0McOmoCnER0YgKQLLtF2yDkDT1G9+YkI7IGgzJGC5g5dxXLq2WD7lAyPQHASMZZVCHbJbrCRgdugotuqABmQtaMPE6SQIhHzS2m87cWxNKggIb1gJ\/2lcZi24BhKnkiFw9cpV3QBFWY65LdGwr1IKly7l1OryO0KKydHRETE8PFxZgM8YgToQwNjv6tVrtPuVmLll4sa8W7VCA+Kijgl68gDSi4tLHJgGECwNIYBlV+AQxB37mQarCIgL+Y\/dcIJUow5MX7kaqE9TgY+MQK0IYBinl\/kJcSI\/UTn2M21EElBfLKpvaoeEVsgsLQUsNpX4yAhshMASBZ2X9aQGfe+jqLRFjaoTS0AsFpSidAiVoEbDVaxRrXAeI2AhEJjeIJQnlX\/ALHq2ioTJWAKiRH7bTUeU9J\/GHPHC\/AKTMISNE3EIgHzgCjhDX798Os70mvo01FtftFdcXHmD3JjdmUxGjI+NCeltWG39RvlqKhEA6eahqLDqmRY5k9d750YPuq4GRGXtFRf9fXCj0fD8ArGb95PeCNe+u+6Qb0aW1L5aQKhZlRVmZydVRr6B+CBrwlqg7Z8yEeS7b71xn41MzQREJZeEm8c2i0wX7CloPxCn24sAxnxXFq4YswvNVzP5cKd1ERAVbBJiQ8ux8TEmIYDpQzEOh1nlUi\/5AFndBEQlTcKs9xIcE9piS4yMDPOUHYDpI0Gcb3FxUXu7cDgw5qvV7NowNURA08C\/Pzp3RCrvCZwPDQ6KYZo7ZpNs0EnnEeM9LC5YKX+FF6EW7+ryU\/l8sMS+3qdOREB0Vjg\/u19J7zBpwxxMMrThEP0iOUv6EKjQerRsjyJ9h27dduORJE+amIDoHCZZZOVh2ux8L85BxE2bN4mBbNf\/Dg5ul2UDBLCeD\/O61hrRaUlTtY2YXLerphDQNKq1oZAHEapBHgg4ODTIGtEA1GPHKuJRLFhPrd1w04lmPUpTCWhu6t8XZp+SSj5miAiNqMk4PMRa0YDUhUeM7\/Dd8FVaRLBMX07DeSAwtzTWu7J0pNGxXtzjtoSApjNXI5p8kDGTzYgsHT2a3svSh6W9CIBc+GA\/IMxwYccCvTFBSDp9P9NEkJfFlcWjzSaeedqWEtB0Ujh\/frcvivulzNyL0I3Jt4\/QkvCgEdbxMsER6eB8jaD6nPJtMeXsvLSnDYHc50RsDqLoaDSYXpNXJhw2IkW+jt25lYPzaaLmb2mOdhrflIwu0rzcyjfZvHZjWyoUCjkxNjpFG1Tv9oT3OVLyk3GkjG2ELzQHAdqWj4ZKJ31Vos3CaX+ghWvTrdJ0cTfcdgLG3Uj
gSRMZpZejP9FJ+vvNecq7WZeXatLUU0LmhFQ5c66PivKofEVe6k9oc3mzv7f1rPjpteCUrqvgR4h8SbvRU9gE+4HrLZlpZ9JmeLBWtw0n\/w+IOsoy1qfzJgAAAABJRU5ErkJggg=="
+ },
+ "copyright": "Copyright 2023 Dify",
+ "privacy_policy": "https:\/\/dify.ai\n",
+ "position": 3,
+ "chunk_structure": "qa_model",
+ "language": "en-US"
+ },
+ {
+ "id": "982d1788-837a-40c8-b7de-d37b09a9b2bc",
+ "name": "Convert to Markdown",
+ "description": "This template is designed for converting native Office files such as DOCX, XLSX, and PPTX into Markdown to facilitate better information processing. PDF files are not recommended.",
+ "icon": {
+ "icon_type": "image",
+ "icon": "9d658c3a-b22f-487d-8223-db51e9012505",
+ "icon_background": null,
+ "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQfElEQVR4Ae2dT4wbVx3H35vxrjd\/dmMnIZA0UrxtilQuTYUEB5CySD2CSJE4Vl0uHIpQk1sFh7YHqt7aCsGBS7fqEQlSwRGpi8QFJMRyQoKEdaR2U9qkdva\/vfYMv+8b\/7zjsZ2xPTP22PN70u6bP2\/en+\/7+Pf+zMwbrVLiNu9XSpSVUpP+tOsUlKsKtH\/l4Z6rXNrW2uyrc6cthAs6hMVfllyVCou\/Y+eq6sM9x3+sfO6Uxvl7Squqq6yyTT7tl5cvFss4MWmXG3cGNjcrhWZerWjlXFdKlyj9a\/RXcogyOCMX\/nsbBJ93vOWZMPLPKFCg\/\/g7dqRZl070y2Wn6VfteHKqu1tfUGC1QTqX6aJ\/utrasGtqfXm5CEDH5o5zl2CSZN1WKPrrBNMKlR\/bXc6yLKUtrXK2rTSJhj8c+3zboeN0riXkVwrdvxkO3xXpDB\/AD5N\/nFxM7P\/vEbUhLec0m+r8okXhHBPWcRwCkCBskk\/bPZ2B0l23ctb7yxeKGz3DxHgwMQBh6Zy8s0oofd8PHWCxc7YBzSbY5ubm2sD1KtdnBKDfXViy\/LuyHVBgGL2aBChgPGocqQZtN44agdhU2XWcN65ePr8WPBHXfuwAAjy1oF6hX9pNyqRpIgBdPj+v5ufmDXxszQYpxDCCDhLfrIeJqhcgrNVr6oh8n5UsW1qvUb\/xjbj1ixXAO1sPblDD+TZlsoSM5uZy6uTCCeNjfxQXVdBR0pzma+LUq1arGxh9ljF2ixgLgBjBUv\/jPW5q4wCPIYhTUI5zlv0k9AKAu3t7fot4myzirThG0pE7VJufVtDc\/gPwoWk9efKkWlpcjGT1ZhmQaSwbDEqhcEadOnXKDAypDDdQ53c+frAatTwjA4i+3uZW5W3Hcd+hTBTm5+dMJhcW8lHzJNenVAH045eWFk1\/HnVOsxPv3d16iC7XyG6kJhhNLoH3e5pDugard+LECZUUeEk0KSOrNQUXjkuvw8OaOjg48KaCaOrGsvQLozTJQ1tAA5\/rfgT4ME935sxSYvBNQX1nNoswOKh7MAAWqEn+CGwMK8hQALbho1Eu5vBgjk0Ghk1Vws+EAqh7MAAWyOFu1tAQDgygwDcTzMReiKgQDgRgL\/iGmUyOvdQSYaoUAAujWsKBADQDDl+zK\/Clqv5TkZkuCGmQau6KheQuFEBMtaCTCVO7uHi6\/VBASLxyOoMKAEIwYsYFGJjkndfCZHgsgHfuP1il5yhuMt0m4rAY5XymFeA+oddK6ps0T4hnAvq6vgCi36ddc1\/XzPMJfH01lBMBBcAK5oY9p18DS4Eg7d2+ANKQGjPcBcx+JzXJ3M6FbMycAmAGd8fIFfCcQL8C9gQQTS9dcKOT5H5RyHFRoLcCuHeMphjPCdzZqtzoFaongNT0ms4jzKg0vb1kk2ODKAD4uCkmDN\/uNSruAvDu\/QrgKwE8NL\/iRIEoCqApxtM05ErOvNM1IOkCkO4uryL0aTKf4kSBOBTAQ8nGaf1K0Ap2ANjq+5VAbIvaONKXODKugI8n856QX44OALnvl5+XZ\/r8Isl2dAXYCuIlNX9sbQA3P65coxPS9\/OrI9uxKQAryCNimhdc4YjbANKboqs4OOd1GPm8+KJAbArwoJbetlvhSNsAKktfx0Fpflka8eNWAK\/lwpElNKyZbfzDyMTJuxVsnz1bhJcaF3zEPDUZm5KMpOlFfqzcUK0+Mo\/xWzVdxDIgxgI2880V6Ckj3ymhakqziT4gVsWAw\/pA8A2A2tUYgKic5Z3EtjhRIAkFsPaPca1+oNcH1PpZHMzROi3iRIEkFWi9P4KOYAnp8FJTZse2PR5xIi0uTX2YtGgyzfnAYlRw1Bobo8fEmSa4Tec0l1DynmoF0A9suRJ8ix8WlKdeWrKIl6gCAJBZA3sWrQhXQopWCpvfRJWQyCemgN8KWtptFpATWu1oYhmShLOlQI6nYprNEi2Kq0sovqW5O4g9caJAcgqwBaQlmQu0gHBrFVNCUZwoMA4FGECwZ7na6wO2D44jB5JGphXgQYilrCvtdlcAzDQTEys8AaivIHVbbsNNrBKyljAbu6Zyi20LmDURpLyTU4AHvDTsOCMATq4eJGVSAGNfMw+IrxSJEwXGoQDf9HDxCggl6AEoE9Hj0F7SCCggTXBAENkdrwIC4Hj1ltQCCuQ+33EVlo+pWw49pRA4G8Nu1Of5vvpqNYZcZDeKf79lelgjC5DEOzn4Bt32jvcRShp6uNIHHLl65MJRFOB5QLqW7gXLIGQUDeWaCAoEAYwQlVwqCkRTIIcvasOdjelD0En0GaIVUa6OU4GofXrOS67hcZfAsIOTEF8UCFdAAAzXSEIkqIAAmKC4EnW4AgJguEYSIkEFBMAExZWowxUQAMM1khAJKiAAJiiuRB2ugAAYrpGESFABATBBcSXqcAUEwHCNJESCCgiACYorUYcrIACGayQhElRAAExQXIk6XAEBMFwjCZGgAgJgguJK1OEK8BrR4SGnNETwnYhXf7uvfvf3+kilWf12Xv3su\/wpei+KqO+sBPMXNb6RCjbBizJnAd\/64Un1zMXhP0fxzCW7C74J1tvMJJ05AFFzH\/z4tLo8xLI4CPvrF+X7yUlQn0kAl05oA+HSQvhyJIAPwD4xBLBJVNSsxplJAFGZAApghblfkeUT+MJUGv18ZgGEZOjXoU\/Yz\/38eydMmH7n5Xh0BTIH4F\/\/Sx+m8LkffH1e\/fT5Bd8RbxPHXvpW55fj\/7XV7AonB6IpkDkAf\/LBnvq44i0LwdIFYcN0SxBKXPMyXSsuXgUyB+D2gate\/M1uF4Robr\/5ZM40ucG5PsCHaz4JgBtvVWQztswBiGoGSLCE24e0RKLPYcARnG5BGIQV+HxCxbiZSQChH\/pzb\/7hoENKTM8ER7wII32\/Dpli3cksgFARt+R++afDvoLi3Ki37fyRYqCDv1Hd81+bi3T9qOmO47qZvxccJiIgg+ULjnjX\/lJ7LJxh8fJ5gOef6hkW6KjXcz7S6mfaAnKl\/IKaWf\/0zN9oqubNP3Y2zxx2GD8ID0AcxhL2uh4DpVlys1WaCDWDUe44HFvDMEsYhI\/z9g0C0P9j4ePT6osFTLDmABke\/wq6MEvYDz50Fx7XZw2mMw37YgETriW2dGz5OLngPh\/PEnwos1hArvkE\/cdZwmCyvcCcRcvH5RYLyEok7PezhGHJRnmCOyzuNJwXCzjGWuhnCftlYdbhQ7kFwH61n9DxQSHMAnwCYEKQhUUbBmFW4BMAw0hJ8Hw\/CLMEnwCYIGCDRB2EMGv
wQaOZHwXH\/Z5t3PEBQnb+bT426\/7MAzgNFZhF8LheZBTMSog\/EQUEwInILomyAgIgKyH+RBQQACciuyTKCgiArIT4E1FAAJyI7JIoKyAAshLiT0QBAXAiskuirIAAyEqIPxEFBMCJyC6JsgICICsh\/kQUEAAnIrskygoIgKyE+BNRQACciOySKCuQe7DjLdbYyHUu2sgBxBcF\/Ap8th0PJ9UWd2IB\/erK9tgVAIBVpOq6nYs1jj0nkmBmFPCxVrVcpQXAzFR9OgrqB1Df3fpik7JVKhTOKMuSFjkdVTTbuXAcR1Wrj1DIshA323Wd+tIJgKmvotnOoAA42\/WbytK5TnvAi0GIKiOXTjOe+Z1UllgylSoFeBBCn4qsigVMVdVkLzMWKESxHZkHzF7tp6DE1AS7ZjzsutIEp6A+MpGFpuN99FG7WqZhMlHjKSukv7G1tNsahNDkoDhRYBwKcGvrKOeepXTrXvDx0HgceZA0MqwAj4LBnuVq17sXrNpzMxmWRoo+DgWardbWVVaZBiF2GYk2GvI18HGIL2kcP3llwwLSAoFliNI2i6KQKJCwAr6bHmVr+WKxjPTwhILMBSasvERvFABrcGCP74SUzRH\/+NgckH+iQLwKNI+7ehuImZfoxU7p6OhI5fP5eFOMGFtc7yBEzMbUXn5hiW1MOorAk9Bk6+4hR17uHNfs+OhMR24lFzOnQKPRMGXSyjUW0ADoWu46jjZat0hMCPknCiSgQKPpzba42joG0K7Z60gLFlAGIgmoLlG2FWgceRbQrql1HDR9wOXlYvXO1hfrNBez4hCE1hx3DdvXpWYjbX2a1AjTykia+8wMH2V1A8why+0eKs0D\/hkH6vXjD6dgX5woEJcCh\/WaiYqeiDasYacNIL0St44DNQEQMohLQAG2gPa8tcbRtwF8+mJxne4Gr+OOCAfkQOKLAlEVqNVq5mYHxVNevlA0AxDE2QYQOzQ0\/hD+\/uEBPHGiQGwKcMvqOvoNf6QdAFo1YxqrsIBiBf0yyXYUBXw8la9eLq754+oAECMTmoZ5FwHECvplku0oCuzu7XmXu+77wXg6AMTJXN16h7wyqD08PAyGl31RYCgF\/H2\/p54493rw4i4AYQVpwaJbCHhwcCgT00HFZH9gBfDYFRiCC\/b9OJIuAHHi6qXibR4R7+22zCdfIb4oMKAC6Ma1Hr26Hez7cRQ9AcRJW+sfkVfFEzLSFLNc4g+qwOFhTdVr5qZG1dJei9rr2r4Aeg+qekNm0xTL0h299JNjPRTwml5vKo+a3lv80HOPoJ3zgMEAT10qvkO3Td7F5PT2zo6sHxMUSPa7FAB8YAXMgJ1+TS9f2NcCcgD7yHpd081jtOU7u7syKGFhxO9SANAZRvDIvas2rl4+d7MrUOBAKIAYFWutX6Dryk16lmtnmywhJSROFPArYFpJYgOMkCtblmHGH6TndiiAuMq8PKL1d2hTIOwpY7YPdsFHrDyu3+dXayAAcUFPCGVg4tcyk9umz+e3fEPAB8EGBhCBgxDKwASqZNfxgKPd7A4JH5QbCkBcwBDywOTR9rbME0KYjDnM86HuzUQzDThorm\/gZtcv1dAA4mJA+OSls8\/xFM3+\/oHCDWf8IsTNtgI80t3f329PtVj10eCDUiMByBJjmO227phg1htNMm4+i5tNBWD18H2Po\/oRClh1lHsLDPD7HaOUOhKASPDqxeIamd\/n6HHW2zDHe3v7JpPyPOEo1ZHOa1CXMC5s9aj7tY46f\/rSOTw5FclRXPG5O\/crq9p1X6MYS4g1R2\/X5efnI622EHzLS96Kg7L9XZx6ATw8UOAzJmU8KYWHVfrnYLgzsQLISf\/nk4ev0y\/kJdov4Rg+AQYYF+bzxsexQV2cgg6a5jSHi6IX+nd4N7x+VKeuVN308VpamAeV8axolOa2l66JAMgJBS0iHweMOdtWuVxO2Zat7JzNp7r8KIJ2RZaBA4PqBdjwh6edMI2CFQsAH46xIzjoRTX9oVVTa3GD50uDN5PzNz+rXGvWnVW6PXOdinetV0qwkpZNKwZrTVB6PrYf7NA6mgQpuy+fsZXGxyV8DuHwlyXHAAXL\/GnFW3kA6zAjzJdocSL0zTk8FiLFtpk+CV5M+4CuiXfE6TVdvCnZI0ish8Zea5ublUIzr1a061wjap6lDJT6QYmS8hfdudTnFyOPmziqmfSH1KtMImzQdNo9AIflMpKydP3EHjuA\/TKyeb9Sot9uiVbtLwBKepanQGGvPNwzTUKJrzt\/2irQEZzzO+wHj\/nPz+J2lQqFvw73cNcp4wAZOXqIRFXPnTJVfI+ajapL+6RdmRZeKWMuF+Em7f4PpXL0Ed9VCt8AAAAASUVORK5CYII="
+ },
+ "copyright": "Copyright 2023 Dify",
+ "privacy_policy": "https:\/\/dify.ai\n",
+ "position": 5,
+ "chunk_structure": "hierarchical_model",
+ "language": "en-US"
+ },
+ {
+ "id": "98374ab6-9dcd-434d-983e-268bec156b43",
+ "name": "LLM Generated Q&A",
+ "description": "This template is designed to use LLM to extract key information from the input document and generate Q&A pairs indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
+ "icon": {
+ "icon_type": "image",
+ "icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04",
+ "icon_background": null,
+ "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQjUlEQVR4Ae1dTYwcxRWuqpnd2R\/veqzgxXaw2YEgRSDBEkJEwsFLDkE5xRwicogUR0g55GJWKGfjXBPJyyU3hLkFKRLmkohD4uVgHIVEOCggRTGZNTbesDbysj\/end3prryveqq3Z6bnv3t2tvu91Uz9dHVV99ffvqpX9bpGigGR4tLStMiKaUeKaallXgidV1o9iMtzpc5LISiPhI6bsOqLymvtHa\/KT3BCyhXCiD4B0QJpP49wXMRRV7rXCbgVLd3FjKbzymKxcPSoOYbjeyn0XPsrxbvFvOPkZjNanXQFkU2KGaHDSNXf60ppa1e1EItE5H9qqa9mMqWFwqGCT+B+YNIXAhZvL80KoU5qoSkU+NSJUkooYmMmmxGSQnyQB5EUIg3JVPJMovJlywfzkh7XmtCkT1CQdgN5ruNQGaKXdk1Z16XQ1cKhEPEGcpWQXhBavVmYmrraoExk2bEREJrOLY+epgZ+RFc7a68YZMlmMoZoGQqHhoZ8wtkyHPYHAYcICjKWd3aEU3bETrlc3bAUi66rz31j6uiF6gPRpSInIIgnymNntBQv079dHpcK0uVyw2JoeNiQz2qz6G6Da4oKAZBwu1QSOzvlXS1JRKTx5IXC4fvPRdWOrSdSAl774tYplVHn7ZhuKJsVI2OjAiHL\/kOgVNr2yGg1YwwaMRICFu8uTeuyfIMgngXMTDygkByBVtxY3\/A1Ig0rL6qsnisc6t2S7pmA179cPuNo\/Sq6W3Sto6OjYmQklxz0+U58BKARNzc3LRFXyOCZ63V82DUBvbHe6Fn6b3gZVzg8PCTGx8d9a9W\/ao4kCgFYzyAhyAjRQs0\/fHhqrtub7IqAlS73bWp0hrVet9Dv7\/O2tkqGiJWpoKsyq1\/opkvumICGfI68BEMD83STkxP+fN3+hpSvvlMEoA1XV9e8LhmWckY\/1ykJOyJgkHyYw5uYOMDk6\/SpJaw8SLi2ti4wp0jLpB2TsG0C1pIPmo\/n8xLGpi5vB90wNGE3JGyLgEy+Lp9Mik7rloTeYmsLoGiO722M+dDtsuZrAVZKD6M3BDfAEXAFnDEzJS3waEnA4u3\/nac6ZmBwYMzH3W4LRFN8GNwI2AUzbnn8bCs4mnbB15aXTpOHyhuo+ODBSTY4WqHJxw0CMEy++mrVeOBoR8w9fOTIfCNoGhLQG\/epD7HCMTY2xqsbjRDk\/FAEME947949HFuhOcInG03PNO6Cy3Aq0Hl4sfDSWijGnNkEAXAGq2Mk+YqfQGjpUAKi6yV3x1MY92Ftl4UR6AaBwNLs7LU7t06F1RFKQKWkGTyCfNYrOexkzmMEmiEA28EqMPJ3Px9mFdcRsPjlF2ftMhu6XxZGoBcE0BUbf1CamnG3R4zjSrC+OgLShOJpFBg\/MB4sx3FGoGsE4JQMkUqeqdWCVQTE2A\/aD4xlL+au8eYTaxAI8Mm8JxQ8XEVAO\/YbzrFDaRAkjveOgK8FvZfU\/Ap9AhaXb5r3c2F08NjPx4cjESEALVhZRZv1XtP1KvYJ6Cp1GllDQ\/wCkQcNf0eNgFVstFAya+v2CSh15iQyufu10HAYNQJ4LRdCxojhGuKGgMW7d\/PkwjCDDDY+gAJLHAhgQwK\/G8b74ySGgI6zPYsEkw8osMSFAMgHEhpxxmYRGgJK7Rrtp2hfFhZGIE4EsPcPxHWdWYSVMaB8AomhrFk8RpSFEYgFAeOwSjVLmm9GA54GFHKa4uTNWuEjEiyMQAwIYDMqIxlllF6FcZ4BYtkZQ7tcJSNgEKgYIcZtHxnK7EyKCE1AszACcSMAAlqugXsK2+Ki0bCNH+O+GK4\/nQj4WpC4pxypzHwMTQ6mEw2+674jkK1YwtgPXGW0nsYVYBtcFkagHwhYDYjN6BXtGuzNSFPfzMII9AMBS0CyRPLKzsfsZvbjEriNNCNgjRAl1YN+v8sETDMl9u7e6b1z+SCaV3aNbu+uhVtOCQJW2WnHOeRrwJTcO9\/mACDgG7xKHWQCDsADSfMlKC3wu2zUBbMVnGYe9PXe\/UUPzAOSW4I3Ec0E7OtD4MY8BFL7AsiJ3\/0m0Rz47Je\/2hf3x2PAffGYknuRTMDkPtt9cWdKmB+HprVg+mNhBPqBgJ0HpF048qQBK0YIe8P0A3tugxDwCUh7B3IXzJTYUwSYgHsKPzfOBGQO7CkCTMA9hZ8bZwIyB\/YUASbgnsLPjTMBmQN7isDArgUnfa12T5\/6ADXOGnCAHkYaL4UJmManPkD3zAQcoIeRxksZ2DFg7cPYL\/5ttdfdbjqtY17WgO0yhMvFggATMBZYudJ2EWACtosUl4sFASZgLLBype0iwARsFykuFwsC+8YKjuXuG1R65dZn4sWLb1UdfevUT8R3jx2vyuNE7wiwBgzBcHVruy735upXdXmc0TsCTMAQDFe3t0JyOSsOBJiAIajeXKvXdmF5IadyVocIMAFDAPvkzu263Jtrq3V5nNE7AkzAEAxvhGjAK5\/fCCnJWb0iwASsQRCa7pM7yzW5QqALvsGGSB0uvWYwAWsQvPL5ZzU5u8k\/\/PtfuwmORYIAE7AGxvkP3q\/J2U2+\/tE\/xGqJLeRdRHqPMQEDGJ7\/4LIIG\/\/ZIqulkjjfhKC2HIftI8AErGAF8rVDLmhBlGWJBoHUL8V5Wu2yALHaFRAV5809\/T0xmRtp9zQuF4JAagkIAr3+0d8N8RDvVEDYd4vXDAmfOXZCHJ+c7LQKLk8IJJ6AcCyw67iYYsHnr2Tp3ohgYhlTM6\/85U+GSI99bUo8QCR89D4KJyaNZpzM5ciB4QQTrQkCiSdgrVdLEyx6OvTxl8sCH2jFoCT9XZbgvXYTZyOkG9T4nMgQYAJGBiVX1A0CTMBuUONzIkMg8WNAeDLDysUKBowGeLog\/DhkvbcXVI+T4fHM108YA+SBiYOmqgcmvbCXepN+buIJ2MiNHiSEhwuW3pqtfjQjAKzclx7\/Nn2+xfOBzYBqcizxBGx079BSP\/7mQfF84REzF9jp6sZLjz8V60R0Wqzn1BLQEhNaDCsakHZJOPf0s\/45th4Ou0OAjZAKbiAhutNWYjVfq3J8vD0EmIABnLy13VwgpzqKbttqy+ojnOoWASZgADnPqHgqkFMdfekJNjaqEek9xQSswbBZN\/yD6UdqSnOyVwSYgDUIQguGebY8Rk4Gx3lerwat3pNMwBAMnwnZggOeLizRI8AEDMHUrmQEDz1K7lYs0SPABAzBNIyAYXkhp3JWhwgwAUMAmxyud7PH2JAlegSYgCGYTo4M1+Xyux91kESSkfqluDAU4UaflrXYsPvvZx
5rwH6izW3VIbBvNGC3v6PRjSbr9Y25OpQ5oyEC+4aADe8g4gPv\/vc\/4teXL3XtIxjx5SS+OiZg5RHj9c35v70vrtzibdj6yfrUExDvCb\/y5z8y8frJukBbA0vAbsZuuK92x4p2nNdsPxg4nrK7fYAtMUQHloAx3Kup0hLP22otfEsOvEfy2+\/\/kJ0P4noIgXpTRcBWBgaI9\/J3nuXfAwkQJO5oKgjYysDAOu\/ZZ58Tzz\/E\/n5xE662fiKgXBFC57WrhVSy9vi+T7948fcNDQzPA5pfq+z3Q9Za2yZXskLqFaFFXtOXpL+kSaNpFTYw9u5J+wSUggiYMmEDY7AeeGoIyAbGYBHPXk3iCcgGhn3UgxkmloBsYAwm4XBVrjVCtFzJSi0WySaZdlxXKJUM7yw2MAaXfLgy3wgROnlGyOWf\/oJXMAabf1VXp1whaB6QWEnzgEkQfnd3fz1FJbU2P46rNVGRhRHoAwKu45hWpJSLyRj09QE0biI6BKwNghqVlmIREZeMEBZGoB8I2N7W1e51snuxFhwwjftxBdxGqhHYtYLlinKwFgwJ6sVUw8M3HzcCruP1tgpjwAzNA6LBctkbGMbdONfPCPgaULsrSpQ9AvqZjA8jEDMCWPQwQtxThaNHF5GAEZKUuUBzc\/w1sAhYgxfc86ZhKpYwfAJZGIE4EShX5gDJEfoq2jEEJPvDJHZ2duJsm+tmBISdhKbIdcBR0YCuSeyyk5FiBOJBoFwum4q1CmpAkVlArsuWsAGHv+JDwKlwTEm12wVnMsMLaBIakA0RIMESFwI7FQ0oMvcW0IbpgguHDq3Q60gLmIopuzwfGBf4aa\/XJx8ZIIVDhRWfgIjQJMx7CLe3txGwMAKRI7C95e1EobVjuIYGPCPEiywgY7vEBAQOLNEjYDWgEtkLtnafgIXDRxdsN2wL2kIcMgK9IlCiHw03E9C09FuYmjIGCOr0CVhp4B2EW\/c2K0kOGIFoELA9qxT6XLDGagJmcxewVQc0IGvBIEwc7wUBn09G+x0lju1KFQFhDWvhvobDrAV3QeJYbwhsrG+YCmiW5c3ammjYVy3Fu3fzeqf0IW0TMz02NipGRup\/tKX6DE4xAo0RwNhvY+Me+ZuKxYemjhRqS1ZpQBw0c4JKziG+ubnFE9MAgqUrBOB2BQ5Basd+tsI6AuJA4b77L5JqNBPT6xue+rQncMgItIsAhnHGzU+Ii4Wp6rGfrSOUgOZgWf\/cGCTkIbO15bHYnsQhI9AKgS2adC6ZRQ1676OsTY8adk5DAsJZUArnHE6CGvW9WMNq4TxGIICA1\/V6U3lSu3PW6TlQxI82JCBKFA4fm9fSfQ1rxGura0xCHzaONEIA5ANXwBl6\/fK1Rl2vPZ+Ges3FWMXl7UtkxsxkMhkxOTGRyK18m6PAR9tBAKRbhaKC1zM5OZPV+2Sr85pqQJxsrOKy+wLMaFS8ukbsTsg+Mq3A4ePtI1BDvkXp6BfaObulBrSVFJeWpnVGXsL8IGtCiwqHQCCEfM81G\/cFUWubgDiploQHJg6ITEL2FAyCwvH2EcCYb31t3Xa70Hxtkw+tdERAnBAkITa0nJicYBICmBSKNTisl0un5ANkHRMQJxkSZtXbMExoiy0xOjrCS3YAJkWCeb7NzU3T\/cLgwJiv3W43CFNXBLQVfHrn1rzU6gzSueFhMUJrx9wlW3SSGWK8B+eC7corvJhqURulVwsFz8W+07vuiYBorLi8dFpLdZ60YR5dMrRhLpfr9Dq4\/D5AoErrkdsezfSde\/jwkfleLr1nAqJxdMkiK8\/TvgqnkAYRxw+Mi6FsYjfhx22mRuDPh3XdgI\/ogqSl2m663FrQIiGgrdRoQyHPYqoGeSDgcG6YNaIFaJ+FdcSjuWCztHb\/sYtR3UqkBLQX9entpVellj+zRIRGNGQcybFWtCANYIjxHd4N3yEnghK9nIa0J+huaay3vjXf7Viv0e3GQkDbWK1GtPkgYyabEVkKFS3vZenD0l8EQC58sB8QVriwY4HZmMAnnbmeBSLIO2J980LUxLN3GysBbSPF5eUZV5RPS5k5iakbmx8MoSVhQWNaR2W8EHEvvUtQk6b8oNhywbykxy2Bau8Tc3MQTaHVYMYnr0I4bESKfDN3V3uyl14gar5Ha7QLeFMyvEh0udVPMrp6G9ZULBbzYmJsljaonlFCPUFKfroRKRtWwgeiQYC25aOh0lVXO7RZOO0PtHZvIS5N1+iC+07ARhfiWdJERqny9C86Tf+\/eaXVg6a81NP2PC1kXkidt2kTasqj8lV5iU\/Q5vJ2f+\/AveKn17wkHdfejxC5knajp2kT7AdutmSmnUmjsGADzXYd\/T+j7cbUE7Qx3wAAAABJRU5ErkJggg=="
+ },
+ "copyright": "Copyright 2023 Dify",
+ "privacy_policy": "https:\/\/dify.ai\n",
+ "position": 6,
+ "chunk_structure": "qa_model",
+ "language": "en-US"
+ }
+ ]
+ },
+ "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3": {
+ "chunk_structure": "text_model",
+ "description": "In this template, the document content is divided into smaller paragraphs, known as general chunks, which are directly used for matching user queries and retrieval in Economical indexing mode.",
+ "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/general_chunker:0.0.7@a685cc66820d0471545499d2ff5c87ed7e51525470155dbc2f82e1114cd2a9d6\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: d86a91f4-9a03-4680-a040-e5210e5595e6\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAT1klEQVR4Ae1dzXPcRBbvlsZ2xo6dcbwXinyMC+IDW5WY08IJh2NyIFRxJLvhHyDxaWv3kuS0e4v5CwjLHqmCHMgxMbVbBZxIOEAVCWXnq7hsMiaJPf4aad9Pmh5rNBqPPmdamtdVdkutVuv1r396/fX0RgpNwspvterurqjatqiatlWxhKgYUhyHeLaQFYrwh5OqE3v+SSkqtrruSS/yoRRijbBa89bRSZN7aVLYq7hu2eKBgfzSWLXpeqkkVmdfmXau4fogA8nc37CyUqs0TLEghfUOEatKhJoXspNU/ZVqOJ8mbXGHCLlq2/ZdKY07ZkMsz85Ot5E6a2T6QsB7j2oL9Aa+QxVdoArhryMYhiEMUnmmaQpJKg1/SEMgcJxzHJumm4ZjFVR+dT4MMWEp8OcNOLdI3algWQ3KQ52GbTl5LcuNGw2L8lEfExBASiHt5YZhfDZ3ZPpOQJZUkzIjIDSdZVgXbCnfI4kXlNQgS6lkOkQD2UZGRlqEU3k47g8CjUZDgIy7uzsUN8TOzm7bg4kcq0Tpq68f+8P1tgspnqROQId4JXGRXrlLalwG0o2NjRLZRh3y4ZyDngiAhNvbWw4ZlZYEEUlLXH/t6PTVtKVOlQn3H/7vnLSNazSuqELQkZGSOHCg7MRpC87lZY/A1tZ2i4x4GoiYtkZMhYCk9aoN0/6UZFyAoEw8oFCcAK24vr7uHTd+ZY7IxTRm0okJuPKodtGy7SvobtG1lstl0npjxUGfa9JCABqxXq8rItJs2VpMOj6MTUBnrGeKyzQXuwQJR0dHxMTERGu22pKaDwqFAMaFICHIiEDtv3Ti2Mxi3ErGIiC6XMuwv6Sx3jxrvbjQ5/u+zc0th4hY+sHSjTEq34/TJUcmYJN8tzHRwDrd1NRka70u35Cy9FERgDZ8/vyF0yUTkVaNEXk6KgkjEdBLPqzhTU4eZPJFbbWC5QcJX7x46awjxiFhaAL6yQfNx+t5BWNTzOqgG4YmxGJ2VBKGIiCTL2bLDNFtcUnYubEaAFpzwlFFt8uaLwAgTnJ6Q3ADHKEluaq1bX9JiqvSC5qeBPz1YQ07G/OYcGDMx91uL0iH9zq4oeYF4MyuaV3uhca+XTBtrV0QwvgUBR86NMUTjl5o8nUHAUxMfv/9uWOBQ13z4onjM0vdoOlKQGfcZ9o/YIdjfHycdze6IcjpgQhgnXBjYwPX1mjb7s1uyzNdu2Da270G8sGKhbfWAjHmxH0QAGewO0ah0thx7AQCcwcS0O16xTmM+7C3y4ERiIOAZ2t24f7D2rmgMgIJSCZVzuAR5FNWyUE3cxojsB8CmDsoBUbfp1wLmhV3EPDXR7XLapsN3S8HRiAJAuiKYZ5Hw7nqrmE5hive8joISJ9QXUAGqE8OjEAaCMAoGYE04kW/FmwjIMZ+0H5gLP44MAJpIODhU4W04AVvmW0EVGO/0VE2KPWCxMfJEfBoQXyk1gotAq48rs3z2K+FCx+kjAC0ICYlFBbwma4qvkVA+jzvAhK561XQcJw2Aq1JrWUtqLJbBJSGfAeJ3P0qaDhOGwF8lotAmtDhGo4dAmJmQiZd80hgDQgUOGSBABwSqG5YzYYdAjbMxgIeyOTLAnYuUyEA8oGECPAPhNghoG1LR/sZhnsRFzgwAlkgAHtBJ9juONAhIDHzFBLhp4UDI5AlAoqAjmc0elCTgKKKhwZ5nkI6B0YgLQSUkqPe2FF6zS7YnYAodqb1MC6HEfAj0JyEILmKfyWajVTJixxbvQCNnISNDUvcvl0X9+7tiKfPGuLp04Yj+fi4IY68WhKnTo2KkyfHxMyMfmN6EBAWVrCahldciVVpadu3MQOenJzMSRMMp5gg2uefvxC/3HPdYvRC4a23DoizZya0IyLM9fEJJ/mOPF2SdqOCoaBHNfaqV9+v443//vtN8csvO+Lxk93WG3/kSEnMHDbpjR8TADvrMEg5bt3eEDdvbpCZe7Bn06C6f/fdprh7d8sh4bvvj
gdlGUgalmKcb4jtRlX++uDpJWLitbGxMTLB0kdIhQwA/PzfL3oCj+4Gb3tWRBykHF/fXBdff72uIIkVA5uzZ/UwscO3IvhmBB8sleCNHlvE8M+sW/jii5cCb36YgO7pX58/d7Rj2kAPUg7UP4h8cydonEdjvVOesd7jx7viEf3dvPmScGjXlCBxuSyFDprQ09tWSrBUBfU8iWHaO/M8ACws+bzC4L563RIffJDOeHaQcuClQrfrDePjUpwhbfbu6c7eCkMS/L1Nw5FbNEm5SVpzg7BQAXXBcGXQkxP1mYchjePOMgwE1ImAGLsEvfUKyF4xwEeXmTQMWg4QxjvmA/kuXZwOJJ+/ru+eLotLlypivNxqYnoxbZrEPPdnHeg59bzyOCTQaRsOwCcN6I69b3+c8gYpB7QfXgBvgOaDhgsbkPeMb9z3Cy3dJMUl7PO75VPKjjzrTu+9Ht1y9zkdoAP8pAFv+3fftjdglDIHLcfdH9s1+MyMEUrz+esITTh3on2L9fatuj9bX8/xuy8ItCR4SDsC3kmh61Rohl0vU/m98aDl+PFu+1rfmTMHveJFOj5J4z5vuBdyHdF7T1bH1AO7v8Gmyyy4Riv7aYUnT+KXNWg5MKP1BuxwxA2YKXvD02d7ExNver+OPTYHVYN+xYkWovWZhGAZIa2QpCsftBz+cdrRo/EJ6J/1JsElrbZR5WjXBSvBOB4OBLQjoP9tTdIMRyPMGP3PGbQc/ucn0Vp+bY4FaV2CdgR8NcFYxw/q9OH41Ru0HDM+2ZOsaz7xDWuOHmmfFftx6+d5axKi1mb6+fCgZ83NpQfOqVPxDRQGLceJuXa/PD/6lmWCsOuW5l/PPHmyvexu92WV7uFaxaCtOK0mIW+/VW5bvY8LAtbNsCUVNwxaDv9WGxaQb91q35YLUzdsZ/q7b2zHDTK0EXCQggQ9G+OT839Ovo+bZN0Mcg1aDjzfv4AMTeYfzwVhqNKwlOPfS4a1kH98qfIPIo4/SMpQWqxbJbHagOlREu2nqjZoOc6fn2rrDbC7s7RUC6UJofmWPlnr2EsGNjoF8+PFv16BQMqRoC7CvfEGjVNosgaz8yjhNFmJnDsXf9fA/6xBygET+9KIFD/9tLcrskvLpD/9vC2+IwNdZWgwNeXqEXS1MNy9cWNd/Oe/dfrRaRpgecJ77x0Uf3xjsN2vEqded7dJ5f2HzxwpDx+eVte0ir+lveEg+za/kLAU+fDDKTGf0fhmkHKg601iHQSsdDJIhTzPntUQCe0J6EhJ/0CAH2mf+Blt1alxEMYy2KI6QTPnt/50QEBjZB0GJUeQfV+Yuu5nPxjm/qzy5I6AWQGRp3LRxUIb+s20utUBVtPnz09qNelQsjIBFRI5jEFEmGvBYubxE7Lv23DHeugR8JEWeoTTC7Sc1YceIS58TMC4yPF9qSCgCJj9oCkVcbmQoiLABCxqy+akXkzAnDRUUcVkAha1ZXNSLyZgThqqqGIyAYvasjmpFxMwJw1VVDGZgEVt2ZzUiwmYk4Yqqpjxv/UrKiL71At+WnTwTKqLHPtAFfpSbqxhQtcog4zYe9XBM6kucqQBsdqKywUB8cYHeUhV5lhZekiFZXFUz6RoIJjUwwYviWW3t6F1kcMrU5Lj3BCQPZMKxwSrqAapWo8B2TOpcJx0BpEvzx5SvZpT2y44iRk6XJIl8ZCKsdY//lnr+KCnm2dSL6BBlsvojv/+t8ORDUN1kcNbv7SOVRes5TIMLH6D3vqwlU/qIRXk18EzqS5yhMU9Tj4tCQjgk4a4HlKhdfwm74PwTKqLHEnbodf92hGQPZO6TVZkD6leUmpHQPZM6jbP0HhI9bJRh2P2TOq2QpE9pHp5pp0GVN/8eoWMe4xxVNSgi2dSXeSIil/U/NoRMGoFOH++EdCOgGl6borjIdX//DhaVFHCr82xHhg26CJHWHnj5tOOgOyZ1G3KofGQGpe5Wd3HnkldZIvsIdXLHe00IHsmdZunyB5StSYgxkmD9JCK5+vgmVQXObxkyeJYOw2ISrJnUrep2UNqFpQPWSZ7JhWOdyv2kBqSMFllY8+kxTZI1dYe0E/oYfdMGmRn6Mco6Jw9pAahkrM0LEbDRMxvptWtGll5JtVFjm71jpKuDFJzowGjVC6rvCCADp5JdZEjCc5MwCTo8b2JEVAE1HIZJnHtuIDcIMAEzE1TFVNQJmAx2zU3tWIC5qapiikoE7CY7ZqbWjEBc9NUxRSUCVjMds1NrZiAuWmqYgrKBCxmu+amVlp7x1Io6uIRlOVQLZJerPVeMPY82TPpXmPrgseeRPGP1FactgTUxSMoyxGfZPvdqQhofrz41yvIWC6X98vf12swfbpxY13s7Li/gxvl4bu7Qvz087Zzy9zcaJRbO/KyHB2QpJZQr286ZWk3BoTGCfIN2G+PoCxHalzbtyCtumCMcdgz6V576YLHnkTpHakuWKtlGHR57Jl0r5F1wWNPovSPtCEg3na/yfsweybVBY/0KddeokHuctaQZNvRB/ztRSU708UjKMuRrB3D3O3h2ppBvNOCgLp4BGU5wlAoWZ42AiYrKr27dfEIynKk16ZhStJmDKiLR1CWIwxt0sujDQHTqxKXlCcEtCGgLh5BWY7s6WtZ7oRX0vzDEFKs4pGNhpX9k/d5gi4eQVmOfRoppUtqEmJLEFCToItHUJajv4QAAbVYhtHFIyjL0WcCWrb9Ox5p24PtgnXxCMpyZE9Ay3J/v0UKuapNF4xq6+IRlOXIloTeTTfYA85LKRdKJVOMjIxk++QepY+PG0IHj6AsR4+GSnh5Z2dH7JLhJk1GbshfHzy9ZEt5bWxsTExMjCcsOp3bYQUSZBMYpfSzZybE2bMTUW7pyMtydECSSsLGxobY3NwCARdLDWk7azE0Ckyl8DQKAXnKZUPc/JrMs+rRxqZpegRlOdJozc4yLMttUymNVXnvUW1B2vZt0zTFoUNTnbkHmAKTJGghv5lWN5GK7plUFzy64R82/cWLF/S5BXXBUp6WKyu1asO0VwzDEJXKobBl9DUfgGfPpHuQ64LHnkTRjtbWfhfQguaInHV+Pe/+w2dO/zs9XRE0IYlWGudmBCIioMzxXz92WLrLMLa7Hae2SCKWx9kZgdAI7O421wBtcQc3uQSU7gmmxxwYgSwRUIvQNA15gOc0NaDtnCh2ZikAlz3cCGD9zw22VwPay0hU7HQz8H9GIH0EGo1mFyyNPQKaDXMZj4IG5HFg+qBziXsIYPkFwWyIZcROFzw7Ow2LmGWQj7thwMIhCwQU+cgQ9U6Tc80xID2NyPcNHrq97fpVyUIALnO4Edje3nIAsIXLNZy4kxDnyFhGxAQEChyyQEBpQMsyrqvyWwQ8cXR6mRKdblhlVJk4ZgSSIrC1teXsftA2x+rc7LQzAUGZLQLihPaEbyDe3Kwj4sAIpIaA6lltIa96C20joEGqkRi6Bg3IWtALEx8nQUDxCdrv9WPT171ltREQMxMy0f8EGVgLemHi4yQIrK+vO7cTtz7zl0OkbA9kHVOxDPsH+mSuOj5eFgcOHGjP
wGeMQAQEMPZbX9+gr3/F6mvHDs/6b23TgLgILUh2Wos4hhtVXpgGEhziIIBvzZUrXv/YT5XXQUBcoH76K4qcGfHLl676VDdwzAiERQDDuKb181f+sZ8qI5CAuGg25EekNmlCskPjQdehtLqJY0agFwL45mNraxtd7xoZnjo9atA9XQlIXfEq2UxfxU1Qo4N23REkPKfpiYDb9bpLedT1Ls6+QlzqEroSEPlfOz69RIPATzAOhB0/k7ALipzcQgAcAVecuQNxp1vXq24gDbl/aM6Kb9OseB4fLk1NTbLZ/v6QDe1VkO75cyiqBm1qiDuvHT/8Zi8w9tWAuBmzYsOS71OBqygYD+CZcS9Yh+96G/loycUYle+HQaGnBlSF4Os5Wh+EJqyyJlSocAwEOsg3Ik/vN+7zohaagLjJT8KDBw8K0+ypRL3P4+OCIYAx38uXL91uF5ovAvkARSQC4gYvCfEt8eTkJJMQwAxhUBMOrPURkSKTD5BFJiBuapLwS0xM8B1xuXyAt+wAzBAFrPPV63Wn+8WEA2O+sN2uF6ZYBFQF3H/wdImmxBdxPjY2SiQsszZU4BQ0xngPxgXb281PeGmpxbSMK5isxqlyIgLigfcf1i5IYV8j1woVdMnQhvC0xaF4CLRpPdrhIOuWqyeOzywlqWliAuLh6JIbprhG86FzOAcRJyYmyN+gdr8GC/E4REQA9nzY1/XYiC7T9tpHcbpc/6NTIaAq1NGGtn0ZSzVIAwFHR0dZIyqAchb7iUdkWcXWWtNYJZXapEpAJdG9B0+v0O8//EURERrRJeMYa0UFkoYxxnf4LHdnZ9sxJMA5ApHEMVQuWcZS3LFet+pmQkD1ML9GVOkgIxazS6USddeITXWJ4z4hAHLhD9ZO2OHCX4BjgmVpyxuGJa6nTTxVzUwJqB6y8rg2T2tGNFmR72DpRqV7Y2hJLGpjWQfHiNUfSKqCe71dbJVP5RmGWBHIX1eszSHgVw+UBsM6ncqvSNa00/PfjvNlyvsNNcJy80vJoDyppbW3ZGrFdi+IJiwVmrAsEEBYQzxFa0jVbqTsXgpfSQUBuOWDZzSbnFNJYxnuMrLSdN3k7TsBuwmy8lutSo6TqkTICkhpCatCv6Z9HPlp4FulyAm4jiUfdY6YlGVHmvd6EY+p4daoB13rqFvzp9cofY2Wx5zr9NNsDwxhrDXop7EIq1Ua+aymMYPteHaMhP8DKleEJHlBQFwAAAAASUVORK5CYII=\n name: General Mode-ECO\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751337124089-source-1750836372241-target\n selected: false\n source: '1751337124089'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: variable-aggregator\n targetType: tool\n id: 1753346901505-source-1751337124089-target\n selected: false\n source: '1753346901505'\n sourceHandle: source\n target: '1751337124089'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: 
'1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: text_model\n index_chunk_variable_selector:\n - '1751337124089'\n - result\n indexing_technique: economy\n keyword_number: 10\n retrieval_model:\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: keyword_search\n top_k: 3\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 479.7628208876065\n y: 326\n positionAbsolute:\n x: 479.7628208876065\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: 
builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. 
It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. 
You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. 
It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 532.832924599999\n positionAbsolute:\n x: -417.5334221022782\n y: 532.832924599999\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 265\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"General\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" divides\n content into chunks and retrieves the most relevant ones based on the user’s\n query for LLM processing. You can customize chunking rules—such as delimiter,\n maximum length, and overlap—to fit different document formats or scenarios.\n Preprocessing options are also available to clean up the text by removing\n excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 265\n id: '1751253953926'\n position:\n x: 184.46657789772178\n y: 407.42301051148354\n positionAbsolute:\n x: 184.46657789772178\n y: 407.42301051148354\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 344\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. 
Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 344\n id: '1751254117904'\n position:\n x: 479.7628208876065\n y: 472.46585541244207\n positionAbsolute:\n x: 479.7628208876065\n y: 472.46585541244207\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: The result of the general chunk tool.\n properties:\n general_chunks:\n items:\n description: The chunk of the text.\n type: string\n type: array\n type: object\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The text you want to chunk.\n ja_JP: The text you want to chunk.\n pt_BR: The text you want to chunk.\n zh_Hans: 你想要分块的文本。\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Input Content\n zh_Hans: 输入变量\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_variable\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The delimiter of the chunks.\n ja_JP: The delimiter of the chunks.\n pt_BR: The delimiter of the chunks.\n zh_Hans: 块的分隔符。\n label:\n en_US: Delimiter\n ja_JP: Delimiter\n pt_BR: Delimiter\n zh_Hans: 分隔符\n llm_description: The delimiter of the chunks, the format of the delimiter\n must be a string.\n max: null\n min: null\n name: delimiter\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The maximum chunk length.\n ja_JP: The maximum chunk length.\n pt_BR: The maximum chunk length.\n zh_Hans: 最大块的长度。\n label:\n en_US: Maximum Chunk Length\n ja_JP: Maximum Chunk Length\n pt_BR: Maximum Chunk Length\n zh_Hans: 最大块的长度\n llm_description: The maximum chunk length, the format of the chunk size\n must be an integer.\n max: null\n min: null\n name: max_chunk_length\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The chunk overlap length.\n ja_JP: The chunk overlap length.\n pt_BR: The chunk overlap length.\n zh_Hans: 块的重叠长度。\n label:\n en_US: Chunk Overlap Length\n ja_JP: Chunk Overlap Length\n pt_BR: Chunk Overlap Length\n zh_Hans: 块的重叠长度\n llm_description: The chunk overlap length, the format of the chunk overlap\n length must be an integer.\n max: null\n min: null\n name: chunk_overlap_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Replace consecutive spaces, newlines and tabs\n zh_Hans: 替换连续的空格、换行符和制表符\n label:\n en_US: Replace 
consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Replace consecutive spaces, newlines and tabs\n zh_Hans: 替换连续的空格、换行符和制表符\n llm_description: Replace consecutive spaces, newlines and tabs, the format\n of the replace must be a boolean.\n max: null\n min: null\n name: replace_consecutive_spaces_newlines_tabs\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Delete all URLs and email addresses\n zh_Hans: 删除所有URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Delete all URLs and email addresses\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Delete all URLs and email addresses, the format of the\n delete must be a boolean.\n max: null\n min: null\n name: delete_all_urls_and_email_addresses\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n chunk_overlap_length: ''\n delete_all_urls_and_email_addresses: ''\n delimiter: ''\n input_variable: ''\n max_chunk_length: ''\n replace_consecutive_spaces_newlines_tabs: ''\n provider_id: langgenius/general_chunker/general_chunker\n provider_name: langgenius/general_chunker/general_chunker\n provider_type: builtin\n selected: false\n title: General Chunker\n tool_configurations: {}\n tool_description: A tool for general text chunking mode, the chunks retrieved\n and recalled are the same.\n tool_label: General Chunker\n tool_name: general_chunker\n tool_node_version: '2'\n tool_parameters:\n chunk_overlap_length:\n type: variable\n value:\n - rag\n - shared\n - Chunk_Overlap_Length\n delete_all_urls_and_email_addresses:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n delimiter:\n type: mixed\n value: '{{#rag.shared.Dilmiter#}}'\n input_variable:\n type: mixed\n value: '{{#1753346901505.output#}}'\n max_chunk_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Chunk_Length\n replace_consecutive_spaces_newlines_tabs:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n type: tool\n height: 52\n id: '1751337124089'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n height: 187\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: 
{}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1067.06980963949\n y: 236.10252072775984\n positionAbsolute:\n x: -1067.06980963949\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: 
jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 1463.3408543698197\n y: 224.29398382646679\n zoom: 0.6387381963193622\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - 
allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. 
Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Dilmiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n\\n and \\n are\n commonly used delimiters for separating paragraphs and lines. Combined with\n commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum\n chunk length. You can also use special delimiters defined by yourself (e.g.\n ***).\n type: text-input\n unit: null\n variable: Dilmiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Chunk Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Chunk_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 128\n label: Chunk Overlap Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Setting the chunk overlap can maintain the semantic relevance between\n them, enhancing the retrieve effect. It is recommended to set 10%-25% of the\n maximum chunk size.\n type: number\n unit: tokens\n variable: Chunk_Overlap_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n",
+ "graph": {
+ "edges": [
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "knowledge-index"
+ },
+ "id": "1751337124089-source-1750836372241-target",
+ "selected": false,
+ "source": "1751337124089",
+ "sourceHandle": "source",
+ "target": "1750836372241",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "variable-aggregator",
+ "targetType": "tool"
+ },
+ "id": "1753346901505-source-1751337124089-target",
+ "selected": false,
+ "source": "1753346901505",
+ "sourceHandle": "source",
+ "target": "1751337124089",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1750836391776-source-1753346901505-target",
+ "selected": false,
+ "source": "1750836391776",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "document-extractor",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1753349228522-source-1753346901505-target",
+ "selected": false,
+ "source": "1753349228522",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1754023419266-source-1753346901505-target",
+ "selected": false,
+ "source": "1754023419266",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756442998557-source-1756442986174-target",
+ "selected": false,
+ "source": "1756442998557",
+ "sourceHandle": "source",
+ "target": "1756442986174",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "variable-aggregator",
+ "targetType": "if-else"
+ },
+ "id": "1756442986174-source-1756443014860-target",
+ "selected": false,
+ "source": "1756442986174",
+ "sourceHandle": "source",
+ "target": "1756443014860",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1750836380067-source-1756442986174-target",
+ "selected": false,
+ "source": "1750836380067",
+ "sourceHandle": "source",
+ "target": "1756442986174",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "if-else",
+ "targetType": "tool"
+ },
+ "id": "1756443014860-true-1750836391776-target",
+ "selected": false,
+ "source": "1756443014860",
+ "sourceHandle": "true",
+ "target": "1750836391776",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "if-else",
+ "targetType": "document-extractor"
+ },
+ "id": "1756443014860-false-1753349228522-target",
+ "selected": false,
+ "source": "1756443014860",
+ "sourceHandle": "false",
+ "target": "1753349228522",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756896212061-source-1753346901505-target",
+ "source": "1756896212061",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756907397615-source-1753346901505-target",
+ "source": "1756907397615",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ }
+ ],
+ "nodes": [
+ {
+ "data": {
+ "chunk_structure": "text_model",
+ "index_chunk_variable_selector": [
+ "1751337124089",
+ "result"
+ ],
+ "indexing_technique": "economy",
+ "keyword_number": 10,
+ "retrieval_model": {
+ "score_threshold": 0.5,
+ "score_threshold_enabled": false,
+ "search_method": "keyword_search",
+ "top_k": 3
+ },
+ "selected": false,
+ "title": "Knowledge Base",
+ "type": "knowledge-index"
+ },
+ "height": 114,
+ "id": "1750836372241",
+ "position": {
+ "x": 479.7628208876065,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 479.7628208876065,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "File",
+ "datasource_name": "upload-file",
+ "datasource_parameters": {},
+ "fileExtensions": [
+ "txt",
+ "markdown",
+ "mdx",
+ "pdf",
+ "html",
+ "xlsx",
+ "xls",
+ "vtt",
+ "properties",
+ "doc",
+ "docx",
+ "csv",
+ "eml",
+ "msg",
+ "pptx",
+ "xml",
+ "epub",
+ "ppt",
+ "md"
+ ],
+ "plugin_id": "langgenius/file",
+ "provider_name": "file",
+ "provider_type": "local_file",
+ "selected": false,
+ "title": "File",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1750836380067",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 224.87938381325645
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 224.87938381325645
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "documents": {
+ "description": "the documents extracted from the file",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ },
+ "images": {
+ "description": "The images extracted from the file",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
+ },
+ "label": {
+ "en_US": "file",
+ "ja_JP": "file",
+ "pt_BR": "file",
+ "zh_Hans": "file"
+ },
+ "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "max": null,
+ "min": null,
+ "name": "file",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "file"
+ }
+ ],
+ "params": {
+ "file": ""
+ },
+ "provider_id": "langgenius/dify_extractor/dify_extractor",
+ "provider_name": "langgenius/dify_extractor/dify_extractor",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Dify Extractor",
+ "tool_configurations": {},
+ "tool_description": "Dify Extractor",
+ "tool_label": "Dify Extractor",
+ "tool_name": "dify_extractor",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "file": {
+ "type": "variable",
+ "value": [
+ "1756442986174",
+ "output"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1750836391776",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 268.1692071834485
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 268.1692071834485
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 252,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 1124
+ },
+ "height": 252,
+ "id": "1751252161631",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": -123.758428116601
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": -123.758428116601
+ },
+ "selected": true,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 1124
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 388,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 285
+ },
+ "height": 388,
+ "id": "1751252440357",
+ "position": {
+ "x": -1723.9942193415582,
+ "y": 224.87938381325645
+ },
+ "positionAbsolute": {
+ "x": -1723.9942193415582,
+ "y": 224.87938381325645
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 285
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 430,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 430,
+ "id": "1751253091602",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 532.832924599999
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 532.832924599999
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 265,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"General Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" divides content into chunks and retrieves the most relevant ones based on the user’s query for LLM processing. You can customize chunking rules—such as delimiter, maximum length, and overlap—to fit different document formats or scenarios. Preprocessing options are also available to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 265,
+ "id": "1751253953926",
+ "position": {
+ "x": 184.46657789772178,
+ "y": 407.42301051148354
+ },
+ "positionAbsolute": {
+ "x": 184.46657789772178,
+ "y": 407.42301051148354
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 344,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 344,
+ "id": "1751254117904",
+ "position": {
+ "x": 479.7628208876065,
+ "y": 472.46585541244207
+ },
+ "positionAbsolute": {
+ "x": 479.7628208876065,
+ "y": 472.46585541244207
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "result": {
+ "description": "The result of the general chunk tool.",
+ "properties": {
+ "general_chunks": {
+ "items": {
+ "description": "The chunk of the text.",
+ "type": "string"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The text you want to chunk.",
+ "ja_JP": "The text you want to chunk.",
+ "pt_BR": "The text you want to chunk.",
+ "zh_Hans": "你想要分块的文本。"
+ },
+ "label": {
+ "en_US": "Input Content",
+ "ja_JP": "Input Content",
+ "pt_BR": "Input Content",
+ "zh_Hans": "输入变量"
+ },
+ "llm_description": "The text you want to chunk.",
+ "max": null,
+ "min": null,
+ "name": "input_variable",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The delimiter of the chunks.",
+ "ja_JP": "The delimiter of the chunks.",
+ "pt_BR": "The delimiter of the chunks.",
+ "zh_Hans": "块的分隔符。"
+ },
+ "label": {
+ "en_US": "Delimiter",
+ "ja_JP": "Delimiter",
+ "pt_BR": "Delimiter",
+ "zh_Hans": "分隔符"
+ },
+ "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string.",
+ "max": null,
+ "min": null,
+ "name": "delimiter",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The maximum chunk length.",
+ "ja_JP": "The maximum chunk length.",
+ "pt_BR": "The maximum chunk length.",
+ "zh_Hans": "最大块的长度。"
+ },
+ "label": {
+ "en_US": "Maximum Chunk Length",
+ "ja_JP": "Maximum Chunk Length",
+ "pt_BR": "Maximum Chunk Length",
+ "zh_Hans": "最大块的长度"
+ },
+ "llm_description": "The maximum chunk length, the format of the chunk size must be an integer.",
+ "max": null,
+ "min": null,
+ "name": "max_chunk_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The chunk overlap length.",
+ "ja_JP": "The chunk overlap length.",
+ "pt_BR": "The chunk overlap length.",
+ "zh_Hans": "块的重叠长度。"
+ },
+ "label": {
+ "en_US": "Chunk Overlap Length",
+ "ja_JP": "Chunk Overlap Length",
+ "pt_BR": "Chunk Overlap Length",
+ "zh_Hans": "块的重叠长度"
+ },
+ "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer.",
+ "max": null,
+ "min": null,
+ "name": "chunk_overlap_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Replace consecutive spaces, newlines and tabs",
+ "ja_JP": "Replace consecutive spaces, newlines and tabs",
+ "pt_BR": "Replace consecutive spaces, newlines and tabs",
+ "zh_Hans": "替换连续的空格、换行符和制表符"
+ },
+ "label": {
+ "en_US": "Replace consecutive spaces, newlines and tabs",
+ "ja_JP": "Replace consecutive spaces, newlines and tabs",
+ "pt_BR": "Replace consecutive spaces, newlines and tabs",
+ "zh_Hans": "替换连续的空格、换行符和制表符"
+ },
+ "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean.",
+ "max": null,
+ "min": null,
+ "name": "replace_consecutive_spaces_newlines_tabs",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Delete all URLs and email addresses",
+ "ja_JP": "Delete all URLs and email addresses",
+ "pt_BR": "Delete all URLs and email addresses",
+ "zh_Hans": "删除所有URL和电子邮件地址"
+ },
+ "label": {
+ "en_US": "Delete all URLs and email addresses",
+ "ja_JP": "Delete all URLs and email addresses",
+ "pt_BR": "Delete all URLs and email addresses",
+ "zh_Hans": "删除所有URL和电子邮件地址"
+ },
+ "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean.",
+ "max": null,
+ "min": null,
+ "name": "delete_all_urls_and_email_addresses",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ }
+ ],
+ "params": {
+ "chunk_overlap_length": "",
+ "delete_all_urls_and_email_addresses": "",
+ "delimiter": "",
+ "input_variable": "",
+ "max_chunk_length": "",
+ "replace_consecutive_spaces_newlines_tabs": ""
+ },
+ "provider_id": "langgenius/general_chunker/general_chunker",
+ "provider_name": "langgenius/general_chunker/general_chunker",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "General Chunker",
+ "tool_configurations": {},
+ "tool_description": "A tool for general text chunking mode, the chunks retrieved and recalled are the same.",
+ "tool_label": "General Chunker",
+ "tool_name": "general_chunker",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "chunk_overlap_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Chunk_Overlap_Length"
+ ]
+ },
+ "delete_all_urls_and_email_addresses": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_2"
+ ]
+ },
+ "delimiter": {
+ "type": "mixed",
+ "value": "{{#rag.shared.Dilmiter#}}"
+ },
+ "input_variable": {
+ "type": "mixed",
+ "value": "{{#1753346901505.output#}}"
+ },
+ "max_chunk_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Maximum_Chunk_Length"
+ ]
+ },
+ "replace_consecutive_spaces_newlines_tabs": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_1"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1751337124089",
+ "position": {
+ "x": 184.46657789772178,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 184.46657789772178,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "output_type": "string",
+ "selected": false,
+ "title": "Variable Aggregator",
+ "type": "variable-aggregator",
+ "variables": [
+ [
+ "1750836391776",
+ "text"
+ ],
+ [
+ "1753349228522",
+ "text"
+ ],
+ [
+ "1754023419266",
+ "content"
+ ],
+ [
+ "1756896212061",
+ "content"
+ ]
+ ]
+ },
+ "height": 187,
+ "id": "1753346901505",
+ "position": {
+ "x": -117.24452412456148,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -117.24452412456148,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_array_file": false,
+ "selected": false,
+ "title": "Doc Extractor",
+ "type": "document-extractor",
+ "variable_selector": [
+ "1756442986174",
+ "output"
+ ]
+ },
+ "height": 92,
+ "id": "1753349228522",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 417.25474169825833
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 417.25474169825833
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Notion",
+ "datasource_name": "notion_datasource",
+ "datasource_parameters": {},
+ "plugin_id": "langgenius/notion_datasource",
+ "provider_name": "notion_datasource",
+ "provider_type": "online_document",
+ "selected": false,
+ "title": "Notion",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1754023419266",
+ "position": {
+ "x": -1369.6904698303242,
+ "y": 440.01452302398053
+ },
+ "positionAbsolute": {
+ "x": -1369.6904698303242,
+ "y": 440.01452302398053
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "output_type": "file",
+ "selected": false,
+ "title": "Variable Aggregator",
+ "type": "variable-aggregator",
+ "variables": [
+ [
+ "1750836380067",
+ "file"
+ ],
+ [
+ "1756442998557",
+ "file"
+ ]
+ ]
+ },
+ "height": 135,
+ "id": "1756442986174",
+ "position": {
+ "x": -1067.06980963949,
+ "y": 236.10252072775984
+ },
+ "positionAbsolute": {
+ "x": -1067.06980963949,
+ "y": 236.10252072775984
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Google Drive",
+ "datasource_name": "google_drive",
+ "datasource_parameters": {},
+ "plugin_id": "langgenius/google_drive",
+ "provider_name": "google_drive",
+ "provider_type": "online_drive",
+ "selected": false,
+ "title": "Google Drive",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756442998557",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "cases": [
+ {
+ "case_id": "true",
+ "conditions": [
+ {
+ "comparison_operator": "is",
+ "id": "1581dd11-7898-41f4-962f-937283ba7e01",
+ "value": ".xlsx",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528",
+ "value": ".xls",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b",
+ "value": ".md",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa",
+ "value": ".markdown",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "53abfe95-c7d0-4f63-ad37-17d425d25106",
+ "value": ".mdx",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "436877b8-8c0a-4cc6-9565-92754db08571",
+ "value": ".html",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c",
+ "value": ".htm",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b",
+ "value": ".docx",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "62d11445-876a-493f-85d3-8fc020146bdd",
+ "value": ".csv",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "02c4bce8-7668-4ccd-b750-4281f314b231",
+ "value": ".txt",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ }
+ ],
+ "id": "true",
+ "logical_operator": "or"
+ }
+ ],
+ "selected": false,
+ "title": "IF/ELSE",
+ "type": "if-else"
+ },
+ "height": 358,
+ "id": "1756443014860",
+ "position": {
+ "x": -733.5977815139424,
+ "y": 236.10252072775984
+ },
+ "positionAbsolute": {
+ "x": -733.5977815139424,
+ "y": 236.10252072775984
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Jina Reader",
+ "datasource_name": "jina_reader",
+ "datasource_parameters": {
+ "crawl_sub_pages": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jina_subpages"
+ ]
+ },
+ "limit": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jina_limit"
+ ]
+ },
+ "url": {
+ "type": "mixed",
+ "value": "{{#rag.1756896212061.jina_url#}}"
+ },
+ "use_sitemap": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jian_sitemap"
+ ]
+ }
+ },
+ "plugin_id": "langgenius/jina_datasource",
+ "provider_name": "jinareader",
+ "provider_type": "website_crawl",
+ "selected": false,
+ "title": "Jina Reader",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756896212061",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 538.9988445953813
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 538.9988445953813
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Firecrawl",
+ "datasource_name": "crawl",
+ "datasource_parameters": {
+ "crawl_subpages": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "firecrawl_subpages"
+ ]
+ },
+ "exclude_paths": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.exclude_paths#}}"
+ },
+ "include_paths": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.include_paths#}}"
+ },
+ "limit": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "max_pages"
+ ]
+ },
+ "max_depth": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "max_depth"
+ ]
+ },
+ "only_main_content": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "main_content"
+ ]
+ },
+ "url": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.firecrawl_url1#}}"
+ }
+ },
+ "plugin_id": "langgenius/firecrawl_datasource",
+ "provider_name": "firecrawl",
+ "provider_type": "website_crawl",
+ "selected": false,
+ "title": "Firecrawl",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756907397615",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 644.3296146102903
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 644.3296146102903
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ }
+ ],
+ "viewport": {
+ "x": 1463.3408543698197,
+ "y": 224.29398382646679,
+ "zoom": 0.6387381963193622
+ }
+ },
+ "icon_info": {
+ "icon": "52064ff0-26b6-47d0-902f-e331f94d959b",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAT1klEQVR4Ae1dzXPcRBbvlsZ2xo6dcbwXinyMC+IDW5WY08IJh2NyIFRxJLvhHyDxaWv3kuS0e4v5CwjLHqmCHMgxMbVbBZxIOEAVCWXnq7hsMiaJPf4aad9Pmh5rNBqPPmdamtdVdkutVuv1r396/fX0RgpNwspvterurqjatqiatlWxhKgYUhyHeLaQFYrwh5OqE3v+SSkqtrruSS/yoRRijbBa89bRSZN7aVLYq7hu2eKBgfzSWLXpeqkkVmdfmXau4fogA8nc37CyUqs0TLEghfUOEatKhJoXspNU/ZVqOJ8mbXGHCLlq2/ZdKY07ZkMsz85Ot5E6a2T6QsB7j2oL9Aa+QxVdoArhryMYhiEMUnmmaQpJKg1/SEMgcJxzHJumm4ZjFVR+dT4MMWEp8OcNOLdI3algWQ3KQ52GbTl5LcuNGw2L8lEfExBASiHt5YZhfDZ3ZPpOQJZUkzIjIDSdZVgXbCnfI4kXlNQgS6lkOkQD2UZGRlqEU3k47g8CjUZDgIy7uzsUN8TOzm7bg4kcq0Tpq68f+8P1tgspnqROQId4JXGRXrlLalwG0o2NjRLZRh3y4ZyDngiAhNvbWw4ZlZYEEUlLXH/t6PTVtKVOlQn3H/7vnLSNazSuqELQkZGSOHCg7MRpC87lZY/A1tZ2i4x4GoiYtkZMhYCk9aoN0/6UZFyAoEw8oFCcAK24vr7uHTd+ZY7IxTRm0okJuPKodtGy7SvobtG1lstl0npjxUGfa9JCABqxXq8rItJs2VpMOj6MTUBnrGeKyzQXuwQJR0dHxMTERGu22pKaDwqFAMaFICHIiEDtv3Ti2Mxi3ErGIiC6XMuwv6Sx3jxrvbjQ5/u+zc0th4hY+sHSjTEq34/TJUcmYJN8tzHRwDrd1NRka70u35Cy9FERgDZ8/vyF0yUTkVaNEXk6KgkjEdBLPqzhTU4eZPJFbbWC5QcJX7x46awjxiFhaAL6yQfNx+t5BWNTzOqgG4YmxGJ2VBKGIiCTL2bLDNFtcUnYubEaAFpzwlFFt8uaLwAgTnJ6Q3ADHKEluaq1bX9JiqvSC5qeBPz1YQ07G/OYcGDMx91uL0iH9zq4oeYF4MyuaV3uhca+XTBtrV0QwvgUBR86NMUTjl5o8nUHAUxMfv/9uWOBQ13z4onjM0vdoOlKQGfcZ9o/YIdjfHycdze6IcjpgQhgnXBjYwPX1mjb7s1uyzNdu2Da270G8sGKhbfWAjHmxH0QAGewO0ah0thx7AQCcwcS0O16xTmM+7C3y4ERiIOAZ2t24f7D2rmgMgIJSCZVzuAR5FNWyUE3cxojsB8CmDsoBUbfp1wLmhV3EPDXR7XLapsN3S8HRiAJAuiKYZ5Hw7nqrmE5hive8joISJ9QXUAGqE8OjEAaCMAoGYE04kW/FmwjIMZ+0H5gLP44MAJpIODhU4W04AVvmW0EVGO/0VE2KPWCxMfJEfBoQXyk1gotAq48rs3z2K+FCx+kjAC0ICYlFBbwma4qvkVA+jzvAhK561XQcJw2Aq1JrWUtqLJbBJSGfAeJ3P0qaDhOGwF8lotAmtDhGo4dAmJmQiZd80hgDQgUOGSBABwSqG5YzYYdAjbMxgIeyOTLAnYuUyEA8oGECPAPhNghoG1LR/sZhnsRFzgwAlkgAHtBJ9juONAhIDHzFBLhp4UDI5AlAoqAjmc0elCTgKKKhwZ5nkI6B0YgLQSUkqPe2FF6zS7YnYAodqb1MC6HEfAj0JyEILmKfyWajVTJixxbvQCNnISNDUvcvl0X9+7tiKfPGuLp04Yj+fi4IY68WhKnTo2KkyfHxMyMfmN6EBAWVrCahldciVVpadu3MQOenJzMSRMMp5gg2uefvxC/3HPdYvRC4a23DoizZya0IyLM9fEJJ/mOPF2SdqOCoaBHNfaqV9+v443//vtN8csvO+Lxk93WG3/kSEnMHDbpjR8TADvrMEg5bt3eEDdvbpCZe7Bn06C6f/fdprh7d8sh4bvvjgdlGUgalmKcb4jtRlX++uDpJWLitbGxMTLB0kdIhQwA/PzfL3oCj+4Gb3tWRBykHF/fXBdff72uIIkVA5uzZ/UwscO3IvhmBB8sleCNHlvE8M+sW/jii5cCb36YgO7pX58/d7Rj2kAPUg7UP4h8cydonEdjvVOesd7jx7viEf3dvPmScGjXlCBxuSyFDprQ09tWSrBUBfU8iWHaO/M8ACws+bzC4L563RIffJDOeHaQcuClQrfrDePjUpwhbfbu6c7eCkMS/L1Nw5FbNEm5SVpzg7BQAXXBcGXQkxP1mYchjePOMgwE1ImAGLsEvfUKyF4xwEeXmTQMWg4QxjvmA/kuXZwOJJ+/ru+eLotLlypivNxqYnoxbZrEPPdnHeg59bzyOCTQaRsOwCcN6I69b3+c8gYpB7QfXgBvgOaDhgsbkPeMb9z3Cy3dJMUl7PO75VPKjjzrTu+9Ht1y9zkdoAP8pAFv+3fftjdglDIHLcfdH9s1+MyMEUrz+esITTh3on2L9fatuj9bX8/xuy8ItCR4SDsC3kmh61Rohl0vU/m98aDl+PFu+1rfmTMHveJFOj5J4z5vuBdyHdF7T1bH1AO7v8Gmyyy4Riv7aYUnT+KXNWg5MKP1BuxwxA2YKXvD02d7ExNver+OPTYHVYN+xYkWovWZhGAZIa2QpCsftBz+cdrRo/EJ6J/1JsElrbZR5WjXBSvBOB4OBLQjoP9tTdIMRyPMGP3PGbQc/ucn0Vp+bY4FaV2CdgR8NcFYxw/q9OH41Ru0HDM+2ZOsaz7xDWuOHmmfFftx6+d5axKi1mb6+fCgZ83NpQfOqVPxDRQGLceJuXa/PD/6lmWCsOuW5l/PPHmyvexu92WV7uFaxaCtOK0mIW+/VW5bvY8LAtbNsCUVNwxaDv9WGxaQb91q35YLUzdsZ/q7b2zHDTK0EXCQggQ9G+OT839Ovo+bZN0Mcg1aDjzfv4AMTeYfzwVhqNKwlOPfS4a1kH98qfIPIo4/SMpQWqxbJbHagOlREu2nqjZoOc6fn2rrDbC7s7RUC6UJofmWPlnr2EsGNjoF8+PFv16BQMqRoC7CvfEGjVNosgaz8yjhNFmJnDsXf9fA/6xBygET+9KIFD/9tLcrskvLpD/9vC2+IwNdZWgwNeXqEXS1MNy9cWNd/Oe/dfrRaRpgecJ77x0Uf3xjsN2vEqded7dJ5f2HzxwpDx+eVte0ir+lveEg+za/kLAU+fDDKTGf0fhmkHKg601iHQSsdDJIhTzPntUQCe0J6EhJ/0CAH2mf+Blt1alxEMYy2KI6QTPnt/50QEBjZB0GJUeQfV+Yuu5nPxjm/qzy5I6AWQGRp3LRxU
Ib+s20utUBVtPnz09qNelQsjIBFRI5jEFEmGvBYubxE7Lv23DHeugR8JEWeoTTC7Sc1YceIS58TMC4yPF9qSCgCJj9oCkVcbmQoiLABCxqy+akXkzAnDRUUcVkAha1ZXNSLyZgThqqqGIyAYvasjmpFxMwJw1VVDGZgEVt2ZzUiwmYk4Yqqpjxv/UrKiL71At+WnTwTKqLHPtAFfpSbqxhQtcog4zYe9XBM6kucqQBsdqKywUB8cYHeUhV5lhZekiFZXFUz6RoIJjUwwYviWW3t6F1kcMrU5Lj3BCQPZMKxwSrqAapWo8B2TOpcJx0BpEvzx5SvZpT2y44iRk6XJIl8ZCKsdY//lnr+KCnm2dSL6BBlsvojv/+t8ORDUN1kcNbv7SOVRes5TIMLH6D3vqwlU/qIRXk18EzqS5yhMU9Tj4tCQjgk4a4HlKhdfwm74PwTKqLHEnbodf92hGQPZO6TVZkD6leUmpHQPZM6jbP0HhI9bJRh2P2TOq2QpE9pHp5pp0GVN/8eoWMe4xxVNSgi2dSXeSIil/U/NoRMGoFOH++EdCOgGl6borjIdX//DhaVFHCr82xHhg26CJHWHnj5tOOgOyZ1G3KofGQGpe5Wd3HnkldZIvsIdXLHe00IHsmdZunyB5StSYgxkmD9JCK5+vgmVQXObxkyeJYOw2ISrJnUrep2UNqFpQPWSZ7JhWOdyv2kBqSMFllY8+kxTZI1dYe0E/oYfdMGmRn6Mco6Jw9pAahkrM0LEbDRMxvptWtGll5JtVFjm71jpKuDFJzowGjVC6rvCCADp5JdZEjCc5MwCTo8b2JEVAE1HIZJnHtuIDcIMAEzE1TFVNQJmAx2zU3tWIC5qapiikoE7CY7ZqbWjEBc9NUxRSUCVjMds1NrZiAuWmqYgrKBCxmu+amVlp7x1Io6uIRlOVQLZJerPVeMPY82TPpXmPrgseeRPGP1FactgTUxSMoyxGfZPvdqQhofrz41yvIWC6X98vf12swfbpxY13s7Li/gxvl4bu7Qvz087Zzy9zcaJRbO/KyHB2QpJZQr286ZWk3BoTGCfIN2G+PoCxHalzbtyCtumCMcdgz6V576YLHnkTpHakuWKtlGHR57Jl0r5F1wWNPovSPtCEg3na/yfsweybVBY/0KddeokHuctaQZNvRB/ztRSU708UjKMuRrB3D3O3h2ppBvNOCgLp4BGU5wlAoWZ42AiYrKr27dfEIynKk16ZhStJmDKiLR1CWIwxt0sujDQHTqxKXlCcEtCGgLh5BWY7s6WtZ7oRX0vzDEFKs4pGNhpX9k/d5gi4eQVmOfRoppUtqEmJLEFCToItHUJajv4QAAbVYhtHFIyjL0WcCWrb9Ox5p24PtgnXxCMpyZE9Ay3J/v0UKuapNF4xq6+IRlOXIloTeTTfYA85LKRdKJVOMjIxk++QepY+PG0IHj6AsR4+GSnh5Z2dH7JLhJk1GbshfHzy9ZEt5bWxsTExMjCcsOp3bYQUSZBMYpfSzZybE2bMTUW7pyMtydECSSsLGxobY3NwCARdLDWk7azE0Ckyl8DQKAXnKZUPc/JrMs+rRxqZpegRlOdJozc4yLMttUymNVXnvUW1B2vZt0zTFoUNTnbkHmAKTJGghv5lWN5GK7plUFzy64R82/cWLF/S5BXXBUp6WKyu1asO0VwzDEJXKobBl9DUfgGfPpHuQ64LHnkTRjtbWfhfQguaInHV+Pe/+w2dO/zs9XRE0IYlWGudmBCIioMzxXz92WLrLMLa7Hae2SCKWx9kZgdAI7O421wBtcQc3uQSU7gmmxxwYgSwRUIvQNA15gOc0NaDtnCh2ZikAlz3cCGD9zw22VwPay0hU7HQz8H9GIH0EGo1mFyyNPQKaDXMZj4IG5HFg+qBziXsIYPkFwWyIZcROFzw7Ow2LmGWQj7thwMIhCwQU+cgQ9U6Tc80xID2NyPcNHrq97fpVyUIALnO4Edje3nIAsIXLNZy4kxDnyFhGxAQEChyyQEBpQMsyrqvyWwQ8cXR6mRKdblhlVJk4ZgSSIrC1teXsftA2x+rc7LQzAUGZLQLihPaEbyDe3Kwj4sAIpIaA6lltIa96C20joEGqkRi6Bg3IWtALEx8nQUDxCdrv9WPT171ltREQMxMy0f8EGVgLemHi4yQIrK+vO7cTtz7zl0OkbA9kHVOxDPsH+mSuOj5eFgcOHGjPwGeMQAQEMPZbX9+gr3/F6mvHDs/6b23TgLgILUh2Wos4hhtVXpgGEhziIIBvzZUrXv/YT5XXQUBcoH76K4qcGfHLl676VDdwzAiERQDDuKb181f+sZ8qI5CAuGg25EekNmlCskPjQdehtLqJY0agFwL45mNraxtd7xoZnjo9atA9XQlIXfEq2UxfxU1Qo4N23REkPKfpiYDb9bpLedT1Ls6+QlzqEroSEPlfOz69RIPATzAOhB0/k7ALipzcQgAcAVecuQNxp1vXq24gDbl/aM6Kb9OseB4fLk1NTbLZ/v6QDe1VkO75cyiqBm1qiDuvHT/8Zi8w9tWAuBmzYsOS71OBqygYD+CZcS9Yh+96G/loycUYle+HQaGnBlSF4Os5Wh+EJqyyJlSocAwEOsg3Ik/vN+7zohaagLjJT8KDBw8K0+ypRL3P4+OCIYAx38uXL91uF5ovAvkARSQC4gYvCfEt8eTkJJMQwAxhUBMOrPURkSKTD5BFJiBuapLwS0xM8B1xuXyAt+wAzBAFrPPV63Wn+8WEA2O+sN2uF6ZYBFQF3H/wdImmxBdxPjY2SiQsszZU4BQ0xngPxgXb281PeGmpxbSMK5isxqlyIgLigfcf1i5IYV8j1woVdMnQhvC0xaF4CLRpPdrhIOuWqyeOzywlqWliAuLh6JIbprhG86FzOAcRJyYmyN+gdr8GC/E4REQA9nzY1/XYiC7T9tpHcbpc/6NTIaAq1NGGtn0ZSzVIAwFHR0dZIyqAchb7iUdkWcXWWtNYJZXapEpAJdG9B0+v0O8//EURERrRJeMYa0UFkoYxxnf4LHdnZ9sxJMA5ApHEMVQuWcZS3LFet+pmQkD1ML9GVOkgIxazS6USddeITXWJ4z4hAHLhD9ZO2OHCX4BjgmVpyxuGJa6nTTxVzUwJqB6y8rg2T2tGNFmR72DpRqV7Y2hJLGpjWQfHiNUfSKqCe71dbJVP5RmGWBHIX1eszSHgVw+UBsM6ncqvSNa00/PfjvNlyvsNNcJy80vJoDyppbW3ZGrFdi+IJiwVmrAsEEBYQzxFa0jVbqTsXgpfSQUBuOWDZzSbnFNJYxnuMrLSdN3k7TsBuwmy8lutSo6TqkTICkhpCatCv6Z9HPlp4FulyAm4jiUfdY6YlGVHmvd6EY+p4daoB13rqFvzp9cofY2Wx5zr9NNsDwxhrDXop7EIq1Ua+aymMYPteHaMhP8DKleEJHlBQFwAAAAASUVORK5CYII="
+ },
+ "id": "9f5ea5a7-7796-49f3-9e9a-ae2d8e84cfa3",
+ "name": "General Mode-ECO",
+ "icon": {
+ "icon": "52064ff0-26b6-47d0-902f-e331f94d959b",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAT1klEQVR4Ae1dzXPcRBbvlsZ2xo6dcbwXinyMC+IDW5WY08IJh2NyIFRxJLvhHyDxaWv3kuS0e4v5CwjLHqmCHMgxMbVbBZxIOEAVCWXnq7hsMiaJPf4aad9Pmh5rNBqPPmdamtdVdkutVuv1r396/fX0RgpNwspvterurqjatqiatlWxhKgYUhyHeLaQFYrwh5OqE3v+SSkqtrruSS/yoRRijbBa89bRSZN7aVLYq7hu2eKBgfzSWLXpeqkkVmdfmXau4fogA8nc37CyUqs0TLEghfUOEatKhJoXspNU/ZVqOJ8mbXGHCLlq2/ZdKY07ZkMsz85Ot5E6a2T6QsB7j2oL9Aa+QxVdoArhryMYhiEMUnmmaQpJKg1/SEMgcJxzHJumm4ZjFVR+dT4MMWEp8OcNOLdI3algWQ3KQ52GbTl5LcuNGw2L8lEfExBASiHt5YZhfDZ3ZPpOQJZUkzIjIDSdZVgXbCnfI4kXlNQgS6lkOkQD2UZGRlqEU3k47g8CjUZDgIy7uzsUN8TOzm7bg4kcq0Tpq68f+8P1tgspnqROQId4JXGRXrlLalwG0o2NjRLZRh3y4ZyDngiAhNvbWw4ZlZYEEUlLXH/t6PTVtKVOlQn3H/7vnLSNazSuqELQkZGSOHCg7MRpC87lZY/A1tZ2i4x4GoiYtkZMhYCk9aoN0/6UZFyAoEw8oFCcAK24vr7uHTd+ZY7IxTRm0okJuPKodtGy7SvobtG1lstl0npjxUGfa9JCABqxXq8rItJs2VpMOj6MTUBnrGeKyzQXuwQJR0dHxMTERGu22pKaDwqFAMaFICHIiEDtv3Ti2Mxi3ErGIiC6XMuwv6Sx3jxrvbjQ5/u+zc0th4hY+sHSjTEq34/TJUcmYJN8tzHRwDrd1NRka70u35Cy9FERgDZ8/vyF0yUTkVaNEXk6KgkjEdBLPqzhTU4eZPJFbbWC5QcJX7x46awjxiFhaAL6yQfNx+t5BWNTzOqgG4YmxGJ2VBKGIiCTL2bLDNFtcUnYubEaAFpzwlFFt8uaLwAgTnJ6Q3ADHKEluaq1bX9JiqvSC5qeBPz1YQ07G/OYcGDMx91uL0iH9zq4oeYF4MyuaV3uhca+XTBtrV0QwvgUBR86NMUTjl5o8nUHAUxMfv/9uWOBQ13z4onjM0vdoOlKQGfcZ9o/YIdjfHycdze6IcjpgQhgnXBjYwPX1mjb7s1uyzNdu2Da270G8sGKhbfWAjHmxH0QAGewO0ah0thx7AQCcwcS0O16xTmM+7C3y4ERiIOAZ2t24f7D2rmgMgIJSCZVzuAR5FNWyUE3cxojsB8CmDsoBUbfp1wLmhV3EPDXR7XLapsN3S8HRiAJAuiKYZ5Hw7nqrmE5hive8joISJ9QXUAGqE8OjEAaCMAoGYE04kW/FmwjIMZ+0H5gLP44MAJpIODhU4W04AVvmW0EVGO/0VE2KPWCxMfJEfBoQXyk1gotAq48rs3z2K+FCx+kjAC0ICYlFBbwma4qvkVA+jzvAhK561XQcJw2Aq1JrWUtqLJbBJSGfAeJ3P0qaDhOGwF8lotAmtDhGo4dAmJmQiZd80hgDQgUOGSBABwSqG5YzYYdAjbMxgIeyOTLAnYuUyEA8oGECPAPhNghoG1LR/sZhnsRFzgwAlkgAHtBJ9juONAhIDHzFBLhp4UDI5AlAoqAjmc0elCTgKKKhwZ5nkI6B0YgLQSUkqPe2FF6zS7YnYAodqb1MC6HEfAj0JyEILmKfyWajVTJixxbvQCNnISNDUvcvl0X9+7tiKfPGuLp04Yj+fi4IY68WhKnTo2KkyfHxMyMfmN6EBAWVrCahldciVVpadu3MQOenJzMSRMMp5gg2uefvxC/3HPdYvRC4a23DoizZya0IyLM9fEJJ/mOPF2SdqOCoaBHNfaqV9+v443//vtN8csvO+Lxk93WG3/kSEnMHDbpjR8TADvrMEg5bt3eEDdvbpCZe7Bn06C6f/fdprh7d8sh4bvvjgdlGUgalmKcb4jtRlX++uDpJWLitbGxMTLB0kdIhQwA/PzfL3oCj+4Gb3tWRBykHF/fXBdff72uIIkVA5uzZ/UwscO3IvhmBB8sleCNHlvE8M+sW/jii5cCb36YgO7pX58/d7Rj2kAPUg7UP4h8cydonEdjvVOesd7jx7viEf3dvPmScGjXlCBxuSyFDprQ09tWSrBUBfU8iWHaO/M8ACws+bzC4L563RIffJDOeHaQcuClQrfrDePjUpwhbfbu6c7eCkMS/L1Nw5FbNEm5SVpzg7BQAXXBcGXQkxP1mYchjePOMgwE1ImAGLsEvfUKyF4xwEeXmTQMWg4QxjvmA/kuXZwOJJ+/ru+eLotLlypivNxqYnoxbZrEPPdnHeg59bzyOCTQaRsOwCcN6I69b3+c8gYpB7QfXgBvgOaDhgsbkPeMb9z3Cy3dJMUl7PO75VPKjjzrTu+9Ht1y9zkdoAP8pAFv+3fftjdglDIHLcfdH9s1+MyMEUrz+esITTh3on2L9fatuj9bX8/xuy8ItCR4SDsC3kmh61Rohl0vU/m98aDl+PFu+1rfmTMHveJFOj5J4z5vuBdyHdF7T1bH1AO7v8Gmyyy4Riv7aYUnT+KXNWg5MKP1BuxwxA2YKXvD02d7ExNver+OPTYHVYN+xYkWovWZhGAZIa2QpCsftBz+cdrRo/EJ6J/1JsElrbZR5WjXBSvBOB4OBLQjoP9tTdIMRyPMGP3PGbQc/ucn0Vp+bY4FaV2CdgR8NcFYxw/q9OH41Ru0HDM+2ZOsaz7xDWuOHmmfFftx6+d5axKi1mb6+fCgZ83NpQfOqVPxDRQGLceJuXa/PD/6lmWCsOuW5l/PPHmyvexu92WV7uFaxaCtOK0mIW+/VW5bvY8LAtbNsCUVNwxaDv9WGxaQb91q35YLUzdsZ/q7b2zHDTK0EXCQggQ9G+OT839Ovo+bZN0Mcg1aDjzfv4AMTeYfzwVhqNKwlOPfS4a1kH98qfIPIo4/SMpQWqxbJbHagOlREu2nqjZoOc6fn2rrDbC7s7RUC6UJofmWPlnr2EsGNjoF8+PFv16BQMqRoC7CvfEGjVNosgaz8yjhNFmJnDsXf9fA/6xBygET+9KIFD/9tLcrskvLpD/9vC2+IwNdZWgwNeXqEXS1MNy9cWNd/Oe/dfrRaRpgecJ77x0Uf3xjsN2vEqded7dJ5f2HzxwpDx+eVte0ir+lveEg+za/kLAU+fDDKTGf0fhmkHKg601iHQSsdDJIhTzPntUQCe0J6EhJ/0CAH2mf+Blt1alxEMYy2KI6QTPnt/50QEBjZB0GJUeQfV+Yuu5nPxjm/qzy5I6AWQGRp3LRxU
Ib+s20utUBVtPnz09qNelQsjIBFRI5jEFEmGvBYubxE7Lv23DHeugR8JEWeoTTC7Sc1YceIS58TMC4yPF9qSCgCJj9oCkVcbmQoiLABCxqy+akXkzAnDRUUcVkAha1ZXNSLyZgThqqqGIyAYvasjmpFxMwJw1VVDGZgEVt2ZzUiwmYk4Yqqpjxv/UrKiL71At+WnTwTKqLHPtAFfpSbqxhQtcog4zYe9XBM6kucqQBsdqKywUB8cYHeUhV5lhZekiFZXFUz6RoIJjUwwYviWW3t6F1kcMrU5Lj3BCQPZMKxwSrqAapWo8B2TOpcJx0BpEvzx5SvZpT2y44iRk6XJIl8ZCKsdY//lnr+KCnm2dSL6BBlsvojv/+t8ORDUN1kcNbv7SOVRes5TIMLH6D3vqwlU/qIRXk18EzqS5yhMU9Tj4tCQjgk4a4HlKhdfwm74PwTKqLHEnbodf92hGQPZO6TVZkD6leUmpHQPZM6jbP0HhI9bJRh2P2TOq2QpE9pHp5pp0GVN/8eoWMe4xxVNSgi2dSXeSIil/U/NoRMGoFOH++EdCOgGl6borjIdX//DhaVFHCr82xHhg26CJHWHnj5tOOgOyZ1G3KofGQGpe5Wd3HnkldZIvsIdXLHe00IHsmdZunyB5StSYgxkmD9JCK5+vgmVQXObxkyeJYOw2ISrJnUrep2UNqFpQPWSZ7JhWOdyv2kBqSMFllY8+kxTZI1dYe0E/oYfdMGmRn6Mco6Jw9pAahkrM0LEbDRMxvptWtGll5JtVFjm71jpKuDFJzowGjVC6rvCCADp5JdZEjCc5MwCTo8b2JEVAE1HIZJnHtuIDcIMAEzE1TFVNQJmAx2zU3tWIC5qapiikoE7CY7ZqbWjEBc9NUxRSUCVjMds1NrZiAuWmqYgrKBCxmu+amVlp7x1Io6uIRlOVQLZJerPVeMPY82TPpXmPrgseeRPGP1FactgTUxSMoyxGfZPvdqQhofrz41yvIWC6X98vf12swfbpxY13s7Li/gxvl4bu7Qvz087Zzy9zcaJRbO/KyHB2QpJZQr286ZWk3BoTGCfIN2G+PoCxHalzbtyCtumCMcdgz6V576YLHnkTpHakuWKtlGHR57Jl0r5F1wWNPovSPtCEg3na/yfsweybVBY/0KddeokHuctaQZNvRB/ztRSU708UjKMuRrB3D3O3h2ppBvNOCgLp4BGU5wlAoWZ42AiYrKr27dfEIynKk16ZhStJmDKiLR1CWIwxt0sujDQHTqxKXlCcEtCGgLh5BWY7s6WtZ7oRX0vzDEFKs4pGNhpX9k/d5gi4eQVmOfRoppUtqEmJLEFCToItHUJajv4QAAbVYhtHFIyjL0WcCWrb9Ox5p24PtgnXxCMpyZE9Ay3J/v0UKuapNF4xq6+IRlOXIloTeTTfYA85LKRdKJVOMjIxk++QepY+PG0IHj6AsR4+GSnh5Z2dH7JLhJk1GbshfHzy9ZEt5bWxsTExMjCcsOp3bYQUSZBMYpfSzZybE2bMTUW7pyMtydECSSsLGxobY3NwCARdLDWk7azE0Ckyl8DQKAXnKZUPc/JrMs+rRxqZpegRlOdJozc4yLMttUymNVXnvUW1B2vZt0zTFoUNTnbkHmAKTJGghv5lWN5GK7plUFzy64R82/cWLF/S5BXXBUp6WKyu1asO0VwzDEJXKobBl9DUfgGfPpHuQ64LHnkTRjtbWfhfQguaInHV+Pe/+w2dO/zs9XRE0IYlWGudmBCIioMzxXz92WLrLMLa7Hae2SCKWx9kZgdAI7O421wBtcQc3uQSU7gmmxxwYgSwRUIvQNA15gOc0NaDtnCh2ZikAlz3cCGD9zw22VwPay0hU7HQz8H9GIH0EGo1mFyyNPQKaDXMZj4IG5HFg+qBziXsIYPkFwWyIZcROFzw7Ow2LmGWQj7thwMIhCwQU+cgQ9U6Tc80xID2NyPcNHrq97fpVyUIALnO4Edje3nIAsIXLNZy4kxDnyFhGxAQEChyyQEBpQMsyrqvyWwQ8cXR6mRKdblhlVJk4ZgSSIrC1teXsftA2x+rc7LQzAUGZLQLihPaEbyDe3Kwj4sAIpIaA6lltIa96C20joEGqkRi6Bg3IWtALEx8nQUDxCdrv9WPT171ltREQMxMy0f8EGVgLemHi4yQIrK+vO7cTtz7zl0OkbA9kHVOxDPsH+mSuOj5eFgcOHGjPwGeMQAQEMPZbX9+gr3/F6mvHDs/6b23TgLgILUh2Wos4hhtVXpgGEhziIIBvzZUrXv/YT5XXQUBcoH76K4qcGfHLl676VDdwzAiERQDDuKb181f+sZ8qI5CAuGg25EekNmlCskPjQdehtLqJY0agFwL45mNraxtd7xoZnjo9atA9XQlIXfEq2UxfxU1Qo4N23REkPKfpiYDb9bpLedT1Ls6+QlzqEroSEPlfOz69RIPATzAOhB0/k7ALipzcQgAcAVecuQNxp1vXq24gDbl/aM6Kb9OseB4fLk1NTbLZ/v6QDe1VkO75cyiqBm1qiDuvHT/8Zi8w9tWAuBmzYsOS71OBqygYD+CZcS9Yh+96G/loycUYle+HQaGnBlSF4Os5Wh+EJqyyJlSocAwEOsg3Ik/vN+7zohaagLjJT8KDBw8K0+ypRL3P4+OCIYAx38uXL91uF5ovAvkARSQC4gYvCfEt8eTkJJMQwAxhUBMOrPURkSKTD5BFJiBuapLwS0xM8B1xuXyAt+wAzBAFrPPV63Wn+8WEA2O+sN2uF6ZYBFQF3H/wdImmxBdxPjY2SiQsszZU4BQ0xngPxgXb281PeGmpxbSMK5isxqlyIgLigfcf1i5IYV8j1woVdMnQhvC0xaF4CLRpPdrhIOuWqyeOzywlqWliAuLh6JIbprhG86FzOAcRJyYmyN+gdr8GC/E4REQA9nzY1/XYiC7T9tpHcbpc/6NTIaAq1NGGtn0ZSzVIAwFHR0dZIyqAchb7iUdkWcXWWtNYJZXapEpAJdG9B0+v0O8//EURERrRJeMYa0UFkoYxxnf4LHdnZ9sxJMA5ApHEMVQuWcZS3LFet+pmQkD1ML9GVOkgIxazS6USddeITXWJ4z4hAHLhD9ZO2OHCX4BjgmVpyxuGJa6nTTxVzUwJqB6y8rg2T2tGNFmR72DpRqV7Y2hJLGpjWQfHiNUfSKqCe71dbJVP5RmGWBHIX1eszSHgVw+UBsM6ncqvSNa00/PfjvNlyvsNNcJy80vJoDyppbW3ZGrFdi+IJiwVmrAsEEBYQzxFa0jVbqTsXgpfSQUBuOWDZzSbnFNJYxnuMrLSdN3k7TsBuwmy8lutSo6TqkTICkhpCatCv6Z9HPlp4FulyAm4jiUfdY6YlGVHmvd6EY+p4daoB13rqFvzp9cofY2Wx5zr9NNsDwxhrDXop7EIq1Ua+aymMYPteHaMhP8DKleEJHlBQFwAAAAASUVORK5CYII="
+ },
+ "language": "zh-Hans",
+ "position": 1
+ },
+ "9553b1e0-0c26-445b-9e18-063ad7eca0b4": {
+ "chunk_structure": "hierarchical_model",
+ "description": "This template uses an advanced chunking strategy that organizes document text into a hierarchical structure of larger \"parent\" chunks and smaller \"child\" chunks to balance retrieval precision and contextual richness.",
+ "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\n version: null\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 6509176c-def5-421c-b966-5122ad6bf658\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAYkklEQVR4Ae2dz28cx5XHq2f4m5JIyo4R2+t46B+H1S5gGUiwa1/EAFmvkUtsIHGOq6y9Z1vJHyDpD0iknG2vneMmBmxfFo5twPTFzmIDRAYS7cFKSMU/FCS2RVKiSIpk975PNWtYU9M9nB/dM8PueoLY3TXVVV2vv/N+1auaQA0JLV27XpNHqe3K/yAIZ1WkZitK3c/jhUEwG8g150I1/df+E8hn+5/bnxT3PFArMuaVhgFyTfkeBSpa5jRU6irlUVhZrsafL8/fPac/4/NBUtDvzpeWrs/ujquFqgpPhZWgJsA6Kc9Q6/dz+P6EA5G6FFXUsoqij6Kocqm6pRbn5+fqAO4Hj/oCQJFuCzKYU5GKOPK/iSqViqoEgaqOVFUgR/5TBgVy5Bqq7pXpi70/pr5dVvTzKBJuyn+buA6tsnB3V+oIzqJQ1w1DOYaR2pUj54kkoBTJuahGKr+Yv2vuUmKdDAtzAyCSLpwMTwdR8D153gXzzIBlpFrVQKvKcXR0tA44U8cf+8OBXQEoYNzZ3la7O7tqe2fH7XhZoHr+obvvfNX9IKvrzAEI8NSEej4KoheMXQboxsfH1OjYmAafkWZZDcK3kx0HAOHtrS21vb1jS8ll0Umvit14Prue4pYyBeCVz794qhJULkjTNZofHRlRE1OT+si1p8PFga2t2zEY9yVj5hIxEwDiwYpF8oqwdwEWe+DBheIQUnH95npdIkaBeqMSBWey8KR7BuDVv1x/Xkzdc6hbVOvk5KSamBgvDvf9SOocQCJubGzEQJRwThiFZ3q1D7sGoLb1JtVZ8bxe4AnHxkbV9PR03VutP7U/KRQH8J4BIWCExNa/+ODX7zjT7SC7AqBWuVH0ugQ3T3qp1y3rD/d9m5tbGog6FEToJgie7kYldwzAPXvvPWFfjTjdsWNH6/G6w81S//SdcgBpuLZ2w9iGeMrf7hSEHQHQBh8xvKNHj3jwdfrWClYfEN64cVMRUxTqGIRtA9AFH5LPx/MKhqYuh4MaRhJ2A8K2AOjB1+WbKdFt3YIwnmw9gFHS+OtSpYba9ZLvAGaV9GO0IdgAI2AFzOhIyQH8OBCAS3+5fkGJt4vDgc3n1e4BHC3xx2Cj7hcIZiQX4OxB7Gipgq9c++K05Ki8QsMzM8e8w3EQN/3nmgM4JqurazoDRyThmQfvueNiGmtSAajtviD6HTMcU1NTfnYjjYO+PJEDxAlv3boluXRqRTKiHk0Lz6Sr4CC6APjIYvFTa4k89oUtOABmmB0DQ3t5Aom1EwGI6hXP+insPuZ2PXkOdMMBa2p24crn159KaiMRgGL3aeMR8Jms5KSbfZnnQCsO4DsYAVYRjZrkFTcBUGw/wFcDeKhfT54DvXAAVUx6nlAtnAh14ordXhMARV+fpsL0kWm7nj/3HOiaAyQlQyIRn3elYAMAsf2kXg3E7qGW+zx5DvTEgTqexCEJx8PTdmMNADS239i4Tyi1meTPe+eAJQVZpFanOgCXPr1+Ukq97VdnjT/JkgNIQZwSoQXxMxZM23UAhpVYNI6OaoPRfO6PngOZccA4tbLUc8E0WgegJBOeotCrX8Maf8yaAyzLhQzWONcA1J6JTB5T4J0PuOApDw6wIUFdDbN+XEgDcHd8d4ELDz644CkvDgA+QKhpSi1w1ACUD7T0q8i+LJ48B/LkAHv/QOFubAdqAMrau
kcoHB2RyWNPngM5cmAvYRU7sEY32uUV51hfVKsxHvnA0z4H1rYj9dZnW+ry6q7683qoLq/sqFUpo9zQfVMV9XfTVfWPs1V1YmZEPXbXqKLMUyMH2IxKU6C00ItjLnsOiEFn4y3lvAJcL368qT7827b+fxAXPrkVKv5T39A/CBife2jSg9EwRI57TgglNf4EewuOlkg+mJ2doazUZID30scbDRKuV6Y8UxtXPz4x5aWiMHJlZVWvJRY1PI8ErMHcpI0fKS8T/fTyhsoaeIZ/v1zeUvwHhD85Ue4cS1sKVnajXR2PCSpiCZaUUJ1PvLuifnb5VqrUe/xro+o/Hp5Q//n4UYU0S6L7pqoaXNRNI/r45/++rtV1Wp2il4/secKyPWZtpFoJZAmd6GJRwWUkpNLZj9YTgXdsNNCge+7hScU59FMBEPe49OQ9Y+rcyem6itX24F+3E9vWgH9nRV381hH1r3Jf2chIQFkrMjsiWwbPwlr2Zy4bAaafidp1CbChJgGeIUDz7Ac31B/EA3bpJ6JWf5ygVl+6spkIbO7H1vx3aa+MKtkAUGIxsyMCuxoMqRdyUQJKAx9qFlAYiQcrfv35bXX20nqT2kTlPvfweANQW9WnTTt0Q11UMlQmu9As85D0v/vrqS9lAiCASpJ85x+ZagJTGlAB368WjtVVrkaR/Dmo/q8/EzCLyrcJEBIzTLMt7bpFOxfXI7ifQVXMHF3RRuiMB1X6wv/ebChFMr126lgD+Kh39qNkFY2954Kv3frPiYR9+zuzDRKWhwGUtFEGMsJOFq3P1SVgGQbOGH+wuNqkBl87NaMIGhsCCNRLAkSSvddp/WNjstOEo45Rzc9+sKbBaZ6jqMe6wytsKBUAUY8uqFC7Nvio85LMgLi2Gir35cePSN1GlmVVH7D9YWVXmwZJDk1RwViREEycl1VwLxjguXYfNpft6Rr7LQl8qNwk8NFmr/VtcL2oZ2CKrYqtSY+aJOrHADR62WZGkc6Nt2nGhETD24UAZ6sQC3ab7RVnWR+v+78krmhAzPGlj5kx2Q8BmWcu4rEU0WcA4waPecF4nnyGvdcqvueCL8v65x6ZlhBM/EUwACuDFDRjbTRoTGnBjh/KjIRNSD/Ub1b2W6/2IRKWZymjFCyFBHz5SuNsxzO1sXqIxbx0A1ATYrHtPaSkCcnkVd/uj2f5wErrMs9WxGNsAzIXLP+KSIDn9+Jd2kTWSxJlEWIxKp2jS520T17h2nYotmfxZETd3xD/o8L+bTCqqNkwrvp1QcE1KpRwjGv4M2OSFA/Mu755xrdk1qSIVAegYK/wNuDl1ebkAfulAiZ3VoPPTUjGrst53vXt/lgCUHQqPABd9Wu/UFRiUoiFQDSJqS7lXf8xySO0U/pZf1J0KjwAP11PliKd2GOAoB/1fyCeOcmqhlj8VHQqPABdZwAVmueUWi/tux42K++KToUHoPsCh8nec+1JO+DNc7uAdMdShOvSAdBeq4t0HNQUXJo9WQRQdTKGwgMQqWJLEhNbyyrLGSnWSVb0QfU7eXlFqFt4ALp5d6syK/fix8mJpq5KNC94UCEZW1qbZynasfAAZIrrk1v7Ad0zkg1thzrMC3VXtVGOik4LyeRdn/7vk60+ik6FB+B9041TWUng60eIxZ1lAdxJsyw24OxEWbu8SOeFB+CJmXQpgspNCsm0sg/zrO8Ci02Oik6FH+GT946rM79tXIXGSx02ey8JaOywVXQqPADxgt0pLnYjYFcCO+426JAMz2Iv18R29U5IQb5+j39tpMHxwA50wZdmj/XLPrSn4GD7cw9NFIT7rYdReAmoX6ZsscFefyYeyJFr1mMMQ1Y0ywWQwDaVQf0y3lIAEGkXg20/w4VFSp/qMMt+mQFA3iEWu32A5y6YYrlAGdRvaQDIQFl+6UrBtJSrTkImvapowOdKP7Naz3whinxsDJIVeKRGCqYNEa+431nRfCHc1XoAuizSj3dRChVsQIdkeevz7aYlmIMIybALwjlnkyKew5W+5tmLeiyNBDQv8GXZ4dT2gClflcU/a7f3nQBUolkFZ+4zR+w3N6Wr0/p44d9/f9U0qY88E+2WjUolAXm5qLfzshj8zG/3d8jCK37i3VXFIvEn7x1LnSLr1d6jf9SuK/kop98yqV7GDAV/uvaVTrs9fnwuLinJXwDo2l8MHUlkwjWGFajGpCm4TkI4tGk2QTftukdMhLJsVPnVV/HSg9JJQF46KjNtuWYS+FyVSxudpGgh9fB23bZpxybqHOQs2fWLcF46AAK+tFkP94UCBpJNbeL+drKoARvAS/vZBwM06tjARD2Tw1iW3VJLpYLTwEeQ+q3PtkUyJq+gA4DMJzOllzRrAZgADD/PgIPBUtCktC8DZOZ5cYaw+WKHZM18VD9e+OaRQoPQqOBDA0CkBL/X9uEXOzqM8omsmTWSAwCQ98eLfezOUW3QU2YTdfE8CX/YZDsWqMC0bTvse7o9N1LPDTQDatspMu3bIOx1/KbNYTkeGgAitV6WReL2HnrtMBGJxIs2nuX3319rkkrU4SXbRH8AMclBset1cm6AZ//eiHt/GggZww0JE/U6fre/QV8PPQD5xh/kNbbDRHY+oC0XUEjLt7+T/tt4ABFH5WX5rY/fd7lAHJX8mKjtVsCzx5AGQrtOp+eMH8962DY5GmoAptlqnTI/rT7gY1d8V02n1TdgZJ8ZVPgnstsCZYZoB8eBdjEFyMImEbbd9k07HPMAIVrgVwszdW1g9zeocwPAofOCecHsFm+/YMMko8pwCPhtXqNekXDscEoq/UHORBzTa54NMX0kHennPlHXSu17xPe+9mW9Kv3/3/eO1697OQHEjJM2Xep2/OYLjeND+8NEQ+WEGEa54AM0F741rT3RdpiHFGHz8CSvFskHgHslG4C09dn37+i1Sf2lSwoRZTX+YZKERgIOzVww3/gk5hMieftfZjoCDc4F93CvSyzLZHH6sFE/xm++4MM0/qEBIA6HK/kIkTA/240txT3xBuCNu83TR56hlm6BXdbxDwUAAYWbHIr0yiI1iTCGKwlZbO6CvVvgZHFfmcc/FAAk7mYTNo8brLU/7/Q8jgc2rg8mtjgsVObxDxyA2D5ujA7J143aTQMUbeHE2BQHdgdvC5Z9/AMHoLsRN9IPJyJrwvO1Qc2Ld/vOus922nOfoWzjHzgAP/yi8Udknry39xBJ2ot3bUHmlQdNZR//wAHo7oPMrgV5kRv/cxMT8uq3VbtlH//AAejuBJ/njlDMntjElNqgqezjHzgAscVsynPS3Ezdmf7cvk15P4/uM5Rt/AMHYD9ftu9r+DgwcADaninsyTNA3CxtGpNWB/F6yj7+gQPwG84Opmk/LJMFONzfBB6GLXDLPv6BA/CEkx704d/yC42QrmVTng6P3U+r87KPf+AAfOzOxvw0fi08L3KDvqwfaZdQ379c3tRrN554d6XpNsrMWmNX1TdVtgoOy/itR870dOAAdDOHeXmtVpR1O3qm+1z7sp2gN/ewVPKf5Dfc2OqXdpLih5TxGSD8+ze/0ke3v6Rn
H/bxJz1zlmUDByBG+A+dqbesc/YAtTvhz3Rfq5AH97A/DDuXumt323kBgJF72Xa3Vf7dsI6/nTFmUWfgAGQQz8refTYhObLM2UvKtWuVbUP/T7yz0pQiZj9ju+ekfj3xzmqT9LXvH7bx28+W93mjAZZ3byntEyBmnhZJY4gXh4Tqda+UeP+WRruSvtygtOk3jzUpAJps77Q1GcM0fsOHfh2HZk0IKi+WFI3TY90uK6Q9JJ+b6Eq2Cen6bvwNhhugcLSJe7JYkwLQ0lanDcP47THnfW7WhAwNABlwDABWxDWCkBeHymw3TQsnBjsyCUhJGw3RdwyAlaZ7kJb0nQRY7ksj2sPutKU6dRlL/AVotn4GOf60ceRVPpQAZLCxCrzRBEI+4+Wxjx4ZM2b5IuW8OALYH0gMMW0zIKRYrAIbExK4H8LhcKWlvW1HXKvzv4DQtWeR6uxRmESDGn/Ss+RZNrQAZNBpkqBbhgC+NMln+nN/pwPJx6KmLIgwjisJf/PduVQ7tN/jz2KMnbZhANisBzptKYf6Rk0Bgl6JNlB5tJlGbogGwLbyktPaSSunLdq0qdWalH6P336ufp8PlQ2YNHikAQAhrtYumdga4Y1WwKM9bDUCxzbZu1LZ5b2cu9uw8Yz/893ZlrFI+st7/L2MqZd7jQQcegCaQQIUptJIYb8ssw5/FpuPMoiX+Q1JNj0xW5Xt2UY62pfFzF6YfpBUvxFg5EEA3Twz7V/45rQ4Vu1J+bzGn8c422nTAHAo4oDtPDAgwwtu1xNup03q9HtNhu2QsCblmVp7T5rX+NvrPb9a6YZRfn0OVctlX5Mx6JdRUYHSqR1R2JgaP+gH61f/ZV+T0S8+2/1E0R7WBHsVFe0BUE7KSLZNxvhbJSj0yh/XIXL77rX9w3J/HYCCvdKr4MPy0or6nKUHIMa9TYQ98iJX4rl959XvMLdbegCWfU3GoMFZegCWfU3GIAAY2k6IKKBlHmI3zE/1DGKQ7fZZ9jUZ7fIpy3reCbG4WfY1GRYrBnJakfBfqeOAOALDuCZlIGgYQKeVIIj0LydHUTlVMDwv85qMAWBOhbtxwnGgguXSOyG8AALEbuoXa1LsedtuX1Sna1K67ecw3Wd8EJ65IvMfy5yEJXVCGDuUlLNHGthByyrju5v/EvMjy5rfK7Ep61xDu+3Dcm60bajCq5XK3lxw3TU+LKPI+DmxBeOs6cbEUbOsspN8RHL/kpZ1Aj76KHsA2vaCgyvXvjhdUZVXxsfH1PR0NinoGWOjr82VZU1GX5nqdHbzxk11e3tbBZXg6WDp2vWFSEXvVatVNTNzzKlazssyrMkY5Ju9sXZDbe/sSCJW8G2ckGUepi4WuSg5lWlNxiBetTXpsaxn4v907SudizU3O4tYHMQzDW2fRV2TMUiGm3T8B+4+HhgALskD1WZnZ1Sl4iMzSS8HrzaPNSlJfRW5bEdigGura0r076UHvn78Ub0mROIylwSKtW0xDMfHs/+RmCIwFM81jzUpReBNJ2MwQWgVqqvctyfuIn0BOj15DuTJgR1xPqAoiC5x1AAUL3iRi3DHAxA+eMqPA7t7GBNTbx+A1a3qIl0iAcu6OCk/lvuWbQ4QftF0Sy1y1BJwfn5uRbyRRUIxO6GXgppB/k/mHKiDTxwQMEcHdZc3VNH7FNy+3biTPGWePAey4MDtzXh7FdGyGmu0WQegTMctUnB7ywMQPnjKngNGAlZGKq+a1usAnL97btGoYVPRVPJHz4FeObC1tWUyrpbn75rTDght1gGoOwiiNzlu3mpMIdKf+T+eAz1wwGhWmf89bzfTCMANEY2SnoUE9FLQZpM/74UDFp6WRdO+arfVAEA8E/GEf04FLwVtNvnzXjiwfnNd3y7x5l+47YjZ10hLS9dno4nod1Jam5qaVBMT7e1f19iKv/IciDmA7be+fouLZUk+mHf50iAB+VDHBKPgDOcbG5s+MA0jPHXFAdKuwBDk2n6mwSYA8sH8PXNviGjUgemb67H4NDf4o+dAuxzAjGOtURSoN1zbz7SRCMD4w+BH2iGRDJnNzf1fMDI3+qPnQCsObErQeYtJDfYA3NOoSfVTASiIXQ7C2GVGjFpZrEnt+DLPgToHYtUbh/ICAR9Yqn/onKQCkHqiii/iFTNHTB6/B6HDPX/ZxAEwAlbADNhJU73mxiYv2HxgjtorHo/eE1F6koVLx44e9Wn7hjn+2MABQLeGoCKvVJKcH7jn+KMNFRIuWkpA6muvOAieltNlGl67Iegu6X7SCfzzRXscaACfYCWIMXMgfw6UgKYFWb5ZY/mmXNe8JDRc8Uc40AQ+WW7Zyu6zudY2ALnJBeGRo0dU1S9isvlZunNsPhaaa7WL5OsAfDCrIwBygw1CVtAdPXbUgxDGlJCMw7G3r1DH4INlHQOQmzQIo+h1ufuk6Ho1OTnhp+xgTImION/GxoZWvzgc2Hztql2bTV0B0DTwx8+/vCgdP8/1+NiYmpC5Y6+SDXeKecTeI7mAvV0guf55ZatyzqTYdzrqngBIZyINT8sSuwvyLZhFJSMN/driTl/D4ajfIPVkhkOiIecfvOeOi708fc8ApHNUsqjjC/JteIprgDh9ZFqNjhya30LksT2lcIB8PuZ1rRzRRXE2ftSNynW7yASAplEtDVV0Vq5rlAHAMdn2zUtEuHH4KAF4y3pqTZJVshpNpgA0D/XHa1+ek2/Iv8l1jTIkogbjxLiXijBkSAn7jrXh25JEsCWL07jWhLrF1tusXOzW1ksbci4ANJ25EtGUA8bqSFWNyLEi03sj8t9TfzkAuPjPfkDE8NixQG9MYEAXP86iOJlvqg31atbAM6PNFYCmk6W/Xj8Z7oSnRSqeUhK6MeX2ESmJB01Yp1KNj5zH1/sA1ddSbpOpZ5cV/dwAyB2nSRiJyMPbA5POydsD3I4AjfIWe4IvCjTfZ5mu2HiLbvtZXze+yaxbT2iP5AY1rhbCIDwpvxHxiPw6BA5MIigTbvdF2XJA5mzVpTCMrup14VtqMS9Jl/bYfQdg2oNoTxqbUcI5sli0FkbhrGRK3B/XD2rmPvnyyi6a8t8mrikvE4ldJmNecYcsL3RZl+nPI/25/ALM1UpQWdmV+qJL+JzVaXE9XXlwf/4f1AC7LPmFaqYAAAAASUVORK5CYII=\n name: Parent-child-HQ\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n 
type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: tool\n id: 1753346901505-source-1756972161593-target\n source: '1753346901505'\n sourceHandle: source\n target: '1756972161593'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1756972161593-source-1750836372241-target\n source: '1756972161593'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1756972161593'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius/jina/jina\n score_threshold: 0\n score_threshold_enabled: false\n 
search_method: hybrid_search\n top_k: 3\n weights: null\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 479.7628208876065\n y: 326\n positionAbsolute:\n x: 479.7628208876065\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. 
The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online 
Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. 
It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 547.4103414077279\n positionAbsolute:\n x: -417.5334221022782\n y: 547.4103414077279\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 638\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such\n as a paragraph, a section, or even an entire document—that include the matched\n child chunks are then retrieved. These parent chunks provide comprehensive\n context for the Language Model (LLM). Chunking rules can be customized—including\n the delimiter, maximum chunk length, and overlap—to fit different document\n formats or scenarios. 
Preprocessing options are also available\n to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 638\n id: '1751253953926'\n position:\n x: 184.46657789772178\n y: 407.42301051148354\n positionAbsolute:\n x: 184.46657789772178\n y: 407.42301051148354\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 410\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 410\n id: '1751254117904'\n position:\n x: 479.7628208876065\n y: 472.46585541244207\n positionAbsolute:\n x: 479.7628208876065\n y: 472.46585541244207\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - 
'1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n - - '1756907397615'\n - content\n height: 213\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1054.415447856335\n y: 236.10252072775984\n positionAbsolute:\n x: -1054.415447856335\n y: 236.10252072775984\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - 
comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The text you want to chunk.\n ja_JP: The text you want to chunk.\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as 
parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em parágrafos com base no separador e no comprimento\n máximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuperá-lo.\n zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: 父块模式\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - icon: ''\n label:\n en_US: paragraph\n ja_JP: paragraph\n pt_BR: paragraph\n zh_Hans: paragraph\n value: paragraph\n - icon: ''\n label:\n en_US: full_doc\n ja_JP: full_doc\n pt_BR: full_doc\n zh_Hans: full_doc\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divisão\n zh_Hans: 用于分块的分隔符\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: 父块分隔符\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento máximo para divisão\n zh_Hans: 用于分块的最大长度\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento Máximo do Bloco Pai\n zh_Hans: 最大父块长度\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. 
'\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivisão\n zh_Hans: 用于子分块的分隔符\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivisão\n zh_Hans: 子分块分隔符\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento máximo para subdivisão\n zh_Hans: 用于子分块的最大长度\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento Máximo de Subdivisão\n zh_Hans: 子分块最大长度\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espaços extras no texto\n zh_Hans: 是否移除文本中的连续空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espaços consecutivos, novas linhas e guias\n zh_Hans: 替换连续空格、换行符和制表符\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: 是否移除文本中的URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius/parentchild_chunker/parentchild_chunker\n provider_name: langgenius/parentchild_chunker/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1753346901505.output#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - parent_length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - parent_mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.parent_dilmiter#}}'\n 
subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - child_length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.child_delimiter#}}'\n type: tool\n height: 52\n id: '1756972161593'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 947.2141381290828\n y: 179.30600859363653\n zoom: 0.47414481289660987\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to crawl the site. 
If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. 
Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: parent_mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: parent_dilmiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: parent_length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. 
You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: child_delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: child_length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n",
+ "graph": {
+ "edges": [
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1750836391776-source-1753346901505-target",
+ "selected": false,
+ "source": "1750836391776",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "document-extractor",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1753349228522-source-1753346901505-target",
+ "selected": false,
+ "source": "1753349228522",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1754023419266-source-1753346901505-target",
+ "selected": false,
+ "source": "1754023419266",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756442998557-source-1756442986174-target",
+ "selected": false,
+ "source": "1756442998557",
+ "sourceHandle": "source",
+ "target": "1756442986174",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "variable-aggregator",
+ "targetType": "if-else"
+ },
+ "id": "1756442986174-source-1756443014860-target",
+ "selected": false,
+ "source": "1756442986174",
+ "sourceHandle": "source",
+ "target": "1756443014860",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1750836380067-source-1756442986174-target",
+ "selected": false,
+ "source": "1750836380067",
+ "sourceHandle": "source",
+ "target": "1756442986174",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "if-else",
+ "targetType": "tool"
+ },
+ "id": "1756443014860-true-1750836391776-target",
+ "selected": false,
+ "source": "1756443014860",
+ "sourceHandle": "true",
+ "target": "1750836391776",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "if-else",
+ "targetType": "document-extractor"
+ },
+ "id": "1756443014860-false-1753349228522-target",
+ "selected": false,
+ "source": "1756443014860",
+ "sourceHandle": "false",
+ "target": "1753349228522",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756896212061-source-1753346901505-target",
+ "source": "1756896212061",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756907397615-source-1753346901505-target",
+ "source": "1756907397615",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "variable-aggregator",
+ "targetType": "tool"
+ },
+ "id": "1753346901505-source-1756972161593-target",
+ "source": "1753346901505",
+ "sourceHandle": "source",
+ "target": "1756972161593",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "knowledge-index"
+ },
+ "id": "1756972161593-source-1750836372241-target",
+ "source": "1756972161593",
+ "sourceHandle": "source",
+ "target": "1750836372241",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ }
+ ],
+ "nodes": [
+ {
+ "data": {
+ "chunk_structure": "hierarchical_model",
+ "embedding_model": "jina-embeddings-v2-base-en",
+ "embedding_model_provider": "langgenius/jina/jina",
+ "index_chunk_variable_selector": [
+ "1756972161593",
+ "result"
+ ],
+ "indexing_technique": "high_quality",
+ "keyword_number": 10,
+ "retrieval_model": {
+ "reranking_enable": true,
+ "reranking_mode": "reranking_model",
+ "reranking_model": {
+ "reranking_model_name": "jina-reranker-v1-base-en",
+ "reranking_provider_name": "langgenius/jina/jina"
+ },
+ "score_threshold": 0,
+ "score_threshold_enabled": false,
+ "search_method": "hybrid_search",
+ "top_k": 3,
+ "weights": null
+ },
+ "selected": false,
+ "title": "Knowledge Base",
+ "type": "knowledge-index"
+ },
+ "height": 114,
+ "id": "1750836372241",
+ "position": {
+ "x": 479.7628208876065,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 479.7628208876065,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "File",
+ "datasource_name": "upload-file",
+ "datasource_parameters": {},
+ "fileExtensions": [
+ "txt",
+ "markdown",
+ "mdx",
+ "pdf",
+ "html",
+ "xlsx",
+ "xls",
+ "vtt",
+ "properties",
+ "doc",
+ "docx",
+ "csv",
+ "eml",
+ "msg",
+ "pptx",
+ "xml",
+ "epub",
+ "ppt",
+ "md"
+ ],
+ "plugin_id": "langgenius/file",
+ "provider_name": "file",
+ "provider_type": "local_file",
+ "selected": false,
+ "title": "File",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1750836380067",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 224.87938381325645
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 224.87938381325645
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "documents": {
+ "description": "the documents extracted from the file",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ },
+ "images": {
+ "description": "The images extracted from the file",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
+ },
+ "label": {
+ "en_US": "file",
+ "ja_JP": "file",
+ "pt_BR": "file",
+ "zh_Hans": "file"
+ },
+ "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "max": null,
+ "min": null,
+ "name": "file",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "file"
+ }
+ ],
+ "params": {
+ "file": ""
+ },
+ "provider_id": "langgenius/dify_extractor/dify_extractor",
+ "provider_name": "langgenius/dify_extractor/dify_extractor",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Dify Extractor",
+ "tool_configurations": {},
+ "tool_description": "Dify Extractor",
+ "tool_label": "Dify Extractor",
+ "tool_name": "dify_extractor",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "file": {
+ "type": "variable",
+ "value": [
+ "1756442986174",
+ "output"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1750836391776",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 268.1692071834485
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 268.1692071834485
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 252,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 1124
+ },
+ "height": 252,
+ "id": "1751252161631",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": -123.758428116601
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": -123.758428116601
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 1124
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 388,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 285
+ },
+ "height": 388,
+ "id": "1751252440357",
+ "position": {
+ "x": -1723.9942193415582,
+ "y": 224.87938381325645
+ },
+ "positionAbsolute": {
+ "x": -1723.9942193415582,
+ "y": 224.87938381325645
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 285
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 430,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 430,
+ "id": "1751253091602",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 547.4103414077279
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 547.4103414077279
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 638,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such as a paragraph, a section, or even an entire document—that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM). length, and overlap—to fit different document formats or scenarios. Preprocessing options are also available to clean up the text by removing excess spaces, URLs, and emails.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 638,
+ "id": "1751253953926",
+ "position": {
+ "x": 184.46657789772178,
+ "y": 407.42301051148354
+ },
+ "positionAbsolute": {
+ "x": 184.46657789772178,
+ "y": 407.42301051148354
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 410,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 410,
+ "id": "1751254117904",
+ "position": {
+ "x": 479.7628208876065,
+ "y": 472.46585541244207
+ },
+ "positionAbsolute": {
+ "x": 479.7628208876065,
+ "y": 472.46585541244207
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "output_type": "string",
+ "selected": false,
+ "title": "Variable Aggregator",
+ "type": "variable-aggregator",
+ "variables": [
+ [
+ "1750836391776",
+ "text"
+ ],
+ [
+ "1753349228522",
+ "text"
+ ],
+ [
+ "1754023419266",
+ "content"
+ ],
+ [
+ "1756896212061",
+ "content"
+ ],
+ [
+ "1756907397615",
+ "content"
+ ]
+ ]
+ },
+ "height": 213,
+ "id": "1753346901505",
+ "position": {
+ "x": -117.24452412456148,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -117.24452412456148,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_array_file": false,
+ "selected": false,
+ "title": "Doc Extractor",
+ "type": "document-extractor",
+ "variable_selector": [
+ "1756442986174",
+ "output"
+ ]
+ },
+ "height": 92,
+ "id": "1753349228522",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 417.25474169825833
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 417.25474169825833
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Notion",
+ "datasource_name": "notion_datasource",
+ "datasource_parameters": {},
+ "plugin_id": "langgenius/notion_datasource",
+ "provider_name": "notion_datasource",
+ "provider_type": "online_document",
+ "selected": false,
+ "title": "Notion",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1754023419266",
+ "position": {
+ "x": -1369.6904698303242,
+ "y": 440.01452302398053
+ },
+ "positionAbsolute": {
+ "x": -1369.6904698303242,
+ "y": 440.01452302398053
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "output_type": "file",
+ "selected": false,
+ "title": "Variable Aggregator",
+ "type": "variable-aggregator",
+ "variables": [
+ [
+ "1750836380067",
+ "file"
+ ],
+ [
+ "1756442998557",
+ "file"
+ ]
+ ]
+ },
+ "height": 135,
+ "id": "1756442986174",
+ "position": {
+ "x": -1054.415447856335,
+ "y": 236.10252072775984
+ },
+ "positionAbsolute": {
+ "x": -1054.415447856335,
+ "y": 236.10252072775984
+ },
+ "selected": true,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Google Drive",
+ "datasource_name": "google_drive",
+ "datasource_parameters": {},
+ "plugin_id": "langgenius/google_drive",
+ "provider_name": "google_drive",
+ "provider_type": "online_drive",
+ "selected": false,
+ "title": "Google Drive",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756442998557",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "cases": [
+ {
+ "case_id": "true",
+ "conditions": [
+ {
+ "comparison_operator": "is",
+ "id": "1581dd11-7898-41f4-962f-937283ba7e01",
+ "value": ".xlsx",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528",
+ "value": ".xls",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b",
+ "value": ".md",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa",
+ "value": ".markdown",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "53abfe95-c7d0-4f63-ad37-17d425d25106",
+ "value": ".mdx",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "436877b8-8c0a-4cc6-9565-92754db08571",
+ "value": ".html",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c",
+ "value": ".htm",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b",
+ "value": ".docx",
+              "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "62d11445-876a-493f-85d3-8fc020146bdd",
+ "value": ".csv",
+              "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "02c4bce8-7668-4ccd-b750-4281f314b231",
+ "value": ".txt",
+              "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ }
+ ],
+ "id": "true",
+ "logical_operator": "or"
+ }
+ ],
+ "selected": false,
+ "title": "IF/ELSE",
+ "type": "if-else"
+ },
+ "height": 358,
+ "id": "1756443014860",
+ "position": {
+ "x": -733.5977815139424,
+ "y": 236.10252072775984
+ },
+ "positionAbsolute": {
+ "x": -733.5977815139424,
+ "y": 236.10252072775984
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Jina Reader",
+ "datasource_name": "jina_reader",
+ "datasource_parameters": {
+ "crawl_sub_pages": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jina_subpages"
+ ]
+ },
+ "limit": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jina_limit"
+ ]
+ },
+ "url": {
+ "type": "mixed",
+ "value": "{{#rag.1756896212061.jina_url#}}"
+ },
+ "use_sitemap": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jian_sitemap"
+ ]
+ }
+ },
+ "plugin_id": "langgenius/jina_datasource",
+ "provider_name": "jinareader",
+ "provider_type": "website_crawl",
+ "selected": false,
+ "title": "Jina Reader",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756896212061",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 538.9988445953813
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 538.9988445953813
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Firecrawl",
+ "datasource_name": "crawl",
+ "datasource_parameters": {
+ "crawl_subpages": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "firecrawl_subpages"
+ ]
+ },
+ "exclude_paths": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.exclude_paths#}}"
+ },
+ "include_paths": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.include_paths#}}"
+ },
+ "limit": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "max_pages"
+ ]
+ },
+ "max_depth": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "max_depth"
+ ]
+ },
+ "only_main_content": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "main_content"
+ ]
+ },
+ "url": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.firecrawl_url1#}}"
+ }
+ },
+ "plugin_id": "langgenius/firecrawl_datasource",
+ "provider_name": "firecrawl",
+ "provider_type": "website_crawl",
+ "selected": false,
+ "title": "Firecrawl",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756907397615",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 644.3296146102903
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 644.3296146102903
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The text you want to chunk.",
+ "ja_JP": "The text you want to chunk.",
+ "pt_BR": "Conteúdo de Entrada",
+ "zh_Hans": "输入文本"
+ },
+ "label": {
+ "en_US": "Input Content",
+ "ja_JP": "Input Content",
+ "pt_BR": "Conteúdo de Entrada",
+ "zh_Hans": "输入文本"
+ },
+ "llm_description": "The text you want to chunk.",
+ "max": null,
+ "min": null,
+ "name": "input_text",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": "paragraph",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "pt_BR": "Dividir texto em parágrafos com base no separador e no comprimento máximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuperá-lo.",
+ "zh_Hans": "根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。"
+ },
+ "label": {
+ "en_US": "Parent Mode",
+ "ja_JP": "Parent Mode",
+ "pt_BR": "Modo Pai",
+ "zh_Hans": "父块模式"
+ },
+ "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "max": null,
+ "min": null,
+ "name": "parent_mode",
+ "options": [
+ {
+ "icon": "",
+ "label": {
+ "en_US": "paragraph",
+ "ja_JP": "paragraph",
+ "pt_BR": "paragraph",
+ "zh_Hans": "paragraph"
+ },
+ "value": "paragraph"
+ },
+ {
+ "icon": "",
+ "label": {
+ "en_US": "full_doc",
+ "ja_JP": "full_doc",
+ "pt_BR": "full_doc",
+ "zh_Hans": "full_doc"
+ },
+ "value": "full_doc"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": "\n\n",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for chunking",
+ "ja_JP": "Separator used for chunking",
+ "pt_BR": "Separador usado para divisão",
+ "zh_Hans": "用于分块的分隔符"
+ },
+ "label": {
+ "en_US": "Parent Delimiter",
+ "ja_JP": "Parent Delimiter",
+ "pt_BR": "Separador de Pai",
+ "zh_Hans": "父块分隔符"
+ },
+ "llm_description": "The separator used to split chunks",
+ "max": null,
+ "min": null,
+ "name": "separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 1024,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for chunking",
+ "ja_JP": "Maximum length for chunking",
+ "pt_BR": "Comprimento máximo para divisão",
+ "zh_Hans": "用于分块的最大长度"
+ },
+ "label": {
+ "en_US": "Maximum Parent Chunk Length",
+ "ja_JP": "Maximum Parent Chunk Length",
+ "pt_BR": "Comprimento Máximo do Bloco Pai",
+ "zh_Hans": "最大父块长度"
+ },
+ "llm_description": "Maximum length allowed per chunk",
+ "max": null,
+ "min": null,
+ "name": "max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": ". ",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for subchunking",
+ "ja_JP": "Separator used for subchunking",
+ "pt_BR": "Separador usado para subdivisão",
+ "zh_Hans": "用于子分块的分隔符"
+ },
+ "label": {
+ "en_US": "Child Delimiter",
+ "ja_JP": "Child Delimiter",
+ "pt_BR": "Separador de Subdivisão",
+ "zh_Hans": "子分块分隔符"
+ },
+ "llm_description": "The separator used to split subchunks",
+ "max": null,
+ "min": null,
+ "name": "subchunk_separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 512,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for subchunking",
+ "ja_JP": "Maximum length for subchunking",
+ "pt_BR": "Comprimento máximo para subdivisão",
+ "zh_Hans": "用于子分块的最大长度"
+ },
+ "label": {
+ "en_US": "Maximum Child Chunk Length",
+ "ja_JP": "Maximum Child Chunk Length",
+ "pt_BR": "Comprimento Máximo de Subdivisão",
+ "zh_Hans": "子分块最大长度"
+ },
+ "llm_description": "Maximum length allowed per subchunk",
+ "max": null,
+ "min": null,
+ "name": "subchunk_max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove consecutive spaces, newlines and tabs",
+ "ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
+ "pt_BR": "Se deve remover espaços extras no texto",
+ "zh_Hans": "是否移除文本中的连续空格、换行符和制表符"
+ },
+ "label": {
+ "en_US": "Replace consecutive spaces, newlines and tabs",
+ "ja_JP": "Replace consecutive spaces, newlines and tabs",
+ "pt_BR": "Substituir espaços consecutivos, novas linhas e guias",
+ "zh_Hans": "替换连续空格、换行符和制表符"
+ },
+ "llm_description": "Whether to remove consecutive spaces, newlines and tabs",
+ "max": null,
+ "min": null,
+ "name": "remove_extra_spaces",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove URLs and emails in the text",
+ "ja_JP": "Whether to remove URLs and emails in the text",
+ "pt_BR": "Se deve remover URLs e e-mails no texto",
+ "zh_Hans": "是否移除文本中的URL和电子邮件地址"
+ },
+ "label": {
+ "en_US": "Delete all URLs and email addresses",
+ "ja_JP": "Delete all URLs and email addresses",
+ "pt_BR": "Remover todas as URLs e e-mails",
+ "zh_Hans": "删除所有URL和电子邮件地址"
+ },
+ "llm_description": "Whether to remove URLs and emails in the text",
+ "max": null,
+ "min": null,
+ "name": "remove_urls_emails",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ }
+ ],
+ "params": {
+ "input_text": "",
+ "max_length": "",
+ "parent_mode": "",
+ "remove_extra_spaces": "",
+ "remove_urls_emails": "",
+ "separator": "",
+ "subchunk_max_length": "",
+ "subchunk_separator": ""
+ },
+ "provider_id": "langgenius/parentchild_chunker/parentchild_chunker",
+ "provider_name": "langgenius/parentchild_chunker/parentchild_chunker",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Parent-child Chunker",
+ "tool_configurations": {},
+ "tool_description": "Process documents into parent-child chunk structures",
+ "tool_label": "Parent-child Chunker",
+ "tool_name": "parentchild_chunker",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "input_text": {
+ "type": "mixed",
+ "value": "{{#1753346901505.output#}}"
+ },
+ "max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "parent_length"
+ ]
+ },
+ "parent_mode": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "parent_mode"
+ ]
+ },
+ "remove_extra_spaces": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_1"
+ ]
+ },
+ "remove_urls_emails": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_2"
+ ]
+ },
+ "separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.parent_dilmiter#}}"
+ },
+ "subchunk_max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "child_length"
+ ]
+ },
+ "subchunk_separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.child_delimiter#}}"
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1756972161593",
+ "position": {
+ "x": 184.46657789772178,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 184.46657789772178,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ }
+ ],
+ "viewport": {
+ "x": 947.2141381290828,
+ "y": 179.30600859363653,
+ "zoom": 0.47414481289660987
+ }
+ },
+ "icon_info": {
+ "icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAYkklEQVR4Ae2dz28cx5XHq2f4m5JIyo4R2+t46B+H1S5gGUiwa1/EAFmvkUtsIHGOq6y9Z1vJHyDpD0iknG2vneMmBmxfFo5twPTFzmIDRAYS7cFKSMU/FCS2RVKiSIpk975PNWtYU9M9nB/dM8PueoLY3TXVVV2vv/N+1auaQA0JLV27XpNHqe3K/yAIZ1WkZitK3c/jhUEwG8g150I1/df+E8hn+5/bnxT3PFArMuaVhgFyTfkeBSpa5jRU6irlUVhZrsafL8/fPac/4/NBUtDvzpeWrs/ujquFqgpPhZWgJsA6Kc9Q6/dz+P6EA5G6FFXUsoqij6Kocqm6pRbn5+fqAO4Hj/oCQJFuCzKYU5GKOPK/iSqViqoEgaqOVFUgR/5TBgVy5Bqq7pXpi70/pr5dVvTzKBJuyn+buA6tsnB3V+oIzqJQ1w1DOYaR2pUj54kkoBTJuahGKr+Yv2vuUmKdDAtzAyCSLpwMTwdR8D153gXzzIBlpFrVQKvKcXR0tA44U8cf+8OBXQEoYNzZ3la7O7tqe2fH7XhZoHr+obvvfNX9IKvrzAEI8NSEej4KoheMXQboxsfH1OjYmAafkWZZDcK3kx0HAOHtrS21vb1jS8ll0Umvit14Prue4pYyBeCVz794qhJULkjTNZofHRlRE1OT+si1p8PFga2t2zEY9yVj5hIxEwDiwYpF8oqwdwEWe+DBheIQUnH95npdIkaBeqMSBWey8KR7BuDVv1x/Xkzdc6hbVOvk5KSamBgvDvf9SOocQCJubGzEQJRwThiFZ3q1D7sGoLb1JtVZ8bxe4AnHxkbV9PR03VutP7U/KRQH8J4BIWCExNa/+ODX7zjT7SC7AqBWuVH0ugQ3T3qp1y3rD/d9m5tbGog6FEToJgie7kYldwzAPXvvPWFfjTjdsWNH6/G6w81S//SdcgBpuLZ2w9iGeMrf7hSEHQHQBh8xvKNHj3jwdfrWClYfEN64cVMRUxTqGIRtA9AFH5LPx/MKhqYuh4MaRhJ2A8K2AOjB1+WbKdFt3YIwnmw9gFHS+OtSpYba9ZLvAGaV9GO0IdgAI2AFzOhIyQH8OBCAS3+5fkGJt4vDgc3n1e4BHC3xx2Cj7hcIZiQX4OxB7Gipgq9c++K05Ki8QsMzM8e8w3EQN/3nmgM4JqurazoDRyThmQfvueNiGmtSAajtviD6HTMcU1NTfnYjjYO+PJEDxAlv3boluXRqRTKiHk0Lz6Sr4CC6APjIYvFTa4k89oUtOABmmB0DQ3t5Aom1EwGI6hXP+insPuZ2PXkOdMMBa2p24crn159KaiMRgGL3aeMR8Jms5KSbfZnnQCsO4DsYAVYRjZrkFTcBUGw/wFcDeKhfT54DvXAAVUx6nlAtnAh14ordXhMARV+fpsL0kWm7nj/3HOiaAyQlQyIRn3elYAMAsf2kXg3E7qGW+zx5DvTEgTqexCEJx8PTdmMNADS239i4Tyi1meTPe+eAJQVZpFanOgCXPr1+Ukq97VdnjT/JkgNIQZwSoQXxMxZM23UAhpVYNI6OaoPRfO6PngOZccA4tbLUc8E0WgegJBOeotCrX8Maf8yaAyzLhQzWONcA1J6JTB5T4J0PuOApDw6wIUFdDbN+XEgDcHd8d4ELDz644CkvDgA+QKhpSi1w1ACUD7T0q8i+LJ48B/LkAHv/QOFubAdqAMraukcoHB2RyWNPngM5cmAvYRU7sEY32uUV51hfVKsxHvnA0z4H1rYj9dZnW+ry6q7683qoLq/sqFUpo9zQfVMV9XfTVfWPs1V1YmZEPXbXqKLMUyMH2IxKU6C00ItjLnsOiEFn4y3lvAJcL368qT7827b+fxAXPrkVKv5T39A/CBife2jSg9EwRI57TgglNf4EewuOlkg+mJ2doazUZID30scbDRKuV6Y8UxtXPz4x5aWiMHJlZVWvJRY1PI8ErMHcpI0fKS8T/fTyhsoaeIZ/v1zeUvwHhD85Ue4cS1sKVnajXR2PCSpiCZaUUJ1PvLuifnb5VqrUe/xro+o/Hp5Q//n4UYU0S6L7pqoaXNRNI/r45/++rtV1Wp2il4/secKyPWZtpFoJZAmd6GJRwWUkpNLZj9YTgXdsNNCge+7hScU59FMBEPe49OQ9Y+rcyem6itX24F+3E9vWgH9nRV381hH1r3Jf2chIQFkrMjsiWwbPwlr2Zy4bAaafidp1CbChJgGeIUDz7Ac31B/EA3bpJ6JWf5ygVl+6spkIbO7H1vx3aa+MKtkAUGIxsyMCuxoMqRdyUQJKAx9qFlAYiQcrfv35bXX20nqT2kTlPvfweANQW9WnTTt0Q11UMlQmu9As85D0v/vrqS9lAiCASpJ85x+ZagJTGlAB368WjtVVrkaR/Dmo/q8/EzCLyrcJEBIzTLMt7bpFOxfXI7ifQVXMHF3RRuiMB1X6wv/ebChFMr126lgD+Kh39qNkFY2954Kv3frPiYR9+zuzDRKWhwGUtFEGMsJOFq3P1SVgGQbOGH+wuNqkBl87NaMIGhsCCNRLAkSSvddp/WNjstOEo45Rzc9+sKbBaZ6jqMe6wytsKBUAUY8uqFC7Nvio85LMgLi2Gir35cePSN1GlmVVH7D9YWVXmwZJDk1RwViREEycl1VwLxjguXYfNpft6Rr7LQl8qNwk8NFmr/VtcL2oZ2CKrYqtSY+aJOrHADR62WZGkc6Nt2nGhETD24UAZ6sQC3ab7RVnWR+v+78krmhAzPGlj5kx2Q8BmWcu4rEU0WcA4waPecF4nnyGvdcqvueCL8v65x6ZlhBM/EUwACuDFDRjbTRoTGnBjh/KjIRNSD/Ub1b2W6/2IRKWZymjFCyFBHz5SuNsxzO1sXqIxbx0A1ATYrHtPaSkCcnkVd/uj2f5wErrMs9WxGNsAzIXLP+KSIDn9+Jd2kTWSxJlEWIxKp2jS520T17h2nYotmfxZETd3xD/o8L+bTCqqNkwrvp1QcE1KpRwjGv4M2OSFA/Mu755xrdk1qSIVAegYK/wNuDl1ebkAfulAiZ3VoPPTUjGrst53vXt/lgCUHQqPABd9Wu/UFRiUoiFQDSJqS7lXf8xySO0U/pZf1J0KjwAP11PliKd2GOAoB/1fyCeOcmqhlj8VHQqPABdZwAVmueUWi/tux42K++KToUHoPsCh8nec+1JO+DNc7uAdMdShOvSAdBeq4t0HNQUXJo9WQRQdTKGwgMQqWJLEhNbyyrLGSnWSVb0QfU7eXlFqFt4ALp5d6syK/fix8mJpq5KNC94UCEZW1qbZynasfAAZIrrk1v7Ad0zkg1thzrMC3VXtVGOik4LyeRdn/7vk60+ik6FB+B9041TWUng60
eIxZ1lAdxJsyw24OxEWbu8SOeFB+CJmXQpgspNCsm0sg/zrO8Ci02Oik6FH+GT946rM79tXIXGSx02ey8JaOywVXQqPADxgt0pLnYjYFcCO+426JAMz2Iv18R29U5IQb5+j39tpMHxwA50wZdmj/XLPrSn4GD7cw9NFIT7rYdReAmoX6ZsscFefyYeyJFr1mMMQ1Y0ywWQwDaVQf0y3lIAEGkXg20/w4VFSp/qMMt+mQFA3iEWu32A5y6YYrlAGdRvaQDIQFl+6UrBtJSrTkImvapowOdKP7Naz3whinxsDJIVeKRGCqYNEa+431nRfCHc1XoAuizSj3dRChVsQIdkeevz7aYlmIMIybALwjlnkyKew5W+5tmLeiyNBDQv8GXZ4dT2gClflcU/a7f3nQBUolkFZ+4zR+w3N6Wr0/p44d9/f9U0qY88E+2WjUolAXm5qLfzshj8zG/3d8jCK37i3VXFIvEn7x1LnSLr1d6jf9SuK/kop98yqV7GDAV/uvaVTrs9fnwuLinJXwDo2l8MHUlkwjWGFajGpCm4TkI4tGk2QTftukdMhLJsVPnVV/HSg9JJQF46KjNtuWYS+FyVSxudpGgh9fB23bZpxybqHOQs2fWLcF46AAK+tFkP94UCBpJNbeL+drKoARvAS/vZBwM06tjARD2Tw1iW3VJLpYLTwEeQ+q3PtkUyJq+gA4DMJzOllzRrAZgADD/PgIPBUtCktC8DZOZ5cYaw+WKHZM18VD9e+OaRQoPQqOBDA0CkBL/X9uEXOzqM8omsmTWSAwCQ98eLfezOUW3QU2YTdfE8CX/YZDsWqMC0bTvse7o9N1LPDTQDatspMu3bIOx1/KbNYTkeGgAitV6WReL2HnrtMBGJxIs2nuX3319rkkrU4SXbRH8AMclBset1cm6AZ//eiHt/GggZww0JE/U6fre/QV8PPQD5xh/kNbbDRHY+oC0XUEjLt7+T/tt4ABFH5WX5rY/fd7lAHJX8mKjtVsCzx5AGQrtOp+eMH8962DY5GmoAptlqnTI/rT7gY1d8V02n1TdgZJ8ZVPgnstsCZYZoB8eBdjEFyMImEbbd9k07HPMAIVrgVwszdW1g9zeocwPAofOCecHsFm+/YMMko8pwCPhtXqNekXDscEoq/UHORBzTa54NMX0kHennPlHXSu17xPe+9mW9Kv3/3/eO1697OQHEjJM2Xep2/OYLjeND+8NEQ+WEGEa54AM0F741rT3RdpiHFGHz8CSvFskHgHslG4C09dn37+i1Sf2lSwoRZTX+YZKERgIOzVww3/gk5hMieftfZjoCDc4F93CvSyzLZHH6sFE/xm++4MM0/qEBIA6HK/kIkTA/240txT3xBuCNu83TR56hlm6BXdbxDwUAAYWbHIr0yiI1iTCGKwlZbO6CvVvgZHFfmcc/FAAk7mYTNo8brLU/7/Q8jgc2rg8mtjgsVObxDxyA2D5ujA7J143aTQMUbeHE2BQHdgdvC5Z9/AMHoLsRN9IPJyJrwvO1Qc2Ld/vOus922nOfoWzjHzgAP/yi8Udknry39xBJ2ot3bUHmlQdNZR//wAHo7oPMrgV5kRv/cxMT8uq3VbtlH//AAejuBJ/njlDMntjElNqgqezjHzgAscVsynPS3Ezdmf7cvk15P4/uM5Rt/AMHYD9ftu9r+DgwcADaninsyTNA3CxtGpNWB/F6yj7+gQPwG84Opmk/LJMFONzfBB6GLXDLPv6BA/CEkx704d/yC42QrmVTng6P3U+r87KPf+AAfOzOxvw0fi08L3KDvqwfaZdQ379c3tRrN554d6XpNsrMWmNX1TdVtgoOy/itR870dOAAdDOHeXmtVpR1O3qm+1z7sp2gN/ewVPKf5Dfc2OqXdpLih5TxGSD8+ze/0ke3v6RnH/bxJz1zlmUDByBG+A+dqbesc/YAtTvhz3Rfq5AH97A/DDuXumt323kBgJF72Xa3Vf7dsI6/nTFmUWfgAGQQz8refTYhObLM2UvKtWuVbUP/T7yz0pQiZj9ju+ekfj3xzmqT9LXvH7bx28+W93mjAZZ3byntEyBmnhZJY4gXh4Tqda+UeP+WRruSvtygtOk3jzUpAJps77Q1GcM0fsOHfh2HZk0IKi+WFI3TY90uK6Q9JJ+b6Eq2Cen6bvwNhhugcLSJe7JYkwLQ0lanDcP47THnfW7WhAwNABlwDABWxDWCkBeHymw3TQsnBjsyCUhJGw3RdwyAlaZ7kJb0nQRY7ksj2sPutKU6dRlL/AVotn4GOf60ceRVPpQAZLCxCrzRBEI+4+Wxjx4ZM2b5IuW8OALYH0gMMW0zIKRYrAIbExK4H8LhcKWlvW1HXKvzv4DQtWeR6uxRmESDGn/Ss+RZNrQAZNBpkqBbhgC+NMln+nN/pwPJx6KmLIgwjisJf/PduVQ7tN/jz2KMnbZhANisBzptKYf6Rk0Bgl6JNlB5tJlGbogGwLbyktPaSSunLdq0qdWalH6P336ufp8PlQ2YNHikAQAhrtYumdga4Y1WwKM9bDUCxzbZu1LZ5b2cu9uw8Yz/893ZlrFI+st7/L2MqZd7jQQcegCaQQIUptJIYb8ssw5/FpuPMoiX+Q1JNj0xW5Xt2UY62pfFzF6YfpBUvxFg5EEA3Twz7V/45rQ4Vu1J+bzGn8c422nTAHAo4oDtPDAgwwtu1xNup03q9HtNhu2QsCblmVp7T5rX+NvrPb9a6YZRfn0OVctlX5Mx6JdRUYHSqR1R2JgaP+gH61f/ZV+T0S8+2/1E0R7WBHsVFe0BUE7KSLZNxvhbJSj0yh/XIXL77rX9w3J/HYCCvdKr4MPy0or6nKUHIMa9TYQ98iJX4rl959XvMLdbegCWfU3GoMFZegCWfU3GIAAY2k6IKKBlHmI3zE/1DGKQ7fZZ9jUZ7fIpy3reCbG4WfY1GRYrBnJakfBfqeOAOALDuCZlIGgYQKeVIIj0LydHUTlVMDwv85qMAWBOhbtxwnGgguXSOyG8AALEbuoXa1LsedtuX1Sna1K67ecw3Wd8EJ65IvMfy5yEJXVCGDuUlLNHGthByyrju5v/EvMjy5rfK7Ep61xDu+3Dcm60bajCq5XK3lxw3TU+LKPI+DmxBeOs6cbEUbOsspN8RHL/kpZ1Aj76KHsA2vaCgyvXvjhdUZVXxsfH1PR0NinoGWOjr82VZU1GX5nqdHbzxk11e3tbBZXg6WDp2vWFSEXvVatVNTNzzKlazssyrMkY5Ju9sXZDbe/sSCJW8G2ckGUepi4WuSg5lWlNxiBetTXpsaxn4v907SudizU3O4tYHMQzDW2fRV2TMUiGm3T8B+4+HhgALskD1WZnZ1Sl4iMzSS8HrzaPNSlJfRW5bEdigGura0r076UHvn78Ub0mROIylwSKtW0xDMfHs/+RmCIwFM81jzUpReBNJ2MwQWgVqqvctyfuIn0BOj15DuTJgR1xPqAoiC5x1AAUL3iRi3DHAxA+eMqPA7t7GBNTbx+A1a3qIl0iAcu6OCk/lvuWbQ4QftF0Sy1y1BJwfn5uRbyRRUIxO6GXgppB/k/mH
KiDTxwQMEcHdZc3VNH7FNy+3biTPGWePAey4MDtzXh7FdGyGmu0WQegTMctUnB7ywMQPnjKngNGAlZGKq+a1usAnL97btGoYVPRVPJHz4FeObC1tWUyrpbn75rTDght1gGoOwiiNzlu3mpMIdKf+T+eAz1wwGhWmf89bzfTCMANEY2SnoUE9FLQZpM/74UDFp6WRdO+arfVAEA8E/GEf04FLwVtNvnzXjiwfnNd3y7x5l+47YjZ10hLS9dno4nod1Jam5qaVBMT7e1f19iKv/IciDmA7be+fouLZUk+mHf50iAB+VDHBKPgDOcbG5s+MA0jPHXFAdKuwBDk2n6mwSYA8sH8PXNviGjUgemb67H4NDf4o+dAuxzAjGOtURSoN1zbz7SRCMD4w+BH2iGRDJnNzf1fMDI3+qPnQCsObErQeYtJDfYA3NOoSfVTASiIXQ7C2GVGjFpZrEnt+DLPgToHYtUbh/ICAR9Yqn/onKQCkHqiii/iFTNHTB6/B6HDPX/ZxAEwAlbADNhJU73mxiYv2HxgjtorHo/eE1F6koVLx44e9Wn7hjn+2MABQLeGoCKvVJKcH7jn+KMNFRIuWkpA6muvOAieltNlGl67Iegu6X7SCfzzRXscaACfYCWIMXMgfw6UgKYFWb5ZY/mmXNe8JDRc8Uc40AQ+WW7Zyu6zudY2ALnJBeGRo0dU1S9isvlZunNsPhaaa7WL5OsAfDCrIwBygw1CVtAdPXbUgxDGlJCMw7G3r1DH4INlHQOQmzQIo+h1ufuk6Ho1OTnhp+xgTImION/GxoZWvzgc2Hztql2bTV0B0DTwx8+/vCgdP8/1+NiYmpC5Y6+SDXeKecTeI7mAvV0guf55ZatyzqTYdzrqngBIZyINT8sSuwvyLZhFJSMN/driTl/D4ajfIPVkhkOiIecfvOeOi708fc8ApHNUsqjjC/JteIprgDh9ZFqNjhya30LksT2lcIB8PuZ1rRzRRXE2ftSNynW7yASAplEtDVV0Vq5rlAHAMdn2zUtEuHH4KAF4y3pqTZJVshpNpgA0D/XHa1+ek2/Iv8l1jTIkogbjxLiXijBkSAn7jrXh25JEsCWL07jWhLrF1tusXOzW1ksbci4ANJ25EtGUA8bqSFWNyLEi03sj8t9TfzkAuPjPfkDE8NixQG9MYEAXP86iOJlvqg31atbAM6PNFYCmk6W/Xj8Z7oSnRSqeUhK6MeX2ESmJB01Yp1KNj5zH1/sA1ddSbpOpZ5cV/dwAyB2nSRiJyMPbA5POydsD3I4AjfIWe4IvCjTfZ5mu2HiLbvtZXze+yaxbT2iP5AY1rhbCIDwpvxHxiPw6BA5MIigTbvdF2XJA5mzVpTCMrup14VtqMS9Jl/bYfQdg2oNoTxqbUcI5sli0FkbhrGRK3B/XD2rmPvnyyi6a8t8mrikvE4ldJmNecYcsL3RZl+nPI/25/ALM1UpQWdmV+qJL+JzVaXE9XXlwf/4f1AC7LPmFaqYAAAAASUVORK5CYII="
+ },
+ "id": "9553b1e0-0c26-445b-9e18-063ad7eca0b4",
+ "name": "Parent-child-HQ",
+ "icon": {
+ "icon": "ab8da246-37ba-4bbb-9b24-e7bda0778005",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAYkklEQVR4Ae2dz28cx5XHq2f4m5JIyo4R2+t46B+H1S5gGUiwa1/EAFmvkUtsIHGOq6y9Z1vJHyDpD0iknG2vneMmBmxfFo5twPTFzmIDRAYS7cFKSMU/FCS2RVKiSIpk975PNWtYU9M9nB/dM8PueoLY3TXVVV2vv/N+1auaQA0JLV27XpNHqe3K/yAIZ1WkZitK3c/jhUEwG8g150I1/df+E8hn+5/bnxT3PFArMuaVhgFyTfkeBSpa5jRU6irlUVhZrsafL8/fPac/4/NBUtDvzpeWrs/ujquFqgpPhZWgJsA6Kc9Q6/dz+P6EA5G6FFXUsoqij6Kocqm6pRbn5+fqAO4Hj/oCQJFuCzKYU5GKOPK/iSqViqoEgaqOVFUgR/5TBgVy5Bqq7pXpi70/pr5dVvTzKBJuyn+buA6tsnB3V+oIzqJQ1w1DOYaR2pUj54kkoBTJuahGKr+Yv2vuUmKdDAtzAyCSLpwMTwdR8D153gXzzIBlpFrVQKvKcXR0tA44U8cf+8OBXQEoYNzZ3la7O7tqe2fH7XhZoHr+obvvfNX9IKvrzAEI8NSEej4KoheMXQboxsfH1OjYmAafkWZZDcK3kx0HAOHtrS21vb1jS8ll0Umvit14Prue4pYyBeCVz794qhJULkjTNZofHRlRE1OT+si1p8PFga2t2zEY9yVj5hIxEwDiwYpF8oqwdwEWe+DBheIQUnH95npdIkaBeqMSBWey8KR7BuDVv1x/Xkzdc6hbVOvk5KSamBgvDvf9SOocQCJubGzEQJRwThiFZ3q1D7sGoLb1JtVZ8bxe4AnHxkbV9PR03VutP7U/KRQH8J4BIWCExNa/+ODX7zjT7SC7AqBWuVH0ugQ3T3qp1y3rD/d9m5tbGog6FEToJgie7kYldwzAPXvvPWFfjTjdsWNH6/G6w81S//SdcgBpuLZ2w9iGeMrf7hSEHQHQBh8xvKNHj3jwdfrWClYfEN64cVMRUxTqGIRtA9AFH5LPx/MKhqYuh4MaRhJ2A8K2AOjB1+WbKdFt3YIwnmw9gFHS+OtSpYba9ZLvAGaV9GO0IdgAI2AFzOhIyQH8OBCAS3+5fkGJt4vDgc3n1e4BHC3xx2Cj7hcIZiQX4OxB7Gipgq9c++K05Ki8QsMzM8e8w3EQN/3nmgM4JqurazoDRyThmQfvueNiGmtSAajtviD6HTMcU1NTfnYjjYO+PJEDxAlv3boluXRqRTKiHk0Lz6Sr4CC6APjIYvFTa4k89oUtOABmmB0DQ3t5Aom1EwGI6hXP+insPuZ2PXkOdMMBa2p24crn159KaiMRgGL3aeMR8Jms5KSbfZnnQCsO4DsYAVYRjZrkFTcBUGw/wFcDeKhfT54DvXAAVUx6nlAtnAh14ordXhMARV+fpsL0kWm7nj/3HOiaAyQlQyIRn3elYAMAsf2kXg3E7qGW+zx5DvTEgTqexCEJx8PTdmMNADS239i4Tyi1meTPe+eAJQVZpFanOgCXPr1+Ukq97VdnjT/JkgNIQZwSoQXxMxZM23UAhpVYNI6OaoPRfO6PngOZccA4tbLUc8E0WgegJBOeotCrX8Maf8yaAyzLhQzWONcA1J6JTB5T4J0PuOApDw6wIUFdDbN+XEgDcHd8d4ELDz644CkvDgA+QKhpSi1w1ACUD7T0q8i+LJ48B/LkAHv/QOFubAdqAMraukcoHB2RyWNPngM5cmAvYRU7sEY32uUV51hfVKsxHvnA0z4H1rYj9dZnW+ry6q7683qoLq/sqFUpo9zQfVMV9XfTVfWPs1V1YmZEPXbXqKLMUyMH2IxKU6C00ItjLnsOiEFn4y3lvAJcL368qT7827b+fxAXPrkVKv5T39A/CBife2jSg9EwRI57TgglNf4EewuOlkg+mJ2doazUZID30scbDRKuV6Y8UxtXPz4x5aWiMHJlZVWvJRY1PI8ErMHcpI0fKS8T/fTyhsoaeIZ/v1zeUvwHhD85Ue4cS1sKVnajXR2PCSpiCZaUUJ1PvLuifnb5VqrUe/xro+o/Hp5Q//n4UYU0S6L7pqoaXNRNI/r45/++rtV1Wp2il4/secKyPWZtpFoJZAmd6GJRwWUkpNLZj9YTgXdsNNCge+7hScU59FMBEPe49OQ9Y+rcyem6itX24F+3E9vWgH9nRV381hH1r3Jf2chIQFkrMjsiWwbPwlr2Zy4bAaafidp1CbChJgGeIUDz7Ac31B/EA3bpJ6JWf5ygVl+6spkIbO7H1vx3aa+MKtkAUGIxsyMCuxoMqRdyUQJKAx9qFlAYiQcrfv35bXX20nqT2kTlPvfweANQW9WnTTt0Q11UMlQmu9As85D0v/vrqS9lAiCASpJ85x+ZagJTGlAB368WjtVVrkaR/Dmo/q8/EzCLyrcJEBIzTLMt7bpFOxfXI7ifQVXMHF3RRuiMB1X6wv/ebChFMr126lgD+Kh39qNkFY2954Kv3frPiYR9+zuzDRKWhwGUtFEGMsJOFq3P1SVgGQbOGH+wuNqkBl87NaMIGhsCCNRLAkSSvddp/WNjstOEo45Rzc9+sKbBaZ6jqMe6wytsKBUAUY8uqFC7Nvio85LMgLi2Gir35cePSN1GlmVVH7D9YWVXmwZJDk1RwViREEycl1VwLxjguXYfNpft6Rr7LQl8qNwk8NFmr/VtcL2oZ2CKrYqtSY+aJOrHADR62WZGkc6Nt2nGhETD24UAZ6sQC3ab7RVnWR+v+78krmhAzPGlj5kx2Q8BmWcu4rEU0WcA4waPecF4nnyGvdcqvueCL8v65x6ZlhBM/EUwACuDFDRjbTRoTGnBjh/KjIRNSD/Ub1b2W6/2IRKWZymjFCyFBHz5SuNsxzO1sXqIxbx0A1ATYrHtPaSkCcnkVd/uj2f5wErrMs9WxGNsAzIXLP+KSIDn9+Jd2kTWSxJlEWIxKp2jS520T17h2nYotmfxZETd3xD/o8L+bTCqqNkwrvp1QcE1KpRwjGv4M2OSFA/Mu755xrdk1qSIVAegYK/wNuDl1ebkAfulAiZ3VoPPTUjGrst53vXt/lgCUHQqPABd9Wu/UFRiUoiFQDSJqS7lXf8xySO0U/pZf1J0KjwAP11PliKd2GOAoB/1fyCeOcmqhlj8VHQqPABdZwAVmueUWi/tux42K++KToUHoPsCh8nec+1JO+DNc7uAdMdShOvSAdBeq4t0HNQUXJo9WQRQdTKGwgMQqWJLEhNbyyrLGSnWSVb0QfU7eXlFqFt4ALp5d6syK/fix8mJpq5KNC94UCEZW1qbZynasfAAZIrrk1v7Ad0zkg1thzrMC3VXtVGOik4LyeRdn/7vk60+ik6FB+B9041TWUng60
eIxZ1lAdxJsyw24OxEWbu8SOeFB+CJmXQpgspNCsm0sg/zrO8Ci02Oik6FH+GT946rM79tXIXGSx02ey8JaOywVXQqPADxgt0pLnYjYFcCO+426JAMz2Iv18R29U5IQb5+j39tpMHxwA50wZdmj/XLPrSn4GD7cw9NFIT7rYdReAmoX6ZsscFefyYeyJFr1mMMQ1Y0ywWQwDaVQf0y3lIAEGkXg20/w4VFSp/qMMt+mQFA3iEWu32A5y6YYrlAGdRvaQDIQFl+6UrBtJSrTkImvapowOdKP7Naz3whinxsDJIVeKRGCqYNEa+431nRfCHc1XoAuizSj3dRChVsQIdkeevz7aYlmIMIybALwjlnkyKew5W+5tmLeiyNBDQv8GXZ4dT2gClflcU/a7f3nQBUolkFZ+4zR+w3N6Wr0/p44d9/f9U0qY88E+2WjUolAXm5qLfzshj8zG/3d8jCK37i3VXFIvEn7x1LnSLr1d6jf9SuK/kop98yqV7GDAV/uvaVTrs9fnwuLinJXwDo2l8MHUlkwjWGFajGpCm4TkI4tGk2QTftukdMhLJsVPnVV/HSg9JJQF46KjNtuWYS+FyVSxudpGgh9fB23bZpxybqHOQs2fWLcF46AAK+tFkP94UCBpJNbeL+drKoARvAS/vZBwM06tjARD2Tw1iW3VJLpYLTwEeQ+q3PtkUyJq+gA4DMJzOllzRrAZgADD/PgIPBUtCktC8DZOZ5cYaw+WKHZM18VD9e+OaRQoPQqOBDA0CkBL/X9uEXOzqM8omsmTWSAwCQ98eLfezOUW3QU2YTdfE8CX/YZDsWqMC0bTvse7o9N1LPDTQDatspMu3bIOx1/KbNYTkeGgAitV6WReL2HnrtMBGJxIs2nuX3319rkkrU4SXbRH8AMclBset1cm6AZ//eiHt/GggZww0JE/U6fre/QV8PPQD5xh/kNbbDRHY+oC0XUEjLt7+T/tt4ABFH5WX5rY/fd7lAHJX8mKjtVsCzx5AGQrtOp+eMH8962DY5GmoAptlqnTI/rT7gY1d8V02n1TdgZJ8ZVPgnstsCZYZoB8eBdjEFyMImEbbd9k07HPMAIVrgVwszdW1g9zeocwPAofOCecHsFm+/YMMko8pwCPhtXqNekXDscEoq/UHORBzTa54NMX0kHennPlHXSu17xPe+9mW9Kv3/3/eO1697OQHEjJM2Xep2/OYLjeND+8NEQ+WEGEa54AM0F741rT3RdpiHFGHz8CSvFskHgHslG4C09dn37+i1Sf2lSwoRZTX+YZKERgIOzVww3/gk5hMieftfZjoCDc4F93CvSyzLZHH6sFE/xm++4MM0/qEBIA6HK/kIkTA/240txT3xBuCNu83TR56hlm6BXdbxDwUAAYWbHIr0yiI1iTCGKwlZbO6CvVvgZHFfmcc/FAAk7mYTNo8brLU/7/Q8jgc2rg8mtjgsVObxDxyA2D5ujA7J143aTQMUbeHE2BQHdgdvC5Z9/AMHoLsRN9IPJyJrwvO1Qc2Ld/vOus922nOfoWzjHzgAP/yi8Udknry39xBJ2ot3bUHmlQdNZR//wAHo7oPMrgV5kRv/cxMT8uq3VbtlH//AAejuBJ/njlDMntjElNqgqezjHzgAscVsynPS3Ezdmf7cvk15P4/uM5Rt/AMHYD9ftu9r+DgwcADaninsyTNA3CxtGpNWB/F6yj7+gQPwG84Opmk/LJMFONzfBB6GLXDLPv6BA/CEkx704d/yC42QrmVTng6P3U+r87KPf+AAfOzOxvw0fi08L3KDvqwfaZdQ379c3tRrN554d6XpNsrMWmNX1TdVtgoOy/itR870dOAAdDOHeXmtVpR1O3qm+1z7sp2gN/ewVPKf5Dfc2OqXdpLih5TxGSD8+ze/0ke3v6RnH/bxJz1zlmUDByBG+A+dqbesc/YAtTvhz3Rfq5AH97A/DDuXumt323kBgJF72Xa3Vf7dsI6/nTFmUWfgAGQQz8refTYhObLM2UvKtWuVbUP/T7yz0pQiZj9ju+ekfj3xzmqT9LXvH7bx28+W93mjAZZ3byntEyBmnhZJY4gXh4Tqda+UeP+WRruSvtygtOk3jzUpAJps77Q1GcM0fsOHfh2HZk0IKi+WFI3TY90uK6Q9JJ+b6Eq2Cen6bvwNhhugcLSJe7JYkwLQ0lanDcP47THnfW7WhAwNABlwDABWxDWCkBeHymw3TQsnBjsyCUhJGw3RdwyAlaZ7kJb0nQRY7ksj2sPutKU6dRlL/AVotn4GOf60ceRVPpQAZLCxCrzRBEI+4+Wxjx4ZM2b5IuW8OALYH0gMMW0zIKRYrAIbExK4H8LhcKWlvW1HXKvzv4DQtWeR6uxRmESDGn/Ss+RZNrQAZNBpkqBbhgC+NMln+nN/pwPJx6KmLIgwjisJf/PduVQ7tN/jz2KMnbZhANisBzptKYf6Rk0Bgl6JNlB5tJlGbogGwLbyktPaSSunLdq0qdWalH6P336ufp8PlQ2YNHikAQAhrtYumdga4Y1WwKM9bDUCxzbZu1LZ5b2cu9uw8Yz/893ZlrFI+st7/L2MqZd7jQQcegCaQQIUptJIYb8ssw5/FpuPMoiX+Q1JNj0xW5Xt2UY62pfFzF6YfpBUvxFg5EEA3Twz7V/45rQ4Vu1J+bzGn8c422nTAHAo4oDtPDAgwwtu1xNup03q9HtNhu2QsCblmVp7T5rX+NvrPb9a6YZRfn0OVctlX5Mx6JdRUYHSqR1R2JgaP+gH61f/ZV+T0S8+2/1E0R7WBHsVFe0BUE7KSLZNxvhbJSj0yh/XIXL77rX9w3J/HYCCvdKr4MPy0or6nKUHIMa9TYQ98iJX4rl959XvMLdbegCWfU3GoMFZegCWfU3GIAAY2k6IKKBlHmI3zE/1DGKQ7fZZ9jUZ7fIpy3reCbG4WfY1GRYrBnJakfBfqeOAOALDuCZlIGgYQKeVIIj0LydHUTlVMDwv85qMAWBOhbtxwnGgguXSOyG8AALEbuoXa1LsedtuX1Sna1K67ecw3Wd8EJ65IvMfy5yEJXVCGDuUlLNHGthByyrju5v/EvMjy5rfK7Ep61xDu+3Dcm60bajCq5XK3lxw3TU+LKPI+DmxBeOs6cbEUbOsspN8RHL/kpZ1Aj76KHsA2vaCgyvXvjhdUZVXxsfH1PR0NinoGWOjr82VZU1GX5nqdHbzxk11e3tbBZXg6WDp2vWFSEXvVatVNTNzzKlazssyrMkY5Ju9sXZDbe/sSCJW8G2ckGUepi4WuSg5lWlNxiBetTXpsaxn4v907SudizU3O4tYHMQzDW2fRV2TMUiGm3T8B+4+HhgALskD1WZnZ1Sl4iMzSS8HrzaPNSlJfRW5bEdigGura0r076UHvn78Ub0mROIylwSKtW0xDMfHs/+RmCIwFM81jzUpReBNJ2MwQWgVqqvctyfuIn0BOj15DuTJgR1xPqAoiC5x1AAUL3iRi3DHAxA+eMqPA7t7GBNTbx+A1a3qIl0iAcu6OCk/lvuWbQ4QftF0Sy1y1BJwfn5uRbyRRUIxO6GXgppB/k/mH
KiDTxwQMEcHdZc3VNH7FNy+3biTPGWePAey4MDtzXh7FdGyGmu0WQegTMctUnB7ywMQPnjKngNGAlZGKq+a1usAnL97btGoYVPRVPJHz4FeObC1tWUyrpbn75rTDght1gGoOwiiNzlu3mpMIdKf+T+eAz1wwGhWmf89bzfTCMANEY2SnoUE9FLQZpM/74UDFp6WRdO+arfVAEA8E/GEf04FLwVtNvnzXjiwfnNd3y7x5l+47YjZ10hLS9dno4nod1Jam5qaVBMT7e1f19iKv/IciDmA7be+fouLZUk+mHf50iAB+VDHBKPgDOcbG5s+MA0jPHXFAdKuwBDk2n6mwSYA8sH8PXNviGjUgemb67H4NDf4o+dAuxzAjGOtURSoN1zbz7SRCMD4w+BH2iGRDJnNzf1fMDI3+qPnQCsObErQeYtJDfYA3NOoSfVTASiIXQ7C2GVGjFpZrEnt+DLPgToHYtUbh/ICAR9Yqn/onKQCkHqiii/iFTNHTB6/B6HDPX/ZxAEwAlbADNhJU73mxiYv2HxgjtorHo/eE1F6koVLx44e9Wn7hjn+2MABQLeGoCKvVJKcH7jn+KMNFRIuWkpA6muvOAieltNlGl67Iegu6X7SCfzzRXscaACfYCWIMXMgfw6UgKYFWb5ZY/mmXNe8JDRc8Uc40AQ+WW7Zyu6zudY2ALnJBeGRo0dU1S9isvlZunNsPhaaa7WL5OsAfDCrIwBygw1CVtAdPXbUgxDGlJCMw7G3r1DH4INlHQOQmzQIo+h1ufuk6Ho1OTnhp+xgTImION/GxoZWvzgc2Hztql2bTV0B0DTwx8+/vCgdP8/1+NiYmpC5Y6+SDXeKecTeI7mAvV0guf55ZatyzqTYdzrqngBIZyINT8sSuwvyLZhFJSMN/driTl/D4ajfIPVkhkOiIecfvOeOi708fc8ApHNUsqjjC/JteIprgDh9ZFqNjhya30LksT2lcIB8PuZ1rRzRRXE2ftSNynW7yASAplEtDVV0Vq5rlAHAMdn2zUtEuHH4KAF4y3pqTZJVshpNpgA0D/XHa1+ek2/Iv8l1jTIkogbjxLiXijBkSAn7jrXh25JEsCWL07jWhLrF1tusXOzW1ksbci4ANJ25EtGUA8bqSFWNyLEi03sj8t9TfzkAuPjPfkDE8NixQG9MYEAXP86iOJlvqg31atbAM6PNFYCmk6W/Xj8Z7oSnRSqeUhK6MeX2ESmJB01Yp1KNj5zH1/sA1ddSbpOpZ5cV/dwAyB2nSRiJyMPbA5POydsD3I4AjfIWe4IvCjTfZ5mu2HiLbvtZXze+yaxbT2iP5AY1rhbCIDwpvxHxiPw6BA5MIigTbvdF2XJA5mzVpTCMrup14VtqMS9Jl/bYfQdg2oNoTxqbUcI5sli0FkbhrGRK3B/XD2rmPvnyyi6a8t8mrikvE4ldJmNecYcsL3RZl+nPI/25/ALM1UpQWdmV+qJL+JzVaXE9XXlwf/4f1AC7LPmFaqYAAAAASUVORK5CYII="
+ },
+ "language": "zh-Hans",
+ "position": 2
+ },
+ "9ef3e66a-11c7-4227-897c-3b0f9a42da1a": {
+ "chunk_structure": "qa_model",
+ "description": "This template generates structured Q&A pairs by extracting selected columns from a table. These pairs are indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
+ "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/qa_chunk:0.0.8@1fed9644646bdd48792cdf5a1d559a3df336bd3a8edb0807227499fb56dce3af\n version: null\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n version: null\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 769900fc-8a31-4584-94f6-f227357c00c8\n icon_background: null\n icon_type: image\n icon_url: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAUPklEQVR4Ae1dW4wcxRWt6pl92rseQ7xgYocdIALFeRglkSBEYkkkwF/YEoT8RDiKwkd+wEryG+P8JpHNTySEQuwkHzEgYX6C4AM2UghISYTzMMrDySzYeION4/Wu7X3NdOWe6qnempru3Znpefbca427uroe3afP3lv3Vk2NFF0ihdnZSZEVkyUpJqWSOSFUzlPezbg9X6qcFILySOi6Plb8R+WVCq5X5Kf4RMo5wog+liiB8zCPcJzBVV/67xFwc0r6MxlF9YpiJr99u76G650Ueq/tlcKlQq5UGprKKO9eXxDZpNgtVBSp2ntffdrbSSXEDBH5z0qqk5nM8nR+az4kcDswaQsBCxdmp4Tw7lVC0VHgUyWe5wmP2JjJZoSkIz7Ig0g64hySKefpk/J/prydl/a0UoQmfWzBuW/l+aUSlSF6KV+X9X06+kqU6Ih0jJwkpKeF8o7lJyZOxpRpWnbLCAhN5xdH9lMHD9HdTpk7BlmymYwmWoaOAwMDIeFMGT62B4ESERRkLK6uilKxJFaLxcqOpZjxfXXotontRysvNO+s6QQE8URx9AklxZP0Z5fDrYJ0Q0ODYmBwUJPPaLPmPQa31CwEQMKV5WWxulpc05JERBpPHs1vu+FQs/ox7TSVgKc/PLfXy3iHzZhuIJsVw6MjAkeW3kNgeXklIKPRjC3QiE0hYOHS7KQqyp8TxFOAmYkHFNIj0IpXr1wNNSINK094WXUgvzW5J52YgO9dPP9ESamnYG5hWkdGRsTw8FB60OcnCRGARlxcXDREnCOH50DS8WHDBAzGeiMH6a/hSdzh4OCA2LRpU+ithnfNiVQhAO8ZJAQZIUp4R27dNnGg0YdsiIBlk/sSdbqbtV6j0Pd2vaWlZU3EcijopMyqfY2Y5LoJqMlXkm/A0UCcbnx8LIzX9TakfPf1IgBtOD+/EJhkeMoZdV+9JKyLgDb5EMMbG9vM5Kv3raWsPEi4sHBFIKZI06R1k7BmArrkg+bjeF7K2NTg48AMQxM2QsKaCMjka/DN9FG1RkkYTLZuABTF+F7CmA9mlzXfBmD16WVYQ3ADHAFXwBkdKdkAjw0JWLjw38PUxm44HBjzsdndANE+vgxuWH7Bbr+46eBGcKxrgk+fn91PK1R+joa3bBlnh2MjNPm6RgCOyeXL83oFjiqJA7feeOOROGhiCRiM+7x3MMMxOjrKsxtxCHJ+JAKIE167dg3X5ihGeGdceCbeBBexqEDlsIqFp9YiMebMdRAAZzA7RpIrrxOILB1JQJheWu64F+M+zO2yMAKNIGBNzU6d/ujc3qg2IgnoeVIPHkE+syo5qjLnMQLrIQDfwSgwWu9+OMorriJg4eKHB800G8wvCyOQBAGYYr0elEIz/sqwXrhit1dFQAoo7keBTZs32eU4zQg0jAAWJUOkJ59wtWAFATH2g/YDY3kVc8N4c0UHAYtP+ntC9uUKApqx3+AQLyi1QeJ0cgRCLRh8SS1sMCRg4fxZ/f1cOB089gvx4USTEIAWLM+iTQVf0w0aDgnoe95+ZA0M8BeIAmj4/2YjYBQbTZRMmbZDAkqVuReZbH4NNHxsNgL4Wi6EnBHNNaQ1AQuXLuVoCcNuZLDzARRYWoEANiQIzTC+P06iCVgqrUzhhMkHFFhahQDIBxJqKY1O4agJKJWvtZ9H+7KwMAKtRAB7/0B8vzSFY3kMKD+Hk4GsnjxGkoURaAkCesEqtSwp3owOAg0o5CSlaTVrmY84YWEEWoAANqPSkvG00iszLnBADDtb0C83yQhoBMpOiF62jwxP70yKBAWgWRiBViMAAhqugXsetsVFp1EbP7b6Zrj9/kQg1ILEPa8kPR2PoeBgf6LBT912BLJlTxj7gXsZpSZxB9gGl4URaAcCRgNiM3qPdg0OItJkm1kYgXYgYAhInkjOM/GYtcx23AL30c8IGCfEk97Nod1lAvYzJTr37PS9c3kzuvfMHF3n7oV77hMEjLJTpdLWUAP2ybPzY3YBAqHD63lbmIBd8EL6+RaySujfZdO/UtQNQHzipz/qhttI7T28/53vd/zZwkkPxAFpWUIQiOYwTMdfTD/eAJvgfnzrXfTMTMAuehn9eCtMwH586130zJ7QPw5Nc8H0j4URaAcCJg5Iu3DkSAOWnRBeDdMO7LkPQiAkIO0dyCaYKdFRBJiAHYWfO2cCMgc6igATsKPwc+dMQOZARxFgAnYUfu6cCcgc6CgCTMCOws+dMwGZAx1FgAnYUfi5cyYgc6CjCDABOwo/d84EZA50FIGu3xK/G77D0NE3lLDzbv+ODWvAhC+YqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgS6fi64kcd769z74t2PLoiz85fF/Mqy2DE2LsaHhsVdN+0Uuz420UiTus788rJ4tfBPcXZhPmzjro/vFHff9InwPEkC9+3Krusn9L27+Wk5Tw0BQY6f/eWP9PmTQDpOdoxtEQe++CXxyB2fjisSmY92D//hzeq2/yCI4FvE8Ye+LnaOj0fWrSUT5Hv0xPGqorjXA1+8pyo/LRmpMMGnPjov9jx/jAjy+2qCOG/q7MJl8d3XX6GX/WtxZn5NkznFKk5BvEO/ez22bbT56Mu1t1fRePnkxb+fisoWrxVOR+anJbPnCQjy6ZdPJKhH3jp3pibSwNyC2LaMDw2JnWTWbQEJv/f6b+ys
utKvFv4VWR7P99YHZyKvpSGzp00wyPH4KyeqNBNMIkzsp2i8B7JAXvz738Tb9CLPWEQ1pDm+9+ux7xLaz5Zvffbz2oRjTKk1H5lN0yZIPb+8VPeY7dX/nK56BrvPt8k8301jzTRKT2tAkMO8fPNyQJDff+NxTZIH8reRgwAnYaf4yVf2iON7HxUP5D9piuojSIOxY5zAkTECMh/88ldCgoHoT9IYzRbbQbHz10u/+I+/VVx2HSWMP9MqPUtAvOgXSKvZAvKBIHECwjy7Z2+VJxyMHZfiqoX544PDYdokovLMtVqOgWddaX4Pfvm+UHOjDZRJqxnuWQK6phHkgsdYi/zgnkqSBiSIHuzD1BqByXUdlx+++bq5rL1hmP16xB374TnuorAOtLctr8WMEe0yvZjuWQJicG4Lxkg2WexrbhplYZZteZtMcZQgzmeLcTSggbUnbY0p6w3toF2MTW0xxHv49s/Y2eIFMtMYX6ZNepKA0FjvOgR8uM643v23OGPBGE/zkds/TR7vlvC9Y8z47VdeEg8+f1QgbQQB41o1sKkDEtttIN+QOPiDChwo5OOZT1FwPW3SkwQ8dfHDqvew6/ptVXnrZezYvEYqlIN5jRI4Hj8mB8aWVyk2B0IYgTaFg1OvvPXB+xVVYH5tEw7y2/LcX+OdJbtcL6V7koBRANdqfk3dXduqCXvG8nhNORyhjVzv2VyH04MwTr39o36c+TVt3+967KSl02aGU0NA89JaccQsiOssoB9ox/snK015rf2vZ35NG1FmGNo3TdK3BLy8vFL1HreUg9bmAszsnuPH9PyyybOPuP44jQdtrQRTji+Dm48bKjL1XUK75teUc82wqzVNuV499iQBbafAAB9nPs1192gHmM0114weohDLqYuV3jYWBtj94/qh371hmqgKjJuZmLBAOfHcnyuDy9B2CKq7H3tMiKpwWmzCu+322nlPTsVFBX/fJSLsHK90LNZ7Ge86jow7+4DpMVd7YawHh+ORO3aRF3wsdEQQItlBK2FATiwDs8UlNa7Bm3VncNCX25/djp1Gf9/67BfsrJ5N96QGhFapiuNFhFG+S4sD7vnlM/oDU2oHkd3VJ66mcafHEB4xfcJcYvmVLZhNwZSeq9mivPPn1pn6s9uMS79GfxxpkZ4kIMB3A8TQCjbBUAYa6TItSD1D8TaYSozXINA0rgZy44iumXOvQ2NiftkWmGK73QduuS3SO8aiiCSSJjPckyYYLw8myF58ahwCxOOM2YOmevbBfXrZFeqAhFgL6BIA5Yx2Q7ko0WNGZ/YEWhHerDstaOpHechYeGqTFGWf3bNPe9SmXtQRwW879ohnT8NC1Z7VgDDDWHxgCwiGVcW2JsTg3n5RUdovagbDNckwra5WRN+oGxUjxJSamdWw79E1/dCk9qod/CFEfVxv2P0jsvvopXTPEhAgg1iu8wAS3vOrZ/Q8LTQTPiBOnDcKEkcRxQ0Co90Hn/8FeaHva00EbYQ0NKobUsG9naXV1lGEdYnzMDk0tYh7PzDDaVgh07Mm2Lw0LK/SWs+ZStMvyJqrNeXtIzRX3PItaM7AzK9Nf5kFqHYbcWkQFmPCn3x1bZwIz9o1v1FmOqpNE5S2zXAaFqr2tAbEi8L47ZWvPRapxaJepJ0XFQu0r2NdXj3hDmhTO0YIx8geH742U7nuD9q7ntCRa4bTsFC15wkIwsC8wiPFSmiY0zhzi3x7vBZoqbX1fDb5TBokRNuuqTfX0SbGbIgRBvPCcILWVrEgPINxJzSXG+er1fyavlwzrIcBCT1q03anjvI/F/6r0Pl1123t1D1U9OvuadzoHtEgF14QtNwOClBDU5ovEmEdH0y0kVo1HcZ0py4G3zdG3U9tIw22OfjOsWmr247NwrPZz/W//13STfb8GDAOGKzP0+KETpCHsAe+xmnGY9BSWIUcp+WChqBph4NwTUSbpgwf60MgtQRcDwaYyDfJXLN0HoFUjAE7DyPfQaMIMAEbRY7rNQUBJmBTYORGGkWACdgoclyvKQgwAZsCIzfSKAJMwEaR43pNQYAJ2BQYuZFGESACyjlUVr6eEGm0Ha7HCNSMgFIh1+Y8IVVAQBFm1twQF2QEGkEgJKAUc10/E+LOZTbywFynexHgMWD3vpu+uDMmYF+85u59SCZg976b1N6Zb5wQJeeyUokZcj8mS74vPK/zfGx0/V9q31YKHyx0QoQiL5iFEeggAp4vBMcBO/gC+r1rTyqld2ZUiqjIwgi0AQG/VNK9SCln2AS3AXDuohIB44Mg11NSzCDhkxPCwgi0AwFjbX3lv0d+bzAXHLrG7bgD7qOvEVjzguWcVyrPBQtbL/Y1PPzwrUbALwXW1sMYMENxQHRYLAYDw1Z3zu0zAqEGVD7FAYsBAcNMxocRaDECmPTQQtzz8tu3z+AETgivCdSw8H8tRsA4vOBeEIYpe8KK1wS2GHpuvliOAdJC6JNAQxOQ/A99srq6yggxAi1FwAShhV96Dx2VNaCvT9bY2dJ74Mb7GIFisaifXnm2BhSZaeT67AlrcPi/1iFQKnPMk96aCc5kBqfRJTQgOyJAgqVVCKyWNaDIXJtGH9oE57dunZNCTCMUU/Q5Htgq8Pu93ZB85IDkt+bnQgIiQUGY3+K4slL9G2rIZ2EEkiKwshT8xK1SJc01tBc4IUFiGhkrET/ih3wWRiApAkYDeiJ71LQVEjC/bfu0McOmoCnER0YgKQLLtF2yDkDT1G9+YkI7IGgzJGC5g5dxXLq2WD7lAyPQHASMZZVCHbJbrCRgdugotuqABmQtaMPE6SQIhHzS2m87cWxNKggIb1gJ/2lcZi24BhKnkiFw9cpV3QBFWY65LdGwr1IKly7l1OryO0KKydHRETE8PFxZgM8YgToQwNjv6tVrtPuVmLll4sa8W7VCA+Kijgl68gDSi4tLHJgGECwNIYBlV+AQxB37mQarCIgL+Y/dcIJUow5MX7kaqE9TgY+MQK0IYBinl/kJcSI/UTn2M21EElBfLKpvaoeEVsgsLQUsNpX4yAhshMASBZ2X9aQGfe+jqLRFjaoTS0AsFpSidAiVoEbDVaxRrXAeI2AhEJjeIJQnlX/ALHq2ioTJWAKiRH7bTUeU9J/GHPHC/AKTMISNE3EIgHzgCjhDX798Os70mvo01FtftFdcXHmD3JjdmUxGjI+NCeltWG39RvlqKhEA6eahqLDqmRY5k9d750YPuq4GRGXtFRf9fXCj0fD8ArGb95PeCNe+u+6Qb0aW1L5aQKhZlRVmZydVRr6B+CBrwlqg7Z8yEeS7b71xn41MzQREJZeEm8c2i0wX7CloPxCn24sAxnxXFq4YswvNVzP5cKd1ERAVbBJiQ8ux8TEmIYDpQzEOh1nlUi/5AFndBEQlTcKs9xIcE9piS4yMDPOUHYDpI0Gcb3FxUXu7cDgw5qvV7NowNURA08C/Pzp3RCrvCZwPDQ6KYZo7ZpNs0EnnEeM9LC5YKX+FF6EW7+ryU/l8sMS+3qdOREB0Vjg/u19J7zBpwxxMMrThEP0iOUv6EKjQerRsjyJ
9h27dduORJE+amIDoHCZZZOVh2ux8L85BxE2bN4mBbNf/Dg5ul2UDBLCeD/O61hrRaUlTtY2YXLerphDQNKq1oZAHEapBHgg4ODTIGtEA1GPHKuJRLFhPrd1w04lmPUpTCWhu6t8XZp+SSj5miAiNqMk4PMRa0YDUhUeM7/Dd8FVaRLBMX07DeSAwtzTWu7J0pNGxXtzjtoSApjNXI5p8kDGTzYgsHT2a3svSh6W9CIBc+GA/IMxwYccCvTFBSDp9P9NEkJfFlcWjzSaeedqWEtB0Ujh/frcvivulzNyL0I3Jt4/QkvCgEdbxMsER6eB8jaD6nPJtMeXsvLSnDYHc50RsDqLoaDSYXpNXJhw2IkW+jt25lYPzaaLmb2mOdhrflIwu0rzcyjfZvHZjWyoUCjkxNjpFG1Tv9oT3OVLyk3GkjG2ELzQHAdqWj4ZKJ31Vos3CaX+ghWvTrdJ0cTfcdgLG3UjgSRMZpZejP9FJ+vvNecq7WZeXatLUU0LmhFQ5c66PivKofEVe6k9oc3mzv7f1rPjpteCUrqvgR4h8SbvRU9gE+4HrLZlpZ9JmeLBWtw0n/w+IOsoy1qfzJgAAAABJRU5ErkJggg==\n name: Simple Q&A\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750836380067-source-1753253430271-target\n source: '1750836380067'\n sourceHandle: source\n target: '1753253430271'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1753253430271-source-1750836372241-target\n source: '1753253430271'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: qa_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1753253430271'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: false\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: null\n reranking_provider_name: null\n score_threshold: 0\n score_threshold_enabled: false\n search_method: semantic_search\n top_k: 3\n weights: null\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 160\n y: 326\n positionAbsolute:\n x: 160\n y: 326\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - csv\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -714.4192784522008\n y: 326\n positionAbsolute:\n x: -714.4192784522008\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 249\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. 
The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1115\n height: 249\n id: '1751252161631'\n position:\n x: -714.4192784522008\n y: -19.94142868660783\n positionAbsolute:\n x: -714.4192784522008\n y: -19.94142868660783\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1115\n - data:\n author: TenTen\n 
desc: ''\n height: 281\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 415\n height: 281\n id: '1751252440357'\n position:\n x: -1206.996048993409\n y: 311.5998178583933\n positionAbsolute:\n x: -1206.996048993409\n y: 311.5998178583933\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 415\n - data:\n author: TenTen\n desc: ''\n height: 403\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. 
Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 403\n id: '1751254117904'\n position:\n x: 160\n y: 471.1516409864865\n positionAbsolute:\n x: 160\n y: 471.1516409864865\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 341\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts\n specified columns from tables to generate structured Q&A pairs. Users can\n independently designate which columns to use for questions and which for\n answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These\n pairs are indexed by the question field, so user queries are matched directly\n against the questions to retrieve the corresponding answers. 
This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching\n strategy improves clarity and precision, especially in scenarios involving\n high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 341\n id: '1751356019653'\n position:\n x: -282.74494795239\n y: 411.6979750489463\n positionAbsolute:\n x: -282.74494795239\n y: 411.6979750489463\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: The result of the general chunk tool.\n properties:\n qa_chunks:\n items:\n description: The QA chunk.\n properties:\n answer:\n description: The answer of the QA chunk.\n type: string\n question:\n description: The question of the QA chunk.\n type: string\n type: object\n type: array\n type: object\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file you want to extract QA from.\n ja_JP: The file you want to extract QA from.\n pt_BR: The file you want to extract QA from.\n zh_Hans: 你想要提取 QA 的文件。\n label:\n en_US: Input File\n ja_JP: Input File\n pt_BR: Input File\n zh_Hans: 输入文件\n llm_description: The file you want to extract QA from.\n max: null\n min: null\n name: input_file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Column number for question.\n ja_JP: Column number for question.\n pt_BR: Column number for question.\n zh_Hans: 问题所在的列。\n label:\n en_US: Column number for question\n ja_JP: Column number for question\n pt_BR: Column number for question\n zh_Hans: 问题所在的列\n llm_description: The column number for question, the format of the column\n number must be an integer.\n max: null\n min: null\n name: question_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 1\n form: llm\n human_description:\n en_US: Column number for answer.\n ja_JP: Column number for answer.\n pt_BR: Column number for answer.\n zh_Hans: 答案所在的列。\n label:\n en_US: Column number for answer\n ja_JP: Column number for answer\n pt_BR: Column number for answer\n zh_Hans: 答案所在的列\n llm_description: The column number for answer, the format of the column\n number must be an integer.\n max: null\n min: null\n name: answer_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n params:\n answer_column: ''\n input_file: ''\n question_column: ''\n provider_id: langgenius/qa_chunk/qa_chunk\n provider_name: langgenius/qa_chunk/qa_chunk\n provider_type: builtin\n selected: false\n title: Q&A PROCESSOR\n tool_configurations: {}\n tool_description: A tool for QA chunking mode.\n tool_label: QA Chunk\n tool_name: qa_chunk\n tool_node_version: '2'\n tool_parameters:\n answer_column:\n type: variable\n value:\n - rag\n - shared\n - 
Column_Number_for_Answers\n input_file:\n type: variable\n value:\n - '1750836380067'\n - file\n question_column:\n type: variable\n value:\n - rag\n - shared\n - Column_Number_for_Questions\n type: tool\n height: 52\n id: '1753253430271'\n position:\n x: -282.74494795239\n y: 326\n positionAbsolute:\n x: -282.74494795239\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 173\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Simple\n Q&A Template\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" requires\n a pre-prepared table of question-answer pairs. As a result, it only supports\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"File\n Upload\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" data\n source, accepting \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\"csv\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" file\n formats.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 173\n id: '1753411065636'\n position:\n x: -714.4192784522008\n y: 411.6979750489463\n positionAbsolute:\n x: -714.4192784522008\n y: 411.6979750489463\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n viewport:\n x: 698.8920691163195\n y: 311.46417000656925\n zoom: 0.41853867943092266\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1\n label: Column Number for Questions\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Specify a column in the table as Questions. The number of first column is\n 0.\n type: number\n unit: ''\n variable: Column_Number_for_Questions\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 2\n label: Column Number for Answers\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: Specify a column in the table as Answers. The number of first column is\n 0.\n type: number\n unit: null\n variable: Column_Number_for_Answers\n",
+ "graph": {
+ "edges": [
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "tool"
+ },
+ "id": "1750836380067-source-1753253430271-target",
+ "source": "1750836380067",
+ "sourceHandle": "source",
+ "target": "1753253430271",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "knowledge-index"
+ },
+ "id": "1753253430271-source-1750836372241-target",
+ "source": "1753253430271",
+ "sourceHandle": "source",
+ "target": "1750836372241",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ }
+ ],
+ "nodes": [
+ {
+ "data": {
+ "chunk_structure": "qa_model",
+ "embedding_model": "jina-embeddings-v2-base-en",
+ "embedding_model_provider": "langgenius/jina/jina",
+ "index_chunk_variable_selector": [
+ "1753253430271",
+ "result"
+ ],
+ "indexing_technique": "high_quality",
+ "keyword_number": 10,
+ "retrieval_model": {
+ "reranking_enable": false,
+ "reranking_mode": "reranking_model",
+ "reranking_model": {
+ "reranking_model_name": null,
+ "reranking_provider_name": null
+ },
+ "score_threshold": 0,
+ "score_threshold_enabled": false,
+ "search_method": "semantic_search",
+ "top_k": 3,
+ "weights": null
+ },
+ "selected": true,
+ "title": "Knowledge Base",
+ "type": "knowledge-index"
+ },
+ "height": 114,
+ "id": "1750836372241",
+ "position": {
+ "x": 160,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 160,
+ "y": 326
+ },
+ "selected": true,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "File",
+ "datasource_name": "upload-file",
+ "datasource_parameters": {},
+ "fileExtensions": [
+ "csv"
+ ],
+ "plugin_id": "langgenius/file",
+ "provider_name": "file",
+ "provider_type": "local_file",
+ "selected": false,
+ "title": "File",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1750836380067",
+ "position": {
+ "x": -714.4192784522008,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -714.4192784522008,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 249,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see 
\",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 1115
+ },
+ "height": 249,
+ "id": "1751252161631",
+ "position": {
+ "x": -714.4192784522008,
+ "y": -19.94142868660783
+ },
+ "positionAbsolute": {
+ "x": -714.4192784522008,
+ "y": -19.94142868660783
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 1115
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 281,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 415
+ },
+ "height": 281,
+ "id": "1751252440357",
+ "position": {
+ "x": -1206.996048993409,
+ "y": 311.5998178583933
+ },
+ "positionAbsolute": {
+ "x": -1206.996048993409,
+ "y": 311.5998178583933
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 415
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 403,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 403,
+ "id": "1751254117904",
+ "position": {
+ "x": 160,
+ "y": 471.1516409864865
+ },
+ "positionAbsolute": {
+ "x": 160,
+ "y": 471.1516409864865
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 341,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts specified columns from tables to generate structured Q&A pairs. Users can independently designate which columns to use for questions and which for answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These pairs are indexed by the question field, so user queries are matched directly against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching strategy improves clarity and precision, especially in scenarios involving high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 341,
+ "id": "1751356019653",
+ "position": {
+ "x": -282.74494795239,
+ "y": 411.6979750489463
+ },
+ "positionAbsolute": {
+ "x": -282.74494795239,
+ "y": 411.6979750489463
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "result": {
+ "description": "The result of the general chunk tool.",
+ "properties": {
+ "qa_chunks": {
+ "items": {
+ "description": "The QA chunk.",
+ "properties": {
+ "answer": {
+ "description": "The answer of the QA chunk.",
+ "type": "string"
+ },
+ "question": {
+ "description": "The question of the QA chunk.",
+ "type": "string"
+ }
+ },
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The file you want to extract QA from.",
+ "ja_JP": "The file you want to extract QA from.",
+ "pt_BR": "The file you want to extract QA from.",
+ "zh_Hans": "你想要提取 QA 的文件。"
+ },
+ "label": {
+ "en_US": "Input File",
+ "ja_JP": "Input File",
+ "pt_BR": "Input File",
+ "zh_Hans": "输入文件"
+ },
+ "llm_description": "The file you want to extract QA from.",
+ "max": null,
+ "min": null,
+ "name": "input_file",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "file"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Column number for question.",
+ "ja_JP": "Column number for question.",
+ "pt_BR": "Column number for question.",
+ "zh_Hans": "问题所在的列。"
+ },
+ "label": {
+ "en_US": "Column number for question",
+ "ja_JP": "Column number for question",
+ "pt_BR": "Column number for question",
+ "zh_Hans": "问题所在的列"
+ },
+ "llm_description": "The column number for question, the format of the column number must be an integer.",
+ "max": null,
+ "min": null,
+ "name": "question_column",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": 1,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Column number for answer.",
+ "ja_JP": "Column number for answer.",
+ "pt_BR": "Column number for answer.",
+ "zh_Hans": "答案所在的列。"
+ },
+ "label": {
+ "en_US": "Column number for answer",
+ "ja_JP": "Column number for answer",
+ "pt_BR": "Column number for answer",
+ "zh_Hans": "答案所在的列"
+ },
+ "llm_description": "The column number for answer, the format of the column number must be an integer.",
+ "max": null,
+ "min": null,
+ "name": "answer_column",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ }
+ ],
+ "params": {
+ "answer_column": "",
+ "input_file": "",
+ "question_column": ""
+ },
+ "provider_id": "langgenius/qa_chunk/qa_chunk",
+ "provider_name": "langgenius/qa_chunk/qa_chunk",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Q&A PROCESSOR",
+ "tool_configurations": {},
+ "tool_description": "A tool for QA chunking mode.",
+ "tool_label": "QA Chunk",
+ "tool_name": "qa_chunk",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "answer_column": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Column_Number_for_Answers"
+ ]
+ },
+ "input_file": {
+ "type": "variable",
+ "value": [
+ "1750836380067",
+ "file"
+ ]
+ },
+ "question_column": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Column_Number_for_Questions"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1753253430271",
+ "position": {
+ "x": -282.74494795239,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -282.74494795239,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 173,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Simple Q&A Template\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" requires a pre-prepared table of question-answer pairs. As a result, it only supports \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"File Upload\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" data source, accepting \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\"csv\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" file formats.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 173,
+ "id": "1753411065636",
+ "position": {
+ "x": -714.4192784522008,
+ "y": 411.6979750489463
+ },
+ "positionAbsolute": {
+ "x": -714.4192784522008,
+ "y": 411.6979750489463
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ }
+ ],
+ "viewport": {
+ "x": 698.8920691163195,
+ "y": 311.46417000656925,
+ "zoom": 0.41853867943092266
+ }
+ },
+ "icon_info": {
+ "icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAUPklEQVR4Ae1dW4wcxRWt6pl92rseQ7xgYocdIALFeRglkSBEYkkkwF/YEoT8RDiKwkd+wEryG+P8JpHNTySEQuwkHzEgYX6C4AM2UghISYTzMMrDySzYeION4/Wu7X3NdOWe6qnempru3Znpefbca427uroe3afP3lv3Vk2NFF0ihdnZSZEVkyUpJqWSOSFUzlPezbg9X6qcFILySOi6Plb8R+WVCq5X5Kf4RMo5wog+liiB8zCPcJzBVV/67xFwc0r6MxlF9YpiJr99u76G650Ueq/tlcKlQq5UGprKKO9eXxDZpNgtVBSp2ntffdrbSSXEDBH5z0qqk5nM8nR+az4kcDswaQsBCxdmp4Tw7lVC0VHgUyWe5wmP2JjJZoSkIz7Ig0g64hySKefpk/J/prydl/a0UoQmfWzBuW/l+aUSlSF6KV+X9X06+kqU6Ih0jJwkpKeF8o7lJyZOxpRpWnbLCAhN5xdH9lMHD9HdTpk7BlmymYwmWoaOAwMDIeFMGT62B4ESERRkLK6uilKxJFaLxcqOpZjxfXXotontRysvNO+s6QQE8URx9AklxZP0Z5fDrYJ0Q0ODYmBwUJPPaLPmPQa31CwEQMKV5WWxulpc05JERBpPHs1vu+FQs/ox7TSVgKc/PLfXy3iHzZhuIJsVw6MjAkeW3kNgeXklIKPRjC3QiE0hYOHS7KQqyp8TxFOAmYkHFNIj0IpXr1wNNSINK094WXUgvzW5J52YgO9dPP9ESamnYG5hWkdGRsTw8FB60OcnCRGARlxcXDREnCOH50DS8WHDBAzGeiMH6a/hSdzh4OCA2LRpU+ithnfNiVQhAO8ZJAQZIUp4R27dNnGg0YdsiIBlk/sSdbqbtV6j0Pd2vaWlZU3EcijopMyqfY2Y5LoJqMlXkm/A0UCcbnx8LIzX9TakfPf1IgBtOD+/EJhkeMoZdV+9JKyLgDb5EMMbG9vM5Kv3raWsPEi4sHBFIKZI06R1k7BmArrkg+bjeF7K2NTg48AMQxM2QsKaCMjka/DN9FG1RkkYTLZuABTF+F7CmA9mlzXfBmD16WVYQ3ADHAFXwBkdKdkAjw0JWLjw38PUxm44HBjzsdndANE+vgxuWH7Bbr+46eBGcKxrgk+fn91PK1R+joa3bBlnh2MjNPm6RgCOyeXL83oFjiqJA7feeOOROGhiCRiM+7x3MMMxOjrKsxtxCHJ+JAKIE167dg3X5ihGeGdceCbeBBexqEDlsIqFp9YiMebMdRAAZzA7RpIrrxOILB1JQJheWu64F+M+zO2yMAKNIGBNzU6d/ujc3qg2IgnoeVIPHkE+syo5qjLnMQLrIQDfwSgwWu9+OMorriJg4eKHB800G8wvCyOQBAGYYr0elEIz/sqwXrhit1dFQAoo7keBTZs32eU4zQg0jAAWJUOkJ59wtWAFATH2g/YDY3kVc8N4c0UHAYtP+ntC9uUKApqx3+AQLyi1QeJ0cgRCLRh8SS1sMCRg4fxZ/f1cOB089gvx4USTEIAWLM+iTQVf0w0aDgnoe95+ZA0M8BeIAmj4/2YjYBQbTZRMmbZDAkqVuReZbH4NNHxsNgL4Wi6EnBHNNaQ1AQuXLuVoCcNuZLDzARRYWoEANiQIzTC+P06iCVgqrUzhhMkHFFhahQDIBxJqKY1O4agJKJWvtZ9H+7KwMAKtRAB7/0B8vzSFY3kMKD+Hk4GsnjxGkoURaAkCesEqtSwp3owOAg0o5CSlaTVrmY84YWEEWoAANqPSkvG00iszLnBADDtb0C83yQhoBMpOiF62jwxP70yKBAWgWRiBViMAAhqugXsetsVFp1EbP7b6Zrj9/kQg1ILEPa8kPR2PoeBgf6LBT912BLJlTxj7gXsZpSZxB9gGl4URaAcCRgNiM3qPdg0OItJkm1kYgXYgYAhInkjOM/GYtcx23AL30c8IGCfEk97Nod1lAvYzJTr37PS9c3kzuvfMHF3n7oV77hMEjLJTpdLWUAP2ybPzY3YBAqHD63lbmIBd8EL6+RaySujfZdO/UtQNQHzipz/qhttI7T28/53vd/zZwkkPxAFpWUIQiOYwTMdfTD/eAJvgfnzrXfTMTMAuehn9eCtMwH586130zJ7QPw5Nc8H0j4URaAcCJg5Iu3DkSAOWnRBeDdMO7LkPQiAkIO0dyCaYKdFRBJiAHYWfO2cCMgc6igATsKPwc+dMQOZARxFgAnYUfu6cCcgc6CgCTMCOws+dMwGZAx1FgAnYUfi5cyYgc6CjCDABOwo/d84EZA50FIGu3xK/G77D0NE3lLDzbv+ODWvAhC+YqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgS6fi64kcd769z74t2PLoiz85fF/Mqy2DE2LsaHhsVdN+0Uuz420UiTus788rJ4tfBPcXZhPmzjro/vFHff9InwPEkC9+3Krusn9L27+Wk5Tw0BQY6f/eWP9PmTQDpOdoxtEQe++CXxyB2fjisSmY92D//hzeq2/yCI4FvE8Ye+LnaOj0fWrSUT5Hv0xPGqorjXA1+8pyo/LRmpMMGnPjov9jx/jAjy+2qCOG/q7MJl8d3XX6GX/WtxZn5NkznFKk5BvEO/ez22bbT56Mu1t1fRePnkxb+fisoWrxVOR+anJbPnCQjy6ZdPJKhH3jp3pibSwNyC2LaMDw2JnWTWbQEJv/f6b+ysutKvFv4VWR7P99YHZyKvpSGzp00wyPH4KyeqNBNMIkzsp2i8B7JAXvz738Tb9CLPWEQ1pDm+9+ux7xLaz5Zvffbz2oRjTKk1H5lN0yZIPb+8VPeY7dX/nK56BrvPt8k8301jzTRKT2tAkMO8fPNyQJDff+NxTZIH8reRgwAnYaf4yVf2iON7HxUP5D9piuojSIOxY5zAkTECMh/88ldCgoHoT9IYzRbbQbHz10u/+I+/VVx2HSWMP9MqPUtAvOgXSKvZAvKBIHECwjy7Z2+VJxyMHZfiqoX544PDYdokovLMtVqOgWddaX4Pfvm+UHOjDZRJqxnuWQK6phHkgsdYi/zgnkqSBiSIHuzD1BqByXUdlx+++bq5rL1hmP16xB374TnuorAOtLctr8WMEe0yvZjuWQJicG4Lxkg2WexrbhplYZZteZtMcZQgzmeLcTSggbUnbY0p6w3toF2MTW0xxHv49s/Y2eIFMtMYX6ZNepKA0FjvOgR8uM643v23OGPBGE/zkds/TR7vlvC9Y8z47VdeEg8+f1QgbQQB41o1sKkDEtttIN+QOPiDChwo5O
OZT1FwPW3SkwQ8dfHDqvew6/ptVXnrZezYvEYqlIN5jRI4Hj8mB8aWVyk2B0IYgTaFg1OvvPXB+xVVYH5tEw7y2/LcX+OdJbtcL6V7koBRANdqfk3dXduqCXvG8nhNORyhjVzv2VyH04MwTr39o36c+TVt3+967KSl02aGU0NA89JaccQsiOssoB9ox/snK015rf2vZ35NG1FmGNo3TdK3BLy8vFL1HreUg9bmAszsnuPH9PyyybOPuP44jQdtrQRTji+Dm48bKjL1XUK75teUc82wqzVNuV499iQBbafAAB9nPs1192gHmM0114weohDLqYuV3jYWBtj94/qh371hmqgKjJuZmLBAOfHcnyuDy9B2CKq7H3tMiKpwWmzCu+322nlPTsVFBX/fJSLsHK90LNZ7Ge86jow7+4DpMVd7YawHh+ORO3aRF3wsdEQQItlBK2FATiwDs8UlNa7Bm3VncNCX25/djp1Gf9/67BfsrJ5N96QGhFapiuNFhFG+S4sD7vnlM/oDU2oHkd3VJ66mcafHEB4xfcJcYvmVLZhNwZSeq9mivPPn1pn6s9uMS79GfxxpkZ4kIMB3A8TQCjbBUAYa6TItSD1D8TaYSozXINA0rgZy44iumXOvQ2NiftkWmGK73QduuS3SO8aiiCSSJjPckyYYLw8myF58ahwCxOOM2YOmevbBfXrZFeqAhFgL6BIA5Yx2Q7ko0WNGZ/YEWhHerDstaOpHechYeGqTFGWf3bNPe9SmXtQRwW879ohnT8NC1Z7VgDDDWHxgCwiGVcW2JsTg3n5RUdovagbDNckwra5WRN+oGxUjxJSamdWw79E1/dCk9qod/CFEfVxv2P0jsvvopXTPEhAgg1iu8wAS3vOrZ/Q8LTQTPiBOnDcKEkcRxQ0Co90Hn/8FeaHva00EbYQ0NKobUsG9naXV1lGEdYnzMDk0tYh7PzDDaVgh07Mm2Lw0LK/SWs+ZStMvyJqrNeXtIzRX3PItaM7AzK9Nf5kFqHYbcWkQFmPCn3x1bZwIz9o1v1FmOqpNE5S2zXAaFqr2tAbEi8L47ZWvPRapxaJepJ0XFQu0r2NdXj3hDmhTO0YIx8geH742U7nuD9q7ntCRa4bTsFC15wkIwsC8wiPFSmiY0zhzi3x7vBZoqbX1fDb5TBokRNuuqTfX0SbGbIgRBvPCcILWVrEgPINxJzSXG+er1fyavlwzrIcBCT1q03anjvI/F/6r0Pl1123t1D1U9OvuadzoHtEgF14QtNwOClBDU5ovEmEdH0y0kVo1HcZ0py4G3zdG3U9tIw22OfjOsWmr247NwrPZz/W//13STfb8GDAOGKzP0+KETpCHsAe+xmnGY9BSWIUcp+WChqBph4NwTUSbpgwf60MgtQRcDwaYyDfJXLN0HoFUjAE7DyPfQaMIMAEbRY7rNQUBJmBTYORGGkWACdgoclyvKQgwAZsCIzfSKAJMwEaR43pNQYAJ2BQYuZFGESACyjlUVr6eEGm0Ha7HCNSMgFIh1+Y8IVVAQBFm1twQF2QEGkEgJKAUc10/E+LOZTbywFynexHgMWD3vpu+uDMmYF+85u59SCZg976b1N6Zb5wQJeeyUokZcj8mS74vPK/zfGx0/V9q31YKHyx0QoQiL5iFEeggAp4vBMcBO/gC+r1rTyqld2ZUiqjIwgi0AQG/VNK9SCln2AS3AXDuohIB44Mg11NSzCDhkxPCwgi0AwFjbX3lv0d+bzAXHLrG7bgD7qOvEVjzguWcVyrPBQtbL/Y1PPzwrUbALwXW1sMYMENxQHRYLAYDw1Z3zu0zAqEGVD7FAYsBAcNMxocRaDECmPTQQtzz8tu3z+AETgivCdSw8H8tRsA4vOBeEIYpe8KK1wS2GHpuvliOAdJC6JNAQxOQ/A99srq6yggxAi1FwAShhV96Dx2VNaCvT9bY2dJ74Mb7GIFisaifXnm2BhSZaeT67AlrcPi/1iFQKnPMk96aCc5kBqfRJTQgOyJAgqVVCKyWNaDIXJtGH9oE57dunZNCTCMUU/Q5Htgq8Pu93ZB85IDkt+bnQgIiQUGY3+K4slL9G2rIZ2EEkiKwshT8xK1SJc01tBc4IUFiGhkrET/ih3wWRiApAkYDeiJ71LQVEjC/bfu0McOmoCnER0YgKQLLtF2yDkDT1G9+YkI7IGgzJGC5g5dxXLq2WD7lAyPQHASMZZVCHbJbrCRgdugotuqABmQtaMPE6SQIhHzS2m87cWxNKggIb1gJ/2lcZi24BhKnkiFw9cpV3QBFWY65LdGwr1IKly7l1OryO0KKydHRETE8PFxZgM8YgToQwNjv6tVrtPuVmLll4sa8W7VCA+Kijgl68gDSi4tLHJgGECwNIYBlV+AQxB37mQarCIgL+Y/dcIJUow5MX7kaqE9TgY+MQK0IYBinl/kJcSI/UTn2M21EElBfLKpvaoeEVsgsLQUsNpX4yAhshMASBZ2X9aQGfe+jqLRFjaoTS0AsFpSidAiVoEbDVaxRrXAeI2AhEJjeIJQnlX/ALHq2ioTJWAKiRH7bTUeU9J/GHPHC/AKTMISNE3EIgHzgCjhDX798Os70mvo01FtftFdcXHmD3JjdmUxGjI+NCeltWG39RvlqKhEA6eahqLDqmRY5k9d750YPuq4GRGXtFRf9fXCj0fD8ArGb95PeCNe+u+6Qb0aW1L5aQKhZlRVmZydVRr6B+CBrwlqg7Z8yEeS7b71xn41MzQREJZeEm8c2i0wX7CloPxCn24sAxnxXFq4YswvNVzP5cKd1ERAVbBJiQ8ux8TEmIYDpQzEOh1nlUi/5AFndBEQlTcKs9xIcE9piS4yMDPOUHYDpI0Gcb3FxUXu7cDgw5qvV7NowNURA08C/Pzp3RCrvCZwPDQ6KYZo7ZpNs0EnnEeM9LC5YKX+FF6EW7+ryU/l8sMS+3qdOREB0Vjg/u19J7zBpwxxMMrThEP0iOUv6EKjQerRsjyJ9h27dduORJE+amIDoHCZZZOVh2ux8L85BxE2bN4mBbNf/Dg5ul2UDBLCeD/O61hrRaUlTtY2YXLerphDQNKq1oZAHEapBHgg4ODTIGtEA1GPHKuJRLFhPrd1w04lmPUpTCWhu6t8XZp+SSj5miAiNqMk4PMRa0YDUhUeM7/Dd8FVaRLBMX07DeSAwtzTWu7J0pNGxXtzjtoSApjNXI5p8kDGTzYgsHT2a3svSh6W9CIBc+GA/IMxwYccCvTFBSDp9P9NEkJfFlcWjzSaeedqWEtB0Ujh/frcvivulzNyL0I3Jt4/QkvCgEdbxMsER6eB8jaD6nPJtMeXsvLSnDYHc50RsDqLoaDSYXpNXJhw2IkW+jt25lYPzaaLmb2mOdhrflIwu0rzcyjfZvHZjWyoUCjkxNjpFG1Tv9oT3OVLyk3GkjG2ELzQHAdqWj4ZKJ31Vos3CaX+ghWvTrdJ0cTfcdgLG3UjgSRMZpZejP9FJ+vvNecq7WZeXatLUU0LmhFQ5c66PivKofEVe6k9oc3mzv7f1rPjpteCUrqvgR4h8SbvRU9gE+4HrLZlpZ9Jme
LBWtw0n/w+IOsoy1qfzJgAAAABJRU5ErkJggg=="
+ },
+ "id": "9ef3e66a-11c7-4227-897c-3b0f9a42da1a",
+ "name": "Simple Q&A",
+ "icon": {
+ "icon": "ae0993dc-ff90-48ac-9e35-c31ebae5124b",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAUPklEQVR4Ae1dW4wcxRWt6pl92rseQ7xgYocdIALFeRglkSBEYkkkwF/YEoT8RDiKwkd+wEryG+P8JpHNTySEQuwkHzEgYX6C4AM2UghISYTzMMrDySzYeION4/Wu7X3NdOWe6qnempru3Znpefbca427uroe3afP3lv3Vk2NFF0ihdnZSZEVkyUpJqWSOSFUzlPezbg9X6qcFILySOi6Plb8R+WVCq5X5Kf4RMo5wog+liiB8zCPcJzBVV/67xFwc0r6MxlF9YpiJr99u76G650Ueq/tlcKlQq5UGprKKO9eXxDZpNgtVBSp2ntffdrbSSXEDBH5z0qqk5nM8nR+az4kcDswaQsBCxdmp4Tw7lVC0VHgUyWe5wmP2JjJZoSkIz7Ig0g64hySKefpk/J/prydl/a0UoQmfWzBuW/l+aUSlSF6KV+X9X06+kqU6Ih0jJwkpKeF8o7lJyZOxpRpWnbLCAhN5xdH9lMHD9HdTpk7BlmymYwmWoaOAwMDIeFMGT62B4ESERRkLK6uilKxJFaLxcqOpZjxfXXotontRysvNO+s6QQE8URx9AklxZP0Z5fDrYJ0Q0ODYmBwUJPPaLPmPQa31CwEQMKV5WWxulpc05JERBpPHs1vu+FQs/ox7TSVgKc/PLfXy3iHzZhuIJsVw6MjAkeW3kNgeXklIKPRjC3QiE0hYOHS7KQqyp8TxFOAmYkHFNIj0IpXr1wNNSINK094WXUgvzW5J52YgO9dPP9ESamnYG5hWkdGRsTw8FB60OcnCRGARlxcXDREnCOH50DS8WHDBAzGeiMH6a/hSdzh4OCA2LRpU+ithnfNiVQhAO8ZJAQZIUp4R27dNnGg0YdsiIBlk/sSdbqbtV6j0Pd2vaWlZU3EcijopMyqfY2Y5LoJqMlXkm/A0UCcbnx8LIzX9TakfPf1IgBtOD+/EJhkeMoZdV+9JKyLgDb5EMMbG9vM5Kv3raWsPEi4sHBFIKZI06R1k7BmArrkg+bjeF7K2NTg48AMQxM2QsKaCMjka/DN9FG1RkkYTLZuABTF+F7CmA9mlzXfBmD16WVYQ3ADHAFXwBkdKdkAjw0JWLjw38PUxm44HBjzsdndANE+vgxuWH7Bbr+46eBGcKxrgk+fn91PK1R+joa3bBlnh2MjNPm6RgCOyeXL83oFjiqJA7feeOOROGhiCRiM+7x3MMMxOjrKsxtxCHJ+JAKIE167dg3X5ihGeGdceCbeBBexqEDlsIqFp9YiMebMdRAAZzA7RpIrrxOILB1JQJheWu64F+M+zO2yMAKNIGBNzU6d/ujc3qg2IgnoeVIPHkE+syo5qjLnMQLrIQDfwSgwWu9+OMorriJg4eKHB800G8wvCyOQBAGYYr0elEIz/sqwXrhit1dFQAoo7keBTZs32eU4zQg0jAAWJUOkJ59wtWAFATH2g/YDY3kVc8N4c0UHAYtP+ntC9uUKApqx3+AQLyi1QeJ0cgRCLRh8SS1sMCRg4fxZ/f1cOB089gvx4USTEIAWLM+iTQVf0w0aDgnoe95+ZA0M8BeIAmj4/2YjYBQbTZRMmbZDAkqVuReZbH4NNHxsNgL4Wi6EnBHNNaQ1AQuXLuVoCcNuZLDzARRYWoEANiQIzTC+P06iCVgqrUzhhMkHFFhahQDIBxJqKY1O4agJKJWvtZ9H+7KwMAKtRAB7/0B8vzSFY3kMKD+Hk4GsnjxGkoURaAkCesEqtSwp3owOAg0o5CSlaTVrmY84YWEEWoAANqPSkvG00iszLnBADDtb0C83yQhoBMpOiF62jwxP70yKBAWgWRiBViMAAhqugXsetsVFp1EbP7b6Zrj9/kQg1ILEPa8kPR2PoeBgf6LBT912BLJlTxj7gXsZpSZxB9gGl4URaAcCRgNiM3qPdg0OItJkm1kYgXYgYAhInkjOM/GYtcx23AL30c8IGCfEk97Nod1lAvYzJTr37PS9c3kzuvfMHF3n7oV77hMEjLJTpdLWUAP2ybPzY3YBAqHD63lbmIBd8EL6+RaySujfZdO/UtQNQHzipz/qhttI7T28/53vd/zZwkkPxAFpWUIQiOYwTMdfTD/eAJvgfnzrXfTMTMAuehn9eCtMwH586130zJ7QPw5Nc8H0j4URaAcCJg5Iu3DkSAOWnRBeDdMO7LkPQiAkIO0dyCaYKdFRBJiAHYWfO2cCMgc6igATsKPwc+dMQOZARxFgAnYUfu6cCcgc6CgCTMCOws+dMwGZAx1FgAnYUfi5cyYgc6CjCDABOwo/d84EZA50FIGu3xK/G77D0NE3lLDzbv+ODWvAhC+YqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgSYgMnw49oJEWACJgSQqydDgAmYDD+unRABJmBCALl6MgS6fi64kcd769z74t2PLoiz85fF/Mqy2DE2LsaHhsVdN+0Uuz420UiTus788rJ4tfBPcXZhPmzjro/vFHff9InwPEkC9+3Krusn9L27+Wk5Tw0BQY6f/eWP9PmTQDpOdoxtEQe++CXxyB2fjisSmY92D//hzeq2/yCI4FvE8Ye+LnaOj0fWrSUT5Hv0xPGqorjXA1+8pyo/LRmpMMGnPjov9jx/jAjy+2qCOG/q7MJl8d3XX6GX/WtxZn5NkznFKk5BvEO/ez22bbT56Mu1t1fRePnkxb+fisoWrxVOR+anJbPnCQjy6ZdPJKhH3jp3pibSwNyC2LaMDw2JnWTWbQEJv/f6b+ysutKvFv4VWR7P99YHZyKvpSGzp00wyPH4KyeqNBNMIkzsp2i8B7JAXvz738Tb9CLPWEQ1pDm+9+ux7xLaz5Zvffbz2oRjTKk1H5lN0yZIPb+8VPeY7dX/nK56BrvPt8k8301jzTRKT2tAkMO8fPNyQJDff+NxTZIH8reRgwAnYaf4yVf2iON7HxUP5D9piuojSIOxY5zAkTECMh/88ldCgoHoT9IYzRbbQbHz10u/+I+/VVx2HSWMP9MqPUtAvOgXSKvZAvKBIHECwjy7Z2+VJxyMHZfiqoX544PDYdokovLMtVqOgWddaX4Pfvm+UHOjDZRJqxnuWQK6phHkgsdYi/zgnkqSBiSIHuzD1BqByXUdlx+++bq5rL1hmP16xB374TnuorAOtLctr8WMEe0yvZjuWQJicG4Lxkg2WexrbhplYZZteZtMcZQgzmeLcTSggbUnbY0p6w3toF2MTW0xxHv49s/Y2eIFMtMYX6ZNepKA0FjvOgR8uM643v23OGPBGE/zkds/TR7vlvC9Y8z47VdeEg8+f1QgbQQB41o1sKkDEtttIN+QOPiDChwo5O
OZT1FwPW3SkwQ8dfHDqvew6/ptVXnrZezYvEYqlIN5jRI4Hj8mB8aWVyk2B0IYgTaFg1OvvPXB+xVVYH5tEw7y2/LcX+OdJbtcL6V7koBRANdqfk3dXduqCXvG8nhNORyhjVzv2VyH04MwTr39o36c+TVt3+967KSl02aGU0NA89JaccQsiOssoB9ox/snK015rf2vZ35NG1FmGNo3TdK3BLy8vFL1HreUg9bmAszsnuPH9PyyybOPuP44jQdtrQRTji+Dm48bKjL1XUK75teUc82wqzVNuV499iQBbafAAB9nPs1192gHmM0114weohDLqYuV3jYWBtj94/qh371hmqgKjJuZmLBAOfHcnyuDy9B2CKq7H3tMiKpwWmzCu+322nlPTsVFBX/fJSLsHK90LNZ7Ge86jow7+4DpMVd7YawHh+ORO3aRF3wsdEQQItlBK2FATiwDs8UlNa7Bm3VncNCX25/djp1Gf9/67BfsrJ5N96QGhFapiuNFhFG+S4sD7vnlM/oDU2oHkd3VJ66mcafHEB4xfcJcYvmVLZhNwZSeq9mivPPn1pn6s9uMS79GfxxpkZ4kIMB3A8TQCjbBUAYa6TItSD1D8TaYSozXINA0rgZy44iumXOvQ2NiftkWmGK73QduuS3SO8aiiCSSJjPckyYYLw8myF58ahwCxOOM2YOmevbBfXrZFeqAhFgL6BIA5Yx2Q7ko0WNGZ/YEWhHerDstaOpHechYeGqTFGWf3bNPe9SmXtQRwW879ohnT8NC1Z7VgDDDWHxgCwiGVcW2JsTg3n5RUdovagbDNckwra5WRN+oGxUjxJSamdWw79E1/dCk9qod/CFEfVxv2P0jsvvopXTPEhAgg1iu8wAS3vOrZ/Q8LTQTPiBOnDcKEkcRxQ0Co90Hn/8FeaHva00EbYQ0NKobUsG9naXV1lGEdYnzMDk0tYh7PzDDaVgh07Mm2Lw0LK/SWs+ZStMvyJqrNeXtIzRX3PItaM7AzK9Nf5kFqHYbcWkQFmPCn3x1bZwIz9o1v1FmOqpNE5S2zXAaFqr2tAbEi8L47ZWvPRapxaJepJ0XFQu0r2NdXj3hDmhTO0YIx8geH742U7nuD9q7ntCRa4bTsFC15wkIwsC8wiPFSmiY0zhzi3x7vBZoqbX1fDb5TBokRNuuqTfX0SbGbIgRBvPCcILWVrEgPINxJzSXG+er1fyavlwzrIcBCT1q03anjvI/F/6r0Pl1123t1D1U9OvuadzoHtEgF14QtNwOClBDU5ovEmEdH0y0kVo1HcZ0py4G3zdG3U9tIw22OfjOsWmr247NwrPZz/W//13STfb8GDAOGKzP0+KETpCHsAe+xmnGY9BSWIUcp+WChqBph4NwTUSbpgwf60MgtQRcDwaYyDfJXLN0HoFUjAE7DyPfQaMIMAEbRY7rNQUBJmBTYORGGkWACdgoclyvKQgwAZsCIzfSKAJMwEaR43pNQYAJ2BQYuZFGESACyjlUVr6eEGm0Ha7HCNSMgFIh1+Y8IVVAQBFm1twQF2QEGkEgJKAUc10/E+LOZTbywFynexHgMWD3vpu+uDMmYF+85u59SCZg976b1N6Zb5wQJeeyUokZcj8mS74vPK/zfGx0/V9q31YKHyx0QoQiL5iFEeggAp4vBMcBO/gC+r1rTyqld2ZUiqjIwgi0AQG/VNK9SCln2AS3AXDuohIB44Mg11NSzCDhkxPCwgi0AwFjbX3lv0d+bzAXHLrG7bgD7qOvEVjzguWcVyrPBQtbL/Y1PPzwrUbALwXW1sMYMENxQHRYLAYDw1Z3zu0zAqEGVD7FAYsBAcNMxocRaDECmPTQQtzz8tu3z+AETgivCdSw8H8tRsA4vOBeEIYpe8KK1wS2GHpuvliOAdJC6JNAQxOQ/A99srq6yggxAi1FwAShhV96Dx2VNaCvT9bY2dJ74Mb7GIFisaifXnm2BhSZaeT67AlrcPi/1iFQKnPMk96aCc5kBqfRJTQgOyJAgqVVCKyWNaDIXJtGH9oE57dunZNCTCMUU/Q5Htgq8Pu93ZB85IDkt+bnQgIiQUGY3+K4slL9G2rIZ2EEkiKwshT8xK1SJc01tBc4IUFiGhkrET/ih3wWRiApAkYDeiJ71LQVEjC/bfu0McOmoCnER0YgKQLLtF2yDkDT1G9+YkI7IGgzJGC5g5dxXLq2WD7lAyPQHASMZZVCHbJbrCRgdugotuqABmQtaMPE6SQIhHzS2m87cWxNKggIb1gJ/2lcZi24BhKnkiFw9cpV3QBFWY65LdGwr1IKly7l1OryO0KKydHRETE8PFxZgM8YgToQwNjv6tVrtPuVmLll4sa8W7VCA+Kijgl68gDSi4tLHJgGECwNIYBlV+AQxB37mQarCIgL+Y/dcIJUow5MX7kaqE9TgY+MQK0IYBinl/kJcSI/UTn2M21EElBfLKpvaoeEVsgsLQUsNpX4yAhshMASBZ2X9aQGfe+jqLRFjaoTS0AsFpSidAiVoEbDVaxRrXAeI2AhEJjeIJQnlX/ALHq2ioTJWAKiRH7bTUeU9J/GHPHC/AKTMISNE3EIgHzgCjhDX798Os70mvo01FtftFdcXHmD3JjdmUxGjI+NCeltWG39RvlqKhEA6eahqLDqmRY5k9d750YPuq4GRGXtFRf9fXCj0fD8ArGb95PeCNe+u+6Qb0aW1L5aQKhZlRVmZydVRr6B+CBrwlqg7Z8yEeS7b71xn41MzQREJZeEm8c2i0wX7CloPxCn24sAxnxXFq4YswvNVzP5cKd1ERAVbBJiQ8ux8TEmIYDpQzEOh1nlUi/5AFndBEQlTcKs9xIcE9piS4yMDPOUHYDpI0Gcb3FxUXu7cDgw5qvV7NowNURA08C/Pzp3RCrvCZwPDQ6KYZo7ZpNs0EnnEeM9LC5YKX+FF6EW7+ryU/l8sMS+3qdOREB0Vjg/u19J7zBpwxxMMrThEP0iOUv6EKjQerRsjyJ9h27dduORJE+amIDoHCZZZOVh2ux8L85BxE2bN4mBbNf/Dg5ul2UDBLCeD/O61hrRaUlTtY2YXLerphDQNKq1oZAHEapBHgg4ODTIGtEA1GPHKuJRLFhPrd1w04lmPUpTCWhu6t8XZp+SSj5miAiNqMk4PMRa0YDUhUeM7/Dd8FVaRLBMX07DeSAwtzTWu7J0pNGxXtzjtoSApjNXI5p8kDGTzYgsHT2a3svSh6W9CIBc+GA/IMxwYccCvTFBSDp9P9NEkJfFlcWjzSaeedqWEtB0Ujh/frcvivulzNyL0I3Jt4/QkvCgEdbxMsER6eB8jaD6nPJtMeXsvLSnDYHc50RsDqLoaDSYXpNXJhw2IkW+jt25lYPzaaLmb2mOdhrflIwu0rzcyjfZvHZjWyoUCjkxNjpFG1Tv9oT3OVLyk3GkjG2ELzQHAdqWj4ZKJ31Vos3CaX+ghWvTrdJ0cTfcdgLG3UjgSRMZpZejP9FJ+vvNecq7WZeXatLUU0LmhFQ5c66PivKofEVe6k9oc3mzv7f1rPjpteCUrqvgR4h8SbvRU9gE+4HrLZlpZ9Jme
LBWtw0n/w+IOsoy1qfzJgAAAABJRU5ErkJggg=="
+ },
+ "language": "zh-Hans",
+ "position": 3
+ },
+ "982d1788-837a-40c8-b7de-d37b09a9b2bc": {
+ "chunk_structure": "hierarchical_model",
+ "description": "This template is designed for converting native Office files such as DOCX, XLSX, and PPTX into Markdown to facilitate better information processing. PDF files are not recommended.",
+ "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: yevanchen/markitdown:0.0.4@776b3e2e930e2ffd28a75bb20fecbe7a020849cf754f86e604acacf1258877f6\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 9d658c3a-b22f-487d-8223-db51e9012505\n icon_background: null\n icon_type: image\n icon_url: data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQfElEQVR4Ae2dT4wbVx3H35vxrjd/dmMnIZA0UrxtilQuTYUEB5CySD2CSJE4Vl0uHIpQk1sFh7YHqt7aCsGBS7fqEQlSwRGpi8QFJMRyQoKEdaR2U9qkdva/vfYMv+8b/7zjsZ2xPTP22PN70u6bP2/en+/7+Pf+zMwbrVLiNu9XSpSVUpP+tOsUlKsKtH/l4Z6rXNrW2uyrc6cthAs6hMVfllyVCou/Y+eq6sM9x3+sfO6Uxvl7Squqq6yyTT7tl5cvFss4MWmXG3cGNjcrhWZerWjlXFdKlyj9a/RXcogyOCMX/nsbBJ93vOWZMPLPKFCg//g7dqRZl070y2Wn6VfteHKqu1tfUGC1QTqX6aJ/utrasGtqfXm5CEDH5o5zl2CSZN1WKPrrBNMKlR/bXc6yLKUtrXK2rTSJhj8c+3zboeN0riXkVwrdvxkO3xXpDB/AD5N/nFxM7P/vEbUhLec0m+r8okXhHBPWcRwCkCBskk/bPZ2B0l23ctb7yxeKGz3DxHgwMQBh6Zy8s0oofd8PHWCxc7YBzSbY5ubm2sD1KtdnBKDfXViy/LuyHVBgGL2aBChgPGocqQZtN44agdhU2XWcN65ePr8WPBHXfuwAAjy1oF6hX9pNyqRpIgBdPj+v5ufmDXxszQYpxDCCDhLfrIeJqhcgrNVr6oh8n5UsW1qvUb/xjbj1ixXAO1sPblDD+TZlsoSM5uZy6uTCCeNjfxQXVdBR0pzma+LUq1arGxh9ljF2ixgLgBjBUv/jPW5q4wCPIYhTUI5zlv0k9AKAu3t7fot4myzirThG0pE7VJufVtDc/gPwoWk9efKkWlpcjGT1ZhmQaSwbDEqhcEadOnXKDAypDDdQ53c+frAatTwjA4i+3uZW5W3Hcd+hTBTm5+dMJhcW8lHzJNenVAH045eWFk1/HnVOsxPv3d16iC7XyG6kJhhNLoH3e5pDugard+LECZUUeEk0KSOrNQUXjkuvw8OaOjg48KaCaOrGsvQLozTJQ1tAA5/rfgT4ME935sxSYvBNQX1nNoswOKh7MAAWqEn+CGwMK8hQALbho1Eu5vBgjk0Ghk1Vws+EAqh7MAAWyOFu1tAQDgygwDcTzMReiKgQDgRgL/iGmUyOvdQSYaoUAAujWsKBADQDDl+zK/Clqv5TkZkuCGmQau6KheQuFEBMtaCTCVO7uHi6/VBASLxyOoMKAEIwYsYFGJjkndfCZHgsgHfuP1il5yhuMt0m4rAY5XymFeA+oddK6ps0T4hnAvq6vgCi36ddc1/XzPMJfH01lBMBBcAK5oY9p18DS4Eg7d2+ANKQGjPcBcx+JzXJ3M6FbMycAmAGd8fIFfCcQL8C9gQQTS9dcKOT5H5RyHFRoLcCuHeMphjPCdzZqtzoFaongNT0ms4jzKg0vb1kk2ODKAD4uCkmDN/uNSruAvDu/QrgKwE8NL/iRIEoCqApxtM05ErOvNM1IOkCkO4uryL0aTKf4kSBOBTAQ8nGaf1K0Ap2ANjq+5VAbIvaONKXODKugI8n856QX44OALnvl5+XZ/r8Isl2dAXYCuIlNX9sbQA3P65coxPS9/OrI9uxKQAryCNimhdc4YjbANKboqs4OOd1GPm8+KJAbArwoJbetlvhSNsAKktfx0Fpflka8eNWAK/lwpElNKyZbfzDyMTJuxVsnz1bhJcaF3zEPDUZm5KMpOlFfqzcUK0+Mo/xWzVdxDIgxgI2880V6Ckj3ymhakqziT4gVsWAw/pA8A2A2tUYgKic5Z3EtjhRIAkFsPaPca1+oNcH1PpZHMzROi3iRIEkFWi9P4KOYAnp8FJTZse2PR5xIi0uTX2YtGgyzfnAYlRw1Bobo8fEmSa4Tec0l1DynmoF0A9suRJ8ix8WlKdeWrKIl6gCAJBZA3sWrQhXQopWCpvfRJWQyCemgN8KWtptFpATWu1oYhmShLOlQI6nYprNEi2Kq0sovqW5O4g9caJAcgqwBaQlmQu0gHBrFVNCUZwoMA4FGECwZ7na6wO2D44jB5JGphXgQYilrCvtdlcAzDQTEys8AaivIHVbbsNNrBKyljAbu6Zyi20LmDURpLyTU4AHvDTsOCMATq4eJGVSAGNfMw+IrxSJEwXGoQDf9HDxCggl6AEoE9Hj0F7SCCggTXBAENkdrwIC4Hj1ltQCCuQ+33EVlo+pWw49pRA4G8Nu1Of5vvpqNYZcZDeKf79lelgjC5DEOzn4Bt32jvcRShp6uNIHHLl65MJRFOB5QLqW7gXLIGQUDeWaCAoEAYwQlVwqCkRTIIcvasOdjelD0En0GaIVUa6OU4GofXrOS67hcZfAsIOTEF8UCFdAAAzXSEIkqIAAmKC4EnW4AgJguEYSIkEFBMAExZWowxUQAMM1khAJKiAAJiiuRB2ugAAYrpGESFABATBBcSXqcAUEwHCNJESCCgiACYorUYcrIACGayQhElRAAExQXIk6XAEBMFwjCZGgAgJgguJK1OEK8BrR4SGnNETwnYhXf7uvfvf3+kilWf12Xv3su/wpei+KqO+sBPMXNb6RCjbBizJnAd/64Un1zMXhP0fxzCW7C74J1tvMJJ05AFFzH/z4tLo8xLI4
CPvrF+X7yUlQn0kAl05oA+HSQvhyJIAPwD4xBLBJVNSsxplJAFGZAApghblfkeUT+MJUGv18ZgGEZOjXoU/Yz/38eydMmH7n5Xh0BTIH4F//Sx+m8LkffH1e/fT5Bd8RbxPHXvpW55fj/7XV7AonB6IpkDkAf/LBnvq44i0LwdIFYcN0SxBKXPMyXSsuXgUyB+D2gate/M1uF4Robr/5ZM40ucG5PsCHaz4JgBtvVWQztswBiGoGSLCE24e0RKLPYcARnG5BGIQV+HxCxbiZSQChH/pzb/7hoENKTM8ER7wII32/Dpli3cksgFARt+R++afDvoLi3Ki37fyRYqCDv1Hd81+bi3T9qOmO47qZvxccJiIgg+ULjnjX/lJ7LJxh8fJ5gOef6hkW6KjXcz7S6mfaAnKl/IKaWf/0zN9oqubNP3Y2zxx2GD8ID0AcxhL2uh4DpVlys1WaCDWDUe44HFvDMEsYhI/z9g0C0P9j4ePT6osFTLDmABke/wq6MEvYDz50Fx7XZw2mMw37YgETriW2dGz5OLngPh/PEnwos1hArvkE/cdZwmCyvcCcRcvH5RYLyEok7PezhGHJRnmCOyzuNJwXCzjGWuhnCftlYdbhQ7kFwH61n9DxQSHMAnwCYEKQhUUbBmFW4BMAw0hJ8Hw/CLMEnwCYIGCDRB2EMGvwQaOZHwXH/Z5t3PEBQnb+bT426/7MAzgNFZhF8LheZBTMSog/EQUEwInILomyAgIgKyH+RBQQACciuyTKCgiArIT4E1FAAJyI7JIoKyAAshLiT0QBAXAiskuirIAAyEqIPxEFBMCJyC6JsgICICsh/kQUEAAnIrskygoIgKyE+BNRQACciOySKCuQe7DjLdbYyHUu2sgBxBcF/Ap8th0PJ9UWd2IB/erK9tgVAIBVpOq6nYs1jj0nkmBmFPCxVrVcpQXAzFR9OgrqB1Df3fpik7JVKhTOKMuSFjkdVTTbuXAcR1Wrj1DIshA323Wd+tIJgKmvotnOoAA42/WbytK5TnvAi0GIKiOXTjOe+Z1UllgylSoFeBBCn4qsigVMVdVkLzMWKESxHZkHzF7tp6DE1AS7ZjzsutIEp6A+MpGFpuN99FG7WqZhMlHjKSukv7G1tNsahNDkoDhRYBwKcGvrKOeepXTrXvDx0HgceZA0MqwAj4LBnuVq17sXrNpzMxmWRoo+DgWardbWVVaZBiF2GYk2GvI18HGIL2kcP3llwwLSAoFliNI2i6KQKJCwAr6bHmVr+WKxjPTwhILMBSasvERvFABrcGCP74SUzRH/+NgckH+iQLwKNI+7ehuImZfoxU7p6OhI5fP5eFOMGFtc7yBEzMbUXn5hiW1MOorAk9Bk6+4hR17uHNfs+OhMR24lFzOnQKPRMGXSyjUW0ADoWu46jjZat0hMCPknCiSgQKPpzba42joG0K7Z60gLFlAGIgmoLlG2FWgceRbQrql1HDR9wOXlYvXO1hfrNBez4hCE1hx3DdvXpWYjbX2a1AjTykia+8wMH2V1A8why+0eKs0D/hkH6vXjD6dgX5woEJcCh/WaiYqeiDasYacNIL0St44DNQEQMohLQAG2gPa8tcbRtwF8+mJxne4Gr+OOCAfkQOKLAlEVqNVq5mYHxVNevlA0AxDE2QYQOzQ0/hD+/uEBPHGiQGwKcMvqOvoNf6QdAFo1YxqrsIBiBf0yyXYUBXw8la9eLq754+oAECMTmoZ5FwHECvplku0oCuzu7XmXu+77wXg6AMTJXN16h7wyqD08PAyGl31RYCgF/H2/p54493rw4i4AYQVpwaJbCHhwcCgT00HFZH9gBfDYFRiCC/b9OJIuAHHi6qXibR4R7+22zCdfIb4oMKAC6Ma1Hr26Hez7cRQ9AcRJW+sfkVfFEzLSFLNc4g+qwOFhTdVr5qZG1dJei9rr2r4Aeg+qekNm0xTL0h299JNjPRTwml5vKo+a3lv80HOPoJ3zgMEAT10qvkO3Td7F5PT2zo6sHxMUSPa7FAB8YAXMgJ1+TS9f2NcCcgD7yHpd081jtOU7u7syKGFhxO9SANAZRvDIvas2rl4+d7MrUOBAKIAYFWutX6Dryk16lmtnmywhJSROFPArYFpJYgOMkCtblmHGH6TndiiAuMq8PKL1d2hTIOwpY7YPdsFHrDyu3+dXayAAcUFPCGVg4tcyk9umz+e3fEPAB8EGBhCBgxDKwASqZNfxgKPd7A4JH5QbCkBcwBDywOTR9rbME0KYjDnM86HuzUQzDThorm/gZtcv1dAA4mJA+OSls8/xFM3+/oHCDWf8IsTNtgI80t3f329PtVj10eCDUiMByBJjmO227phg1htNMm4+i5tNBWD18H2Po/oRClh1lHsLDPD7HaOUOhKASPDqxeIamd/n6HHW2zDHe3v7JpPyPOEo1ZHOa1CXMC5s9aj7tY46f/rSOTw5FclRXPG5O/crq9p1X6MYS4g1R2/X5efnI622EHzLS96Kg7L9XZx6ATw8UOAzJmU8KYWHVfrnYLgzsQLISf/nk4ev0y/kJdov4Rg+AQYYF+bzxsexQV2cgg6a5jSHi6IX+nd4N7x+VKeuVN308VpamAeV8axolOa2l66JAMgJBS0iHweMOdtWuVxO2Zat7JzNp7r8KIJ2RZaBA4PqBdjwh6edMI2CFQsAH46xIzjoRTX9oVVTa3GD50uDN5PzNz+rXGvWnVW6PXOdinetV0qwkpZNKwZrTVB6PrYf7NA6mgQpuy+fsZXGxyV8DuHwlyXHAAXL/GnFW3kA6zAjzJdocSL0zTk8FiLFtpk+CV5M+4CuiXfE6TVdvCnZI0ish8Zea5ublUIzr1a061wjap6lDJT6QYmS8hfdudTnFyOPmziqmfSH1KtMImzQdNo9AIflMpKydP3EHjuA/TKyeb9Sot9uiVbtLwBKepanQGGvPNwzTUKJrzt/2irQEZzzO+wHj/nPz+J2lQqFvw73cNcp4wAZOXqIRFXPnTJVfI+ajapL+6RdmRZeKWMuF+Em7f4PpXL0Ed9VCt8AAAAASUVORK5CYII=\n name: Convert to Markdown\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751336942081-source-1750400198569-target\n selected: false\n source: '1751336942081'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 
1750400203722-source-1751359716720-target\n selected: false\n source: '1750400203722'\n sourceHandle: source\n target: '1751359716720'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1751359716720-source-1751336942081-target\n source: '1751359716720'\n sourceHandle: source\n target: '1751336942081'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1751336942081'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n hybridSearchMode: weighted_score\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n vector_setting:\n embedding_model_name: jina-embeddings-v2-base-en\n embedding_provider_name: langgenius/jina/jina\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 357.7591396590142\n y: 282\n positionAbsolute:\n x: 357.7591396590142\n y: 282\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - html\n - xlsx\n - xls\n - doc\n - docx\n - csv\n - pptx\n - xml\n - ppt\n - txt\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750400203722'\n position:\n x: -580.684520226929\n y: 282\n positionAbsolute:\n x: -580.684520226929\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 316\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. 
However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 374\n height: 316\n id: '1751264451381'\n position:\n x: -1034.2054006208518\n y: 282\n positionAbsolute:\n x: -1034.2054006208518\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 374\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. 
It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -580.684520226929\n y: -21.891401375096322\n positionAbsolute:\n x: -580.684520226929\n y: -21.891401375096322\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 417\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG 
Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Markitdown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n serves as an excellent alternative to traditional document extraction nodes,\n offering robust file conversion capabilities within the Dify ecosystem.\n It leverages MarkItDown''s plugin-based architecture to provide seamless\n conversion of multiple file formats to Markdown.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 241\n height: 417\n id: '1751266402561'\n position:\n x: -266.96080929383595\n y: 372.64040589639495\n positionAbsolute:\n x: -266.96080929383595\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 241\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such\n as a paragraph, a section, or even an entire document—that include the matched\n child chunks are then retrieved. 
These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 37.74090119950054\n y: 372.64040589639495\n positionAbsolute:\n x: 37.74090119950054\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 357.7591396590142\n y: 434.3959856026883\n positionAbsolute:\n x: 357.7591396590142\n y: 434.3959856026883\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: 
array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conteúdo de Entrada\n zh_Hans: 输入文本\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em parágrafos com base no separador e no comprimento\n máximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuperá-lo.\n zh_Hans: 根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: 父块模式\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Parágrafo\n zh_Hans: 段落\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: 全文\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divisão\n zh_Hans: 用于分块的分隔符\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: 父块分隔符\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento máximo para divisão\n zh_Hans: 用于分块的最大长度\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento Máximo do Bloco Pai\n zh_Hans: 最大父块长度\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. 
'\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivisão\n zh_Hans: 用于子分块的分隔符\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivisão\n zh_Hans: 子分块分隔符\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento máximo para subdivisão\n zh_Hans: 用于子分块的最大长度\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento Máximo de Subdivisão\n zh_Hans: 子分块最大长度\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espaços extras no texto\n zh_Hans: 是否移除文本中的连续空格、换行符和制表符\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espaços consecutivos, novas linhas e guias\n zh_Hans: 替换连续空格、换行符和制表符\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: 是否移除文本中的URL和电子邮件地址\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: 删除所有URL和电子邮件地址\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius/parentchild_chunker/parentchild_chunker\n provider_name: langgenius/parentchild_chunker/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1751359716720.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n 
type: tool\n height: 52\n id: '1751336942081'\n position:\n x: 37.74090119950054\n y: 282\n positionAbsolute:\n x: 37.74090119950054\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema: null\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Upload files for processing\n ja_JP: Upload files for processing\n pt_BR: Carregar arquivos para processamento\n zh_Hans: 上传文件进行处理\n label:\n en_US: Files\n ja_JP: Files\n pt_BR: Arquivos\n zh_Hans: 文件\n llm_description: ''\n max: null\n min: null\n name: files\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: files\n params:\n files: ''\n provider_id: yevanchen/markitdown/markitdown\n provider_name: yevanchen/markitdown/markitdown\n provider_type: builtin\n selected: false\n title: markitdown\n tool_configurations: {}\n tool_description: Python tool for converting files and office documents to\n Markdown.\n tool_label: markitdown\n tool_name: markitdown\n tool_node_version: '2'\n tool_parameters:\n files:\n type: variable\n value:\n - '1750400203722'\n - file\n type: tool\n height: 52\n id: '1751359716720'\n position:\n x: -266.96080929383595\n y: 282\n positionAbsolute:\n x: -266.96080929383595\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 301\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MarkItDown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n recommended for converting and handling a wide range of file formats, particularly\n for transforming content into Markdown. It works especially well for converting\n native Office files—such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"DOCX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"XLSX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"PPTX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"—into\n Markdown to facilitate better information processing. 
However, as some users\n have noted its suboptimal performance in extracting content from PDF files,\n using it for PDFs is not recommended.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 301\n id: '1753425718313'\n position:\n x: -580.684520226929\n y: 372.64040589639495\n positionAbsolute:\n x: -580.684520226929\n y: 372.64040589639495\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n viewport:\n x: 747.6785299994758\n y: 94.6209873206409\n zoom: 0.8152773235379324\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n",
+ "graph": {
+ "edges": [
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "knowledge-index"
+ },
+ "id": "1751336942081-source-1750400198569-target",
+ "selected": false,
+ "source": "1751336942081",
+ "sourceHandle": "source",
+ "target": "1750400198569",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "tool"
+ },
+ "id": "1750400203722-source-1751359716720-target",
+ "selected": false,
+ "source": "1750400203722",
+ "sourceHandle": "source",
+ "target": "1751359716720",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "tool"
+ },
+ "id": "1751359716720-source-1751336942081-target",
+ "source": "1751359716720",
+ "sourceHandle": "source",
+ "target": "1751336942081",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ }
+ ],
+ "nodes": [
+ {
+ "data": {
+ "chunk_structure": "hierarchical_model",
+ "embedding_model": "jina-embeddings-v2-base-en",
+ "embedding_model_provider": "langgenius/jina/jina",
+ "index_chunk_variable_selector": [
+ "1751336942081",
+ "result"
+ ],
+ "indexing_technique": "high_quality",
+ "keyword_number": 10,
+ "retrieval_model": {
+ "hybridSearchMode": "weighted_score",
+ "score_threshold": 0.5,
+ "score_threshold_enabled": false,
+ "search_method": "hybrid_search",
+ "top_k": 3,
+ "vector_setting": {
+ "embedding_model_name": "jina-embeddings-v2-base-en",
+ "embedding_provider_name": "langgenius/jina/jina"
+ }
+ },
+ "selected": true,
+ "title": "Knowledge Base",
+ "type": "knowledge-index"
+ },
+ "height": 114,
+ "id": "1750400198569",
+ "position": {
+ "x": 357.7591396590142,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": 357.7591396590142,
+ "y": 282
+ },
+ "selected": true,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "File",
+ "datasource_name": "upload-file",
+ "datasource_parameters": {},
+ "fileExtensions": [
+ "html",
+ "xlsx",
+ "xls",
+ "doc",
+ "docx",
+ "csv",
+ "pptx",
+ "xml",
+ "ppt",
+ "txt"
+ ],
+ "plugin_id": "langgenius/file",
+ "provider_name": "file",
+ "provider_type": "local_file",
+ "selected": false,
+ "title": "File",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1750400203722",
+ "position": {
+ "x": -580.684520226929,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -580.684520226929,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 316,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 374
+ },
+ "height": 316,
+ "id": "1751264451381",
+ "position": {
+ "x": -1034.2054006208518,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -1034.2054006208518,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 374
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 260,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see 
\",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 1182
+ },
+ "height": 260,
+ "id": "1751266376760",
+ "position": {
+ "x": -580.684520226929,
+ "y": -21.891401375096322
+ },
+ "positionAbsolute": {
+ "x": -580.684520226929,
+ "y": -21.891401375096322
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 1182
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 417,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Markitdown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" serves as an excellent alternative to traditional document extraction nodes, offering robust file conversion capabilities within the Dify ecosystem. It leverages MarkItDown's plugin-based architecture to provide seamless conversion of multiple file formats to Markdown.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 241
+ },
+ "height": 417,
+ "id": "1751266402561",
+ "position": {
+ "x": -266.96080929383595,
+ "y": 372.64040589639495
+ },
+ "positionAbsolute": {
+ "x": -266.96080929383595,
+ "y": 372.64040589639495
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 241
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 554,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections—such as a paragraph, a section, or even an entire document—that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 554,
+ "id": "1751266447821",
+ "position": {
+ "x": 37.74090119950054,
+ "y": 372.64040589639495
+ },
+ "positionAbsolute": {
+ "x": 37.74090119950054,
+ "y": 372.64040589639495
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 411,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 411,
+ "id": "1751266580099",
+ "position": {
+ "x": 357.7591396590142,
+ "y": 434.3959856026883
+ },
+ "positionAbsolute": {
+ "x": 357.7591396590142,
+ "y": 434.3959856026883
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "result": {
+ "description": "Parent child chunks result",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "",
+ "ja_JP": "",
+ "pt_BR": "",
+ "zh_Hans": ""
+ },
+ "label": {
+ "en_US": "Input Content",
+ "ja_JP": "Input Content",
+ "pt_BR": "Conteúdo de Entrada",
+ "zh_Hans": "输入文本"
+ },
+ "llm_description": "The text you want to chunk.",
+ "max": null,
+ "min": null,
+ "name": "input_text",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": "paragraph",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "pt_BR": "Dividir texto em parágrafos com base no separador e no comprimento máximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuperá-lo.",
+ "zh_Hans": "根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块或整个文档用作父块并直接检索。"
+ },
+ "label": {
+ "en_US": "Parent Mode",
+ "ja_JP": "Parent Mode",
+ "pt_BR": "Modo Pai",
+ "zh_Hans": "父块模式"
+ },
+ "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "max": null,
+ "min": null,
+ "name": "parent_mode",
+ "options": [
+ {
+ "label": {
+ "en_US": "Paragraph",
+ "ja_JP": "Paragraph",
+ "pt_BR": "Parágrafo",
+ "zh_Hans": "段落"
+ },
+ "value": "paragraph"
+ },
+ {
+ "label": {
+ "en_US": "Full Document",
+ "ja_JP": "Full Document",
+ "pt_BR": "Documento Completo",
+ "zh_Hans": "全文"
+ },
+ "value": "full_doc"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": "\n\n",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for chunking",
+ "ja_JP": "Separator used for chunking",
+ "pt_BR": "Separador usado para divisão",
+ "zh_Hans": "用于分块的分隔符"
+ },
+ "label": {
+ "en_US": "Parent Delimiter",
+ "ja_JP": "Parent Delimiter",
+ "pt_BR": "Separador de Pai",
+ "zh_Hans": "父块分隔符"
+ },
+ "llm_description": "The separator used to split chunks",
+ "max": null,
+ "min": null,
+ "name": "separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 1024,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for chunking",
+ "ja_JP": "Maximum length for chunking",
+ "pt_BR": "Comprimento máximo para divisão",
+ "zh_Hans": "用于分块的最大长度"
+ },
+ "label": {
+ "en_US": "Maximum Parent Chunk Length",
+ "ja_JP": "Maximum Parent Chunk Length",
+ "pt_BR": "Comprimento Máximo do Bloco Pai",
+ "zh_Hans": "最大父块长度"
+ },
+ "llm_description": "Maximum length allowed per chunk",
+ "max": null,
+ "min": null,
+ "name": "max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": ". ",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for subchunking",
+ "ja_JP": "Separator used for subchunking",
+ "pt_BR": "Separador usado para subdivisão",
+ "zh_Hans": "用于子分块的分隔符"
+ },
+ "label": {
+ "en_US": "Child Delimiter",
+ "ja_JP": "Child Delimiter",
+ "pt_BR": "Separador de Subdivisão",
+ "zh_Hans": "子分块分隔符"
+ },
+ "llm_description": "The separator used to split subchunks",
+ "max": null,
+ "min": null,
+ "name": "subchunk_separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 512,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for subchunking",
+ "ja_JP": "Maximum length for subchunking",
+ "pt_BR": "Comprimento máximo para subdivisão",
+ "zh_Hans": "用于子分块的最大长度"
+ },
+ "label": {
+ "en_US": "Maximum Child Chunk Length",
+ "ja_JP": "Maximum Child Chunk Length",
+ "pt_BR": "Comprimento Máximo de Subdivisão",
+ "zh_Hans": "子分块最大长度"
+ },
+ "llm_description": "Maximum length allowed per subchunk",
+ "max": null,
+ "min": null,
+ "name": "subchunk_max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove consecutive spaces, newlines and tabs",
+ "ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
+ "pt_BR": "Se deve remover espaços extras no texto",
+ "zh_Hans": "是否移除文本中的连续空格、换行符和制表符"
+ },
+ "label": {
+ "en_US": "Replace consecutive spaces, newlines and tabs",
+ "ja_JP": "Replace consecutive spaces, newlines and tabs",
+ "pt_BR": "Substituir espaços consecutivos, novas linhas e guias",
+ "zh_Hans": "替换连续空格、换行符和制表符"
+ },
+ "llm_description": "Whether to remove consecutive spaces, newlines and tabs",
+ "max": null,
+ "min": null,
+ "name": "remove_extra_spaces",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove URLs and emails in the text",
+ "ja_JP": "Whether to remove URLs and emails in the text",
+ "pt_BR": "Se deve remover URLs e e-mails no texto",
+ "zh_Hans": "是否移除文本中的URL和电子邮件地址"
+ },
+ "label": {
+ "en_US": "Delete all URLs and email addresses",
+ "ja_JP": "Delete all URLs and email addresses",
+ "pt_BR": "Remover todas as URLs e e-mails",
+ "zh_Hans": "删除所有URL和电子邮件地址"
+ },
+ "llm_description": "Whether to remove URLs and emails in the text",
+ "max": null,
+ "min": null,
+ "name": "remove_urls_emails",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ }
+ ],
+ "params": {
+ "input_text": "",
+ "max_length": "",
+ "parent_mode": "",
+ "remove_extra_spaces": "",
+ "remove_urls_emails": "",
+ "separator": "",
+ "subchunk_max_length": "",
+ "subchunk_separator": ""
+ },
+ "provider_id": "langgenius/parentchild_chunker/parentchild_chunker",
+ "provider_name": "langgenius/parentchild_chunker/parentchild_chunker",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Parent-child Chunker",
+ "tool_configurations": {},
+ "tool_description": "Process documents into parent-child chunk structures",
+ "tool_label": "Parent-child Chunker",
+ "tool_name": "parentchild_chunker",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "input_text": {
+ "type": "mixed",
+ "value": "{{#1751359716720.text#}}"
+ },
+ "max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Maximum_Parent_Length"
+ ]
+ },
+ "parent_mode": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Parent_Mode"
+ ]
+ },
+ "separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.Parent_Delimiter#}}"
+ },
+ "subchunk_max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Maximum_Child_Length"
+ ]
+ },
+ "subchunk_separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.Child_Delimiter#}}"
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1751336942081",
+ "position": {
+ "x": 37.74090119950054,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": 37.74090119950054,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": null,
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Upload files for processing",
+ "ja_JP": "Upload files for processing",
+ "pt_BR": "Carregar arquivos para processamento",
+ "zh_Hans": "上传文件进行处理"
+ },
+ "label": {
+ "en_US": "Files",
+ "ja_JP": "Files",
+ "pt_BR": "Arquivos",
+ "zh_Hans": "文件"
+ },
+ "llm_description": "",
+ "max": null,
+ "min": null,
+ "name": "files",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "files"
+ }
+ ],
+ "params": {
+ "files": ""
+ },
+ "provider_id": "yevanchen/markitdown/markitdown",
+ "provider_name": "yevanchen/markitdown/markitdown",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "markitdown",
+ "tool_configurations": {},
+ "tool_description": "Python tool for converting files and office documents to Markdown.",
+ "tool_label": "markitdown",
+ "tool_name": "markitdown",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "files": {
+ "type": "variable",
+ "value": [
+ "1750400203722",
+ "file"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1751359716720",
+ "position": {
+ "x": -266.96080929383595,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -266.96080929383595,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 301,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MarkItDown\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is recommended for converting and handling a wide range of file formats, particularly for transforming content into Markdown. It works especially well for converting native Office files—such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"DOCX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"XLSX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"PPTX\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"—into Markdown to facilitate better information processing. However, as some users have noted its suboptimal performance in extracting content from PDF files, using it for PDFs is not recommended.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 301,
+ "id": "1753425718313",
+ "position": {
+ "x": -580.684520226929,
+ "y": 372.64040589639495
+ },
+ "positionAbsolute": {
+ "x": -580.684520226929,
+ "y": 372.64040589639495
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ }
+ ],
+ "viewport": {
+ "x": 747.6785299994758,
+ "y": 94.6209873206409,
+ "zoom": 0.8152773235379324
+ }
+ },
+ "icon_info": {
+ "icon": "9d658c3a-b22f-487d-8223-db51e9012505",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQfElEQVR4Ae2dT4wbVx3H35vxrjd/dmMnIZA0UrxtilQuTYUEB5CySD2CSJE4Vl0uHIpQk1sFh7YHqt7aCsGBS7fqEQlSwRGpi8QFJMRyQoKEdaR2U9qkdva/vfYMv+8b/7zjsZ2xPTP22PN70u6bP2/en+/7+Pf+zMwbrVLiNu9XSpSVUpP+tOsUlKsKtH/l4Z6rXNrW2uyrc6cthAs6hMVfllyVCou/Y+eq6sM9x3+sfO6Uxvl7Squqq6yyTT7tl5cvFss4MWmXG3cGNjcrhWZerWjlXFdKlyj9a/RXcogyOCMX/nsbBJ93vOWZMPLPKFCg//g7dqRZl070y2Wn6VfteHKqu1tfUGC1QTqX6aJ/utrasGtqfXm5CEDH5o5zl2CSZN1WKPrrBNMKlR/bXc6yLKUtrXK2rTSJhj8c+3zboeN0riXkVwrdvxkO3xXpDB/AD5N/nFxM7P/vEbUhLec0m+r8okXhHBPWcRwCkCBskk/bPZ2B0l23ctb7yxeKGz3DxHgwMQBh6Zy8s0oofd8PHWCxc7YBzSbY5ubm2sD1KtdnBKDfXViy/LuyHVBgGL2aBChgPGocqQZtN44agdhU2XWcN65ePr8WPBHXfuwAAjy1oF6hX9pNyqRpIgBdPj+v5ufmDXxszQYpxDCCDhLfrIeJqhcgrNVr6oh8n5UsW1qvUb/xjbj1ixXAO1sPblDD+TZlsoSM5uZy6uTCCeNjfxQXVdBR0pzma+LUq1arGxh9ljF2ixgLgBjBUv/jPW5q4wCPIYhTUI5zlv0k9AKAu3t7fot4myzirThG0pE7VJufVtDc/gPwoWk9efKkWlpcjGT1ZhmQaSwbDEqhcEadOnXKDAypDDdQ53c+frAatTwjA4i+3uZW5W3Hcd+hTBTm5+dMJhcW8lHzJNenVAH045eWFk1/HnVOsxPv3d16iC7XyG6kJhhNLoH3e5pDugard+LECZUUeEk0KSOrNQUXjkuvw8OaOjg48KaCaOrGsvQLozTJQ1tAA5/rfgT4ME935sxSYvBNQX1nNoswOKh7MAAWqEn+CGwMK8hQALbho1Eu5vBgjk0Ghk1Vws+EAqh7MAAWyOFu1tAQDgygwDcTzMReiKgQDgRgL/iGmUyOvdQSYaoUAAujWsKBADQDDl+zK/Clqv5TkZkuCGmQau6KheQuFEBMtaCTCVO7uHi6/VBASLxyOoMKAEIwYsYFGJjkndfCZHgsgHfuP1il5yhuMt0m4rAY5XymFeA+oddK6ps0T4hnAvq6vgCi36ddc1/XzPMJfH01lBMBBcAK5oY9p18DS4Eg7d2+ANKQGjPcBcx+JzXJ3M6FbMycAmAGd8fIFfCcQL8C9gQQTS9dcKOT5H5RyHFRoLcCuHeMphjPCdzZqtzoFaongNT0ms4jzKg0vb1kk2ODKAD4uCkmDN/uNSruAvDu/QrgKwE8NL/iRIEoCqApxtM05ErOvNM1IOkCkO4uryL0aTKf4kSBOBTAQ8nGaf1K0Ap2ANjq+5VAbIvaONKXODKugI8n856QX44OALnvl5+XZ/r8Isl2dAXYCuIlNX9sbQA3P65coxPS9/OrI9uxKQAryCNimhdc4YjbANKboqs4OOd1GPm8+KJAbArwoJbetlvhSNsAKktfx0Fpflka8eNWAK/lwpElNKyZbfzDyMTJuxVsnz1bhJcaF3zEPDUZm5KMpOlFfqzcUK0+Mo/xWzVdxDIgxgI2880V6Ckj3ymhakqziT4gVsWAw/pA8A2A2tUYgKic5Z3EtjhRIAkFsPaPca1+oNcH1PpZHMzROi3iRIEkFWi9P4KOYAnp8FJTZse2PR5xIi0uTX2YtGgyzfnAYlRw1Bobo8fEmSa4Tec0l1DynmoF0A9suRJ8ix8WlKdeWrKIl6gCAJBZA3sWrQhXQopWCpvfRJWQyCemgN8KWtptFpATWu1oYhmShLOlQI6nYprNEi2Kq0sovqW5O4g9caJAcgqwBaQlmQu0gHBrFVNCUZwoMA4FGECwZ7na6wO2D44jB5JGphXgQYilrCvtdlcAzDQTEys8AaivIHVbbsNNrBKyljAbu6Zyi20LmDURpLyTU4AHvDTsOCMATq4eJGVSAGNfMw+IrxSJEwXGoQDf9HDxCggl6AEoE9Hj0F7SCCggTXBAENkdrwIC4Hj1ltQCCuQ+33EVlo+pWw49pRA4G8Nu1Of5vvpqNYZcZDeKf79lelgjC5DEOzn4Bt32jvcRShp6uNIHHLl65MJRFOB5QLqW7gXLIGQUDeWaCAoEAYwQlVwqCkRTIIcvasOdjelD0En0GaIVUa6OU4GofXrOS67hcZfAsIOTEF8UCFdAAAzXSEIkqIAAmKC4EnW4AgJguEYSIkEFBMAExZWowxUQAMM1khAJKiAAJiiuRB2ugAAYrpGESFABATBBcSXqcAUEwHCNJESCCgiACYorUYcrIACGayQhElRAAExQXIk6XAEBMFwjCZGgAgJgguJK1OEK8BrR4SGnNETwnYhXf7uvfvf3+kilWf12Xv3su/wpei+KqO+sBPMXNb6RCjbBizJnAd/64Un1zMXhP0fxzCW7C74J1tvMJJ05AFFzH/z4tLo8xLI4CPvrF+X7yUlQn0kAl05oA+HSQvhyJIAPwD4xBLBJVNSsxplJAFGZAApghblfkeUT+MJUGv18ZgGEZOjXoU/Yz/38eydMmH7n5Xh0BTIH4F//Sx+m8LkffH1e/fT5Bd8RbxPHXvpW55fj/7XV7AonB6IpkDkAf/LBnvq44i0LwdIFYcN0SxBKXPMyXSsuXgUyB+D2gate/M1uF4Robr/5ZM40ucG5PsCHaz4JgBtvVWQztswBiGoGSLCE24e0RKLPYcARnG5BGIQV+HxCxbiZSQChH/pzb/7hoENKTM8ER7wII32/Dpli3cksgFARt+R++afDvoLi3Ki37fyRYqCDv1Hd81+bi3T9qOmO47qZvxccJiIgg+ULjnjX/lJ7LJxh8fJ5gOef6hkW6KjXcz7S6mfaAnKl/IKaWf/0zN9oqubNP3Y2zxx2GD8ID0AcxhL2uh4DpVlys1WaCDWDUe44HFvDMEsYhI/z9g0C0P9j4ePT6osFTLDmABke/wq6MEvYDz50Fx7XZw2mMw37YgETriW2dGz5OLngPh/PEnwos1hArvkE/cdZwmCyvcCcRcvH5RYLyEok7PezhGHJRnmCOyzuNJwXCzjGWuhnCftlYdbhQ7kFwH61n9DxQSHMAnwCYEKQhUUbBmFW4BMAw0hJ8Hw/CLMEnwCYIGCDRB2EMGvwQaOZHwXH/Z5t3PEBQnb+bT426/7MAzgNFZhF8LheZBTMSog/EQUEwInILo
myAgIgKyH+RBQQACciuyTKCgiArIT4E1FAAJyI7JIoKyAAshLiT0QBAXAiskuirIAAyEqIPxEFBMCJyC6JsgICICsh/kQUEAAnIrskygoIgKyE+BNRQACciOySKCuQe7DjLdbYyHUu2sgBxBcF/Ap8th0PJ9UWd2IB/erK9tgVAIBVpOq6nYs1jj0nkmBmFPCxVrVcpQXAzFR9OgrqB1Df3fpik7JVKhTOKMuSFjkdVTTbuXAcR1Wrj1DIshA323Wd+tIJgKmvotnOoAA42/WbytK5TnvAi0GIKiOXTjOe+Z1UllgylSoFeBBCn4qsigVMVdVkLzMWKESxHZkHzF7tp6DE1AS7ZjzsutIEp6A+MpGFpuN99FG7WqZhMlHjKSukv7G1tNsahNDkoDhRYBwKcGvrKOeepXTrXvDx0HgceZA0MqwAj4LBnuVq17sXrNpzMxmWRoo+DgWardbWVVaZBiF2GYk2GvI18HGIL2kcP3llwwLSAoFliNI2i6KQKJCwAr6bHmVr+WKxjPTwhILMBSasvERvFABrcGCP74SUzRH/+NgckH+iQLwKNI+7ehuImZfoxU7p6OhI5fP5eFOMGFtc7yBEzMbUXn5hiW1MOorAk9Bk6+4hR17uHNfs+OhMR24lFzOnQKPRMGXSyjUW0ADoWu46jjZat0hMCPknCiSgQKPpzba42joG0K7Z60gLFlAGIgmoLlG2FWgceRbQrql1HDR9wOXlYvXO1hfrNBez4hCE1hx3DdvXpWYjbX2a1AjTykia+8wMH2V1A8why+0eKs0D/hkH6vXjD6dgX5woEJcCh/WaiYqeiDasYacNIL0St44DNQEQMohLQAG2gPa8tcbRtwF8+mJxne4Gr+OOCAfkQOKLAlEVqNVq5mYHxVNevlA0AxDE2QYQOzQ0/hD+/uEBPHGiQGwKcMvqOvoNf6QdAFo1YxqrsIBiBf0yyXYUBXw8la9eLq754+oAECMTmoZ5FwHECvplku0oCuzu7XmXu+77wXg6AMTJXN16h7wyqD08PAyGl31RYCgF/H2/p54493rw4i4AYQVpwaJbCHhwcCgT00HFZH9gBfDYFRiCC/b9OJIuAHHi6qXibR4R7+22zCdfIb4oMKAC6Ma1Hr26Hez7cRQ9AcRJW+sfkVfFEzLSFLNc4g+qwOFhTdVr5qZG1dJei9rr2r4Aeg+qekNm0xTL0h299JNjPRTwml5vKo+a3lv80HOPoJ3zgMEAT10qvkO3Td7F5PT2zo6sHxMUSPa7FAB8YAXMgJ1+TS9f2NcCcgD7yHpd081jtOU7u7syKGFhxO9SANAZRvDIvas2rl4+d7MrUOBAKIAYFWutX6Dryk16lmtnmywhJSROFPArYFpJYgOMkCtblmHGH6TndiiAuMq8PKL1d2hTIOwpY7YPdsFHrDyu3+dXayAAcUFPCGVg4tcyk9umz+e3fEPAB8EGBhCBgxDKwASqZNfxgKPd7A4JH5QbCkBcwBDywOTR9rbME0KYjDnM86HuzUQzDThorm/gZtcv1dAA4mJA+OSls8/xFM3+/oHCDWf8IsTNtgI80t3f329PtVj10eCDUiMByBJjmO227phg1htNMm4+i5tNBWD18H2Po/oRClh1lHsLDPD7HaOUOhKASPDqxeIamd/n6HHW2zDHe3v7JpPyPOEo1ZHOa1CXMC5s9aj7tY46f/rSOTw5FclRXPG5O/crq9p1X6MYS4g1R2/X5efnI622EHzLS96Kg7L9XZx6ATw8UOAzJmU8KYWHVfrnYLgzsQLISf/nk4ev0y/kJdov4Rg+AQYYF+bzxsexQV2cgg6a5jSHi6IX+nd4N7x+VKeuVN308VpamAeV8axolOa2l66JAMgJBS0iHweMOdtWuVxO2Zat7JzNp7r8KIJ2RZaBA4PqBdjwh6edMI2CFQsAH46xIzjoRTX9oVVTa3GD50uDN5PzNz+rXGvWnVW6PXOdinetV0qwkpZNKwZrTVB6PrYf7NA6mgQpuy+fsZXGxyV8DuHwlyXHAAXL/GnFW3kA6zAjzJdocSL0zTk8FiLFtpk+CV5M+4CuiXfE6TVdvCnZI0ish8Zea5ublUIzr1a061wjap6lDJT6QYmS8hfdudTnFyOPmziqmfSH1KtMImzQdNo9AIflMpKydP3EHjuA/TKyeb9Sot9uiVbtLwBKepanQGGvPNwzTUKJrzt/2irQEZzzO+wHj/nPz+J2lQqFvw73cNcp4wAZOXqIRFXPnTJVfI+ajapL+6RdmRZeKWMuF+Em7f4PpXL0Ed9VCt8AAAAASUVORK5CYII="
+ },
+ "id": "982d1788-837a-40c8-b7de-d37b09a9b2bc",
+ "name": "Convert to Markdown",
+ "icon": {
+ "icon": "9d658c3a-b22f-487d-8223-db51e9012505",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQfElEQVR4Ae2dT4wbVx3H35vxrjd/dmMnIZA0UrxtilQuTYUEB5CySD2CSJE4Vl0uHIpQk1sFh7YHqt7aCsGBS7fqEQlSwRGpi8QFJMRyQoKEdaR2U9qkdva/vfYMv+8b/7zjsZ2xPTP22PN70u6bP2/en+/7+Pf+zMwbrVLiNu9XSpSVUpP+tOsUlKsKtH/l4Z6rXNrW2uyrc6cthAs6hMVfllyVCou/Y+eq6sM9x3+sfO6Uxvl7Squqq6yyTT7tl5cvFss4MWmXG3cGNjcrhWZerWjlXFdKlyj9a/RXcogyOCMX/nsbBJ93vOWZMPLPKFCg//g7dqRZl070y2Wn6VfteHKqu1tfUGC1QTqX6aJ/utrasGtqfXm5CEDH5o5zl2CSZN1WKPrrBNMKlR/bXc6yLKUtrXK2rTSJhj8c+3zboeN0riXkVwrdvxkO3xXpDB/AD5N/nFxM7P/vEbUhLec0m+r8okXhHBPWcRwCkCBskk/bPZ2B0l23ctb7yxeKGz3DxHgwMQBh6Zy8s0oofd8PHWCxc7YBzSbY5ubm2sD1KtdnBKDfXViy/LuyHVBgGL2aBChgPGocqQZtN44agdhU2XWcN65ePr8WPBHXfuwAAjy1oF6hX9pNyqRpIgBdPj+v5ufmDXxszQYpxDCCDhLfrIeJqhcgrNVr6oh8n5UsW1qvUb/xjbj1ixXAO1sPblDD+TZlsoSM5uZy6uTCCeNjfxQXVdBR0pzma+LUq1arGxh9ljF2ixgLgBjBUv/jPW5q4wCPIYhTUI5zlv0k9AKAu3t7fot4myzirThG0pE7VJufVtDc/gPwoWk9efKkWlpcjGT1ZhmQaSwbDEqhcEadOnXKDAypDDdQ53c+frAatTwjA4i+3uZW5W3Hcd+hTBTm5+dMJhcW8lHzJNenVAH045eWFk1/HnVOsxPv3d16iC7XyG6kJhhNLoH3e5pDugard+LECZUUeEk0KSOrNQUXjkuvw8OaOjg48KaCaOrGsvQLozTJQ1tAA5/rfgT4ME935sxSYvBNQX1nNoswOKh7MAAWqEn+CGwMK8hQALbho1Eu5vBgjk0Ghk1Vws+EAqh7MAAWyOFu1tAQDgygwDcTzMReiKgQDgRgL/iGmUyOvdQSYaoUAAujWsKBADQDDl+zK/Clqv5TkZkuCGmQau6KheQuFEBMtaCTCVO7uHi6/VBASLxyOoMKAEIwYsYFGJjkndfCZHgsgHfuP1il5yhuMt0m4rAY5XymFeA+oddK6ps0T4hnAvq6vgCi36ddc1/XzPMJfH01lBMBBcAK5oY9p18DS4Eg7d2+ANKQGjPcBcx+JzXJ3M6FbMycAmAGd8fIFfCcQL8C9gQQTS9dcKOT5H5RyHFRoLcCuHeMphjPCdzZqtzoFaongNT0ms4jzKg0vb1kk2ODKAD4uCkmDN/uNSruAvDu/QrgKwE8NL/iRIEoCqApxtM05ErOvNM1IOkCkO4uryL0aTKf4kSBOBTAQ8nGaf1K0Ap2ANjq+5VAbIvaONKXODKugI8n856QX44OALnvl5+XZ/r8Isl2dAXYCuIlNX9sbQA3P65coxPS9/OrI9uxKQAryCNimhdc4YjbANKboqs4OOd1GPm8+KJAbArwoJbetlvhSNsAKktfx0Fpflka8eNWAK/lwpElNKyZbfzDyMTJuxVsnz1bhJcaF3zEPDUZm5KMpOlFfqzcUK0+Mo/xWzVdxDIgxgI2880V6Ckj3ymhakqziT4gVsWAw/pA8A2A2tUYgKic5Z3EtjhRIAkFsPaPca1+oNcH1PpZHMzROi3iRIEkFWi9P4KOYAnp8FJTZse2PR5xIi0uTX2YtGgyzfnAYlRw1Bobo8fEmSa4Tec0l1DynmoF0A9suRJ8ix8WlKdeWrKIl6gCAJBZA3sWrQhXQopWCpvfRJWQyCemgN8KWtptFpATWu1oYhmShLOlQI6nYprNEi2Kq0sovqW5O4g9caJAcgqwBaQlmQu0gHBrFVNCUZwoMA4FGECwZ7na6wO2D44jB5JGphXgQYilrCvtdlcAzDQTEys8AaivIHVbbsNNrBKyljAbu6Zyi20LmDURpLyTU4AHvDTsOCMATq4eJGVSAGNfMw+IrxSJEwXGoQDf9HDxCggl6AEoE9Hj0F7SCCggTXBAENkdrwIC4Hj1ltQCCuQ+33EVlo+pWw49pRA4G8Nu1Of5vvpqNYZcZDeKf79lelgjC5DEOzn4Bt32jvcRShp6uNIHHLl65MJRFOB5QLqW7gXLIGQUDeWaCAoEAYwQlVwqCkRTIIcvasOdjelD0En0GaIVUa6OU4GofXrOS67hcZfAsIOTEF8UCFdAAAzXSEIkqIAAmKC4EnW4AgJguEYSIkEFBMAExZWowxUQAMM1khAJKiAAJiiuRB2ugAAYrpGESFABATBBcSXqcAUEwHCNJESCCgiACYorUYcrIACGayQhElRAAExQXIk6XAEBMFwjCZGgAgJgguJK1OEK8BrR4SGnNETwnYhXf7uvfvf3+kilWf12Xv3su/wpei+KqO+sBPMXNb6RCjbBizJnAd/64Un1zMXhP0fxzCW7C74J1tvMJJ05AFFzH/z4tLo8xLI4CPvrF+X7yUlQn0kAl05oA+HSQvhyJIAPwD4xBLBJVNSsxplJAFGZAApghblfkeUT+MJUGv18ZgGEZOjXoU/Yz/38eydMmH7n5Xh0BTIH4F//Sx+m8LkffH1e/fT5Bd8RbxPHXvpW55fj/7XV7AonB6IpkDkAf/LBnvq44i0LwdIFYcN0SxBKXPMyXSsuXgUyB+D2gate/M1uF4Robr/5ZM40ucG5PsCHaz4JgBtvVWQztswBiGoGSLCE24e0RKLPYcARnG5BGIQV+HxCxbiZSQChH/pzb/7hoENKTM8ER7wII32/Dpli3cksgFARt+R++afDvoLi3Ki37fyRYqCDv1Hd81+bi3T9qOmO47qZvxccJiIgg+ULjnjX/lJ7LJxh8fJ5gOef6hkW6KjXcz7S6mfaAnKl/IKaWf/0zN9oqubNP3Y2zxx2GD8ID0AcxhL2uh4DpVlys1WaCDWDUe44HFvDMEsYhI/z9g0C0P9j4ePT6osFTLDmABke/wq6MEvYDz50Fx7XZw2mMw37YgETriW2dGz5OLngPh/PEnwos1hArvkE/cdZwmCyvcCcRcvH5RYLyEok7PezhGHJRnmCOyzuNJwXCzjGWuhnCftlYdbhQ7kFwH61n9DxQSHMAnwCYEKQhUUbBmFW4BMAw0hJ8Hw/CLMEnwCYIGCDRB2EMGvwQaOZHwXH/Z5t3PEBQnb+bT426/7MAzgNFZhF8LheZBTMSog/EQUEwInILo
myAgIgKyH+RBQQACciuyTKCgiArIT4E1FAAJyI7JIoKyAAshLiT0QBAXAiskuirIAAyEqIPxEFBMCJyC6JsgICICsh/kQUEAAnIrskygoIgKyE+BNRQACciOySKCuQe7DjLdbYyHUu2sgBxBcF/Ap8th0PJ9UWd2IB/erK9tgVAIBVpOq6nYs1jj0nkmBmFPCxVrVcpQXAzFR9OgrqB1Df3fpik7JVKhTOKMuSFjkdVTTbuXAcR1Wrj1DIshA323Wd+tIJgKmvotnOoAA42/WbytK5TnvAi0GIKiOXTjOe+Z1UllgylSoFeBBCn4qsigVMVdVkLzMWKESxHZkHzF7tp6DE1AS7ZjzsutIEp6A+MpGFpuN99FG7WqZhMlHjKSukv7G1tNsahNDkoDhRYBwKcGvrKOeepXTrXvDx0HgceZA0MqwAj4LBnuVq17sXrNpzMxmWRoo+DgWardbWVVaZBiF2GYk2GvI18HGIL2kcP3llwwLSAoFliNI2i6KQKJCwAr6bHmVr+WKxjPTwhILMBSasvERvFABrcGCP74SUzRH/+NgckH+iQLwKNI+7ehuImZfoxU7p6OhI5fP5eFOMGFtc7yBEzMbUXn5hiW1MOorAk9Bk6+4hR17uHNfs+OhMR24lFzOnQKPRMGXSyjUW0ADoWu46jjZat0hMCPknCiSgQKPpzba42joG0K7Z60gLFlAGIgmoLlG2FWgceRbQrql1HDR9wOXlYvXO1hfrNBez4hCE1hx3DdvXpWYjbX2a1AjTykia+8wMH2V1A8why+0eKs0D/hkH6vXjD6dgX5woEJcCh/WaiYqeiDasYacNIL0St44DNQEQMohLQAG2gPa8tcbRtwF8+mJxne4Gr+OOCAfkQOKLAlEVqNVq5mYHxVNevlA0AxDE2QYQOzQ0/hD+/uEBPHGiQGwKcMvqOvoNf6QdAFo1YxqrsIBiBf0yyXYUBXw8la9eLq754+oAECMTmoZ5FwHECvplku0oCuzu7XmXu+77wXg6AMTJXN16h7wyqD08PAyGl31RYCgF/H2/p54493rw4i4AYQVpwaJbCHhwcCgT00HFZH9gBfDYFRiCC/b9OJIuAHHi6qXibR4R7+22zCdfIb4oMKAC6Ma1Hr26Hez7cRQ9AcRJW+sfkVfFEzLSFLNc4g+qwOFhTdVr5qZG1dJei9rr2r4Aeg+qekNm0xTL0h299JNjPRTwml5vKo+a3lv80HOPoJ3zgMEAT10qvkO3Td7F5PT2zo6sHxMUSPa7FAB8YAXMgJ1+TS9f2NcCcgD7yHpd081jtOU7u7syKGFhxO9SANAZRvDIvas2rl4+d7MrUOBAKIAYFWutX6Dryk16lmtnmywhJSROFPArYFpJYgOMkCtblmHGH6TndiiAuMq8PKL1d2hTIOwpY7YPdsFHrDyu3+dXayAAcUFPCGVg4tcyk9umz+e3fEPAB8EGBhCBgxDKwASqZNfxgKPd7A4JH5QbCkBcwBDywOTR9rbME0KYjDnM86HuzUQzDThorm/gZtcv1dAA4mJA+OSls8/xFM3+/oHCDWf8IsTNtgI80t3f329PtVj10eCDUiMByBJjmO227phg1htNMm4+i5tNBWD18H2Po/oRClh1lHsLDPD7HaOUOhKASPDqxeIamd/n6HHW2zDHe3v7JpPyPOEo1ZHOa1CXMC5s9aj7tY46f/rSOTw5FclRXPG5O/crq9p1X6MYS4g1R2/X5efnI622EHzLS96Kg7L9XZx6ATw8UOAzJmU8KYWHVfrnYLgzsQLISf/nk4ev0y/kJdov4Rg+AQYYF+bzxsexQV2cgg6a5jSHi6IX+nd4N7x+VKeuVN308VpamAeV8axolOa2l66JAMgJBS0iHweMOdtWuVxO2Zat7JzNp7r8KIJ2RZaBA4PqBdjwh6edMI2CFQsAH46xIzjoRTX9oVVTa3GD50uDN5PzNz+rXGvWnVW6PXOdinetV0qwkpZNKwZrTVB6PrYf7NA6mgQpuy+fsZXGxyV8DuHwlyXHAAXL/GnFW3kA6zAjzJdocSL0zTk8FiLFtpk+CV5M+4CuiXfE6TVdvCnZI0ish8Zea5ublUIzr1a061wjap6lDJT6QYmS8hfdudTnFyOPmziqmfSH1KtMImzQdNo9AIflMpKydP3EHjuA/TKyeb9Sot9uiVbtLwBKepanQGGvPNwzTUKJrzt/2irQEZzzO+wHj/nPz+J2lQqFvw73cNcp4wAZOXqIRFXPnTJVfI+ajapL+6RdmRZeKWMuF+Em7f4PpXL0Ed9VCt8AAAAASUVORK5CYII="
+ },
+ "language": "zh-Hans",
+ "position": 4
+ },
+ "98374ab6-9dcd-434d-983e-268bec156b43": {
+ "chunk_structure": "qa_model",
+ "description": "This template is designed to use LLM to extract key information from the input document and generate Q&A pairs indexed by questions, enabling efficient retrieval of relevant answers based on query similarity.",
+ "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/dify_extractor:0.0.5@ba7e2fd9165eda73bfcc68e31a108855197e88706e5556c058e0777ab08409b3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/notion_datasource:0.1.12@2855c4a7cffd3311118ebe70f095e546f99935e47f12c841123146f728534f55\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina_datasource:0.0.5@75942f5bbde870ad28e0345ff5ebf54ebd3aec63f0e66344ef76b88cf06b85c3\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/google_drive:0.1.6@4bc0cf8f8979ebd7321b91506b4bc8f090b05b769b5d214f2da4ce4c04ce30bd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/qa_chunk:0.0.8@1fed9644646bdd48792cdf5a1d559a3df336bd3a8edb0807227499fb56dce3af\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: bowenliang123/md_exporter:2.0.0@13e1aca1995328e41c080ff9f7f6d898df60ff74a3f4d98d6de4b18ab5b92c2e\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/firecrawl_datasource:0.2.4@37b490ebc52ac30d1c6cbfa538edcddddcfed7d5f5de58982edbd4e2094eb6e2\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius/anthropic:0.2.0@a776815b091c81662b2b54295ef4b8a54b5533c2ec1c66c7c8f2feea724f3248\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 2b887f89-b6c9-4288-be43-635fee45216b\n icon_background: '#FFEAD5'\n icon_type: image\n icon_url: 
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQjUlEQVR4Ae1dTYwcxRWuqpnd2R/veqzgxXaw2YEgRSDBEkJEwsFLDkE5xRwicogUR0g55GJWKGfjXBPJyyU3hLkFKRLmkohD4uVgHIVEOCggRTGZNTbesDbysj/end3prryveqq3Z6bnv3t2tvu91Uz9dHVV99ffvqpX9bpGigGR4tLStMiKaUeKaallXgidV1o9iMtzpc5LISiPhI6bsOqLymvtHa/KT3BCyhXCiD4B0QJpP49wXMRRV7rXCbgVLd3FjKbzymKxcPSoOYbjeyn0XPsrxbvFvOPkZjNanXQFkU2KGaHDSNXf60ppa1e1EItE5H9qqa9mMqWFwqGCT+B+YNIXAhZvL80KoU5qoSkU+NSJUkooYmMmmxGSQnyQB5EUIg3JVPJMovJlywfzkh7XmtCkT1CQdgN5ruNQGaKXdk1Z16XQ1cKhEPEGcpWQXhBavVmYmrraoExk2bEREJrOLY+epgZ+RFc7a68YZMlmMoZoGQqHhoZ8wtkyHPYHAYcICjKWd3aEU3bETrlc3bAUi66rz31j6uiF6gPRpSInIIgnymNntBQv079dHpcK0uVyw2JoeNiQz2qz6G6Da4oKAZBwu1QSOzvlXS1JRKTx5IXC4fvPRdWOrSdSAl774tYplVHn7ZhuKJsVI2OjAiHL/kOgVNr2yGg1YwwaMRICFu8uTeuyfIMgngXMTDygkByBVtxY3/A1Ig0rL6qsnisc6t2S7pmA179cPuNo/Sq6W3Sto6OjYmQklxz0+U58BKARNzc3LRFXyOCZ63V82DUBvbHe6Fn6b3gZVzg8PCTGx8d9a9W/ao4kCgFYzyAhyAjRQs0/fHhqrtub7IqAlS73bWp0hrVet9Dv7/O2tkqGiJWpoKsyq1/opkvumICGfI68BEMD83STkxP+fN3+hpSvvlMEoA1XV9e8LhmWckY/1ykJOyJgkHyYw5uYOMDk6/SpJaw8SLi2ti4wp0jLpB2TsG0C1pIPmo/n8xLGpi5vB90wNGE3JGyLgEy+Lp9Mik7rloTeYmsLoGiO722M+dDtsuZrAVZKD6M3BDfAEXAFnDEzJS3waEnA4u3/nac6ZmBwYMzH3W4LRFN8GNwI2AUzbnn8bCs4mnbB15aXTpOHyhuo+ODBSTY4WqHJxw0CMEy++mrVeOBoR8w9fOTIfCNoGhLQG/epD7HCMTY2xqsbjRDk/FAEME947949HFuhOcInG03PNO6Cy3Aq0Hl4sfDSWijGnNkEAXAGq2Mk+YqfQGjpUAKi6yV3x1MY92Ftl4UR6AaBwNLs7LU7t06F1RFKQKWkGTyCfNYrOexkzmMEmiEA28EqMPJ3Px9mFdcRsPjlF2ftMhu6XxZGoBcE0BUbf1CamnG3R4zjSrC+OgLShOJpFBg/MB4sx3FGoGsE4JQMkUqeqdWCVQTE2A/aD4xlL+au8eYTaxAI8Mm8JxQ8XEVAO/YbzrFDaRAkjveOgK8FvZfU/Ap9AhaXb5r3c2F08NjPx4cjESEALVhZRZv1XtP1KvYJ6Cp1GllDQ/wCkQcNf0eNgFVstFAya+v2CSh15iQyufu10HAYNQJ4LRdCxojhGuKGgMW7d/PkwjCDDDY+gAJLHAhgQwK/G8b74ySGgI6zPYsEkw8osMSFAMgHEhpxxmYRGgJK7Rrtp2hfFhZGIE4EsPcPxHWdWYSVMaB8AomhrFk8RpSFEYgFAeOwSjVLmm9GA54GFHKa4uTNWuEjEiyMQAwIYDMqIxlllF6FcZ4BYtkZQ7tcJSNgEKgYIcZtHxnK7EyKCE1AszACcSMAAlqugXsK2+Ki0bCNH+O+GK4/nQj4WpC4pxypzHwMTQ6mEw2+674jkK1YwtgPXGW0nsYVYBtcFkagHwhYDYjN6BXtGuzNSFPfzMII9AMBS0CyRPLKzsfsZvbjEriNNCNgjRAl1YN+v8sETDMl9u7e6b1z+SCaV3aNbu+uhVtOCQJW2WnHOeRrwJTcO9/mACDgG7xKHWQCDsADSfMlKC3wu2zUBbMVnGYe9PXe/UUPzAOSW4I3Ec0E7OtD4MY8BFL7AsiJ3/0m0Rz47Je/2hf3x2PAffGYknuRTMDkPtt9cWdKmB+HprVg+mNhBPqBgJ0HpF048qQBK0YIe8P0A3tugxDwCUh7B3IXzJTYUwSYgHsKPzfOBGQO7CkCTMA9hZ8bZwIyB/YUASbgnsLPjTMBmQN7isDArgUnfa12T5/6ADXOGnCAHkYaL4UJmManPkD3zAQcoIeRxksZ2DFg7cPYL/5ttdfdbjqtY17WgO0yhMvFggATMBZYudJ2EWACtosUl4sFASZgLLBype0iwARsFykuFwsC+8YKjuXuG1R65dZn4sWLb1UdfevUT8R3jx2vyuNE7wiwBgzBcHVruy735upXdXmc0TsCTMAQDFe3t0JyOSsOBJiAIajeXKvXdmF5IadyVocIMAFDAPvkzu263Jtrq3V5nNE7AkzAEAxvhGjAK5/fCCnJWb0iwASsQRCa7pM7yzW5QqALvsGGSB0uvWYwAWsQvPL5ZzU5u8k//PtfuwmORYIAE7AGxvkP3q/J2U2+/tE/xGqJLeRdRHqPMQEDGJ7/4LIIG//ZIqulkjjfhKC2HIftI8AErGAF8rVDLmhBlGWJBoHUL8V5Wu2yALHaFRAV5809/T0xmRtp9zQuF4JAagkIAr3+0d8N8RDvVEDYd4vXDAmfOXZCHJ+c7LQKLk8IJJ6AcCyw67iYYsHnr2Tp3ohgYhlTM6/85U+GSI99bUo8QCR89D4KJyaNZpzM5ciB4QQTrQkCiSdgrVdLEyx6OvTxl8sCH2jFoCT9XZbgvXYTZyOkG9T4nMgQYAJGBiVX1A0CTMBuUONzIkMg8WNAeDLDysUKBowGeLog/DhkvbcXVI+T4fHM108YA+SBiYOmqgcmvbCXepN+buIJ2MiNHiSEhwuW3pqtfjQjAKzclx7/Nn2+xfOBzYBqcizxBGx079BSP/7mQfF84REzF9jp6sZLjz8V60R0Wqzn1BLQEhNaDCsakHZJOPf0s/45th4Ou0OAjZAKbiAhutNWYjVfq3J8vD0EmIABnLy13VwgpzqKbttqy+ojnOoWASZgADnPqHgqkFMdfekJNjaqEek9xQSswbBZN/yD6UdqSnOyVwSYgDUIQguGebY8Rk4Gx3lerwat3pNMwBAMnwnZggOeLizRI8AEDMHUrmQEDz1K7lYs0SPABAzBNIyAYXkhp3JWhwgwAUMAmxyud7PH2JAlegSYgCGYTo4M1+Xyux91kESSkfqluDAU4UaflrXYsPvvZx5rwH6izW3VIbBvNGC3v6PRjSbr9Y25OpQ5oyEC+4aADe8g4gPv/vc/4teXL3XtI
xjx5SS+OiZg5RHj9c35v70vrtzibdj6yfrUExDvCb/y5z8y8frJukBbA0vAbsZuuK92x4p2nNdsPxg4nrK7fYAtMUQHloAx3Kup0hLP22otfEsOvEfy2+//kJ0P4noIgXpTRcBWBgaI9/J3nuXfAwkQJO5oKgjYysDAOu/ZZ58Tzz/E/n5xE662fiKgXBFC57WrhVSy9vi+T7948fcNDQzPA5pfq+z3Q9Za2yZXskLqFaFFXtOXpL+kSaNpFTYw9u5J+wSUggiYMmEDY7AeeGoIyAbGYBHPXk3iCcgGhn3UgxkmloBsYAwm4XBVrjVCtFzJSi0WySaZdlxXKJUM7yw2MAaXfLgy3wgROnlGyOWf/oJXMAabf1VXp1whaB6QWEnzgEkQfnd3fz1FJbU2P46rNVGRhRHoAwKu45hWpJSLyRj09QE0biI6BKwNghqVlmIREZeMEBZGoB8I2N7W1e51snuxFhwwjftxBdxGqhHYtYLlinKwFgwJ6sVUw8M3HzcCruP1tgpjwAzNA6LBctkbGMbdONfPCPgaULsrSpQ9AvqZjA8jEDMCWPQwQtxThaNHF5GAEZKUuUBzc/w1sAhYgxfc86ZhKpYwfAJZGIE4EShX5gDJEfoq2jEEJPvDJHZ2duJsm+tmBISdhKbIdcBR0YCuSeyyk5FiBOJBoFwum4q1CmpAkVlArsuWsAGHv+JDwKlwTEm12wVnMsMLaBIakA0RIMESFwI7FQ0oMvcW0IbpgguHDq3Q60gLmIopuzwfGBf4aa/XJx8ZIIVDhRWfgIjQJMx7CLe3txGwMAKRI7C95e1EobVjuIYGPCPEiywgY7vEBAQOLNEjYDWgEtkLtnafgIXDRxdsN2wL2kIcMgK9IlCiHw03E9C09FuYmjIGCOr0CVhp4B2EW/c2K0kOGIFoELA9qxT6XLDGagJmcxewVQc0IGvBIEwc7wUBn09G+x0lju1KFQFhDWvhvobDrAV3QeJYbwhsrG+YCmiW5c3ammjYVy3Fu3fzeqf0IW0TMz02NipGRup/tKX6DE4xAo0RwNhvY+Me+ZuKxYemjhRqS1ZpQBw0c4JKziG+ubnFE9MAgqUrBOB2BQ5Basd+tsI6AuJA4b77L5JqNBPT6xue+rQncMgItIsAhnHGzU+Ii4Wp6rGfrSOUgOZgWf/cGCTkIbO15bHYnsQhI9AKgS2adC6ZRQ1676OsTY8adk5DAsJZUArnHE6CGvW9WMNq4TxGIICA1/V6U3lSu3PW6TlQxI82JCBKFA4fm9fSfQ1rxGura0xCHzaONEIA5ANXwBl6/fK1Rl2vPZ+Ges3FWMXl7UtkxsxkMhkxOTGRyK18m6PAR9tBAKRbhaKC1zM5OZPV+2Sr85pqQJxsrOKy+wLMaFS8ukbsTsg+Mq3A4ePtI1BDvkXp6BfaObulBrSVFJeWpnVGXsL8IGtCiwqHQCCEfM81G/cFUWubgDiploQHJg6ITEL2FAyCwvH2EcCYb31t3Xa70Hxtkw+tdERAnBAkITa0nJicYBICmBSKNTisl0un5ANkHRMQJxkSZtXbMExoiy0xOjrCS3YAJkWCeb7NzU3T/cLgwJiv3W43CFNXBLQVfHrn1rzU6gzSueFhMUJrx9wlW3SSGWK8B+eC7corvJhqURulVwsFz8W+07vuiYBorLi8dFpLdZ60YR5dMrRhLpfr9Dq4/D5AoErrkdsezfSde/jwkfleLr1nAqJxdMkiK8/TvgqnkAYRxw+Mi6FsYjfhx22mRuDPh3XdgI/ogqSl2m663FrQIiGgrdRoQyHPYqoGeSDgcG6YNaIFaJ+FdcSjuWCztHb/sYtR3UqkBLQX9entpVellj+zRIRGNGQcybFWtCANYIjxHd4N3yEnghK9nIa0J+huaay3vjXf7Viv0e3GQkDbWK1GtPkgYyabEVkKFS3vZenD0l8EQC58sB8QVriwY4HZmMAnnbmeBSLIO2J980LUxLN3GysBbSPF5eUZV5RPS5k5iakbmx8MoSVhQWNaR2W8EHEvvUtQk6b8oNhywbykxy2Bau8Tc3MQTaHVYMYnr0I4bESKfDN3V3uyl14gar5Ha7QLeFMyvEh0udVPMrp6G9ZULBbzYmJsljaonlFCPUFKfroRKRtWwgeiQYC25aOh0lVXO7RZOO0PtHZvIS5N1+iC+07ARhfiWdJERqny9C86Tf+/eaXVg6a81NP2PC1kXkidt2kTasqj8lV5iU/Q5vJ2f+/AveKn17wkHdfejxC5knajp2kT7AdutmSmnUmjsGADzXYd/T+j7cbUE7Qx3wAAAABJRU5ErkJggg==\n name: LLM Generated Q&A\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: variable-aggregator\n id: 1750836391776-source-1753346901505-target\n selected: false\n source: '1750836391776'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: document-extractor\n targetType: variable-aggregator\n id: 1753349228522-source-1753346901505-target\n selected: false\n source: '1753349228522'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1754023419266-source-1753346901505-target\n selected: false\n source: '1754023419266'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756442998557-source-1756442986174-target\n selected: false\n source: '1756442998557'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n 
type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: if-else\n id: 1756442986174-source-1756443014860-target\n selected: false\n source: '1756442986174'\n sourceHandle: source\n target: '1756443014860'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1750836380067-source-1756442986174-target\n selected: false\n source: '1750836380067'\n sourceHandle: source\n target: '1756442986174'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: tool\n id: 1756443014860-true-1750836391776-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'true'\n target: '1750836391776'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: if-else\n targetType: document-extractor\n id: 1756443014860-false-1753349228522-target\n selected: false\n source: '1756443014860'\n sourceHandle: 'false'\n target: '1753349228522'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756896212061-source-1753346901505-target\n source: '1756896212061'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: variable-aggregator\n id: 1756907397615-source-1753346901505-target\n source: '1756907397615'\n sourceHandle: source\n target: '1753346901505'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: variable-aggregator\n targetType: llm\n id: 1753346901505-source-1756912504019-target\n source: '1753346901505'\n sourceHandle: source\n target: '1756912504019'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: llm\n targetType: tool\n id: 1756912504019-source-1756912537172-target\n source: '1756912504019'\n sourceHandle: source\n target: '1756912537172'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1756912537172-source-1756912274158-target\n source: '1756912537172'\n sourceHandle: source\n target: '1756912274158'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1756912274158-source-1750836372241-target\n source: '1756912274158'\n sourceHandle: source\n target: '1750836372241'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: qa_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius/jina/jina\n index_chunk_variable_selector:\n - '1756912274158'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n hybridSearchMode: weighted_score\n reranking_enable: false\n score_threshold: 0.5\n score_threshold_enabled: false\n search_method: semantic_search\n top_k: 3\n vector_setting:\n embedding_model_name: jina-embeddings-v2-base-en\n embedding_provider_name: langgenius/jina/jina\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750836372241'\n position:\n x: 1150.8369138826617\n y: 326\n positionAbsolute:\n x: 1150.8369138826617\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n 
datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1750836380067'\n position:\n x: -1371.6520723158733\n y: 224.87938381325645\n positionAbsolute:\n x: -1371.6520723158733\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n documents:\n description: the documents extracted from the file\n items:\n type: object\n type: array\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n pt_BR: o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png,\n jpg, jpeg)\n zh_Hans: 用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n params:\n file: ''\n provider_id: langgenius/dify_extractor/dify_extractor\n provider_name: langgenius/dify_extractor/dify_extractor\n provider_type: builtin\n selected: false\n title: Dify Extractor\n tool_configurations: {}\n tool_description: Dify Extractor\n tool_label: Dify Extractor\n tool_name: dify_extractor\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756442986174'\n - output\n type: tool\n height: 52\n id: '1750836391776'\n position:\n x: -417.5334221022782\n y: 268.1692071834485\n positionAbsolute:\n x: -417.5334221022782\n y: 268.1692071834485\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 252\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. 
The general steps are: import documents from the data source\n → use extractor to extract document content → split and clean content into\n structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1124\n height: 252\n id: '1751252161631'\n position:\n x: -1371.6520723158733\n y: -123.758428116601\n positionAbsolute:\n x: -1371.6520723158733\n y: -123.758428116601\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1124\n - data:\n author: TenTen\n 
desc: ''\n height: 388\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 285\n height: 388\n id: '1751252440357'\n position:\n x: -1723.9942193415582\n y: 224.87938381325645\n positionAbsolute:\n x: -1723.9942193415582\n y: 224.87938381325645\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 285\n - data:\n author: TenTen\n desc: ''\n height: 430\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor in Retrieval-Augmented Generation (RAG) is a tool or\n component that automatically identifies, extracts, and structures text and\n data from various types of documents—such as PDFs, images, scanned files,\n handwritten notes, and more—into a format that can be effectively used by\n language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify\n Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is\n a built-in document parser developed by Dify. 
It supports a wide range of\n common file formats and offers specialized handling for certain formats,\n such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\n In addition to text extraction, it can extract images embedded within documents,\n store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 430\n id: '1751253091602'\n position:\n x: -417.5334221022782\n y: 546.5283142529594\n positionAbsolute:\n x: -417.5334221022782\n y: 546.5283142529594\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 336\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts\n specified columns from tables to generate structured Q&A pairs. Users can\n independently designate which columns to use for questions and which for\n answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These\n pairs are indexed by the question field, so user queries are matched directly\n against the questions to retrieve the corresponding answers. 
This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching\n strategy improves clarity and precision, especially in scenarios involving\n high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 336\n id: '1751253953926'\n position:\n x: 794.2003154321724\n y: 417.25474169825833\n positionAbsolute:\n x: 794.2003154321724\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 410\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. 
Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only\n support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 410\n id: '1751254117904'\n position:\n x: 1150.8369138826617\n y: 475.88970282568215\n positionAbsolute:\n x: 1150.8369138826617\n y: 475.88970282568215\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n output_type: string\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836391776'\n - text\n - - '1753349228522'\n - text\n - - '1754023419266'\n - content\n - - '1756896212061'\n - content\n height: 187\n id: '1753346901505'\n position:\n x: -117.24452412456148\n y: 326\n positionAbsolute:\n x: -117.24452412456148\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_array_file: false\n selected: false\n title: Doc Extractor\n type: document-extractor\n variable_selector:\n - '1756442986174'\n - output\n height: 92\n id: '1753349228522'\n position:\n x: -417.5334221022782\n y: 417.25474169825833\n positionAbsolute:\n x: -417.5334221022782\n y: 417.25474169825833\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Notion\n datasource_name: notion_datasource\n datasource_parameters: {}\n plugin_id: langgenius/notion_datasource\n provider_name: notion_datasource\n provider_type: online_document\n selected: false\n title: Notion\n type: datasource\n height: 52\n id: '1754023419266'\n position:\n x: -1369.6904698303242\n y: 440.01452302398053\n positionAbsolute:\n x: -1369.6904698303242\n y: 440.01452302398053\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n output_type: file\n selected: false\n title: Variable Aggregator\n type: variable-aggregator\n variables:\n - - '1750836380067'\n - file\n - - '1756442998557'\n - file\n height: 135\n id: '1756442986174'\n position:\n x: -1067.06980963949\n y: 236.10252072775984\n positionAbsolute:\n x: 
-1067.06980963949\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Google Drive\n datasource_name: google_drive\n datasource_parameters: {}\n plugin_id: langgenius/google_drive\n provider_name: google_drive\n provider_type: online_drive\n selected: false\n title: Google Drive\n type: datasource\n height: 52\n id: '1756442998557'\n position:\n x: -1371.6520723158733\n y: 326\n positionAbsolute:\n x: -1371.6520723158733\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n cases:\n - case_id: 'true'\n conditions:\n - comparison_operator: is\n id: 1581dd11-7898-41f4-962f-937283ba7e01\n value: .xlsx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 92abb46d-d7e4-46e7-a5e1-8a29bb45d528\n value: .xls\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1dde5ae7-754d-4e83-96b2-fe1f02995d8b\n value: .md\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 7e1a80e5-c32a-46a4-8f92-8912c64972aa\n value: .markdown\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 53abfe95-c7d0-4f63-ad37-17d425d25106\n value: .mdx\n varType: string\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 436877b8-8c0a-4cc6-9565-92754db08571\n value: .html\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 5e3e375e-750b-4204-8ac3-9a1174a5ab7c\n value: .htm\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 1a84a784-a797-4f96-98a0-33a9b48ceb2b\n value: .docx\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 62d11445-876a-493f-85d3-8fc020146bdd\n value: .csv\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n - comparison_operator: is\n id: 02c4bce8-7668-4ccd-b750-4281f314b231\n value: .txt\n varType: file\n variable_selector:\n - '1756442986174'\n - output\n - extension\n id: 'true'\n logical_operator: or\n selected: false\n title: IF/ELSE\n type: if-else\n height: 358\n id: '1756443014860'\n position:\n x: -733.5977815139424\n y: 236.10252072775984\n positionAbsolute:\n x: -733.5977815139424\n y: 236.10252072775984\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Jina Reader\n datasource_name: jina_reader\n datasource_parameters:\n crawl_sub_pages:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_subpages\n limit:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jina_limit\n url:\n type: mixed\n value: '{{#rag.1756896212061.jina_url#}}'\n use_sitemap:\n type: variable\n value:\n - rag\n - '1756896212061'\n - jian_sitemap\n plugin_id: langgenius/jina_datasource\n provider_name: jinareader\n provider_type: website_crawl\n selected: false\n title: Jina Reader\n type: datasource\n height: 52\n id: '1756896212061'\n position:\n x: -1371.6520723158733\n y: 538.9988445953813\n positionAbsolute:\n x: -1371.6520723158733\n y: 538.9988445953813\n selected: false\n sourcePosition: right\n 
targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: Firecrawl\n datasource_name: crawl\n datasource_parameters:\n crawl_subpages:\n type: variable\n value:\n - rag\n - '1756907397615'\n - firecrawl_subpages\n exclude_paths:\n type: mixed\n value: '{{#rag.1756907397615.exclude_paths#}}'\n include_paths:\n type: mixed\n value: '{{#rag.1756907397615.include_paths#}}'\n limit:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_pages\n max_depth:\n type: variable\n value:\n - rag\n - '1756907397615'\n - max_depth\n only_main_content:\n type: variable\n value:\n - rag\n - '1756907397615'\n - main_content\n url:\n type: mixed\n value: '{{#rag.1756907397615.firecrawl_url1#}}'\n plugin_id: langgenius/firecrawl_datasource\n provider_name: firecrawl\n provider_type: website_crawl\n selected: false\n title: Firecrawl\n type: datasource\n height: 52\n id: '1756907397615'\n position:\n x: -1371.6520723158733\n y: 644.3296146102903\n positionAbsolute:\n x: -1371.6520723158733\n y: 644.3296146102903\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file you want to extract QA from.\n ja_JP: The file you want to extract QA from.\n pt_BR: The file you want to extract QA from.\n zh_Hans: 你想要提取 QA 的文件。\n label:\n en_US: Input File\n ja_JP: Input File\n pt_BR: Input File\n zh_Hans: 输入文件\n llm_description: The file you want to extract QA from.\n max: null\n min: null\n name: input_file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Column number for question.\n ja_JP: Column number for question.\n pt_BR: Column number for question.\n zh_Hans: 问题所在的列。\n label:\n en_US: Column number for question\n ja_JP: Column number for question\n pt_BR: Column number for question\n zh_Hans: 问题所在的列\n llm_description: The column number for question, the format of the column\n number must be an integer.\n max: null\n min: null\n name: question_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 1\n form: llm\n human_description:\n en_US: Column number for answer.\n ja_JP: Column number for answer.\n pt_BR: Column number for answer.\n zh_Hans: 答案所在的列。\n label:\n en_US: Column number for answer\n ja_JP: Column number for answer\n pt_BR: Column number for answer\n zh_Hans: 答案所在的列\n llm_description: The column number for answer, the format of the column\n number must be an integer.\n max: null\n min: null\n name: answer_column\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: number\n params:\n answer_column: ''\n input_file: ''\n question_column: ''\n provider_id: langgenius/qa_chunk/qa_chunk\n provider_name: langgenius/qa_chunk/qa_chunk\n provider_type: builtin\n selected: false\n title: Q&A Processor\n tool_configurations: {}\n tool_description: A tool for QA chunking mode.\n tool_label: QA Chunk\n tool_name: qa_chunk\n tool_node_version: '2'\n tool_parameters:\n answer_column:\n type: constant\n value: 2\n input_file:\n type: variable\n value:\n - '1756912537172'\n - files\n question_column:\n type: constant\n value: 1\n type: tool\n height: 52\n id: '1756912274158'\n 
position:\n x: 794.2003154321724\n y: 326\n positionAbsolute:\n x: 794.2003154321724\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n context:\n enabled: false\n variable_selector: []\n model:\n completion_params:\n temperature: 0.7\n mode: chat\n name: claude-3-5-sonnet-20240620\n provider: langgenius/anthropic/anthropic\n prompt_template:\n - id: 7f8105aa-a37d-4f5a-b581-babeeb31e833\n role: system\n text: '\n\n Generate a list of Q&A pairs based on {{#1753346901505.output#}}. Present\n the output as a Markdown table, where the first column is serial number,\n the second column is Question, and the third column is Question. Ensure\n that the table format can be easily converted into a CSV file.\n\n Example Output Format:\n\n | Index | Question | Answer |\n\n |-------|-----------|--------|\n\n | 1 | What is the main purpose of the document? | The document explains\n the company''s new product launch strategy. \n |\n\n | 2 || When will the product be launched? | The product will be launched\n in Q3 of this year. |\n\n\n Instructions:\n\n Read and understand the input text.\n\n Extract key information and generate meaningful questions and answers.\n\n Preserve any ![image] URLs from the input text in the answers.\n\n Keep questions concise and specific.\n\n Ensure answers are accurate, self-contained, and clear.\n\n Output only the Markdown table without any extra explanation.'\n selected: false\n title: LLM\n type: llm\n vision:\n enabled: false\n height: 88\n id: '1756912504019'\n position:\n x: 184.46657789772178\n y: 326\n positionAbsolute:\n x: 184.46657789772178\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Markdown text\n ja_JP: Markdown text\n pt_BR: Markdown text\n zh_Hans: Markdown格式文本,必须为Markdown表格格式\n label:\n en_US: Markdown text\n ja_JP: Markdown text\n pt_BR: Markdown text\n zh_Hans: Markdown格式文本\n llm_description: ''\n max: null\n min: null\n name: md_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: Filename of the output file\n ja_JP: Filename of the output file\n pt_BR: Filename of the output file\n zh_Hans: 输出文件名\n label:\n en_US: Filename of the output file\n ja_JP: Filename of the output file\n pt_BR: Filename of the output file\n zh_Hans: 输出文件名\n llm_description: ''\n max: null\n min: null\n name: output_filename\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n md_text: ''\n output_filename: ''\n provider_id: bowenliang123/md_exporter/md_exporter\n provider_name: bowenliang123/md_exporter/md_exporter\n provider_type: builtin\n selected: false\n title: Markdown to CSV file\n tool_configurations: {}\n tool_description: Generate CSV file from Markdown text\n tool_label: Markdown to CSV file\n tool_name: md_to_csv\n tool_node_version: '2'\n tool_parameters:\n md_text:\n type: mixed\n value: '{{#1756912504019.text#}}'\n output_filename:\n type: mixed\n value: LLM Generated Q&A\n type: tool\n height: 52\n id: '1756912537172'\n position:\n x: 484.75465419110174\n y: 326\n positionAbsolute:\n x: 484.75465419110174\n y: 326\n selected: false\n sourcePosition: right\n targetPosition: left\n type: 
custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 174\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n LLM-generated Q&A pairs are designed to extract key information from the\n input text and present it in a structured, easy-to-use format. Each pair\n consists of a concise question that captures an important point or detail,\n and a clear, self-contained answer that provides the relevant information\n without requiring additional context. The output is formatted as a Markdown\n table with three columns—Index, Question, and Answer—so that it can be easily\n converted into a CSV file for further processing. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 528\n height: 174\n id: '1756912556940'\n position:\n x: 184.46657789772178\n y: 462.64405262857747\n positionAbsolute:\n x: 184.46657789772178\n y: 462.64405262857747\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 528\n viewport:\n x: 1149.1394490177502\n y: 317.2338302699771\n zoom: 0.4911032886685182\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_reader_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_reader_imit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Crawl sub-pages\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: Crawl_sub_pages_2\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1753688365254'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: Use_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: jina_url\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: pages\n variable: jina_limit\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Use sitemap\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Follow the sitemap to 
crawl the site. If not, Jina Reader will crawl\n iteratively based on page relevance, yielding fewer but higher-quality pages.\n type: checkbox\n unit: null\n variable: jian_sitemap\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756896212061'\n default_value: true\n label: Crawl subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: jina_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: URL\n max_length: 256\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: text-input\n unit: null\n variable: firecrawl_url1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: firecrawl_subpages\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: firecrawl_subpages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: Exclude paths\n max_length: 256\n options: []\n placeholder: blog/*,/about/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: exclude_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: include_paths\n max_length: 256\n options: []\n placeholder: articles/*\n required: false\n tooltips: null\n type: text-input\n unit: null\n variable: include_paths\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 0\n label: Max depth\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: Maximum depth to crawl relative to the entered URL. Depth 0 just scrapes\n the page of the entered url, depth 1 scrapes the url and everything after enteredURL\n + one /, and so on.\n type: number\n unit: null\n variable: max_depth\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: 10\n label: Limit\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: max_pages\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: true\n label: Extract only main content (no headers, navs, footers, etc.)\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: main_content\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: '1756907397615'\n default_value: null\n label: depthtest\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: null\n variable: depthtest\n",
+ "graph": {
+ "edges": [
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1750836391776-source-1753346901505-target",
+ "selected": false,
+ "source": "1750836391776",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "document-extractor",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1753349228522-source-1753346901505-target",
+ "selected": false,
+ "source": "1753349228522",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1754023419266-source-1753346901505-target",
+ "selected": false,
+ "source": "1754023419266",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756442998557-source-1756442986174-target",
+ "selected": false,
+ "source": "1756442998557",
+ "sourceHandle": "source",
+ "target": "1756442986174",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "variable-aggregator",
+ "targetType": "if-else"
+ },
+ "id": "1756442986174-source-1756443014860-target",
+ "selected": false,
+ "source": "1756442986174",
+ "sourceHandle": "source",
+ "target": "1756443014860",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1750836380067-source-1756442986174-target",
+ "selected": false,
+ "source": "1750836380067",
+ "sourceHandle": "source",
+ "target": "1756442986174",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "if-else",
+ "targetType": "tool"
+ },
+ "id": "1756443014860-true-1750836391776-target",
+ "selected": false,
+ "source": "1756443014860",
+ "sourceHandle": "true",
+ "target": "1750836391776",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "if-else",
+ "targetType": "document-extractor"
+ },
+ "id": "1756443014860-false-1753349228522-target",
+ "selected": false,
+ "source": "1756443014860",
+ "sourceHandle": "false",
+ "target": "1753349228522",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756896212061-source-1753346901505-target",
+ "source": "1756896212061",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "variable-aggregator"
+ },
+ "id": "1756907397615-source-1753346901505-target",
+ "source": "1756907397615",
+ "sourceHandle": "source",
+ "target": "1753346901505",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "variable-aggregator",
+ "targetType": "llm"
+ },
+ "id": "1753346901505-source-1756912504019-target",
+ "source": "1753346901505",
+ "sourceHandle": "source",
+ "target": "1756912504019",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "llm",
+ "targetType": "tool"
+ },
+ "id": "1756912504019-source-1756912537172-target",
+ "source": "1756912504019",
+ "sourceHandle": "source",
+ "target": "1756912537172",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "tool"
+ },
+ "id": "1756912537172-source-1756912274158-target",
+ "source": "1756912537172",
+ "sourceHandle": "source",
+ "target": "1756912274158",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "knowledge-index"
+ },
+ "id": "1756912274158-source-1750836372241-target",
+ "source": "1756912274158",
+ "sourceHandle": "source",
+ "target": "1750836372241",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ }
+ ],
+ "nodes": [
+ {
+ "data": {
+ "chunk_structure": "qa_model",
+ "embedding_model": "jina-embeddings-v2-base-en",
+ "embedding_model_provider": "langgenius/jina/jina",
+ "index_chunk_variable_selector": [
+ "1756912274158",
+ "result"
+ ],
+ "indexing_technique": "high_quality",
+ "keyword_number": 10,
+ "retrieval_model": {
+ "hybridSearchMode": "weighted_score",
+ "reranking_enable": false,
+ "score_threshold": 0.5,
+ "score_threshold_enabled": false,
+ "search_method": "semantic_search",
+ "top_k": 3,
+ "vector_setting": {
+ "embedding_model_name": "jina-embeddings-v2-base-en",
+ "embedding_provider_name": "langgenius/jina/jina"
+ }
+ },
+ "selected": false,
+ "title": "Knowledge Base",
+ "type": "knowledge-index"
+ },
+ "height": 114,
+ "id": "1750836372241",
+ "position": {
+ "x": 1150.8369138826617,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 1150.8369138826617,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "File",
+ "datasource_name": "upload-file",
+ "datasource_parameters": {},
+ "fileExtensions": [
+ "txt",
+ "markdown",
+ "mdx",
+ "pdf",
+ "html",
+ "xlsx",
+ "xls",
+ "vtt",
+ "properties",
+ "doc",
+ "docx",
+ "csv",
+ "eml",
+ "msg",
+ "pptx",
+ "xml",
+ "epub",
+ "ppt",
+ "md"
+ ],
+ "plugin_id": "langgenius/file",
+ "provider_name": "file",
+ "provider_type": "local_file",
+ "selected": false,
+ "title": "File",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1750836380067",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 224.87938381325645
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 224.87938381325645
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "documents": {
+ "description": "the documents extracted from the file",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ },
+ "images": {
+ "description": "The images extracted from the file",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "zh_Hans": "用于解析的文件(支持 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
+ },
+ "label": {
+ "en_US": "file",
+ "ja_JP": "file",
+ "pt_BR": "file",
+ "zh_Hans": "file"
+ },
+ "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "max": null,
+ "min": null,
+ "name": "file",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "file"
+ }
+ ],
+ "params": {
+ "file": ""
+ },
+ "provider_id": "langgenius/dify_extractor/dify_extractor",
+ "provider_name": "langgenius/dify_extractor/dify_extractor",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Dify Extractor",
+ "tool_configurations": {},
+ "tool_description": "Dify Extractor",
+ "tool_label": "Dify Extractor",
+ "tool_name": "dify_extractor",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "file": {
+ "type": "variable",
+ "value": [
+ "1756442986174",
+ "output"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1750836391776",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 268.1692071834485
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 268.1692071834485
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 252,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source → use extractor to extract document content → split and clean content into structured chunks → store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see 
\",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https://docs.dify.ai/en/guides/knowledge-base/knowledge-pipeline/knowledge-pipeline-orchestration\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 1124
+ },
+ "height": 252,
+ "id": "1751252161631",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": -123.758428116601
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": -123.758428116601
+ },
+ "selected": true,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 1124
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 388,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 285
+ },
+ "height": 388,
+ "id": "1751252440357",
+ "position": {
+ "x": -1723.9942193415582,
+ "y": 224.87938381325645
+ },
+ "positionAbsolute": {
+ "x": -1723.9942193415582,
+ "y": 224.87938381325645
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 285
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 430,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor in Retrieval-Augmented Generation (RAG) is a tool or component that automatically identifies, extracts, and structures text and data from various types of documents—such as PDFs, images, scanned files, handwritten notes, and more—into a format that can be effectively used by language models within RAG Pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Dify Extractor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is a built-in document parser developed by Dify. It supports a wide range of common file formats and offers specialized handling for certain formats, such as \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":16,\"mode\":\"normal\",\"style\":\"\",\"text\":\".docx\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\". In addition to text extraction, it can extract images embedded within documents, store them, and return their accessible URLs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 430,
+ "id": "1751253091602",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 546.5283142529594
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 546.5283142529594
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 336,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Processor\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" extracts specified columns from tables to generate structured Q&A pairs. Users can independently designate which columns to use for questions and which for answers.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"These pairs are indexed by the question field, so user queries are matched directly against the questions to retrieve the corresponding answers. This \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q-to-Q\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" matching strategy improves clarity and precision, especially in scenarios involving high-frequency or highly similar user questions.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 336,
+ "id": "1751253953926",
+ "position": {
+ "x": 794.2003154321724,
+ "y": 417.25474169825833
+ },
+ "positionAbsolute": {
+ "x": 794.2003154321724,
+ "y": 417.25474169825833
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 410,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods: \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" and \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" only support the \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 410,
+ "id": "1751254117904",
+ "position": {
+ "x": 1150.8369138826617,
+ "y": 475.88970282568215
+ },
+ "positionAbsolute": {
+ "x": 1150.8369138826617,
+ "y": 475.88970282568215
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "output_type": "string",
+ "selected": false,
+ "title": "Variable Aggregator",
+ "type": "variable-aggregator",
+ "variables": [
+ [
+ "1750836391776",
+ "text"
+ ],
+ [
+ "1753349228522",
+ "text"
+ ],
+ [
+ "1754023419266",
+ "content"
+ ],
+ [
+ "1756896212061",
+ "content"
+ ]
+ ]
+ },
+ "height": 187,
+ "id": "1753346901505",
+ "position": {
+ "x": -117.24452412456148,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -117.24452412456148,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_array_file": false,
+ "selected": false,
+ "title": "Doc Extractor",
+ "type": "document-extractor",
+ "variable_selector": [
+ "1756442986174",
+ "output"
+ ]
+ },
+ "height": 92,
+ "id": "1753349228522",
+ "position": {
+ "x": -417.5334221022782,
+ "y": 417.25474169825833
+ },
+ "positionAbsolute": {
+ "x": -417.5334221022782,
+ "y": 417.25474169825833
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Notion",
+ "datasource_name": "notion_datasource",
+ "datasource_parameters": {},
+ "plugin_id": "langgenius/notion_datasource",
+ "provider_name": "notion_datasource",
+ "provider_type": "online_document",
+ "selected": false,
+ "title": "Notion",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1754023419266",
+ "position": {
+ "x": -1369.6904698303242,
+ "y": 440.01452302398053
+ },
+ "positionAbsolute": {
+ "x": -1369.6904698303242,
+ "y": 440.01452302398053
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "output_type": "file",
+ "selected": false,
+ "title": "Variable Aggregator",
+ "type": "variable-aggregator",
+ "variables": [
+ [
+ "1750836380067",
+ "file"
+ ],
+ [
+ "1756442998557",
+ "file"
+ ]
+ ]
+ },
+ "height": 135,
+ "id": "1756442986174",
+ "position": {
+ "x": -1067.06980963949,
+ "y": 236.10252072775984
+ },
+ "positionAbsolute": {
+ "x": -1067.06980963949,
+ "y": 236.10252072775984
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Google Drive",
+ "datasource_name": "google_drive",
+ "datasource_parameters": {},
+ "plugin_id": "langgenius/google_drive",
+ "provider_name": "google_drive",
+ "provider_type": "online_drive",
+ "selected": false,
+ "title": "Google Drive",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756442998557",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "cases": [
+ {
+ "case_id": "true",
+ "conditions": [
+ {
+ "comparison_operator": "is",
+ "id": "1581dd11-7898-41f4-962f-937283ba7e01",
+ "value": ".xlsx",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "92abb46d-d7e4-46e7-a5e1-8a29bb45d528",
+ "value": ".xls",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "1dde5ae7-754d-4e83-96b2-fe1f02995d8b",
+ "value": ".md",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "7e1a80e5-c32a-46a4-8f92-8912c64972aa",
+ "value": ".markdown",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "53abfe95-c7d0-4f63-ad37-17d425d25106",
+ "value": ".mdx",
+ "varType": "string",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "436877b8-8c0a-4cc6-9565-92754db08571",
+ "value": ".html",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "5e3e375e-750b-4204-8ac3-9a1174a5ab7c",
+ "value": ".htm",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "1a84a784-a797-4f96-98a0-33a9b48ceb2b",
+ "value": ".docx",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "62d11445-876a-493f-85d3-8fc020146bdd",
+ "value": ".csv",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ },
+ {
+ "comparison_operator": "is",
+ "id": "02c4bce8-7668-4ccd-b750-4281f314b231",
+ "value": ".txt",
+ "varType": "file",
+ "variable_selector": [
+ "1756442986174",
+ "output",
+ "extension"
+ ]
+ }
+ ],
+ "id": "true",
+ "logical_operator": "or"
+ }
+ ],
+ "selected": false,
+ "title": "IF/ELSE",
+ "type": "if-else"
+ },
+ "height": 358,
+ "id": "1756443014860",
+ "position": {
+ "x": -733.5977815139424,
+ "y": 236.10252072775984
+ },
+ "positionAbsolute": {
+ "x": -733.5977815139424,
+ "y": 236.10252072775984
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Jina Reader",
+ "datasource_name": "jina_reader",
+ "datasource_parameters": {
+ "crawl_sub_pages": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jina_subpages"
+ ]
+ },
+ "limit": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jina_limit"
+ ]
+ },
+ "url": {
+ "type": "mixed",
+ "value": "{{#rag.1756896212061.jina_url#}}"
+ },
+ "use_sitemap": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756896212061",
+ "jian_sitemap"
+ ]
+ }
+ },
+ "plugin_id": "langgenius/jina_datasource",
+ "provider_name": "jinareader",
+ "provider_type": "website_crawl",
+ "selected": false,
+ "title": "Jina Reader",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756896212061",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 538.9988445953813
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 538.9988445953813
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "Firecrawl",
+ "datasource_name": "crawl",
+ "datasource_parameters": {
+ "crawl_subpages": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "firecrawl_subpages"
+ ]
+ },
+ "exclude_paths": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.exclude_paths#}}"
+ },
+ "include_paths": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.include_paths#}}"
+ },
+ "limit": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "max_pages"
+ ]
+ },
+ "max_depth": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "max_depth"
+ ]
+ },
+ "only_main_content": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "1756907397615",
+ "main_content"
+ ]
+ },
+ "url": {
+ "type": "mixed",
+ "value": "{{#rag.1756907397615.firecrawl_url1#}}"
+ }
+ },
+ "plugin_id": "langgenius/firecrawl_datasource",
+ "provider_name": "firecrawl",
+ "provider_type": "website_crawl",
+ "selected": false,
+ "title": "Firecrawl",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756907397615",
+ "position": {
+ "x": -1371.6520723158733,
+ "y": 644.3296146102903
+ },
+ "positionAbsolute": {
+ "x": -1371.6520723158733,
+ "y": 644.3296146102903
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The file you want to extract QA from.",
+ "ja_JP": "The file you want to extract QA from.",
+ "pt_BR": "The file you want to extract QA from.",
+ "zh_Hans": "你想要提取 QA 的文件。"
+ },
+ "label": {
+ "en_US": "Input File",
+ "ja_JP": "Input File",
+ "pt_BR": "Input File",
+ "zh_Hans": "输入文件"
+ },
+ "llm_description": "The file you want to extract QA from.",
+ "max": null,
+ "min": null,
+ "name": "input_file",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "file"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Column number for question.",
+ "ja_JP": "Column number for question.",
+ "pt_BR": "Column number for question.",
+ "zh_Hans": "问题所在的列。"
+ },
+ "label": {
+ "en_US": "Column number for question",
+ "ja_JP": "Column number for question",
+ "pt_BR": "Column number for question",
+ "zh_Hans": "问题所在的列"
+ },
+ "llm_description": "The column number for question, the format of the column number must be an integer.",
+ "max": null,
+ "min": null,
+ "name": "question_column",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": 1,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Column number for answer.",
+ "ja_JP": "Column number for answer.",
+ "pt_BR": "Column number for answer.",
+ "zh_Hans": "答案所在的列。"
+ },
+ "label": {
+ "en_US": "Column number for answer",
+ "ja_JP": "Column number for answer",
+ "pt_BR": "Column number for answer",
+ "zh_Hans": "答案所在的列"
+ },
+ "llm_description": "The column number for answer, the format of the column number must be an integer.",
+ "max": null,
+ "min": null,
+ "name": "answer_column",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ }
+ ],
+ "params": {
+ "answer_column": "",
+ "input_file": "",
+ "question_column": ""
+ },
+ "provider_id": "langgenius/qa_chunk/qa_chunk",
+ "provider_name": "langgenius/qa_chunk/qa_chunk",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Q&A Processor",
+ "tool_configurations": {},
+ "tool_description": "A tool for QA chunking mode.",
+ "tool_label": "QA Chunk",
+ "tool_name": "qa_chunk",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "answer_column": {
+ "type": "constant",
+ "value": 2
+ },
+ "input_file": {
+ "type": "variable",
+ "value": [
+ "1756912537172",
+ "files"
+ ]
+ },
+ "question_column": {
+ "type": "constant",
+ "value": 1
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1756912274158",
+ "position": {
+ "x": 794.2003154321724,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 794.2003154321724,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "context": {
+ "enabled": false,
+ "variable_selector": []
+ },
+ "model": {
+ "completion_params": {
+ "temperature": 0.7
+ },
+ "mode": "chat",
+ "name": "claude-3-5-sonnet-20240620",
+ "provider": "langgenius/anthropic/anthropic"
+ },
+ "prompt_template": [
+ {
+ "id": "7f8105aa-a37d-4f5a-b581-babeeb31e833",
+ "role": "system",
+ "text": "\nGenerate a list of Q&A pairs based on {{#1753346901505.output#}}. Present the output as a Markdown table, where the first column is serial number, the second column is Question, and the third column is Question. Ensure that the table format can be easily converted into a CSV file.\nExample Output Format:\n| Index | Question | Answer |\n|-------|-----------|--------|\n| 1 | What is the main purpose of the document? | The document explains the company's new product launch strategy.  |\n| 2 || When will the product be launched? | The product will be launched in Q3 of this year. |\n\nInstructions:\nRead and understand the input text.\nExtract key information and generate meaningful questions and answers.\nPreserve any ![image] URLs from the input text in the answers.\nKeep questions concise and specific.\nEnsure answers are accurate, self-contained, and clear.\nOutput only the Markdown table without any extra explanation."
+ }
+ ],
+ "selected": false,
+ "title": "LLM",
+ "type": "llm",
+ "vision": {
+ "enabled": false
+ }
+ },
+ "height": 88,
+ "id": "1756912504019",
+ "position": {
+ "x": 184.46657789772178,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 184.46657789772178,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Markdown text",
+ "ja_JP": "Markdown text",
+ "pt_BR": "Markdown text",
+ "zh_Hans": "Markdown格式文本,必须为Markdown表格格式"
+ },
+ "label": {
+ "en_US": "Markdown text",
+ "ja_JP": "Markdown text",
+ "pt_BR": "Markdown text",
+ "zh_Hans": "Markdown格式文本"
+ },
+ "llm_description": "",
+ "max": null,
+ "min": null,
+ "name": "md_text",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Filename of the output file",
+ "ja_JP": "Filename of the output file",
+ "pt_BR": "Filename of the output file",
+ "zh_Hans": "输出文件名"
+ },
+ "label": {
+ "en_US": "Filename of the output file",
+ "ja_JP": "Filename of the output file",
+ "pt_BR": "Filename of the output file",
+ "zh_Hans": "输出文件名"
+ },
+ "llm_description": "",
+ "max": null,
+ "min": null,
+ "name": "output_filename",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ }
+ ],
+ "params": {
+ "md_text": "",
+ "output_filename": ""
+ },
+ "provider_id": "bowenliang123/md_exporter/md_exporter",
+ "provider_name": "bowenliang123/md_exporter/md_exporter",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Markdown to CSV file",
+ "tool_configurations": {},
+ "tool_description": "Generate CSV file from Markdown text",
+ "tool_label": "Markdown to CSV file",
+ "tool_name": "md_to_csv",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "md_text": {
+ "type": "mixed",
+ "value": "{{#1756912504019.text#}}"
+ },
+ "output_filename": {
+ "type": "mixed",
+ "value": "LLM Generated Q&A"
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1756912537172",
+ "position": {
+ "x": 484.75465419110174,
+ "y": 326
+ },
+ "positionAbsolute": {
+ "x": 484.75465419110174,
+ "y": 326
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 174,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The LLM-generated Q&A pairs are designed to extract key information from the input text and present it in a structured, easy-to-use format. Each pair consists of a concise question that captures an important point or detail, and a clear, self-contained answer that provides the relevant information without requiring additional context. The output is formatted as a Markdown table with three columns—Index, Question, and Answer—so that it can be easily converted into a CSV file for further processing. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 528
+ },
+ "height": 174,
+ "id": "1756912556940",
+ "position": {
+ "x": 184.46657789772178,
+ "y": 462.64405262857747
+ },
+ "positionAbsolute": {
+ "x": 184.46657789772178,
+ "y": 462.64405262857747
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 528
+ }
+ ],
+ "viewport": {
+ "x": 1149.1394490177502,
+ "y": 317.2338302699771,
+ "zoom": 0.4911032886685182
+ }
+ },
+ "icon_info": {
+ "icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQjUlEQVR4Ae1dTYwcxRWuqpnd2R/veqzgxXaw2YEgRSDBEkJEwsFLDkE5xRwicogUR0g55GJWKGfjXBPJyyU3hLkFKRLmkohD4uVgHIVEOCggRTGZNTbesDbysj/end3prryveqq3Z6bnv3t2tvu91Uz9dHVV99ffvqpX9bpGigGR4tLStMiKaUeKaallXgidV1o9iMtzpc5LISiPhI6bsOqLymvtHa/KT3BCyhXCiD4B0QJpP49wXMRRV7rXCbgVLd3FjKbzymKxcPSoOYbjeyn0XPsrxbvFvOPkZjNanXQFkU2KGaHDSNXf60ppa1e1EItE5H9qqa9mMqWFwqGCT+B+YNIXAhZvL80KoU5qoSkU+NSJUkooYmMmmxGSQnyQB5EUIg3JVPJMovJlywfzkh7XmtCkT1CQdgN5ruNQGaKXdk1Z16XQ1cKhEPEGcpWQXhBavVmYmrraoExk2bEREJrOLY+epgZ+RFc7a68YZMlmMoZoGQqHhoZ8wtkyHPYHAYcICjKWd3aEU3bETrlc3bAUi66rz31j6uiF6gPRpSInIIgnymNntBQv079dHpcK0uVyw2JoeNiQz2qz6G6Da4oKAZBwu1QSOzvlXS1JRKTx5IXC4fvPRdWOrSdSAl774tYplVHn7ZhuKJsVI2OjAiHL/kOgVNr2yGg1YwwaMRICFu8uTeuyfIMgngXMTDygkByBVtxY3/A1Ig0rL6qsnisc6t2S7pmA179cPuNo/Sq6W3Sto6OjYmQklxz0+U58BKARNzc3LRFXyOCZ63V82DUBvbHe6Fn6b3gZVzg8PCTGx8d9a9W/ao4kCgFYzyAhyAjRQs0/fHhqrtub7IqAlS73bWp0hrVet9Dv7/O2tkqGiJWpoKsyq1/opkvumICGfI68BEMD83STkxP+fN3+hpSvvlMEoA1XV9e8LhmWckY/1ykJOyJgkHyYw5uYOMDk6/SpJaw8SLi2ti4wp0jLpB2TsG0C1pIPmo/n8xLGpi5vB90wNGE3JGyLgEy+Lp9Mik7rloTeYmsLoGiO722M+dDtsuZrAVZKD6M3BDfAEXAFnDEzJS3waEnA4u3/nac6ZmBwYMzH3W4LRFN8GNwI2AUzbnn8bCs4mnbB15aXTpOHyhuo+ODBSTY4WqHJxw0CMEy++mrVeOBoR8w9fOTIfCNoGhLQG/epD7HCMTY2xqsbjRDk/FAEME947949HFuhOcInG03PNO6Cy3Aq0Hl4sfDSWijGnNkEAXAGq2Mk+YqfQGjpUAKi6yV3x1MY92Ftl4UR6AaBwNLs7LU7t06F1RFKQKWkGTyCfNYrOexkzmMEmiEA28EqMPJ3Px9mFdcRsPjlF2ftMhu6XxZGoBcE0BUbf1CamnG3R4zjSrC+OgLShOJpFBg/MB4sx3FGoGsE4JQMkUqeqdWCVQTE2A/aD4xlL+au8eYTaxAI8Mm8JxQ8XEVAO/YbzrFDaRAkjveOgK8FvZfU/Ap9AhaXb5r3c2F08NjPx4cjESEALVhZRZv1XtP1KvYJ6Cp1GllDQ/wCkQcNf0eNgFVstFAya+v2CSh15iQyufu10HAYNQJ4LRdCxojhGuKGgMW7d/PkwjCDDDY+gAJLHAhgQwK/G8b74ySGgI6zPYsEkw8osMSFAMgHEhpxxmYRGgJK7Rrtp2hfFhZGIE4EsPcPxHWdWYSVMaB8AomhrFk8RpSFEYgFAeOwSjVLmm9GA54GFHKa4uTNWuEjEiyMQAwIYDMqIxlllF6FcZ4BYtkZQ7tcJSNgEKgYIcZtHxnK7EyKCE1AszACcSMAAlqugXsK2+Ki0bCNH+O+GK4/nQj4WpC4pxypzHwMTQ6mEw2+674jkK1YwtgPXGW0nsYVYBtcFkagHwhYDYjN6BXtGuzNSFPfzMII9AMBS0CyRPLKzsfsZvbjEriNNCNgjRAl1YN+v8sETDMl9u7e6b1z+SCaV3aNbu+uhVtOCQJW2WnHOeRrwJTcO9/mACDgG7xKHWQCDsADSfMlKC3wu2zUBbMVnGYe9PXe/UUPzAOSW4I3Ec0E7OtD4MY8BFL7AsiJ3/0m0Rz47Je/2hf3x2PAffGYknuRTMDkPtt9cWdKmB+HprVg+mNhBPqBgJ0HpF048qQBK0YIe8P0A3tugxDwCUh7B3IXzJTYUwSYgHsKPzfOBGQO7CkCTMA9hZ8bZwIyB/YUASbgnsLPjTMBmQN7isDArgUnfa12T5/6ADXOGnCAHkYaL4UJmManPkD3zAQcoIeRxksZ2DFg7cPYL/5ttdfdbjqtY17WgO0yhMvFggATMBZYudJ2EWACtosUl4sFASZgLLBype0iwARsFykuFwsC+8YKjuXuG1R65dZn4sWLb1UdfevUT8R3jx2vyuNE7wiwBgzBcHVruy735upXdXmc0TsCTMAQDFe3t0JyOSsOBJiAIajeXKvXdmF5IadyVocIMAFDAPvkzu263Jtrq3V5nNE7AkzAEAxvhGjAK5/fCCnJWb0iwASsQRCa7pM7yzW5QqALvsGGSB0uvWYwAWsQvPL5ZzU5u8k//PtfuwmORYIAE7AGxvkP3q/J2U2+/tE/xGqJLeRdRHqPMQEDGJ7/4LIIG//ZIqulkjjfhKC2HIftI8AErGAF8rVDLmhBlGWJBoHUL8V5Wu2yALHaFRAV5809/T0xmRtp9zQuF4JAagkIAr3+0d8N8RDvVEDYd4vXDAmfOXZCHJ+c7LQKLk8IJJ6AcCyw67iYYsHnr2Tp3ohgYhlTM6/85U+GSI99bUo8QCR89D4KJyaNZpzM5ciB4QQTrQkCiSdgrVdLEyx6OvTxl8sCH2jFoCT9XZbgvXYTZyOkG9T4nMgQYAJGBiVX1A0CTMBuUONzIkMg8WNAeDLDysUKBowGeLog/DhkvbcXVI+T4fHM108YA+SBiYOmqgcmvbCXepN+buIJ2MiNHiSEhwuW3pqtfjQjAKzclx7/Nn2+xfOBzYBqcizxBGx079BSP/7mQfF84REzF9jp6sZLjz8V60R0Wqzn1BLQEhNaDCsakHZJOPf0s/45th4Ou0OAjZAKbiAhutNWYjVfq3J8vD0EmIABnLy13VwgpzqKbttqy+ojnOoWASZgADnPqHgqkFMdfekJNjaqEek9xQSswbBZN/yD6UdqSnOyVwSYgDUIQguGebY8Rk4Gx3lerwat3pNMwBAMnwnZggOeLizRI8AEDMHUrmQEDz1K7lYs0SPABAzBNIyAYXkhp3JWhwgwAUMAmxyud7PH2JAlegSYgCGYTo4M1+Xyux91kESSkfqluDAU4UaflrXYsPvvZx5rwH6izW3VIbBvNGC3v6PRjSbr9Y25OpQ5oyEC+4aADe8g4g
Pv/vc/4teXL3XtIxjx5SS+OiZg5RHj9c35v70vrtzibdj6yfrUExDvCb/y5z8y8frJukBbA0vAbsZuuK92x4p2nNdsPxg4nrK7fYAtMUQHloAx3Kup0hLP22otfEsOvEfy2+//kJ0P4noIgXpTRcBWBgaI9/J3nuXfAwkQJO5oKgjYysDAOu/ZZ58Tzz/E/n5xE662fiKgXBFC57WrhVSy9vi+T7948fcNDQzPA5pfq+z3Q9Za2yZXskLqFaFFXtOXpL+kSaNpFTYw9u5J+wSUggiYMmEDY7AeeGoIyAbGYBHPXk3iCcgGhn3UgxkmloBsYAwm4XBVrjVCtFzJSi0WySaZdlxXKJUM7yw2MAaXfLgy3wgROnlGyOWf/oJXMAabf1VXp1whaB6QWEnzgEkQfnd3fz1FJbU2P46rNVGRhRHoAwKu45hWpJSLyRj09QE0biI6BKwNghqVlmIREZeMEBZGoB8I2N7W1e51snuxFhwwjftxBdxGqhHYtYLlinKwFgwJ6sVUw8M3HzcCruP1tgpjwAzNA6LBctkbGMbdONfPCPgaULsrSpQ9AvqZjA8jEDMCWPQwQtxThaNHF5GAEZKUuUBzc/w1sAhYgxfc86ZhKpYwfAJZGIE4EShX5gDJEfoq2jEEJPvDJHZ2duJsm+tmBISdhKbIdcBR0YCuSeyyk5FiBOJBoFwum4q1CmpAkVlArsuWsAGHv+JDwKlwTEm12wVnMsMLaBIakA0RIMESFwI7FQ0oMvcW0IbpgguHDq3Q60gLmIopuzwfGBf4aa/XJx8ZIIVDhRWfgIjQJMx7CLe3txGwMAKRI7C95e1EobVjuIYGPCPEiywgY7vEBAQOLNEjYDWgEtkLtnafgIXDRxdsN2wL2kIcMgK9IlCiHw03E9C09FuYmjIGCOr0CVhp4B2EW/c2K0kOGIFoELA9qxT6XLDGagJmcxewVQc0IGvBIEwc7wUBn09G+x0lju1KFQFhDWvhvobDrAV3QeJYbwhsrG+YCmiW5c3ammjYVy3Fu3fzeqf0IW0TMz02NipGRup/tKX6DE4xAo0RwNhvY+Me+ZuKxYemjhRqS1ZpQBw0c4JKziG+ubnFE9MAgqUrBOB2BQ5Basd+tsI6AuJA4b77L5JqNBPT6xue+rQncMgItIsAhnHGzU+Ii4Wp6rGfrSOUgOZgWf/cGCTkIbO15bHYnsQhI9AKgS2adC6ZRQ1676OsTY8adk5DAsJZUArnHE6CGvW9WMNq4TxGIICA1/V6U3lSu3PW6TlQxI82JCBKFA4fm9fSfQ1rxGura0xCHzaONEIA5ANXwBl6/fK1Rl2vPZ+Ges3FWMXl7UtkxsxkMhkxOTGRyK18m6PAR9tBAKRbhaKC1zM5OZPV+2Sr85pqQJxsrOKy+wLMaFS8ukbsTsg+Mq3A4ePtI1BDvkXp6BfaObulBrSVFJeWpnVGXsL8IGtCiwqHQCCEfM81G/cFUWubgDiploQHJg6ITEL2FAyCwvH2EcCYb31t3Xa70Hxtkw+tdERAnBAkITa0nJicYBICmBSKNTisl0un5ANkHRMQJxkSZtXbMExoiy0xOjrCS3YAJkWCeb7NzU3T/cLgwJiv3W43CFNXBLQVfHrn1rzU6gzSueFhMUJrx9wlW3SSGWK8B+eC7corvJhqURulVwsFz8W+07vuiYBorLi8dFpLdZ60YR5dMrRhLpfr9Dq4/D5AoErrkdsezfSde/jwkfleLr1nAqJxdMkiK8/TvgqnkAYRxw+Mi6FsYjfhx22mRuDPh3XdgI/ogqSl2m663FrQIiGgrdRoQyHPYqoGeSDgcG6YNaIFaJ+FdcSjuWCztHb/sYtR3UqkBLQX9entpVellj+zRIRGNGQcybFWtCANYIjxHd4N3yEnghK9nIa0J+huaay3vjXf7Viv0e3GQkDbWK1GtPkgYyabEVkKFS3vZenD0l8EQC58sB8QVriwY4HZmMAnnbmeBSLIO2J980LUxLN3GysBbSPF5eUZV5RPS5k5iakbmx8MoSVhQWNaR2W8EHEvvUtQk6b8oNhywbykxy2Bau8Tc3MQTaHVYMYnr0I4bESKfDN3V3uyl14gar5Ha7QLeFMyvEh0udVPMrp6G9ZULBbzYmJsljaonlFCPUFKfroRKRtWwgeiQYC25aOh0lVXO7RZOO0PtHZvIS5N1+iC+07ARhfiWdJERqny9C86Tf+/eaXVg6a81NP2PC1kXkidt2kTasqj8lV5iU/Q5vJ2f+/AveKn17wkHdfejxC5knajp2kT7AdutmSmnUmjsGADzXYd/T+j7cbUE7Qx3wAAAABJRU5ErkJggg=="
+ },
+ "id": "98374ab6-9dcd-434d-983e-268bec156b43",
+ "name": "LLM Generated Q&A",
+ "icon": {
+ "icon": "e4ea16ed-9690-4de9-ab80-5b622ecbcc04",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAQjUlEQVR4Ae1dTYwcxRWuqpnd2R/veqzgxXaw2YEgRSDBEkJEwsFLDkE5xRwicogUR0g55GJWKGfjXBPJyyU3hLkFKRLmkohD4uVgHIVEOCggRTGZNTbesDbysj/end3prryveqq3Z6bnv3t2tvu91Uz9dHVV99ffvqpX9bpGigGR4tLStMiKaUeKaallXgidV1o9iMtzpc5LISiPhI6bsOqLymvtHa/KT3BCyhXCiD4B0QJpP49wXMRRV7rXCbgVLd3FjKbzymKxcPSoOYbjeyn0XPsrxbvFvOPkZjNanXQFkU2KGaHDSNXf60ppa1e1EItE5H9qqa9mMqWFwqGCT+B+YNIXAhZvL80KoU5qoSkU+NSJUkooYmMmmxGSQnyQB5EUIg3JVPJMovJlywfzkh7XmtCkT1CQdgN5ruNQGaKXdk1Z16XQ1cKhEPEGcpWQXhBavVmYmrraoExk2bEREJrOLY+epgZ+RFc7a68YZMlmMoZoGQqHhoZ8wtkyHPYHAYcICjKWd3aEU3bETrlc3bAUi66rz31j6uiF6gPRpSInIIgnymNntBQv079dHpcK0uVyw2JoeNiQz2qz6G6Da4oKAZBwu1QSOzvlXS1JRKTx5IXC4fvPRdWOrSdSAl774tYplVHn7ZhuKJsVI2OjAiHL/kOgVNr2yGg1YwwaMRICFu8uTeuyfIMgngXMTDygkByBVtxY3/A1Ig0rL6qsnisc6t2S7pmA179cPuNo/Sq6W3Sto6OjYmQklxz0+U58BKARNzc3LRFXyOCZ63V82DUBvbHe6Fn6b3gZVzg8PCTGx8d9a9W/ao4kCgFYzyAhyAjRQs0/fHhqrtub7IqAlS73bWp0hrVet9Dv7/O2tkqGiJWpoKsyq1/opkvumICGfI68BEMD83STkxP+fN3+hpSvvlMEoA1XV9e8LhmWckY/1ykJOyJgkHyYw5uYOMDk6/SpJaw8SLi2ti4wp0jLpB2TsG0C1pIPmo/n8xLGpi5vB90wNGE3JGyLgEy+Lp9Mik7rloTeYmsLoGiO722M+dDtsuZrAVZKD6M3BDfAEXAFnDEzJS3waEnA4u3/nac6ZmBwYMzH3W4LRFN8GNwI2AUzbnn8bCs4mnbB15aXTpOHyhuo+ODBSTY4WqHJxw0CMEy++mrVeOBoR8w9fOTIfCNoGhLQG/epD7HCMTY2xqsbjRDk/FAEME947949HFuhOcInG03PNO6Cy3Aq0Hl4sfDSWijGnNkEAXAGq2Mk+YqfQGjpUAKi6yV3x1MY92Ftl4UR6AaBwNLs7LU7t06F1RFKQKWkGTyCfNYrOexkzmMEmiEA28EqMPJ3Px9mFdcRsPjlF2ftMhu6XxZGoBcE0BUbf1CamnG3R4zjSrC+OgLShOJpFBg/MB4sx3FGoGsE4JQMkUqeqdWCVQTE2A/aD4xlL+au8eYTaxAI8Mm8JxQ8XEVAO/YbzrFDaRAkjveOgK8FvZfU/Ap9AhaXb5r3c2F08NjPx4cjESEALVhZRZv1XtP1KvYJ6Cp1GllDQ/wCkQcNf0eNgFVstFAya+v2CSh15iQyufu10HAYNQJ4LRdCxojhGuKGgMW7d/PkwjCDDDY+gAJLHAhgQwK/G8b74ySGgI6zPYsEkw8osMSFAMgHEhpxxmYRGgJK7Rrtp2hfFhZGIE4EsPcPxHWdWYSVMaB8AomhrFk8RpSFEYgFAeOwSjVLmm9GA54GFHKa4uTNWuEjEiyMQAwIYDMqIxlllF6FcZ4BYtkZQ7tcJSNgEKgYIcZtHxnK7EyKCE1AszACcSMAAlqugXsK2+Ki0bCNH+O+GK4/nQj4WpC4pxypzHwMTQ6mEw2+674jkK1YwtgPXGW0nsYVYBtcFkagHwhYDYjN6BXtGuzNSFPfzMII9AMBS0CyRPLKzsfsZvbjEriNNCNgjRAl1YN+v8sETDMl9u7e6b1z+SCaV3aNbu+uhVtOCQJW2WnHOeRrwJTcO9/mACDgG7xKHWQCDsADSfMlKC3wu2zUBbMVnGYe9PXe/UUPzAOSW4I3Ec0E7OtD4MY8BFL7AsiJ3/0m0Rz47Je/2hf3x2PAffGYknuRTMDkPtt9cWdKmB+HprVg+mNhBPqBgJ0HpF048qQBK0YIe8P0A3tugxDwCUh7B3IXzJTYUwSYgHsKPzfOBGQO7CkCTMA9hZ8bZwIyB/YUASbgnsLPjTMBmQN7isDArgUnfa12T5/6ADXOGnCAHkYaL4UJmManPkD3zAQcoIeRxksZ2DFg7cPYL/5ttdfdbjqtY17WgO0yhMvFggATMBZYudJ2EWACtosUl4sFASZgLLBype0iwARsFykuFwsC+8YKjuXuG1R65dZn4sWLb1UdfevUT8R3jx2vyuNE7wiwBgzBcHVruy735upXdXmc0TsCTMAQDFe3t0JyOSsOBJiAIajeXKvXdmF5IadyVocIMAFDAPvkzu263Jtrq3V5nNE7AkzAEAxvhGjAK5/fCCnJWb0iwASsQRCa7pM7yzW5QqALvsGGSB0uvWYwAWsQvPL5ZzU5u8k//PtfuwmORYIAE7AGxvkP3q/J2U2+/tE/xGqJLeRdRHqPMQEDGJ7/4LIIG//ZIqulkjjfhKC2HIftI8AErGAF8rVDLmhBlGWJBoHUL8V5Wu2yALHaFRAV5809/T0xmRtp9zQuF4JAagkIAr3+0d8N8RDvVEDYd4vXDAmfOXZCHJ+c7LQKLk8IJJ6AcCyw67iYYsHnr2Tp3ohgYhlTM6/85U+GSI99bUo8QCR89D4KJyaNZpzM5ciB4QQTrQkCiSdgrVdLEyx6OvTxl8sCH2jFoCT9XZbgvXYTZyOkG9T4nMgQYAJGBiVX1A0CTMBuUONzIkMg8WNAeDLDysUKBowGeLog/DhkvbcXVI+T4fHM108YA+SBiYOmqgcmvbCXepN+buIJ2MiNHiSEhwuW3pqtfjQjAKzclx7/Nn2+xfOBzYBqcizxBGx079BSP/7mQfF84REzF9jp6sZLjz8V60R0Wqzn1BLQEhNaDCsakHZJOPf0s/45th4Ou0OAjZAKbiAhutNWYjVfq3J8vD0EmIABnLy13VwgpzqKbttqy+ojnOoWASZgADnPqHgqkFMdfekJNjaqEek9xQSswbBZN/yD6UdqSnOyVwSYgDUIQguGebY8Rk4Gx3lerwat3pNMwBAMnwnZggOeLizRI8AEDMHUrmQEDz1K7lYs0SPABAzBNIyAYXkhp3JWhwgwAUMAmxyud7PH2JAlegSYgCGYTo4M1+Xyux91kESSkfqluDAU4UaflrXYsPvvZx5rwH6izW3VIbBvNGC3v6PRjSbr9Y25OpQ5oyEC+4aADe8g4g
Pv/vc/4teXL3XtIxjx5SS+OiZg5RHj9c35v70vrtzibdj6yfrUExDvCb/y5z8y8frJukBbA0vAbsZuuK92x4p2nNdsPxg4nrK7fYAtMUQHloAx3Kup0hLP22otfEsOvEfy2+//kJ0P4noIgXpTRcBWBgaI9/J3nuXfAwkQJO5oKgjYysDAOu/ZZ58Tzz/E/n5xE662fiKgXBFC57WrhVSy9vi+T7948fcNDQzPA5pfq+z3Q9Za2yZXskLqFaFFXtOXpL+kSaNpFTYw9u5J+wSUggiYMmEDY7AeeGoIyAbGYBHPXk3iCcgGhn3UgxkmloBsYAwm4XBVrjVCtFzJSi0WySaZdlxXKJUM7yw2MAaXfLgy3wgROnlGyOWf/oJXMAabf1VXp1whaB6QWEnzgEkQfnd3fz1FJbU2P46rNVGRhRHoAwKu45hWpJSLyRj09QE0biI6BKwNghqVlmIREZeMEBZGoB8I2N7W1e51snuxFhwwjftxBdxGqhHYtYLlinKwFgwJ6sVUw8M3HzcCruP1tgpjwAzNA6LBctkbGMbdONfPCPgaULsrSpQ9AvqZjA8jEDMCWPQwQtxThaNHF5GAEZKUuUBzc/w1sAhYgxfc86ZhKpYwfAJZGIE4EShX5gDJEfoq2jEEJPvDJHZ2duJsm+tmBISdhKbIdcBR0YCuSeyyk5FiBOJBoFwum4q1CmpAkVlArsuWsAGHv+JDwKlwTEm12wVnMsMLaBIakA0RIMESFwI7FQ0oMvcW0IbpgguHDq3Q60gLmIopuzwfGBf4aa/XJx8ZIIVDhRWfgIjQJMx7CLe3txGwMAKRI7C95e1EobVjuIYGPCPEiywgY7vEBAQOLNEjYDWgEtkLtnafgIXDRxdsN2wL2kIcMgK9IlCiHw03E9C09FuYmjIGCOr0CVhp4B2EW/c2K0kOGIFoELA9qxT6XLDGagJmcxewVQc0IGvBIEwc7wUBn09G+x0lju1KFQFhDWvhvobDrAV3QeJYbwhsrG+YCmiW5c3ammjYVy3Fu3fzeqf0IW0TMz02NipGRup/tKX6DE4xAo0RwNhvY+Me+ZuKxYemjhRqS1ZpQBw0c4JKziG+ubnFE9MAgqUrBOB2BQ5Basd+tsI6AuJA4b77L5JqNBPT6xue+rQncMgItIsAhnHGzU+Ii4Wp6rGfrSOUgOZgWf/cGCTkIbO15bHYnsQhI9AKgS2adC6ZRQ1676OsTY8adk5DAsJZUArnHE6CGvW9WMNq4TxGIICA1/V6U3lSu3PW6TlQxI82JCBKFA4fm9fSfQ1rxGura0xCHzaONEIA5ANXwBl6/fK1Rl2vPZ+Ges3FWMXl7UtkxsxkMhkxOTGRyK18m6PAR9tBAKRbhaKC1zM5OZPV+2Sr85pqQJxsrOKy+wLMaFS8ukbsTsg+Mq3A4ePtI1BDvkXp6BfaObulBrSVFJeWpnVGXsL8IGtCiwqHQCCEfM81G/cFUWubgDiploQHJg6ITEL2FAyCwvH2EcCYb31t3Xa70Hxtkw+tdERAnBAkITa0nJicYBICmBSKNTisl0un5ANkHRMQJxkSZtXbMExoiy0xOjrCS3YAJkWCeb7NzU3T/cLgwJiv3W43CFNXBLQVfHrn1rzU6gzSueFhMUJrx9wlW3SSGWK8B+eC7corvJhqURulVwsFz8W+07vuiYBorLi8dFpLdZ60YR5dMrRhLpfr9Dq4/D5AoErrkdsezfSde/jwkfleLr1nAqJxdMkiK8/TvgqnkAYRxw+Mi6FsYjfhx22mRuDPh3XdgI/ogqSl2m663FrQIiGgrdRoQyHPYqoGeSDgcG6YNaIFaJ+FdcSjuWCztHb/sYtR3UqkBLQX9entpVellj+zRIRGNGQcybFWtCANYIjxHd4N3yEnghK9nIa0J+huaay3vjXf7Viv0e3GQkDbWK1GtPkgYyabEVkKFS3vZenD0l8EQC58sB8QVriwY4HZmMAnnbmeBSLIO2J980LUxLN3GysBbSPF5eUZV5RPS5k5iakbmx8MoSVhQWNaR2W8EHEvvUtQk6b8oNhywbykxy2Bau8Tc3MQTaHVYMYnr0I4bESKfDN3V3uyl14gar5Ha7QLeFMyvEh0udVPMrp6G9ZULBbzYmJsljaonlFCPUFKfroRKRtWwgeiQYC25aOh0lVXO7RZOO0PtHZvIS5N1+iC+07ARhfiWdJERqny9C86Tf+/eaXVg6a81NP2PC1kXkidt2kTasqj8lV5iU/Q5vJ2f+/AveKn17wkHdfejxC5knajp2kT7AdutmSmnUmjsGADzXYd/T+j7cbUE7Qx3wAAAABJRU5ErkJggg=="
+ },
+ "language": "zh-Hans",
+ "position": 5
+ },
+ {
+ "chunk_structure": "hierarchical_model",
+ "description": "This knowledge pipeline uses LLMs to extract content from images and tables in documents and automatically generate descriptive annotations for contextual enrichment.",
+ "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/anthropic:0.2.0@a776815b091c81662b2b54295ef4b8a54b5533c2ec1c66c7c8f2feea724f3248\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: e642577f-da15-4c03-81b9-c9dec9189a3c\n icon_background: null\n icon_type: image\n icon_url: data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i\/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ\/\/gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn\/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss\/XQ+FFPtRK1UmreriMJkz\/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF\/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4\/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L\/DbbY\/uozqmjwOUSvvVtuN8+tKLa4\/73GI1KDEAYek8x7vta\/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7\/\/u2m8e9VyweGIdQAPenLpD\/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO\/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL\/YOcjg\/X1IrKyvd3mo313JQKAXQLgSEgBGO3v\/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI\/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB\/G8FZXLwh8k761gt0PCJ8\/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8aVLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b\/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W\/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4\/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA\/EHwDoO9rY\/0cJ7iIC+JEgSQUwHpB4\/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK\/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa03y7jNg7s\/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm\/\/EWkDqiiw1qR6W1TC7r11JlIurX\/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9\/aBROfCkQLT\/Iugiwfp\/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw\/0wCy9WO595tiBVmLoviZBTBq\/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE\/v1MAjjI+rHcYgVZifz7mf
o5pACsE\/XRDycjlYUVhPvT1QV1dTmT\/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP\/n2k\/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT\/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ\/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm\/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e\/tRtuYtuPnd3he\/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE\/CGqZOfa5kAkOViENFy++A\/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD\/baSh8bDvA9zb1ZAe5N67J\/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb\/rQ2MzBxABG4ePMJAFhtC0o1o\/VLo4\/EYCD4GM5bEMYtYJi\/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwKTh4wQFQlZCfChgesH\/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF\/\/9NPLHIAC4fShSDMHmQRQIEwTgemmlVkABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah\/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O\/LoZClX\/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT\/l2N6O94WMl03iLx6QtwR\/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM\/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pBwzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3\/S\/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN\/vrq09CsfVAyB6JrRE\/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6\/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1\/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9\/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0\/D18PHAwHETdfX1x5SI\/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr\/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq\/Y8fTrFGENESMBQ\/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI\/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c\/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd\/E0A2Hh31YSYwnYlgHx\/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn\/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0\/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf\/6po5x6m7bEJa1q2JnURg\/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII=\n name: Contextual Enrichment Using LLM\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751336942081-source-1750400198569-target\n selected: false\n source: '1751336942081'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: 
custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: llm\n targetType: tool\n id: 1758002850987-source-1751336942081-target\n source: '1758002850987'\n sourceHandle: source\n target: '1751336942081'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1756915693835-source-1758027159239-target\n source: '1756915693835'\n sourceHandle: source\n target: '1758027159239'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: llm\n id: 1758027159239-source-1758002850987-target\n source: '1758027159239'\n sourceHandle: source\n target: '1758002850987'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751336942081'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 474.7618603027596\n y: 282\n positionAbsolute:\n x: 474.7618603027596\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 458\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 5 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Text Input, Online Drive, Online Doc, and Web Crawler. Different\n types of Data Sources have different input and output types. The output\n of File Upload and Online Drive are files, while the output of Online Doc\n and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. 
However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 458\n id: '1751264451381'\n position:\n x: -893.2836123260277\n y: 378.2537898330178\n positionAbsolute:\n x: -893.2836123260277\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. 
It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -704.0614991386192\n y: -73.30453110517956\n positionAbsolute:\n x: -704.0614991386192\n y: -73.30453110517956\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 304\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. 
MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 304\n id: '1751266402561'\n position:\n x: -555.2228329530462\n y: 592.0458661166498\n positionAbsolute:\n x: -555.2228329530462\n y: 592.0458661166498\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. 
These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 153.2996965006646\n y: 378.2537898330178\n positionAbsolute:\n x: 153.2996965006646\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 482.3389174180554\n y: 437.9839361130071\n positionAbsolute:\n x: 482.3389174180554\n y: 437.9839361130071\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks 
result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. 
'\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n 
tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1758002850987.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751336942081'\n position:\n x: 144.55897745117755\n y: 282\n positionAbsolute:\n x: 144.55897745117755\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 446\n selected: true\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"In\n this step, the LLM is responsible for enriching and reorganizing content,\n along with images and tables. The goal is to maintain the integrity of image\n URLs and tables while providing contextual descriptions and summaries to\n enhance understanding. The content should be structured into well-organized\n paragraphs, using double newlines to separate them. The LLM should enrich\n the document by adding relevant descriptions for images and extracting key\n insights from tables, ensuring the content remains easy to retrieve within\n a Retrieval-Augmented Generation (RAG) system. The final output should preserve\n the original structure, making it more accessible for knowledge retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 446\n id: '1753967810859'\n position:\n x: -176.67459682201036\n y: 405.2790698865377\n positionAbsolute:\n x: -176.67459682201036\n y: 405.2790698865377\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - pdf\n - doc\n - docx\n - pptx\n - ppt\n - jpg\n - png\n - jpeg\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1756915693835'\n position:\n x: -893.2836123260277\n y: 282\n positionAbsolute:\n x: -893.2836123260277\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n context:\n enabled: false\n variable_selector: []\n model:\n completion_params:\n temperature: 0.7\n mode: chat\n name: claude-3-5-sonnet-20240620\n provider: langgenius\/anthropic\/anthropic\n prompt_template:\n - id: beb97761-d30d-4549-9b67-de1b8292e43d\n role: system\n text: \"You are an AI document assistant. 
\\nYour tasks are:\\nEnrich the content\\\n \\ contextually:\\nAdd meaningful descriptions for each image.\\nSummarize\\\n \\ key information from each table.\\nOutput the enriched content\u00a0with clear\\\n \\ annotations showing the\u00a0corresponding image and table positions, so\\\n \\ the text can later be aligned back into the original document. Preserve\\\n \\ any ![image] URLs from the input text.\\nYou will receive two inputs:\\n\\\n The file and text\u00a0(may contain images url and tables).\\nThe final output\\\n \\ should be a\u00a0single, enriched version of the original document with ![image]\\\n \\ url preserved.\\nGenerate output directly without saying words like:\\\n \\ Here's the enriched version of the original text with the image description\\\n \\ inserted.\"\n - id: f92ef0cd-03a7-48a7-80e8-bcdc965fb399\n role: user\n text: The file is {{#1756915693835.file#}} and the text are\u00a0{{#1758027159239.text#}}.\n selected: false\n title: LLM\n type: llm\n vision:\n configs:\n detail: high\n variable_selector:\n - '1756915693835'\n - file\n enabled: true\n height: 88\n id: '1758002850987'\n position:\n x: -176.67459682201036\n y: 282\n positionAbsolute:\n x: -176.67459682201036\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: The file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v1\u3068v2\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v1\u548cv2\u7248\u672c\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: (For local deployment v1 and v2) Parsing method, can be\n auto, ocr, or txt. Default is auto. 
If results are not satisfactory, try\n ocr\n max: null\n min: null\n name: parse_method\n options:\n - icon: ''\n label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - icon: ''\n label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - icon: ''\n label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable formula\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable formula\n recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable table\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable table\n recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API and local deployment v2) Specify document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n ja_JP: 
\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3067\u306f\u8a00\u8a9e\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3059\uff09\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n pt_BR: '(For official API and local deployment v2) Specify document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n zh_Hans: \uff08\u4ec5\u9650\u5b98\u65b9api\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff08\u672c\u5730\u90e8\u7f72\u9700\u8981\u6307\u5b9a\u660e\u786e\u7684\u8bed\u8a00\uff0c\u9ed8\u8ba4ch\uff09\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API and local deployment v2) Specify document\n language, default ch, can be set to auto(local deployment need to specify\n the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: 
\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: pipeline\n form: form\n human_description:\n en_US: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306fpipeline\n pt_BR: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u793a\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\uff0c\u9ed8\u8ba4\u503c\u4e3apipeline\n label:\n en_US: Backend type\n ja_JP: \u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u30bf\u30a4\u30d7\n pt_BR: Backend type\n zh_Hans: \u89e3\u6790\u540e\u7aef\n llm_description: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n max: null\n min: null\n name: backend\n options:\n - icon: ''\n label:\n en_US: pipeline\n ja_JP: pipeline\n pt_BR: pipeline\n zh_Hans: pipeline\n value: pipeline\n - icon: ''\n label:\n en_US: vlm-transformers\n ja_JP: vlm-transformers\n pt_BR: vlm-transformers\n zh_Hans: vlm-transformers\n value: vlm-transformers\n - icon: ''\n label:\n en_US: vlm-sglang-engine\n ja_JP: vlm-sglang-engine\n pt_BR: vlm-sglang-engine\n zh_Hans: vlm-sglang-engine\n value: vlm-sglang-engine\n - icon: ''\n label:\n en_US: vlm-sglang-client\n ja_JP: vlm-sglang-client\n pt_BR: vlm-sglang-client\n zh_Hans: vlm-sglang-client\n value: vlm-sglang-client\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: ''\n form: form\n human_description:\n en_US: '(For local deployment v2 when backend is vlm-sglang-client) Example:\n 
http:\/\/127.0.0.1:8000, default is empty'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528 \u89e3\u6790\u5f8c\u7aef\u304cvlm-sglang-client\u306e\u5834\u5408\uff09\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306f\u7a7a\n pt_BR: '(For local deployment v2 when backend is vlm-sglang-client) Example:\n http:\/\/127.0.0.1:8000, default is empty'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c \u89e3\u6790\u540e\u7aef\u4e3avlm-sglang-client\u65f6\uff09\u793a\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\uff0c\u9ed8\u8ba4\u503c\u4e3a\u7a7a\n label:\n en_US: sglang-server url\n ja_JP: sglang-server\u30a2\u30c9\u30ec\u30b9\n pt_BR: sglang-server url\n zh_Hans: sglang-server\u5730\u5740\n llm_description: '(For local deployment v2 when backend is vlm-sglang-client)\n Example: http:\/\/127.0.0.1:8000, default is empty'\n max: null\n min: null\n name: sglang_server_url\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n backend: ''\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n parse_method: ''\n sglang_server_url: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: Parse File\n tool_configurations:\n backend:\n type: constant\n value: pipeline\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: true\n enable_table:\n type: constant\n value: 1\n extra_formats:\n type: mixed\n value: '[]'\n language:\n type: mixed\n value: auto\n parse_method:\n type: constant\n value: auto\n sglang_server_url:\n type: mixed\n value: ''\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756915693835'\n - file\n type: tool\n height: 270\n id: '1758027159239'\n position:\n x: -544.9739996945534\n y: 282\n positionAbsolute:\n x: -544.9739996945534\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 679.9701291615181\n y: -191.49392257836791\n zoom: 0.8239704766223018\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. 
You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: ''\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: ''\n type: checkbox\n unit: null\n variable: clean_2\n",
+ "graph": {
+ "edges": [
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "knowledge-index"
+ },
+ "id": "1751336942081-source-1750400198569-target",
+ "selected": false,
+ "source": "1751336942081",
+ "sourceHandle": "source",
+ "target": "1750400198569",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "llm",
+ "targetType": "tool"
+ },
+ "id": "1758002850987-source-1751336942081-target",
+ "source": "1758002850987",
+ "sourceHandle": "source",
+ "target": "1751336942081",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInIteration": false,
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "tool"
+ },
+ "id": "1756915693835-source-1758027159239-target",
+ "source": "1756915693835",
+ "sourceHandle": "source",
+ "target": "1758027159239",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "llm"
+ },
+ "id": "1758027159239-source-1758002850987-target",
+ "source": "1758027159239",
+ "sourceHandle": "source",
+ "target": "1758002850987",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ }
+ ],
+ "nodes": [
+ {
+ "data": {
+ "chunk_structure": "hierarchical_model",
+ "embedding_model": "jina-embeddings-v2-base-en",
+ "embedding_model_provider": "langgenius\/jina\/jina",
+ "index_chunk_variable_selector": [
+ "1751336942081",
+ "result"
+ ],
+ "indexing_technique": "high_quality",
+ "keyword_number": 10,
+ "retrieval_model": {
+ "reranking_enable": true,
+ "reranking_mode": "reranking_model",
+ "reranking_model": {
+ "reranking_model_name": "jina-reranker-v1-base-en",
+ "reranking_provider_name": "langgenius\/jina\/jina"
+ },
+ "score_threshold": 0,
+ "score_threshold_enabled": false,
+ "search_method": "hybrid_search",
+ "top_k": 3,
+ "weights": null
+ },
+ "selected": false,
+ "title": "Knowledge Base",
+ "type": "knowledge-index"
+ },
+ "height": 114,
+ "id": "1750400198569",
+ "position": {
+ "x": 474.7618603027596,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": 474.7618603027596,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 458,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 5 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Text Input, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 458,
+ "id": "1751264451381",
+ "position": {
+ "x": -893.2836123260277,
+ "y": 378.2537898330178
+ },
+ "positionAbsolute": {
+ "x": -893.2836123260277,
+ "y": 378.2537898330178
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 260,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source \u2192 use extractor to extract document content \u2192 split and clean content into structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 1182
+ },
+ "height": 260,
+ "id": "1751266376760",
+ "position": {
+ "x": -704.0614991386192,
+ "y": -73.30453110517956
+ },
+ "positionAbsolute": {
+ "x": -704.0614991386192,
+ "y": -73.30453110517956
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 1182
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 304,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is an advanced open-source document extractor designed specifically to convert complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into high-quality, machine-readable formats like Markdown and JSON. MinerU addresses challenges in document parsing such as layout detection, formula recognition, and multi-language support, which are critical for generating high-quality training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 304,
+ "id": "1751266402561",
+ "position": {
+ "x": -555.2228329530462,
+ "y": 592.0458661166498
+ },
+ "positionAbsolute": {
+ "x": -555.2228329530462,
+ "y": 592.0458661166498
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 554,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such as a paragraph, a section, or even an entire document\u2014that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 554,
+ "id": "1751266447821",
+ "position": {
+ "x": 153.2996965006646,
+ "y": 378.2537898330178
+ },
+ "positionAbsolute": {
+ "x": 153.2996965006646,
+ "y": 378.2537898330178
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 411,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 411,
+ "id": "1751266580099",
+ "position": {
+ "x": 482.3389174180554,
+ "y": 437.9839361130071
+ },
+ "positionAbsolute": {
+ "x": 482.3389174180554,
+ "y": 437.9839361130071
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "result": {
+ "description": "Parent child chunks result",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "",
+ "ja_JP": "",
+ "pt_BR": "",
+ "zh_Hans": ""
+ },
+ "label": {
+ "en_US": "Input Content",
+ "ja_JP": "Input Content",
+ "pt_BR": "Conte\u00fado de Entrada",
+ "zh_Hans": "\u8f93\u5165\u6587\u672c"
+ },
+ "llm_description": "The text you want to chunk.",
+ "max": null,
+ "min": null,
+ "name": "input_text",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": "paragraph",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.",
+ "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002"
+ },
+ "label": {
+ "en_US": "Parent Mode",
+ "ja_JP": "Parent Mode",
+ "pt_BR": "Modo Pai",
+ "zh_Hans": "\u7236\u5757\u6a21\u5f0f"
+ },
+ "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "max": null,
+ "min": null,
+ "name": "parent_mode",
+ "options": [
+ {
+ "label": {
+ "en_US": "Paragraph",
+ "ja_JP": "Paragraph",
+ "pt_BR": "Par\u00e1grafo",
+ "zh_Hans": "\u6bb5\u843d"
+ },
+ "value": "paragraph"
+ },
+ {
+ "label": {
+ "en_US": "Full Document",
+ "ja_JP": "Full Document",
+ "pt_BR": "Documento Completo",
+ "zh_Hans": "\u5168\u6587"
+ },
+ "value": "full_doc"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": "\n\n",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for chunking",
+ "ja_JP": "Separator used for chunking",
+ "pt_BR": "Separador usado para divis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26"
+ },
+ "label": {
+ "en_US": "Parent Delimiter",
+ "ja_JP": "Parent Delimiter",
+ "pt_BR": "Separador de Pai",
+ "zh_Hans": "\u7236\u5757\u5206\u9694\u7b26"
+ },
+ "llm_description": "The separator used to split chunks",
+ "max": null,
+ "min": null,
+ "name": "separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 1024,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for chunking",
+ "ja_JP": "Maximum length for chunking",
+ "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
+ },
+ "label": {
+ "en_US": "Maximum Parent Chunk Length",
+ "ja_JP": "Maximum Parent Chunk Length",
+ "pt_BR": "Comprimento M\u00e1ximo do Bloco Pai",
+ "zh_Hans": "\u6700\u5927\u7236\u5757\u957f\u5ea6"
+ },
+ "llm_description": "Maximum length allowed per chunk",
+ "max": null,
+ "min": null,
+ "name": "max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": ". ",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for subchunking",
+ "ja_JP": "Separator used for subchunking",
+ "pt_BR": "Separador usado para subdivis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26"
+ },
+ "label": {
+ "en_US": "Child Delimiter",
+ "ja_JP": "Child Delimiter",
+ "pt_BR": "Separador de Subdivis\u00e3o",
+ "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26"
+ },
+ "llm_description": "The separator used to split subchunks",
+ "max": null,
+ "min": null,
+ "name": "subchunk_separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 512,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for subchunking",
+ "ja_JP": "Maximum length for subchunking",
+ "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
+ },
+ "label": {
+ "en_US": "Maximum Child Chunk Length",
+ "ja_JP": "Maximum Child Chunk Length",
+ "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o",
+ "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6"
+ },
+ "llm_description": "Maximum length allowed per subchunk",
+ "max": null,
+ "min": null,
+ "name": "subchunk_max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove consecutive spaces, newlines and tabs",
+ "ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
+ "pt_BR": "Se deve remover espa\u00e7os extras no texto",
+ "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
+ },
+ "label": {
+ "en_US": "Replace consecutive spaces, newlines and tabs",
+ "ja_JP": "Replace consecutive spaces, newlines and tabs",
+ "pt_BR": "Substituir espa\u00e7os consecutivos, novas linhas e guias",
+ "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
+ },
+ "llm_description": "Whether to remove consecutive spaces, newlines and tabs",
+ "max": null,
+ "min": null,
+ "name": "remove_extra_spaces",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove URLs and emails in the text",
+ "ja_JP": "Whether to remove URLs and emails in the text",
+ "pt_BR": "Se deve remover URLs e e-mails no texto",
+ "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
+ },
+ "label": {
+ "en_US": "Delete all URLs and email addresses",
+ "ja_JP": "Delete all URLs and email addresses",
+ "pt_BR": "Remover todas as URLs e e-mails",
+ "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
+ },
+ "llm_description": "Whether to remove URLs and emails in the text",
+ "max": null,
+ "min": null,
+ "name": "remove_urls_emails",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ }
+ ],
+ "params": {
+ "input_text": "",
+ "max_length": "",
+ "parent_mode": "",
+ "remove_extra_spaces": "",
+ "remove_urls_emails": "",
+ "separator": "",
+ "subchunk_max_length": "",
+ "subchunk_separator": ""
+ },
+ "provider_id": "langgenius\/parentchild_chunker\/parentchild_chunker",
+ "provider_name": "langgenius\/parentchild_chunker\/parentchild_chunker",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Parent-child Chunker",
+ "tool_configurations": {},
+ "tool_description": "Process documents into parent-child chunk structures",
+ "tool_label": "Parent-child Chunker",
+ "tool_name": "parentchild_chunker",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "input_text": {
+ "type": "mixed",
+ "value": "{{#1758002850987.text#}}"
+ },
+ "max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Maximum_Parent_Length"
+ ]
+ },
+ "parent_mode": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Parent_Mode"
+ ]
+ },
+ "remove_extra_spaces": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_1"
+ ]
+ },
+ "remove_urls_emails": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_2"
+ ]
+ },
+ "separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.Parent_Delimiter#}}"
+ },
+ "subchunk_max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Maximum_Child_Length"
+ ]
+ },
+ "subchunk_separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.Child_Delimiter#}}"
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1751336942081",
+ "position": {
+ "x": 144.55897745117755,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": 144.55897745117755,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 446,
+ "selected": true,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"In this step, the LLM is responsible for enriching and reorganizing content, along with images and tables. The goal is to maintain the integrity of image URLs and tables while providing contextual descriptions and summaries to enhance understanding. The content should be structured into well-organized paragraphs, using double newlines to separate them. The LLM should enrich the document by adding relevant descriptions for images and extracting key insights from tables, ensuring the content remains easy to retrieve within a Retrieval-Augmented Generation (RAG) system. The final output should preserve the original structure, making it more accessible for knowledge retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 446,
+ "id": "1753967810859",
+ "position": {
+ "x": -176.67459682201036,
+ "y": 405.2790698865377
+ },
+ "positionAbsolute": {
+ "x": -176.67459682201036,
+ "y": 405.2790698865377
+ },
+ "selected": true,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "File",
+ "datasource_name": "upload-file",
+ "datasource_parameters": {},
+ "fileExtensions": [
+ "pdf",
+ "doc",
+ "docx",
+ "pptx",
+ "ppt",
+ "jpg",
+ "png",
+ "jpeg"
+ ],
+ "plugin_id": "langgenius\/file",
+ "provider_name": "file",
+ "provider_type": "local_file",
+ "selected": false,
+ "title": "File",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1756915693835",
+ "position": {
+ "x": -893.2836123260277,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -893.2836123260277,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "context": {
+ "enabled": false,
+ "variable_selector": []
+ },
+ "model": {
+ "completion_params": {
+ "temperature": 0.7
+ },
+ "mode": "chat",
+ "name": "claude-3-5-sonnet-20240620",
+ "provider": "langgenius\/anthropic\/anthropic"
+ },
+ "prompt_template": [
+ {
+ "id": "beb97761-d30d-4549-9b67-de1b8292e43d",
+ "role": "system",
+ "text": "You are an AI document assistant. \nYour tasks are:\nEnrich the content contextually:\nAdd meaningful descriptions for each image.\nSummarize key information from each table.\nOutput the enriched content\u00a0with clear annotations showing the\u00a0corresponding image and table positions, so the text can later be aligned back into the original document. Preserve any ![image] URLs from the input text.\nYou will receive two inputs:\nThe file and text\u00a0(may contain images url and tables).\nThe final output should be a\u00a0single, enriched version of the original document with ![image] url preserved.\nGenerate output directly without saying words like: Here's the enriched version of the original text with the image description inserted."
+ },
+ {
+ "id": "f92ef0cd-03a7-48a7-80e8-bcdc965fb399",
+ "role": "user",
+ "text": "The file is {{#1756915693835.file#}} and the text are\u00a0{{#1758027159239.text#}}."
+ }
+ ],
+ "selected": false,
+ "title": "LLM",
+ "type": "llm",
+ "vision": {
+ "configs": {
+ "detail": "high",
+ "variable_selector": [
+ "1756915693835",
+ "file"
+ ]
+ },
+ "enabled": true
+ }
+ },
+ "height": 88,
+ "id": "1758002850987",
+ "position": {
+ "x": -176.67459682201036,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -176.67459682201036,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "ja_JP": "\u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)",
+ "pt_BR": "The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
+ },
+ "label": {
+ "en_US": "file",
+ "ja_JP": "file",
+ "pt_BR": "file",
+ "zh_Hans": "file"
+ },
+ "llm_description": "The file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "max": null,
+ "min": null,
+ "name": "file",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "file"
+ },
+ {
+ "auto_generate": null,
+ "default": "auto",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
+ "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v1\u3068v2\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044",
+ "pt_BR": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
+ "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v1\u548cv2\u7248\u672c\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr"
+ },
+ "label": {
+ "en_US": "parse method",
+ "ja_JP": "\u89e3\u6790\u65b9\u6cd5",
+ "pt_BR": "parse method",
+ "zh_Hans": "\u89e3\u6790\u65b9\u6cd5"
+ },
+ "llm_description": "(For local deployment v1 and v2) Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
+ "max": null,
+ "min": null,
+ "name": "parse_method",
+ "options": [
+ {
+ "icon": "",
+ "label": {
+ "en_US": "auto",
+ "ja_JP": "auto",
+ "pt_BR": "auto",
+ "zh_Hans": "auto"
+ },
+ "value": "auto"
+ },
+ {
+ "icon": "",
+ "label": {
+ "en_US": "ocr",
+ "ja_JP": "ocr",
+ "pt_BR": "ocr",
+ "zh_Hans": "ocr"
+ },
+ "value": "ocr"
+ },
+ {
+ "icon": "",
+ "label": {
+ "en_US": "txt",
+ "ja_JP": "txt",
+ "pt_BR": "txt",
+ "zh_Hans": "txt"
+ },
+ "value": "txt"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": 1,
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API and local deployment v2) Whether to enable formula recognition",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
+ "pt_BR": "(For official API and local deployment v2) Whether to enable formula recognition",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
+ },
+ "label": {
+ "en_US": "Enable formula recognition",
+ "ja_JP": "\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
+ "pt_BR": "Enable formula recognition",
+ "zh_Hans": "\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
+ },
+ "llm_description": "(For official API and local deployment v2) Whether to enable formula recognition",
+ "max": null,
+ "min": null,
+ "name": "enable_formula",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": 1,
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API and local deployment v2) Whether to enable table recognition",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
+ "pt_BR": "(For official API and local deployment v2) Whether to enable table recognition",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b"
+ },
+ "label": {
+ "en_US": "Enable table recognition",
+ "ja_JP": "\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
+ "pt_BR": "Enable table recognition",
+ "zh_Hans": "\u5f00\u542f\u8868\u683c\u8bc6\u522b"
+ },
+ "llm_description": "(For official API and local deployment v2) Whether to enable table recognition",
+ "max": null,
+ "min": null,
+ "name": "enable_table",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": "auto",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3067\u306f\u8a00\u8a9e\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3059\uff09\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
+ "pt_BR": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
+ "zh_Hans": "\uff08\u4ec5\u9650\u5b98\u65b9api\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff08\u672c\u5730\u90e8\u7f72\u9700\u8981\u6307\u5b9a\u660e\u786e\u7684\u8bed\u8a00\uff0c\u9ed8\u8ba4ch\uff09\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5"
+ },
+ "label": {
+ "en_US": "Document language",
+ "ja_JP": "\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e",
+ "pt_BR": "Document language",
+ "zh_Hans": "\u6587\u6863\u8bed\u8a00"
+ },
+ "llm_description": "(For official API and local deployment v2) Specify document language, default ch, can be set to auto(local deployment need to specify the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5",
+ "max": null,
+ "min": null,
+ "name": "language",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Whether to enable OCR recognition",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
+ "pt_BR": "(For official API) Whether to enable OCR recognition",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b"
+ },
+ "label": {
+ "en_US": "Enable OCR recognition",
+ "ja_JP": "OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
+ "pt_BR": "Enable OCR recognition",
+ "zh_Hans": "\u5f00\u542fOCR\u8bc6\u522b"
+ },
+ "llm_description": "(For official API) Whether to enable OCR recognition",
+ "max": null,
+ "min": null,
+ "name": "enable_ocr",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": "[]",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059",
+ "pt_BR": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a"
+ },
+ "label": {
+ "en_US": "Extra export formats",
+ "ja_JP": "\u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f",
+ "pt_BR": "Extra export formats",
+ "zh_Hans": "\u989d\u5916\u5bfc\u51fa\u683c\u5f0f"
+ },
+ "llm_description": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
+ "max": null,
+ "min": null,
+ "name": "extra_formats",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": "pipeline",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline",
+ "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306fpipeline",
+ "pt_BR": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline",
+ "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u793a\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\uff0c\u9ed8\u8ba4\u503c\u4e3apipeline"
+ },
+ "label": {
+ "en_US": "Backend type",
+ "ja_JP": "\u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u30bf\u30a4\u30d7",
+ "pt_BR": "Backend type",
+ "zh_Hans": "\u89e3\u6790\u540e\u7aef"
+ },
+ "llm_description": "(For local deployment v2) Example: pipeline, vlm-transformers, vlm-sglang-engine, vlm-sglang-client, default is pipeline",
+ "max": null,
+ "min": null,
+ "name": "backend",
+ "options": [
+ {
+ "icon": "",
+ "label": {
+ "en_US": "pipeline",
+ "ja_JP": "pipeline",
+ "pt_BR": "pipeline",
+ "zh_Hans": "pipeline"
+ },
+ "value": "pipeline"
+ },
+ {
+ "icon": "",
+ "label": {
+ "en_US": "vlm-transformers",
+ "ja_JP": "vlm-transformers",
+ "pt_BR": "vlm-transformers",
+ "zh_Hans": "vlm-transformers"
+ },
+ "value": "vlm-transformers"
+ },
+ {
+ "icon": "",
+ "label": {
+ "en_US": "vlm-sglang-engine",
+ "ja_JP": "vlm-sglang-engine",
+ "pt_BR": "vlm-sglang-engine",
+ "zh_Hans": "vlm-sglang-engine"
+ },
+ "value": "vlm-sglang-engine"
+ },
+ {
+ "icon": "",
+ "label": {
+ "en_US": "vlm-sglang-client",
+ "ja_JP": "vlm-sglang-client",
+ "pt_BR": "vlm-sglang-client",
+ "zh_Hans": "vlm-sglang-client"
+ },
+ "value": "vlm-sglang-client"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": "",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty",
+ "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528 \u89e3\u6790\u5f8c\u7aef\u304cvlm-sglang-client\u306e\u5834\u5408\uff09\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306f\u7a7a",
+ "pt_BR": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty",
+ "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c \u89e3\u6790\u540e\u7aef\u4e3avlm-sglang-client\u65f6\uff09\u793a\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\uff0c\u9ed8\u8ba4\u503c\u4e3a\u7a7a"
+ },
+ "label": {
+ "en_US": "sglang-server url",
+ "ja_JP": "sglang-server\u30a2\u30c9\u30ec\u30b9",
+ "pt_BR": "sglang-server url",
+ "zh_Hans": "sglang-server\u5730\u5740"
+ },
+ "llm_description": "(For local deployment v2 when backend is vlm-sglang-client) Example: http:\/\/127.0.0.1:8000, default is empty",
+ "max": null,
+ "min": null,
+ "name": "sglang_server_url",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ }
+ ],
+ "params": {
+ "backend": "",
+ "enable_formula": "",
+ "enable_ocr": "",
+ "enable_table": "",
+ "extra_formats": "",
+ "file": "",
+ "language": "",
+ "parse_method": "",
+ "sglang_server_url": ""
+ },
+ "provider_id": "langgenius\/mineru\/mineru",
+ "provider_name": "langgenius\/mineru\/mineru",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Parse File",
+ "tool_configurations": {
+ "backend": {
+ "type": "constant",
+ "value": "pipeline"
+ },
+ "enable_formula": {
+ "type": "constant",
+ "value": 1
+ },
+ "enable_ocr": {
+ "type": "constant",
+ "value": true
+ },
+ "enable_table": {
+ "type": "constant",
+ "value": 1
+ },
+ "extra_formats": {
+ "type": "mixed",
+ "value": "[]"
+ },
+ "language": {
+ "type": "mixed",
+ "value": "auto"
+ },
+ "parse_method": {
+ "type": "constant",
+ "value": "auto"
+ },
+ "sglang_server_url": {
+ "type": "mixed",
+ "value": ""
+ }
+ },
+ "tool_description": "a tool for parsing text, tables, and images, supporting multiple formats such as pdf, pptx, docx, etc. supporting multiple languages such as English, Chinese, etc.",
+ "tool_label": "Parse File",
+ "tool_name": "parse-file",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "file": {
+ "type": "variable",
+ "value": [
+ "1756915693835",
+ "file"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 270,
+ "id": "1758027159239",
+ "position": {
+ "x": -544.9739996945534,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -544.9739996945534,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ }
+ ],
+ "viewport": {
+ "x": 679.9701291615181,
+ "y": -191.49392257836791,
+ "zoom": 0.8239704766223018
+ }
+ },
+ "icon_info": {
+ "icon": "e642577f-da15-4c03-81b9-c9dec9189a3c",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i\/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ\/\/gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn\/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss\/XQ+FFPtRK1UmreriMJkz\/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF\/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4\/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L\/DbbY\/uozqmjwOUSvvVtuN8+tKLa4\/73GI1KDEAYek8x7vta\/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7\/\/u2m8e9VyweGIdQAPenLpD\/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO\/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL\/YOcjg\/X1IrKyvd3mo313JQKAXQLgSEgBGO3v\/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI\/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB\/G8FZXLwh8k761gt0PCJ8\/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8aVLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b\/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W\/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4\/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA\/EHwDoO9rY\/0cJ7iIC+JEgSQUwHpB4\/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK\/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa03y7jNg7s\/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm\/\/EWkDqiiw1qR6W1TC7r11JlIurX\/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9\/aBROfCkQLT\/Iugiwfp\/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw\/0wCy9WO595tiBVmLoviZBTBq\/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE\/v1MAjjI+rHcYgVZifz7mfo5pACsE\/XRDycjlYUVhPvT1QV1dTmT\/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP\/n2k\/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT\/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ\/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm\/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e\/tRtuYtuPnd3he\/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE\/CGqZOfa5kAkOViENFy++A\/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD\/baSh8bDvA9zb1ZAe5N67J\/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb\/rQ2MzBxABG4ePMJAFhtC0o1o\/VLo4\/EYCD4GM5bEMYtYJi\/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwKTh4wQFQlZCfChgesH\/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF\/\/9NPLHIAC4fShSDMHmQRQIEwTgemmlV
kABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah\/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O\/LoZClX\/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT\/l2N6O94WMl03iLx6QtwR\/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM\/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pBwzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3\/S\/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN\/vrq09CsfVAyB6JrRE\/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6\/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1\/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9\/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0\/D18PHAwHETdfX1x5SI\/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr\/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq\/Y8fTrFGENESMBQ\/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI\/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c\/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd\/E0A2Hh31YSYwnYlgHx\/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn\/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0\/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf\/6po5x6m7bEJa1q2JnURg\/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII="
+ },
+ "id": "103825d3-7018-43ae-bcf0-f3c001f3eb69",
+ "name": "Contextual Enrichment Using LLM"
+},
+{
+ "chunk_structure": "hierarchical_model",
+ "description": "This Knowledge Pipeline extracts images and tables from complex PDF documents for downstream processing.",
+ "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 87426868-91d6-4774-a535-5fd4595a77b3\n icon_background: null\n icon_type: image\n icon_url: data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4\/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7\/aIw93xPvBBHPDezBHYBbC7+O2Pb9++\/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo\/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp\/uehbXzPWuizmNoFaC4CQdFxCE3V9\/bcd4vk8txpLwW\/f6FPZ9RT8c\/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2\/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU\/u3\/\/Uk\/aPIB+a1rm5Y+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y\/HaZJH1oAgnyflHZAPfrrSieOJkS\/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74\/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn\/HUDChQgkHIqAvcg3ijM5\/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq\/myUJUxCV+5\/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC\/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8\/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w\/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx\/bjpDSDEp7EgYLQgjWR8GEywTcBHmz\/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut\/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7\/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKLEkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4\/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+\/\/JlMVdOrOfzrKY8p3\/C9\/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s\/\/8U+x9\/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5\/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V\/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd\/xN\/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO\/CBMxwDWP2TN5JyATMMAFRNJBw98t\/Z7
yU4xePCTg+dqk9Wf\/6a\/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19\/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE\/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN\/kJt3GZA3Yt2r5QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm\/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8\/Yb7ALxxH5+lmBn+nY7H3\/g04\/qFnRJDtvvSWO\/faTcbIoxDOFaYLnLl\/SnZBgrYI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa\/+9P\/tH9Oj9kGKAaCTI85gSCQTN\/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB\/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao\/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6\/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F\/8iSWIvq1Zzod1oCxwNlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR\/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf\/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826\/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy\/MBU66HwmbXboI9qyZd160CiYBaLCww\/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2\/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA\/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv\/FYX+\/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO\/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnURAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L\/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC\/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a\/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9\/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2\/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB\/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4\/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV\/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7\/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8zo3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw\/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt\/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASU
VORK5CYII=\n name: Complex PDF with Images & Tables\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750400203722-source-1751281136356-target\n selected: false\n source: '1750400203722'\n sourceHandle: source\n target: '1751281136356'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751338398711-source-1750400198569-target\n selected: false\n source: '1751338398711'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1751281136356-source-1751338398711-target\n selected: false\n source: '1751281136356'\n sourceHandle: source\n target: '1751338398711'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751338398711'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 355.92518399555183\n y: 282\n positionAbsolute:\n x: 355.92518399555183\n y: 282\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File Upload\n type: datasource\n height: 52\n id: '1750400203722'\n position:\n x: -579\n y: 282\n positionAbsolute:\n x: -579\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 337\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and WebCrawler\n are pages. 
You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 358\n height: 337\n id: '1751264451381'\n position:\n x: -990.8091030156684\n y: 282\n positionAbsolute:\n x: -990.8091030156684\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 358\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. 
It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -579\n y: -22.64803881585007\n positionAbsolute:\n x: -579\n y: -22.64803881585007\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 541\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor for large language models (LLMs) like MinerU is a tool\n that preprocesses and converts diverse document types into structured, clean,\n and machine-readable data. This structured data can then be used to train\n or augment LLMs and retrieval-augmented generation (RAG) systems by providing\n them with accurate, well-organized content from varied sources. 
\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 541\n id: '1751266402561'\n position:\n x: -263.7680017647218\n y: 558.328085421591\n positionAbsolute:\n x: -263.7680017647218\n y: 558.328085421591\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. 
These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 42.95253988413964\n y: 366.1915342509804\n positionAbsolute:\n x: 42.95253988413964\n y: 366.1915342509804\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 355.92518399555183\n y: 434.6494699299023\n positionAbsolute:\n x: 355.92518399555183\n y: 434.6494699299023\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n credential_id: fd1cbc33-1481-47ee-9af2-954b53d350e0\n is_team_authorization: false\n output_schema:\n 
properties:\n full_zip_url:\n description: The zip URL of the complete parsed result\n type: string\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment service)Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u30b5\u30fc\u30d3\u30b9\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment service)Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72\u670d\u52a1\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: Parsing method, can be auto, ocr, or txt. 
Default is auto.\n If results are not satisfactory, try ocr\n max: null\n min: null\n name: parse_method\n options:\n - label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable formula recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable formula recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API) Whether to enable formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable table recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable table recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API) Whether to enable table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: doclayout_yolo\n form: form\n human_description:\n en_US: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\uff1adoclayout_yolo\u3001layoutlmv3\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u5024\u306f doclayout_yolo\u3002doclayout_yolo\n \u306f\u81ea\u5df1\u958b\u767a\u30e2\u30c7\u30eb\u3067\u3001\u52b9\u679c\u304c\u3088\u308a\u826f\u3044\n pt_BR: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u53ef\u9009\u503c\uff1adoclayout_yolo\u3001layoutlmv3\uff0c\u9ed8\u8ba4\u503c\u4e3a doclayout_yolo\u3002doclayout_yolo\n \u4e3a\u81ea\u7814\u6a21\u578b\uff0c\u6548\u679c\u66f4\u597d\n label:\n en_US: Layout model\n ja_JP: \u30ec\u30a4\u30a2\u30a6\u30c8\u691c\u51fa\u30e2\u30c7\u30eb\n pt_BR: Layout model\n zh_Hans: \u5e03\u5c40\u68c0\u6d4b\u6a21\u578b\n llm_description: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. 
doclayout_yolo is a self-developed model\n withbetter effect'\n max: null\n min: null\n name: layout_model\n options:\n - label:\n en_US: doclayout_yolo\n ja_JP: doclayout_yolo\n pt_BR: doclayout_yolo\n zh_Hans: doclayout_yolo\n value: doclayout_yolo\n - label:\n en_US: layoutlmv3\n ja_JP: layoutlmv3\n pt_BR: layoutlmv3\n zh_Hans: layoutlmv3\n value: layoutlmv3\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n pt_BR: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API) Specify document language, default\n ch, can be set to auto, when auto, the model will automatically identify\n document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n 
type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n layout_model: ''\n parse_method: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: MinerU\n tool_configurations:\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: 0\n enable_table:\n type: constant\n value: 1\n extra_formats:\n type: constant\n value: '[]'\n language:\n type: constant\n value: auto\n layout_model:\n type: constant\n value: doclayout_yolo\n parse_method:\n type: constant\n value: auto\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. 
supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1750400203722'\n - file\n type: tool\n height: 244\n id: '1751281136356'\n position:\n x: -263.7680017647218\n y: 282\n positionAbsolute:\n x: -263.7680017647218\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: 
\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: 
''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1751281136356.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751338398711'\n position:\n x: 42.95253988413964\n y: 282\n positionAbsolute:\n x: 42.95253988413964\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 628.3302331655243\n y: 120.08894361588159\n zoom: 0.7027501395646496\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. 
You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n",
+ "graph": {
+ "edges": [
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "datasource",
+ "targetType": "tool"
+ },
+ "id": "1750400203722-source-1751281136356-target",
+ "selected": false,
+ "source": "1750400203722",
+ "sourceHandle": "source",
+ "target": "1751281136356",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "knowledge-index"
+ },
+ "id": "1751338398711-source-1750400198569-target",
+ "selected": false,
+ "source": "1751338398711",
+ "sourceHandle": "source",
+ "target": "1750400198569",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ },
+ {
+ "data": {
+ "isInLoop": false,
+ "sourceType": "tool",
+ "targetType": "tool"
+ },
+ "id": "1751281136356-source-1751338398711-target",
+ "selected": false,
+ "source": "1751281136356",
+ "sourceHandle": "source",
+ "target": "1751338398711",
+ "targetHandle": "target",
+ "type": "custom",
+ "zIndex": 0
+ }
+ ],
+ "nodes": [
+ {
+ "data": {
+ "chunk_structure": "hierarchical_model",
+ "embedding_model": "jina-embeddings-v2-base-en",
+ "embedding_model_provider": "langgenius\/jina\/jina",
+ "index_chunk_variable_selector": [
+ "1751338398711",
+ "result"
+ ],
+ "indexing_technique": "high_quality",
+ "keyword_number": 10,
+ "retrieval_model": {
+ "reranking_enable": true,
+ "reranking_mode": "reranking_model",
+ "reranking_model": {
+ "reranking_model_name": "jina-reranker-v1-base-en",
+ "reranking_provider_name": "langgenius\/jina\/jina"
+ },
+ "score_threshold": 0,
+ "score_threshold_enabled": false,
+ "search_method": "hybrid_search",
+ "top_k": 3,
+ "weights": null
+ },
+ "selected": true,
+ "title": "Knowledge Base",
+ "type": "knowledge-index"
+ },
+ "height": 114,
+ "id": "1750400198569",
+ "position": {
+ "x": 355.92518399555183,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": 355.92518399555183,
+ "y": 282
+ },
+ "selected": true,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "datasource_configurations": {},
+ "datasource_label": "File",
+ "datasource_name": "upload-file",
+ "datasource_parameters": {},
+ "fileExtensions": [
+ "txt",
+ "markdown",
+ "mdx",
+ "pdf",
+ "html",
+ "xlsx",
+ "xls",
+ "vtt",
+ "properties",
+ "doc",
+ "docx",
+ "csv",
+ "eml",
+ "msg",
+ "pptx",
+ "xml",
+ "epub",
+ "ppt",
+ "md"
+ ],
+ "plugin_id": "langgenius\/file",
+ "provider_name": "file",
+ "provider_type": "local_file",
+ "selected": false,
+ "title": "File Upload",
+ "type": "datasource"
+ },
+ "height": 52,
+ "id": "1750400203722",
+ "position": {
+ "x": -579,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -579,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 337,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\": File Upload, Online Drive, Online Doc, and Web Crawler. Different types of Data Sources have different input and output types. The output of File Upload and Online Drive are files, while the output of Online Doc and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A Knowledge Pipeline can have multiple data sources. Each data source can be selected more than once with different settings. Each added data source is a tab on the add file interface. However, each time the user can only select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 358
+ },
+ "height": 337,
+ "id": "1751264451381",
+ "position": {
+ "x": -990.8091030156684,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -990.8091030156684,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 358
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 260,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" starts with Data Source as the starting node and ends with the knowledge base node. The general steps are: import documents from the data source \u2192 use extractor to extract document content \u2192 split and clean content into structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The user input variables required by the Knowledge Pipeline node must be predefined and managed via the Input Field section located in the top-right corner of the orchestration canvas. It determines what input fields the end users will see and need to fill in when importing files to the knowledge base through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique Inputs: Input fields defined here are only available to the selected data source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global Inputs: These input fields are shared across all subsequent nodes after the data source and are typically set during the Process Documents step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 1182
+ },
+ "height": 260,
+ "id": "1751266376760",
+ "position": {
+ "x": -579,
+ "y": -22.64803881585007
+ },
+ "positionAbsolute": {
+ "x": -579,
+ "y": -22.64803881585007
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 1182
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 541,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A document extractor for large language models (LLMs) like MinerU is a tool that preprocesses and converts diverse document types into structured, clean, and machine-readable data. This structured data can then be used to train or augment LLMs and retrieval-augmented generation (RAG) systems by providing them with accurate, well-organized content from varied sources. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" is an advanced open-source document extractor designed specifically to convert complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into high-quality, machine-readable formats like Markdown and JSON. MinerU addresses challenges in document parsing such as layout detection, formula recognition, and multi-language support, which are critical for generating high-quality training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 541,
+ "id": "1751266402561",
+ "position": {
+ "x": -263.7680017647218,
+ "y": 558.328085421591
+ },
+ "positionAbsolute": {
+ "x": -263.7680017647218,
+ "y": 558.328085421591
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 554,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\" addresses the dilemma of context and precision by leveraging a two-tier hierarchical approach that effectively balances the trade-off between accurate matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Query Matching with Child Chunks: Small, focused pieces of information, often as concise as a single sentence within a paragraph, are used to match the user's query. These child chunks enable precise and relevant initial retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"- Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such as a paragraph, a section, or even an entire document\u2014that include the matched child chunks are then retrieved. These parent chunks provide comprehensive context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 554,
+ "id": "1751266447821",
+ "position": {
+ "x": 42.95253988413964,
+ "y": 366.1915342509804
+ },
+ "positionAbsolute": {
+ "x": 42.95253988413964,
+ "y": 366.1915342509804
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "author": "TenTen",
+ "desc": "",
+ "height": 411,
+ "selected": false,
+ "showAuthor": true,
+ "text": "{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\", each with different retrieval strategies. High-Quality mode uses embeddings for vectorization and supports vector, full-text, and hybrid retrieval, offering more accurate results but higher resource usage. Economical mode uses keyword-based inverted indexing with no token consumption but lower accuracy; upgrading to High-Quality is possible, but downgrading requires creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"* Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}",
+ "theme": "blue",
+ "title": "",
+ "type": "",
+ "width": 240
+ },
+ "height": 411,
+ "id": "1751266580099",
+ "position": {
+ "x": 355.92518399555183,
+ "y": 434.6494699299023
+ },
+ "positionAbsolute": {
+ "x": 355.92518399555183,
+ "y": 434.6494699299023
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom-note",
+ "width": 240
+ },
+ {
+ "data": {
+ "credential_id": "fd1cbc33-1481-47ee-9af2-954b53d350e0",
+ "is_team_authorization": false,
+ "output_schema": {
+ "properties": {
+ "full_zip_url": {
+ "description": "The zip URL of the complete parsed result",
+ "type": "string"
+ },
+ "images": {
+ "description": "The images extracted from the file",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "ja_JP": "\u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)",
+ "pt_BR": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"
+ },
+ "label": {
+ "en_US": "file",
+ "ja_JP": "file",
+ "pt_BR": "file",
+ "zh_Hans": "file"
+ },
+ "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)",
+ "max": null,
+ "min": null,
+ "name": "file",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "file"
+ },
+ {
+ "auto_generate": null,
+ "default": "auto",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For local deployment service)Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
+ "ja_JP": "\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u30b5\u30fc\u30d3\u30b9\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044",
+ "pt_BR": "(For local deployment service)Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
+ "zh_Hans": "\uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72\u670d\u52a1\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr"
+ },
+ "label": {
+ "en_US": "parse method",
+ "ja_JP": "\u89e3\u6790\u65b9\u6cd5",
+ "pt_BR": "parse method",
+ "zh_Hans": "\u89e3\u6790\u65b9\u6cd5"
+ },
+ "llm_description": "Parsing method, can be auto, ocr, or txt. Default is auto. If results are not satisfactory, try ocr",
+ "max": null,
+ "min": null,
+ "name": "parse_method",
+ "options": [
+ {
+ "label": {
+ "en_US": "auto",
+ "ja_JP": "auto",
+ "pt_BR": "auto",
+ "zh_Hans": "auto"
+ },
+ "value": "auto"
+ },
+ {
+ "label": {
+ "en_US": "ocr",
+ "ja_JP": "ocr",
+ "pt_BR": "ocr",
+ "zh_Hans": "ocr"
+ },
+ "value": "ocr"
+ },
+ {
+ "label": {
+ "en_US": "txt",
+ "ja_JP": "txt",
+ "pt_BR": "txt",
+ "zh_Hans": "txt"
+ },
+ "value": "txt"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": 1,
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Whether to enable formula recognition",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
+ "pt_BR": "(For official API) Whether to enable formula recognition",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
+ },
+ "label": {
+ "en_US": "Enable formula recognition",
+ "ja_JP": "\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
+ "pt_BR": "Enable formula recognition",
+ "zh_Hans": "\u5f00\u542f\u516c\u5f0f\u8bc6\u522b"
+ },
+ "llm_description": "(For official API) Whether to enable formula recognition",
+ "max": null,
+ "min": null,
+ "name": "enable_formula",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": 1,
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Whether to enable table recognition",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
+ "pt_BR": "(For official API) Whether to enable table recognition",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b"
+ },
+ "label": {
+ "en_US": "Enable table recognition",
+ "ja_JP": "\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
+ "pt_BR": "Enable table recognition",
+ "zh_Hans": "\u5f00\u542f\u8868\u683c\u8bc6\u522b"
+ },
+ "llm_description": "(For official API) Whether to enable table recognition",
+ "max": null,
+ "min": null,
+ "name": "enable_table",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": "doclayout_yolo",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model with better effect",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\uff1adoclayout_yolo\u3001layoutlmv3\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u5024\u306f doclayout_yolo\u3002doclayout_yolo \u306f\u81ea\u5df1\u958b\u767a\u30e2\u30c7\u30eb\u3067\u3001\u52b9\u679c\u304c\u3088\u308a\u826f\u3044",
+ "pt_BR": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model with better effect",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u53ef\u9009\u503c\uff1adoclayout_yolo\u3001layoutlmv3\uff0c\u9ed8\u8ba4\u503c\u4e3a doclayout_yolo\u3002doclayout_yolo \u4e3a\u81ea\u7814\u6a21\u578b\uff0c\u6548\u679c\u66f4\u597d"
+ },
+ "label": {
+ "en_US": "Layout model",
+ "ja_JP": "\u30ec\u30a4\u30a2\u30a6\u30c8\u691c\u51fa\u30e2\u30c7\u30eb",
+ "pt_BR": "Layout model",
+ "zh_Hans": "\u5e03\u5c40\u68c0\u6d4b\u6a21\u578b"
+ },
+ "llm_description": "(For official API) Optional values: doclayout_yolo, layoutlmv3, default value is doclayout_yolo. doclayout_yolo is a self-developed model withbetter effect",
+ "max": null,
+ "min": null,
+ "name": "layout_model",
+ "options": [
+ {
+ "label": {
+ "en_US": "doclayout_yolo",
+ "ja_JP": "doclayout_yolo",
+ "pt_BR": "doclayout_yolo",
+ "zh_Hans": "doclayout_yolo"
+ },
+ "value": "doclayout_yolo"
+ },
+ {
+ "label": {
+ "en_US": "layoutlmv3",
+ "ja_JP": "layoutlmv3",
+ "pt_BR": "layoutlmv3",
+ "zh_Hans": "layoutlmv3"
+ },
+ "value": "layoutlmv3"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": "auto",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
+ "pt_BR": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5"
+ },
+ "label": {
+ "en_US": "Document language",
+ "ja_JP": "\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e",
+ "pt_BR": "Document language",
+ "zh_Hans": "\u6587\u6863\u8bed\u8a00"
+ },
+ "llm_description": "(For official API) Specify document language, default ch, can be set to auto, when auto, the model will automatically identify document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5",
+ "max": null,
+ "min": null,
+ "name": "language",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Whether to enable OCR recognition",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b",
+ "pt_BR": "(For official API) Whether to enable OCR recognition",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b"
+ },
+ "label": {
+ "en_US": "Enable OCR recognition",
+ "ja_JP": "OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b",
+ "pt_BR": "Enable OCR recognition",
+ "zh_Hans": "\u5f00\u542fOCR\u8bc6\u522b"
+ },
+ "llm_description": "(For official API) Whether to enable OCR recognition",
+ "max": null,
+ "min": null,
+ "name": "enable_ocr",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": "[]",
+ "form": "form",
+ "human_description": {
+ "en_US": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
+ "ja_JP": "\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059",
+ "pt_BR": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
+ "zh_Hans": "\uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a"
+ },
+ "label": {
+ "en_US": "Extra export formats",
+ "ja_JP": "\u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f",
+ "pt_BR": "Extra export formats",
+ "zh_Hans": "\u989d\u5916\u5bfc\u51fa\u683c\u5f0f"
+ },
+ "llm_description": "(For official API) Example: [\"docx\",\"html\"], markdown, json are the default export formats, no need to set, this parameter only supports one or more of docx, html, latex",
+ "max": null,
+ "min": null,
+ "name": "extra_formats",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ }
+ ],
+ "params": {
+ "enable_formula": "",
+ "enable_ocr": "",
+ "enable_table": "",
+ "extra_formats": "",
+ "file": "",
+ "language": "",
+ "layout_model": "",
+ "parse_method": ""
+ },
+ "provider_id": "langgenius\/mineru\/mineru",
+ "provider_name": "langgenius\/mineru\/mineru",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "MinerU",
+ "tool_configurations": {
+ "enable_formula": {
+ "type": "constant",
+ "value": 1
+ },
+ "enable_ocr": {
+ "type": "constant",
+ "value": 0
+ },
+ "enable_table": {
+ "type": "constant",
+ "value": 1
+ },
+ "extra_formats": {
+ "type": "constant",
+ "value": "[]"
+ },
+ "language": {
+ "type": "constant",
+ "value": "auto"
+ },
+ "layout_model": {
+ "type": "constant",
+ "value": "doclayout_yolo"
+ },
+ "parse_method": {
+ "type": "constant",
+ "value": "auto"
+ }
+ },
+ "tool_description": "a tool for parsing text, tables, and images, supporting multiple formats such as pdf, pptx, docx, etc. supporting multiple languages such as English, Chinese, etc.",
+ "tool_label": "Parse File",
+ "tool_name": "parse-file",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "file": {
+ "type": "variable",
+ "value": [
+ "1750400203722",
+ "file"
+ ]
+ }
+ },
+ "type": "tool"
+ },
+ "height": 244,
+ "id": "1751281136356",
+ "position": {
+ "x": -263.7680017647218,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": -263.7680017647218,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ },
+ {
+ "data": {
+ "is_team_authorization": true,
+ "output_schema": {
+ "properties": {
+ "result": {
+ "description": "Parent child chunks result",
+ "items": {
+ "type": "object"
+ },
+ "type": "array"
+ }
+ },
+ "type": "object"
+ },
+ "paramSchemas": [
+ {
+ "auto_generate": null,
+ "default": null,
+ "form": "llm",
+ "human_description": {
+ "en_US": "",
+ "ja_JP": "",
+ "pt_BR": "",
+ "zh_Hans": ""
+ },
+ "label": {
+ "en_US": "Input Content",
+ "ja_JP": "Input Content",
+ "pt_BR": "Conte\u00fado de Entrada",
+ "zh_Hans": "\u8f93\u5165\u6587\u672c"
+ },
+ "llm_description": "The text you want to chunk.",
+ "max": null,
+ "min": null,
+ "name": "input_text",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": "paragraph",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.",
+ "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002"
+ },
+ "label": {
+ "en_US": "Parent Mode",
+ "ja_JP": "Parent Mode",
+ "pt_BR": "Modo Pai",
+ "zh_Hans": "\u7236\u5757\u6a21\u5f0f"
+ },
+ "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.",
+ "max": null,
+ "min": null,
+ "name": "parent_mode",
+ "options": [
+ {
+ "label": {
+ "en_US": "Paragraph",
+ "ja_JP": "Paragraph",
+ "pt_BR": "Par\u00e1grafo",
+ "zh_Hans": "\u6bb5\u843d"
+ },
+ "value": "paragraph"
+ },
+ {
+ "label": {
+ "en_US": "Full Document",
+ "ja_JP": "Full Document",
+ "pt_BR": "Documento Completo",
+ "zh_Hans": "\u5168\u6587"
+ },
+ "value": "full_doc"
+ }
+ ],
+ "placeholder": null,
+ "precision": null,
+ "required": true,
+ "scope": null,
+ "template": null,
+ "type": "select"
+ },
+ {
+ "auto_generate": null,
+ "default": "\n\n",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for chunking",
+ "ja_JP": "Separator used for chunking",
+ "pt_BR": "Separador usado para divis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26"
+ },
+ "label": {
+ "en_US": "Parent Delimiter",
+ "ja_JP": "Parent Delimiter",
+ "pt_BR": "Separador de Pai",
+ "zh_Hans": "\u7236\u5757\u5206\u9694\u7b26"
+ },
+ "llm_description": "The separator used to split chunks",
+ "max": null,
+ "min": null,
+ "name": "separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 1024,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for chunking",
+ "ja_JP": "Maximum length for chunking",
+ "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
+ },
+ "label": {
+ "en_US": "Maximum Parent Chunk Length",
+ "ja_JP": "Maximum Parent Chunk Length",
+ "pt_BR": "Comprimento M\u00e1ximo do Bloco Pai",
+ "zh_Hans": "\u6700\u5927\u7236\u5757\u957f\u5ea6"
+ },
+ "llm_description": "Maximum length allowed per chunk",
+ "max": null,
+ "min": null,
+ "name": "max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": ". ",
+ "form": "llm",
+ "human_description": {
+ "en_US": "Separator used for subchunking",
+ "ja_JP": "Separator used for subchunking",
+ "pt_BR": "Separador usado para subdivis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26"
+ },
+ "label": {
+ "en_US": "Child Delimiter",
+ "ja_JP": "Child Delimiter",
+ "pt_BR": "Separador de Subdivis\u00e3o",
+ "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26"
+ },
+ "llm_description": "The separator used to split subchunks",
+ "max": null,
+ "min": null,
+ "name": "subchunk_separator",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "string"
+ },
+ {
+ "auto_generate": null,
+ "default": 512,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Maximum length for subchunking",
+ "ja_JP": "Maximum length for subchunking",
+ "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o",
+ "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"
+ },
+ "label": {
+ "en_US": "Maximum Child Chunk Length",
+ "ja_JP": "Maximum Child Chunk Length",
+ "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o",
+ "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6"
+ },
+ "llm_description": "Maximum length allowed per subchunk",
+ "max": null,
+ "min": null,
+ "name": "subchunk_max_length",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "number"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove consecutive spaces, newlines and tabs",
+ "ja_JP": "Whether to remove consecutive spaces, newlines and tabs",
+ "pt_BR": "Se deve remover espa\u00e7os extras no texto",
+ "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
+ },
+ "label": {
+ "en_US": "Replace consecutive spaces, newlines and tabs",
+ "ja_JP": "Replace consecutive spaces, newlines and tabs",
+ "pt_BR": "Substituir espa\u00e7os consecutivos, novas linhas e guias",
+ "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"
+ },
+ "llm_description": "Whether to remove consecutive spaces, newlines and tabs",
+ "max": null,
+ "min": null,
+ "name": "remove_extra_spaces",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ },
+ {
+ "auto_generate": null,
+ "default": 0,
+ "form": "llm",
+ "human_description": {
+ "en_US": "Whether to remove URLs and emails in the text",
+ "ja_JP": "Whether to remove URLs and emails in the text",
+ "pt_BR": "Se deve remover URLs e e-mails no texto",
+ "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
+ },
+ "label": {
+ "en_US": "Delete all URLs and email addresses",
+ "ja_JP": "Delete all URLs and email addresses",
+ "pt_BR": "Remover todas as URLs e e-mails",
+ "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"
+ },
+ "llm_description": "Whether to remove URLs and emails in the text",
+ "max": null,
+ "min": null,
+ "name": "remove_urls_emails",
+ "options": [],
+ "placeholder": null,
+ "precision": null,
+ "required": false,
+ "scope": null,
+ "template": null,
+ "type": "boolean"
+ }
+ ],
+ "params": {
+ "input_text": "",
+ "max_length": "",
+ "parent_mode": "",
+ "remove_extra_spaces": "",
+ "remove_urls_emails": "",
+ "separator": "",
+ "subchunk_max_length": "",
+ "subchunk_separator": ""
+ },
+ "provider_id": "langgenius\/parentchild_chunker\/parentchild_chunker",
+ "provider_name": "langgenius\/parentchild_chunker\/parentchild_chunker",
+ "provider_type": "builtin",
+ "selected": false,
+ "title": "Parent-child Chunker",
+ "tool_configurations": {},
+ "tool_description": "Process documents into parent-child chunk structures",
+ "tool_label": "Parent-child Chunker",
+ "tool_name": "parentchild_chunker",
+ "tool_node_version": "2",
+ "tool_parameters": {
+ "input_text": {
+ "type": "mixed",
+ "value": "{{#1751281136356.text#}}"
+ },
+ "max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Maximum_Parent_Length"
+ ]
+ },
+ "parent_mode": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Parent_Mode"
+ ]
+ },
+ "remove_extra_spaces": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_1"
+ ]
+ },
+ "remove_urls_emails": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "clean_2"
+ ]
+ },
+ "separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.Parent_Delimiter#}}"
+ },
+ "subchunk_max_length": {
+ "type": "variable",
+ "value": [
+ "rag",
+ "shared",
+ "Maximum_Child_Length"
+ ]
+ },
+ "subchunk_separator": {
+ "type": "mixed",
+ "value": "{{#rag.shared.Child_Delimiter#}}"
+ }
+ },
+ "type": "tool"
+ },
+ "height": 52,
+ "id": "1751338398711",
+ "position": {
+ "x": 42.95253988413964,
+ "y": 282
+ },
+ "positionAbsolute": {
+ "x": 42.95253988413964,
+ "y": 282
+ },
+ "selected": false,
+ "sourcePosition": "right",
+ "targetPosition": "left",
+ "type": "custom",
+ "width": 242
+ }
+ ],
+ "viewport": {
+ "x": 628.3302331655243,
+ "y": 120.08894361588159,
+ "zoom": 0.7027501395646496
+ }
+ },
+ "icon_info": {
+ "icon": "87426868-91d6-4774-a535-5fd4595a77b3",
+ "icon_background": null,
+ "icon_type": "image",
+ "icon_url": "data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4\/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7\/aIw93xPvBBHPDezBHYBbC7+O2Pb9++\/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo\/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp\/uehbXzPWuizmNoFaC4CQdFxCE3V9\/bcd4vk8txpLwW\/f6FPZ9RT8c\/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2\/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU\/u3\/\/Uk\/aPIB+a1rm5Y+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y\/HaZJH1oAgnyflHZAPfrrSieOJkS\/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74\/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn\/HUDChQgkHIqAvcg3ijM5\/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq\/myUJUxCV+5\/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC\/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8\/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w\/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx\/bjpDSDEp7EgYLQgjWR8GEywTcBHmz\/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut\/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7\/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKLEkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4\/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+\/\/JlMVdOrOfzrKY8p3\/C9\/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s\/\/8U+x9\/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5\/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V\/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd\/xN\/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO\/CBMxwDWP2TN5JyATMMAFRNJBw98t\/Z7yU4xePCTg+dqk9Wf\/6a\/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19\/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE\/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN\/kJt3GZA3Yt2r5QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm\/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8\/Yb7ALxxH5+lmBn+nY7H3\/g04\/qFnRJDtvvSWO\/faTcbIoxDOFaYLnLl\/SnZBgr
YI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa\/+9P\/tH9Oj9kGKAaCTI85gSCQTN\/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB\/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao\/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6\/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F\/8iSWIvq1Zzod1oCxwNlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR\/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf\/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826\/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy\/MBU66HwmbXboI9qyZd160CiYBaLCww\/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2\/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA\/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv\/FYX+\/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO\/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnURAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L\/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC\/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a\/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9\/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2\/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB\/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4\/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV\/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7\/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8zo3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw\/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt\/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASUVORK5CYII="
+ },
+ "id": "629cb5b8-490a-48bc-808b-ffc13085cb4f",
+ "name": "Complex PDF with Images & Tables"
+}
+ }
+}
\ No newline at end of file
diff --git a/api/contexts/__init__.py b/api/contexts/__init__.py
index 2126a06f75..7c16bc231f 100644
--- a/api/contexts/__init__.py
+++ b/api/contexts/__init__.py
@@ -9,6 +9,7 @@ if TYPE_CHECKING:
from core.model_runtime.entities.model_entities import AIModelEntity
from core.plugin.entities.plugin_daemon import PluginModelProviderEntity
from core.tools.plugin_tool.provider import PluginToolProviderController
+ from core.trigger.provider import PluginTriggerProviderController
"""
@@ -41,3 +42,11 @@ datasource_plugin_providers: RecyclableContextVar[dict[str, "DatasourcePluginPro
datasource_plugin_providers_lock: RecyclableContextVar[Lock] = RecyclableContextVar(
ContextVar("datasource_plugin_providers_lock")
)
+
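+# Per-request cache of plugin trigger provider controllers and its guarding
+# lock, mirroring the datasource provider cache and lock defined above.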
+plugin_trigger_providers: RecyclableContextVar[dict[str, "PluginTriggerProviderController"]] = RecyclableContextVar(
+ ContextVar("plugin_trigger_providers")
+)
+
+plugin_trigger_providers_lock: RecyclableContextVar[Lock] = RecyclableContextVar(
+ ContextVar("plugin_trigger_providers_lock")
+)
diff --git a/api/controllers/common/errors.py b/api/controllers/common/errors.py
index 6e2ea952fc..252cf3549a 100644
--- a/api/controllers/common/errors.py
+++ b/api/controllers/common/errors.py
@@ -25,6 +25,12 @@ class UnsupportedFileTypeError(BaseHTTPException):
code = 415
+class BlockedFileExtensionError(BaseHTTPException):
+ error_code = "file_extension_blocked"
+ description = "The file extension is blocked for security reasons."
+ code = 400
+
+
class TooManyFilesError(BaseHTTPException):
error_code = "too_many_files"
description = "Only one file is allowed."
diff --git a/api/controllers/common/helpers.py b/api/controllers/common/helpers.py
index 6a5197635e..ef89e66980 100644
--- a/api/controllers/common/helpers.py
+++ b/api/controllers/common/helpers.py
@@ -24,7 +24,7 @@ except ImportError:
)
else:
warnings.warn("To use python-magic guess MIMETYPE, you need to install `libmagic`", stacklevel=2)
- magic = None # type: ignore
+ magic = None # type: ignore[assignment]
from pydantic import BaseModel
diff --git a/api/controllers/common/schema.py b/api/controllers/common/schema.py
new file mode 100644
index 0000000000..e0896a8dc2
--- /dev/null
+++ b/api/controllers/common/schema.py
@@ -0,0 +1,26 @@
+"""Helpers for registering Pydantic models with Flask-RESTX namespaces."""
+
+from flask_restx import Namespace
+from pydantic import BaseModel
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+def register_schema_model(namespace: Namespace, model: type[BaseModel]) -> None:
+ """Register a single BaseModel with a namespace for Swagger documentation."""
+
+ namespace.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+def register_schema_models(namespace: Namespace, *models: type[BaseModel]) -> None:
+ """Register multiple BaseModels with a namespace."""
+
+ for model in models:
+ register_schema_model(namespace, model)
+
+
+__all__ = [
+ "DEFAULT_REF_TEMPLATE_SWAGGER_2_0",
+ "register_schema_model",
+ "register_schema_models",
+]
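A sketch of how this helper is consumed, matching the `console_ns.expect(console_ns.models[...])` pattern used throughout the rest of this patch (namespace and payload names are illustrative):

```python
from flask_restx import Namespace
from pydantic import BaseModel

from controllers.common.schema import register_schema_models

ns = Namespace("example")


class ExamplePayload(BaseModel):
    name: str


# Registers the JSON schema under the model's class name.
register_schema_models(ns, ExamplePayload)

# Later, on a Resource method:
# @ns.expect(ns.models[ExamplePayload.__name__])
```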
diff --git a/api/controllers/console/__init__.py b/api/controllers/console/__init__.py
index 621f5066e4..ad878fc266 100644
--- a/api/controllers/console/__init__.py
+++ b/api/controllers/console/__init__.py
@@ -66,6 +66,7 @@ from .app import (
workflow_draft_variable,
workflow_run,
workflow_statistic,
+ workflow_trigger,
)
# Import auth controllers
@@ -126,6 +127,7 @@ from .workspace import (
models,
plugin,
tool_providers,
+ trigger_providers,
workspace,
)
@@ -196,6 +198,7 @@ __all__ = [
"statistic",
"tags",
"tool_providers",
+ "trigger_providers",
"version",
"website",
"workflow",
@@ -203,5 +206,6 @@ __all__ = [
"workflow_draft_variable",
"workflow_run",
"workflow_statistic",
+ "workflow_trigger",
"workspace",
]
diff --git a/api/controllers/console/admin.py b/api/controllers/console/admin.py
index 93f242ad28..a25ca5ef51 100644
--- a/api/controllers/console/admin.py
+++ b/api/controllers/console/admin.py
@@ -3,19 +3,46 @@ from functools import wraps
from typing import ParamSpec, TypeVar
from flask import request
-from flask_restx import Resource, fields, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy import select
-from sqlalchemy.orm import Session
from werkzeug.exceptions import NotFound, Unauthorized
+from configs import dify_config
+from constants.languages import supported_language
+from controllers.console import console_ns
+from controllers.console.wraps import only_edition_cloud
+from core.db.session_factory import session_factory
+from extensions.ext_database import db
+from libs.token import extract_access_token
+from models.model import App, InstalledApp, RecommendedApp
+
P = ParamSpec("P")
R = TypeVar("R")
-from configs import dify_config
-from constants.languages import supported_language
-from controllers.console import api, console_ns
-from controllers.console.wraps import only_edition_cloud
-from extensions.ext_database import db
-from models.model import App, InstalledApp, RecommendedApp
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class InsertExploreAppPayload(BaseModel):
+ app_id: str = Field(...)
+ desc: str | None = None
+ copyright: str | None = None
+ privacy_policy: str | None = None
+ custom_disclaimer: str | None = None
+ language: str = Field(...)
+ category: str = Field(...)
+ position: int = Field(...)
+
+ @field_validator("language")
+ @classmethod
+ def validate_language(cls, value: str) -> str:
+ return supported_language(value)
+
+
+console_ns.schema_model(
+ InsertExploreAppPayload.__name__,
+ InsertExploreAppPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
def admin_required(view: Callable[P, R]):
@@ -24,19 +51,9 @@ def admin_required(view: Callable[P, R]):
if not dify_config.ADMIN_API_KEY:
raise Unauthorized("API key is invalid.")
- auth_header = request.headers.get("Authorization")
- if auth_header is None:
+ auth_token = extract_access_token(request)
+ if not auth_token:
raise Unauthorized("Authorization header is missing.")
-
- if " " not in auth_header:
- raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.")
-
- auth_scheme, auth_token = auth_header.split(None, 1)
- auth_scheme = auth_scheme.lower()
-
- if auth_scheme != "bearer":
- raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.")
-
if auth_token != dify_config.ADMIN_API_KEY:
raise Unauthorized("API key is invalid.")
@@ -47,59 +64,36 @@ def admin_required(view: Callable[P, R]):
@console_ns.route("/admin/insert-explore-apps")
class InsertExploreAppListApi(Resource):
- @api.doc("insert_explore_app")
- @api.doc(description="Insert or update an app in the explore list")
- @api.expect(
- api.model(
- "InsertExploreAppRequest",
- {
- "app_id": fields.String(required=True, description="Application ID"),
- "desc": fields.String(description="App description"),
- "copyright": fields.String(description="Copyright information"),
- "privacy_policy": fields.String(description="Privacy policy"),
- "custom_disclaimer": fields.String(description="Custom disclaimer"),
- "language": fields.String(required=True, description="Language code"),
- "category": fields.String(required=True, description="App category"),
- "position": fields.Integer(required=True, description="Display position"),
- },
- )
- )
- @api.response(200, "App updated successfully")
- @api.response(201, "App inserted successfully")
- @api.response(404, "App not found")
+ @console_ns.doc("insert_explore_app")
+ @console_ns.doc(description="Insert or update an app in the explore list")
+ @console_ns.expect(console_ns.models[InsertExploreAppPayload.__name__])
+ @console_ns.response(200, "App updated successfully")
+ @console_ns.response(201, "App inserted successfully")
+ @console_ns.response(404, "App not found")
@only_edition_cloud
@admin_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("app_id", type=str, required=True, nullable=False, location="json")
- parser.add_argument("desc", type=str, location="json")
- parser.add_argument("copyright", type=str, location="json")
- parser.add_argument("privacy_policy", type=str, location="json")
- parser.add_argument("custom_disclaimer", type=str, location="json")
- parser.add_argument("language", type=supported_language, required=True, nullable=False, location="json")
- parser.add_argument("category", type=str, required=True, nullable=False, location="json")
- parser.add_argument("position", type=int, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = InsertExploreAppPayload.model_validate(console_ns.payload)
- app = db.session.execute(select(App).where(App.id == args["app_id"])).scalar_one_or_none()
+ app = db.session.execute(select(App).where(App.id == payload.app_id)).scalar_one_or_none()
if not app:
- raise NotFound(f"App '{args['app_id']}' is not found")
+ raise NotFound(f"App '{payload.app_id}' is not found")
site = app.site
if not site:
- desc = args["desc"] or ""
- copy_right = args["copyright"] or ""
- privacy_policy = args["privacy_policy"] or ""
- custom_disclaimer = args["custom_disclaimer"] or ""
+ desc = payload.desc or ""
+ copy_right = payload.copyright or ""
+ privacy_policy = payload.privacy_policy or ""
+ custom_disclaimer = payload.custom_disclaimer or ""
else:
- desc = site.description or args["desc"] or ""
- copy_right = site.copyright or args["copyright"] or ""
- privacy_policy = site.privacy_policy or args["privacy_policy"] or ""
- custom_disclaimer = site.custom_disclaimer or args["custom_disclaimer"] or ""
+ desc = site.description or payload.desc or ""
+ copy_right = site.copyright or payload.copyright or ""
+ privacy_policy = site.privacy_policy or payload.privacy_policy or ""
+ custom_disclaimer = site.custom_disclaimer or payload.custom_disclaimer or ""
- with Session(db.engine) as session:
+ with session_factory.create_session() as session:
recommended_app = session.execute(
- select(RecommendedApp).where(RecommendedApp.app_id == args["app_id"])
+ select(RecommendedApp).where(RecommendedApp.app_id == payload.app_id)
).scalar_one_or_none()
if not recommended_app:
@@ -109,9 +103,9 @@ class InsertExploreAppListApi(Resource):
copyright=copy_right,
privacy_policy=privacy_policy,
custom_disclaimer=custom_disclaimer,
- language=args["language"],
- category=args["category"],
- position=args["position"],
+ language=payload.language,
+ category=payload.category,
+ position=payload.position,
)
db.session.add(recommended_app)
@@ -125,9 +119,9 @@ class InsertExploreAppListApi(Resource):
recommended_app.copyright = copy_right
recommended_app.privacy_policy = privacy_policy
recommended_app.custom_disclaimer = custom_disclaimer
- recommended_app.language = args["language"]
- recommended_app.category = args["category"]
- recommended_app.position = args["position"]
+ recommended_app.language = payload.language
+ recommended_app.category = payload.category
+ recommended_app.position = payload.position
app.is_public = True
@@ -138,14 +132,14 @@ class InsertExploreAppListApi(Resource):
@console_ns.route("/admin/insert-explore-apps/")
class InsertExploreAppApi(Resource):
- @api.doc("delete_explore_app")
- @api.doc(description="Remove an app from the explore list")
- @api.doc(params={"app_id": "Application ID to remove"})
- @api.response(204, "App removed successfully")
+ @console_ns.doc("delete_explore_app")
+ @console_ns.doc(description="Remove an app from the explore list")
+ @console_ns.doc(params={"app_id": "Application ID to remove"})
+ @console_ns.response(204, "App removed successfully")
@only_edition_cloud
@admin_required
def delete(self, app_id):
- with Session(db.engine) as session:
+ with session_factory.create_session() as session:
recommended_app = session.execute(
select(RecommendedApp).where(RecommendedApp.app_id == str(app_id))
).scalar_one_or_none()
@@ -153,13 +147,13 @@ class InsertExploreAppApi(Resource):
if not recommended_app:
return {"result": "success"}, 204
- with Session(db.engine) as session:
+ with session_factory.create_session() as session:
app = session.execute(select(App).where(App.id == recommended_app.app_id)).scalar_one_or_none()
if app:
app.is_public = False
- with Session(db.engine) as session:
+ with session_factory.create_session() as session:
installed_apps = (
session.execute(
select(InstalledApp).where(
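The deleted inline parsing in `admin_required` above indicates what `libs.token.extract_access_token` is expected to do. A sketch reconstructed from those removed lines, not the actual helper:

```python
from flask import Request


def extract_access_token_sketch(request: Request) -> str | None:
    # Accept only "Bearer <token>" and return the token, else None.
    auth_header = request.headers.get("Authorization")
    if not auth_header or " " not in auth_header:
        return None
    scheme, token = auth_header.split(None, 1)
    if scheme.lower() != "bearer":
        return None
    return token
```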
diff --git a/api/controllers/console/apikey.py b/api/controllers/console/apikey.py
index fec527e4cb..9b0d4b1a78 100644
--- a/api/controllers/console/apikey.py
+++ b/api/controllers/console/apikey.py
@@ -1,5 +1,4 @@
import flask_restx
-from flask_login import current_user
from flask_restx import Resource, fields, marshal_with
from flask_restx._http import HTTPStatus
from sqlalchemy import select
@@ -8,12 +7,12 @@ from werkzeug.exceptions import Forbidden
from extensions.ext_database import db
from libs.helper import TimestampField
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.dataset import Dataset
from models.model import ApiToken, App
-from . import api, console_ns
-from .wraps import account_initialization_required, setup_required
+from . import console_ns
+from .wraps import account_initialization_required, edit_permission_required, setup_required
api_key_fields = {
"id": fields.String,
@@ -25,6 +24,12 @@ api_key_fields = {
api_key_list = {"data": fields.List(fields.Nested(api_key_fields), attribute="items")}
+api_key_item_model = console_ns.model("ApiKeyItem", api_key_fields)
+
+api_key_list_model = console_ns.model(
+ "ApiKeyList", {"data": fields.List(fields.Nested(api_key_item_model), attribute="items")}
+)
+
def _get_resource(resource_id, tenant_id, resource_model):
if resource_model == App:
@@ -53,11 +58,13 @@ class BaseApiKeyListResource(Resource):
token_prefix: str | None = None
max_keys = 10
- @marshal_with(api_key_list)
+ @marshal_with(api_key_list_model)
def get(self, resource_id):
assert self.resource_id_field is not None, "resource_id_field must be set"
resource_id = str(resource_id)
- _get_resource(resource_id, current_user.current_tenant_id, self.resource_model)
+ _, current_tenant_id = current_account_with_tenant()
+
+ _get_resource(resource_id, current_tenant_id, self.resource_model)
keys = db.session.scalars(
select(ApiToken).where(
ApiToken.type == self.resource_type, getattr(ApiToken, self.resource_id_field) == resource_id
@@ -65,14 +72,13 @@ class BaseApiKeyListResource(Resource):
).all()
return {"items": keys}
- @marshal_with(api_key_fields)
+ @marshal_with(api_key_item_model)
+ @edit_permission_required
def post(self, resource_id):
assert self.resource_id_field is not None, "resource_id_field must be set"
resource_id = str(resource_id)
- _get_resource(resource_id, current_user.current_tenant_id, self.resource_model)
- if not current_user.is_editor:
- raise Forbidden()
-
+ _, current_tenant_id = current_account_with_tenant()
+ _get_resource(resource_id, current_tenant_id, self.resource_model)
current_key_count = (
db.session.query(ApiToken)
.where(ApiToken.type == self.resource_type, getattr(ApiToken, self.resource_id_field) == resource_id)
@@ -89,7 +95,7 @@ class BaseApiKeyListResource(Resource):
key = ApiToken.generate_api_key(self.token_prefix or "", 24)
api_token = ApiToken()
setattr(api_token, self.resource_id_field, resource_id)
- api_token.tenant_id = current_user.current_tenant_id
+ api_token.tenant_id = current_tenant_id
api_token.token = key
api_token.type = self.resource_type
db.session.add(api_token)
@@ -104,13 +110,11 @@ class BaseApiKeyResource(Resource):
resource_model: type | None = None
resource_id_field: str | None = None
- def delete(self, resource_id, api_key_id):
+ def delete(self, resource_id: str, api_key_id: str):
assert self.resource_id_field is not None, "resource_id_field must be set"
- resource_id = str(resource_id)
- api_key_id = str(api_key_id)
- _get_resource(resource_id, current_user.current_tenant_id, self.resource_model)
+ current_user, current_tenant_id = current_account_with_tenant()
+ _get_resource(resource_id, current_tenant_id, self.resource_model)
- # The role of the current user in the ta table must be admin or owner
if not current_user.is_admin_or_owner:
raise Forbidden()
@@ -135,28 +139,23 @@ class BaseApiKeyResource(Resource):
@console_ns.route("/apps//api-keys")
class AppApiKeyListResource(BaseApiKeyListResource):
- @api.doc("get_app_api_keys")
- @api.doc(description="Get all API keys for an app")
- @api.doc(params={"resource_id": "App ID"})
- @api.response(200, "Success", api_key_list)
- def get(self, resource_id):
+ @console_ns.doc("get_app_api_keys")
+ @console_ns.doc(description="Get all API keys for an app")
+ @console_ns.doc(params={"resource_id": "App ID"})
+ @console_ns.response(200, "Success", api_key_list_model)
+ def get(self, resource_id): # type: ignore
"""Get all API keys for an app"""
return super().get(resource_id)
- @api.doc("create_app_api_key")
- @api.doc(description="Create a new API key for an app")
- @api.doc(params={"resource_id": "App ID"})
- @api.response(201, "API key created successfully", api_key_fields)
- @api.response(400, "Maximum keys exceeded")
- def post(self, resource_id):
+ @console_ns.doc("create_app_api_key")
+ @console_ns.doc(description="Create a new API key for an app")
+ @console_ns.doc(params={"resource_id": "App ID"})
+ @console_ns.response(201, "API key created successfully", api_key_item_model)
+ @console_ns.response(400, "Maximum keys exceeded")
+ def post(self, resource_id): # type: ignore
"""Create a new API key for an app"""
return super().post(resource_id)
- def after_request(self, resp):
- resp.headers["Access-Control-Allow-Origin"] = "*"
- resp.headers["Access-Control-Allow-Credentials"] = "true"
- return resp
-
resource_type = "app"
resource_model = App
resource_id_field = "app_id"
@@ -165,19 +164,14 @@ class AppApiKeyListResource(BaseApiKeyListResource):
@console_ns.route("/apps//api-keys/")
class AppApiKeyResource(BaseApiKeyResource):
- @api.doc("delete_app_api_key")
- @api.doc(description="Delete an API key for an app")
- @api.doc(params={"resource_id": "App ID", "api_key_id": "API key ID"})
- @api.response(204, "API key deleted successfully")
+ @console_ns.doc("delete_app_api_key")
+ @console_ns.doc(description="Delete an API key for an app")
+ @console_ns.doc(params={"resource_id": "App ID", "api_key_id": "API key ID"})
+ @console_ns.response(204, "API key deleted successfully")
def delete(self, resource_id, api_key_id):
"""Delete an API key for an app"""
return super().delete(resource_id, api_key_id)
- def after_request(self, resp):
- resp.headers["Access-Control-Allow-Origin"] = "*"
- resp.headers["Access-Control-Allow-Credentials"] = "true"
- return resp
-
resource_type = "app"
resource_model = App
resource_id_field = "app_id"
@@ -185,28 +179,23 @@ class AppApiKeyResource(BaseApiKeyResource):
@console_ns.route("/datasets//api-keys")
class DatasetApiKeyListResource(BaseApiKeyListResource):
- @api.doc("get_dataset_api_keys")
- @api.doc(description="Get all API keys for a dataset")
- @api.doc(params={"resource_id": "Dataset ID"})
- @api.response(200, "Success", api_key_list)
- def get(self, resource_id):
+ @console_ns.doc("get_dataset_api_keys")
+ @console_ns.doc(description="Get all API keys for a dataset")
+ @console_ns.doc(params={"resource_id": "Dataset ID"})
+ @console_ns.response(200, "Success", api_key_list_model)
+ def get(self, resource_id): # type: ignore
"""Get all API keys for a dataset"""
return super().get(resource_id)
- @api.doc("create_dataset_api_key")
- @api.doc(description="Create a new API key for a dataset")
- @api.doc(params={"resource_id": "Dataset ID"})
- @api.response(201, "API key created successfully", api_key_fields)
- @api.response(400, "Maximum keys exceeded")
- def post(self, resource_id):
+ @console_ns.doc("create_dataset_api_key")
+ @console_ns.doc(description="Create a new API key for a dataset")
+ @console_ns.doc(params={"resource_id": "Dataset ID"})
+ @console_ns.response(201, "API key created successfully", api_key_item_model)
+ @console_ns.response(400, "Maximum keys exceeded")
+ def post(self, resource_id): # type: ignore
"""Create a new API key for a dataset"""
return super().post(resource_id)
- def after_request(self, resp):
- resp.headers["Access-Control-Allow-Origin"] = "*"
- resp.headers["Access-Control-Allow-Credentials"] = "true"
- return resp
-
resource_type = "dataset"
resource_model = Dataset
resource_id_field = "dataset_id"
@@ -215,19 +204,14 @@ class DatasetApiKeyListResource(BaseApiKeyListResource):
@console_ns.route("/datasets//api-keys/")
class DatasetApiKeyResource(BaseApiKeyResource):
- @api.doc("delete_dataset_api_key")
- @api.doc(description="Delete an API key for a dataset")
- @api.doc(params={"resource_id": "Dataset ID", "api_key_id": "API key ID"})
- @api.response(204, "API key deleted successfully")
+ @console_ns.doc("delete_dataset_api_key")
+ @console_ns.doc(description="Delete an API key for a dataset")
+ @console_ns.doc(params={"resource_id": "Dataset ID", "api_key_id": "API key ID"})
+ @console_ns.response(204, "API key deleted successfully")
def delete(self, resource_id, api_key_id):
"""Delete an API key for a dataset"""
return super().delete(resource_id, api_key_id)
- def after_request(self, resp):
- resp.headers["Access-Control-Allow-Origin"] = "*"
- resp.headers["Access-Control-Allow-Credentials"] = "true"
- return resp
-
resource_type = "dataset"
resource_model = Dataset
resource_id_field = "dataset_id"
diff --git a/api/controllers/console/app/advanced_prompt_template.py b/api/controllers/console/app/advanced_prompt_template.py
index 315825db79..3bd61feb44 100644
--- a/api/controllers/console/app/advanced_prompt_template.py
+++ b/api/controllers/console/app/advanced_prompt_template.py
@@ -1,35 +1,39 @@
-from flask_restx import Resource, fields, reqparse
+from flask import request
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, setup_required
from libs.login import login_required
from services.advanced_prompt_template_service import AdvancedPromptTemplateService
+class AdvancedPromptTemplateQuery(BaseModel):
+ app_mode: str = Field(..., description="Application mode")
+ model_mode: str = Field(..., description="Model mode")
+ has_context: str = Field(default="true", description="Whether context is included")
+ model_name: str = Field(..., description="Model name")
+
+
+console_ns.schema_model(
+ AdvancedPromptTemplateQuery.__name__,
+ AdvancedPromptTemplateQuery.model_json_schema(ref_template="#/definitions/{model}"),
+)
+
+
@console_ns.route("/app/prompt-templates")
class AdvancedPromptTemplateList(Resource):
- @api.doc("get_advanced_prompt_templates")
- @api.doc(description="Get advanced prompt templates based on app mode and model configuration")
- @api.expect(
- api.parser()
- .add_argument("app_mode", type=str, required=True, location="args", help="Application mode")
- .add_argument("model_mode", type=str, required=True, location="args", help="Model mode")
- .add_argument("has_context", type=str, default="true", location="args", help="Whether has context")
- .add_argument("model_name", type=str, required=True, location="args", help="Model name")
- )
- @api.response(
+ @console_ns.doc("get_advanced_prompt_templates")
+ @console_ns.doc(description="Get advanced prompt templates based on app mode and model configuration")
+ @console_ns.expect(console_ns.models[AdvancedPromptTemplateQuery.__name__])
+ @console_ns.response(
200, "Prompt templates retrieved successfully", fields.List(fields.Raw(description="Prompt template data"))
)
- @api.response(400, "Invalid request parameters")
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument("app_mode", type=str, required=True, location="args")
- parser.add_argument("model_mode", type=str, required=True, location="args")
- parser.add_argument("has_context", type=str, required=False, default="true", location="args")
- parser.add_argument("model_name", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = AdvancedPromptTemplateQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- return AdvancedPromptTemplateService.get_prompt(args)
+ return AdvancedPromptTemplateService.get_prompt(args.model_dump())
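The query-validation pattern above in standalone form: `request.args.to_dict(flat=True)` flattens the query string to a plain dict (keeping only the first value of any repeated key), which Pydantic then validates and defaults:

```python
from pydantic import BaseModel, Field


class Query(BaseModel):
    app_mode: str
    has_context: str = Field(default="true")


args = Query.model_validate({"app_mode": "chat"})
assert args.has_context == "true"  # default applied when the query param is absent
print(args.model_dump())           # {'app_mode': 'chat', 'has_context': 'true'}
```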
diff --git a/api/controllers/console/app/agent.py b/api/controllers/console/app/agent.py
index c063f336c7..cfdb9cf417 100644
--- a/api/controllers/console/app/agent.py
+++ b/api/controllers/console/app/agent.py
@@ -1,6 +1,8 @@
-from flask_restx import Resource, fields, reqparse
+from flask import request
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field, field_validator
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
from libs.helper import uuid_value
@@ -8,29 +10,40 @@ from libs.login import login_required
from models.model import AppMode
from services.agent_service import AgentService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class AgentLogQuery(BaseModel):
+ message_id: str = Field(..., description="Message UUID")
+ conversation_id: str = Field(..., description="Conversation UUID")
+
+ @field_validator("message_id", "conversation_id")
+ @classmethod
+ def validate_uuid(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+console_ns.schema_model(
+ AgentLogQuery.__name__, AgentLogQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
@console_ns.route("/apps//agent/logs")
class AgentLogApi(Resource):
- @api.doc("get_agent_logs")
- @api.doc(description="Get agent execution logs for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("message_id", type=str, required=True, location="args", help="Message UUID")
- .add_argument("conversation_id", type=str, required=True, location="args", help="Conversation UUID")
+ @console_ns.doc("get_agent_logs")
+ @console_ns.doc(description="Get agent execution logs for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AgentLogQuery.__name__])
+ @console_ns.response(
+ 200, "Agent logs retrieved successfully", fields.List(fields.Raw(description="Agent log entries"))
)
- @api.response(200, "Agent logs retrieved successfully", fields.List(fields.Raw(description="Agent log entries")))
- @api.response(400, "Invalid request parameters")
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.AGENT_CHAT])
def get(self, app_model):
"""Get agent logs"""
- parser = reqparse.RequestParser()
- parser.add_argument("message_id", type=uuid_value, required=True, location="args")
- parser.add_argument("conversation_id", type=uuid_value, required=True, location="args")
+ args = AgentLogQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- args = parser.parse_args()
-
- return AgentService.get_agent_logs(app_model, args["conversation_id"], args["message_id"])
+ return AgentService.get_agent_logs(app_model, args.conversation_id, args.message_id)
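A sketch of the shared-validator idea above, using the stdlib in place of Dify's `uuid_value` helper (assumed here to validate and return the string form):

```python
import uuid

from pydantic import BaseModel, field_validator


class LogQuery(BaseModel):
    message_id: str
    conversation_id: str

    @field_validator("message_id", "conversation_id")
    @classmethod
    def _check_uuid(cls, value: str) -> str:
        # uuid.UUID raises ValueError on malformed input,
        # which Pydantic surfaces as a validation error.
        return str(uuid.UUID(value))


LogQuery(message_id=str(uuid.uuid4()), conversation_id=str(uuid.uuid4()))
```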
diff --git a/api/controllers/console/app/annotation.py b/api/controllers/console/app/annotation.py
index d0ee11fe75..6a4c1528b0 100644
--- a/api/controllers/console/app/annotation.py
+++ b/api/controllers/console/app/annotation.py
@@ -1,59 +1,114 @@
-from typing import Literal
+from typing import Any, Literal
-from flask import request
-from flask_login import current_user
-from flask_restx import Resource, fields, marshal, marshal_with, reqparse
-from werkzeug.exceptions import Forbidden
+from flask import abort, make_response, request
+from flask_restx import Resource, fields, marshal, marshal_with
+from pydantic import BaseModel, Field, field_validator
from controllers.common.errors import NoFileUploadedError, TooManyFilesError
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.wraps import (
account_initialization_required,
+ annotation_import_concurrency_limit,
+ annotation_import_rate_limit,
cloud_edition_billing_resource_check,
+ edit_permission_required,
setup_required,
)
from extensions.ext_redis import redis_client
from fields.annotation_fields import (
annotation_fields,
annotation_hit_history_fields,
+ build_annotation_model,
)
+from libs.helper import uuid_value
from libs.login import login_required
from services.annotation_service import AppAnnotationService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class AnnotationReplyPayload(BaseModel):
+ score_threshold: float = Field(..., description="Score threshold for annotation matching")
+ embedding_provider_name: str = Field(..., description="Embedding provider name")
+ embedding_model_name: str = Field(..., description="Embedding model name")
+
+
+class AnnotationSettingUpdatePayload(BaseModel):
+ score_threshold: float = Field(..., description="Score threshold")
+
+
+class AnnotationListQuery(BaseModel):
+ page: int = Field(default=1, ge=1, description="Page number")
+ limit: int = Field(default=20, ge=1, description="Page size")
+ keyword: str = Field(default="", description="Search keyword")
+
+
+class CreateAnnotationPayload(BaseModel):
+ message_id: str | None = Field(default=None, description="Message ID")
+ question: str | None = Field(default=None, description="Question text")
+ answer: str | None = Field(default=None, description="Answer text")
+ content: str | None = Field(default=None, description="Content text")
+ annotation_reply: dict[str, Any] | None = Field(default=None, description="Annotation reply data")
+
+ @field_validator("message_id")
+ @classmethod
+ def validate_message_id(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+class UpdateAnnotationPayload(BaseModel):
+ question: str | None = None
+ answer: str | None = None
+ content: str | None = None
+ annotation_reply: dict[str, Any] | None = None
+
+
+class AnnotationReplyStatusQuery(BaseModel):
+ action: Literal["enable", "disable"]
+
+
+class AnnotationFilePayload(BaseModel):
+ message_id: str = Field(..., description="Message ID")
+
+ @field_validator("message_id")
+ @classmethod
+ def validate_message_id(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+def reg(model: type[BaseModel]) -> None:
+ console_ns.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(AnnotationReplyPayload)
+reg(AnnotationSettingUpdatePayload)
+reg(AnnotationListQuery)
+reg(CreateAnnotationPayload)
+reg(UpdateAnnotationPayload)
+reg(AnnotationReplyStatusQuery)
+reg(AnnotationFilePayload)
+
@console_ns.route("/apps//annotation-reply/")
class AnnotationReplyActionApi(Resource):
- @api.doc("annotation_reply_action")
- @api.doc(description="Enable or disable annotation reply for an app")
- @api.doc(params={"app_id": "Application ID", "action": "Action to perform (enable/disable)"})
- @api.expect(
- api.model(
- "AnnotationReplyActionRequest",
- {
- "score_threshold": fields.Float(required=True, description="Score threshold for annotation matching"),
- "embedding_provider_name": fields.String(required=True, description="Embedding provider name"),
- "embedding_model_name": fields.String(required=True, description="Embedding model name"),
- },
- )
- )
- @api.response(200, "Action completed successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("annotation_reply_action")
+ @console_ns.doc(description="Enable or disable annotation reply for an app")
+ @console_ns.doc(params={"app_id": "Application ID", "action": "Action to perform (enable/disable)"})
+ @console_ns.expect(console_ns.models[AnnotationReplyPayload.__name__])
+ @console_ns.response(200, "Action completed successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
+ @edit_permission_required
def post(self, app_id, action: Literal["enable", "disable"]):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
- parser = reqparse.RequestParser()
- parser.add_argument("score_threshold", required=True, type=float, location="json")
- parser.add_argument("embedding_provider_name", required=True, type=str, location="json")
- parser.add_argument("embedding_model_name", required=True, type=str, location="json")
- args = parser.parse_args()
+ args = AnnotationReplyPayload.model_validate(console_ns.payload)
if action == "enable":
- result = AppAnnotationService.enable_app_annotation(args, app_id)
+ result = AppAnnotationService.enable_app_annotation(args.model_dump(), app_id)
elif action == "disable":
result = AppAnnotationService.disable_app_annotation(app_id)
return result, 200
@@ -61,18 +116,16 @@ class AnnotationReplyActionApi(Resource):
@console_ns.route("/apps//annotation-setting")
class AppAnnotationSettingDetailApi(Resource):
- @api.doc("get_annotation_setting")
- @api.doc(description="Get annotation settings for an app")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Annotation settings retrieved successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("get_annotation_setting")
+ @console_ns.doc(description="Get annotation settings for an app")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Annotation settings retrieved successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def get(self, app_id):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
result = AppAnnotationService.get_app_annotation_setting_by_app_id(app_id)
return result, 200
@@ -80,54 +133,39 @@ class AppAnnotationSettingDetailApi(Resource):
@console_ns.route("/apps//annotation-settings/")
class AppAnnotationSettingUpdateApi(Resource):
- @api.doc("update_annotation_setting")
- @api.doc(description="Update annotation settings for an app")
- @api.doc(params={"app_id": "Application ID", "annotation_setting_id": "Annotation setting ID"})
- @api.expect(
- api.model(
- "AnnotationSettingUpdateRequest",
- {
- "score_threshold": fields.Float(required=True, description="Score threshold"),
- "embedding_provider_name": fields.String(required=True, description="Embedding provider"),
- "embedding_model_name": fields.String(required=True, description="Embedding model"),
- },
- )
- )
- @api.response(200, "Settings updated successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("update_annotation_setting")
+ @console_ns.doc(description="Update annotation settings for an app")
+ @console_ns.doc(params={"app_id": "Application ID", "annotation_setting_id": "Annotation setting ID"})
+ @console_ns.expect(console_ns.models[AnnotationSettingUpdatePayload.__name__])
+ @console_ns.response(200, "Settings updated successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, app_id, annotation_setting_id):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
annotation_setting_id = str(annotation_setting_id)
- parser = reqparse.RequestParser()
- parser.add_argument("score_threshold", required=True, type=float, location="json")
- args = parser.parse_args()
+ args = AnnotationSettingUpdatePayload.model_validate(console_ns.payload)
- result = AppAnnotationService.update_app_annotation_setting(app_id, annotation_setting_id, args)
+ result = AppAnnotationService.update_app_annotation_setting(app_id, annotation_setting_id, args.model_dump())
return result, 200
@console_ns.route("/apps//annotation-reply//status/")
class AnnotationReplyActionStatusApi(Resource):
- @api.doc("get_annotation_reply_action_status")
- @api.doc(description="Get status of annotation reply action job")
- @api.doc(params={"app_id": "Application ID", "job_id": "Job ID", "action": "Action type"})
- @api.response(200, "Job status retrieved successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("get_annotation_reply_action_status")
+ @console_ns.doc(description="Get status of annotation reply action job")
+ @console_ns.doc(params={"app_id": "Application ID", "job_id": "Job ID", "action": "Action type"})
+ @console_ns.response(200, "Job status retrieved successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
+ @edit_permission_required
def get(self, app_id, job_id, action):
- if not current_user.is_editor:
- raise Forbidden()
-
job_id = str(job_id)
app_annotation_job_key = f"{action}_app_annotation_job_{str(job_id)}"
cache_result = redis_client.get(app_annotation_job_key)
@@ -145,27 +183,21 @@ class AnnotationReplyActionStatusApi(Resource):
@console_ns.route("/apps//annotations")
class AnnotationApi(Resource):
- @api.doc("list_annotations")
- @api.doc(description="Get annotations for an app with pagination")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("page", type=int, location="args", default=1, help="Page number")
- .add_argument("limit", type=int, location="args", default=20, help="Page size")
- .add_argument("keyword", type=str, location="args", default="", help="Search keyword")
- )
- @api.response(200, "Annotations retrieved successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("list_annotations")
+ @console_ns.doc(description="Get annotations for an app with pagination")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AnnotationListQuery.__name__])
+ @console_ns.response(200, "Annotations retrieved successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def get(self, app_id):
- if not current_user.is_editor:
- raise Forbidden()
-
- page = request.args.get("page", default=1, type=int)
- limit = request.args.get("limit", default=20, type=int)
- keyword = request.args.get("keyword", default="", type=str)
+ args = AnnotationListQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ page = args.page
+ limit = args.limit
+ keyword = args.keyword
app_id = str(app_id)
annotation_list, total = AppAnnotationService.get_annotation_list_by_app_id(app_id, page, limit, keyword)
@@ -178,45 +210,30 @@ class AnnotationApi(Resource):
}
return response, 200
- @api.doc("create_annotation")
- @api.doc(description="Create a new annotation for an app")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "CreateAnnotationRequest",
- {
- "question": fields.String(required=True, description="Question text"),
- "answer": fields.String(required=True, description="Answer text"),
- "annotation_reply": fields.Raw(description="Annotation reply data"),
- },
- )
- )
- @api.response(201, "Annotation created successfully", annotation_fields)
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("create_annotation")
+ @console_ns.doc(description="Create a new annotation for an app")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[CreateAnnotationPayload.__name__])
+ @console_ns.response(201, "Annotation created successfully", build_annotation_model(console_ns))
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
@marshal_with(annotation_fields)
+ @edit_permission_required
def post(self, app_id):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
- parser = reqparse.RequestParser()
- parser.add_argument("question", required=True, type=str, location="json")
- parser.add_argument("answer", required=True, type=str, location="json")
- args = parser.parse_args()
- annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id)
+ args = CreateAnnotationPayload.model_validate(console_ns.payload)
+ data = args.model_dump(exclude_none=True)
+ annotation = AppAnnotationService.up_insert_app_annotation_from_message(data, app_id)
return annotation
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def delete(self, app_id):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
# Use request.args.getlist to get annotation_ids array directly
@@ -241,57 +258,61 @@ class AnnotationApi(Resource):
@console_ns.route("/apps//annotations/export")
class AnnotationExportApi(Resource):
- @api.doc("export_annotations")
- @api.doc(description="Export all annotations for an app")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Annotations exported successfully", fields.List(fields.Nested(annotation_fields)))
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("export_annotations")
+ @console_ns.doc(description="Export all annotations for an app with CSV injection protection")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(
+ 200,
+ "Annotations exported successfully",
+ console_ns.model("AnnotationList", {"data": fields.List(fields.Nested(build_annotation_model(console_ns)))}),
+ )
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def get(self, app_id):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
annotation_list = AppAnnotationService.export_annotation_list_by_app_id(app_id)
- response = {"data": marshal(annotation_list, annotation_fields)}
- return response, 200
+ response_data = {"data": marshal(annotation_list, annotation_fields)}
+
+ # Create response with secure headers for CSV export
+ response = make_response(response_data, 200)
+ response.headers["Content-Type"] = "application/json; charset=utf-8"
+ response.headers["X-Content-Type-Options"] = "nosniff"
+
+ return response
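The secure-export pattern above in isolation: an explicit `Content-Type` plus `X-Content-Type-Options: nosniff` prevents browsers from sniffing the JSON body into an executable type. A minimal standalone sketch:

```python
from flask import Flask, make_response

app = Flask(__name__)


@app.get("/export")
def export():
    response = make_response({"data": []}, 200)
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    response.headers["X-Content-Type-Options"] = "nosniff"
    return response
```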
@console_ns.route("/apps//annotations/")
class AnnotationUpdateDeleteApi(Resource):
- @api.doc("update_delete_annotation")
- @api.doc(description="Update or delete an annotation")
- @api.doc(params={"app_id": "Application ID", "annotation_id": "Annotation ID"})
- @api.response(200, "Annotation updated successfully", annotation_fields)
- @api.response(204, "Annotation deleted successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("update_delete_annotation")
+ @console_ns.doc(description="Update or delete an annotation")
+ @console_ns.doc(params={"app_id": "Application ID", "annotation_id": "Annotation ID"})
+ @console_ns.response(200, "Annotation updated successfully", build_annotation_model(console_ns))
+ @console_ns.response(204, "Annotation deleted successfully")
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.expect(console_ns.models[UpdateAnnotationPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
+ @edit_permission_required
@marshal_with(annotation_fields)
def post(self, app_id, annotation_id):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
annotation_id = str(annotation_id)
- parser = reqparse.RequestParser()
- parser.add_argument("question", required=True, type=str, location="json")
- parser.add_argument("answer", required=True, type=str, location="json")
- args = parser.parse_args()
- annotation = AppAnnotationService.update_app_annotation_directly(args, app_id, annotation_id)
+ args = UpdateAnnotationPayload.model_validate(console_ns.payload)
+ annotation = AppAnnotationService.update_app_annotation_directly(
+ args.model_dump(exclude_none=True), app_id, annotation_id
+ )
return annotation
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def delete(self, app_id, annotation_id):
- if not current_user.is_editor:
- raise Forbidden()
-
app_id = str(app_id)
annotation_id = str(annotation_id)
AppAnnotationService.delete_app_annotation(app_id, annotation_id)
@@ -300,21 +321,26 @@ class AnnotationUpdateDeleteApi(Resource):
@console_ns.route("/apps//annotations/batch-import")
class AnnotationBatchImportApi(Resource):
- @api.doc("batch_import_annotations")
- @api.doc(description="Batch import annotations from CSV file")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Batch import started successfully")
- @api.response(403, "Insufficient permissions")
- @api.response(400, "No file uploaded or too many files")
+ @console_ns.doc("batch_import_annotations")
+ @console_ns.doc(description="Batch import annotations from CSV file with rate limiting and security checks")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Batch import started successfully")
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(400, "No file uploaded or too many files")
+ @console_ns.response(413, "File too large")
+ @console_ns.response(429, "Too many requests or concurrent imports")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
+ @annotation_import_rate_limit
+ @annotation_import_concurrency_limit
+ @edit_permission_required
def post(self, app_id):
- if not current_user.is_editor:
- raise Forbidden()
+ from configs import dify_config
app_id = str(app_id)
+
# check file
if "file" not in request.files:
raise NoFileUploadedError()
@@ -324,27 +350,43 @@ class AnnotationBatchImportApi(Resource):
# get file from request
file = request.files["file"]
+
# check file type
if not file.filename or not file.filename.lower().endswith(".csv"):
raise ValueError("Invalid file type. Only CSV files are allowed")
+
+ # Check file size before processing
+ file.seek(0, 2) # Seek to end of file
+ file_size = file.tell()
+ file.seek(0) # Reset to beginning
+
+ max_size_bytes = dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT * 1024 * 1024
+ if file_size > max_size_bytes:
+ abort(
+ 413,
+ f"File size exceeds maximum limit of {dify_config.ANNOTATION_IMPORT_FILE_SIZE_LIMIT}MB. "
+ f"Please reduce the file size and try again.",
+ )
+
+ if file_size == 0:
+ raise ValueError("The uploaded file is empty")
+
return AppAnnotationService.batch_import_app_annotations(app_id, file)
@console_ns.route("/apps//annotations/batch-import-status/")
class AnnotationBatchImportStatusApi(Resource):
- @api.doc("get_batch_import_status")
- @api.doc(description="Get status of batch import job")
- @api.doc(params={"app_id": "Application ID", "job_id": "Job ID"})
- @api.response(200, "Job status retrieved successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("get_batch_import_status")
+ @console_ns.doc(description="Get status of batch import job")
+ @console_ns.doc(params={"app_id": "Application ID", "job_id": "Job ID"})
+ @console_ns.response(200, "Job status retrieved successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
+ @edit_permission_required
def get(self, app_id, job_id):
- if not current_user.is_editor:
- raise Forbidden()
-
job_id = str(job_id)
indexing_cache_key = f"app_annotation_batch_import_{str(job_id)}"
cache_result = redis_client.get(indexing_cache_key)
@@ -361,25 +403,32 @@ class AnnotationBatchImportStatusApi(Resource):
@console_ns.route("/apps//annotations//hit-histories")
class AnnotationHitHistoryListApi(Resource):
- @api.doc("list_annotation_hit_histories")
- @api.doc(description="Get hit histories for an annotation")
- @api.doc(params={"app_id": "Application ID", "annotation_id": "Annotation ID"})
- @api.expect(
- api.parser()
+ @console_ns.doc("list_annotation_hit_histories")
+ @console_ns.doc(description="Get hit histories for an annotation")
+ @console_ns.doc(params={"app_id": "Application ID", "annotation_id": "Annotation ID"})
+ @console_ns.expect(
+ console_ns.parser()
.add_argument("page", type=int, location="args", default=1, help="Page number")
.add_argument("limit", type=int, location="args", default=20, help="Page size")
)
- @api.response(
- 200, "Hit histories retrieved successfully", fields.List(fields.Nested(annotation_hit_history_fields))
+ @console_ns.response(
+ 200,
+ "Hit histories retrieved successfully",
+ console_ns.model(
+ "AnnotationHitHistoryList",
+ {
+ "data": fields.List(
+ fields.Nested(console_ns.model("AnnotationHitHistoryItem", annotation_hit_history_fields))
+ )
+ },
+ ),
)
- @api.response(403, "Insufficient permissions")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def get(self, app_id, annotation_id):
- if not current_user.is_editor:
- raise Forbidden()
-
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
app_id = str(app_id)
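The batch-import size check above, as a standalone sketch: a stream's length is measured without reading it into memory by seeking to the end and rewinding. Werkzeug's `FileStorage` exposes the same `seek`/`tell` interface through its underlying stream:

```python
import io


def stream_size(stream: io.IOBase) -> int:
    stream.seek(0, 2)  # 2 == os.SEEK_END: jump to the end of the stream
    size = stream.tell()
    stream.seek(0)     # rewind so later readers see the full content
    return size


assert stream_size(io.BytesIO(b"hello")) == 5
```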
diff --git a/api/controllers/console/app/app.py b/api/controllers/console/app/app.py
index 3927685af3..62e997dae2 100644
--- a/api/controllers/console/app/app.py
+++ b/api/controllers/console/app/app.py
@@ -1,96 +1,280 @@
import uuid
-from typing import cast
+from typing import Literal
-from flask_login import current_user
-from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse
+from flask import request
+from flask_restx import Resource, fields, marshal, marshal_with
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy import select
from sqlalchemy.orm import Session
-from werkzeug.exceptions import BadRequest, Forbidden, abort
+from werkzeug.exceptions import BadRequest
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import (
account_initialization_required,
cloud_edition_billing_resource_check,
+ edit_permission_required,
enterprise_license_required,
+ is_admin_or_owner_required,
setup_required,
)
from core.ops.ops_trace_manager import OpsTraceManager
+from core.workflow.enums import NodeType
from extensions.ext_database import db
-from fields.app_fields import app_detail_fields, app_detail_fields_with_site, app_pagination_fields
-from libs.login import login_required
-from libs.validators import validate_description_length
-from models import Account, App
+from fields.app_fields import (
+ deleted_tool_fields,
+ model_config_fields,
+ model_config_partial_fields,
+ site_fields,
+ tag_fields,
+)
+from fields.workflow_fields import workflow_partial_fields as _workflow_partial_fields_dict
+from libs.helper import AppIconUrlField, TimestampField
+from libs.login import current_account_with_tenant, login_required
+from models import App, Workflow
from services.app_dsl_service import AppDslService, ImportMode
from services.app_service import AppService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class AppListQuery(BaseModel):
+ page: int = Field(default=1, ge=1, le=99999, description="Page number (1-99999)")
+ limit: int = Field(default=20, ge=1, le=100, description="Page size (1-100)")
+ mode: Literal["completion", "chat", "advanced-chat", "workflow", "agent-chat", "channel", "all"] = Field(
+ default="all", description="App mode filter"
+ )
+ name: str | None = Field(default=None, description="Filter by app name")
+ tag_ids: list[str] | None = Field(default=None, description="Comma-separated tag IDs")
+ is_created_by_me: bool | None = Field(default=None, description="Filter by creator")
+
+ @field_validator("tag_ids", mode="before")
+ @classmethod
+ def validate_tag_ids(cls, value: str | list[str] | None) -> list[str] | None:
+ if not value:
+ return None
+
+ if isinstance(value, str):
+ items = [item.strip() for item in value.split(",") if item.strip()]
+ elif isinstance(value, list):
+ items = [str(item).strip() for item in value if item and str(item).strip()]
+ else:
+ raise TypeError("Unsupported tag_ids type.")
+
+ if not items:
+ return None
+
+ try:
+ return [str(uuid.UUID(item)) for item in items]
+ except ValueError as exc:
+ raise ValueError("Invalid UUID format in tag_ids.") from exc
+
+
+class CreateAppPayload(BaseModel):
+ name: str = Field(..., min_length=1, description="App name")
+ description: str | None = Field(default=None, description="App description (max 400 chars)", max_length=400)
+ mode: Literal["chat", "agent-chat", "advanced-chat", "workflow", "completion"] = Field(..., description="App mode")
+ icon_type: str | None = Field(default=None, description="Icon type")
+ icon: str | None = Field(default=None, description="Icon")
+ icon_background: str | None = Field(default=None, description="Icon background color")
+
+
+class UpdateAppPayload(BaseModel):
+ name: str = Field(..., min_length=1, description="App name")
+ description: str | None = Field(default=None, description="App description (max 400 chars)", max_length=400)
+ icon_type: str | None = Field(default=None, description="Icon type")
+ icon: str | None = Field(default=None, description="Icon")
+ icon_background: str | None = Field(default=None, description="Icon background color")
+ use_icon_as_answer_icon: bool | None = Field(default=None, description="Use icon as answer icon")
+ max_active_requests: int | None = Field(default=None, description="Maximum active requests")
+
+
+class CopyAppPayload(BaseModel):
+ name: str | None = Field(default=None, description="Name for the copied app")
+ description: str | None = Field(default=None, description="Description for the copied app", max_length=400)
+ icon_type: str | None = Field(default=None, description="Icon type")
+ icon: str | None = Field(default=None, description="Icon")
+ icon_background: str | None = Field(default=None, description="Icon background color")
+
+
+class AppExportQuery(BaseModel):
+ include_secret: bool = Field(default=False, description="Include secrets in export")
+ workflow_id: str | None = Field(default=None, description="Specific workflow ID to export")
+
+
+class AppNamePayload(BaseModel):
+ name: str = Field(..., min_length=1, description="Name to check")
+
+
+class AppIconPayload(BaseModel):
+ icon: str | None = Field(default=None, description="Icon data")
+ icon_background: str | None = Field(default=None, description="Icon background color")
+
+
+class AppSiteStatusPayload(BaseModel):
+ enable_site: bool = Field(..., description="Enable or disable site")
+
+
+class AppApiStatusPayload(BaseModel):
+ enable_api: bool = Field(..., description="Enable or disable API")
+
+
+class AppTracePayload(BaseModel):
+ enabled: bool = Field(..., description="Enable or disable tracing")
+ tracing_provider: str | None = Field(default=None, description="Tracing provider")
+
+ @field_validator("tracing_provider")
+ @classmethod
+ def validate_tracing_provider(cls, value: str | None, info) -> str | None:
+ if info.data.get("enabled") and not value:
+ raise ValueError("tracing_provider is required when enabled is True")
+ return value
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(AppListQuery)
+reg(CreateAppPayload)
+reg(UpdateAppPayload)
+reg(CopyAppPayload)
+reg(AppExportQuery)
+reg(AppNamePayload)
+reg(AppIconPayload)
+reg(AppSiteStatusPayload)
+reg(AppApiStatusPayload)
+reg(AppTracePayload)
+
+# Register models for flask_restx to avoid dict type issues in Swagger
+# Register base models first
+tag_model = console_ns.model("Tag", tag_fields)
+
+workflow_partial_model = console_ns.model("WorkflowPartial", _workflow_partial_fields_dict)
+
+model_config_model = console_ns.model("ModelConfig", model_config_fields)
+
+model_config_partial_model = console_ns.model("ModelConfigPartial", model_config_partial_fields)
+
+deleted_tool_model = console_ns.model("DeletedTool", deleted_tool_fields)
+
+site_model = console_ns.model("Site", site_fields)
+
+app_partial_model = console_ns.model(
+ "AppPartial",
+ {
+ "id": fields.String,
+ "name": fields.String,
+ "max_active_requests": fields.Raw(),
+ "description": fields.String(attribute="desc_or_prompt"),
+ "mode": fields.String(attribute="mode_compatible_with_agent"),
+ "icon_type": fields.String,
+ "icon": fields.String,
+ "icon_background": fields.String,
+ "icon_url": AppIconUrlField,
+ "model_config": fields.Nested(model_config_partial_model, attribute="app_model_config", allow_null=True),
+ "workflow": fields.Nested(workflow_partial_model, allow_null=True),
+ "use_icon_as_answer_icon": fields.Boolean,
+ "created_by": fields.String,
+ "created_at": TimestampField,
+ "updated_by": fields.String,
+ "updated_at": TimestampField,
+ "tags": fields.List(fields.Nested(tag_model)),
+ "access_mode": fields.String,
+ "create_user_name": fields.String,
+ "author_name": fields.String,
+ "has_draft_trigger": fields.Boolean,
+ },
+)
+
+app_detail_model = console_ns.model(
+ "AppDetail",
+ {
+ "id": fields.String,
+ "name": fields.String,
+ "description": fields.String,
+ "mode": fields.String(attribute="mode_compatible_with_agent"),
+ "icon": fields.String,
+ "icon_background": fields.String,
+ "enable_site": fields.Boolean,
+ "enable_api": fields.Boolean,
+ "model_config": fields.Nested(model_config_model, attribute="app_model_config", allow_null=True),
+ "workflow": fields.Nested(workflow_partial_model, allow_null=True),
+ "tracing": fields.Raw,
+ "use_icon_as_answer_icon": fields.Boolean,
+ "created_by": fields.String,
+ "created_at": TimestampField,
+ "updated_by": fields.String,
+ "updated_at": TimestampField,
+ "access_mode": fields.String,
+ "tags": fields.List(fields.Nested(tag_model)),
+ },
+)
+
+app_detail_with_site_model = console_ns.model(
+ "AppDetailWithSite",
+ {
+ "id": fields.String,
+ "name": fields.String,
+ "description": fields.String,
+ "mode": fields.String(attribute="mode_compatible_with_agent"),
+ "icon_type": fields.String,
+ "icon": fields.String,
+ "icon_background": fields.String,
+ "icon_url": AppIconUrlField,
+ "enable_site": fields.Boolean,
+ "enable_api": fields.Boolean,
+ "model_config": fields.Nested(model_config_model, attribute="app_model_config", allow_null=True),
+ "workflow": fields.Nested(workflow_partial_model, allow_null=True),
+ "api_base_url": fields.String,
+ "use_icon_as_answer_icon": fields.Boolean,
+ "max_active_requests": fields.Integer,
+ "created_by": fields.String,
+ "created_at": TimestampField,
+ "updated_by": fields.String,
+ "updated_at": TimestampField,
+ "deleted_tools": fields.List(fields.Nested(deleted_tool_model)),
+ "access_mode": fields.String,
+ "tags": fields.List(fields.Nested(tag_model)),
+ "site": fields.Nested(site_model),
+ },
+)
+
+app_pagination_model = console_ns.model(
+ "AppPagination",
+ {
+ "page": fields.Integer,
+ "limit": fields.Integer(attribute="per_page"),
+ "total": fields.Integer,
+ "has_more": fields.Boolean(attribute="has_next"),
+ "data": fields.List(fields.Nested(app_partial_model), attribute="items"),
+ },
+)
@console_ns.route("/apps")
class AppListApi(Resource):
- @api.doc("list_apps")
- @api.doc(description="Get list of applications with pagination and filtering")
- @api.expect(
- api.parser()
- .add_argument("page", type=int, location="args", help="Page number (1-99999)", default=1)
- .add_argument("limit", type=int, location="args", help="Page size (1-100)", default=20)
- .add_argument(
- "mode",
- type=str,
- location="args",
- choices=["completion", "chat", "advanced-chat", "workflow", "agent-chat", "channel", "all"],
- default="all",
- help="App mode filter",
- )
- .add_argument("name", type=str, location="args", help="Filter by app name")
- .add_argument("tag_ids", type=str, location="args", help="Comma-separated tag IDs")
- .add_argument("is_created_by_me", type=bool, location="args", help="Filter by creator")
- )
- @api.response(200, "Success", app_pagination_fields)
+ @console_ns.doc("list_apps")
+ @console_ns.doc(description="Get list of applications with pagination and filtering")
+ @console_ns.expect(console_ns.models[AppListQuery.__name__])
+ @console_ns.response(200, "Success", app_pagination_model)
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self):
"""Get app list"""
+ current_user, current_tenant_id = current_account_with_tenant()
- def uuid_list(value):
- try:
- return [str(uuid.UUID(v)) for v in value.split(",")]
- except ValueError:
- abort(400, message="Invalid UUID format in tag_ids.")
-
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args")
- parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args")
- parser.add_argument(
- "mode",
- type=str,
- choices=[
- "completion",
- "chat",
- "advanced-chat",
- "workflow",
- "agent-chat",
- "channel",
- "all",
- ],
- default="all",
- location="args",
- required=False,
- )
- parser.add_argument("name", type=str, location="args", required=False)
- parser.add_argument("tag_ids", type=uuid_list, location="args", required=False)
- parser.add_argument("is_created_by_me", type=inputs.boolean, location="args", required=False)
-
- args = parser.parse_args()
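+ # request.args.to_dict(flat=True) keeps only the first value per key; Pydantic
+ # then applies the type coercion and defaults RequestParser used to provide.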
+ args = AppListQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ args_dict = args.model_dump()
# get app list
app_service = AppService()
- app_pagination = app_service.get_paginate_apps(current_user.id, current_user.current_tenant_id, args)
+ app_pagination = app_service.get_paginate_apps(current_user.id, current_tenant_id, args_dict)
if not app_pagination:
return {"data": [], "total": 0, "page": 1, "limit": 20, "has_more": False}
@@ -104,71 +288,75 @@ class AppListApi(Resource):
if str(app.id) in res:
app.access_mode = res[str(app.id)].access_mode
- return marshal(app_pagination, app_pagination_fields), 200
+ workflow_capable_app_ids = [
+ str(app.id) for app in app_pagination.items if app.mode in {"workflow", "advanced-chat"}
+ ]
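+ # One bulk query for all draft workflows, then flag each app as soon as any
+ # trigger-type node appears in its draft graph.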
+ draft_trigger_app_ids: set[str] = set()
+ if workflow_capable_app_ids:
+ draft_workflows = (
+ db.session.execute(
+ select(Workflow).where(
+ Workflow.version == Workflow.VERSION_DRAFT,
+ Workflow.app_id.in_(workflow_capable_app_ids),
+ )
+ )
+ .scalars()
+ .all()
+ )
+ trigger_node_types = {
+ NodeType.TRIGGER_WEBHOOK,
+ NodeType.TRIGGER_SCHEDULE,
+ NodeType.TRIGGER_PLUGIN,
+ }
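+ # The membership test relies on NodeType being a str-based enum, so the raw
+ # "type" strings from node data compare equal to enum members.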
+ for workflow in draft_workflows:
+ try:
+ for _, node_data in workflow.walk_nodes():
+ if node_data.get("type") in trigger_node_types:
+ draft_trigger_app_ids.add(str(workflow.app_id))
+ break
+ except Exception:
+ continue
- @api.doc("create_app")
- @api.doc(description="Create a new application")
- @api.expect(
- api.model(
- "CreateAppRequest",
- {
- "name": fields.String(required=True, description="App name"),
- "description": fields.String(description="App description (max 400 chars)"),
- "mode": fields.String(required=True, enum=ALLOW_CREATE_APP_MODES, description="App mode"),
- "icon_type": fields.String(description="Icon type"),
- "icon": fields.String(description="Icon"),
- "icon_background": fields.String(description="Icon background color"),
- },
- )
- )
- @api.response(201, "App created successfully", app_detail_fields)
- @api.response(403, "Insufficient permissions")
- @api.response(400, "Invalid request parameters")
+ for app in app_pagination.items:
+ app.has_draft_trigger = str(app.id) in draft_trigger_app_ids
+
+ return marshal(app_pagination, app_pagination_model), 200
+
+ @console_ns.doc("create_app")
+ @console_ns.doc(description="Create a new application")
+ @console_ns.expect(console_ns.models[CreateAppPayload.__name__])
+ @console_ns.response(201, "App created successfully", app_detail_model)
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
- @marshal_with(app_detail_fields)
+ @marshal_with(app_detail_model)
@cloud_edition_billing_resource_check("apps")
+ @edit_permission_required
def post(self):
"""Create app"""
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- parser.add_argument("description", type=validate_description_length, location="json")
- parser.add_argument("mode", type=str, choices=ALLOW_CREATE_APP_MODES, location="json")
- parser.add_argument("icon_type", type=str, location="json")
- parser.add_argument("icon", type=str, location="json")
- parser.add_argument("icon_background", type=str, location="json")
- args = parser.parse_args()
-
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
- if "mode" not in args or args["mode"] is None:
- raise BadRequest("mode is required")
+ current_user, current_tenant_id = current_account_with_tenant()
+ args = CreateAppPayload.model_validate(console_ns.payload)
app_service = AppService()
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
- if current_user.current_tenant_id is None:
- raise ValueError("current_user.current_tenant_id cannot be None")
- app = app_service.create_app(current_user.current_tenant_id, args, current_user)
+ app = app_service.create_app(current_tenant_id, args.model_dump(), current_user)
return app, 201
@console_ns.route("/apps/")
class AppApi(Resource):
- @api.doc("get_app_detail")
- @api.doc(description="Get application details")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Success", app_detail_fields_with_site)
+ @console_ns.doc("get_app_detail")
+ @console_ns.doc(description="Get application details")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Success", app_detail_with_site_model)
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
@get_app_model
- @marshal_with(app_detail_fields_with_site)
+ @marshal_with(app_detail_with_site_model)
def get(self, app_model):
"""Get app detail"""
app_service = AppService()
@@ -181,79 +369,50 @@ class AppApi(Resource):
return app_model
- @api.doc("update_app")
- @api.doc(description="Update application details")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "UpdateAppRequest",
- {
- "name": fields.String(required=True, description="App name"),
- "description": fields.String(description="App description (max 400 chars)"),
- "icon_type": fields.String(description="Icon type"),
- "icon": fields.String(description="Icon"),
- "icon_background": fields.String(description="Icon background color"),
- "use_icon_as_answer_icon": fields.Boolean(description="Use icon as answer icon"),
- "max_active_requests": fields.Integer(description="Maximum active requests"),
- },
- )
- )
- @api.response(200, "App updated successfully", app_detail_fields_with_site)
- @api.response(403, "Insufficient permissions")
- @api.response(400, "Invalid request parameters")
+ @console_ns.doc("update_app")
+ @console_ns.doc(description="Update application details")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[UpdateAppPayload.__name__])
+ @console_ns.response(200, "App updated successfully", app_detail_with_site_model)
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
@get_app_model
- @marshal_with(app_detail_fields_with_site)
+ @edit_permission_required
+ @marshal_with(app_detail_with_site_model)
def put(self, app_model):
"""Update app"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, nullable=False, location="json")
- parser.add_argument("description", type=validate_description_length, location="json")
- parser.add_argument("icon_type", type=str, location="json")
- parser.add_argument("icon", type=str, location="json")
- parser.add_argument("icon_background", type=str, location="json")
- parser.add_argument("use_icon_as_answer_icon", type=bool, location="json")
- parser.add_argument("max_active_requests", type=int, location="json")
- args = parser.parse_args()
+ args = UpdateAppPayload.model_validate(console_ns.payload)
app_service = AppService()
- # Construct ArgsDict from parsed arguments
- from services.app_service import AppService as AppServiceType
- args_dict: AppServiceType.ArgsDict = {
- "name": args["name"],
- "description": args.get("description", ""),
- "icon_type": args.get("icon_type", ""),
- "icon": args.get("icon", ""),
- "icon_background": args.get("icon_background", ""),
- "use_icon_as_answer_icon": args.get("use_icon_as_answer_icon", False),
- "max_active_requests": args.get("max_active_requests", 0),
+ args_dict: AppService.ArgsDict = {
+ "name": args.name,
+ "description": args.description or "",
+ "icon_type": args.icon_type or "",
+ "icon": args.icon or "",
+ "icon_background": args.icon_background or "",
+ "use_icon_as_answer_icon": args.use_icon_as_answer_icon or False,
+ "max_active_requests": args.max_active_requests or 0,
}
app_model = app_service.update_app(app_model, args_dict)
return app_model
- @api.doc("delete_app")
- @api.doc(description="Delete application")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(204, "App deleted successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("delete_app")
+ @console_ns.doc(description="Delete application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(204, "App deleted successfully")
+ @console_ns.response(403, "Insufficient permissions")
@get_app_model
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def delete(self, app_model):
"""Delete app"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
app_service = AppService()
app_service.delete_app(app_model)
@@ -262,55 +421,37 @@ class AppApi(Resource):
@console_ns.route("/apps//copy")
class AppCopyApi(Resource):
- @api.doc("copy_app")
- @api.doc(description="Create a copy of an existing application")
- @api.doc(params={"app_id": "Application ID to copy"})
- @api.expect(
- api.model(
- "CopyAppRequest",
- {
- "name": fields.String(description="Name for the copied app"),
- "description": fields.String(description="Description for the copied app"),
- "icon_type": fields.String(description="Icon type"),
- "icon": fields.String(description="Icon"),
- "icon_background": fields.String(description="Icon background color"),
- },
- )
- )
- @api.response(201, "App copied successfully", app_detail_fields_with_site)
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("copy_app")
+ @console_ns.doc(description="Create a copy of an existing application")
+ @console_ns.doc(params={"app_id": "Application ID to copy"})
+ @console_ns.expect(console_ns.models[CopyAppPayload.__name__])
+ @console_ns.response(201, "App copied successfully", app_detail_with_site_model)
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@get_app_model
- @marshal_with(app_detail_fields_with_site)
+ @edit_permission_required
+ @marshal_with(app_detail_with_site_model)
def post(self, app_model):
"""Copy app"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, location="json")
- parser.add_argument("description", type=validate_description_length, location="json")
- parser.add_argument("icon_type", type=str, location="json")
- parser.add_argument("icon", type=str, location="json")
- parser.add_argument("icon_background", type=str, location="json")
- args = parser.parse_args()
+ args = CopyAppPayload.model_validate(console_ns.payload or {})
with Session(db.engine) as session:
import_service = AppDslService(session)
yaml_content = import_service.export_dsl(app_model=app_model, include_secret=True)
- account = cast(Account, current_user)
result = import_service.import_app(
- account=account,
+ account=current_user,
import_mode=ImportMode.YAML_CONTENT,
yaml_content=yaml_content,
- name=args.get("name"),
- description=args.get("description"),
- icon_type=args.get("icon_type"),
- icon=args.get("icon"),
- icon_background=args.get("icon_background"),
+ name=args.name,
+ description=args.description,
+ icon_type=args.icon_type,
+ icon=args.icon,
+ icon_background=args.icon_background,
)
session.commit()
@@ -322,178 +463,131 @@ class AppCopyApi(Resource):
@console_ns.route("/apps//export")
class AppExportApi(Resource):
- @api.doc("export_app")
- @api.doc(description="Export application configuration as DSL")
- @api.doc(params={"app_id": "Application ID to export"})
- @api.expect(
- api.parser()
- .add_argument("include_secret", type=bool, location="args", default=False, help="Include secrets in export")
- .add_argument("workflow_id", type=str, location="args", help="Specific workflow ID to export")
- )
- @api.response(
+ @console_ns.doc("export_app")
+ @console_ns.doc(description="Export application configuration as DSL")
+ @console_ns.doc(params={"app_id": "Application ID to export"})
+ @console_ns.expect(console_ns.models[AppExportQuery.__name__])
+ @console_ns.response(
200,
"App exported successfully",
- api.model("AppExportResponse", {"data": fields.String(description="DSL export data")}),
+ console_ns.model("AppExportResponse", {"data": fields.String(description="DSL export data")}),
)
- @api.response(403, "Insufficient permissions")
+ @console_ns.response(403, "Insufficient permissions")
@get_app_model
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def get(self, app_model):
"""Export app"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
- # Add include_secret params
- parser = reqparse.RequestParser()
- parser.add_argument("include_secret", type=inputs.boolean, default=False, location="args")
- parser.add_argument("workflow_id", type=str, location="args")
- args = parser.parse_args()
+ args = AppExportQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
return {
"data": AppDslService.export_dsl(
- app_model=app_model, include_secret=args["include_secret"], workflow_id=args.get("workflow_id")
+ app_model=app_model,
+ include_secret=args.include_secret,
+ workflow_id=args.workflow_id,
)
}
@console_ns.route("/apps//name")
class AppNameApi(Resource):
- @api.doc("check_app_name")
- @api.doc(description="Check if app name is available")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(api.parser().add_argument("name", type=str, required=True, location="args", help="Name to check"))
- @api.response(200, "Name availability checked")
+ @console_ns.doc("check_app_name")
+ @console_ns.doc(description="Check if app name is available")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AppNamePayload.__name__])
+ @console_ns.response(200, "Name availability checked")
@setup_required
@login_required
@account_initialization_required
@get_app_model
- @marshal_with(app_detail_fields)
+ @marshal_with(app_detail_model)
+ @edit_permission_required
def post(self, app_model):
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = AppNamePayload.model_validate(console_ns.payload)
app_service = AppService()
- app_model = app_service.update_app_name(app_model, args["name"])
+ app_model = app_service.update_app_name(app_model, args.name)
return app_model
@console_ns.route("/apps//icon")
class AppIconApi(Resource):
- @api.doc("update_app_icon")
- @api.doc(description="Update application icon")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "AppIconRequest",
- {
- "icon": fields.String(required=True, description="Icon data"),
- "icon_type": fields.String(description="Icon type"),
- "icon_background": fields.String(description="Icon background color"),
- },
- )
- )
- @api.response(200, "Icon updated successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("update_app_icon")
+ @console_ns.doc(description="Update application icon")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AppIconPayload.__name__])
+ @console_ns.response(200, "Icon updated successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@get_app_model
- @marshal_with(app_detail_fields)
+ @marshal_with(app_detail_model)
+ @edit_permission_required
def post(self, app_model):
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("icon", type=str, location="json")
- parser.add_argument("icon_background", type=str, location="json")
- args = parser.parse_args()
+ args = AppIconPayload.model_validate(console_ns.payload or {})
app_service = AppService()
- app_model = app_service.update_app_icon(app_model, args.get("icon") or "", args.get("icon_background") or "")
+ app_model = app_service.update_app_icon(app_model, args.icon or "", args.icon_background or "")
return app_model
@console_ns.route("/apps//site-enable")
class AppSiteStatus(Resource):
- @api.doc("update_app_site_status")
- @api.doc(description="Enable or disable app site")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "AppSiteStatusRequest", {"enable_site": fields.Boolean(required=True, description="Enable or disable site")}
- )
- )
- @api.response(200, "Site status updated successfully", app_detail_fields)
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("update_app_site_status")
+ @console_ns.doc(description="Enable or disable app site")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AppSiteStatusPayload.__name__])
+ @console_ns.response(200, "Site status updated successfully", app_detail_model)
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@get_app_model
- @marshal_with(app_detail_fields)
+ @marshal_with(app_detail_model)
+ @edit_permission_required
def post(self, app_model):
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("enable_site", type=bool, required=True, location="json")
- args = parser.parse_args()
+ args = AppSiteStatusPayload.model_validate(console_ns.payload)
app_service = AppService()
- app_model = app_service.update_app_site_status(app_model, args["enable_site"])
+ app_model = app_service.update_app_site_status(app_model, args.enable_site)
return app_model
@console_ns.route("/apps//api-enable")
class AppApiStatus(Resource):
- @api.doc("update_app_api_status")
- @api.doc(description="Enable or disable app API")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "AppApiStatusRequest", {"enable_api": fields.Boolean(required=True, description="Enable or disable API")}
- )
- )
- @api.response(200, "API status updated successfully", app_detail_fields)
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("update_app_api_status")
+ @console_ns.doc(description="Enable or disable app API")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AppApiStatusPayload.__name__])
+ @console_ns.response(200, "API status updated successfully", app_detail_model)
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
@get_app_model
- @marshal_with(app_detail_fields)
+ @marshal_with(app_detail_model)
def post(self, app_model):
- # The role of the current user in the ta table must be admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("enable_api", type=bool, required=True, location="json")
- args = parser.parse_args()
+ args = AppApiStatusPayload.model_validate(console_ns.payload)
app_service = AppService()
- app_model = app_service.update_app_api_status(app_model, args["enable_api"])
+ app_model = app_service.update_app_api_status(app_model, args.enable_api)
return app_model
@console_ns.route("/apps//trace")
class AppTraceApi(Resource):
- @api.doc("get_app_trace")
- @api.doc(description="Get app tracing configuration")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Trace configuration retrieved successfully")
+ @console_ns.doc("get_app_trace")
+ @console_ns.doc(description="Get app tracing configuration")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Trace configuration retrieved successfully")
@setup_required
@login_required
@account_initialization_required
@@ -503,36 +597,24 @@ class AppTraceApi(Resource):
return app_trace_config
- @api.doc("update_app_trace")
- @api.doc(description="Update app tracing configuration")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "AppTraceRequest",
- {
- "enabled": fields.Boolean(required=True, description="Enable or disable tracing"),
- "tracing_provider": fields.String(required=True, description="Tracing provider"),
- },
- )
- )
- @api.response(200, "Trace configuration updated successfully")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("update_app_trace")
+ @console_ns.doc(description="Update app tracing configuration")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AppTracePayload.__name__])
+ @console_ns.response(200, "Trace configuration updated successfully")
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, app_id):
# add app trace
- if not current_user.is_editor:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("enabled", type=bool, required=True, location="json")
- parser.add_argument("tracing_provider", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = AppTracePayload.model_validate(console_ns.payload)
OpsTraceManager.update_app_tracing_config(
app_id=app_id,
- enabled=args["enabled"],
- tracing_provider=args["tracing_provider"],
+ enabled=args.enabled,
+ tracing_provider=args.tracing_provider,
)
return {"result": "success"}
diff --git a/api/controllers/console/app/app_import.py b/api/controllers/console/app/app_import.py
index 037561cfed..22e2aeb720 100644
--- a/api/controllers/console/app/app_import.py
+++ b/api/controllers/console/app/app_import.py
@@ -1,20 +1,21 @@
-from typing import cast
-
-from flask_login import current_user
-from flask_restx import Resource, marshal_with, reqparse
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import (
account_initialization_required,
cloud_edition_billing_resource_check,
+ edit_permission_required,
setup_required,
)
from extensions.ext_database import db
-from fields.app_fields import app_import_check_dependencies_fields, app_import_fields
-from libs.login import login_required
-from models import Account
+from fields.app_fields import (
+ app_import_check_dependencies_fields,
+ app_import_fields,
+ leaked_dependency_fields,
+)
+from libs.login import current_account_with_tenant, login_required
from models.model import App
from services.app_dsl_service import AppDslService, ImportStatus
from services.enterprise.enterprise_service import EnterpriseService
@@ -22,47 +23,69 @@ from services.feature_service import FeatureService
from .. import console_ns
+# Register marshalling models with flask_restx so Swagger renders typed schemas
+# instead of raw dicts. The base model comes first because it is nested below.
+leaked_dependency_model = console_ns.model("LeakedDependency", leaked_dependency_fields)
+
+app_import_model = console_ns.model("AppImport", app_import_fields)
+
+# For nested models, replace the raw nested dict with the registered model reference
+app_import_check_dependencies_fields_copy = app_import_check_dependencies_fields.copy()
+app_import_check_dependencies_fields_copy["leaked_dependencies"] = fields.List(fields.Nested(leaked_dependency_model))
+app_import_check_dependencies_model = console_ns.model(
+ "AppImportCheckDependencies", app_import_check_dependencies_fields_copy
+)
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
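+# flask_restx serves Swagger 2.0, where refs resolve under "#/definitions/";
+# Pydantic's default ref_template targets "#/$defs/" and would produce dangling refs.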
+
+
+class AppImportPayload(BaseModel):
+ mode: str = Field(..., description="Import mode")
+ yaml_content: str | None = None
+ yaml_url: str | None = None
+ name: str | None = None
+ description: str | None = None
+ icon_type: str | None = None
+ icon: str | None = None
+ icon_background: str | None = None
+ app_id: str | None = None
+
+
+console_ns.schema_model(
+ AppImportPayload.__name__, AppImportPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
@console_ns.route("/apps/imports")
class AppImportApi(Resource):
+ @console_ns.expect(console_ns.models[AppImportPayload.__name__])
@setup_required
@login_required
@account_initialization_required
- @marshal_with(app_import_fields)
+ @marshal_with(app_import_model)
@cloud_edition_billing_resource_check("apps")
+ @edit_permission_required
def post(self):
- # Check user role first
- if not current_user.is_editor:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("mode", type=str, required=True, location="json")
- parser.add_argument("yaml_content", type=str, location="json")
- parser.add_argument("yaml_url", type=str, location="json")
- parser.add_argument("name", type=str, location="json")
- parser.add_argument("description", type=str, location="json")
- parser.add_argument("icon_type", type=str, location="json")
- parser.add_argument("icon", type=str, location="json")
- parser.add_argument("icon_background", type=str, location="json")
- parser.add_argument("app_id", type=str, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = AppImportPayload.model_validate(console_ns.payload)
# Create service with session
with Session(db.engine) as session:
import_service = AppDslService(session)
# Import app
- account = cast(Account, current_user)
+ account = current_user
result = import_service.import_app(
account=account,
- import_mode=args["mode"],
- yaml_content=args.get("yaml_content"),
- yaml_url=args.get("yaml_url"),
- name=args.get("name"),
- description=args.get("description"),
- icon_type=args.get("icon_type"),
- icon=args.get("icon"),
- icon_background=args.get("icon_background"),
- app_id=args.get("app_id"),
+ import_mode=args.mode,
+ yaml_content=args.yaml_content,
+ yaml_url=args.yaml_url,
+ name=args.name,
+ description=args.description,
+ icon_type=args.icon_type,
+ icon=args.icon,
+ icon_background=args.icon_background,
+ app_id=args.app_id,
)
session.commit()
if result.app_id and FeatureService.get_system_features().webapp_auth.enabled:
@@ -82,17 +105,17 @@ class AppImportConfirmApi(Resource):
@setup_required
@login_required
@account_initialization_required
- @marshal_with(app_import_fields)
+ @marshal_with(app_import_model)
+ @edit_permission_required
def post(self, import_id):
- # Check user role first
- if not current_user.is_editor:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
# Create service with session
with Session(db.engine) as session:
import_service = AppDslService(session)
# Confirm import
- account = cast(Account, current_user)
+ account = current_user
result = import_service.confirm_import(import_id=import_id, account=account)
session.commit()
@@ -108,11 +131,9 @@ class AppImportCheckDependenciesApi(Resource):
@login_required
@get_app_model
@account_initialization_required
- @marshal_with(app_import_check_dependencies_fields)
+ @marshal_with(app_import_check_dependencies_model)
+ @edit_permission_required
def get(self, app_model: App):
- if not current_user.is_editor:
- raise Forbidden()
-
with Session(db.engine) as session:
import_service = AppDslService(session)
result = import_service.check_dependencies(app_model=app_model)
diff --git a/api/controllers/console/app/audio.py b/api/controllers/console/app/audio.py
index 7d659dae0d..d344ede466 100644
--- a/api/controllers/console/app/audio.py
+++ b/api/controllers/console/app/audio.py
@@ -1,11 +1,12 @@
import logging
from flask import request
-from flask_restx import Resource, fields, reqparse
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field
from werkzeug.exceptions import InternalServerError
import services
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.error import (
AppUnavailableError,
AudioTooLargeError,
@@ -32,20 +33,41 @@ from services.errors.audio import (
)
logger = logging.getLogger(__name__)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class TextToSpeechPayload(BaseModel):
+ message_id: str | None = Field(default=None, description="Message ID")
+ text: str = Field(..., description="Text to convert")
+ voice: str | None = Field(default=None, description="Voice name")
+ streaming: bool | None = Field(default=None, description="Whether to stream audio")
+
+
+class TextToSpeechVoiceQuery(BaseModel):
+ language: str = Field(..., description="Language code")
+
+
+console_ns.schema_model(
+ TextToSpeechPayload.__name__, TextToSpeechPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+console_ns.schema_model(
+ TextToSpeechVoiceQuery.__name__,
+ TextToSpeechVoiceQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
@console_ns.route("/apps//audio-to-text")
class ChatMessageAudioApi(Resource):
- @api.doc("chat_message_audio_transcript")
- @api.doc(description="Transcript audio to text for chat messages")
- @api.doc(params={"app_id": "App ID"})
- @api.response(
+ @console_ns.doc("chat_message_audio_transcript")
+ @console_ns.doc(description="Transcript audio to text for chat messages")
+ @console_ns.doc(params={"app_id": "App ID"})
+ @console_ns.response(
200,
"Audio transcription successful",
- api.model("AudioTranscriptResponse", {"text": fields.String(description="Transcribed text from audio")}),
+ console_ns.model("AudioTranscriptResponse", {"text": fields.String(description="Transcribed text from audio")}),
)
- @api.response(400, "Bad request - No audio uploaded or unsupported type")
- @api.response(413, "Audio file too large")
+ @console_ns.response(400, "Bad request - No audio uploaded or unsupported type")
+ @console_ns.response(413, "Audio file too large")
@setup_required
@login_required
@account_initialization_required
@@ -89,41 +111,26 @@ class ChatMessageAudioApi(Resource):
@console_ns.route("/apps//text-to-audio")
class ChatMessageTextApi(Resource):
- @api.doc("chat_message_text_to_speech")
- @api.doc(description="Convert text to speech for chat messages")
- @api.doc(params={"app_id": "App ID"})
- @api.expect(
- api.model(
- "TextToSpeechRequest",
- {
- "message_id": fields.String(description="Message ID"),
- "text": fields.String(required=True, description="Text to convert to speech"),
- "voice": fields.String(description="Voice to use for TTS"),
- "streaming": fields.Boolean(description="Whether to stream the audio"),
- },
- )
- )
- @api.response(200, "Text to speech conversion successful")
- @api.response(400, "Bad request - Invalid parameters")
+ @console_ns.doc("chat_message_text_to_speech")
+ @console_ns.doc(description="Convert text to speech for chat messages")
+ @console_ns.doc(params={"app_id": "App ID"})
+ @console_ns.expect(console_ns.models[TextToSpeechPayload.__name__])
+ @console_ns.response(200, "Text to speech conversion successful")
+ @console_ns.response(400, "Bad request - Invalid parameters")
@get_app_model
@setup_required
@login_required
@account_initialization_required
def post(self, app_model: App):
try:
- parser = reqparse.RequestParser()
- parser.add_argument("message_id", type=str, location="json")
- parser.add_argument("text", type=str, location="json")
- parser.add_argument("voice", type=str, location="json")
- parser.add_argument("streaming", type=bool, location="json")
- args = parser.parse_args()
-
- message_id = args.get("message_id", None)
- text = args.get("text", None)
- voice = args.get("voice", None)
+ payload = TextToSpeechPayload.model_validate(console_ns.payload)
response = AudioService.transcript_tts(
- app_model=app_model, text=text, voice=voice, message_id=message_id, is_draft=True
+ app_model=app_model,
+ text=payload.text,
+ voice=payload.voice,
+ message_id=payload.message_id,
+ is_draft=True,
)
return response
except services.errors.app_model_config.AppModelConfigBrokenError:
@@ -154,25 +161,25 @@ class ChatMessageTextApi(Resource):
@console_ns.route("/apps//text-to-audio/voices")
class TextModesApi(Resource):
- @api.doc("get_text_to_speech_voices")
- @api.doc(description="Get available TTS voices for a specific language")
- @api.doc(params={"app_id": "App ID"})
- @api.expect(api.parser().add_argument("language", type=str, required=True, location="args", help="Language code"))
- @api.response(200, "TTS voices retrieved successfully", fields.List(fields.Raw(description="Available voices")))
- @api.response(400, "Invalid language parameter")
+ @console_ns.doc("get_text_to_speech_voices")
+ @console_ns.doc(description="Get available TTS voices for a specific language")
+ @console_ns.doc(params={"app_id": "App ID"})
+ @console_ns.expect(console_ns.models[TextToSpeechVoiceQuery.__name__])
+ @console_ns.response(
+ 200, "TTS voices retrieved successfully", fields.List(fields.Raw(description="Available voices"))
+ )
+ @console_ns.response(400, "Invalid language parameter")
@get_app_model
@setup_required
@login_required
@account_initialization_required
def get(self, app_model):
try:
- parser = reqparse.RequestParser()
- parser.add_argument("language", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = TextToSpeechVoiceQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
response = AudioService.transcript_tts_voices(
tenant_id=app_model.tenant_id,
- language=args["language"],
+ language=args.language,
)
return response
diff --git a/api/controllers/console/app/completion.py b/api/controllers/console/app/completion.py
index 2f7b90e7fb..2922121a54 100644
--- a/api/controllers/console/app/completion.py
+++ b/api/controllers/console/app/completion.py
@@ -1,11 +1,13 @@
import logging
+from typing import Any, Literal
from flask import request
-from flask_restx import Resource, fields, reqparse
-from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
+from flask_restx import Resource
+from pydantic import BaseModel, Field, field_validator
+from werkzeug.exceptions import InternalServerError, NotFound
import services
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.error import (
AppUnavailableError,
CompletionRequestError,
@@ -15,9 +17,8 @@ from controllers.console.app.error import (
ProviderQuotaExceededError,
)
from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
-from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
ModelCurrentlyNotSupportError,
@@ -32,48 +33,66 @@ from libs.login import current_user, login_required
from models import Account
from models.model import AppMode
from services.app_generate_service import AppGenerateService
+from services.app_task_service import AppTaskService
from services.errors.llm import InvokeRateLimitError
logger = logging.getLogger(__name__)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class BaseMessagePayload(BaseModel):
+ inputs: dict[str, Any]
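+ # "model_config" is reserved by Pydantic v2 for model settings, so the request
+ # key is mapped onto model_config_data via an alias.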
+ model_config_data: dict[str, Any] = Field(..., alias="model_config")
+ files: list[Any] | None = Field(default=None, description="Uploaded files")
+ response_mode: Literal["blocking", "streaming"] = Field(default="streaming", description="Response mode")
+ retriever_from: str = Field(default="dev", description="Retriever source")
+
+
+class CompletionMessagePayload(BaseMessagePayload):
+ query: str = Field(default="", description="Query text")
+
+
+class ChatMessagePayload(BaseMessagePayload):
+ query: str = Field(..., description="User query")
+ conversation_id: str | None = Field(default=None, description="Conversation ID")
+ parent_message_id: str | None = Field(default=None, description="Parent message ID")
+
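+ # Reuse uuid_value so malformed IDs fail validation before reaching the service.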
+ @field_validator("conversation_id", "parent_message_id")
+ @classmethod
+ def validate_uuid(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+console_ns.schema_model(
+ CompletionMessagePayload.__name__,
+ CompletionMessagePayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+console_ns.schema_model(
+ ChatMessagePayload.__name__, ChatMessagePayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
# define completion message api for user
@console_ns.route("/apps//completion-messages")
class CompletionMessageApi(Resource):
- @api.doc("create_completion_message")
- @api.doc(description="Generate completion message for debugging")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "CompletionMessageRequest",
- {
- "inputs": fields.Raw(required=True, description="Input variables"),
- "query": fields.String(description="Query text", default=""),
- "files": fields.List(fields.Raw(), description="Uploaded files"),
- "model_config": fields.Raw(required=True, description="Model configuration"),
- "response_mode": fields.String(enum=["blocking", "streaming"], description="Response mode"),
- "retriever_from": fields.String(default="dev", description="Retriever source"),
- },
- )
- )
- @api.response(200, "Completion generated successfully")
- @api.response(400, "Invalid request parameters")
- @api.response(404, "App not found")
+ @console_ns.doc("create_completion_message")
+ @console_ns.doc(description="Generate completion message for debugging")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[CompletionMessagePayload.__name__])
+ @console_ns.response(200, "Completion generated successfully")
+ @console_ns.response(400, "Invalid request parameters")
+ @console_ns.response(404, "App not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=AppMode.COMPLETION)
def post(self, app_model):
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, location="json")
- parser.add_argument("query", type=str, location="json", default="")
- parser.add_argument("files", type=list, required=False, location="json")
- parser.add_argument("model_config", type=dict, required=True, location="json")
- parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
- parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
- args = parser.parse_args()
+ args_model = CompletionMessagePayload.model_validate(console_ns.payload)
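+ # by_alias=True keeps the request's original "model_config" key in the dumped
+ # dict; exclude_none drops unset optionals so downstream defaults still apply.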
+ args = args_model.model_dump(exclude_none=True, by_alias=True)
- streaming = args["response_mode"] != "blocking"
+ streaming = args_model.response_mode != "blocking"
args["auto_generate_name"] = False
try:
@@ -108,10 +127,10 @@ class CompletionMessageApi(Resource):
@console_ns.route("/apps//completion-messages//stop")
class CompletionMessageStopApi(Resource):
- @api.doc("stop_completion_message")
- @api.doc(description="Stop a running completion message generation")
- @api.doc(params={"app_id": "Application ID", "task_id": "Task ID to stop"})
- @api.response(200, "Task stopped successfully")
+ @console_ns.doc("stop_completion_message")
+ @console_ns.doc(description="Stop a running completion message generation")
+ @console_ns.doc(params={"app_id": "Application ID", "task_id": "Task ID to stop"})
+ @console_ns.response(200, "Task stopped successfully")
@setup_required
@login_required
@account_initialization_required
@@ -119,57 +138,36 @@ class CompletionMessageStopApi(Resource):
def post(self, app_model, task_id):
if not isinstance(current_user, Account):
raise ValueError("current_user must be an Account instance")
- AppQueueManager.set_stop_flag(task_id, InvokeFrom.DEBUGGER, current_user.id)
+
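+ # Stop via AppTaskService, which also takes the app mode so it can resolve how
+ # the running task should be halted.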
+ AppTaskService.stop_task(
+ task_id=task_id,
+ invoke_from=InvokeFrom.DEBUGGER,
+ user_id=current_user.id,
+ app_mode=AppMode.value_of(app_model.mode),
+ )
return {"result": "success"}, 200
@console_ns.route("/apps//chat-messages")
class ChatMessageApi(Resource):
- @api.doc("create_chat_message")
- @api.doc(description="Generate chat message for debugging")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "ChatMessageRequest",
- {
- "inputs": fields.Raw(required=True, description="Input variables"),
- "query": fields.String(required=True, description="User query"),
- "files": fields.List(fields.Raw(), description="Uploaded files"),
- "model_config": fields.Raw(required=True, description="Model configuration"),
- "conversation_id": fields.String(description="Conversation ID"),
- "parent_message_id": fields.String(description="Parent message ID"),
- "response_mode": fields.String(enum=["blocking", "streaming"], description="Response mode"),
- "retriever_from": fields.String(default="dev", description="Retriever source"),
- },
- )
- )
- @api.response(200, "Chat message generated successfully")
- @api.response(400, "Invalid request parameters")
- @api.response(404, "App or conversation not found")
+ @console_ns.doc("create_chat_message")
+ @console_ns.doc(description="Generate chat message for debugging")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[ChatMessagePayload.__name__])
+ @console_ns.response(200, "Chat message generated successfully")
+ @console_ns.response(400, "Invalid request parameters")
+ @console_ns.response(404, "App or conversation not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT])
+ @edit_permission_required
def post(self, app_model):
- if not isinstance(current_user, Account):
- raise Forbidden()
+ args_model = ChatMessagePayload.model_validate(console_ns.payload)
+ args = args_model.model_dump(exclude_none=True, by_alias=True)
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, location="json")
- parser.add_argument("query", type=str, required=True, location="json")
- parser.add_argument("files", type=list, required=False, location="json")
- parser.add_argument("model_config", type=dict, required=True, location="json")
- parser.add_argument("conversation_id", type=uuid_value, location="json")
- parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
- parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
- parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
- args = parser.parse_args()
-
- streaming = args["response_mode"] != "blocking"
+ streaming = args_model.response_mode != "blocking"
args["auto_generate_name"] = False
external_trace_id = get_external_trace_id(request)
@@ -210,10 +208,10 @@ class ChatMessageApi(Resource):
@console_ns.route("/apps//chat-messages//stop")
class ChatMessageStopApi(Resource):
- @api.doc("stop_chat_message")
- @api.doc(description="Stop a running chat message generation")
- @api.doc(params={"app_id": "Application ID", "task_id": "Task ID to stop"})
- @api.response(200, "Task stopped successfully")
+ @console_ns.doc("stop_chat_message")
+ @console_ns.doc(description="Stop a running chat message generation")
+ @console_ns.doc(params={"app_id": "Application ID", "task_id": "Task ID to stop"})
+ @console_ns.response(200, "Task stopped successfully")
@setup_required
@login_required
@account_initialization_required
@@ -221,6 +219,12 @@ class ChatMessageStopApi(Resource):
def post(self, app_model, task_id):
if not isinstance(current_user, Account):
raise ValueError("current_user must be an Account instance")
- AppQueueManager.set_stop_flag(task_id, InvokeFrom.DEBUGGER, current_user.id)
+
+ AppTaskService.stop_task(
+ task_id=task_id,
+ invoke_from=InvokeFrom.DEBUGGER,
+ user_id=current_user.id,
+ app_mode=AppMode.value_of(app_model.mode),
+ )
return {"result": "success"}, 200
diff --git a/api/controllers/console/app/conversation.py b/api/controllers/console/app/conversation.py
index 3b8dff613b..c16dcfd91f 100644
--- a/api/controllers/console/app/conversation.py
+++ b/api/controllers/console/app/conversation.py
@@ -1,116 +1,376 @@
-from datetime import datetime
+from typing import Literal
-import pytz # pip install pytz
import sqlalchemy as sa
-from flask_login import current_user
-from flask_restx import Resource, marshal_with, reqparse
-from flask_restx.inputs import int_range
+from flask import abort, request
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy import func, or_
from sqlalchemy.orm import joinedload
-from werkzeug.exceptions import Forbidden, NotFound
+from werkzeug.exceptions import NotFound
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from core.app.entities.app_invoke_entities import InvokeFrom
from extensions.ext_database import db
-from fields.conversation_fields import (
- conversation_detail_fields,
- conversation_message_detail_fields,
- conversation_pagination_fields,
- conversation_with_summary_pagination_fields,
-)
-from libs.datetime_utils import naive_utc_now
-from libs.helper import DatetimeString
-from libs.login import login_required
-from models import Account, Conversation, EndUser, Message, MessageAnnotation
+from fields.conversation_fields import MessageTextField
+from fields.raws import FilesContainedField
+from libs.datetime_utils import naive_utc_now, parse_time_range
+from libs.helper import TimestampField
+from libs.login import current_account_with_tenant, login_required
+from models import Conversation, EndUser, Message, MessageAnnotation
from models.model import AppMode
from services.conversation_service import ConversationService
from services.errors.conversation import ConversationNotExistsError
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class BaseConversationQuery(BaseModel):
+ keyword: str | None = Field(default=None, description="Search keyword")
+ start: str | None = Field(default=None, description="Start date (YYYY-MM-DD HH:MM)")
+ end: str | None = Field(default=None, description="End date (YYYY-MM-DD HH:MM)")
+ annotation_status: Literal["annotated", "not_annotated", "all"] = Field(
+ default="all", description="Annotation status filter"
+ )
+ page: int = Field(default=1, ge=1, le=99999, description="Page number")
+ limit: int = Field(default=20, ge=1, le=100, description="Page size (1-100)")
+
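+ # Query params may arrive as empty strings for unset filters; normalize "" to None.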
+ @field_validator("start", "end", mode="before")
+ @classmethod
+ def blank_to_none(cls, value: str | None) -> str | None:
+ if value == "":
+ return None
+ return value
+
+
+class CompletionConversationQuery(BaseConversationQuery):
+ pass
+
+
+class ChatConversationQuery(BaseConversationQuery):
+ sort_by: Literal["created_at", "-created_at", "updated_at", "-updated_at"] = Field(
+ default="-updated_at", description="Sort field and direction"
+ )
+
+
+console_ns.schema_model(
+ CompletionConversationQuery.__name__,
+ CompletionConversationQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+console_ns.schema_model(
+ ChatConversationQuery.__name__,
+ ChatConversationQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+
+# Register marshalling models with flask_restx so Swagger renders typed schemas
+# instead of raw dicts. Register in dependency order: base models first, then dependent models.
+
+# Base models
+simple_account_model = console_ns.model(
+ "SimpleAccount",
+ {
+ "id": fields.String,
+ "name": fields.String,
+ "email": fields.String,
+ },
+)
+
+feedback_stat_model = console_ns.model(
+ "FeedbackStat",
+ {
+ "like": fields.Integer,
+ "dislike": fields.Integer,
+ },
+)
+
+status_count_model = console_ns.model(
+ "StatusCount",
+ {
+ "success": fields.Integer,
+ "failed": fields.Integer,
+ "partial_success": fields.Integer,
+ },
+)
+
+message_file_model = console_ns.model(
+ "MessageFile",
+ {
+ "id": fields.String,
+ "filename": fields.String,
+ "type": fields.String,
+ "url": fields.String,
+ "mime_type": fields.String,
+ "size": fields.Integer,
+ "transfer_method": fields.String,
+ "belongs_to": fields.String(default="user"),
+ "upload_file_id": fields.String(default=None),
+ },
+)
+
+agent_thought_model = console_ns.model(
+ "AgentThought",
+ {
+ "id": fields.String,
+ "chain_id": fields.String,
+ "message_id": fields.String,
+ "position": fields.Integer,
+ "thought": fields.String,
+ "tool": fields.String,
+ "tool_labels": fields.Raw,
+ "tool_input": fields.String,
+ "created_at": TimestampField,
+ "observation": fields.String,
+ "files": fields.List(fields.String),
+ },
+)
+
+simple_model_config_model = console_ns.model(
+ "SimpleModelConfig",
+ {
+ "model": fields.Raw(attribute="model_dict"),
+ "pre_prompt": fields.String,
+ },
+)
+
+model_config_model = console_ns.model(
+ "ModelConfig",
+ {
+ "opening_statement": fields.String,
+ "suggested_questions": fields.Raw,
+ "model": fields.Raw,
+ "user_input_form": fields.Raw,
+ "pre_prompt": fields.String,
+ "agent_mode": fields.Raw,
+ },
+)
+
+# Models that depend on simple_account_model
+feedback_model = console_ns.model(
+ "Feedback",
+ {
+ "rating": fields.String,
+ "content": fields.String,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_account": fields.Nested(simple_account_model, allow_null=True),
+ },
+)
+
+annotation_model = console_ns.model(
+ "Annotation",
+ {
+ "id": fields.String,
+ "question": fields.String,
+ "content": fields.String,
+ "account": fields.Nested(simple_account_model, allow_null=True),
+ "created_at": TimestampField,
+ },
+)
+
+annotation_hit_history_model = console_ns.model(
+ "AnnotationHitHistory",
+ {
+ "annotation_id": fields.String(attribute="id"),
+ "annotation_create_account": fields.Nested(simple_account_model, allow_null=True),
+ "created_at": TimestampField,
+ },
+)
+
+# Simple message detail model
+simple_message_detail_model = console_ns.model(
+ "SimpleMessageDetail",
+ {
+ "inputs": FilesContainedField,
+ "query": fields.String,
+ "message": MessageTextField,
+ "answer": fields.String,
+ },
+)
+
+# Message detail model that depends on multiple models
+message_detail_model = console_ns.model(
+ "MessageDetail",
+ {
+ "id": fields.String,
+ "conversation_id": fields.String,
+ "inputs": FilesContainedField,
+ "query": fields.String,
+ "message": fields.Raw,
+ "message_tokens": fields.Integer,
+ "answer": fields.String(attribute="re_sign_file_url_answer"),
+ "answer_tokens": fields.Integer,
+ "provider_response_latency": fields.Float,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_account_id": fields.String,
+ "feedbacks": fields.List(fields.Nested(feedback_model)),
+ "workflow_run_id": fields.String,
+ "annotation": fields.Nested(annotation_model, allow_null=True),
+ "annotation_hit_history": fields.Nested(annotation_hit_history_model, allow_null=True),
+ "created_at": TimestampField,
+ "agent_thoughts": fields.List(fields.Nested(agent_thought_model)),
+ "message_files": fields.List(fields.Nested(message_file_model)),
+ "metadata": fields.Raw(attribute="message_metadata_dict"),
+ "status": fields.String,
+ "error": fields.String,
+ "parent_message_id": fields.String,
+ },
+)
+
+# Conversation models
+conversation_fields_model = console_ns.model(
+ "Conversation",
+ {
+ "id": fields.String,
+ "status": fields.String,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_end_user_session_id": fields.String(),
+ "from_account_id": fields.String,
+ "from_account_name": fields.String,
+ "read_at": TimestampField,
+ "created_at": TimestampField,
+ "updated_at": TimestampField,
+ "annotation": fields.Nested(annotation_model, allow_null=True),
+ "model_config": fields.Nested(simple_model_config_model),
+ "user_feedback_stats": fields.Nested(feedback_stat_model),
+ "admin_feedback_stats": fields.Nested(feedback_stat_model),
+ "message": fields.Nested(simple_message_detail_model, attribute="first_message"),
+ },
+)
+
+conversation_pagination_model = console_ns.model(
+ "ConversationPagination",
+ {
+ "page": fields.Integer,
+ "limit": fields.Integer(attribute="per_page"),
+ "total": fields.Integer,
+ "has_more": fields.Boolean(attribute="has_next"),
+ "data": fields.List(fields.Nested(conversation_fields_model), attribute="items"),
+ },
+)
+
+conversation_message_detail_model = console_ns.model(
+ "ConversationMessageDetail",
+ {
+ "id": fields.String,
+ "status": fields.String,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_account_id": fields.String,
+ "created_at": TimestampField,
+ "model_config": fields.Nested(model_config_model),
+ "message": fields.Nested(message_detail_model, attribute="first_message"),
+ },
+)
+
+conversation_with_summary_model = console_ns.model(
+ "ConversationWithSummary",
+ {
+ "id": fields.String,
+ "status": fields.String,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_end_user_session_id": fields.String,
+ "from_account_id": fields.String,
+ "from_account_name": fields.String,
+ "name": fields.String,
+ "summary": fields.String(attribute="summary_or_query"),
+ "read_at": TimestampField,
+ "created_at": TimestampField,
+ "updated_at": TimestampField,
+ "annotated": fields.Boolean,
+ "model_config": fields.Nested(simple_model_config_model),
+ "message_count": fields.Integer,
+ "user_feedback_stats": fields.Nested(feedback_stat_model),
+ "admin_feedback_stats": fields.Nested(feedback_stat_model),
+ "status_count": fields.Nested(status_count_model),
+ },
+)
+
+conversation_with_summary_pagination_model = console_ns.model(
+ "ConversationWithSummaryPagination",
+ {
+ "page": fields.Integer,
+ "limit": fields.Integer(attribute="per_page"),
+ "total": fields.Integer,
+ "has_more": fields.Boolean(attribute="has_next"),
+ "data": fields.List(fields.Nested(conversation_with_summary_model), attribute="items"),
+ },
+)
+
+conversation_detail_model = console_ns.model(
+ "ConversationDetail",
+ {
+ "id": fields.String,
+ "status": fields.String,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_account_id": fields.String,
+ "created_at": TimestampField,
+ "updated_at": TimestampField,
+ "annotated": fields.Boolean,
+ "introduction": fields.String,
+ "model_config": fields.Nested(model_config_model),
+ "message_count": fields.Integer,
+ "user_feedback_stats": fields.Nested(feedback_stat_model),
+ "admin_feedback_stats": fields.Nested(feedback_stat_model),
+ },
+)
+
@console_ns.route("/apps/<uuid:app_id>/completion-conversations")
class CompletionConversationApi(Resource):
- @api.doc("list_completion_conversations")
- @api.doc(description="Get completion conversations with pagination and filtering")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("keyword", type=str, location="args", help="Search keyword")
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- .add_argument(
- "annotation_status",
- type=str,
- location="args",
- choices=["annotated", "not_annotated", "all"],
- default="all",
- help="Annotation status filter",
- )
- .add_argument("page", type=int, location="args", default=1, help="Page number")
- .add_argument("limit", type=int, location="args", default=20, help="Page size (1-100)")
- )
- @api.response(200, "Success", conversation_pagination_fields)
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("list_completion_conversations")
+ @console_ns.doc(description="Get completion conversations with pagination and filtering")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[CompletionConversationQuery.__name__])
+ @console_ns.response(200, "Success", conversation_pagination_model)
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=AppMode.COMPLETION)
- @marshal_with(conversation_pagination_fields)
+ @marshal_with(conversation_pagination_model)
+ @edit_permission_required
def get(self, app_model):
- if not current_user.is_editor:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("keyword", type=str, location="args")
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument(
- "annotation_status", type=str, choices=["annotated", "not_annotated", "all"], default="all", location="args"
- )
- parser.add_argument("page", type=int_range(1, 99999), default=1, location="args")
- parser.add_argument("limit", type=int_range(1, 100), default=20, location="args")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
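+ # request.args.to_dict(flat=True) keeps only the first value for any repeated
+ # query key, mirroring the single-value behavior of the removed reqparse parser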
+ args = CompletionConversationQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
query = sa.select(Conversation).where(
Conversation.app_id == app_model.id, Conversation.mode == "completion", Conversation.is_deleted.is_(False)
)
- if args["keyword"]:
+ if args.keyword:
query = query.join(Message, Message.conversation_id == Conversation.id).where(
or_(
- Message.query.ilike(f"%{args['keyword']}%"),
- Message.answer.ilike(f"%{args['keyword']}%"),
+ Message.query.ilike(f"%{args.keyword}%"),
+ Message.answer.ilike(f"%{args.keyword}%"),
)
)
account = current_user
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
+ assert account.timezone is not None
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
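+ # parse_time_range (helper not shown in this hunk) is assumed to localize the
+ # naive "%Y-%m-%d %H:%M" inputs to the account's timezone, convert both bounds
+ # to UTC, and raise ValueError on malformed input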
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
query = query.where(Conversation.created_at >= start_datetime_utc)
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=59)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
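+ # pad the minute-granular end bound to :59 so the filter keeps the old
+ # strptime-based code's inclusive end-of-minute behavior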
+ end_datetime_utc = end_datetime_utc.replace(second=59)
query = query.where(Conversation.created_at < end_datetime_utc)
# FIXME, the type ignore in this file
- if args["annotation_status"] == "annotated":
+ if args.annotation_status == "annotated":
query = query.options(joinedload(Conversation.message_annotations)).join( # type: ignore
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
- elif args["annotation_status"] == "not_annotated":
+ elif args.annotation_status == "not_annotated":
query = (
query.outerjoin(MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id)
.group_by(Conversation.id)
@@ -119,49 +379,46 @@ class CompletionConversationApi(Resource):
query = query.order_by(Conversation.created_at.desc())
- conversations = db.paginate(query, page=args["page"], per_page=args["limit"], error_out=False)
+ conversations = db.paginate(query, page=args.page, per_page=args.limit, error_out=False)
return conversations
@console_ns.route("/apps/<uuid:app_id>/completion-conversations/<uuid:conversation_id>")
class CompletionConversationDetailApi(Resource):
- @api.doc("get_completion_conversation")
- @api.doc(description="Get completion conversation details with messages")
- @api.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
- @api.response(200, "Success", conversation_message_detail_fields)
- @api.response(403, "Insufficient permissions")
- @api.response(404, "Conversation not found")
+ @console_ns.doc("get_completion_conversation")
+ @console_ns.doc(description="Get completion conversation details with messages")
+ @console_ns.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
+ @console_ns.response(200, "Success", conversation_message_detail_model)
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(404, "Conversation not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=AppMode.COMPLETION)
- @marshal_with(conversation_message_detail_fields)
+ @marshal_with(conversation_message_detail_model)
+ @edit_permission_required
def get(self, app_model, conversation_id):
- if not current_user.is_editor:
- raise Forbidden()
conversation_id = str(conversation_id)
return _get_conversation(app_model, conversation_id)
- @api.doc("delete_completion_conversation")
- @api.doc(description="Delete a completion conversation")
- @api.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
- @api.response(204, "Conversation deleted successfully")
- @api.response(403, "Insufficient permissions")
- @api.response(404, "Conversation not found")
+ @console_ns.doc("delete_completion_conversation")
+ @console_ns.doc(description="Delete a completion conversation")
+ @console_ns.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
+ @console_ns.response(204, "Conversation deleted successfully")
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(404, "Conversation not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=AppMode.COMPLETION)
+ @edit_permission_required
def delete(self, app_model, conversation_id):
- if not current_user.is_editor:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
conversation_id = str(conversation_id)
try:
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
ConversationService.delete(app_model, conversation_id, current_user)
except ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
@@ -171,63 +428,21 @@ class CompletionConversationDetailApi(Resource):
@console_ns.route("/apps/<uuid:app_id>/chat-conversations")
class ChatConversationApi(Resource):
- @api.doc("list_chat_conversations")
- @api.doc(description="Get chat conversations with pagination, filtering and summary")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("keyword", type=str, location="args", help="Search keyword")
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- .add_argument(
- "annotation_status",
- type=str,
- location="args",
- choices=["annotated", "not_annotated", "all"],
- default="all",
- help="Annotation status filter",
- )
- .add_argument("message_count_gte", type=int, location="args", help="Minimum message count")
- .add_argument("page", type=int, location="args", default=1, help="Page number")
- .add_argument("limit", type=int, location="args", default=20, help="Page size (1-100)")
- .add_argument(
- "sort_by",
- type=str,
- location="args",
- choices=["created_at", "-created_at", "updated_at", "-updated_at"],
- default="-updated_at",
- help="Sort field and direction",
- )
- )
- @api.response(200, "Success", conversation_with_summary_pagination_fields)
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("list_chat_conversations")
+ @console_ns.doc(description="Get chat conversations with pagination, filtering and summary")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[ChatConversationQuery.__name__])
+ @console_ns.response(200, "Success", conversation_with_summary_pagination_model)
+ @console_ns.response(403, "Insufficient permissions")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
- @marshal_with(conversation_with_summary_pagination_fields)
+ @marshal_with(conversation_with_summary_pagination_model)
+ @edit_permission_required
def get(self, app_model):
- if not current_user.is_editor:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("keyword", type=str, location="args")
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument(
- "annotation_status", type=str, choices=["annotated", "not_annotated", "all"], default="all", location="args"
- )
- parser.add_argument("message_count_gte", type=int_range(1, 99999), required=False, location="args")
- parser.add_argument("page", type=int_range(1, 99999), required=False, default=1, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- parser.add_argument(
- "sort_by",
- type=str,
- choices=["created_at", "-created_at", "updated_at", "-updated_at"],
- required=False,
- default="-updated_at",
- location="args",
- )
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = ChatConversationQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
subquery = (
db.session.query(
@@ -239,8 +454,8 @@ class ChatConversationApi(Resource):
query = sa.select(Conversation).where(Conversation.app_id == app_model.id, Conversation.is_deleted.is_(False))
- if args["keyword"]:
- keyword_filter = f"%{args['keyword']}%"
+ if args.keyword:
+ keyword_filter = f"%{args.keyword}%"
query = (
query.join(
Message,
@@ -260,58 +475,43 @@ class ChatConversationApi(Resource):
)
account = current_user
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
+ assert account.timezone is not None
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
-
- match args["sort_by"]:
+ if start_datetime_utc:
+ match args.sort_by:
case "updated_at" | "-updated_at":
query = query.where(Conversation.updated_at >= start_datetime_utc)
case "created_at" | "-created_at" | _:
query = query.where(Conversation.created_at >= start_datetime_utc)
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=59)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
- match args["sort_by"]:
+ if end_datetime_utc:
+ end_datetime_utc = end_datetime_utc.replace(second=59)
+ match args.sort_by:
case "updated_at" | "-updated_at":
query = query.where(Conversation.updated_at <= end_datetime_utc)
case "created_at" | "-created_at" | _:
query = query.where(Conversation.created_at <= end_datetime_utc)
- if args["annotation_status"] == "annotated":
+ if args.annotation_status == "annotated":
query = query.options(joinedload(Conversation.message_annotations)).join( # type: ignore
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
- elif args["annotation_status"] == "not_annotated":
+ elif args.annotation_status == "not_annotated":
query = (
query.outerjoin(MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id)
.group_by(Conversation.id)
.having(func.count(MessageAnnotation.id) == 0)
)
- if args["message_count_gte"] and args["message_count_gte"] >= 1:
- query = (
- query.options(joinedload(Conversation.messages)) # type: ignore
- .join(Message, Message.conversation_id == Conversation.id)
- .group_by(Conversation.id)
- .having(func.count(Message.id) >= args["message_count_gte"])
- )
-
if app_model.mode == AppMode.ADVANCED_CHAT:
query = query.where(Conversation.invoke_from != InvokeFrom.DEBUGGER)
- match args["sort_by"]:
+ match args.sort_by:
case "created_at":
query = query.order_by(Conversation.created_at.asc())
case "-created_at":
@@ -323,49 +523,46 @@ class ChatConversationApi(Resource):
case _:
query = query.order_by(Conversation.created_at.desc())
- conversations = db.paginate(query, page=args["page"], per_page=args["limit"], error_out=False)
+ conversations = db.paginate(query, page=args.page, per_page=args.limit, error_out=False)
return conversations
@console_ns.route("/apps/<uuid:app_id>/chat-conversations/<uuid:conversation_id>")
class ChatConversationDetailApi(Resource):
- @api.doc("get_chat_conversation")
- @api.doc(description="Get chat conversation details")
- @api.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
- @api.response(200, "Success", conversation_detail_fields)
- @api.response(403, "Insufficient permissions")
- @api.response(404, "Conversation not found")
+ @console_ns.doc("get_chat_conversation")
+ @console_ns.doc(description="Get chat conversation details")
+ @console_ns.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
+ @console_ns.response(200, "Success", conversation_detail_model)
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(404, "Conversation not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
- @marshal_with(conversation_detail_fields)
+ @marshal_with(conversation_detail_model)
+ @edit_permission_required
def get(self, app_model, conversation_id):
- if not current_user.is_editor:
- raise Forbidden()
conversation_id = str(conversation_id)
return _get_conversation(app_model, conversation_id)
- @api.doc("delete_chat_conversation")
- @api.doc(description="Delete a chat conversation")
- @api.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
- @api.response(204, "Conversation deleted successfully")
- @api.response(403, "Insufficient permissions")
- @api.response(404, "Conversation not found")
+ @console_ns.doc("delete_chat_conversation")
+ @console_ns.doc(description="Delete a chat conversation")
+ @console_ns.doc(params={"app_id": "Application ID", "conversation_id": "Conversation ID"})
+ @console_ns.response(204, "Conversation deleted successfully")
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(404, "Conversation not found")
@setup_required
@login_required
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
@account_initialization_required
+ @edit_permission_required
def delete(self, app_model, conversation_id):
- if not current_user.is_editor:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
conversation_id = str(conversation_id)
try:
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
ConversationService.delete(app_model, conversation_id, current_user)
except ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
@@ -374,6 +571,7 @@ class ChatConversationDetailApi(Resource):
def _get_conversation(app_model, conversation_id):
+ current_user, _ = current_account_with_tenant()
conversation = (
db.session.query(Conversation)
.where(Conversation.id == conversation_id, Conversation.app_id == app_model.id)
diff --git a/api/controllers/console/app/conversation_variables.py b/api/controllers/console/app/conversation_variables.py
index 8a65a89963..368a6112ba 100644
--- a/api/controllers/console/app/conversation_variables.py
+++ b/api/controllers/console/app/conversation_variables.py
@@ -1,47 +1,68 @@
-from flask_restx import Resource, marshal_with, reqparse
+from flask import request
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.orm import Session
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
from extensions.ext_database import db
-from fields.conversation_variable_fields import paginated_conversation_variable_fields
+from fields.conversation_variable_fields import (
+ conversation_variable_fields,
+ paginated_conversation_variable_fields,
+)
from libs.login import login_required
from models import ConversationVariable
from models.model import AppMode
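+# Flask-RESTX documents APIs as Swagger 2.0, where schema refs live under
+# "#/definitions/{model}" (OpenAPI 3 would use "#/components/schemas/{model}"),
+# so Pydantic schemas must be exported with this ref template.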
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ConversationVariablesQuery(BaseModel):
+ conversation_id: str = Field(..., description="Conversation ID to filter variables")
+
+
+console_ns.schema_model(
+ ConversationVariablesQuery.__name__,
+ ConversationVariablesQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
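+# schema_model only registers the JSON schema for Swagger docs; actual request
+# validation happens via ConversationVariablesQuery.model_validate in the handler.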
+
+# Register models for flask_restx to avoid dict type issues in Swagger
+# Register base model first
+conversation_variable_model = console_ns.model("ConversationVariable", conversation_variable_fields)
+
+# For nested models, replace the raw nested dict with the registered model so Swagger can resolve the reference
+paginated_conversation_variable_fields_copy = paginated_conversation_variable_fields.copy()
+paginated_conversation_variable_fields_copy["data"] = fields.List(
+ fields.Nested(conversation_variable_model), attribute="data"
+)
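+# .copy() is shallow, which is enough here: only the "data" entry is replaced,
+# and the remaining pagination fields are shared with the original dict.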
+paginated_conversation_variable_model = console_ns.model(
+ "PaginatedConversationVariable", paginated_conversation_variable_fields_copy
+)
+
@console_ns.route("/apps/<uuid:app_id>/conversation-variables")
class ConversationVariablesApi(Resource):
- @api.doc("get_conversation_variables")
- @api.doc(description="Get conversation variables for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser().add_argument(
- "conversation_id", type=str, location="args", help="Conversation ID to filter variables"
- )
- )
- @api.response(200, "Conversation variables retrieved successfully", paginated_conversation_variable_fields)
+ @console_ns.doc("get_conversation_variables")
+ @console_ns.doc(description="Get conversation variables for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[ConversationVariablesQuery.__name__])
+ @console_ns.response(200, "Conversation variables retrieved successfully", paginated_conversation_variable_model)
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=AppMode.ADVANCED_CHAT)
- @marshal_with(paginated_conversation_variable_fields)
+ @marshal_with(paginated_conversation_variable_model)
def get(self, app_model):
- parser = reqparse.RequestParser()
- parser.add_argument("conversation_id", type=str, location="args")
- args = parser.parse_args()
+ args = ConversationVariablesQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
stmt = (
select(ConversationVariable)
.where(ConversationVariable.app_id == app_model.id)
.order_by(ConversationVariable.created_at)
)
- if args["conversation_id"]:
- stmt = stmt.where(ConversationVariable.conversation_id == args["conversation_id"])
- else:
- raise ValueError("conversation_id is required")
+ stmt = stmt.where(ConversationVariable.conversation_id == args.conversation_id)
# NOTE: This is a temporary solution to avoid performance issues.
page = 1
diff --git a/api/controllers/console/app/generator.py b/api/controllers/console/app/generator.py
index 230ccdca15..b4fc44767a 100644
--- a/api/controllers/console/app/generator.py
+++ b/api/controllers/console/app/generator.py
@@ -1,9 +1,10 @@
from collections.abc import Sequence
+from typing import Any
-from flask_login import current_user
-from flask_restx import Resource, fields, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.error import (
CompletionRequestError,
ProviderModelCurrentlyNotSupportError,
@@ -12,50 +13,80 @@ from controllers.console.app.error import (
)
from controllers.console.wraps import account_initialization_required, setup_required
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
+from core.helper.code_executor.code_node_provider import CodeNodeProvider
from core.helper.code_executor.javascript.javascript_code_provider import JavascriptCodeProvider
from core.helper.code_executor.python3.python3_code_provider import Python3CodeProvider
from core.llm_generator.llm_generator import LLMGenerator
from core.model_runtime.errors.invoke import InvokeError
from extensions.ext_database import db
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models import App
from services.workflow_service import WorkflowService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class RuleGeneratePayload(BaseModel):
+ instruction: str = Field(..., description="Rule generation instruction")
+ model_config_data: dict[str, Any] = Field(..., alias="model_config", description="Model configuration")
+ no_variable: bool = Field(default=False, description="Whether to exclude variables")
+
+
+class RuleCodeGeneratePayload(RuleGeneratePayload):
+ code_language: str = Field(default="javascript", description="Programming language for code generation")
+
+
+class RuleStructuredOutputPayload(BaseModel):
+ instruction: str = Field(..., description="Structured output generation instruction")
+ model_config_data: dict[str, Any] = Field(..., alias="model_config", description="Model configuration")
+
+
+class InstructionGeneratePayload(BaseModel):
+ flow_id: str = Field(..., description="Workflow/Flow ID")
+ node_id: str = Field(default="", description="Node ID for workflow context")
+ current: str = Field(default="", description="Current instruction text")
+ language: str = Field(default="javascript", description="Programming language (javascript/python)")
+ instruction: str = Field(..., description="Instruction for generation")
+ model_config_data: dict[str, Any] = Field(..., alias="model_config", description="Model configuration")
+ ideal_output: str = Field(default="", description="Expected ideal output")
+
+
+class InstructionTemplatePayload(BaseModel):
+ type: str = Field(..., description="Instruction template type")
+
+
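+# Registers each payload's JSON schema with the namespace at import time so the
+# console_ns.models[...] lookups in the @expect decorators below can resolve.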
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(RuleGeneratePayload)
+reg(RuleCodeGeneratePayload)
+reg(RuleStructuredOutputPayload)
+reg(InstructionGeneratePayload)
+reg(InstructionTemplatePayload)
+
@console_ns.route("/rule-generate")
class RuleGenerateApi(Resource):
- @api.doc("generate_rule_config")
- @api.doc(description="Generate rule configuration using LLM")
- @api.expect(
- api.model(
- "RuleGenerateRequest",
- {
- "instruction": fields.String(required=True, description="Rule generation instruction"),
- "model_config": fields.Raw(required=True, description="Model configuration"),
- "no_variable": fields.Boolean(required=True, default=False, description="Whether to exclude variables"),
- },
- )
- )
- @api.response(200, "Rule configuration generated successfully")
- @api.response(400, "Invalid request parameters")
- @api.response(402, "Provider quota exceeded")
+ @console_ns.doc("generate_rule_config")
+ @console_ns.doc(description="Generate rule configuration using LLM")
+ @console_ns.expect(console_ns.models[RuleGeneratePayload.__name__])
+ @console_ns.response(200, "Rule configuration generated successfully")
+ @console_ns.response(400, "Invalid request parameters")
+ @console_ns.response(402, "Provider quota exceeded")
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("instruction", type=str, required=True, nullable=False, location="json")
- parser.add_argument("model_config", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("no_variable", type=bool, required=True, default=False, location="json")
- args = parser.parse_args()
+ args = RuleGeneratePayload.model_validate(console_ns.payload)
+ _, current_tenant_id = current_account_with_tenant()
- account = current_user
try:
rules = LLMGenerator.generate_rule_config(
- tenant_id=account.current_tenant_id,
- instruction=args["instruction"],
- model_config=args["model_config"],
- no_variable=args["no_variable"],
+ tenant_id=current_tenant_id,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
+ no_variable=args.no_variable,
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -71,42 +102,25 @@ class RuleGenerateApi(Resource):
@console_ns.route("/rule-code-generate")
class RuleCodeGenerateApi(Resource):
- @api.doc("generate_rule_code")
- @api.doc(description="Generate code rules using LLM")
- @api.expect(
- api.model(
- "RuleCodeGenerateRequest",
- {
- "instruction": fields.String(required=True, description="Code generation instruction"),
- "model_config": fields.Raw(required=True, description="Model configuration"),
- "no_variable": fields.Boolean(required=True, default=False, description="Whether to exclude variables"),
- "code_language": fields.String(
- default="javascript", description="Programming language for code generation"
- ),
- },
- )
- )
- @api.response(200, "Code rules generated successfully")
- @api.response(400, "Invalid request parameters")
- @api.response(402, "Provider quota exceeded")
+ @console_ns.doc("generate_rule_code")
+ @console_ns.doc(description="Generate code rules using LLM")
+ @console_ns.expect(console_ns.models[RuleCodeGeneratePayload.__name__])
+ @console_ns.response(200, "Code rules generated successfully")
+ @console_ns.response(400, "Invalid request parameters")
+ @console_ns.response(402, "Provider quota exceeded")
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("instruction", type=str, required=True, nullable=False, location="json")
- parser.add_argument("model_config", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("no_variable", type=bool, required=True, default=False, location="json")
- parser.add_argument("code_language", type=str, required=False, default="javascript", location="json")
- args = parser.parse_args()
+ args = RuleCodeGeneratePayload.model_validate(console_ns.payload)
+ _, current_tenant_id = current_account_with_tenant()
- account = current_user
try:
code_result = LLMGenerator.generate_code(
- tenant_id=account.current_tenant_id,
- instruction=args["instruction"],
- model_config=args["model_config"],
- code_language=args["code_language"],
+ tenant_id=current_tenant_id,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
+ code_language=args.code_language,
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -122,35 +136,24 @@ class RuleCodeGenerateApi(Resource):
@console_ns.route("/rule-structured-output-generate")
class RuleStructuredOutputGenerateApi(Resource):
- @api.doc("generate_structured_output")
- @api.doc(description="Generate structured output rules using LLM")
- @api.expect(
- api.model(
- "StructuredOutputGenerateRequest",
- {
- "instruction": fields.String(required=True, description="Structured output generation instruction"),
- "model_config": fields.Raw(required=True, description="Model configuration"),
- },
- )
- )
- @api.response(200, "Structured output generated successfully")
- @api.response(400, "Invalid request parameters")
- @api.response(402, "Provider quota exceeded")
+ @console_ns.doc("generate_structured_output")
+ @console_ns.doc(description="Generate structured output rules using LLM")
+ @console_ns.expect(console_ns.models[RuleStructuredOutputPayload.__name__])
+ @console_ns.response(200, "Structured output generated successfully")
+ @console_ns.response(400, "Invalid request parameters")
+ @console_ns.response(402, "Provider quota exceeded")
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("instruction", type=str, required=True, nullable=False, location="json")
- parser.add_argument("model_config", type=dict, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ args = RuleStructuredOutputPayload.model_validate(console_ns.payload)
+ _, current_tenant_id = current_account_with_tenant()
- account = current_user
try:
structured_output = LLMGenerator.generate_structured_output(
- tenant_id=account.current_tenant_id,
- instruction=args["instruction"],
- model_config=args["model_config"],
+ tenant_id=current_tenant_id,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -166,101 +169,79 @@ class RuleStructuredOutputGenerateApi(Resource):
@console_ns.route("/instruction-generate")
class InstructionGenerateApi(Resource):
- @api.doc("generate_instruction")
- @api.doc(description="Generate instruction for workflow nodes or general use")
- @api.expect(
- api.model(
- "InstructionGenerateRequest",
- {
- "flow_id": fields.String(required=True, description="Workflow/Flow ID"),
- "node_id": fields.String(description="Node ID for workflow context"),
- "current": fields.String(description="Current instruction text"),
- "language": fields.String(default="javascript", description="Programming language (javascript/python)"),
- "instruction": fields.String(required=True, description="Instruction for generation"),
- "model_config": fields.Raw(required=True, description="Model configuration"),
- "ideal_output": fields.String(description="Expected ideal output"),
- },
- )
- )
- @api.response(200, "Instruction generated successfully")
- @api.response(400, "Invalid request parameters or flow/workflow not found")
- @api.response(402, "Provider quota exceeded")
+ @console_ns.doc("generate_instruction")
+ @console_ns.doc(description="Generate instruction for workflow nodes or general use")
+ @console_ns.expect(console_ns.models[InstructionGeneratePayload.__name__])
+ @console_ns.response(200, "Instruction generated successfully")
+ @console_ns.response(400, "Invalid request parameters or flow/workflow not found")
+ @console_ns.response(402, "Provider quota exceeded")
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("flow_id", type=str, required=True, default="", location="json")
- parser.add_argument("node_id", type=str, required=False, default="", location="json")
- parser.add_argument("current", type=str, required=False, default="", location="json")
- parser.add_argument("language", type=str, required=False, default="javascript", location="json")
- parser.add_argument("instruction", type=str, required=True, nullable=False, location="json")
- parser.add_argument("model_config", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("ideal_output", type=str, required=False, default="", location="json")
- args = parser.parse_args()
- code_template = (
- Python3CodeProvider.get_default_code()
- if args["language"] == "python"
- else (JavascriptCodeProvider.get_default_code())
- if args["language"] == "javascript"
- else ""
+ args = InstructionGeneratePayload.model_validate(console_ns.payload)
+ _, current_tenant_id = current_account_with_tenant()
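+ # CodeNodeProvider.is_accept_language is assumed to match "python" and
+ # "javascript"; any other language yields no provider and an empty template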
+ providers: list[type[CodeNodeProvider]] = [Python3CodeProvider, JavascriptCodeProvider]
+ code_provider: type[CodeNodeProvider] | None = next(
+ (p for p in providers if p.is_accept_language(args.language)), None
)
+ code_template = code_provider.get_default_code() if code_provider else ""
try:
# Generate from nothing for a workflow node
- if (args["current"] == code_template or args["current"] == "") and args["node_id"] != "":
- app = db.session.query(App).where(App.id == args["flow_id"]).first()
+ if (args.current in (code_template, "")) and args.node_id != "":
+ app = db.session.query(App).where(App.id == args.flow_id).first()
if not app:
- return {"error": f"app {args['flow_id']} not found"}, 400
+ return {"error": f"app {args.flow_id} not found"}, 400
workflow = WorkflowService().get_draft_workflow(app_model=app)
if not workflow:
- return {"error": f"workflow {args['flow_id']} not found"}, 400
+ return {"error": f"workflow {args.flow_id} not found"}, 400
nodes: Sequence = workflow.graph_dict["nodes"]
- node = [node for node in nodes if node["id"] == args["node_id"]]
+ node = [node for node in nodes if node["id"] == args.node_id]
if len(node) == 0:
- return {"error": f"node {args['node_id']} not found"}, 400
+ return {"error": f"node {args.node_id} not found"}, 400
node_type = node[0]["data"]["type"]
match node_type:
case "llm":
return LLMGenerator.generate_rule_config(
- current_user.current_tenant_id,
- instruction=args["instruction"],
- model_config=args["model_config"],
+ current_tenant_id,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
no_variable=True,
)
case "agent":
return LLMGenerator.generate_rule_config(
- current_user.current_tenant_id,
- instruction=args["instruction"],
- model_config=args["model_config"],
+ current_tenant_id,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
no_variable=True,
)
case "code":
return LLMGenerator.generate_code(
- tenant_id=current_user.current_tenant_id,
- instruction=args["instruction"],
- model_config=args["model_config"],
- code_language=args["language"],
+ tenant_id=current_tenant_id,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
+ code_language=args.language,
)
case _:
return {"error": f"invalid node type: {node_type}"}
- if args["node_id"] == "" and args["current"] != "": # For legacy app without a workflow
+ if args.node_id == "" and args.current != "": # For legacy app without a workflow
return LLMGenerator.instruction_modify_legacy(
- tenant_id=current_user.current_tenant_id,
- flow_id=args["flow_id"],
- current=args["current"],
- instruction=args["instruction"],
- model_config=args["model_config"],
- ideal_output=args["ideal_output"],
+ tenant_id=current_tenant_id,
+ flow_id=args.flow_id,
+ current=args.current,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
+ ideal_output=args.ideal_output,
)
- if args["node_id"] != "" and args["current"] != "": # For workflow node
+ if args.node_id != "" and args.current != "": # For workflow node
return LLMGenerator.instruction_modify_workflow(
- tenant_id=current_user.current_tenant_id,
- flow_id=args["flow_id"],
- node_id=args["node_id"],
- current=args["current"],
- instruction=args["instruction"],
- model_config=args["model_config"],
- ideal_output=args["ideal_output"],
+ tenant_id=current_tenant_id,
+ flow_id=args.flow_id,
+ node_id=args.node_id,
+ current=args.current,
+ instruction=args.instruction,
+ model_config=args.model_config_data,
+ ideal_output=args.ideal_output,
workflow_service=WorkflowService(),
)
return {"error": "incompatible parameters"}, 400
@@ -276,27 +257,17 @@ class InstructionGenerateApi(Resource):
@console_ns.route("/instruction-generate/template")
class InstructionGenerationTemplateApi(Resource):
- @api.doc("get_instruction_template")
- @api.doc(description="Get instruction generation template")
- @api.expect(
- api.model(
- "InstructionTemplateRequest",
- {
- "instruction": fields.String(required=True, description="Template instruction"),
- "ideal_output": fields.String(description="Expected ideal output"),
- },
- )
- )
- @api.response(200, "Template retrieved successfully")
- @api.response(400, "Invalid request parameters")
+ @console_ns.doc("get_instruction_template")
+ @console_ns.doc(description="Get instruction generation template")
+ @console_ns.expect(console_ns.models[InstructionTemplatePayload.__name__])
+ @console_ns.response(200, "Template retrieved successfully")
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("type", type=str, required=True, default=False, location="json")
- args = parser.parse_args()
- match args["type"]:
+ args = InstructionTemplatePayload.model_validate(console_ns.payload)
+ match args.type:
case "prompt":
from core.llm_generator.prompts import INSTRUCTION_GENERATE_TEMPLATE_PROMPT
@@ -306,4 +277,4 @@ class InstructionGenerationTemplateApi(Resource):
return {"data": INSTRUCTION_GENERATE_TEMPLATE_CODE}
case _:
- raise ValueError(f"Invalid type: {args['type']}")
+ raise ValueError(f"Invalid type: {args.type}")
diff --git a/api/controllers/console/app/mcp_server.py b/api/controllers/console/app/mcp_server.py
index b9a383ee61..dd982b6d7b 100644
--- a/api/controllers/console/app/mcp_server.py
+++ b/api/controllers/console/app/mcp_server.py
@@ -1,119 +1,113 @@
import json
from enum import StrEnum
-from flask_login import current_user
-from flask_restx import Resource, fields, marshal_with, reqparse
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel, Field
from werkzeug.exceptions import NotFound
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from extensions.ext_database import db
from fields.app_fields import app_server_fields
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.model import AppMCPServer
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+# Register model for flask_restx to avoid dict type issues in Swagger
+app_server_model = console_ns.model("AppServer", app_server_fields)
+
class AppMCPServerStatus(StrEnum):
ACTIVE = "active"
INACTIVE = "inactive"
+class MCPServerCreatePayload(BaseModel):
+ description: str | None = Field(default=None, description="Server description")
+ parameters: dict = Field(..., description="Server parameters configuration")
+
+
+class MCPServerUpdatePayload(BaseModel):
+ id: str = Field(..., description="Server ID")
+ description: str | None = Field(default=None, description="Server description")
+ parameters: dict = Field(..., description="Server parameters configuration")
+ status: str | None = Field(default=None, description="Server status")
+
+
+for model in (MCPServerCreatePayload, MCPServerUpdatePayload):
+ console_ns.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
@console_ns.route("/apps/<uuid:app_id>/server")
class AppMCPServerController(Resource):
- @api.doc("get_app_mcp_server")
- @api.doc(description="Get MCP server configuration for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "MCP server configuration retrieved successfully", app_server_fields)
- @setup_required
+ @console_ns.doc("get_app_mcp_server")
+ @console_ns.doc(description="Get MCP server configuration for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "MCP server configuration retrieved successfully", app_server_model)
@login_required
@account_initialization_required
+ @setup_required
@get_app_model
- @marshal_with(app_server_fields)
+ @marshal_with(app_server_model)
def get(self, app_model):
server = db.session.query(AppMCPServer).where(AppMCPServer.app_id == app_model.id).first()
return server
- @api.doc("create_app_mcp_server")
- @api.doc(description="Create MCP server configuration for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "MCPServerCreateRequest",
- {
- "description": fields.String(description="Server description"),
- "parameters": fields.Raw(required=True, description="Server parameters configuration"),
- },
- )
- )
- @api.response(201, "MCP server configuration created successfully", app_server_fields)
- @api.response(403, "Insufficient permissions")
- @setup_required
- @login_required
+ @console_ns.doc("create_app_mcp_server")
+ @console_ns.doc(description="Create MCP server configuration for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[MCPServerCreatePayload.__name__])
+ @console_ns.response(201, "MCP server configuration created successfully", app_server_model)
+ @console_ns.response(403, "Insufficient permissions")
@account_initialization_required
@get_app_model
- @marshal_with(app_server_fields)
+ @login_required
+ @setup_required
+ @marshal_with(app_server_model)
+ @edit_permission_required
def post(self, app_model):
- if not current_user.is_editor:
- raise NotFound()
- parser = reqparse.RequestParser()
- parser.add_argument("description", type=str, required=False, location="json")
- parser.add_argument("parameters", type=dict, required=True, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ payload = MCPServerCreatePayload.model_validate(console_ns.payload or {})
- description = args.get("description")
+ description = payload.description
if not description:
description = app_model.description or ""
server = AppMCPServer(
name=app_model.name,
description=description,
- parameters=json.dumps(args["parameters"], ensure_ascii=False),
+ parameters=json.dumps(payload.parameters, ensure_ascii=False),
status=AppMCPServerStatus.ACTIVE,
app_id=app_model.id,
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
server_code=AppMCPServer.generate_server_code(16),
)
db.session.add(server)
db.session.commit()
return server
- @api.doc("update_app_mcp_server")
- @api.doc(description="Update MCP server configuration for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "MCPServerUpdateRequest",
- {
- "id": fields.String(required=True, description="Server ID"),
- "description": fields.String(description="Server description"),
- "parameters": fields.Raw(required=True, description="Server parameters configuration"),
- "status": fields.String(description="Server status"),
- },
- )
- )
- @api.response(200, "MCP server configuration updated successfully", app_server_fields)
- @api.response(403, "Insufficient permissions")
- @api.response(404, "Server not found")
- @setup_required
- @login_required
- @account_initialization_required
+ @console_ns.doc("update_app_mcp_server")
+ @console_ns.doc(description="Update MCP server configuration for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[MCPServerUpdatePayload.__name__])
+ @console_ns.response(200, "MCP server configuration updated successfully", app_server_model)
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(404, "Server not found")
@get_app_model
- @marshal_with(app_server_fields)
+ @login_required
+ @setup_required
+ @account_initialization_required
+ @marshal_with(app_server_model)
+ @edit_permission_required
def put(self, app_model):
- if not current_user.is_editor:
- raise NotFound()
- parser = reqparse.RequestParser()
- parser.add_argument("id", type=str, required=True, location="json")
- parser.add_argument("description", type=str, required=False, location="json")
- parser.add_argument("parameters", type=dict, required=True, location="json")
- parser.add_argument("status", type=str, required=False, location="json")
- args = parser.parse_args()
- server = db.session.query(AppMCPServer).where(AppMCPServer.id == args["id"]).first()
+ payload = MCPServerUpdatePayload.model_validate(console_ns.payload or {})
+ server = db.session.query(AppMCPServer).where(AppMCPServer.id == payload.id).first()
if not server:
raise NotFound()
- description = args.get("description")
+ description = payload.description
if description is None:
pass
elif not description:
@@ -121,34 +115,34 @@ class AppMCPServerController(Resource):
else:
server.description = description
- server.parameters = json.dumps(args["parameters"], ensure_ascii=False)
- if args["status"]:
- if args["status"] not in [status.value for status in AppMCPServerStatus]:
+ server.parameters = json.dumps(payload.parameters, ensure_ascii=False)
+ if payload.status:
+ if payload.status not in [status.value for status in AppMCPServerStatus]:
raise ValueError("Invalid status")
- server.status = args["status"]
+ server.status = payload.status
db.session.commit()
return server
@console_ns.route("/apps/<uuid:server_id>/server/refresh")
class AppMCPServerRefreshController(Resource):
- @api.doc("refresh_app_mcp_server")
- @api.doc(description="Refresh MCP server configuration and regenerate server code")
- @api.doc(params={"server_id": "Server ID"})
- @api.response(200, "MCP server refreshed successfully", app_server_fields)
- @api.response(403, "Insufficient permissions")
- @api.response(404, "Server not found")
+ @console_ns.doc("refresh_app_mcp_server")
+ @console_ns.doc(description="Refresh MCP server configuration and regenerate server code")
+ @console_ns.doc(params={"server_id": "Server ID"})
+ @console_ns.response(200, "MCP server refreshed successfully", app_server_model)
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(404, "Server not found")
@setup_required
@login_required
@account_initialization_required
- @marshal_with(app_server_fields)
+ @marshal_with(app_server_model)
+ @edit_permission_required
def get(self, server_id):
- if not current_user.is_editor:
- raise NotFound()
+ _, current_tenant_id = current_account_with_tenant()
server = (
db.session.query(AppMCPServer)
.where(AppMCPServer.id == server_id)
- .where(AppMCPServer.tenant_id == current_user.current_tenant_id)
+ .where(AppMCPServer.tenant_id == current_tenant_id)
.first()
)
if not server:
diff --git a/api/controllers/console/app/message.py b/api/controllers/console/app/message.py
index 46523feccc..12ada8b798 100644
--- a/api/controllers/console/app/message.py
+++ b/api/controllers/console/app/message.py
@@ -1,11 +1,13 @@
import logging
+from typing import Literal
-from flask_restx import Resource, fields, marshal_with, reqparse
-from flask_restx.inputs import int_range
+from flask import request
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy import exists, select
-from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
+from werkzeug.exceptions import InternalServerError, NotFound
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.error import (
CompletionRequestError,
ProviderModelCurrentlyNotSupportError,
@@ -16,74 +18,234 @@ from controllers.console.app.wraps import get_app_model
from controllers.console.explore.error import AppSuggestedQuestionsAfterAnswerDisabledError
from controllers.console.wraps import (
account_initialization_required,
- cloud_edition_billing_resource_check,
+ edit_permission_required,
setup_required,
)
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from extensions.ext_database import db
-from fields.conversation_fields import annotation_fields, message_detail_fields
-from libs.helper import uuid_value
+from fields.raws import FilesContainedField
+from libs.helper import TimestampField, uuid_value
from libs.infinite_scroll_pagination import InfiniteScrollPagination
-from libs.login import current_user, login_required
-from models.account import Account
+from libs.login import current_account_with_tenant, login_required
from models.model import AppMode, Conversation, Message, MessageAnnotation, MessageFeedback
-from services.annotation_service import AppAnnotationService
from services.errors.conversation import ConversationNotExistsError
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
from services.message_service import MessageService
logger = logging.getLogger(__name__)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ChatMessagesQuery(BaseModel):
+ conversation_id: str = Field(..., description="Conversation ID")
+ first_id: str | None = Field(default=None, description="First message ID for pagination")
+ limit: int = Field(default=20, ge=1, le=100, description="Number of messages to return (1-100)")
+
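+ # query strings encode omitted optional params as ""; normalize to None before
+ # the UUID check below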
+ @field_validator("first_id", mode="before")
+ @classmethod
+ def empty_to_none(cls, value: str | None) -> str | None:
+ if value == "":
+ return None
+ return value
+
+ @field_validator("conversation_id", "first_id")
+ @classmethod
+ def validate_uuid(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+class MessageFeedbackPayload(BaseModel):
+ message_id: str = Field(..., description="Message ID")
+ rating: Literal["like", "dislike"] | None = Field(default=None, description="Feedback rating")
+ content: str | None = Field(default=None, description="Feedback content")
+
+ @field_validator("message_id")
+ @classmethod
+ def validate_message_id(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+class FeedbackExportQuery(BaseModel):
+ from_source: Literal["user", "admin"] | None = Field(default=None, description="Filter by feedback source")
+ rating: Literal["like", "dislike"] | None = Field(default=None, description="Filter by rating")
+ has_comment: bool | None = Field(default=None, description="Only include feedback with comments")
+ start_date: str | None = Field(default=None, description="Start date (YYYY-MM-DD)")
+ end_date: str | None = Field(default=None, description="End date (YYYY-MM-DD)")
+ format: Literal["csv", "json"] = Field(default="csv", description="Export format")
+
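+ # query-string booleans arrive as strings; accept the common truthy/falsy
+ # spellings and reject anything else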
+ @field_validator("has_comment", mode="before")
+ @classmethod
+ def parse_bool(cls, value: bool | str | None) -> bool | None:
+ if isinstance(value, bool) or value is None:
+ return value
+ lowered = value.lower()
+ if lowered in {"true", "1", "yes", "on"}:
+ return True
+ if lowered in {"false", "0", "no", "off"}:
+ return False
+ raise ValueError("has_comment must be a boolean value")
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(ChatMessagesQuery)
+reg(MessageFeedbackPayload)
+reg(FeedbackExportQuery)
+
+# Register models for flask_restx to avoid dict type issues in Swagger
+# Register in dependency order: base models first, then dependent models
+
+# Base models
+simple_account_model = console_ns.model(
+ "SimpleAccount",
+ {
+ "id": fields.String,
+ "name": fields.String,
+ "email": fields.String,
+ },
+)
+
+message_file_model = console_ns.model(
+ "MessageFile",
+ {
+ "id": fields.String,
+ "filename": fields.String,
+ "type": fields.String,
+ "url": fields.String,
+ "mime_type": fields.String,
+ "size": fields.Integer,
+ "transfer_method": fields.String,
+ "belongs_to": fields.String(default="user"),
+ "upload_file_id": fields.String(default=None),
+ },
+)
+
+agent_thought_model = console_ns.model(
+ "AgentThought",
+ {
+ "id": fields.String,
+ "chain_id": fields.String,
+ "message_id": fields.String,
+ "position": fields.Integer,
+ "thought": fields.String,
+ "tool": fields.String,
+ "tool_labels": fields.Raw,
+ "tool_input": fields.String,
+ "created_at": TimestampField,
+ "observation": fields.String,
+ "files": fields.List(fields.String),
+ },
+)
+
+# Models that depend on simple_account_model
+feedback_model = console_ns.model(
+ "Feedback",
+ {
+ "rating": fields.String,
+ "content": fields.String,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_account": fields.Nested(simple_account_model, allow_null=True),
+ },
+)
+
+annotation_model = console_ns.model(
+ "Annotation",
+ {
+ "id": fields.String,
+ "question": fields.String,
+ "content": fields.String,
+ "account": fields.Nested(simple_account_model, allow_null=True),
+ "created_at": TimestampField,
+ },
+)
+
+annotation_hit_history_model = console_ns.model(
+ "AnnotationHitHistory",
+ {
+ "annotation_id": fields.String(attribute="id"),
+ "annotation_create_account": fields.Nested(simple_account_model, allow_null=True),
+ "created_at": TimestampField,
+ },
+)
+
+# Message detail model that depends on multiple models
+message_detail_model = console_ns.model(
+ "MessageDetail",
+ {
+ "id": fields.String,
+ "conversation_id": fields.String,
+ "inputs": FilesContainedField,
+ "query": fields.String,
+ "message": fields.Raw,
+ "message_tokens": fields.Integer,
+ "answer": fields.String(attribute="re_sign_file_url_answer"),
+ "answer_tokens": fields.Integer,
+ "provider_response_latency": fields.Float,
+ "from_source": fields.String,
+ "from_end_user_id": fields.String,
+ "from_account_id": fields.String,
+ "feedbacks": fields.List(fields.Nested(feedback_model)),
+ "workflow_run_id": fields.String,
+ "annotation": fields.Nested(annotation_model, allow_null=True),
+ "annotation_hit_history": fields.Nested(annotation_hit_history_model, allow_null=True),
+ "created_at": TimestampField,
+ "agent_thoughts": fields.List(fields.Nested(agent_thought_model)),
+ "message_files": fields.List(fields.Nested(message_file_model)),
+ "metadata": fields.Raw(attribute="message_metadata_dict"),
+ "status": fields.String,
+ "error": fields.String,
+ "parent_message_id": fields.String,
+ },
+)
+
+# Message infinite scroll pagination model
+message_infinite_scroll_pagination_model = console_ns.model(
+ "MessageInfiniteScrollPagination",
+ {
+ "limit": fields.Integer,
+ "has_more": fields.Boolean,
+ "data": fields.List(fields.Nested(message_detail_model)),
+ },
+)
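Several fields above use `attribute=` to read a differently named property off the ORM object (`answer` pulls `re_sign_file_url_answer`, `metadata` pulls `message_metadata_dict`). A tiny, self-contained illustration of that remapping; the `Demo` model is invented for the example and is not part of this PR:

```python
from types import SimpleNamespace

from flask_restx import Model, fields, marshal

# Invented mini-model mirroring two remapped fields from MessageDetail.
demo = Model("Demo", {
    "answer": fields.String(attribute="re_sign_file_url_answer"),
    "metadata": fields.Raw(attribute="message_metadata_dict"),
})
row = SimpleNamespace(re_sign_file_url_answer="hi", message_metadata_dict={"k": 1})
assert marshal(row, demo) == {"answer": "hi", "metadata": {"k": 1}}
```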
@console_ns.route("/apps//chat-messages")
class ChatMessageListApi(Resource):
- message_infinite_scroll_pagination_fields = {
- "limit": fields.Integer,
- "has_more": fields.Boolean,
- "data": fields.List(fields.Nested(message_detail_fields)),
- }
-
- @api.doc("list_chat_messages")
- @api.doc(description="Get chat messages for a conversation with pagination")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("conversation_id", type=str, required=True, location="args", help="Conversation ID")
- .add_argument("first_id", type=str, location="args", help="First message ID for pagination")
- .add_argument("limit", type=int, location="args", default=20, help="Number of messages to return (1-100)")
- )
- @api.response(200, "Success", message_infinite_scroll_pagination_fields)
- @api.response(404, "Conversation not found")
- @setup_required
+ @console_ns.doc("list_chat_messages")
+ @console_ns.doc(description="Get chat messages for a conversation with pagination")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[ChatMessagesQuery.__name__])
+ @console_ns.response(200, "Success", message_infinite_scroll_pagination_model)
+ @console_ns.response(404, "Conversation not found")
@login_required
- @get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
@account_initialization_required
- @marshal_with(message_infinite_scroll_pagination_fields)
+ @setup_required
+ @get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
+ @marshal_with(message_infinite_scroll_pagination_model)
+ @edit_permission_required
def get(self, app_model):
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("conversation_id", required=True, type=uuid_value, location="args")
- parser.add_argument("first_id", type=uuid_value, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- args = parser.parse_args()
+ args = ChatMessagesQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
conversation = (
db.session.query(Conversation)
- .where(Conversation.id == args["conversation_id"], Conversation.app_id == app_model.id)
+ .where(Conversation.id == args.conversation_id, Conversation.app_id == app_model.id)
.first()
)
if not conversation:
raise NotFound("Conversation Not Exists.")
- if args["first_id"]:
+ if args.first_id:
first_message = (
db.session.query(Message)
- .where(Message.conversation_id == conversation.id, Message.id == args["first_id"])
+ .where(Message.conversation_id == conversation.id, Message.id == args.first_id)
.first()
)
@@ -98,7 +260,7 @@ class ChatMessageListApi(Resource):
Message.id != first_message.id,
)
.order_by(Message.created_at.desc())
- .limit(args["limit"])
+ .limit(args.limit)
.all()
)
else:
@@ -106,12 +268,12 @@ class ChatMessageListApi(Resource):
db.session.query(Message)
.where(Message.conversation_id == conversation.id)
.order_by(Message.created_at.desc())
- .limit(args["limit"])
+ .limit(args.limit)
.all()
)
# Initialize has_more based on whether we have a full page
- if len(history_messages) == args["limit"]:
+ if len(history_messages) == args.limit:
current_page_first_message = history_messages[-1]
# Check if there are more messages before the current page
has_more = db.session.scalar(
@@ -129,40 +291,28 @@ class ChatMessageListApi(Resource):
history_messages = list(reversed(history_messages))
- return InfiniteScrollPagination(data=history_messages, limit=args["limit"], has_more=has_more)
+ return InfiniteScrollPagination(data=history_messages, limit=args.limit, has_more=has_more)
@console_ns.route("/apps//feedbacks")
class MessageFeedbackApi(Resource):
- @api.doc("create_message_feedback")
- @api.doc(description="Create or update message feedback (like/dislike)")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "MessageFeedbackRequest",
- {
- "message_id": fields.String(required=True, description="Message ID"),
- "rating": fields.String(enum=["like", "dislike"], description="Feedback rating"),
- },
- )
- )
- @api.response(200, "Feedback updated successfully")
- @api.response(404, "Message not found")
- @api.response(403, "Insufficient permissions")
+ @console_ns.doc("create_message_feedback")
+ @console_ns.doc(description="Create or update message feedback (like/dislike)")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[MessageFeedbackPayload.__name__])
+ @console_ns.response(200, "Feedback updated successfully")
+ @console_ns.response(404, "Message not found")
+ @console_ns.response(403, "Insufficient permissions")
@get_app_model
@setup_required
@login_required
@account_initialization_required
def post(self, app_model):
- if current_user is None:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("message_id", required=True, type=uuid_value, location="json")
- parser.add_argument("rating", type=str, choices=["like", "dislike", None], location="json")
- args = parser.parse_args()
+ args = MessageFeedbackPayload.model_validate(console_ns.payload)
- message_id = str(args["message_id"])
+ message_id = str(args.message_id)
message = db.session.query(Message).where(Message.id == message_id, Message.app_id == app_model.id).first()
@@ -171,18 +321,23 @@ class MessageFeedbackApi(Resource):
feedback = message.admin_feedback
- if not args["rating"] and feedback:
+ if not args.rating and feedback:
db.session.delete(feedback)
- elif args["rating"] and feedback:
- feedback.rating = args["rating"]
- elif not args["rating"] and not feedback:
+ elif args.rating and feedback:
+ feedback.rating = args.rating
+ feedback.content = args.content
+ elif not args.rating and not feedback:
raise ValueError("rating cannot be None when feedback not exists")
else:
+ rating_value = args.rating
+ if rating_value is None:
+ raise ValueError("rating is required to create feedback")
feedback = MessageFeedback(
app_id=app_model.id,
conversation_id=message.conversation_id,
message_id=message.id,
- rating=args["rating"],
+ rating=rating_value,
+ content=args.content,
from_source="admin",
from_account_id=current_user.id,
)
@@ -193,56 +348,15 @@ class MessageFeedbackApi(Resource):
return {"result": "success"}
-@console_ns.route("/apps//annotations")
-class MessageAnnotationApi(Resource):
- @api.doc("create_message_annotation")
- @api.doc(description="Create message annotation")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "MessageAnnotationRequest",
- {
- "message_id": fields.String(description="Message ID"),
- "question": fields.String(required=True, description="Question text"),
- "answer": fields.String(required=True, description="Answer text"),
- "annotation_reply": fields.Raw(description="Annotation reply"),
- },
- )
- )
- @api.response(200, "Annotation created successfully", annotation_fields)
- @api.response(403, "Insufficient permissions")
- @setup_required
- @login_required
- @account_initialization_required
- @cloud_edition_billing_resource_check("annotation")
- @get_app_model
- @marshal_with(annotation_fields)
- def post(self, app_model):
- if not isinstance(current_user, Account):
- raise Forbidden()
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("message_id", required=False, type=uuid_value, location="json")
- parser.add_argument("question", required=True, type=str, location="json")
- parser.add_argument("answer", required=True, type=str, location="json")
- parser.add_argument("annotation_reply", required=False, type=dict, location="json")
- args = parser.parse_args()
- annotation = AppAnnotationService.up_insert_app_annotation_from_message(args, app_model.id)
-
- return annotation
-
-
@console_ns.route("/apps//annotations/count")
class MessageAnnotationCountApi(Resource):
- @api.doc("get_annotation_count")
- @api.doc(description="Get count of message annotations for the app")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(
+ @console_ns.doc("get_annotation_count")
+ @console_ns.doc(description="Get count of message annotations for the app")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(
200,
"Annotation count retrieved successfully",
- api.model("AnnotationCountResponse", {"count": fields.Integer(description="Number of annotations")}),
+ console_ns.model("AnnotationCountResponse", {"count": fields.Integer(description="Number of annotations")}),
)
@get_app_model
@setup_required
@@ -256,20 +370,23 @@ class MessageAnnotationCountApi(Resource):
@console_ns.route("/apps//chat-messages//suggested-questions")
class MessageSuggestedQuestionApi(Resource):
- @api.doc("get_message_suggested_questions")
- @api.doc(description="Get suggested questions for a message")
- @api.doc(params={"app_id": "Application ID", "message_id": "Message ID"})
- @api.response(
+ @console_ns.doc("get_message_suggested_questions")
+ @console_ns.doc(description="Get suggested questions for a message")
+ @console_ns.doc(params={"app_id": "Application ID", "message_id": "Message ID"})
+ @console_ns.response(
200,
"Suggested questions retrieved successfully",
- api.model("SuggestedQuestionsResponse", {"data": fields.List(fields.String(description="Suggested question"))}),
+ console_ns.model(
+ "SuggestedQuestionsResponse", {"data": fields.List(fields.String(description="Suggested question"))}
+ ),
)
- @api.response(404, "Message or conversation not found")
+ @console_ns.response(404, "Message or conversation not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
def get(self, app_model, message_id):
+ current_user, _ = current_account_with_tenant()
message_id = str(message_id)
try:
@@ -297,19 +414,59 @@ class MessageSuggestedQuestionApi(Resource):
return {"data": questions}
-@console_ns.route("/apps//messages/")
-class MessageApi(Resource):
- @api.doc("get_message")
- @api.doc(description="Get message details by ID")
- @api.doc(params={"app_id": "Application ID", "message_id": "Message ID"})
- @api.response(200, "Message retrieved successfully", message_detail_fields)
- @api.response(404, "Message not found")
+@console_ns.route("/apps//feedbacks/export")
+class MessageFeedbackExportApi(Resource):
+ @console_ns.doc("export_feedbacks")
+ @console_ns.doc(description="Export user feedback data for Google Sheets")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[FeedbackExportQuery.__name__])
+ @console_ns.response(200, "Feedback data exported successfully")
+ @console_ns.response(400, "Invalid parameters")
+ @console_ns.response(500, "Internal server error")
+ @get_app_model
@setup_required
@login_required
@account_initialization_required
+ def get(self, app_model):
+ args = FeedbackExportQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+
+ # Local import: FeedbackService is only needed by this endpoint
+ from services.feedback_service import FeedbackService
+
+ try:
+ export_data = FeedbackService.export_feedbacks(
+ app_id=app_model.id,
+ from_source=args.from_source,
+ rating=args.rating,
+ has_comment=args.has_comment,
+ start_date=args.start_date,
+ end_date=args.end_date,
+ format_type=args.format,
+ )
+
+ return export_data
+
+ except ValueError as e:
+ logger.exception("Parameter validation error in feedback export")
+ return {"error": f"Parameter validation error: {str(e)}"}, 400
+ except Exception as e:
+ logger.exception("Error exporting feedback data")
+ raise InternalServerError(str(e))
+
+
+@console_ns.route("/apps//messages/")
+class MessageApi(Resource):
+ @console_ns.doc("get_message")
+ @console_ns.doc(description="Get message details by ID")
+ @console_ns.doc(params={"app_id": "Application ID", "message_id": "Message ID"})
+ @console_ns.response(200, "Message retrieved successfully", message_detail_model)
+ @console_ns.response(404, "Message not found")
@get_app_model
- @marshal_with(message_detail_fields)
- def get(self, app_model, message_id):
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @marshal_with(message_detail_model)
+ def get(self, app_model, message_id: str):
message_id = str(message_id)
message = db.session.query(Message).where(Message.id == message_id, Message.app_id == app_model.id).first()
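The file-wide pattern in messages.py is worth spelling out once: `reqparse.RequestParser` is replaced by `Model.model_validate(request.args.to_dict(flat=True))`. A hedged sketch of the shape; the class here is a stand-in, and the PR's real models carry more constraints:

```python
from flask import request
from pydantic import BaseModel, Field

class PageQuerySketch(BaseModel):
    """Stand-in mirroring ChatMessagesQuery's pagination fields."""
    conversation_id: str
    first_id: str | None = None
    limit: int = Field(default=20, ge=1, le=100)

def parse_page_query() -> PageQuerySketch:
    # flat=True keeps one value per key, matching reqparse's old behaviour.
    # A pydantic.ValidationError propagates out of this call; assumption:
    # the app's error handling converts it to a 400 response.
    return PageQuerySketch.model_validate(request.args.to_dict(flat=True))
```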
diff --git a/api/controllers/console/app/model_config.py b/api/controllers/console/app/model_config.py
index e71b774d3e..a85e54fb51 100644
--- a/api/controllers/console/app/model_config.py
+++ b/api/controllers/console/app/model_config.py
@@ -2,31 +2,29 @@ import json
from typing import cast
from flask import request
-from flask_login import current_user
from flask_restx import Resource, fields
-from werkzeug.exceptions import Forbidden
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from core.agent.entities import AgentToolEntity
from core.tools.tool_manager import ToolManager
from core.tools.utils.configuration import ToolParameterConfigurationManager
from events.app_event import app_model_config_was_updated
from extensions.ext_database import db
-from libs.login import login_required
-from models.account import Account
+from libs.datetime_utils import naive_utc_now
+from libs.login import current_account_with_tenant, login_required
from models.model import AppMode, AppModelConfig
from services.app_model_config_service import AppModelConfigService
@console_ns.route("/apps//model-config")
class ModelConfigResource(Resource):
- @api.doc("update_app_model_config")
- @api.doc(description="Update application model configuration")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
+ @console_ns.doc("update_app_model_config")
+ @console_ns.doc(description="Update application model configuration")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(
+ console_ns.model(
"ModelConfigRequest",
{
"provider": fields.String(description="Model provider"),
@@ -44,25 +42,20 @@ class ModelConfigResource(Resource):
},
)
)
- @api.response(200, "Model configuration updated successfully")
- @api.response(400, "Invalid configuration")
- @api.response(404, "App not found")
+ @console_ns.response(200, "Model configuration updated successfully")
+ @console_ns.response(400, "Invalid configuration")
+ @console_ns.response(404, "App not found")
@setup_required
@login_required
+ @edit_permission_required
@account_initialization_required
@get_app_model(mode=[AppMode.AGENT_CHAT, AppMode.CHAT, AppMode.COMPLETION])
def post(self, app_model):
"""Modify app model config"""
- if not isinstance(current_user, Account):
- raise Forbidden()
-
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- assert current_user.current_tenant_id is not None, "The tenant information should be loaded."
+ current_user, current_tenant_id = current_account_with_tenant()
# validate config
model_configuration = AppModelConfigService.validate_configuration(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
config=cast(dict, request.json),
app_mode=AppMode.value_of(app_model.mode),
)
@@ -94,12 +87,12 @@ class ModelConfigResource(Resource):
# get tool
try:
tool_runtime = ToolManager.get_agent_tool_runtime(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
app_id=app_model.id,
agent_tool=agent_tool_entity,
)
manager = ToolParameterConfigurationManager(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
tool_runtime=tool_runtime,
provider_name=agent_tool_entity.provider_id,
provider_type=agent_tool_entity.provider_type,
@@ -133,7 +126,7 @@ class ModelConfigResource(Resource):
else:
try:
tool_runtime = ToolManager.get_agent_tool_runtime(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
app_id=app_model.id,
agent_tool=agent_tool_entity,
)
@@ -141,7 +134,7 @@ class ModelConfigResource(Resource):
continue
manager = ToolParameterConfigurationManager(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
tool_runtime=tool_runtime,
provider_name=agent_tool_entity.provider_id,
provider_type=agent_tool_entity.provider_type,
@@ -172,6 +165,8 @@ class ModelConfigResource(Resource):
db.session.flush()
app_model.app_model_config_id = new_app_model_config.id
+ app_model.updated_by = current_user.id
+ app_model.updated_at = naive_utc_now()
db.session.commit()
app_model_config_was_updated.send(app_model, app_model_config=new_app_model_config)
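`current_account_with_tenant()` absorbs the boilerplate this hunk deletes: the `isinstance(current_user, Account)` check and the tenant assertion. Its implementation lives in `libs.login` and is not shown in this diff; the sketch below is only an inference from the call sites, not the shipped helper:

```python
from flask_login import current_user
from werkzeug.exceptions import Unauthorized

def current_account_with_tenant_sketch():
    """Inferred behaviour only; see libs.login.current_account_with_tenant."""
    account = current_user
    if account is None or not getattr(account, "is_authenticated", False):
        raise Unauthorized("Account login required.")
    tenant_id = getattr(account, "current_tenant_id", None)
    if tenant_id is None:
        raise Unauthorized("Tenant information is not loaded.")
    return account, tenant_id
```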
diff --git a/api/controllers/console/app/ops_trace.py b/api/controllers/console/app/ops_trace.py
index 981974e842..cbcf513162 100644
--- a/api/controllers/console/app/ops_trace.py
+++ b/api/controllers/console/app/ops_trace.py
@@ -1,12 +1,36 @@
-from flask_restx import Resource, fields, reqparse
+from typing import Any
+
+from flask import request
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field
from werkzeug.exceptions import BadRequest
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.error import TracingConfigCheckError, TracingConfigIsExist, TracingConfigNotExist
from controllers.console.wraps import account_initialization_required, setup_required
from libs.login import login_required
from services.ops_service import OpsService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class TraceProviderQuery(BaseModel):
+ tracing_provider: str = Field(..., description="Tracing provider name")
+
+
+class TraceConfigPayload(BaseModel):
+ tracing_provider: str = Field(..., description="Tracing provider name")
+ tracing_config: dict[str, Any] = Field(..., description="Tracing configuration data")
+
+
+console_ns.schema_model(
+ TraceProviderQuery.__name__,
+ TraceProviderQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+console_ns.schema_model(
+ TraceConfigPayload.__name__, TraceConfigPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
@console_ns.route("/apps//trace-config")
class TraceAppConfigApi(Resource):
@@ -14,63 +38,46 @@ class TraceAppConfigApi(Resource):
Manage trace app configurations
"""
- @api.doc("get_trace_app_config")
- @api.doc(description="Get tracing configuration for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser().add_argument(
- "tracing_provider", type=str, required=True, location="args", help="Tracing provider name"
- )
- )
- @api.response(
+ @console_ns.doc("get_trace_app_config")
+ @console_ns.doc(description="Get tracing configuration for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[TraceProviderQuery.__name__])
+ @console_ns.response(
200, "Tracing configuration retrieved successfully", fields.Raw(description="Tracing configuration data")
)
- @api.response(400, "Invalid request parameters")
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
def get(self, app_id):
- parser = reqparse.RequestParser()
- parser.add_argument("tracing_provider", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = TraceProviderQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
- trace_config = OpsService.get_tracing_app_config(app_id=app_id, tracing_provider=args["tracing_provider"])
+ trace_config = OpsService.get_tracing_app_config(app_id=app_id, tracing_provider=args.tracing_provider)
if not trace_config:
return {"has_not_configured": True}
return trace_config
except Exception as e:
raise BadRequest(str(e))
- @api.doc("create_trace_app_config")
- @api.doc(description="Create a new tracing configuration for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "TraceConfigCreateRequest",
- {
- "tracing_provider": fields.String(required=True, description="Tracing provider name"),
- "tracing_config": fields.Raw(required=True, description="Tracing configuration data"),
- },
- )
- )
- @api.response(
+ @console_ns.doc("create_trace_app_config")
+ @console_ns.doc(description="Create a new tracing configuration for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[TraceConfigPayload.__name__])
+ @console_ns.response(
201, "Tracing configuration created successfully", fields.Raw(description="Created configuration data")
)
- @api.response(400, "Invalid request parameters or configuration already exists")
+ @console_ns.response(400, "Invalid request parameters or configuration already exists")
@setup_required
@login_required
@account_initialization_required
def post(self, app_id):
"""Create a new trace app configuration"""
- parser = reqparse.RequestParser()
- parser.add_argument("tracing_provider", type=str, required=True, location="json")
- parser.add_argument("tracing_config", type=dict, required=True, location="json")
- args = parser.parse_args()
+ args = TraceConfigPayload.model_validate(console_ns.payload)
try:
result = OpsService.create_tracing_app_config(
- app_id=app_id, tracing_provider=args["tracing_provider"], tracing_config=args["tracing_config"]
+ app_id=app_id, tracing_provider=args.tracing_provider, tracing_config=args.tracing_config
)
if not result:
raise TracingConfigIsExist()
@@ -80,33 +87,22 @@ class TraceAppConfigApi(Resource):
except Exception as e:
raise BadRequest(str(e))
- @api.doc("update_trace_app_config")
- @api.doc(description="Update an existing tracing configuration for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "TraceConfigUpdateRequest",
- {
- "tracing_provider": fields.String(required=True, description="Tracing provider name"),
- "tracing_config": fields.Raw(required=True, description="Updated tracing configuration data"),
- },
- )
- )
- @api.response(200, "Tracing configuration updated successfully", fields.Raw(description="Success response"))
- @api.response(400, "Invalid request parameters or configuration not found")
+ @console_ns.doc("update_trace_app_config")
+ @console_ns.doc(description="Update an existing tracing configuration for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[TraceConfigPayload.__name__])
+ @console_ns.response(200, "Tracing configuration updated successfully", fields.Raw(description="Success response"))
+ @console_ns.response(400, "Invalid request parameters or configuration not found")
@setup_required
@login_required
@account_initialization_required
def patch(self, app_id):
"""Update an existing trace app configuration"""
- parser = reqparse.RequestParser()
- parser.add_argument("tracing_provider", type=str, required=True, location="json")
- parser.add_argument("tracing_config", type=dict, required=True, location="json")
- args = parser.parse_args()
+ args = TraceConfigPayload.model_validate(console_ns.payload)
try:
result = OpsService.update_tracing_app_config(
- app_id=app_id, tracing_provider=args["tracing_provider"], tracing_config=args["tracing_config"]
+ app_id=app_id, tracing_provider=args.tracing_provider, tracing_config=args.tracing_config
)
if not result:
raise TracingConfigNotExist()
@@ -114,27 +110,21 @@ class TraceAppConfigApi(Resource):
except Exception as e:
raise BadRequest(str(e))
- @api.doc("delete_trace_app_config")
- @api.doc(description="Delete an existing tracing configuration for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser().add_argument(
- "tracing_provider", type=str, required=True, location="args", help="Tracing provider name"
- )
- )
- @api.response(204, "Tracing configuration deleted successfully")
- @api.response(400, "Invalid request parameters or configuration not found")
+ @console_ns.doc("delete_trace_app_config")
+ @console_ns.doc(description="Delete an existing tracing configuration for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[TraceProviderQuery.__name__])
+ @console_ns.response(204, "Tracing configuration deleted successfully")
+ @console_ns.response(400, "Invalid request parameters or configuration not found")
@setup_required
@login_required
@account_initialization_required
def delete(self, app_id):
"""Delete an existing trace app configuration"""
- parser = reqparse.RequestParser()
- parser.add_argument("tracing_provider", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = TraceProviderQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
- result = OpsService.delete_tracing_app_config(app_id=app_id, tracing_provider=args["tracing_provider"])
+ result = OpsService.delete_tracing_app_config(app_id=app_id, tracing_provider=args.tracing_provider)
if not result:
raise TracingConfigNotExist()
return {"result": "success"}, 204
diff --git a/api/controllers/console/app/site.py b/api/controllers/console/app/site.py
index 95befc5df9..db218d8b81 100644
--- a/api/controllers/console/app/site.py
+++ b/api/controllers/console/app/site.py
@@ -1,86 +1,80 @@
-from flask_login import current_user
-from flask_restx import Resource, fields, marshal_with, reqparse
-from werkzeug.exceptions import Forbidden, NotFound
+from typing import Literal
+
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel, Field, field_validator
+from werkzeug.exceptions import NotFound
from constants.languages import supported_language
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import (
+ account_initialization_required,
+ edit_permission_required,
+ is_admin_or_owner_required,
+ setup_required,
+)
from extensions.ext_database import db
from fields.app_fields import app_site_fields
from libs.datetime_utils import naive_utc_now
-from libs.login import login_required
-from models import Account, Site
+from libs.login import current_account_with_tenant, login_required
+from models import Site
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
-def parse_app_site_args():
- parser = reqparse.RequestParser()
- parser.add_argument("title", type=str, required=False, location="json")
- parser.add_argument("icon_type", type=str, required=False, location="json")
- parser.add_argument("icon", type=str, required=False, location="json")
- parser.add_argument("icon_background", type=str, required=False, location="json")
- parser.add_argument("description", type=str, required=False, location="json")
- parser.add_argument("default_language", type=supported_language, required=False, location="json")
- parser.add_argument("chat_color_theme", type=str, required=False, location="json")
- parser.add_argument("chat_color_theme_inverted", type=bool, required=False, location="json")
- parser.add_argument("customize_domain", type=str, required=False, location="json")
- parser.add_argument("copyright", type=str, required=False, location="json")
- parser.add_argument("privacy_policy", type=str, required=False, location="json")
- parser.add_argument("custom_disclaimer", type=str, required=False, location="json")
- parser.add_argument(
- "customize_token_strategy", type=str, choices=["must", "allow", "not_allow"], required=False, location="json"
- )
- parser.add_argument("prompt_public", type=bool, required=False, location="json")
- parser.add_argument("show_workflow_steps", type=bool, required=False, location="json")
- parser.add_argument("use_icon_as_answer_icon", type=bool, required=False, location="json")
- return parser.parse_args()
+class AppSiteUpdatePayload(BaseModel):
+ title: str | None = Field(default=None)
+ icon_type: str | None = Field(default=None)
+ icon: str | None = Field(default=None)
+ icon_background: str | None = Field(default=None)
+ description: str | None = Field(default=None)
+ default_language: str | None = Field(default=None)
+ chat_color_theme: str | None = Field(default=None)
+ chat_color_theme_inverted: bool | None = Field(default=None)
+ customize_domain: str | None = Field(default=None)
+ copyright: str | None = Field(default=None)
+ privacy_policy: str | None = Field(default=None)
+ custom_disclaimer: str | None = Field(default=None)
+ customize_token_strategy: Literal["must", "allow", "not_allow"] | None = Field(default=None)
+ prompt_public: bool | None = Field(default=None)
+ show_workflow_steps: bool | None = Field(default=None)
+ use_icon_as_answer_icon: bool | None = Field(default=None)
+
+ @field_validator("default_language")
+ @classmethod
+ def validate_language(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return supported_language(value)
+
+
+console_ns.schema_model(
+ AppSiteUpdatePayload.__name__,
+ AppSiteUpdatePayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+
+# Register model for flask_restx to avoid dict type issues in Swagger
+app_site_model = console_ns.model("AppSite", app_site_fields)
@console_ns.route("/apps//site")
class AppSite(Resource):
- @api.doc("update_app_site")
- @api.doc(description="Update application site configuration")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "AppSiteRequest",
- {
- "title": fields.String(description="Site title"),
- "icon_type": fields.String(description="Icon type"),
- "icon": fields.String(description="Icon"),
- "icon_background": fields.String(description="Icon background color"),
- "description": fields.String(description="Site description"),
- "default_language": fields.String(description="Default language"),
- "chat_color_theme": fields.String(description="Chat color theme"),
- "chat_color_theme_inverted": fields.Boolean(description="Inverted chat color theme"),
- "customize_domain": fields.String(description="Custom domain"),
- "copyright": fields.String(description="Copyright text"),
- "privacy_policy": fields.String(description="Privacy policy"),
- "custom_disclaimer": fields.String(description="Custom disclaimer"),
- "customize_token_strategy": fields.String(
- enum=["must", "allow", "not_allow"], description="Token strategy"
- ),
- "prompt_public": fields.Boolean(description="Make prompt public"),
- "show_workflow_steps": fields.Boolean(description="Show workflow steps"),
- "use_icon_as_answer_icon": fields.Boolean(description="Use icon as answer icon"),
- },
- )
- )
- @api.response(200, "Site configuration updated successfully", app_site_fields)
- @api.response(403, "Insufficient permissions")
- @api.response(404, "App not found")
+ @console_ns.doc("update_app_site")
+ @console_ns.doc(description="Update application site configuration")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AppSiteUpdatePayload.__name__])
+ @console_ns.response(200, "Site configuration updated successfully", app_site_model)
+ @console_ns.response(403, "Insufficient permissions")
+ @console_ns.response(404, "App not found")
@setup_required
@login_required
+ @edit_permission_required
@account_initialization_required
@get_app_model
- @marshal_with(app_site_fields)
+ @marshal_with(app_site_model)
def post(self, app_model):
- args = parse_app_site_args()
-
- # The role of the current user in the ta table must be editor, admin, or owner
- if not current_user.is_editor:
- raise Forbidden()
-
+ args = AppSiteUpdatePayload.model_validate(console_ns.payload or {})
+ current_user, _ = current_account_with_tenant()
site = db.session.query(Site).where(Site.app_id == app_model.id).first()
if not site:
raise NotFound
@@ -103,12 +97,10 @@ class AppSite(Resource):
"show_workflow_steps",
"use_icon_as_answer_icon",
]:
- value = args.get(attr_name)
+ value = getattr(args, attr_name)
if value is not None:
setattr(site, attr_name, value)
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
site.updated_by = current_user.id
site.updated_at = naive_utc_now()
db.session.commit()
@@ -118,30 +110,26 @@ class AppSite(Resource):
@console_ns.route("/apps//site/access-token-reset")
class AppSiteAccessTokenReset(Resource):
- @api.doc("reset_app_site_access_token")
- @api.doc(description="Reset access token for application site")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Access token reset successfully", app_site_fields)
- @api.response(403, "Insufficient permissions (admin/owner required)")
- @api.response(404, "App or site not found")
+ @console_ns.doc("reset_app_site_access_token")
+ @console_ns.doc(description="Reset access token for application site")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Access token reset successfully", app_site_model)
+ @console_ns.response(403, "Insufficient permissions (admin/owner required)")
+ @console_ns.response(404, "App or site not found")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
@get_app_model
- @marshal_with(app_site_fields)
+ @marshal_with(app_site_model)
def post(self, app_model):
- # The role of the current user in the ta table must be admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
-
+ current_user, _ = current_account_with_tenant()
site = db.session.query(Site).where(Site.app_id == app_model.id).first()
if not site:
raise NotFound
site.code = Site.generate_code(16)
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
site.updated_by = current_user.id
site.updated_at = naive_utc_now()
db.session.commit()
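The site update handler writes back only fields that arrived non-None, which is why every `AppSiteUpdatePayload` field defaults to `None`. One subtlety, noted as an aside: that loop cannot distinguish an omitted key from an explicit JSON `null`; pydantic's `model_fields_set` can, as this sketch (using an invented `site` stub) shows:

```python
from types import SimpleNamespace

site = SimpleNamespace(title="old title", description="unchanged")
payload = AppSiteUpdatePayload.model_validate({"title": "My App"})

# model_fields_set holds only the keys the client actually sent, so an
# explicit null could be applied while omitted keys stay untouched.
for name in payload.model_fields_set:
    setattr(site, name, getattr(payload, name))

assert site.title == "My App" and site.description == "unchanged"
```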
diff --git a/api/controllers/console/app/statistic.py b/api/controllers/console/app/statistic.py
index 5974395c6a..ffa28b1c95 100644
--- a/api/controllers/console/app/statistic.py
+++ b/api/controllers/console/app/statistic.py
@@ -1,33 +1,48 @@
-from datetime import datetime
from decimal import Decimal
-import pytz
import sqlalchemy as sa
-from flask import jsonify
-from flask_login import current_user
-from flask_restx import Resource, fields, reqparse
+from flask import abort, jsonify, request
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field, field_validator
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
from core.app.entities.app_invoke_entities import InvokeFrom
from extensions.ext_database import db
-from libs.helper import DatetimeString
-from libs.login import login_required
-from models import AppMode, Message
+from libs.datetime_utils import parse_time_range
+from libs.helper import convert_datetime_to_date
+from libs.login import current_account_with_tenant, login_required
+from models import AppMode
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class StatisticTimeRangeQuery(BaseModel):
+ start: str | None = Field(default=None, description="Start date (YYYY-MM-DD HH:MM)")
+ end: str | None = Field(default=None, description="End date (YYYY-MM-DD HH:MM)")
+
+ @field_validator("start", "end", mode="before")
+ @classmethod
+ def empty_string_to_none(cls, value: str | None) -> str | None:
+ if value == "":
+ return None
+ return value
+
+
+console_ns.schema_model(
+ StatisticTimeRangeQuery.__name__,
+ StatisticTimeRangeQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
@console_ns.route("/apps//statistics/daily-messages")
class DailyMessageStatistic(Resource):
- @api.doc("get_daily_message_statistics")
- @api.doc(description="Get daily message statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_daily_message_statistics")
+ @console_ns.doc(description="Get daily message statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"Daily message statistics retrieved successfully",
fields.List(fields.Raw(description="Daily message count data")),
@@ -37,15 +52,13 @@ class DailyMessageStatistic(Resource):
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+ converted_created_at = convert_datetime_to_date("created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
COUNT(*) AS message_count
FROM
messages
@@ -53,27 +66,18 @@ WHERE
app_id = :app_id
AND invoke_from != :invoke_from"""
arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
-
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
sql_query += " AND created_at >= :start"
arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
sql_query += " AND created_at < :end"
arg_dict["end"] = end_datetime_utc
@@ -91,15 +95,11 @@ WHERE
@console_ns.route("/apps//statistics/daily-conversations")
class DailyConversationStatistic(Resource):
- @api.doc("get_daily_conversation_statistics")
- @api.doc(description="Get daily conversation statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_daily_conversation_statistics")
+ @console_ns.doc(description="Get daily conversation statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"Daily conversation statistics retrieved successfully",
fields.List(fields.Raw(description="Daily conversation count data")),
@@ -109,63 +109,53 @@ class DailyConversationStatistic(Resource):
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
+ converted_created_at = convert_datetime_to_date("created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
+ COUNT(DISTINCT conversation_id) AS conversation_count
+FROM
+ messages
+WHERE
+ app_id = :app_id
+ AND invoke_from != :invoke_from"""
+ arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- stmt = (
- sa.select(
- sa.func.date(
- sa.func.date_trunc("day", sa.text("created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz"))
- ).label("date"),
- sa.func.count(sa.distinct(Message.conversation_id)).label("conversation_count"),
- )
- .select_from(Message)
- .where(Message.app_id == app_model.id, Message.invoke_from != InvokeFrom.DEBUGGER)
- )
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
- stmt = stmt.where(Message.created_at >= start_datetime_utc)
+ if start_datetime_utc:
+ sql_query += " AND created_at >= :start"
+ arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
- stmt = stmt.where(Message.created_at < end_datetime_utc)
+ if end_datetime_utc:
+ sql_query += " AND created_at < :end"
+ arg_dict["end"] = end_datetime_utc
- stmt = stmt.group_by("date").order_by("date")
+ sql_query += " GROUP BY date ORDER BY date"
response_data = []
with db.engine.begin() as conn:
- rs = conn.execute(stmt, {"tz": account.timezone})
- for row in rs:
- response_data.append({"date": str(row.date), "conversation_count": row.conversation_count})
+ rs = conn.execute(sa.text(sql_query), arg_dict)
+ for row in rs:
+ response_data.append({"date": str(row.date), "conversation_count": row.conversation_count})
return jsonify({"data": response_data})
@console_ns.route("/apps//statistics/daily-end-users")
class DailyTerminalsStatistic(Resource):
- @api.doc("get_daily_terminals_statistics")
- @api.doc(description="Get daily terminal/end-user statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_daily_terminals_statistics")
+ @console_ns.doc(description="Get daily terminal/end-user statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"Daily terminal statistics retrieved successfully",
fields.List(fields.Raw(description="Daily terminal count data")),
@@ -175,15 +165,13 @@ class DailyTerminalsStatistic(Resource):
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+ converted_created_at = convert_datetime_to_date("created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
COUNT(DISTINCT messages.from_end_user_id) AS terminal_count
FROM
messages
@@ -191,27 +179,18 @@ WHERE
app_id = :app_id
AND invoke_from != :invoke_from"""
arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
-
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
sql_query += " AND created_at >= :start"
arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
sql_query += " AND created_at < :end"
arg_dict["end"] = end_datetime_utc
@@ -229,15 +208,11 @@ WHERE
@console_ns.route("/apps//statistics/token-costs")
class DailyTokenCostStatistic(Resource):
- @api.doc("get_daily_token_cost_statistics")
- @api.doc(description="Get daily token cost statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_daily_token_cost_statistics")
+ @console_ns.doc(description="Get daily token cost statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"Daily token cost statistics retrieved successfully",
fields.List(fields.Raw(description="Daily token cost data")),
@@ -247,15 +222,13 @@ class DailyTokenCostStatistic(Resource):
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+ converted_created_at = convert_datetime_to_date("created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
(SUM(messages.message_tokens) + SUM(messages.answer_tokens)) AS token_count,
SUM(total_price) AS total_price
FROM
@@ -264,27 +237,18 @@ WHERE
app_id = :app_id
AND invoke_from != :invoke_from"""
arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
-
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
sql_query += " AND created_at >= :start"
arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
sql_query += " AND created_at < :end"
arg_dict["end"] = end_datetime_utc
@@ -304,15 +268,11 @@ WHERE
@console_ns.route("/apps//statistics/average-session-interactions")
class AverageSessionInteractionStatistic(Resource):
- @api.doc("get_average_session_interaction_statistics")
- @api.doc(description="Get average session interaction statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_average_session_interaction_statistics")
+ @console_ns.doc(description="Get average session interaction statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"Average session interaction statistics retrieved successfully",
fields.List(fields.Raw(description="Average session interaction data")),
@@ -322,15 +282,13 @@ class AverageSessionInteractionStatistic(Resource):
@account_initialization_required
@get_app_model(mode=[AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT])
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', c.created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+ converted_created_at = convert_datetime_to_date("c.created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
AVG(subquery.message_count) AS interactions
FROM
(
@@ -346,27 +304,18 @@ FROM
c.app_id = :app_id
AND m.invoke_from != :invoke_from"""
arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
-
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
sql_query += " AND c.created_at >= :start"
arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
sql_query += " AND c.created_at < :end"
arg_dict["end"] = end_datetime_utc
@@ -395,15 +344,11 @@ ORDER BY
@console_ns.route("/apps//statistics/user-satisfaction-rate")
class UserSatisfactionRateStatistic(Resource):
- @api.doc("get_user_satisfaction_rate_statistics")
- @api.doc(description="Get user satisfaction rate statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_user_satisfaction_rate_statistics")
+ @console_ns.doc(description="Get user satisfaction rate statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"User satisfaction rate statistics retrieved successfully",
fields.List(fields.Raw(description="User satisfaction rate data")),
@@ -413,15 +358,13 @@ class UserSatisfactionRateStatistic(Resource):
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', m.created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+ converted_created_at = convert_datetime_to_date("m.created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
COUNT(m.id) AS message_count,
COUNT(mf.id) AS feedback_count
FROM
@@ -433,27 +376,18 @@ WHERE
m.app_id = :app_id
AND m.invoke_from != :invoke_from"""
arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
-
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
sql_query += " AND m.created_at >= :start"
arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
sql_query += " AND m.created_at < :end"
arg_dict["end"] = end_datetime_utc
@@ -476,15 +410,11 @@ WHERE
@console_ns.route("/apps//statistics/average-response-time")
class AverageResponseTimeStatistic(Resource):
- @api.doc("get_average_response_time_statistics")
- @api.doc(description="Get average response time statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_average_response_time_statistics")
+ @console_ns.doc(description="Get average response time statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"Average response time statistics retrieved successfully",
fields.List(fields.Raw(description="Average response time data")),
@@ -494,15 +424,13 @@ class AverageResponseTimeStatistic(Resource):
@account_initialization_required
@get_app_model(mode=AppMode.COMPLETION)
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+ converted_created_at = convert_datetime_to_date("created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
AVG(provider_response_latency) AS latency
FROM
messages
@@ -510,27 +438,18 @@ WHERE
app_id = :app_id
AND invoke_from != :invoke_from"""
arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
-
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
sql_query += " AND created_at >= :start"
arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
sql_query += " AND created_at < :end"
arg_dict["end"] = end_datetime_utc
@@ -548,15 +467,11 @@ WHERE
@console_ns.route("/apps//statistics/tokens-per-second")
class TokensPerSecondStatistic(Resource):
- @api.doc("get_tokens_per_second_statistics")
- @api.doc(description="Get tokens per second statistics for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.parser()
- .add_argument("start", type=str, location="args", help="Start date (YYYY-MM-DD HH:MM)")
- .add_argument("end", type=str, location="args", help="End date (YYYY-MM-DD HH:MM)")
- )
- @api.response(
+ @console_ns.doc("get_tokens_per_second_statistics")
+ @console_ns.doc(description="Get tokens per second statistics for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[StatisticTimeRangeQuery.__name__])
+ @console_ns.response(
200,
"Tokens per second statistics retrieved successfully",
fields.List(fields.Raw(description="Tokens per second data")),
@@ -566,15 +481,12 @@ class TokensPerSecondStatistic(Resource):
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
+ args = StatisticTimeRangeQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
-
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
+ converted_created_at = convert_datetime_to_date("created_at")
+ sql_query = f"""SELECT
+ {converted_created_at} AS date,
CASE
WHEN SUM(provider_response_latency) = 0 THEN 0
ELSE (SUM(answer_tokens) / SUM(provider_response_latency))
@@ -585,27 +497,18 @@ WHERE
app_id = :app_id
AND invoke_from != :invoke_from"""
arg_dict = {"tz": account.timezone, "app_id": app_model.id, "invoke_from": InvokeFrom.DEBUGGER}
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
-
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
+ try:
+ start_datetime_utc, end_datetime_utc = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
+ if start_datetime_utc:
sql_query += " AND created_at >= :start"
arg_dict["start"] = start_datetime_utc
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
+ if end_datetime_utc:
sql_query += " AND created_at < :end"
arg_dict["end"] = end_datetime_utc
diff --git a/api/controllers/console/app/workflow.py b/api/controllers/console/app/workflow.py
index 1f5cbbeca5..b4f2ef0ba8 100644
--- a/api/controllers/console/app/workflow.py
+++ b/api/controllers/console/app/workflow.py
@@ -1,34 +1,46 @@
import json
import logging
from collections.abc import Sequence
-from typing import cast
+from typing import Any
from flask import abort, request
-from flask_restx import Resource, fields, inputs, marshal_with, reqparse
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.error import ConversationCompletedError, DraftWorkflowNotExist, DraftWorkflowNotSync
from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
from core.app.apps.base_app_queue_manager import AppQueueManager
+from core.app.apps.workflow.app_generator import SKIP_PREPARE_USER_INPUTS_KEY
from core.app.entities.app_invoke_entities import InvokeFrom
from core.file.models import File
from core.helper.trace_id_helper import get_external_trace_id
+from core.model_runtime.utils.encoders import jsonable_encoder
+from core.plugin.impl.exc import PluginInvokeError
+from core.trigger.debug.event_selectors import (
+ TriggerDebugEvent,
+ TriggerDebugEventPoller,
+ create_event_poller,
+ select_trigger_debug_events,
+)
+from core.workflow.enums import NodeType
from core.workflow.graph_engine.manager import GraphEngineManager
from extensions.ext_database import db
from factories import file_factory, variable_factory
+from fields.member_fields import simple_account_fields
from fields.workflow_fields import workflow_fields, workflow_pagination_fields
from fields.workflow_run_fields import workflow_run_node_execution_fields
from libs import helper
+from libs.datetime_utils import naive_utc_now
from libs.helper import TimestampField, uuid_value
-from libs.login import current_user, login_required
+from libs.login import current_account_with_tenant, login_required
from models import App
-from models.account import Account
from models.model import AppMode
from models.workflow import Workflow
from services.app_generate_service import AppGenerateService
@@ -37,6 +49,162 @@ from services.errors.llm import InvokeRateLimitError
from services.workflow_service import DraftWorkflowDeletionError, WorkflowInUseError, WorkflowService
logger = logging.getLogger(__name__)
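+# Retry hint returned to polling clients while no trigger event has arrived (presumably milliseconds)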
+LISTENING_RETRY_IN = 2000
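+# flask-restx serves Swagger 2.0, whose schema refs resolve under #/definitions/ rather than pydantic's default #/$defs/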
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+# Register models for flask_restx to avoid dict type issues in Swagger
+# Register in dependency order: base models first, then dependent models
+
+# Base models
+simple_account_model = console_ns.model("SimpleAccount", simple_account_fields)
+
+from fields.workflow_fields import pipeline_variable_fields, serialize_value_type
+
+conversation_variable_model = console_ns.model(
+ "ConversationVariable",
+ {
+ "id": fields.String,
+ "name": fields.String,
+ "value_type": fields.String(attribute=serialize_value_type),
+ "value": fields.Raw,
+ "description": fields.String,
+ },
+)
+
+pipeline_variable_model = console_ns.model("PipelineVariable", pipeline_variable_fields)
+
+# Workflow model with nested dependencies
+workflow_fields_copy = workflow_fields.copy()
+workflow_fields_copy["created_by"] = fields.Nested(simple_account_model, attribute="created_by_account")
+workflow_fields_copy["updated_by"] = fields.Nested(
+ simple_account_model, attribute="updated_by_account", allow_null=True
+)
+workflow_fields_copy["conversation_variables"] = fields.List(fields.Nested(conversation_variable_model))
+workflow_fields_copy["rag_pipeline_variables"] = fields.List(fields.Nested(pipeline_variable_model))
+workflow_model = console_ns.model("Workflow", workflow_fields_copy)
+
+# Workflow pagination model
+workflow_pagination_fields_copy = workflow_pagination_fields.copy()
+workflow_pagination_fields_copy["items"] = fields.List(fields.Nested(workflow_model), attribute="items")
+workflow_pagination_model = console_ns.model("WorkflowPagination", workflow_pagination_fields_copy)
+
+# Reuse workflow_run_node_execution_model from workflow_run.py if already registered
+# Otherwise register it here
+from fields.end_user_fields import simple_end_user_fields
+
+simple_end_user_model = None
+try:
+ simple_end_user_model = console_ns.models.get("SimpleEndUser")
+except AttributeError:
+ pass
+if simple_end_user_model is None:
+ simple_end_user_model = console_ns.model("SimpleEndUser", simple_end_user_fields)
+
+workflow_run_node_execution_model = None
+try:
+ workflow_run_node_execution_model = console_ns.models.get("WorkflowRunNodeExecution")
+except AttributeError:
+ pass
+if workflow_run_node_execution_model is None:
+ workflow_run_node_execution_model = console_ns.model("WorkflowRunNodeExecution", workflow_run_node_execution_fields)
+
+
+class SyncDraftWorkflowPayload(BaseModel):
+ graph: dict[str, Any]
+ features: dict[str, Any]
+ hash: str | None = None
+ environment_variables: list[dict[str, Any]] = Field(default_factory=list)
+ conversation_variables: list[dict[str, Any]] = Field(default_factory=list)
+
+
+class BaseWorkflowRunPayload(BaseModel):
+ files: list[dict[str, Any]] | None = None
+
+
+class AdvancedChatWorkflowRunPayload(BaseWorkflowRunPayload):
+ inputs: dict[str, Any] | None = None
+ query: str = ""
+ conversation_id: str | None = None
+ parent_message_id: str | None = None
+
+ @field_validator("conversation_id", "parent_message_id")
+ @classmethod
+ def validate_uuid(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+class IterationNodeRunPayload(BaseModel):
+ inputs: dict[str, Any] | None = None
+
+
+class LoopNodeRunPayload(BaseModel):
+ inputs: dict[str, Any] | None = None
+
+
+class DraftWorkflowRunPayload(BaseWorkflowRunPayload):
+ inputs: dict[str, Any]
+
+
+class DraftWorkflowNodeRunPayload(BaseWorkflowRunPayload):
+ inputs: dict[str, Any]
+ query: str = ""
+
+
+class PublishWorkflowPayload(BaseModel):
+ marked_name: str | None = Field(default=None, max_length=20)
+ marked_comment: str | None = Field(default=None, max_length=100)
+
+
+class DefaultBlockConfigQuery(BaseModel):
+ q: str | None = None
+
+
+class ConvertToWorkflowPayload(BaseModel):
+ name: str | None = None
+ icon_type: str | None = None
+ icon: str | None = None
+ icon_background: str | None = None
+
+
+class WorkflowListQuery(BaseModel):
+ page: int = Field(default=1, ge=1, le=99999)
+ limit: int = Field(default=10, ge=1, le=100)
+ user_id: str | None = None
+ named_only: bool = False
+
+
+class WorkflowUpdatePayload(BaseModel):
+ marked_name: str | None = Field(default=None, max_length=20)
+ marked_comment: str | None = Field(default=None, max_length=100)
+
+
+class DraftWorkflowTriggerRunPayload(BaseModel):
+ node_id: str
+
+
+class DraftWorkflowTriggerRunAllPayload(BaseModel):
+ node_ids: list[str]
+
+
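+# Helper: register each Pydantic payload's JSON schema on the namespace so @console_ns.expect can look it up by class name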
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(SyncDraftWorkflowPayload)
+reg(AdvancedChatWorkflowRunPayload)
+reg(IterationNodeRunPayload)
+reg(LoopNodeRunPayload)
+reg(DraftWorkflowRunPayload)
+reg(DraftWorkflowNodeRunPayload)
+reg(PublishWorkflowPayload)
+reg(DefaultBlockConfigQuery)
+reg(ConvertToWorkflowPayload)
+reg(WorkflowListQuery)
+reg(WorkflowUpdatePayload)
+reg(DraftWorkflowTriggerRunPayload)
+reg(DraftWorkflowTriggerRunAllPayload)
# TODO(QuantumGhost): Refactor existing node run API to handle file parameter parsing
@@ -59,25 +227,21 @@ def _parse_file(workflow: Workflow, files: list[dict] | None = None) -> Sequence
@console_ns.route("/apps//workflows/draft")
class DraftWorkflowApi(Resource):
- @api.doc("get_draft_workflow")
- @api.doc(description="Get draft workflow for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Draft workflow retrieved successfully", workflow_fields)
- @api.response(404, "Draft workflow not found")
+ @console_ns.doc("get_draft_workflow")
+ @console_ns.doc(description="Get draft workflow for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Draft workflow retrieved successfully", workflow_model)
+ @console_ns.response(404, "Draft workflow not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_fields)
+ @marshal_with(workflow_model)
+ @edit_permission_required
def get(self, app_model: App):
"""
Get draft workflow
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- assert isinstance(current_user, Account)
- if not current_user.has_edit_permission:
- raise Forbidden()
-
# fetch draft workflow by app_model
workflow_service = WorkflowService()
workflow = workflow_service.get_draft_workflow(app_model=app_model)
@@ -92,66 +256,49 @@ class DraftWorkflowApi(Resource):
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @api.doc("sync_draft_workflow")
- @api.doc(description="Sync draft workflow configuration")
- @api.expect(
- api.model(
- "SyncDraftWorkflowRequest",
+ @console_ns.doc("sync_draft_workflow")
+ @console_ns.doc(description="Sync draft workflow configuration")
+ @console_ns.expect(console_ns.models[SyncDraftWorkflowPayload.__name__])
+ @console_ns.response(
+ 200,
+ "Draft workflow synced successfully",
+ console_ns.model(
+ "SyncDraftWorkflowResponse",
{
- "graph": fields.Raw(required=True, description="Workflow graph configuration"),
- "features": fields.Raw(required=True, description="Workflow features configuration"),
- "hash": fields.String(description="Workflow hash for validation"),
- "environment_variables": fields.List(fields.Raw, required=True, description="Environment variables"),
- "conversation_variables": fields.List(fields.Raw, description="Conversation variables"),
+ "result": fields.String,
+ "hash": fields.String,
+ "updated_at": fields.String,
},
- )
+ ),
)
- @api.response(200, "Draft workflow synced successfully", workflow_fields)
- @api.response(400, "Invalid workflow configuration")
- @api.response(403, "Permission denied")
+ @console_ns.response(400, "Invalid workflow configuration")
+ @console_ns.response(403, "Permission denied")
+ @edit_permission_required
def post(self, app_model: App):
"""
Sync draft workflow
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- assert isinstance(current_user, Account)
- if not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
content_type = request.headers.get("Content-Type", "")
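+ # Drafts may arrive as application/json or text/plain; both branches normalize the body into a dict before validation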
+ payload_data: dict[str, Any] | None = None
if "application/json" in content_type:
- parser = reqparse.RequestParser()
- parser.add_argument("graph", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("features", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("hash", type=str, required=False, location="json")
- parser.add_argument("environment_variables", type=list, required=True, location="json")
- parser.add_argument("conversation_variables", type=list, required=False, location="json")
- args = parser.parse_args()
+ payload_data = request.get_json(silent=True)
+ if not isinstance(payload_data, dict):
+ return {"message": "Invalid JSON data"}, 400
elif "text/plain" in content_type:
try:
- data = json.loads(request.data.decode("utf-8"))
- if "graph" not in data or "features" not in data:
- raise ValueError("graph or features not found in data")
-
- if not isinstance(data.get("graph"), dict) or not isinstance(data.get("features"), dict):
- raise ValueError("graph or features is not a dict")
-
- args = {
- "graph": data.get("graph"),
- "features": data.get("features"),
- "hash": data.get("hash"),
- "environment_variables": data.get("environment_variables"),
- "conversation_variables": data.get("conversation_variables"),
- }
+ payload_data = json.loads(request.data.decode("utf-8"))
except json.JSONDecodeError:
return {"message": "Invalid JSON data"}, 400
+ if not isinstance(payload_data, dict):
+ return {"message": "Invalid JSON data"}, 400
else:
abort(415)
- if not isinstance(current_user, Account):
- raise Forbidden()
-
+ args_model = SyncDraftWorkflowPayload.model_validate(payload_data)
+ args = args_model.model_dump()
workflow_service = WorkflowService()
try:
@@ -184,47 +331,26 @@ class DraftWorkflowApi(Resource):
@console_ns.route("/apps//advanced-chat/workflows/draft/run")
class AdvancedChatDraftWorkflowRunApi(Resource):
- @api.doc("run_advanced_chat_draft_workflow")
- @api.doc(description="Run draft workflow for advanced chat application")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "AdvancedChatWorkflowRunRequest",
- {
- "query": fields.String(required=True, description="User query"),
- "inputs": fields.Raw(description="Input variables"),
- "files": fields.List(fields.Raw, description="File uploads"),
- "conversation_id": fields.String(description="Conversation ID"),
- },
- )
- )
- @api.response(200, "Workflow run started successfully")
- @api.response(400, "Invalid request parameters")
- @api.response(403, "Permission denied")
+ @console_ns.doc("run_advanced_chat_draft_workflow")
+ @console_ns.doc(description="Run draft workflow for advanced chat application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[AdvancedChatWorkflowRunPayload.__name__])
+ @console_ns.response(200, "Workflow run started successfully")
+ @console_ns.response(400, "Invalid request parameters")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT])
+ @edit_permission_required
def post(self, app_model: App):
"""
Run draft workflow
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- assert isinstance(current_user, Account)
- if not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- if not isinstance(current_user, Account):
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, location="json")
- parser.add_argument("query", type=str, required=True, location="json", default="")
- parser.add_argument("files", type=list, location="json")
- parser.add_argument("conversation_id", type=uuid_value, location="json")
- parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
-
- args = parser.parse_args()
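+ # console_ns.payload may be absent when no JSON body was sent; fall back to {} so the model's defaults apply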
+ args_model = AdvancedChatWorkflowRunPayload.model_validate(console_ns.payload or {})
+ args = args_model.model_dump(exclude_none=True)
external_trace_id = get_external_trace_id(request)
if external_trace_id:
@@ -251,38 +377,24 @@ class AdvancedChatDraftWorkflowRunApi(Resource):
@console_ns.route("/apps//advanced-chat/workflows/draft/iteration/nodes//run")
class AdvancedChatDraftRunIterationNodeApi(Resource):
- @api.doc("run_advanced_chat_draft_iteration_node")
- @api.doc(description="Run draft workflow iteration node for advanced chat")
- @api.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
- @api.expect(
- api.model(
- "IterationNodeRunRequest",
- {
- "task_id": fields.String(required=True, description="Task ID"),
- "inputs": fields.Raw(description="Input variables"),
- },
- )
- )
- @api.response(200, "Iteration node run started successfully")
- @api.response(403, "Permission denied")
- @api.response(404, "Node not found")
+ @console_ns.doc("run_advanced_chat_draft_iteration_node")
+ @console_ns.doc(description="Run draft workflow iteration node for advanced chat")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.expect(console_ns.models[IterationNodeRunPayload.__name__])
+ @console_ns.response(200, "Iteration node run started successfully")
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(404, "Node not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT])
+ @edit_permission_required
def post(self, app_model: App, node_id: str):
"""
Run draft workflow iteration node
"""
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = IterationNodeRunPayload.model_validate(console_ns.payload or {}).model_dump(exclude_none=True)
try:
response = AppGenerateService.generate_single_iteration(
@@ -303,38 +415,24 @@ class AdvancedChatDraftRunIterationNodeApi(Resource):
@console_ns.route("/apps//workflows/draft/iteration/nodes//run")
class WorkflowDraftRunIterationNodeApi(Resource):
- @api.doc("run_workflow_draft_iteration_node")
- @api.doc(description="Run draft workflow iteration node")
- @api.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
- @api.expect(
- api.model(
- "WorkflowIterationNodeRunRequest",
- {
- "task_id": fields.String(required=True, description="Task ID"),
- "inputs": fields.Raw(description="Input variables"),
- },
- )
- )
- @api.response(200, "Workflow iteration node run started successfully")
- @api.response(403, "Permission denied")
- @api.response(404, "Node not found")
+ @console_ns.doc("run_workflow_draft_iteration_node")
+ @console_ns.doc(description="Run draft workflow iteration node")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.expect(console_ns.models[IterationNodeRunPayload.__name__])
+ @console_ns.response(200, "Workflow iteration node run started successfully")
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(404, "Node not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.WORKFLOW])
+ @edit_permission_required
def post(self, app_model: App, node_id: str):
"""
Run draft workflow iteration node
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account):
- raise Forbidden()
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = IterationNodeRunPayload.model_validate(console_ns.payload or {}).model_dump(exclude_none=True)
try:
response = AppGenerateService.generate_single_iteration(
@@ -355,39 +453,24 @@ class WorkflowDraftRunIterationNodeApi(Resource):
@console_ns.route("/apps//advanced-chat/workflows/draft/loop/nodes//run")
class AdvancedChatDraftRunLoopNodeApi(Resource):
- @api.doc("run_advanced_chat_draft_loop_node")
- @api.doc(description="Run draft workflow loop node for advanced chat")
- @api.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
- @api.expect(
- api.model(
- "LoopNodeRunRequest",
- {
- "task_id": fields.String(required=True, description="Task ID"),
- "inputs": fields.Raw(description="Input variables"),
- },
- )
- )
- @api.response(200, "Loop node run started successfully")
- @api.response(403, "Permission denied")
- @api.response(404, "Node not found")
+ @console_ns.doc("run_advanced_chat_draft_loop_node")
+ @console_ns.doc(description="Run draft workflow loop node for advanced chat")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.expect(console_ns.models[LoopNodeRunPayload.__name__])
+ @console_ns.response(200, "Loop node run started successfully")
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(404, "Node not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT])
+ @edit_permission_required
def post(self, app_model: App, node_id: str):
"""
Run draft workflow loop node
"""
-
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = LoopNodeRunPayload.model_validate(console_ns.payload or {}).model_dump(exclude_none=True)
try:
response = AppGenerateService.generate_single_loop(
@@ -408,39 +491,24 @@ class AdvancedChatDraftRunLoopNodeApi(Resource):
@console_ns.route("/apps//workflows/draft/loop/nodes//run")
class WorkflowDraftRunLoopNodeApi(Resource):
- @api.doc("run_workflow_draft_loop_node")
- @api.doc(description="Run draft workflow loop node")
- @api.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
- @api.expect(
- api.model(
- "WorkflowLoopNodeRunRequest",
- {
- "task_id": fields.String(required=True, description="Task ID"),
- "inputs": fields.Raw(description="Input variables"),
- },
- )
- )
- @api.response(200, "Workflow loop node run started successfully")
- @api.response(403, "Permission denied")
- @api.response(404, "Node not found")
+ @console_ns.doc("run_workflow_draft_loop_node")
+ @console_ns.doc(description="Run draft workflow loop node")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.expect(console_ns.models[LoopNodeRunPayload.__name__])
+ @console_ns.response(200, "Workflow loop node run started successfully")
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(404, "Node not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.WORKFLOW])
+ @edit_permission_required
def post(self, app_model: App, node_id: str):
"""
Run draft workflow loop node
"""
-
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = LoopNodeRunPayload.model_validate(console_ns.payload or {}).model_dump(exclude_none=True)
try:
response = AppGenerateService.generate_single_loop(
@@ -461,39 +529,23 @@ class WorkflowDraftRunLoopNodeApi(Resource):
@console_ns.route("/apps//workflows/draft/run")
class DraftWorkflowRunApi(Resource):
- @api.doc("run_draft_workflow")
- @api.doc(description="Run draft workflow")
- @api.doc(params={"app_id": "Application ID"})
- @api.expect(
- api.model(
- "DraftWorkflowRunRequest",
- {
- "inputs": fields.Raw(required=True, description="Input variables"),
- "files": fields.List(fields.Raw, description="File uploads"),
- },
- )
- )
- @api.response(200, "Draft workflow run started successfully")
- @api.response(403, "Permission denied")
+ @console_ns.doc("run_draft_workflow")
+ @console_ns.doc(description="Run draft workflow")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[DraftWorkflowRunPayload.__name__])
+ @console_ns.response(200, "Draft workflow run started successfully")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.WORKFLOW])
+ @edit_permission_required
def post(self, app_model: App):
"""
Run draft workflow
"""
-
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("files", type=list, required=False, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = DraftWorkflowRunPayload.model_validate(console_ns.payload or {}).model_dump(exclude_none=True)
external_trace_id = get_external_trace_id(request)
if external_trace_id:
@@ -515,27 +567,21 @@ class DraftWorkflowRunApi(Resource):
@console_ns.route("/apps//workflow-runs/tasks//stop")
class WorkflowTaskStopApi(Resource):
- @api.doc("stop_workflow_task")
- @api.doc(description="Stop running workflow task")
- @api.doc(params={"app_id": "Application ID", "task_id": "Task ID"})
- @api.response(200, "Task stopped successfully")
- @api.response(404, "Task not found")
- @api.response(403, "Permission denied")
+ @console_ns.doc("stop_workflow_task")
+ @console_ns.doc(description="Stop running workflow task")
+ @console_ns.doc(params={"app_id": "Application ID", "task_id": "Task ID"})
+ @console_ns.response(200, "Task stopped successfully")
+ @console_ns.response(404, "Task not found")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
+ @edit_permission_required
def post(self, app_model: App, task_id: str):
"""
Stop workflow task
"""
-
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
# Stop using both mechanisms for backward compatibility
# Legacy stop flag mechanism (without user check)
AppQueueManager.set_stop_flag_no_user_check(task_id)
@@ -548,43 +594,28 @@ class WorkflowTaskStopApi(Resource):
@console_ns.route("/apps//workflows/draft/nodes//run")
class DraftWorkflowNodeRunApi(Resource):
- @api.doc("run_draft_workflow_node")
- @api.doc(description="Run draft workflow node")
- @api.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
- @api.expect(
- api.model(
- "DraftWorkflowNodeRunRequest",
- {
- "inputs": fields.Raw(description="Input variables"),
- },
- )
- )
- @api.response(200, "Node run started successfully", workflow_run_node_execution_fields)
- @api.response(403, "Permission denied")
- @api.response(404, "Node not found")
+ @console_ns.doc("run_draft_workflow_node")
+ @console_ns.doc(description="Run draft workflow node")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.expect(console_ns.models[DraftWorkflowNodeRunPayload.__name__])
+ @console_ns.response(200, "Node run started successfully", workflow_run_node_execution_model)
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(404, "Node not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_run_node_execution_fields)
+ @marshal_with(workflow_run_node_execution_model)
+ @edit_permission_required
def post(self, app_model: App, node_id: str):
"""
Run draft workflow node
"""
+ current_user, _ = current_account_with_tenant()
+ args_model = DraftWorkflowNodeRunPayload.model_validate(console_ns.payload or {})
+ args = args_model.model_dump(exclude_none=True)
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("query", type=str, required=False, location="json", default="")
- parser.add_argument("files", type=list, location="json", default=[])
- args = parser.parse_args()
-
- user_inputs = args.get("inputs")
+ user_inputs = args_model.inputs
if user_inputs is None:
raise ValueError("missing inputs")
@@ -611,27 +642,21 @@ class DraftWorkflowNodeRunApi(Resource):
@console_ns.route("/apps//workflows/publish")
class PublishedWorkflowApi(Resource):
- @api.doc("get_published_workflow")
- @api.doc(description="Get published workflow for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Published workflow retrieved successfully", workflow_fields)
- @api.response(404, "Published workflow not found")
+ @console_ns.doc("get_published_workflow")
+ @console_ns.doc(description="Get published workflow for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Published workflow retrieved successfully", workflow_model)
+ @console_ns.response(404, "Published workflow not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_fields)
+ @marshal_with(workflow_model)
+ @edit_permission_required
def get(self, app_model: App):
"""
Get published workflow
"""
-
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
# fetch published workflow by app_model
workflow_service = WorkflowService()
workflow = workflow_service.get_published_workflow(app_model=app_model)
@@ -639,30 +664,19 @@ class PublishedWorkflowApi(Resource):
# return workflow, if not found, return None
return workflow
+ @console_ns.expect(console_ns.models[PublishWorkflowPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
+ @edit_permission_required
def post(self, app_model: App):
"""
Publish workflow
"""
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("marked_name", type=str, required=False, default="", location="json")
- parser.add_argument("marked_comment", type=str, required=False, default="", location="json")
- args = parser.parse_args()
-
- # Validate name and comment length
- if args.marked_name and len(args.marked_name) > 20:
- raise ValueError("Marked name cannot exceed 20 characters")
- if args.marked_comment and len(args.marked_comment) > 100:
- raise ValueError("Marked comment cannot exceed 100 characters")
+ args = PublishWorkflowPayload.model_validate(console_ns.payload or {})
workflow_service = WorkflowService()
with Session(db.engine) as session:
@@ -674,8 +688,12 @@ class PublishedWorkflowApi(Resource):
marked_comment=args.marked_comment or "",
)
- app_model.workflow_id = workflow.id
- db.session.commit() # NOTE: this is necessary for update app_model.workflow_id
+ # Update app_model within the same session to ensure atomicity
+ app_model_in_session = session.get(App, app_model.id)
+ if app_model_in_session:
+ app_model_in_session.workflow_id = workflow.id
+ app_model_in_session.updated_by = current_user.id
+ app_model_in_session.updated_at = naive_utc_now()
workflow_created_at = TimestampField().format(workflow.created_at)
@@ -689,25 +707,19 @@ class PublishedWorkflowApi(Resource):
@console_ns.route("/apps//workflows/default-workflow-block-configs")
class DefaultBlockConfigsApi(Resource):
- @api.doc("get_default_block_configs")
- @api.doc(description="Get default block configurations for workflow")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Default block configurations retrieved successfully")
+ @console_ns.doc("get_default_block_configs")
+ @console_ns.doc(description="Get default block configurations for workflow")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Default block configurations retrieved successfully")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
+ @edit_permission_required
def get(self, app_model: App):
"""
Get default block config
"""
-
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
# Get default block configs
workflow_service = WorkflowService()
return workflow_service.get_default_block_configs()
@@ -715,35 +727,27 @@ class DefaultBlockConfigsApi(Resource):
@console_ns.route("/apps//workflows/default-workflow-block-configs/")
class DefaultBlockConfigApi(Resource):
- @api.doc("get_default_block_config")
- @api.doc(description="Get default block configuration by type")
- @api.doc(params={"app_id": "Application ID", "block_type": "Block type"})
- @api.response(200, "Default block configuration retrieved successfully")
- @api.response(404, "Block type not found")
+ @console_ns.doc("get_default_block_config")
+ @console_ns.doc(description="Get default block configuration by type")
+ @console_ns.doc(params={"app_id": "Application ID", "block_type": "Block type"})
+ @console_ns.response(200, "Default block configuration retrieved successfully")
+ @console_ns.response(404, "Block type not found")
+ @console_ns.expect(console_ns.models[DefaultBlockConfigQuery.__name__])
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
+ @edit_permission_required
def get(self, app_model: App, block_type: str):
"""
Get default block config
"""
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("q", type=str, location="args")
- args = parser.parse_args()
-
- q = args.get("q")
+ args = DefaultBlockConfigQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
filters = None
- if q:
+ if args.q:
try:
- filters = json.loads(args.get("q", ""))
+ filters = json.loads(args.q)
except json.JSONDecodeError:
raise ValueError("Invalid filters")
@@ -754,37 +758,28 @@ class DefaultBlockConfigApi(Resource):
@console_ns.route("/apps//convert-to-workflow")
class ConvertToWorkflowApi(Resource):
- @api.doc("convert_to_workflow")
- @api.doc(description="Convert application to workflow mode")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Application converted to workflow successfully")
- @api.response(400, "Application cannot be converted")
- @api.response(403, "Permission denied")
+ @console_ns.expect(console_ns.models[ConvertToWorkflowPayload.__name__])
+ @console_ns.doc("convert_to_workflow")
+ @console_ns.doc(description="Convert application to workflow mode")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Application converted to workflow successfully")
+ @console_ns.response(400, "Application cannot be converted")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.CHAT, AppMode.COMPLETION])
+ @edit_permission_required
def post(self, app_model: App):
"""
Convert basic mode of chatbot app to workflow mode
Convert expert mode of chatbot app to workflow mode
Convert Completion App to Workflow App
"""
- if not isinstance(current_user, Account):
- raise Forbidden()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- if request.data:
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=False, nullable=True, location="json")
- parser.add_argument("icon_type", type=str, required=False, nullable=True, location="json")
- parser.add_argument("icon", type=str, required=False, nullable=True, location="json")
- parser.add_argument("icon_background", type=str, required=False, nullable=True, location="json")
- args = parser.parse_args()
- else:
- args = {}
+ payload = console_ns.payload or {}
+ args = ConvertToWorkflowPayload.model_validate(payload).model_dump(exclude_none=True)
# convert to workflow mode
workflow_service = WorkflowService()
@@ -798,40 +793,32 @@ class ConvertToWorkflowApi(Resource):
@console_ns.route("/apps//workflows")
class PublishedAllWorkflowApi(Resource):
- @api.doc("get_all_published_workflows")
- @api.doc(description="Get all published workflows for an application")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Published workflows retrieved successfully", workflow_pagination_fields)
+ @console_ns.expect(console_ns.models[WorkflowListQuery.__name__])
+ @console_ns.doc("get_all_published_workflows")
+ @console_ns.doc(description="Get all published workflows for an application")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Published workflows retrieved successfully", workflow_pagination_model)
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_pagination_fields)
+ @marshal_with(workflow_pagination_model)
+ @edit_permission_required
def get(self, app_model: App):
"""
Get published workflows
"""
+ current_user, _ = current_account_with_tenant()
- if not isinstance(current_user, Account):
- raise Forbidden()
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args")
- parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args")
- parser.add_argument("user_id", type=str, required=False, location="args")
- parser.add_argument("named_only", type=inputs.boolean, required=False, default=False, location="args")
- args = parser.parse_args()
- page = int(args.get("page", 1))
- limit = int(args.get("limit", 10))
- user_id = args.get("user_id")
- named_only = args.get("named_only", False)
+ args = WorkflowListQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ page = args.page
+ limit = args.limit
+ user_id = args.user_id
+ named_only = args.named_only
if user_id:
if user_id != current_user.id:
raise Forbidden()
- user_id = cast(str, user_id)
workflow_service = WorkflowService()
with Session(db.engine) as session:
@@ -854,53 +841,32 @@ class PublishedAllWorkflowApi(Resource):
@console_ns.route("/apps//workflows/")
class WorkflowByIdApi(Resource):
- @api.doc("update_workflow_by_id")
- @api.doc(description="Update workflow by ID")
- @api.doc(params={"app_id": "Application ID", "workflow_id": "Workflow ID"})
- @api.expect(
- api.model(
- "UpdateWorkflowRequest",
- {
- "environment_variables": fields.List(fields.Raw, description="Environment variables"),
- "conversation_variables": fields.List(fields.Raw, description="Conversation variables"),
- },
- )
- )
- @api.response(200, "Workflow updated successfully", workflow_fields)
- @api.response(404, "Workflow not found")
- @api.response(403, "Permission denied")
+ @console_ns.doc("update_workflow_by_id")
+ @console_ns.doc(description="Update workflow by ID")
+ @console_ns.doc(params={"app_id": "Application ID", "workflow_id": "Workflow ID"})
+ @console_ns.expect(console_ns.models[WorkflowUpdatePayload.__name__])
+ @console_ns.response(200, "Workflow updated successfully", workflow_model)
+ @console_ns.response(404, "Workflow not found")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_fields)
+ @marshal_with(workflow_model)
+ @edit_permission_required
def patch(self, app_model: App, workflow_id: str):
"""
Update workflow attributes
"""
- if not isinstance(current_user, Account):
- raise Forbidden()
- # Check permission
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("marked_name", type=str, required=False, location="json")
- parser.add_argument("marked_comment", type=str, required=False, location="json")
- args = parser.parse_args()
-
- # Validate name and comment length
- if args.marked_name and len(args.marked_name) > 20:
- raise ValueError("Marked name cannot exceed 20 characters")
- if args.marked_comment and len(args.marked_comment) > 100:
- raise ValueError("Marked comment cannot exceed 100 characters")
+ current_user, _ = current_account_with_tenant()
+ args = WorkflowUpdatePayload.model_validate(console_ns.payload or {})
# Prepare update data
update_data = {}
- if args.get("marked_name") is not None:
- update_data["marked_name"] = args["marked_name"]
- if args.get("marked_comment") is not None:
- update_data["marked_comment"] = args["marked_comment"]
+ if args.marked_name is not None:
+ update_data["marked_name"] = args.marked_name
+ if args.marked_comment is not None:
+ update_data["marked_comment"] = args.marked_comment
if not update_data:
return {"message": "No valid fields to update"}, 400
@@ -929,16 +895,11 @@ class WorkflowByIdApi(Resource):
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
+ @edit_permission_required
def delete(self, app_model: App, workflow_id: str):
"""
Delete workflow
"""
- if not isinstance(current_user, Account):
- raise Forbidden()
- # Check permission
- if not current_user.has_edit_permission:
- raise Forbidden()
-
workflow_service = WorkflowService()
# Create a session and manage the transaction
@@ -961,17 +922,17 @@ class WorkflowByIdApi(Resource):
@console_ns.route("/apps//workflows/draft/nodes//last-run")
class DraftWorkflowNodeLastRunApi(Resource):
- @api.doc("get_draft_workflow_node_last_run")
- @api.doc(description="Get last run result for draft workflow node")
- @api.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
- @api.response(200, "Node last run retrieved successfully", workflow_run_node_execution_fields)
- @api.response(404, "Node last run not found")
- @api.response(403, "Permission denied")
+ @console_ns.doc("get_draft_workflow_node_last_run")
+ @console_ns.doc(description="Get last run result for draft workflow node")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.response(200, "Node last run retrieved successfully", workflow_run_node_execution_model)
+ @console_ns.response(404, "Node last run not found")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_run_node_execution_fields)
+ @marshal_with(workflow_run_node_execution_model)
def get(self, app_model: App, node_id: str):
srv = WorkflowService()
workflow = srv.get_draft_workflow(app_model)
@@ -985,3 +946,223 @@ class DraftWorkflowNodeLastRunApi(Resource):
if node_exec is None:
raise NotFound("last run not found")
return node_exec
+
+
+@console_ns.route("/apps//workflows/draft/trigger/run")
+class DraftWorkflowTriggerRunApi(Resource):
+ """
+ Full workflow debug - Polling API for trigger events
+ Path: /apps/<uuid:app_id>/workflows/draft/trigger/run
+ """
+
+ @console_ns.doc("poll_draft_workflow_trigger_run")
+ @console_ns.doc(description="Poll for trigger events and execute full workflow when event arrives")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(
+ console_ns.model(
+ "DraftWorkflowTriggerRunRequest",
+ {
+ "node_id": fields.String(required=True, description="Node ID"),
+ },
+ )
+ )
+ @console_ns.response(200, "Trigger event received and workflow executed successfully")
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(500, "Internal server error")
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @get_app_model(mode=[AppMode.WORKFLOW])
+ @edit_permission_required
+ def post(self, app_model: App):
+ """
+ Poll for trigger events and execute full workflow when event arrives
+ """
+ current_user, _ = current_account_with_tenant()
+ args = DraftWorkflowTriggerRunPayload.model_validate(console_ns.payload or {})
+ node_id = args.node_id
+ workflow_service = WorkflowService()
+ draft_workflow = workflow_service.get_draft_workflow(app_model)
+ if not draft_workflow:
+ raise ValueError("Workflow not found")
+
+ poller: TriggerDebugEventPoller = create_event_poller(
+ draft_workflow=draft_workflow,
+ tenant_id=app_model.tenant_id,
+ user_id=current_user.id,
+ app_id=app_model.id,
+ node_id=node_id,
+ )
+ event: TriggerDebugEvent | None = None
+ try:
+ event = poller.poll()
+ if not event:
+ return jsonable_encoder({"status": "waiting", "retry_in": LISTENING_RETRY_IN})
+ workflow_args = dict(event.workflow_args)
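+ # trigger payloads arrive already shaped for the workflow, so user-input preparation is skipped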
+ workflow_args[SKIP_PREPARE_USER_INPUTS_KEY] = True
+ return helper.compact_generate_response(
+ AppGenerateService.generate(
+ app_model=app_model,
+ user=current_user,
+ args=workflow_args,
+ invoke_from=InvokeFrom.DEBUGGER,
+ streaming=True,
+ root_node_id=node_id,
+ )
+ )
+ except InvokeRateLimitError as ex:
+ raise InvokeRateLimitHttpError(ex.description)
+ except PluginInvokeError as e:
+ return jsonable_encoder({"status": "error", "error": e.to_user_friendly_error()}), 400
+ except Exception as e:
+ logger.exception("Error polling trigger debug event")
+ raise e
+
+
+@console_ns.route("/apps//workflows/draft/nodes//trigger/run")
+class DraftWorkflowTriggerNodeApi(Resource):
+ """
+ Single node debug - Polling API for trigger events
+ Path: /apps/<uuid:app_id>/workflows/draft/nodes/<string:node_id>/trigger/run
+ """
+
+ @console_ns.doc("poll_draft_workflow_trigger_node")
+ @console_ns.doc(description="Poll for trigger events and execute single node when event arrives")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.response(200, "Trigger event received and node executed successfully")
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(500, "Internal server error")
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @get_app_model(mode=[AppMode.WORKFLOW])
+ @edit_permission_required
+ def post(self, app_model: App, node_id: str):
+ """
+ Poll for trigger events and execute single node when event arrives
+ """
+ current_user, _ = current_account_with_tenant()
+
+ workflow_service = WorkflowService()
+ draft_workflow = workflow_service.get_draft_workflow(app_model)
+ if not draft_workflow:
+ raise ValueError("Workflow not found")
+
+ node_config = draft_workflow.get_node_config_by_id(node_id=node_id)
+ if not node_config:
+ raise ValueError("Node data not found for node %s", node_id)
+ node_type: NodeType = draft_workflow.get_node_type_from_node_config(node_config)
+ event: TriggerDebugEvent | None = None
+ # for schedule triggers, a single-node run executes immediately instead of polling
+ if node_type == NodeType.TRIGGER_SCHEDULE:
+ event = TriggerDebugEvent(
+ workflow_args={},
+ node_id=node_id,
+ )
+ # for other trigger types, poll for the event
+ else:
+ try:
+ poller: TriggerDebugEventPoller = create_event_poller(
+ draft_workflow=draft_workflow,
+ tenant_id=app_model.tenant_id,
+ user_id=current_user.id,
+ app_id=app_model.id,
+ node_id=node_id,
+ )
+ event = poller.poll()
+ except PluginInvokeError as e:
+ return jsonable_encoder({"status": "error", "error": e.to_user_friendly_error()}), 400
+ except Exception as e:
+ logger.exception("Error polling trigger debug event")
+ raise e
+ if not event:
+ return jsonable_encoder({"status": "waiting", "retry_in": LISTENING_RETRY_IN})
+
+ raw_files = event.workflow_args.get("files")
+ files = _parse_file(draft_workflow, raw_files if isinstance(raw_files, list) else None)
+ try:
+ node_execution = workflow_service.run_draft_workflow_node(
+ app_model=app_model,
+ draft_workflow=draft_workflow,
+ node_id=node_id,
+ user_inputs=event.workflow_args.get("inputs") or {},
+ account=current_user,
+ query="",
+ files=files,
+ )
+ return jsonable_encoder(node_execution)
+ except Exception as e:
+ logger.exception("Error running draft workflow trigger node")
+ return jsonable_encoder(
+ {"status": "error", "error": "An unexpected error occurred while running the node."}
+ ), 400
+
+
+@console_ns.route("/apps//workflows/draft/trigger/run-all")
+class DraftWorkflowTriggerRunAllApi(Resource):
+ """
+ Full workflow debug - Polling API for trigger events
+ Path: /apps/<uuid:app_id>/workflows/draft/trigger/run-all
+ """
+
+ @console_ns.doc("draft_workflow_trigger_run_all")
+ @console_ns.doc(description="Full workflow debug when the start node is a trigger")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[DraftWorkflowTriggerRunAllPayload.__name__])
+ @console_ns.response(200, "Workflow executed successfully")
+ @console_ns.response(403, "Permission denied")
+ @console_ns.response(500, "Internal server error")
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @get_app_model(mode=[AppMode.WORKFLOW])
+ @edit_permission_required
+ def post(self, app_model: App):
+ """
+ Full workflow debug when the start node is a trigger
+ """
+ current_user, _ = current_account_with_tenant()
+
+ args = DraftWorkflowTriggerRunAllPayload.model_validate(console_ns.payload or {})
+ node_ids = args.node_ids
+ workflow_service = WorkflowService()
+ draft_workflow = workflow_service.get_draft_workflow(app_model)
+ if not draft_workflow:
+ raise ValueError("Workflow not found")
+
+ try:
+ trigger_debug_event: TriggerDebugEvent | None = select_trigger_debug_events(
+ draft_workflow=draft_workflow,
+ app_model=app_model,
+ user_id=current_user.id,
+ node_ids=node_ids,
+ )
+ except PluginInvokeError as e:
+ return jsonable_encoder({"status": "error", "error": e.to_user_friendly_error()}), 400
+ except Exception as e:
+ logger.exception("Error polling trigger debug event")
+ raise e
+ if trigger_debug_event is None:
+ return jsonable_encoder({"status": "waiting", "retry_in": LISTENING_RETRY_IN})
+
+ try:
+ workflow_args = dict(trigger_debug_event.workflow_args)
+ workflow_args[SKIP_PREPARE_USER_INPUTS_KEY] = True
+ response = AppGenerateService.generate(
+ app_model=app_model,
+ user=current_user,
+ args=workflow_args,
+ invoke_from=InvokeFrom.DEBUGGER,
+ streaming=True,
+ root_node_id=trigger_debug_event.node_id,
+ )
+ return helper.compact_generate_response(response)
+ except InvokeRateLimitError as ex:
+ raise InvokeRateLimitHttpError(ex.description)
+ except Exception:
+ logger.exception("Error running draft workflow trigger run-all")
+ return jsonable_encoder(
+ {
+ "status": "error",
+ }
+ ), 400
diff --git a/api/controllers/console/app/workflow_app_log.py b/api/controllers/console/app/workflow_app_log.py
index 8e24be4fa7..fa67fb8154 100644
--- a/api/controllers/console/app/workflow_app_log.py
+++ b/api/controllers/console/app/workflow_app_log.py
@@ -1,82 +1,85 @@
+from datetime import datetime
+
from dateutil.parser import isoparse
-from flask_restx import Resource, marshal_with, reqparse
-from flask_restx.inputs import int_range
+from flask import request
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy.orm import Session
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
from core.workflow.enums import WorkflowExecutionStatus
from extensions.ext_database import db
-from fields.workflow_app_log_fields import workflow_app_log_pagination_fields
+from fields.workflow_app_log_fields import build_workflow_app_log_pagination_model
from libs.login import login_required
from models import App
from models.model import AppMode
from services.workflow_app_service import WorkflowAppService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class WorkflowAppLogQuery(BaseModel):
+ keyword: str | None = Field(default=None, description="Search keyword for filtering logs")
+ status: WorkflowExecutionStatus | None = Field(
+ default=None, description="Execution status filter (succeeded, failed, stopped, partial-succeeded)"
+ )
+ created_at__before: datetime | None = Field(default=None, description="Filter logs created before this timestamp")
+ created_at__after: datetime | None = Field(default=None, description="Filter logs created after this timestamp")
+ created_by_end_user_session_id: str | None = Field(default=None, description="Filter by end user session ID")
+ created_by_account: str | None = Field(default=None, description="Filter by account")
+ detail: bool = Field(default=False, description="Whether to return detailed logs")
+ page: int = Field(default=1, ge=1, le=99999, description="Page number (1-99999)")
+ limit: int = Field(default=20, ge=1, le=100, description="Number of items per page (1-100)")
+
+ @field_validator("created_at__before", "created_at__after", mode="before")
+ @classmethod
+ def parse_datetime(cls, value: str | None) -> datetime | None:
+ if value in (None, ""):
+ return None
+ return isoparse(value) # type: ignore
+
+ @field_validator("detail", mode="before")
+ @classmethod
+ def parse_bool(cls, value: bool | str | None) -> bool:
+ if isinstance(value, bool):
+ return value
+ if value is None:
+ return False
+ lowered = value.lower()
+ if lowered in {"1", "true", "yes", "on"}:
+ return True
+ if lowered in {"0", "false", "no", "off"}:
+ return False
+ raise ValueError("Invalid boolean value for detail")
+
+
+console_ns.schema_model(
+ WorkflowAppLogQuery.__name__, WorkflowAppLogQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
+# Register model for flask_restx to avoid dict type issues in Swagger
+workflow_app_log_pagination_model = build_workflow_app_log_pagination_model(console_ns)
+
@console_ns.route("/apps//workflow-app-logs")
class WorkflowAppLogApi(Resource):
- @api.doc("get_workflow_app_logs")
- @api.doc(description="Get workflow application execution logs")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(
- params={
- "keyword": "Search keyword for filtering logs",
- "status": "Filter by execution status (succeeded, failed, stopped, partial-succeeded)",
- "created_at__before": "Filter logs created before this timestamp",
- "created_at__after": "Filter logs created after this timestamp",
- "created_by_end_user_session_id": "Filter by end user session ID",
- "created_by_account": "Filter by account",
- "page": "Page number (1-99999)",
- "limit": "Number of items per page (1-100)",
- }
- )
- @api.response(200, "Workflow app logs retrieved successfully", workflow_app_log_pagination_fields)
+ @console_ns.doc("get_workflow_app_logs")
+ @console_ns.doc(description="Get workflow application execution logs")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[WorkflowAppLogQuery.__name__])
+ @console_ns.response(200, "Workflow app logs retrieved successfully", workflow_app_log_pagination_model)
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.WORKFLOW])
- @marshal_with(workflow_app_log_pagination_fields)
+ @marshal_with(workflow_app_log_pagination_model)
def get(self, app_model: App):
"""
Get workflow app logs
"""
- parser = reqparse.RequestParser()
- parser.add_argument("keyword", type=str, location="args")
- parser.add_argument(
- "status", type=str, choices=["succeeded", "failed", "stopped", "partial-succeeded"], location="args"
- )
- parser.add_argument(
- "created_at__before", type=str, location="args", help="Filter logs created before this timestamp"
- )
- parser.add_argument(
- "created_at__after", type=str, location="args", help="Filter logs created after this timestamp"
- )
- parser.add_argument(
- "created_by_end_user_session_id",
- type=str,
- location="args",
- required=False,
- default=None,
- )
- parser.add_argument(
- "created_by_account",
- type=str,
- location="args",
- required=False,
- default=None,
- )
- parser.add_argument("page", type=int_range(1, 99999), default=1, location="args")
- parser.add_argument("limit", type=int_range(1, 100), default=20, location="args")
- args = parser.parse_args()
-
- args.status = WorkflowExecutionStatus(args.status) if args.status else None
- if args.created_at__before:
- args.created_at__before = isoparse(args.created_at__before)
-
- if args.created_at__after:
- args.created_at__after = isoparse(args.created_at__after)
+ args = WorkflowAppLogQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
# get paginate workflow app logs
workflow_app_service = WorkflowAppService()
@@ -90,6 +93,7 @@ class WorkflowAppLogApi(Resource):
created_at_after=args.created_at__after,
page=args.page,
limit=args.limit,
+ detail=args.detail,
created_by_end_user_session_id=args.created_by_end_user_session_id,
created_by_account=args.created_by_account,
)
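
The reqparse-to-Pydantic switch above relies on Pydantic's lax coercion, since `request.args.to_dict(flat=True)` always yields strings. A self-contained sketch of that behaviour, using a trimmed hypothetical model rather than the actual `WorkflowAppLogQuery`:

```python
from pydantic import BaseModel, Field


class LogQuery(BaseModel):
    detail: bool = False
    page: int = Field(default=1, ge=1, le=99999)
    limit: int = Field(default=20, ge=1, le=100)


# Query-string values arrive as strings; Pydantic coerces them to the
# annotated types and enforces the ge/le bounds that int_range used to check.
q = LogQuery.model_validate({"detail": "true", "page": "2", "limit": "50"})
assert q == LogQuery(detail=True, page=2, limit=50)
```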
diff --git a/api/controllers/console/app/workflow_draft_variable.py b/api/controllers/console/app/workflow_draft_variable.py
index da6b56d026..3382b65acc 100644
--- a/api/controllers/console/app/workflow_draft_variable.py
+++ b/api/controllers/console/app/workflow_draft_variable.py
@@ -1,17 +1,19 @@
import logging
-from typing import NoReturn
+from collections.abc import Callable
+from functools import wraps
+from typing import Any, NoReturn, ParamSpec, TypeVar
-from flask import Response
-from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse
+from flask import Response, request
+from flask_restx import Resource, fields, marshal, marshal_with
+from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.error import (
DraftWorkflowNotExist,
)
from controllers.console.app.wraps import get_app_model
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from controllers.web.error import InvalidArgumentError, NotFoundError
from core.file import helpers as file_helpers
from core.variables.segment_group import SegmentGroup
@@ -21,14 +23,34 @@ from core.workflow.constants import CONVERSATION_VARIABLE_NODE_ID, SYSTEM_VARIAB
from extensions.ext_database import db
from factories.file_factory import build_from_mapping, build_from_mappings
from factories.variable_factory import build_segment_with_type
-from libs.login import current_user, login_required
+from libs.login import login_required
from models import App, AppMode
-from models.account import Account
from models.workflow import WorkflowDraftVariable
from services.workflow_draft_variable_service import WorkflowDraftVariableList, WorkflowDraftVariableService
from services.workflow_service import WorkflowService
logger = logging.getLogger(__name__)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class WorkflowDraftVariableListQuery(BaseModel):
+ page: int = Field(default=1, ge=1, le=100_000, description="Page number")
+ limit: int = Field(default=20, ge=1, le=100, description="Items per page")
+
+
+class WorkflowDraftVariableUpdatePayload(BaseModel):
+ name: str | None = Field(default=None, description="Variable name")
+ value: Any | None = Field(default=None, description="Variable value")
+
+
+console_ns.schema_model(
+ WorkflowDraftVariableListQuery.__name__,
+ WorkflowDraftVariableListQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+console_ns.schema_model(
+ WorkflowDraftVariableUpdatePayload.__name__,
+ WorkflowDraftVariableUpdatePayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
def _convert_values_to_json_serializable_object(value: Segment):
@@ -57,20 +79,6 @@ def _serialize_var_value(variable: WorkflowDraftVariable):
return _convert_values_to_json_serializable_object(value)
-def _create_pagination_parser():
- parser = reqparse.RequestParser()
- parser.add_argument(
- "page",
- type=inputs.int_range(1, 100_000),
- required=False,
- default=1,
- location="args",
- help="the page of data requested",
- )
- parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args")
- return parser
-
-
def _serialize_variable_type(workflow_draft_var: WorkflowDraftVariable) -> str:
value_type = workflow_draft_var.value_type
return value_type.exposed_type().value
@@ -139,8 +147,42 @@ _WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS = {
"items": fields.List(fields.Nested(_WORKFLOW_DRAFT_VARIABLE_FIELDS), attribute=_get_items),
}
+# Register models for flask_restx to avoid dict type issues in Swagger
+workflow_draft_variable_without_value_model = console_ns.model(
+ "WorkflowDraftVariableWithoutValue", _WORKFLOW_DRAFT_VARIABLE_WITHOUT_VALUE_FIELDS
+)
-def _api_prerequisite(f):
+workflow_draft_variable_model = console_ns.model("WorkflowDraftVariable", _WORKFLOW_DRAFT_VARIABLE_FIELDS)
+
+workflow_draft_env_variable_model = console_ns.model("WorkflowDraftEnvVariable", _WORKFLOW_DRAFT_ENV_VARIABLE_FIELDS)
+
+workflow_draft_env_variable_list_fields_copy = _WORKFLOW_DRAFT_ENV_VARIABLE_LIST_FIELDS.copy()
+workflow_draft_env_variable_list_fields_copy["items"] = fields.List(fields.Nested(workflow_draft_env_variable_model))
+workflow_draft_env_variable_list_model = console_ns.model(
+ "WorkflowDraftEnvVariableList", workflow_draft_env_variable_list_fields_copy
+)
+
+workflow_draft_variable_list_without_value_fields_copy = _WORKFLOW_DRAFT_VARIABLE_LIST_WITHOUT_VALUE_FIELDS.copy()
+workflow_draft_variable_list_without_value_fields_copy["items"] = fields.List(
+ fields.Nested(workflow_draft_variable_without_value_model), attribute=_get_items
+)
+workflow_draft_variable_list_without_value_model = console_ns.model(
+ "WorkflowDraftVariableListWithoutValue", workflow_draft_variable_list_without_value_fields_copy
+)
+
+workflow_draft_variable_list_fields_copy = _WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS.copy()
+workflow_draft_variable_list_fields_copy["items"] = fields.List(
+ fields.Nested(workflow_draft_variable_model), attribute=_get_items
+)
+workflow_draft_variable_list_model = console_ns.model(
+ "WorkflowDraftVariableList", workflow_draft_variable_list_fields_copy
+)
+
+P = ParamSpec("P")
+R = TypeVar("R")
+
+
+def _api_prerequisite(f: Callable[P, R]):
"""Common prerequisites for all draft workflow variable APIs.
It ensures the following conditions are satisfied:
@@ -154,11 +196,10 @@ def _api_prerequisite(f):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- def wrapper(*args, **kwargs):
- assert isinstance(current_user, Account)
- if not current_user.has_edit_permission:
- raise Forbidden()
+ @wraps(f)
+ def wrapper(*args: P.args, **kwargs: P.kwargs):
return f(*args, **kwargs)
return wrapper
@@ -166,19 +207,21 @@ def _api_prerequisite(f):
@console_ns.route("/apps//workflows/draft/variables")
class WorkflowVariableCollectionApi(Resource):
- @api.doc("get_workflow_variables")
- @api.doc(description="Get draft workflow variables")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(params={"page": "Page number (1-100000)", "limit": "Number of items per page (1-100)"})
- @api.response(200, "Workflow variables retrieved successfully", _WORKFLOW_DRAFT_VARIABLE_LIST_WITHOUT_VALUE_FIELDS)
+ @console_ns.expect(console_ns.models[WorkflowDraftVariableListQuery.__name__])
+ @console_ns.doc("get_workflow_variables")
+ @console_ns.doc(description="Get draft workflow variables")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.doc(params={"page": "Page number (1-100000)", "limit": "Number of items per page (1-100)"})
+ @console_ns.response(
+ 200, "Workflow variables retrieved successfully", workflow_draft_variable_list_without_value_model
+ )
@_api_prerequisite
- @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_WITHOUT_VALUE_FIELDS)
+ @marshal_with(workflow_draft_variable_list_without_value_model)
def get(self, app_model: App):
"""
Get draft workflow variables
"""
- parser = _create_pagination_parser()
- args = parser.parse_args()
+ args = WorkflowDraftVariableListQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
# fetch draft workflow by app_model
workflow_service = WorkflowService()
@@ -199,9 +242,9 @@ class WorkflowVariableCollectionApi(Resource):
return workflow_vars
- @api.doc("delete_workflow_variables")
- @api.doc(description="Delete all draft workflow variables")
- @api.response(204, "Workflow variables deleted successfully")
+ @console_ns.doc("delete_workflow_variables")
+ @console_ns.doc(description="Delete all draft workflow variables")
+ @console_ns.response(204, "Workflow variables deleted successfully")
@_api_prerequisite
def delete(self, app_model: App):
draft_var_srv = WorkflowDraftVariableService(
@@ -232,12 +275,12 @@ def validate_node_id(node_id: str) -> NoReturn | None:
@console_ns.route("/apps//workflows/draft/nodes//variables")
class NodeVariableCollectionApi(Resource):
- @api.doc("get_node_variables")
- @api.doc(description="Get variables for a specific node")
- @api.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
- @api.response(200, "Node variables retrieved successfully", _WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
+ @console_ns.doc("get_node_variables")
+ @console_ns.doc(description="Get variables for a specific node")
+ @console_ns.doc(params={"app_id": "Application ID", "node_id": "Node ID"})
+ @console_ns.response(200, "Node variables retrieved successfully", workflow_draft_variable_list_model)
@_api_prerequisite
- @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
+ @marshal_with(workflow_draft_variable_list_model)
def get(self, app_model: App, node_id: str):
validate_node_id(node_id)
with Session(bind=db.engine, expire_on_commit=False) as session:
@@ -248,9 +291,9 @@ class NodeVariableCollectionApi(Resource):
return node_vars
- @api.doc("delete_node_variables")
- @api.doc(description="Delete all variables for a specific node")
- @api.response(204, "Node variables deleted successfully")
+ @console_ns.doc("delete_node_variables")
+ @console_ns.doc(description="Delete all variables for a specific node")
+ @console_ns.response(204, "Node variables deleted successfully")
@_api_prerequisite
def delete(self, app_model: App, node_id: str):
validate_node_id(node_id)
@@ -265,13 +308,13 @@ class VariableApi(Resource):
_PATCH_NAME_FIELD = "name"
_PATCH_VALUE_FIELD = "value"
- @api.doc("get_variable")
- @api.doc(description="Get a specific workflow variable")
- @api.doc(params={"app_id": "Application ID", "variable_id": "Variable ID"})
- @api.response(200, "Variable retrieved successfully", _WORKFLOW_DRAFT_VARIABLE_FIELDS)
- @api.response(404, "Variable not found")
+ @console_ns.doc("get_variable")
+ @console_ns.doc(description="Get a specific workflow variable")
+ @console_ns.doc(params={"app_id": "Application ID", "variable_id": "Variable ID"})
+ @console_ns.response(200, "Variable retrieved successfully", workflow_draft_variable_model)
+ @console_ns.response(404, "Variable not found")
@_api_prerequisite
- @marshal_with(_WORKFLOW_DRAFT_VARIABLE_FIELDS)
+ @marshal_with(workflow_draft_variable_model)
def get(self, app_model: App, variable_id: str):
draft_var_srv = WorkflowDraftVariableService(
session=db.session(),
@@ -283,21 +326,13 @@ class VariableApi(Resource):
raise NotFoundError(description=f"variable not found, id={variable_id}")
return variable
- @api.doc("update_variable")
- @api.doc(description="Update a workflow variable")
- @api.expect(
- api.model(
- "UpdateVariableRequest",
- {
- "name": fields.String(description="Variable name"),
- "value": fields.Raw(description="Variable value"),
- },
- )
- )
- @api.response(200, "Variable updated successfully", _WORKFLOW_DRAFT_VARIABLE_FIELDS)
- @api.response(404, "Variable not found")
+ @console_ns.doc("update_variable")
+ @console_ns.doc(description="Update a workflow variable")
+ @console_ns.expect(console_ns.models[WorkflowDraftVariableUpdatePayload.__name__])
+ @console_ns.response(200, "Variable updated successfully", workflow_draft_variable_model)
+ @console_ns.response(404, "Variable not found")
@_api_prerequisite
- @marshal_with(_WORKFLOW_DRAFT_VARIABLE_FIELDS)
+ @marshal_with(workflow_draft_variable_model)
def patch(self, app_model: App, variable_id: str):
# Request payload for file types:
#
@@ -320,15 +355,10 @@ class VariableApi(Resource):
# "upload_file_id": "1602650a-4fe4-423c-85a2-af76c083e3c4"
# }
- parser = reqparse.RequestParser()
- parser.add_argument(self._PATCH_NAME_FIELD, type=str, required=False, nullable=True, location="json")
- # Parse 'value' field as-is to maintain its original data structure
- parser.add_argument(self._PATCH_VALUE_FIELD, type=lambda x: x, required=False, nullable=True, location="json")
-
draft_var_srv = WorkflowDraftVariableService(
session=db.session(),
)
- args = parser.parse_args(strict=True)
+ args_model = WorkflowDraftVariableUpdatePayload.model_validate(console_ns.payload or {})
variable = draft_var_srv.get_variable(variable_id=variable_id)
if variable is None:
@@ -336,8 +366,8 @@ class VariableApi(Resource):
if variable.app_id != app_model.id:
raise NotFoundError(description=f"variable not found, id={variable_id}")
- new_name = args.get(self._PATCH_NAME_FIELD, None)
- raw_value = args.get(self._PATCH_VALUE_FIELD, None)
+ new_name = args_model.name
+ raw_value = args_model.value
if new_name is None and raw_value is None:
return variable
@@ -358,10 +388,10 @@ class VariableApi(Resource):
db.session.commit()
return variable
- @api.doc("delete_variable")
- @api.doc(description="Delete a workflow variable")
- @api.response(204, "Variable deleted successfully")
- @api.response(404, "Variable not found")
+ @console_ns.doc("delete_variable")
+ @console_ns.doc(description="Delete a workflow variable")
+ @console_ns.response(204, "Variable deleted successfully")
+ @console_ns.response(404, "Variable not found")
@_api_prerequisite
def delete(self, app_model: App, variable_id: str):
draft_var_srv = WorkflowDraftVariableService(
@@ -379,12 +409,12 @@ class VariableApi(Resource):
@console_ns.route("/apps//workflows/draft/variables//reset")
class VariableResetApi(Resource):
- @api.doc("reset_variable")
- @api.doc(description="Reset a workflow variable to its default value")
- @api.doc(params={"app_id": "Application ID", "variable_id": "Variable ID"})
- @api.response(200, "Variable reset successfully", _WORKFLOW_DRAFT_VARIABLE_FIELDS)
- @api.response(204, "Variable reset (no content)")
- @api.response(404, "Variable not found")
+ @console_ns.doc("reset_variable")
+ @console_ns.doc(description="Reset a workflow variable to its default value")
+ @console_ns.doc(params={"app_id": "Application ID", "variable_id": "Variable ID"})
+ @console_ns.response(200, "Variable reset successfully", workflow_draft_variable_model)
+ @console_ns.response(204, "Variable reset (no content)")
+ @console_ns.response(404, "Variable not found")
@_api_prerequisite
def put(self, app_model: App, variable_id: str):
draft_var_srv = WorkflowDraftVariableService(
@@ -408,7 +438,7 @@ class VariableResetApi(Resource):
if resetted is None:
return Response("", 204)
else:
- return marshal(resetted, _WORKFLOW_DRAFT_VARIABLE_FIELDS)
+ return marshal(resetted, workflow_draft_variable_model)
def _get_variable_list(app_model: App, node_id) -> WorkflowDraftVariableList:
@@ -427,13 +457,13 @@ def _get_variable_list(app_model: App, node_id) -> WorkflowDraftVariableList:
@console_ns.route("/apps//workflows/draft/conversation-variables")
class ConversationVariableCollectionApi(Resource):
- @api.doc("get_conversation_variables")
- @api.doc(description="Get conversation variables for workflow")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Conversation variables retrieved successfully", _WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
- @api.response(404, "Draft workflow not found")
+ @console_ns.doc("get_conversation_variables")
+ @console_ns.doc(description="Get conversation variables for workflow")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Conversation variables retrieved successfully", workflow_draft_variable_list_model)
+ @console_ns.response(404, "Draft workflow not found")
@_api_prerequisite
- @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
+ @marshal_with(workflow_draft_variable_list_model)
def get(self, app_model: App):
# NOTE(QuantumGhost): Prefill conversation variables into the draft variables table
# so their IDs can be returned to the caller.
@@ -449,23 +479,23 @@ class ConversationVariableCollectionApi(Resource):
@console_ns.route("/apps//workflows/draft/system-variables")
class SystemVariableCollectionApi(Resource):
- @api.doc("get_system_variables")
- @api.doc(description="Get system variables for workflow")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "System variables retrieved successfully", _WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
+ @console_ns.doc("get_system_variables")
+ @console_ns.doc(description="Get system variables for workflow")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "System variables retrieved successfully", workflow_draft_variable_list_model)
@_api_prerequisite
- @marshal_with(_WORKFLOW_DRAFT_VARIABLE_LIST_FIELDS)
+ @marshal_with(workflow_draft_variable_list_model)
def get(self, app_model: App):
return _get_variable_list(app_model, SYSTEM_VARIABLE_NODE_ID)
@console_ns.route("/apps//workflows/draft/environment-variables")
class EnvironmentVariableCollectionApi(Resource):
- @api.doc("get_environment_variables")
- @api.doc(description="Get environment variables for workflow")
- @api.doc(params={"app_id": "Application ID"})
- @api.response(200, "Environment variables retrieved successfully")
- @api.response(404, "Draft workflow not found")
+ @console_ns.doc("get_environment_variables")
+ @console_ns.doc(description="Get environment variables for workflow")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.response(200, "Environment variables retrieved successfully")
+ @console_ns.response(404, "Draft workflow not found")
@_api_prerequisite
def get(self, app_model: App):
"""
diff --git a/api/controllers/console/app/workflow_run.py b/api/controllers/console/app/workflow_run.py
index 23ba63845c..8f1871f1e9 100644
--- a/api/controllers/console/app/workflow_run.py
+++ b/api/controllers/console/app/workflow_run.py
@@ -1,90 +1,341 @@
-from typing import cast
+from typing import Literal, cast
-from flask_login import current_user
-from flask_restx import Resource, marshal_with, reqparse
-from flask_restx.inputs import int_range
+from flask import request
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field, field_validator
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
+from fields.end_user_fields import simple_end_user_fields
+from fields.member_fields import simple_account_fields
from fields.workflow_run_fields import (
+ advanced_chat_workflow_run_for_list_fields,
advanced_chat_workflow_run_pagination_fields,
+ workflow_run_count_fields,
workflow_run_detail_fields,
+ workflow_run_for_list_fields,
+ workflow_run_node_execution_fields,
workflow_run_node_execution_list_fields,
workflow_run_pagination_fields,
)
+from libs.custom_inputs import time_duration
from libs.helper import uuid_value
-from libs.login import login_required
-from models import Account, App, AppMode, EndUser
+from libs.login import current_user, login_required
+from models import Account, App, AppMode, EndUser, WorkflowRunTriggeredFrom
from services.workflow_run_service import WorkflowRunService
+# Workflow run status choices for filtering
+WORKFLOW_RUN_STATUS_CHOICES = ["running", "succeeded", "failed", "stopped", "partial-succeeded"]
+
+# Register models for flask_restx to avoid dict type issues in Swagger
+# Register in dependency order: base models first, then dependent models
+
+# Base models
+simple_account_model = console_ns.model("SimpleAccount", simple_account_fields)
+
+simple_end_user_model = console_ns.model("SimpleEndUser", simple_end_user_fields)
+
+# Models that depend on simple_account_fields
+workflow_run_for_list_fields_copy = workflow_run_for_list_fields.copy()
+workflow_run_for_list_fields_copy["created_by_account"] = fields.Nested(
+ simple_account_model, attribute="created_by_account", allow_null=True
+)
+workflow_run_for_list_model = console_ns.model("WorkflowRunForList", workflow_run_for_list_fields_copy)
+
+advanced_chat_workflow_run_for_list_fields_copy = advanced_chat_workflow_run_for_list_fields.copy()
+advanced_chat_workflow_run_for_list_fields_copy["created_by_account"] = fields.Nested(
+ simple_account_model, attribute="created_by_account", allow_null=True
+)
+advanced_chat_workflow_run_for_list_model = console_ns.model(
+ "AdvancedChatWorkflowRunForList", advanced_chat_workflow_run_for_list_fields_copy
+)
+
+workflow_run_detail_fields_copy = workflow_run_detail_fields.copy()
+workflow_run_detail_fields_copy["created_by_account"] = fields.Nested(
+ simple_account_model, attribute="created_by_account", allow_null=True
+)
+workflow_run_detail_fields_copy["created_by_end_user"] = fields.Nested(
+ simple_end_user_model, attribute="created_by_end_user", allow_null=True
+)
+workflow_run_detail_model = console_ns.model("WorkflowRunDetail", workflow_run_detail_fields_copy)
+
+workflow_run_node_execution_fields_copy = workflow_run_node_execution_fields.copy()
+workflow_run_node_execution_fields_copy["created_by_account"] = fields.Nested(
+ simple_account_model, attribute="created_by_account", allow_null=True
+)
+workflow_run_node_execution_fields_copy["created_by_end_user"] = fields.Nested(
+ simple_end_user_model, attribute="created_by_end_user", allow_null=True
+)
+workflow_run_node_execution_model = console_ns.model(
+ "WorkflowRunNodeExecution", workflow_run_node_execution_fields_copy
+)
+
+# Simple models without nested dependencies
+workflow_run_count_model = console_ns.model("WorkflowRunCount", workflow_run_count_fields)
+
+# Pagination models that depend on list models
+advanced_chat_workflow_run_pagination_fields_copy = advanced_chat_workflow_run_pagination_fields.copy()
+advanced_chat_workflow_run_pagination_fields_copy["data"] = fields.List(
+ fields.Nested(advanced_chat_workflow_run_for_list_model), attribute="data"
+)
+advanced_chat_workflow_run_pagination_model = console_ns.model(
+ "AdvancedChatWorkflowRunPagination", advanced_chat_workflow_run_pagination_fields_copy
+)
+
+workflow_run_pagination_fields_copy = workflow_run_pagination_fields.copy()
+workflow_run_pagination_fields_copy["data"] = fields.List(fields.Nested(workflow_run_for_list_model), attribute="data")
+workflow_run_pagination_model = console_ns.model("WorkflowRunPagination", workflow_run_pagination_fields_copy)
+
+workflow_run_node_execution_list_fields_copy = workflow_run_node_execution_list_fields.copy()
+workflow_run_node_execution_list_fields_copy["data"] = fields.List(fields.Nested(workflow_run_node_execution_model))
+workflow_run_node_execution_list_model = console_ns.model(
+ "WorkflowRunNodeExecutionList", workflow_run_node_execution_list_fields_copy
+)
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class WorkflowRunListQuery(BaseModel):
+ last_id: str | None = Field(default=None, description="Last run ID for pagination")
+ limit: int = Field(default=20, ge=1, le=100, description="Number of items per page (1-100)")
+ status: Literal["running", "succeeded", "failed", "stopped", "partial-succeeded"] | None = Field(
+ default=None, description="Workflow run status filter"
+ )
+ triggered_from: Literal["debugging", "app-run"] | None = Field(
+ default=None, description="Filter by trigger source: debugging or app-run"
+ )
+
+ @field_validator("last_id")
+ @classmethod
+ def validate_last_id(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+class WorkflowRunCountQuery(BaseModel):
+ status: Literal["running", "succeeded", "failed", "stopped", "partial-succeeded"] | None = Field(
+ default=None, description="Workflow run status filter"
+ )
+ time_range: str | None = Field(default=None, description="Time range filter (e.g., 7d, 4h, 30m, 30s)")
+ triggered_from: Literal["debugging", "app-run"] | None = Field(
+ default=None, description="Filter by trigger source: debugging or app-run"
+ )
+
+ @field_validator("time_range")
+ @classmethod
+ def validate_time_range(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return time_duration(value)
+
+
+console_ns.schema_model(
+ WorkflowRunListQuery.__name__, WorkflowRunListQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+console_ns.schema_model(
+ WorkflowRunCountQuery.__name__,
+ WorkflowRunCountQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+
@console_ns.route("/apps//advanced-chat/workflow-runs")
class AdvancedChatAppWorkflowRunListApi(Resource):
- @api.doc("get_advanced_chat_workflow_runs")
- @api.doc(description="Get advanced chat workflow run list")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(params={"last_id": "Last run ID for pagination", "limit": "Number of items per page (1-100)"})
- @api.response(200, "Workflow runs retrieved successfully", advanced_chat_workflow_run_pagination_fields)
+ @console_ns.doc("get_advanced_chat_workflow_runs")
+ @console_ns.doc(description="Get advanced chat workflow run list")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.doc(params={"last_id": "Last run ID for pagination", "limit": "Number of items per page (1-100)"})
+ @console_ns.doc(
+ params={"status": "Filter by status (optional): running, succeeded, failed, stopped, partial-succeeded"}
+ )
+ @console_ns.doc(
+ params={"triggered_from": "Filter by trigger source (optional): debugging or app-run. Default: debugging"}
+ )
+ @console_ns.expect(console_ns.models[WorkflowRunListQuery.__name__])
+ @console_ns.response(200, "Workflow runs retrieved successfully", advanced_chat_workflow_run_pagination_model)
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT])
- @marshal_with(advanced_chat_workflow_run_pagination_fields)
+ @marshal_with(advanced_chat_workflow_run_pagination_model)
def get(self, app_model: App):
"""
Get advanced chat app workflow run list
"""
- parser = reqparse.RequestParser()
- parser.add_argument("last_id", type=uuid_value, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- args = parser.parse_args()
+ args_model = WorkflowRunListQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ args = args_model.model_dump(exclude_none=True)
+
+ # Default to DEBUGGING if not specified
+ triggered_from = (
+ WorkflowRunTriggeredFrom(args_model.triggered_from)
+ if args_model.triggered_from
+ else WorkflowRunTriggeredFrom.DEBUGGING
+ )
workflow_run_service = WorkflowRunService()
- result = workflow_run_service.get_paginate_advanced_chat_workflow_runs(app_model=app_model, args=args)
+ result = workflow_run_service.get_paginate_advanced_chat_workflow_runs(
+ app_model=app_model, args=args, triggered_from=triggered_from
+ )
+
+ return result
+
+
+@console_ns.route("/apps//advanced-chat/workflow-runs/count")
+class AdvancedChatAppWorkflowRunCountApi(Resource):
+ @console_ns.doc("get_advanced_chat_workflow_runs_count")
+ @console_ns.doc(description="Get advanced chat workflow runs count statistics")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.doc(
+ params={"status": "Filter by status (optional): running, succeeded, failed, stopped, partial-succeeded"}
+ )
+ @console_ns.doc(
+ params={
+ "time_range": (
+ "Filter by time range (optional): e.g., 7d (7 days), 4h (4 hours), "
+ "30m (30 minutes), 30s (30 seconds). Filters by created_at field."
+ )
+ }
+ )
+ @console_ns.doc(
+ params={"triggered_from": "Filter by trigger source (optional): debugging or app-run. Default: debugging"}
+ )
+ @console_ns.response(200, "Workflow runs count retrieved successfully", workflow_run_count_model)
+ @console_ns.expect(console_ns.models[WorkflowRunCountQuery.__name__])
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @get_app_model(mode=[AppMode.ADVANCED_CHAT])
+ @marshal_with(workflow_run_count_model)
+ def get(self, app_model: App):
+ """
+ Get advanced chat workflow runs count statistics
+ """
+ args_model = WorkflowRunCountQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ args = args_model.model_dump(exclude_none=True)
+
+ # Default to DEBUGGING if not specified
+ triggered_from = (
+ WorkflowRunTriggeredFrom(args_model.triggered_from)
+ if args_model.triggered_from
+ else WorkflowRunTriggeredFrom.DEBUGGING
+ )
+
+ workflow_run_service = WorkflowRunService()
+ result = workflow_run_service.get_workflow_runs_count(
+ app_model=app_model,
+ status=args.get("status"),
+ time_range=args.get("time_range"),
+ triggered_from=triggered_from,
+ )
return result
@console_ns.route("/apps//workflow-runs")
class WorkflowRunListApi(Resource):
- @api.doc("get_workflow_runs")
- @api.doc(description="Get workflow run list")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(params={"last_id": "Last run ID for pagination", "limit": "Number of items per page (1-100)"})
- @api.response(200, "Workflow runs retrieved successfully", workflow_run_pagination_fields)
+ @console_ns.doc("get_workflow_runs")
+ @console_ns.doc(description="Get workflow run list")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.doc(params={"last_id": "Last run ID for pagination", "limit": "Number of items per page (1-100)"})
+ @console_ns.doc(
+ params={"status": "Filter by status (optional): running, succeeded, failed, stopped, partial-succeeded"}
+ )
+ @console_ns.doc(
+ params={"triggered_from": "Filter by trigger source (optional): debugging or app-run. Default: debugging"}
+ )
+ @console_ns.response(200, "Workflow runs retrieved successfully", workflow_run_pagination_model)
+ @console_ns.expect(console_ns.models[WorkflowRunListQuery.__name__])
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_run_pagination_fields)
+ @marshal_with(workflow_run_pagination_model)
def get(self, app_model: App):
"""
Get workflow run list
"""
- parser = reqparse.RequestParser()
- parser.add_argument("last_id", type=uuid_value, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- args = parser.parse_args()
+ args_model = WorkflowRunListQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ args = args_model.model_dump(exclude_none=True)
+
+ # Default to DEBUGGING for workflow if not specified (backward compatibility)
+ triggered_from = (
+ WorkflowRunTriggeredFrom(args_model.triggered_from)
+ if args_model.triggered_from
+ else WorkflowRunTriggeredFrom.DEBUGGING
+ )
workflow_run_service = WorkflowRunService()
- result = workflow_run_service.get_paginate_workflow_runs(app_model=app_model, args=args)
+ result = workflow_run_service.get_paginate_workflow_runs(
+ app_model=app_model, args=args, triggered_from=triggered_from
+ )
+
+ return result
+
+
+@console_ns.route("/apps//workflow-runs/count")
+class WorkflowRunCountApi(Resource):
+ @console_ns.doc("get_workflow_runs_count")
+ @console_ns.doc(description="Get workflow runs count statistics")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.doc(
+ params={"status": "Filter by status (optional): running, succeeded, failed, stopped, partial-succeeded"}
+ )
+ @console_ns.doc(
+ params={
+ "time_range": (
+ "Filter by time range (optional): e.g., 7d (7 days), 4h (4 hours), "
+ "30m (30 minutes), 30s (30 seconds). Filters by created_at field."
+ )
+ }
+ )
+ @console_ns.doc(
+ params={"triggered_from": "Filter by trigger source (optional): debugging or app-run. Default: debugging"}
+ )
+ @console_ns.response(200, "Workflow runs count retrieved successfully", workflow_run_count_model)
+ @console_ns.expect(console_ns.models[WorkflowRunCountQuery.__name__])
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
+ @marshal_with(workflow_run_count_model)
+ def get(self, app_model: App):
+ """
+ Get workflow runs count statistics
+ """
+ args_model = WorkflowRunCountQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ args = args_model.model_dump(exclude_none=True)
+
+ # Default to DEBUGGING for workflow if not specified (backward compatibility)
+ triggered_from = (
+ WorkflowRunTriggeredFrom(args_model.triggered_from)
+ if args_model.triggered_from
+ else WorkflowRunTriggeredFrom.DEBUGGING
+ )
+
+ workflow_run_service = WorkflowRunService()
+ result = workflow_run_service.get_workflow_runs_count(
+ app_model=app_model,
+ status=args.get("status"),
+ time_range=args.get("time_range"),
+ triggered_from=triggered_from,
+ )
return result
@console_ns.route("/apps//workflow-runs/")
class WorkflowRunDetailApi(Resource):
- @api.doc("get_workflow_run_detail")
- @api.doc(description="Get workflow run detail")
- @api.doc(params={"app_id": "Application ID", "run_id": "Workflow run ID"})
- @api.response(200, "Workflow run detail retrieved successfully", workflow_run_detail_fields)
- @api.response(404, "Workflow run not found")
+ @console_ns.doc("get_workflow_run_detail")
+ @console_ns.doc(description="Get workflow run detail")
+ @console_ns.doc(params={"app_id": "Application ID", "run_id": "Workflow run ID"})
+ @console_ns.response(200, "Workflow run detail retrieved successfully", workflow_run_detail_model)
+ @console_ns.response(404, "Workflow run not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_run_detail_fields)
+ @marshal_with(workflow_run_detail_model)
def get(self, app_model: App, run_id):
"""
Get workflow run detail
@@ -99,16 +350,16 @@ class WorkflowRunDetailApi(Resource):
@console_ns.route("/apps//workflow-runs//node-executions")
class WorkflowRunNodeExecutionListApi(Resource):
- @api.doc("get_workflow_run_node_executions")
- @api.doc(description="Get workflow run node execution list")
- @api.doc(params={"app_id": "Application ID", "run_id": "Workflow run ID"})
- @api.response(200, "Node executions retrieved successfully", workflow_run_node_execution_list_fields)
- @api.response(404, "Workflow run not found")
+ @console_ns.doc("get_workflow_run_node_executions")
+ @console_ns.doc(description="Get workflow run node execution list")
+ @console_ns.doc(params={"app_id": "Application ID", "run_id": "Workflow run ID"})
+ @console_ns.response(200, "Node executions retrieved successfully", workflow_run_node_execution_list_model)
+ @console_ns.response(404, "Workflow run not found")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.ADVANCED_CHAT, AppMode.WORKFLOW])
- @marshal_with(workflow_run_node_execution_list_fields)
+ @marshal_with(workflow_run_node_execution_list_model)
def get(self, app_model: App, run_id):
"""
Get workflow run node execution list
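
A small, self-contained illustration of the query handling these list/count endpoints share: an omitted `triggered_from` falls back to debugging, and `model_dump(exclude_none=True)` keeps unset filters out of the args dict passed to the service. The model below is a trimmed stand-in, not the actual `WorkflowRunListQuery`:

```python
from typing import Literal

from pydantic import BaseModel, Field


class RunListQuery(BaseModel):
    last_id: str | None = None
    limit: int = Field(default=20, ge=1, le=100)
    triggered_from: Literal["debugging", "app-run"] | None = None


q = RunListQuery.model_validate({"limit": "50"})

# Unset optional filters are dropped entirely, so the service only sees
# the parameters the caller actually provided.
assert q.model_dump(exclude_none=True) == {"limit": 50}

# Backward-compatible default when the client sends no triggered_from.
assert (q.triggered_from or "debugging") == "debugging"
```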
diff --git a/api/controllers/console/app/workflow_statistic.py b/api/controllers/console/app/workflow_statistic.py
index b8904bf3d9..e48cf42762 100644
--- a/api/controllers/console/app/workflow_statistic.py
+++ b/api/controllers/console/app/workflow_statistic.py
@@ -1,311 +1,194 @@
-from datetime import datetime
-from decimal import Decimal
+from flask import abort, jsonify, request
+from flask_restx import Resource
+from pydantic import BaseModel, Field, field_validator
+from sqlalchemy.orm import sessionmaker
-import pytz
-import sqlalchemy as sa
-from flask import jsonify
-from flask_login import current_user
-from flask_restx import Resource, reqparse
-
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.app.wraps import get_app_model
from controllers.console.wraps import account_initialization_required, setup_required
from extensions.ext_database import db
-from libs.helper import DatetimeString
-from libs.login import login_required
+from libs.datetime_utils import parse_time_range
+from libs.login import current_account_with_tenant, login_required
from models.enums import WorkflowRunTriggeredFrom
from models.model import AppMode
+from repositories.factory import DifyAPIRepositoryFactory
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class WorkflowStatisticQuery(BaseModel):
+ start: str | None = Field(default=None, description="Start date and time (YYYY-MM-DD HH:MM)")
+ end: str | None = Field(default=None, description="End date and time (YYYY-MM-DD HH:MM)")
+
+ @field_validator("start", "end", mode="before")
+ @classmethod
+ def blank_to_none(cls, value: str | None) -> str | None:
+ if value == "":
+ return None
+ return value
+
+
+console_ns.schema_model(
+ WorkflowStatisticQuery.__name__,
+ WorkflowStatisticQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
@console_ns.route("/apps//workflow/statistics/daily-conversations")
class WorkflowDailyRunsStatistic(Resource):
- @api.doc("get_workflow_daily_runs_statistic")
- @api.doc(description="Get workflow daily runs statistics")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(params={"start": "Start date and time (YYYY-MM-DD HH:MM)", "end": "End date and time (YYYY-MM-DD HH:MM)"})
- @api.response(200, "Daily runs statistics retrieved successfully")
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
+ self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
+
+ @console_ns.doc("get_workflow_daily_runs_statistic")
+ @console_ns.doc(description="Get workflow daily runs statistics")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[WorkflowStatisticQuery.__name__])
+ @console_ns.response(200, "Daily runs statistics retrieved successfully")
@get_app_model
@setup_required
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = WorkflowStatisticQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
- COUNT(id) AS runs
-FROM
- workflow_runs
-WHERE
- app_id = :app_id
- AND triggered_from = :triggered_from"""
- arg_dict = {
- "tz": account.timezone,
- "app_id": app_model.id,
- "triggered_from": WorkflowRunTriggeredFrom.APP_RUN,
- }
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
+ try:
+ start_date, end_date = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
-
- sql_query += " AND created_at >= :start"
- arg_dict["start"] = start_datetime_utc
-
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
- sql_query += " AND created_at < :end"
- arg_dict["end"] = end_datetime_utc
-
- sql_query += " GROUP BY date ORDER BY date"
-
- response_data = []
-
- with db.engine.begin() as conn:
- rs = conn.execute(sa.text(sql_query), arg_dict)
- for i in rs:
- response_data.append({"date": str(i.date), "runs": i.runs})
+ response_data = self._workflow_run_repo.get_daily_runs_statistics(
+ tenant_id=app_model.tenant_id,
+ app_id=app_model.id,
+ triggered_from=WorkflowRunTriggeredFrom.APP_RUN,
+ start_date=start_date,
+ end_date=end_date,
+ timezone=account.timezone,
+ )
return jsonify({"data": response_data})
@console_ns.route("/apps//workflow/statistics/daily-terminals")
class WorkflowDailyTerminalsStatistic(Resource):
- @api.doc("get_workflow_daily_terminals_statistic")
- @api.doc(description="Get workflow daily terminals statistics")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(params={"start": "Start date and time (YYYY-MM-DD HH:MM)", "end": "End date and time (YYYY-MM-DD HH:MM)"})
- @api.response(200, "Daily terminals statistics retrieved successfully")
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
+ self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
+
+ @console_ns.doc("get_workflow_daily_terminals_statistic")
+ @console_ns.doc(description="Get workflow daily terminals statistics")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[WorkflowStatisticQuery.__name__])
+ @console_ns.response(200, "Daily terminals statistics retrieved successfully")
@get_app_model
@setup_required
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = WorkflowStatisticQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
- COUNT(DISTINCT workflow_runs.created_by) AS terminal_count
-FROM
- workflow_runs
-WHERE
- app_id = :app_id
- AND triggered_from = :triggered_from"""
- arg_dict = {
- "tz": account.timezone,
- "app_id": app_model.id,
- "triggered_from": WorkflowRunTriggeredFrom.APP_RUN,
- }
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
+ try:
+ start_date, end_date = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
-
- sql_query += " AND created_at >= :start"
- arg_dict["start"] = start_datetime_utc
-
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
- sql_query += " AND created_at < :end"
- arg_dict["end"] = end_datetime_utc
-
- sql_query += " GROUP BY date ORDER BY date"
-
- response_data = []
-
- with db.engine.begin() as conn:
- rs = conn.execute(sa.text(sql_query), arg_dict)
- for i in rs:
- response_data.append({"date": str(i.date), "terminal_count": i.terminal_count})
+ response_data = self._workflow_run_repo.get_daily_terminals_statistics(
+ tenant_id=app_model.tenant_id,
+ app_id=app_model.id,
+ triggered_from=WorkflowRunTriggeredFrom.APP_RUN,
+ start_date=start_date,
+ end_date=end_date,
+ timezone=account.timezone,
+ )
return jsonify({"data": response_data})
@console_ns.route("/apps//workflow/statistics/token-costs")
class WorkflowDailyTokenCostStatistic(Resource):
- @api.doc("get_workflow_daily_token_cost_statistic")
- @api.doc(description="Get workflow daily token cost statistics")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(params={"start": "Start date and time (YYYY-MM-DD HH:MM)", "end": "End date and time (YYYY-MM-DD HH:MM)"})
- @api.response(200, "Daily token cost statistics retrieved successfully")
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
+ self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
+
+ @console_ns.doc("get_workflow_daily_token_cost_statistic")
+ @console_ns.doc(description="Get workflow daily token cost statistics")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[WorkflowStatisticQuery.__name__])
+ @console_ns.response(200, "Daily token cost statistics retrieved successfully")
@get_app_model
@setup_required
@login_required
@account_initialization_required
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = WorkflowStatisticQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- DATE(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
- SUM(workflow_runs.total_tokens) AS token_count
-FROM
- workflow_runs
-WHERE
- app_id = :app_id
- AND triggered_from = :triggered_from"""
- arg_dict = {
- "tz": account.timezone,
- "app_id": app_model.id,
- "triggered_from": WorkflowRunTriggeredFrom.APP_RUN,
- }
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
+ try:
+ start_date, end_date = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
-
- sql_query += " AND created_at >= :start"
- arg_dict["start"] = start_datetime_utc
-
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
- sql_query += " AND created_at < :end"
- arg_dict["end"] = end_datetime_utc
-
- sql_query += " GROUP BY date ORDER BY date"
-
- response_data = []
-
- with db.engine.begin() as conn:
- rs = conn.execute(sa.text(sql_query), arg_dict)
- for i in rs:
- response_data.append(
- {
- "date": str(i.date),
- "token_count": i.token_count,
- }
- )
+ response_data = self._workflow_run_repo.get_daily_token_cost_statistics(
+ tenant_id=app_model.tenant_id,
+ app_id=app_model.id,
+ triggered_from=WorkflowRunTriggeredFrom.APP_RUN,
+ start_date=start_date,
+ end_date=end_date,
+ timezone=account.timezone,
+ )
return jsonify({"data": response_data})
@console_ns.route("/apps//workflow/statistics/average-app-interactions")
class WorkflowAverageAppInteractionStatistic(Resource):
- @api.doc("get_workflow_average_app_interaction_statistic")
- @api.doc(description="Get workflow average app interaction statistics")
- @api.doc(params={"app_id": "Application ID"})
- @api.doc(params={"start": "Start date and time (YYYY-MM-DD HH:MM)", "end": "End date and time (YYYY-MM-DD HH:MM)"})
- @api.response(200, "Average app interaction statistics retrieved successfully")
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ session_maker = sessionmaker(bind=db.engine, expire_on_commit=False)
+ self._workflow_run_repo = DifyAPIRepositoryFactory.create_api_workflow_run_repository(session_maker)
+
+ @console_ns.doc("get_workflow_average_app_interaction_statistic")
+ @console_ns.doc(description="Get workflow average app interaction statistics")
+ @console_ns.doc(params={"app_id": "Application ID"})
+ @console_ns.expect(console_ns.models[WorkflowStatisticQuery.__name__])
+ @console_ns.response(200, "Average app interaction statistics retrieved successfully")
@setup_required
@login_required
@account_initialization_required
@get_app_model(mode=[AppMode.WORKFLOW])
def get(self, app_model):
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("start", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- parser.add_argument("end", type=DatetimeString("%Y-%m-%d %H:%M"), location="args")
- args = parser.parse_args()
+ args = WorkflowStatisticQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- sql_query = """SELECT
- AVG(sub.interactions) AS interactions,
- sub.date
-FROM
- (
- SELECT
- DATE(DATE_TRUNC('day', c.created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
- c.created_by,
- COUNT(c.id) AS interactions
- FROM
- workflow_runs c
- WHERE
- c.app_id = :app_id
- AND c.triggered_from = :triggered_from
- {{start}}
- {{end}}
- GROUP BY
- date, c.created_by
- ) sub
-GROUP BY
- sub.date"""
- arg_dict = {
- "tz": account.timezone,
- "app_id": app_model.id,
- "triggered_from": WorkflowRunTriggeredFrom.APP_RUN,
- }
+ assert account.timezone is not None
- timezone = pytz.timezone(account.timezone)
- utc_timezone = pytz.utc
+ try:
+ start_date, end_date = parse_time_range(args.start, args.end, account.timezone)
+ except ValueError as e:
+ abort(400, description=str(e))
- if args["start"]:
- start_datetime = datetime.strptime(args["start"], "%Y-%m-%d %H:%M")
- start_datetime = start_datetime.replace(second=0)
-
- start_datetime_timezone = timezone.localize(start_datetime)
- start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
-
- sql_query = sql_query.replace("{{start}}", " AND c.created_at >= :start")
- arg_dict["start"] = start_datetime_utc
- else:
- sql_query = sql_query.replace("{{start}}", "")
-
- if args["end"]:
- end_datetime = datetime.strptime(args["end"], "%Y-%m-%d %H:%M")
- end_datetime = end_datetime.replace(second=0)
-
- end_datetime_timezone = timezone.localize(end_datetime)
- end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
-
- sql_query = sql_query.replace("{{end}}", " AND c.created_at < :end")
- arg_dict["end"] = end_datetime_utc
- else:
- sql_query = sql_query.replace("{{end}}", "")
-
- response_data = []
-
- with db.engine.begin() as conn:
- rs = conn.execute(sa.text(sql_query), arg_dict)
- for i in rs:
- response_data.append(
- {"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))}
- )
+ response_data = self._workflow_run_repo.get_average_app_interaction_statistics(
+ tenant_id=app_model.tenant_id,
+ app_id=app_model.id,
+ triggered_from=WorkflowRunTriggeredFrom.APP_RUN,
+ start_date=start_date,
+ end_date=end_date,
+ timezone=account.timezone,
+ )
return jsonify({"data": response_data})
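
The statistics endpoints now delegate the timezone math to `libs.datetime_utils.parse_time_range`. Below is a hedged reconstruction of what that helper plausibly does, based on the deleted inline logic (naive "YYYY-MM-DD HH:MM" strings localized to the account timezone, then converted to UTC); the start-after-end `ValueError` is an assumption inferred from the new `abort(400)` handling, and the real helper may differ in details:

```python
from datetime import datetime

import pytz


def parse_time_range_sketch(start: str | None, end: str | None, tz_name: str):
    """Sketch: convert naive local 'YYYY-MM-DD HH:MM' bounds to aware UTC datetimes."""
    tz = pytz.timezone(tz_name)

    def to_utc(value: str | None) -> datetime | None:
        if not value:
            return None
        # Mirrors the removed code: drop seconds, localize, convert to UTC.
        naive = datetime.strptime(value, "%Y-%m-%d %H:%M").replace(second=0)
        return tz.localize(naive).astimezone(pytz.utc)

    start_dt, end_dt = to_utc(start), to_utc(end)
    if start_dt and end_dt and start_dt > end_dt:
        raise ValueError("start must not be later than end")  # assumed validation
    return start_dt, end_dt


print(parse_time_range_sketch("2024-06-01 08:00", "2024-06-02 08:00", "Asia/Shanghai"))
```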
diff --git a/api/controllers/console/app/workflow_trigger.py b/api/controllers/console/app/workflow_trigger.py
new file mode 100644
index 0000000000..9433b732e4
--- /dev/null
+++ b/api/controllers/console/app/workflow_trigger.py
@@ -0,0 +1,157 @@
+import logging
+
+from flask import request
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+from werkzeug.exceptions import NotFound
+
+from configs import dify_config
+from extensions.ext_database import db
+from fields.workflow_trigger_fields import trigger_fields, triggers_list_fields, webhook_trigger_fields
+from libs.login import current_user, login_required
+from models.enums import AppTriggerStatus
+from models.model import Account, App, AppMode
+from models.trigger import AppTrigger, WorkflowWebhookTrigger
+
+from .. import console_ns
+from ..app.wraps import get_app_model
+from ..wraps import account_initialization_required, edit_permission_required, setup_required
+
+logger = logging.getLogger(__name__)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class Parser(BaseModel):
+ node_id: str
+
+
+class ParserEnable(BaseModel):
+ trigger_id: str
+ enable_trigger: bool
+
+
+console_ns.schema_model(Parser.__name__, Parser.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+console_ns.schema_model(
+ ParserEnable.__name__, ParserEnable.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
+
+@console_ns.route("/apps//workflows/triggers/webhook")
+class WebhookTriggerApi(Resource):
+ """Webhook Trigger API"""
+
+ @console_ns.expect(console_ns.models[Parser.__name__])
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @get_app_model(mode=AppMode.WORKFLOW)
+ @marshal_with(webhook_trigger_fields)
+ def get(self, app_model: App):
+ """Get webhook trigger for a node"""
+ args = Parser.model_validate(request.args.to_dict(flat=True)) # type: ignore
+
+ node_id = args.node_id
+
+ with Session(db.engine) as session:
+ # Get webhook trigger for this app and node
+ webhook_trigger = (
+ session.query(WorkflowWebhookTrigger)
+ .where(
+ WorkflowWebhookTrigger.app_id == app_model.id,
+ WorkflowWebhookTrigger.node_id == node_id,
+ )
+ .first()
+ )
+
+ if not webhook_trigger:
+ raise NotFound("Webhook trigger not found for this node")
+
+ return webhook_trigger
+
+
+@console_ns.route("/apps//triggers")
+class AppTriggersApi(Resource):
+ """App Triggers list API"""
+
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @get_app_model(mode=AppMode.WORKFLOW)
+ @marshal_with(triggers_list_fields)
+ def get(self, app_model: App):
+ """Get app triggers list"""
+ assert isinstance(current_user, Account)
+ assert current_user.current_tenant_id is not None
+
+ with Session(db.engine) as session:
+ # Get all triggers for this app using the select() API
+ triggers = (
+ session.execute(
+ select(AppTrigger)
+ .where(
+ AppTrigger.tenant_id == current_user.current_tenant_id,
+ AppTrigger.app_id == app_model.id,
+ )
+ .order_by(AppTrigger.created_at.desc(), AppTrigger.id.desc())
+ )
+ .scalars()
+ .all()
+ )
+
+ # Add computed icon field for each trigger
+ url_prefix = dify_config.CONSOLE_API_URL + "/console/api/workspaces/current/tool-provider/builtin/"
+ for trigger in triggers:
+ if trigger.trigger_type == "trigger-plugin":
+ trigger.icon = url_prefix + trigger.provider_name + "/icon" # type: ignore
+ else:
+ trigger.icon = "" # type: ignore
+
+ return {"data": triggers}
+
+
+@console_ns.route("/apps//trigger-enable")
+class AppTriggerEnableApi(Resource):
+ @console_ns.expect(console_ns.models[ParserEnable.__name__])
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @edit_permission_required
+ @get_app_model(mode=AppMode.WORKFLOW)
+ @marshal_with(trigger_fields)
+ def post(self, app_model: App):
+ """Update app trigger (enable/disable)"""
+ args = ParserEnable.model_validate(console_ns.payload)
+
+ assert current_user.current_tenant_id is not None
+
+ trigger_id = args.trigger_id
+ with Session(db.engine) as session:
+ # Find the trigger using select
+ trigger = session.execute(
+ select(AppTrigger).where(
+ AppTrigger.id == trigger_id,
+ AppTrigger.tenant_id == current_user.current_tenant_id,
+ AppTrigger.app_id == app_model.id,
+ )
+ ).scalar_one_or_none()
+
+ if not trigger:
+ raise NotFound("Trigger not found")
+
+ # Update status based on enable_trigger boolean
+ trigger.status = AppTriggerStatus.ENABLED if args.enable_trigger else AppTriggerStatus.DISABLED
+
+ session.commit()
+ session.refresh(trigger)
+
+ # Add computed icon field
+ url_prefix = dify_config.CONSOLE_API_URL + "/console/api/workspaces/current/tool-provider/builtin/"
+ if trigger.trigger_type == "trigger-plugin":
+ trigger.icon = url_prefix + trigger.provider_name + "/icon" # type: ignore
+ else:
+ trigger.icon = "" # type: ignore
+
+ return trigger
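
Every controller in this migration repeats the same registration dance: dump the Pydantic model to a Swagger-2.0-flavored JSON schema, register it on the namespace under the class name, then point `@console_ns.expect` at `console_ns.models[...]`. A self-contained sketch of the pattern (names here are illustrative):

```python
# Self-contained sketch of the Pydantic -> flask_restx schema registration
# pattern used throughout these controllers.
from flask import Flask
from flask_restx import Api, Namespace, Resource
from pydantic import BaseModel

app = Flask(__name__)
api = Api(app)
demo_ns = Namespace("demo")
api.add_namespace(demo_ns)

DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"


class DemoPayload(BaseModel):
    name: str


# Register the JSON schema so @expect can reference it by name in the docs
demo_ns.schema_model(
    DemoPayload.__name__,
    DemoPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
)


@demo_ns.route("/demo")
class Demo(Resource):
    @demo_ns.expect(demo_ns.models[DemoPayload.__name__])
    def post(self):
        # Validation happens explicitly via Pydantic, not via flask_restx
        payload = DemoPayload.model_validate(demo_ns.payload)
        return {"name": payload.name}
```
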
diff --git a/api/controllers/console/app/wraps.py b/api/controllers/console/app/wraps.py
index 44aba01820..9bb2718f89 100644
--- a/api/controllers/console/app/wraps.py
+++ b/api/controllers/console/app/wraps.py
@@ -4,28 +4,29 @@ from typing import ParamSpec, TypeVar, Union
from controllers.console.app.error import AppNotFoundError
from extensions.ext_database import db
-from libs.login import current_user
+from libs.login import current_account_with_tenant
from models import App, AppMode
-from models.account import Account
P = ParamSpec("P")
R = TypeVar("R")
+P1 = ParamSpec("P1")
+R1 = TypeVar("R1")
def _load_app_model(app_id: str) -> App | None:
- assert isinstance(current_user, Account)
+ _, current_tenant_id = current_account_with_tenant()
app_model = (
db.session.query(App)
- .where(App.id == app_id, App.tenant_id == current_user.current_tenant_id, App.status == "normal")
+ .where(App.id == app_id, App.tenant_id == current_tenant_id, App.status == "normal")
.first()
)
return app_model
def get_app_model(view: Callable[P, R] | None = None, *, mode: Union[AppMode, list[AppMode], None] = None):
- def decorator(view_func: Callable[P, R]):
+ def decorator(view_func: Callable[P1, R1]):
@wraps(view_func)
- def decorated_view(*args: P.args, **kwargs: P.kwargs):
+ def decorated_view(*args: P1.args, **kwargs: P1.kwargs):
if not kwargs.get("app_id"):
raise ValueError("missing app_id in path parameters")
diff --git a/api/controllers/console/auth/activate.py b/api/controllers/console/auth/activate.py
index 76171e3f8a..6834656a7f 100644
--- a/api/controllers/console/auth/activate.py
+++ b/api/controllers/console/auth/activate.py
@@ -1,36 +1,57 @@
from flask import request
-from flask_restx import Resource, fields, reqparse
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field, field_validator
from constants.languages import supported_language
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.error import AlreadyActivateError
from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
-from libs.helper import StrLen, email, extract_remote_ip, timezone
-from models.account import AccountStatus
+from libs.helper import EmailStr, extract_remote_ip, timezone
+from models import AccountStatus
from services.account_service import AccountService, RegisterService
-active_check_parser = reqparse.RequestParser()
-active_check_parser.add_argument(
- "workspace_id", type=str, required=False, nullable=True, location="args", help="Workspace ID"
-)
-active_check_parser.add_argument(
- "email", type=email, required=False, nullable=True, location="args", help="Email address"
-)
-active_check_parser.add_argument(
- "token", type=str, required=True, nullable=False, location="args", help="Activation token"
-)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ActivateCheckQuery(BaseModel):
+ workspace_id: str | None = Field(default=None)
+ email: EmailStr | None = Field(default=None)
+ token: str
+
+
+class ActivatePayload(BaseModel):
+ workspace_id: str | None = Field(default=None)
+ email: EmailStr | None = Field(default=None)
+ token: str
+ name: str = Field(..., max_length=30)
+ interface_language: str = Field(...)
+ timezone: str = Field(...)
+
+ @field_validator("interface_language")
+ @classmethod
+ def validate_lang(cls, value: str) -> str:
+ return supported_language(value)
+
+ @field_validator("timezone")
+ @classmethod
+ def validate_tz(cls, value: str) -> str:
+ return timezone(value)
+
+
+for model in (ActivateCheckQuery, ActivatePayload):
+ console_ns.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
@console_ns.route("/activate/check")
class ActivateCheckApi(Resource):
- @api.doc("check_activation_token")
- @api.doc(description="Check if activation token is valid")
- @api.expect(active_check_parser)
- @api.response(
+ @console_ns.doc("check_activation_token")
+ @console_ns.doc(description="Check if activation token is valid")
+ @console_ns.expect(console_ns.models[ActivateCheckQuery.__name__])
+ @console_ns.response(
200,
"Success",
- api.model(
+ console_ns.model(
"ActivationCheckResponse",
{
"is_valid": fields.Boolean(description="Whether token is valid"),
@@ -39,11 +60,11 @@ class ActivateCheckApi(Resource):
),
)
def get(self):
- args = active_check_parser.parse_args()
+ args = ActivateCheckQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- workspaceId = args["workspace_id"]
- reg_email = args["email"]
- token = args["token"]
+ workspaceId = args.workspace_id
+ reg_email = args.email
+ token = args.token
invitation = RegisterService.get_invitation_if_token_valid(workspaceId, reg_email, token)
if invitation:
@@ -60,26 +81,15 @@ class ActivateCheckApi(Resource):
return {"is_valid": False}
-active_parser = reqparse.RequestParser()
-active_parser.add_argument("workspace_id", type=str, required=False, nullable=True, location="json")
-active_parser.add_argument("email", type=email, required=False, nullable=True, location="json")
-active_parser.add_argument("token", type=str, required=True, nullable=False, location="json")
-active_parser.add_argument("name", type=StrLen(30), required=True, nullable=False, location="json")
-active_parser.add_argument(
- "interface_language", type=supported_language, required=True, nullable=False, location="json"
-)
-active_parser.add_argument("timezone", type=timezone, required=True, nullable=False, location="json")
-
-
@console_ns.route("/activate")
class ActivateApi(Resource):
- @api.doc("activate_account")
- @api.doc(description="Activate account with invitation token")
- @api.expect(active_parser)
- @api.response(
+ @console_ns.doc("activate_account")
+ @console_ns.doc(description="Activate account with invitation token")
+ @console_ns.expect(console_ns.models[ActivatePayload.__name__])
+ @console_ns.response(
200,
"Account activated successfully",
- api.model(
+ console_ns.model(
"ActivationResponse",
{
"result": fields.String(description="Operation result"),
@@ -87,21 +97,21 @@ class ActivateApi(Resource):
},
),
)
- @api.response(400, "Already activated or invalid token")
+ @console_ns.response(400, "Already activated or invalid token")
def post(self):
- args = active_parser.parse_args()
+ args = ActivatePayload.model_validate(console_ns.payload)
- invitation = RegisterService.get_invitation_if_token_valid(args["workspace_id"], args["email"], args["token"])
+ invitation = RegisterService.get_invitation_if_token_valid(args.workspace_id, args.email, args.token)
if invitation is None:
raise AlreadyActivateError()
- RegisterService.revoke_token(args["workspace_id"], args["email"], args["token"])
+ RegisterService.revoke_token(args.workspace_id, args.email, args.token)
account = invitation["account"]
- account.name = args["name"]
+ account.name = args.name
- account.interface_language = args["interface_language"]
- account.timezone = args["timezone"]
+ account.interface_language = args.interface_language
+ account.timezone = args.timezone
account.interface_theme = "light"
account.status = AccountStatus.ACTIVE
account.initialized_at = naive_utc_now()
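
A pattern worth calling out in these payload models: the old `type=` callables from reqparse (`supported_language`, `timezone`, `valid_password`) already raise `ValueError` on bad input, so they slot directly into Pydantic v2 `field_validator`s. A self-contained sketch with a stand-in validator:

```python
# Sketch of the bridging pattern: a legacy reqparse "type=" callable (which
# raises ValueError on bad input) reused inside a Pydantic field_validator.
from pydantic import BaseModel, Field, field_validator


def supported_language(value: str) -> str:
    # Stand-in for constants.languages.supported_language
    if value not in {"en-US", "zh-Hans"}:
        raise ValueError(f"{value} is not a valid language.")
    return value


class ExamplePayload(BaseModel):
    name: str = Field(..., max_length=30)
    interface_language: str

    @field_validator("interface_language")
    @classmethod
    def validate_lang(cls, value: str) -> str:
        # Pydantic surfaces the ValueError as a ValidationError
        return supported_language(value)
```
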
diff --git a/api/controllers/console/auth/data_source_bearer_auth.py b/api/controllers/console/auth/data_source_bearer_auth.py
index 207303b212..905d0daef0 100644
--- a/api/controllers/console/auth/data_source_bearer_auth.py
+++ b/api/controllers/console/auth/data_source_bearer_auth.py
@@ -1,13 +1,26 @@
-from flask_login import current_user
-from flask_restx import Resource, reqparse
-from werkzeug.exceptions import Forbidden
+from flask_restx import Resource
+from pydantic import BaseModel, Field
-from controllers.console import console_ns
-from controllers.console.auth.error import ApiKeyAuthFailedError
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from services.auth.api_key_auth_service import ApiKeyAuthService
-from ..wraps import account_initialization_required, setup_required
+from .. import console_ns
+from ..auth.error import ApiKeyAuthFailedError
+from ..wraps import account_initialization_required, is_admin_or_owner_required, setup_required
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ApiKeyAuthBindingPayload(BaseModel):
+ category: str = Field(...)
+ provider: str = Field(...)
+ credentials: dict = Field(...)
+
+
+console_ns.schema_model(
+ ApiKeyAuthBindingPayload.__name__,
+ ApiKeyAuthBindingPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
@console_ns.route("/api-key-auth/data-source")
@@ -16,7 +29,8 @@ class ApiKeyAuthDataSource(Resource):
@login_required
@account_initialization_required
def get(self):
- data_source_api_key_bindings = ApiKeyAuthService.get_provider_auth_list(current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ data_source_api_key_bindings = ApiKeyAuthService.get_provider_auth_list(current_tenant_id)
if data_source_api_key_bindings:
return {
"sources": [
@@ -39,18 +53,16 @@ class ApiKeyAuthDataSourceBinding(Resource):
@setup_required
@login_required
@account_initialization_required
+ @is_admin_or_owner_required
+ @console_ns.expect(console_ns.models[ApiKeyAuthBindingPayload.__name__])
def post(self):
# The role of the current user in the table must be admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("category", type=str, required=True, nullable=False, location="json")
- parser.add_argument("provider", type=str, required=True, nullable=False, location="json")
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- args = parser.parse_args()
- ApiKeyAuthService.validate_api_key_auth_args(args)
+ _, current_tenant_id = current_account_with_tenant()
+ payload = ApiKeyAuthBindingPayload.model_validate(console_ns.payload)
+ data = payload.model_dump()
+ ApiKeyAuthService.validate_api_key_auth_args(data)
try:
- ApiKeyAuthService.create_provider_auth(current_user.current_tenant_id, args)
+ ApiKeyAuthService.create_provider_auth(current_tenant_id, data)
except Exception as e:
raise ApiKeyAuthFailedError(str(e))
return {"result": "success"}, 200
@@ -61,11 +73,11 @@ class ApiKeyAuthDataSourceBindingDelete(Resource):
@setup_required
@login_required
@account_initialization_required
+ @is_admin_or_owner_required
def delete(self, binding_id):
# The role of the current user in the table must be admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
+ _, current_tenant_id = current_account_with_tenant()
- ApiKeyAuthService.delete_provider_auth(current_user.current_tenant_id, binding_id)
+ ApiKeyAuthService.delete_provider_auth(current_tenant_id, binding_id)
return {"result": "success"}, 204
diff --git a/api/controllers/console/auth/data_source_oauth.py b/api/controllers/console/auth/data_source_oauth.py
index 6f1fd2f11a..0dd7d33ae9 100644
--- a/api/controllers/console/auth/data_source_oauth.py
+++ b/api/controllers/console/auth/data_source_oauth.py
@@ -2,16 +2,14 @@ import logging
import httpx
from flask import current_app, redirect, request
-from flask_login import current_user
from flask_restx import Resource, fields
-from werkzeug.exceptions import Forbidden
from configs import dify_config
-from controllers.console import api, console_ns
from libs.login import login_required
from libs.oauth_data_source import NotionOAuth
-from ..wraps import account_initialization_required, setup_required
+from .. import console_ns
+from ..wraps import account_initialization_required, is_admin_or_owner_required, setup_required
logger = logging.getLogger(__name__)
@@ -30,23 +28,22 @@ def get_oauth_providers():
@console_ns.route("/oauth/data-source/")
class OAuthDataSource(Resource):
- @api.doc("oauth_data_source")
- @api.doc(description="Get OAuth authorization URL for data source provider")
- @api.doc(params={"provider": "Data source provider name (notion)"})
- @api.response(
+ @console_ns.doc("oauth_data_source")
+ @console_ns.doc(description="Get OAuth authorization URL for data source provider")
+ @console_ns.doc(params={"provider": "Data source provider name (notion)"})
+ @console_ns.response(
200,
"Authorization URL or internal setup success",
- api.model(
+ console_ns.model(
"OAuthDataSourceResponse",
{"data": fields.Raw(description="Authorization URL or 'internal' for internal setup")},
),
)
- @api.response(400, "Invalid provider")
- @api.response(403, "Admin privileges required")
+ @console_ns.response(400, "Invalid provider")
+ @console_ns.response(403, "Admin privileges required")
+ @is_admin_or_owner_required
def get(self, provider: str):
# The role of the current user in the table must be admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
OAUTH_DATASOURCE_PROVIDERS = get_oauth_providers()
with current_app.app_context():
oauth_provider = OAUTH_DATASOURCE_PROVIDERS.get(provider)
@@ -65,17 +62,17 @@ class OAuthDataSource(Resource):
@console_ns.route("/oauth/data-source/callback/")
class OAuthDataSourceCallback(Resource):
- @api.doc("oauth_data_source_callback")
- @api.doc(description="Handle OAuth callback from data source provider")
- @api.doc(
+ @console_ns.doc("oauth_data_source_callback")
+ @console_ns.doc(description="Handle OAuth callback from data source provider")
+ @console_ns.doc(
params={
"provider": "Data source provider name (notion)",
"code": "Authorization code from OAuth provider",
"error": "Error message from OAuth provider",
}
)
- @api.response(302, "Redirect to console with result")
- @api.response(400, "Invalid provider")
+ @console_ns.response(302, "Redirect to console with result")
+ @console_ns.response(400, "Invalid provider")
def get(self, provider: str):
OAUTH_DATASOURCE_PROVIDERS = get_oauth_providers()
with current_app.app_context():
@@ -96,17 +93,17 @@ class OAuthDataSourceCallback(Resource):
@console_ns.route("/oauth/data-source/binding/")
class OAuthDataSourceBinding(Resource):
- @api.doc("oauth_data_source_binding")
- @api.doc(description="Bind OAuth data source with authorization code")
- @api.doc(
+ @console_ns.doc("oauth_data_source_binding")
+ @console_ns.doc(description="Bind OAuth data source with authorization code")
+ @console_ns.doc(
params={"provider": "Data source provider name (notion)", "code": "Authorization code from OAuth provider"}
)
- @api.response(
+ @console_ns.response(
200,
"Data source binding success",
- api.model("OAuthDataSourceBindingResponse", {"result": fields.String(description="Operation result")}),
+ console_ns.model("OAuthDataSourceBindingResponse", {"result": fields.String(description="Operation result")}),
)
- @api.response(400, "Invalid provider or code")
+ @console_ns.response(400, "Invalid provider or code")
def get(self, provider: str):
OAUTH_DATASOURCE_PROVIDERS = get_oauth_providers()
with current_app.app_context():
@@ -130,15 +127,15 @@ class OAuthDataSourceBinding(Resource):
@console_ns.route("/oauth/data-source///sync")
class OAuthDataSourceSync(Resource):
- @api.doc("oauth_data_source_sync")
- @api.doc(description="Sync data from OAuth data source")
- @api.doc(params={"provider": "Data source provider name (notion)", "binding_id": "Data source binding ID"})
- @api.response(
+ @console_ns.doc("oauth_data_source_sync")
+ @console_ns.doc(description="Sync data from OAuth data source")
+ @console_ns.doc(params={"provider": "Data source provider name (notion)", "binding_id": "Data source binding ID"})
+ @console_ns.response(
200,
"Data source sync success",
- api.model("OAuthDataSourceSyncResponse", {"result": fields.String(description="Operation result")}),
+ console_ns.model("OAuthDataSourceSyncResponse", {"result": fields.String(description="Operation result")}),
)
- @api.response(400, "Invalid provider or sync failed")
+ @console_ns.response(400, "Invalid provider or sync failed")
@setup_required
@login_required
@account_initialization_required
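
For orientation, a hypothetical client-side walk through the endpoints above (host, session handling, and provider name are placeholders for illustration):

```python
# Hypothetical usage sketch of the data-source OAuth endpoints above;
# host, cookies, and provider name are assumptions for illustration.
import requests

BASE = "https://console.example.com/console/api"
session = requests.Session()  # assumes an authenticated console session

# 1. Ask the console for the provider's authorization URL
resp = session.get(f"{BASE}/oauth/data-source/notion")
auth_url = resp.json()["data"]  # "internal" when using an internal integration

# 2. The browser visits auth_url; the provider then redirects back to
#    /oauth/data-source/callback/notion?code=..., after which the console
#    binds the workspace via /oauth/data-source/binding/notion and data can
#    be pulled through /oauth/data-source/notion/<binding_id>/sync.
```
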
diff --git a/api/controllers/console/auth/email_register.py b/api/controllers/console/auth/email_register.py
index d3613d9183..fa082c735d 100644
--- a/api/controllers/console/auth/email_register.py
+++ b/api/controllers/console/auth/email_register.py
@@ -1,5 +1,6 @@
from flask import request
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -14,16 +15,45 @@ from controllers.console.auth.error import (
InvalidTokenError,
PasswordMismatchError,
)
-from controllers.console.error import AccountInFreezeError, EmailSendIpLimitError
-from controllers.console.wraps import email_password_login_enabled, email_register_enabled, setup_required
from extensions.ext_database import db
-from libs.helper import email, extract_remote_ip
+from libs.helper import EmailStr, extract_remote_ip
from libs.password import valid_password
-from models.account import Account
+from models import Account
from services.account_service import AccountService
from services.billing_service import BillingService
from services.errors.account import AccountNotFoundError, AccountRegisterError
+from ..error import AccountInFreezeError, EmailSendIpLimitError
+from ..wraps import email_password_login_enabled, email_register_enabled, setup_required
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class EmailRegisterSendPayload(BaseModel):
+ email: EmailStr = Field(..., description="Email address")
+ language: str | None = Field(default=None, description="Language code")
+
+
+class EmailRegisterValidityPayload(BaseModel):
+ email: EmailStr = Field(...)
+ code: str = Field(...)
+ token: str = Field(...)
+
+
+class EmailRegisterResetPayload(BaseModel):
+ token: str = Field(...)
+ new_password: str = Field(...)
+ password_confirm: str = Field(...)
+
+ @field_validator("new_password", "password_confirm")
+ @classmethod
+ def validate_password(cls, value: str) -> str:
+ return valid_password(value)
+
+
+for model in (EmailRegisterSendPayload, EmailRegisterValidityPayload, EmailRegisterResetPayload):
+ console_ns.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
@console_ns.route("/email-register/send-email")
class EmailRegisterSendEmailApi(Resource):
@@ -31,25 +61,22 @@ class EmailRegisterSendEmailApi(Resource):
@email_password_login_enabled
@email_register_enabled
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("language", type=str, required=False, location="json")
- args = parser.parse_args()
+ args = EmailRegisterSendPayload.model_validate(console_ns.payload)
ip_address = extract_remote_ip(request)
if AccountService.is_email_send_ip_limit(ip_address):
raise EmailSendIpLimitError()
language = "en-US"
- if args["language"] in languages:
- language = args["language"]
+ if args.language in languages:
+ language = args.language
- if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(args["email"]):
+ if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(args.email):
raise AccountInFreezeError()
with Session(db.engine) as session:
- account = session.execute(select(Account).filter_by(email=args["email"])).scalar_one_or_none()
+ account = session.execute(select(Account).filter_by(email=args.email)).scalar_one_or_none()
token = None
- token = AccountService.send_email_register_email(email=args["email"], account=account, language=language)
+ token = AccountService.send_email_register_email(email=args.email, account=account, language=language)
return {"result": "success", "data": token}
@@ -59,38 +86,34 @@ class EmailRegisterCheckApi(Resource):
@email_password_login_enabled
@email_register_enabled
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=str, required=True, location="json")
- parser.add_argument("code", type=str, required=True, location="json")
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ args = EmailRegisterValidityPayload.model_validate(console_ns.payload)
- user_email = args["email"]
+ user_email = args.email
- is_email_register_error_rate_limit = AccountService.is_email_register_error_rate_limit(args["email"])
+ is_email_register_error_rate_limit = AccountService.is_email_register_error_rate_limit(args.email)
if is_email_register_error_rate_limit:
raise EmailRegisterLimitError()
- token_data = AccountService.get_email_register_data(args["token"])
+ token_data = AccountService.get_email_register_data(args.token)
if token_data is None:
raise InvalidTokenError()
if user_email != token_data.get("email"):
raise InvalidEmailError()
- if args["code"] != token_data.get("code"):
- AccountService.add_email_register_error_rate_limit(args["email"])
+ if args.code != token_data.get("code"):
+ AccountService.add_email_register_error_rate_limit(args.email)
raise EmailCodeError()
# Verified, revoke the first token
- AccountService.revoke_email_register_token(args["token"])
+ AccountService.revoke_email_register_token(args.token)
# Refresh token data by generating a new token
_, new_token = AccountService.generate_email_register_token(
- user_email, code=args["code"], additional_data={"phase": "register"}
+ user_email, code=args.code, additional_data={"phase": "register"}
)
- AccountService.reset_email_register_error_rate_limit(args["email"])
+ AccountService.reset_email_register_error_rate_limit(args.email)
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
@@ -100,18 +123,14 @@ class EmailRegisterResetApi(Resource):
@email_password_login_enabled
@email_register_enabled
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- parser.add_argument("new_password", type=valid_password, required=True, nullable=False, location="json")
- parser.add_argument("password_confirm", type=valid_password, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ args = EmailRegisterResetPayload.model_validate(console_ns.payload)
# Validate passwords match
- if args["new_password"] != args["password_confirm"]:
+ if args.new_password != args.password_confirm:
raise PasswordMismatchError()
# Validate token and get register data
- register_data = AccountService.get_email_register_data(args["token"])
+ register_data = AccountService.get_email_register_data(args.token)
if not register_data:
raise InvalidTokenError()
# Must use token in reset phase
@@ -119,7 +138,7 @@ class EmailRegisterResetApi(Resource):
raise InvalidTokenError()
# Revoke token to prevent reuse
- AccountService.revoke_email_register_token(args["token"])
+ AccountService.revoke_email_register_token(args.token)
email = register_data.get("email", "")
@@ -129,7 +148,7 @@ class EmailRegisterResetApi(Resource):
if account:
raise EmailAlreadyInUseError()
else:
- account = self._create_new_account(email, args["password_confirm"])
+ account = self._create_new_account(email, args.password_confirm)
if not account:
raise AccountNotFoundError()
token_pair = AccountService.login(account=account, ip_address=extract_remote_ip(request))
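
One behavioral difference to keep in mind across all of these conversions: `reqparse` produced HTTP 400 responses on invalid input by itself, whereas `Model.model_validate(...)` raises `pydantic.ValidationError`. Presumably an app-level handler maps that to a 400; a minimal sketch of such a handler (whether and where Dify registers one is an assumption):

```python
# Sketch of mapping Pydantic validation failures to HTTP 400, matching the
# old reqparse behavior; registration point and payload shape are assumed.
from flask import Flask, jsonify
from pydantic import ValidationError

app = Flask(__name__)


@app.errorhandler(ValidationError)
def handle_validation_error(e: ValidationError):
    # str(e) keeps the response JSON-serializable regardless of error context
    return jsonify({"code": "invalid_param", "message": str(e)}), 400
```
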
diff --git a/api/controllers/console/auth/forgot_password.py b/api/controllers/console/auth/forgot_password.py
index 704bcf8fb8..661f591182 100644
--- a/api/controllers/console/auth/forgot_password.py
+++ b/api/controllers/console/auth/forgot_password.py
@@ -2,11 +2,12 @@ import base64
import secrets
from flask import request
-from flask_restx import Resource, fields, reqparse
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy import select
from sqlalchemy.orm import Session
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.auth.error import (
EmailCodeError,
EmailPasswordResetLimitError,
@@ -18,30 +19,50 @@ from controllers.console.error import AccountNotFound, EmailSendIpLimitError
from controllers.console.wraps import email_password_login_enabled, setup_required
from events.tenant_event import tenant_was_created
from extensions.ext_database import db
-from libs.helper import email, extract_remote_ip
+from libs.helper import EmailStr, extract_remote_ip
from libs.password import hash_password, valid_password
-from models.account import Account
+from models import Account
from services.account_service import AccountService, TenantService
from services.feature_service import FeatureService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ForgotPasswordSendPayload(BaseModel):
+ email: EmailStr = Field(...)
+ language: str | None = Field(default=None)
+
+
+class ForgotPasswordCheckPayload(BaseModel):
+ email: EmailStr = Field(...)
+ code: str = Field(...)
+ token: str = Field(...)
+
+
+class ForgotPasswordResetPayload(BaseModel):
+ token: str = Field(...)
+ new_password: str = Field(...)
+ password_confirm: str = Field(...)
+
+ @field_validator("new_password", "password_confirm")
+ @classmethod
+ def validate_password(cls, value: str) -> str:
+ return valid_password(value)
+
+
+for model in (ForgotPasswordSendPayload, ForgotPasswordCheckPayload, ForgotPasswordResetPayload):
+ console_ns.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
@console_ns.route("/forgot-password")
class ForgotPasswordSendEmailApi(Resource):
- @api.doc("send_forgot_password_email")
- @api.doc(description="Send password reset email")
- @api.expect(
- api.model(
- "ForgotPasswordEmailRequest",
- {
- "email": fields.String(required=True, description="Email address"),
- "language": fields.String(description="Language for email (zh-Hans/en-US)"),
- },
- )
- )
- @api.response(
+ @console_ns.doc("send_forgot_password_email")
+ @console_ns.doc(description="Send password reset email")
+ @console_ns.expect(console_ns.models[ForgotPasswordSendPayload.__name__])
+ @console_ns.response(
200,
"Email sent successfully",
- api.model(
+ console_ns.model(
"ForgotPasswordEmailResponse",
{
"result": fields.String(description="Operation result"),
@@ -50,30 +71,27 @@ class ForgotPasswordSendEmailApi(Resource):
},
),
)
- @api.response(400, "Invalid email or rate limit exceeded")
+ @console_ns.response(400, "Invalid email or rate limit exceeded")
@setup_required
@email_password_login_enabled
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("language", type=str, required=False, location="json")
- args = parser.parse_args()
+ args = ForgotPasswordSendPayload.model_validate(console_ns.payload)
ip_address = extract_remote_ip(request)
if AccountService.is_email_send_ip_limit(ip_address):
raise EmailSendIpLimitError()
- if args["language"] is not None and args["language"] == "zh-Hans":
+ if args.language is not None and args.language == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
with Session(db.engine) as session:
- account = session.execute(select(Account).filter_by(email=args["email"])).scalar_one_or_none()
+ account = session.execute(select(Account).filter_by(email=args.email)).scalar_one_or_none()
token = AccountService.send_reset_password_email(
account=account,
- email=args["email"],
+ email=args.email,
language=language,
is_allow_register=FeatureService.get_system_features().is_allow_register,
)
@@ -83,22 +101,13 @@ class ForgotPasswordSendEmailApi(Resource):
@console_ns.route("/forgot-password/validity")
class ForgotPasswordCheckApi(Resource):
- @api.doc("check_forgot_password_code")
- @api.doc(description="Verify password reset code")
- @api.expect(
- api.model(
- "ForgotPasswordCheckRequest",
- {
- "email": fields.String(required=True, description="Email address"),
- "code": fields.String(required=True, description="Verification code"),
- "token": fields.String(required=True, description="Reset token"),
- },
- )
- )
- @api.response(
+ @console_ns.doc("check_forgot_password_code")
+ @console_ns.doc(description="Verify password reset code")
+ @console_ns.expect(console_ns.models[ForgotPasswordCheckPayload.__name__])
+ @console_ns.response(
200,
"Code verified successfully",
- api.model(
+ console_ns.model(
"ForgotPasswordCheckResponse",
{
"is_valid": fields.Boolean(description="Whether code is valid"),
@@ -107,80 +116,63 @@ class ForgotPasswordCheckApi(Resource):
},
),
)
- @api.response(400, "Invalid code or token")
+ @console_ns.response(400, "Invalid code or token")
@setup_required
@email_password_login_enabled
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=str, required=True, location="json")
- parser.add_argument("code", type=str, required=True, location="json")
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ args = ForgotPasswordCheckPayload.model_validate(console_ns.payload)
- user_email = args["email"]
+ user_email = args.email
- is_forgot_password_error_rate_limit = AccountService.is_forgot_password_error_rate_limit(args["email"])
+ is_forgot_password_error_rate_limit = AccountService.is_forgot_password_error_rate_limit(args.email)
if is_forgot_password_error_rate_limit:
raise EmailPasswordResetLimitError()
- token_data = AccountService.get_reset_password_data(args["token"])
+ token_data = AccountService.get_reset_password_data(args.token)
if token_data is None:
raise InvalidTokenError()
if user_email != token_data.get("email"):
raise InvalidEmailError()
- if args["code"] != token_data.get("code"):
- AccountService.add_forgot_password_error_rate_limit(args["email"])
+ if args.code != token_data.get("code"):
+ AccountService.add_forgot_password_error_rate_limit(args.email)
raise EmailCodeError()
# Verified, revoke the first token
- AccountService.revoke_reset_password_token(args["token"])
+ AccountService.revoke_reset_password_token(args.token)
# Refresh token data by generating a new token
_, new_token = AccountService.generate_reset_password_token(
- user_email, code=args["code"], additional_data={"phase": "reset"}
+ user_email, code=args.code, additional_data={"phase": "reset"}
)
- AccountService.reset_forgot_password_error_rate_limit(args["email"])
+ AccountService.reset_forgot_password_error_rate_limit(args.email)
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
@console_ns.route("/forgot-password/resets")
class ForgotPasswordResetApi(Resource):
- @api.doc("reset_password")
- @api.doc(description="Reset password with verification token")
- @api.expect(
- api.model(
- "ForgotPasswordResetRequest",
- {
- "token": fields.String(required=True, description="Verification token"),
- "new_password": fields.String(required=True, description="New password"),
- "password_confirm": fields.String(required=True, description="Password confirmation"),
- },
- )
- )
- @api.response(
+ @console_ns.doc("reset_password")
+ @console_ns.doc(description="Reset password with verification token")
+ @console_ns.expect(console_ns.models[ForgotPasswordResetPayload.__name__])
+ @console_ns.response(
200,
"Password reset successfully",
- api.model("ForgotPasswordResetResponse", {"result": fields.String(description="Operation result")}),
+ console_ns.model("ForgotPasswordResetResponse", {"result": fields.String(description="Operation result")}),
)
- @api.response(400, "Invalid token or password mismatch")
+ @console_ns.response(400, "Invalid token or password mismatch")
@setup_required
@email_password_login_enabled
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- parser.add_argument("new_password", type=valid_password, required=True, nullable=False, location="json")
- parser.add_argument("password_confirm", type=valid_password, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ args = ForgotPasswordResetPayload.model_validate(console_ns.payload)
# Validate passwords match
- if args["new_password"] != args["password_confirm"]:
+ if args.new_password != args.password_confirm:
raise PasswordMismatchError()
# Validate token and get reset data
- reset_data = AccountService.get_reset_password_data(args["token"])
+ reset_data = AccountService.get_reset_password_data(args.token)
if not reset_data:
raise InvalidTokenError()
# Must use token in reset phase
@@ -188,11 +180,11 @@ class ForgotPasswordResetApi(Resource):
raise InvalidTokenError()
# Revoke token to prevent reuse
- AccountService.revoke_reset_password_token(args["token"])
+ AccountService.revoke_reset_password_token(args.token)
# Generate secure salt and hash password
salt = secrets.token_bytes(16)
- password_hashed = hash_password(args["new_password"], salt)
+ password_hashed = hash_password(args.new_password, salt)
email = reset_data.get("email", "")
diff --git a/api/controllers/console/auth/login.py b/api/controllers/console/auth/login.py
index ba614aa828..f486f4c313 100644
--- a/api/controllers/console/auth/login.py
+++ b/api/controllers/console/auth/login.py
@@ -1,12 +1,11 @@
-from typing import cast
-
import flask_login
-from flask import request
-from flask_restx import Resource, reqparse
+from flask import make_response, request
+from flask_restx import Resource
+from pydantic import BaseModel, Field
import services
from configs import dify_config
-from constants.languages import languages
+from constants.languages import get_valid_language
from controllers.console import console_ns
from controllers.console.auth.error import (
AuthenticationFailedError,
@@ -25,14 +24,53 @@ from controllers.console.error import (
)
from controllers.console.wraps import email_password_login_enabled, setup_required
from events.tenant_event import tenant_was_created
-from libs.helper import email, extract_remote_ip
-from models.account import Account
+from libs.helper import EmailStr, extract_remote_ip
+from libs.login import current_account_with_tenant
+from libs.token import (
+ clear_access_token_from_cookie,
+ clear_csrf_token_from_cookie,
+ clear_refresh_token_from_cookie,
+ extract_refresh_token,
+ set_access_token_to_cookie,
+ set_csrf_token_to_cookie,
+ set_refresh_token_to_cookie,
+)
from services.account_service import AccountService, RegisterService, TenantService
from services.billing_service import BillingService
from services.errors.account import AccountRegisterError
from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkspacesLimitExceededError
from services.feature_service import FeatureService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class LoginPayload(BaseModel):
+ email: EmailStr = Field(..., description="Email address")
+ password: str = Field(..., description="Password")
+ remember_me: bool = Field(default=False, description="Remember me flag")
+ invite_token: str | None = Field(default=None, description="Invitation token")
+
+
+class EmailPayload(BaseModel):
+ email: EmailStr = Field(...)
+ language: str | None = Field(default=None)
+
+
+class EmailCodeLoginPayload(BaseModel):
+ email: EmailStr = Field(...)
+ code: str = Field(...)
+ token: str = Field(...)
+ language: str | None = Field(default=None)
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(LoginPayload)
+reg(EmailPayload)
+reg(EmailCodeLoginPayload)
+
@console_ns.route("/login")
class LoginApi(Resource):
@@ -40,39 +78,36 @@ class LoginApi(Resource):
@setup_required
@email_password_login_enabled
+ @console_ns.expect(console_ns.models[LoginPayload.__name__])
def post(self):
"""Authenticate user and login."""
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("password", type=str, required=True, location="json")
- parser.add_argument("remember_me", type=bool, required=False, default=False, location="json")
- parser.add_argument("invite_token", type=str, required=False, default=None, location="json")
- args = parser.parse_args()
+ args = LoginPayload.model_validate(console_ns.payload)
- if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(args["email"]):
+ if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(args.email):
raise AccountInFreezeError()
- is_login_error_rate_limit = AccountService.is_login_error_rate_limit(args["email"])
+ is_login_error_rate_limit = AccountService.is_login_error_rate_limit(args.email)
if is_login_error_rate_limit:
raise EmailPasswordLoginLimitError()
- invitation = args["invite_token"]
+ # TODO: clarify why `invitation` is re-assigned with a different type
+ invitation = args.invite_token # type: ignore
if invitation:
- invitation = RegisterService.get_invitation_if_token_valid(None, args["email"], invitation)
+ invitation = RegisterService.get_invitation_if_token_valid(None, args.email, invitation) # type: ignore
try:
if invitation:
- data = invitation.get("data", {})
+ data = invitation.get("data", {}) # type: ignore
invitee_email = data.get("email") if data else None
- if invitee_email != args["email"]:
+ if invitee_email != args.email:
raise InvalidEmailError()
- account = AccountService.authenticate(args["email"], args["password"], args["invite_token"])
+ account = AccountService.authenticate(args.email, args.password, args.invite_token)
else:
- account = AccountService.authenticate(args["email"], args["password"])
+ account = AccountService.authenticate(args.email, args.password)
except services.errors.account.AccountLoginError:
raise AccountBannedError()
except services.errors.account.AccountPasswordError:
- AccountService.add_login_error_rate_limit(args["email"])
+ AccountService.add_login_error_rate_limit(args.email)
raise AuthenticationFailedError()
# SELF_HOSTED only have one workspace
tenants = TenantService.get_join_tenants(account)
@@ -88,43 +123,58 @@ class LoginApi(Resource):
}
token_pair = AccountService.login(account=account, ip_address=extract_remote_ip(request))
- AccountService.reset_login_error_rate_limit(args["email"])
- return {"result": "success", "data": token_pair.model_dump()}
+ AccountService.reset_login_error_rate_limit(args.email)
+
+ # Create response with cookies instead of returning tokens in body
+ response = make_response({"result": "success"})
+
+ set_access_token_to_cookie(request, response, token_pair.access_token)
+ set_refresh_token_to_cookie(request, response, token_pair.refresh_token)
+ set_csrf_token_to_cookie(request, response, token_pair.csrf_token)
+
+ return response
@console_ns.route("/logout")
class LogoutApi(Resource):
@setup_required
- def get(self):
- account = cast(Account, flask_login.current_user)
+ def post(self):
+ account, _ = current_account_with_tenant()
if isinstance(account, flask_login.AnonymousUserMixin):
- return {"result": "success"}
- AccountService.logout(account=account)
- flask_login.logout_user()
- return {"result": "success"}
+ response = make_response({"result": "success"})
+ else:
+ AccountService.logout(account=account)
+ flask_login.logout_user()
+ response = make_response({"result": "success"})
+
+ # Clear cookies on logout
+ clear_access_token_from_cookie(response)
+ clear_refresh_token_from_cookie(response)
+ clear_csrf_token_from_cookie(response)
+
+ return response
@console_ns.route("/reset-password")
class ResetPasswordSendEmailApi(Resource):
@setup_required
@email_password_login_enabled
+ @console_ns.expect(console_ns.models[EmailPayload.__name__])
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("language", type=str, required=False, location="json")
- args = parser.parse_args()
+ args = EmailPayload.model_validate(console_ns.payload)
- if args["language"] is not None and args["language"] == "zh-Hans":
+ if args.language is not None and args.language == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
try:
- account = AccountService.get_user_through_email(args["email"])
+ account = AccountService.get_user_through_email(args.email)
except AccountRegisterError:
raise AccountInFreezeError()
token = AccountService.send_reset_password_email(
- email=args["email"],
+ email=args.email,
account=account,
language=language,
is_allow_register=FeatureService.get_system_features().is_allow_register,
@@ -136,28 +186,26 @@ class ResetPasswordSendEmailApi(Resource):
@console_ns.route("/email-code-login")
class EmailCodeLoginSendEmailApi(Resource):
@setup_required
+ @console_ns.expect(console_ns.models[EmailPayload.__name__])
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("language", type=str, required=False, location="json")
- args = parser.parse_args()
+ args = EmailPayload.model_validate(console_ns.payload)
ip_address = extract_remote_ip(request)
if AccountService.is_email_send_ip_limit(ip_address):
raise EmailSendIpLimitError()
- if args["language"] is not None and args["language"] == "zh-Hans":
+ if args.language is not None and args.language == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
try:
- account = AccountService.get_user_through_email(args["email"])
+ account = AccountService.get_user_through_email(args.email)
except AccountRegisterError:
raise AccountInFreezeError()
if account is None:
if FeatureService.get_system_features().is_allow_register:
- token = AccountService.send_email_code_login_email(email=args["email"], language=language)
+ token = AccountService.send_email_code_login_email(email=args.email, language=language)
else:
raise AccountNotFound()
else:
@@ -169,26 +217,24 @@ class EmailCodeLoginSendEmailApi(Resource):
@console_ns.route("/email-code-login/validity")
class EmailCodeLoginApi(Resource):
@setup_required
+ @console_ns.expect(console_ns.models[EmailCodeLoginPayload.__name__])
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=str, required=True, location="json")
- parser.add_argument("code", type=str, required=True, location="json")
- parser.add_argument("token", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = EmailCodeLoginPayload.model_validate(console_ns.payload)
- user_email = args["email"]
+ user_email = args.email
+ language = args.language
- token_data = AccountService.get_email_code_login_data(args["token"])
+ token_data = AccountService.get_email_code_login_data(args.token)
if token_data is None:
raise InvalidTokenError()
- if token_data["email"] != args["email"]:
+ if token_data["email"] != args.email:
raise InvalidEmailError()
- if token_data["code"] != args["code"]:
+ if token_data["code"] != args.code:
raise EmailCodeError()
- AccountService.revoke_email_code_login_token(args["token"])
+ AccountService.revoke_email_code_login_token(args.token)
try:
account = AccountService.get_user_through_email(user_email)
except AccountRegisterError:
@@ -210,7 +256,9 @@ class EmailCodeLoginApi(Resource):
if account is None:
try:
account = AccountService.create_account_and_tenant(
- email=user_email, name=user_email, interface_language=languages[0]
+ email=user_email,
+ name=user_email,
+ interface_language=get_valid_language(language),
)
except WorkSpaceNotAllowedCreateError:
raise NotAllowedCreateWorkspace()
@@ -219,19 +267,37 @@ class EmailCodeLoginApi(Resource):
except WorkspacesLimitExceededError:
raise WorkspacesLimitExceeded()
token_pair = AccountService.login(account, ip_address=extract_remote_ip(request))
- AccountService.reset_login_error_rate_limit(args["email"])
- return {"result": "success", "data": token_pair.model_dump()}
+ AccountService.reset_login_error_rate_limit(args.email)
+
+ # Create response with cookies instead of returning tokens in body
+ response = make_response({"result": "success"})
+
+ set_csrf_token_to_cookie(request, response, token_pair.csrf_token)
+ # Set HTTP-only secure cookies for tokens
+ set_access_token_to_cookie(request, response, token_pair.access_token)
+ set_refresh_token_to_cookie(request, response, token_pair.refresh_token)
+ return response
@console_ns.route("/refresh-token")
class RefreshTokenApi(Resource):
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("refresh_token", type=str, required=True, location="json")
- args = parser.parse_args()
+ # Get refresh token from cookie instead of request body
+ refresh_token = extract_refresh_token(request)
+
+ if not refresh_token:
+ return {"result": "fail", "message": "No refresh token provided"}, 401
try:
- new_token_pair = AccountService.refresh_token(args["refresh_token"])
- return {"result": "success", "data": new_token_pair.model_dump()}
+ new_token_pair = AccountService.refresh_token(refresh_token)
+
+ # Create response with new cookies
+ response = make_response({"result": "success"})
+
+ # Update cookies with new tokens
+ set_csrf_token_to_cookie(request, response, new_token_pair.csrf_token)
+ set_access_token_to_cookie(request, response, new_token_pair.access_token)
+ set_refresh_token_to_cookie(request, response, new_token_pair.refresh_token)
+ return response
except Exception as e:
- return {"result": "fail", "data": str(e)}, 401
+ return {"result": "fail", "message": str(e)}, 401
diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py
index 4efeceb676..7ad1e56373 100644
--- a/api/controllers/console/auth/oauth.py
+++ b/api/controllers/console/auth/oauth.py
@@ -14,15 +14,19 @@ from extensions.ext_database import db
from libs.datetime_utils import naive_utc_now
from libs.helper import extract_remote_ip
from libs.oauth import GitHubOAuth, GoogleOAuth, OAuthUserInfo
-from models import Account
-from models.account import AccountStatus
+from libs.token import (
+ set_access_token_to_cookie,
+ set_csrf_token_to_cookie,
+ set_refresh_token_to_cookie,
+)
+from models import Account, AccountStatus
from services.account_service import AccountService, RegisterService, TenantService
from services.billing_service import BillingService
from services.errors.account import AccountNotFoundError, AccountRegisterError
from services.errors.workspace import WorkSpaceNotAllowedCreateError, WorkSpaceNotFoundError
from services.feature_service import FeatureService
-from .. import api, console_ns
+from .. import console_ns
logger = logging.getLogger(__name__)
@@ -52,11 +56,13 @@ def get_oauth_providers():
@console_ns.route("/oauth/login/")
class OAuthLogin(Resource):
- @api.doc("oauth_login")
- @api.doc(description="Initiate OAuth login process")
- @api.doc(params={"provider": "OAuth provider name (github/google)", "invite_token": "Optional invitation token"})
- @api.response(302, "Redirect to OAuth authorization URL")
- @api.response(400, "Invalid provider")
+ @console_ns.doc("oauth_login")
+ @console_ns.doc(description="Initiate OAuth login process")
+ @console_ns.doc(
+ params={"provider": "OAuth provider name (github/google)", "invite_token": "Optional invitation token"}
+ )
+ @console_ns.response(302, "Redirect to OAuth authorization URL")
+ @console_ns.response(400, "Invalid provider")
def get(self, provider: str):
invite_token = request.args.get("invite_token") or None
OAUTH_PROVIDERS = get_oauth_providers()
@@ -71,17 +77,17 @@ class OAuthLogin(Resource):
@console_ns.route("/oauth/authorize/")
class OAuthCallback(Resource):
- @api.doc("oauth_callback")
- @api.doc(description="Handle OAuth callback and complete login process")
- @api.doc(
+ @console_ns.doc("oauth_callback")
+ @console_ns.doc(description="Handle OAuth callback and complete login process")
+ @console_ns.doc(
params={
"provider": "OAuth provider name (github/google)",
"code": "Authorization code from OAuth provider",
"state": "Optional state parameter (used for invite token)",
}
)
- @api.response(302, "Redirect to console with access token")
- @api.response(400, "OAuth process failed")
+ @console_ns.response(302, "Redirect to console with access token")
+ @console_ns.response(400, "OAuth process failed")
def get(self, provider: str):
OAUTH_PROVIDERS = get_oauth_providers()
with current_app.app_context():
@@ -153,9 +159,12 @@ class OAuthCallback(Resource):
ip_address=extract_remote_ip(request),
)
- return redirect(
- f"{dify_config.CONSOLE_WEB_URL}?access_token={token_pair.access_token}&refresh_token={token_pair.refresh_token}"
- )
+ response = redirect(f"{dify_config.CONSOLE_WEB_URL}")
+
+ set_access_token_to_cookie(request, response, token_pair.access_token)
+ set_refresh_token_to_cookie(request, response, token_pair.refresh_token)
+ set_csrf_token_to_cookie(request, response, token_pair.csrf_token)
+ return response
def _get_account_by_openid_or_email(provider: str, user_info: OAuthUserInfo) -> Account | None:
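
With the access token moved into an HttpOnly cookie, the separate `csrf_token` cookie points to a double-submit scheme: the frontend echoes the JS-readable CSRF cookie in a request header and the server compares the two. The verification code is not in this diff, so the following is only a hedged sketch with assumed header and cookie names:

```python
# Hedged sketch of a double-submit CSRF check; names and mechanism are
# assumptions, since the verification code is not part of this diff.
import hmac

from flask import Request
from werkzeug.exceptions import Forbidden


def verify_csrf(request: Request) -> None:
    cookie_value = request.cookies.get("csrf_token")
    header_value = request.headers.get("X-CSRF-Token")
    if not cookie_value or not header_value:
        raise Forbidden("missing CSRF token")
    # Constant-time comparison to avoid timing side channels
    if not hmac.compare_digest(cookie_value, header_value):
        raise Forbidden("CSRF token mismatch")
```
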
diff --git a/api/controllers/console/auth/oauth_server.py b/api/controllers/console/auth/oauth_server.py
index 46281860ae..6162d88a0b 100644
--- a/api/controllers/console/auth/oauth_server.py
+++ b/api/controllers/console/auth/oauth_server.py
@@ -1,16 +1,16 @@
from collections.abc import Callable
from functools import wraps
-from typing import Concatenate, ParamSpec, TypeVar, cast
+from typing import Concatenate, ParamSpec, TypeVar
-import flask_login
from flask import jsonify, request
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel
from werkzeug.exceptions import BadRequest, NotFound
from controllers.console.wraps import account_initialization_required, setup_required
from core.model_runtime.utils.encoders import jsonable_encoder
-from libs.login import login_required
-from models.account import Account
+from libs.login import current_account_with_tenant, login_required
+from models import Account
from models.model import OAuthProviderApp
from services.oauth_server import OAUTH_ACCESS_TOKEN_EXPIRES_IN, OAuthGrantType, OAuthServerService
@@ -21,16 +21,34 @@ R = TypeVar("R")
T = TypeVar("T")
+class OAuthClientPayload(BaseModel):
+ client_id: str
+
+
+class OAuthProviderRequest(BaseModel):
+ client_id: str
+ redirect_uri: str
+
+
+class OAuthTokenRequest(BaseModel):
+ client_id: str
+ grant_type: str
+ code: str | None = None
+ client_secret: str | None = None
+ redirect_uri: str | None = None
+ refresh_token: str | None = None
+
+
def oauth_server_client_id_required(view: Callable[Concatenate[T, OAuthProviderApp, P], R]):
@wraps(view)
def decorated(self: T, *args: P.args, **kwargs: P.kwargs):
- parser = reqparse.RequestParser()
- parser.add_argument("client_id", type=str, required=True, location="json")
- parsed_args = parser.parse_args()
- client_id = parsed_args.get("client_id")
- if not client_id:
+ json_data = request.get_json()
+ if json_data is None:
raise BadRequest("client_id is required")
+ payload = OAuthClientPayload.model_validate(json_data)
+ client_id = payload.client_id
+
oauth_provider_app = OAuthServerService.get_oauth_provider_app(client_id)
if not oauth_provider_app:
raise NotFound("client_id is invalid")
@@ -91,10 +109,8 @@ class OAuthServerAppApi(Resource):
@setup_required
@oauth_server_client_id_required
def post(self, oauth_provider_app: OAuthProviderApp):
- parser = reqparse.RequestParser()
- parser.add_argument("redirect_uri", type=str, required=True, location="json")
- parsed_args = parser.parse_args()
- redirect_uri = parsed_args.get("redirect_uri")
+ payload = OAuthProviderRequest.model_validate(request.get_json())
+ redirect_uri = payload.redirect_uri
# check if redirect_uri is valid
if redirect_uri not in oauth_provider_app.redirect_uris:
@@ -116,7 +132,8 @@ class OAuthServerUserAuthorizeApi(Resource):
@account_initialization_required
@oauth_server_client_id_required
def post(self, oauth_provider_app: OAuthProviderApp):
- account = cast(Account, flask_login.current_user)
+ account, _ = current_account_with_tenant()
user_account_id = account.id
code = OAuthServerService.sign_oauth_authorization_code(oauth_provider_app.client_id, user_account_id)
@@ -132,31 +149,25 @@ class OAuthServerUserTokenApi(Resource):
@setup_required
@oauth_server_client_id_required
def post(self, oauth_provider_app: OAuthProviderApp):
- parser = reqparse.RequestParser()
- parser.add_argument("grant_type", type=str, required=True, location="json")
- parser.add_argument("code", type=str, required=False, location="json")
- parser.add_argument("client_secret", type=str, required=False, location="json")
- parser.add_argument("redirect_uri", type=str, required=False, location="json")
- parser.add_argument("refresh_token", type=str, required=False, location="json")
- parsed_args = parser.parse_args()
+ payload = OAuthTokenRequest.model_validate(request.get_json())
try:
- grant_type = OAuthGrantType(parsed_args["grant_type"])
+ grant_type = OAuthGrantType(payload.grant_type)
except ValueError:
raise BadRequest("invalid grant_type")
if grant_type == OAuthGrantType.AUTHORIZATION_CODE:
- if not parsed_args["code"]:
+ if not payload.code:
raise BadRequest("code is required")
- if parsed_args["client_secret"] != oauth_provider_app.client_secret:
+ if payload.client_secret != oauth_provider_app.client_secret:
raise BadRequest("client_secret is invalid")
- if parsed_args["redirect_uri"] not in oauth_provider_app.redirect_uris:
+ if payload.redirect_uri not in oauth_provider_app.redirect_uris:
raise BadRequest("redirect_uri is invalid")
access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
- grant_type, code=parsed_args["code"], client_id=oauth_provider_app.client_id
+ grant_type, code=payload.code, client_id=oauth_provider_app.client_id
)
return jsonable_encoder(
{
@@ -167,11 +178,11 @@ class OAuthServerUserTokenApi(Resource):
}
)
elif grant_type == OAuthGrantType.REFRESH_TOKEN:
- if not parsed_args["refresh_token"]:
+ if not payload.refresh_token:
raise BadRequest("refresh_token is required")
access_token, refresh_token = OAuthServerService.sign_oauth_access_token(
- grant_type, refresh_token=parsed_args["refresh_token"], client_id=oauth_provider_app.client_id
+ grant_type, refresh_token=payload.refresh_token, client_id=oauth_provider_app.client_id
)
return jsonable_encoder(
{
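# --- Editor's illustrative sketch (not part of the diff) ---------------------
# The two request shapes OAuthTokenRequest accepts; the model is re-declared
# here so the sketch runs standalone, and all values are hypothetical.
from pydantic import BaseModel

class _TokenRequest(BaseModel):  # mirrors OAuthTokenRequest above
    client_id: str
    grant_type: str
    code: str | None = None
    client_secret: str | None = None
    redirect_uri: str | None = None
    refresh_token: str | None = None

_auth = _TokenRequest.model_validate({
    "client_id": "c1", "grant_type": "authorization_code",
    "code": "signed-code", "client_secret": "s3cret",
    "redirect_uri": "https://app.example.com/cb",
})
_refresh = _TokenRequest.model_validate(
    {"client_id": "c1", "grant_type": "refresh_token", "refresh_token": "rt-1"}
)
assert _auth.refresh_token is None and _refresh.code is None
# -----------------------------------------------------------------------------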
diff --git a/api/controllers/console/billing/billing.py b/api/controllers/console/billing/billing.py
index fa89f45122..7f907dc420 100644
--- a/api/controllers/console/billing/billing.py
+++ b/api/controllers/console/billing/billing.py
@@ -1,11 +1,45 @@
-from flask_restx import Resource, reqparse
+import base64
+
+from flask import request
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field, field_validator
+from werkzeug.exceptions import BadRequest
from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, only_edition_cloud, setup_required
-from libs.login import current_user, login_required
-from models.model import Account
+from enums.cloud_plan import CloudPlan
+from libs.login import current_account_with_tenant, login_required
from services.billing_service import BillingService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class SubscriptionQuery(BaseModel):
+ plan: str = Field(..., description="Subscription plan")
+ interval: str = Field(..., description="Billing interval")
+
+ @field_validator("plan")
+ @classmethod
+ def validate_plan(cls, value: str) -> str:
+ if value not in [CloudPlan.PROFESSIONAL, CloudPlan.TEAM]:
+ raise ValueError("Invalid plan")
+ return value
+
+ @field_validator("interval")
+ @classmethod
+ def validate_interval(cls, value: str) -> str:
+ if value not in {"month", "year"}:
+ raise ValueError("Invalid interval")
+ return value
+
+
+class PartnerTenantsPayload(BaseModel):
+ click_id: str = Field(..., description="Click Id from partner referral link")
+
+
+for model in (SubscriptionQuery, PartnerTenantsPayload):
+ console_ns.schema_model(model.__name__, model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
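# --- Editor's illustrative sketch (not part of the diff) ---------------------
# The field validators replace reqparse's `choices=`; invalid values fail at
# parse time, before the billing service is called. Uses SubscriptionQuery as
# defined above and assumes CloudPlan is a str-valued enum, so membership
# checks against raw query strings work.
from pydantic import ValidationError

SubscriptionQuery.model_validate({"plan": "team", "interval": "month"})  # ok
try:
    SubscriptionQuery.model_validate({"plan": "sandbox", "interval": "month"})
except ValidationError:
    pass  # "Invalid plan" rejected at the model boundary
# -----------------------------------------------------------------------------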
+
@console_ns.route("/billing/subscription")
class Subscription(Resource):
@@ -14,17 +48,10 @@ class Subscription(Resource):
@account_initialization_required
@only_edition_cloud
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument("plan", type=str, required=True, location="args", choices=["professional", "team"])
- parser.add_argument("interval", type=str, required=True, location="args", choices=["month", "year"])
- args = parser.parse_args()
- assert isinstance(current_user, Account)
-
+ current_user, current_tenant_id = current_account_with_tenant()
+ args = SubscriptionQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
BillingService.is_tenant_owner_or_admin(current_user)
- assert current_user.current_tenant_id is not None
- return BillingService.get_subscription(
- args["plan"], args["interval"], current_user.email, current_user.current_tenant_id
- )
+ return BillingService.get_subscription(args.plan, args.interval, current_user.email, current_tenant_id)
@console_ns.route("/billing/invoices")
@@ -34,7 +61,39 @@ class Invoices(Resource):
@account_initialization_required
@only_edition_cloud
def get(self):
- assert isinstance(current_user, Account)
+ current_user, current_tenant_id = current_account_with_tenant()
BillingService.is_tenant_owner_or_admin(current_user)
- assert current_user.current_tenant_id is not None
- return BillingService.get_invoices(current_user.email, current_user.current_tenant_id)
+ return BillingService.get_invoices(current_user.email, current_tenant_id)
+
+
+@console_ns.route("/billing/partners//tenants")
+class PartnerTenants(Resource):
+ @console_ns.doc("sync_partner_tenants_bindings")
+ @console_ns.doc(description="Sync partner tenants bindings")
+ @console_ns.doc(params={"partner_key": "Partner key"})
+ @console_ns.expect(
+ console_ns.model(
+ "SyncPartnerTenantsBindingsRequest",
+ {"click_id": fields.String(required=True, description="Click Id from partner referral link")},
+ )
+ )
+ @console_ns.response(200, "Tenants synced to partner successfully")
+ @console_ns.response(400, "Invalid partner information")
+ @setup_required
+ @login_required
+ @account_initialization_required
+ @only_edition_cloud
+ def put(self, partner_key: str):
+ current_user, _ = current_account_with_tenant()
+
+ try:
+ args = PartnerTenantsPayload.model_validate(console_ns.payload or {})
+ click_id = args.click_id
+ decoded_partner_key = base64.b64decode(partner_key).decode("utf-8")
+ except Exception:
+ raise BadRequest("Invalid partner_key")
+
+ if not click_id or not decoded_partner_key or not current_user.id:
+ raise BadRequest("Invalid partner information")
+
+ return BillingService.sync_partner_tenants_bindings(current_user.id, decoded_partner_key, click_id)
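# --- Editor's illustrative sketch (not part of the diff) ---------------------
# The partner_key path segment is expected to arrive base64-encoded; any decode
# error (or an invalid payload) collapses into the single BadRequest above.
import base64

_key = base64.b64encode(b"partner-42").decode("utf-8")  # hypothetical key
assert base64.b64decode(_key).decode("utf-8") == "partner-42"
# -----------------------------------------------------------------------------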
diff --git a/api/controllers/console/billing/compliance.py b/api/controllers/console/billing/compliance.py
index e489b48c82..afc5f92b68 100644
--- a/api/controllers/console/billing/compliance.py
+++ b/api/controllers/console/billing/compliance.py
@@ -1,33 +1,44 @@
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field
from libs.helper import extract_remote_ip
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from services.billing_service import BillingService
from .. import console_ns
from ..wraps import account_initialization_required, only_edition_cloud, setup_required
+class ComplianceDownloadQuery(BaseModel):
+ doc_name: str = Field(..., description="Compliance document name")
+
+
+console_ns.schema_model(
+ ComplianceDownloadQuery.__name__,
+ ComplianceDownloadQuery.model_json_schema(ref_template="#/definitions/{model}"),
+)
+
+
@console_ns.route("/compliance/download")
class ComplianceApi(Resource):
+ @console_ns.expect(console_ns.models[ComplianceDownloadQuery.__name__])
+ @console_ns.doc("download_compliance_document")
+ @console_ns.doc(description="Get compliance document download link")
@setup_required
@login_required
@account_initialization_required
@only_edition_cloud
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument("doc_name", type=str, required=True, location="args")
- args = parser.parse_args()
+ current_user, current_tenant_id = current_account_with_tenant()
+ args = ComplianceDownloadQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
ip_address = extract_remote_ip(request)
device_info = request.headers.get("User-Agent", "Unknown device")
-
return BillingService.get_compliance_download_link(
doc_name=args.doc_name,
account_id=current_user.id,
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
ip=ip_address,
device_info=device_info,
)
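# --- Editor's illustrative sketch (not part of the diff) ---------------------
# request.args is a werkzeug MultiDict; to_dict(flat=True) keeps only the first
# value per key, which is what single-valued query models such as
# ComplianceDownloadQuery expect.
from werkzeug.datastructures import MultiDict

_args = MultiDict([("doc_name", "soc2"), ("doc_name", "ignored")])
assert _args.to_dict(flat=True) == {"doc_name": "soc2"}
# -----------------------------------------------------------------------------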
diff --git a/api/controllers/console/datasets/data_source.py b/api/controllers/console/datasets/data_source.py
index 6d9d675e87..01f268d94d 100644
--- a/api/controllers/console/datasets/data_source.py
+++ b/api/controllers/console/datasets/data_source.py
@@ -1,16 +1,15 @@
import json
from collections.abc import Generator
-from typing import cast
+from typing import Any, cast
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, marshal_with, reqparse
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.orm import Session
from werkzeug.exceptions import NotFound
-from controllers.console import console_ns
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.common.schema import register_schema_model
from core.datasource.entities.datasource_entities import DatasourceProviderType, OnlineDocumentPagesMessage
from core.datasource.online_document.online_document_plugin import OnlineDocumentDatasourcePlugin
from core.indexing_runner import IndexingRunner
@@ -20,12 +19,25 @@ from core.rag.extractor.notion_extractor import NotionExtractor
from extensions.ext_database import db
from fields.data_source_fields import integrate_list_fields, integrate_notion_info_list_fields
from libs.datetime_utils import naive_utc_now
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models import DataSourceOauthBinding, Document
from services.dataset_service import DatasetService, DocumentService
from services.datasource_provider_service import DatasourceProviderService
from tasks.document_indexing_sync_task import document_indexing_sync_task
+from .. import console_ns
+from ..wraps import account_initialization_required, setup_required
+
+
+class NotionEstimatePayload(BaseModel):
+ notion_info_list: list[dict[str, Any]]
+ process_rule: dict[str, Any]
+ doc_form: str = Field(default="text_model")
+ doc_language: str = Field(default="English")
+
+
+register_schema_model(console_ns, NotionEstimatePayload)
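# --- Editor's illustrative sketch (not part of the diff) ---------------------
# Omitted optional fields fall back to the pydantic defaults, matching the old
# reqparse `default=` arguments. Uses NotionEstimatePayload as defined above;
# the input is hypothetical.
_estimate = NotionEstimatePayload.model_validate(
    {"notion_info_list": [{"workspace_id": "w1", "pages": []}],
     "process_rule": {"mode": "automatic"}}
)
assert _estimate.doc_form == "text_model" and _estimate.doc_language == "English"
# -----------------------------------------------------------------------------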
+
@console_ns.route(
"/data-source/integrates",
@@ -37,10 +49,12 @@ class DataSourceApi(Resource):
@account_initialization_required
@marshal_with(integrate_list_fields)
def get(self):
+ _, current_tenant_id = current_account_with_tenant()
+
# get workspace data source integrates
data_source_integrates = db.session.scalars(
select(DataSourceOauthBinding).where(
- DataSourceOauthBinding.tenant_id == current_user.current_tenant_id,
+ DataSourceOauthBinding.tenant_id == current_tenant_id,
DataSourceOauthBinding.disabled == False,
)
).all()
@@ -120,13 +134,15 @@ class DataSourceNotionListApi(Resource):
@account_initialization_required
@marshal_with(integrate_notion_info_list_fields)
def get(self):
+ current_user, current_tenant_id = current_account_with_tenant()
+
dataset_id = request.args.get("dataset_id", default=None, type=str)
credential_id = request.args.get("credential_id", default=None, type=str)
if not credential_id:
raise ValueError("Credential id is required.")
datasource_provider_service = DatasourceProviderService()
credential = datasource_provider_service.get_datasource_credentials(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
credential_id=credential_id,
provider="notion_datasource",
plugin_id="langgenius/notion_datasource",
@@ -146,7 +162,7 @@ class DataSourceNotionListApi(Resource):
documents = session.scalars(
select(Document).filter_by(
dataset_id=dataset_id,
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
data_source_type="notion_import",
enabled=True,
)
@@ -161,7 +177,7 @@ class DataSourceNotionListApi(Resource):
datasource_runtime = DatasourceManager.get_datasource_runtime(
provider_id="langgenius/notion_datasource/notion_datasource",
datasource_name="notion_datasource",
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
datasource_type=DatasourceProviderType.ONLINE_DOCUMENT,
)
datasource_provider_service = DatasourceProviderService()
@@ -210,12 +226,14 @@ class DataSourceNotionApi(Resource):
@login_required
@account_initialization_required
def get(self, workspace_id, page_id, page_type):
+ _, current_tenant_id = current_account_with_tenant()
+
credential_id = request.args.get("credential_id", default=None, type=str)
if not credential_id:
raise ValueError("Credential id is required.")
datasource_provider_service = DatasourceProviderService()
credential = datasource_provider_service.get_datasource_credentials(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
credential_id=credential_id,
provider="notion_datasource",
plugin_id="langgenius/notion_datasource",
@@ -229,7 +247,7 @@ class DataSourceNotionApi(Resource):
notion_obj_id=page_id,
notion_page_type=page_type,
notion_access_token=credential.get("integration_secret"),
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
)
text_docs = extractor.extract()
@@ -238,18 +256,15 @@ class DataSourceNotionApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @console_ns.expect(console_ns.models[NotionEstimatePayload.__name__])
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("notion_info_list", type=list, required=True, nullable=True, location="json")
- parser.add_argument("process_rule", type=dict, required=True, nullable=True, location="json")
- parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json")
- parser.add_argument(
- "doc_language", type=str, default="English", required=False, nullable=False, location="json"
- )
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+
+ payload = NotionEstimatePayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump()
# validate args
DocumentService.estimate_args_validate(args)
- notion_info_list = args["notion_info_list"]
+ notion_info_list = payload.notion_info_list
extract_settings = []
for notion_info in notion_info_list:
workspace_id = notion_info["workspace_id"]
@@ -263,7 +278,7 @@ class DataSourceNotionApi(Resource):
"notion_workspace_id": workspace_id,
"notion_obj_id": page["page_id"],
"notion_page_type": page["type"],
- "tenant_id": current_user.current_tenant_id,
+ "tenant_id": current_tenant_id,
}
),
document_model=args["doc_form"],
@@ -271,7 +286,7 @@ class DataSourceNotionApi(Resource):
extract_settings.append(extract_setting)
indexing_runner = IndexingRunner()
response = indexing_runner.indexing_estimate(
- current_user.current_tenant_id,
+ current_tenant_id,
extract_settings,
args["process_rule"],
args["doc_form"],
diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index dda0125687..ea21c4480d 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -1,21 +1,26 @@
from typing import Any, cast
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, fields, marshal, marshal_with, reqparse
+from flask_restx import Resource, fields, marshal, marshal_with
+from pydantic import BaseModel, Field, field_validator
from sqlalchemy import select
from werkzeug.exceptions import Forbidden, NotFound
import services
from configs import dify_config
-from controllers.console import api, console_ns
-from controllers.console.apikey import api_key_fields, api_key_list
+from controllers.common.schema import register_schema_models
+from controllers.console import console_ns
+from controllers.console.apikey import (
+ api_key_item_model,
+ api_key_list_model,
+)
from controllers.console.app.error import ProviderNotInitializeError
from controllers.console.datasets.error import DatasetInUseError, DatasetNameDuplicateError, IndexingEstimateError
from controllers.console.wraps import (
account_initialization_required,
cloud_edition_billing_rate_limit_check,
enterprise_license_required,
+ is_admin_or_owner_required,
setup_required,
)
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
@@ -27,29 +32,233 @@ from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
-from fields.app_fields import related_app_list
-from fields.dataset_fields import dataset_detail_fields, dataset_query_detail_fields
+from fields.app_fields import app_detail_kernel_fields, related_app_list
+from fields.dataset_fields import (
+ dataset_detail_fields,
+ dataset_fields,
+ dataset_query_detail_fields,
+ dataset_retrieval_model_fields,
+ doc_metadata_fields,
+ external_knowledge_info_fields,
+ external_retrieval_model_fields,
+ icon_info_fields,
+ keyword_setting_fields,
+ reranking_model_fields,
+ tag_fields,
+ vector_setting_fields,
+ weighted_score_fields,
+)
from fields.document_fields import document_status_fields
-from libs.login import login_required
-from libs.validators import validate_description_length
+from libs.login import current_account_with_tenant, login_required
from models import ApiToken, Dataset, Document, DocumentSegment, UploadFile
-from models.account import Account
from models.dataset import DatasetPermissionEnum
from models.provider_ids import ModelProviderID
from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
-def _validate_name(name: str) -> str:
- if not name or len(name) < 1 or len(name) > 40:
- raise ValueError("Name must be between 1 to 40 characters.")
- return name
+def _get_or_create_model(model_name: str, field_def):
+ existing = console_ns.models.get(model_name)
+ if existing is None:
+ existing = console_ns.model(model_name, field_def)
+ return existing
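# --- Editor's illustrative sketch (not part of the diff) ---------------------
# Why the get-or-create guard: re-registering under the same name would shadow
# the cached flask_restx model, while returning the existing one keeps this
# module idempotent under repeated import. A throwaway namespace is used here
# so the check runs standalone.
from flask_restx import Namespace
from flask_restx import fields as rx_fields

_sketch_ns = Namespace("sketch")
_first = _sketch_ns.models.get("DatasetBase") or _sketch_ns.model("DatasetBase", {"id": rx_fields.String()})
_second = _sketch_ns.models.get("DatasetBase") or _sketch_ns.model("DatasetBase", {"id": rx_fields.String()})
assert _first is _second
# -----------------------------------------------------------------------------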
+
+
+# Register models for flask_restx to avoid dict type issues in Swagger
+dataset_base_model = _get_or_create_model("DatasetBase", dataset_fields)
+
+tag_model = _get_or_create_model("Tag", tag_fields)
+
+keyword_setting_model = _get_or_create_model("DatasetKeywordSetting", keyword_setting_fields)
+vector_setting_model = _get_or_create_model("DatasetVectorSetting", vector_setting_fields)
+
+weighted_score_fields_copy = weighted_score_fields.copy()
+weighted_score_fields_copy["keyword_setting"] = fields.Nested(keyword_setting_model)
+weighted_score_fields_copy["vector_setting"] = fields.Nested(vector_setting_model)
+weighted_score_model = _get_or_create_model("DatasetWeightedScore", weighted_score_fields_copy)
+
+reranking_model = _get_or_create_model("DatasetRerankingModel", reranking_model_fields)
+
+dataset_retrieval_model_fields_copy = dataset_retrieval_model_fields.copy()
+dataset_retrieval_model_fields_copy["reranking_model"] = fields.Nested(reranking_model)
+dataset_retrieval_model_fields_copy["weights"] = fields.Nested(weighted_score_model, allow_null=True)
+dataset_retrieval_model = _get_or_create_model("DatasetRetrievalModel", dataset_retrieval_model_fields_copy)
+
+external_knowledge_info_model = _get_or_create_model("ExternalKnowledgeInfo", external_knowledge_info_fields)
+
+external_retrieval_model = _get_or_create_model("ExternalRetrievalModel", external_retrieval_model_fields)
+
+doc_metadata_model = _get_or_create_model("DatasetDocMetadata", doc_metadata_fields)
+
+icon_info_model = _get_or_create_model("DatasetIconInfo", icon_info_fields)
+
+dataset_detail_fields_copy = dataset_detail_fields.copy()
+dataset_detail_fields_copy["retrieval_model_dict"] = fields.Nested(dataset_retrieval_model)
+dataset_detail_fields_copy["tags"] = fields.List(fields.Nested(tag_model))
+dataset_detail_fields_copy["external_knowledge_info"] = fields.Nested(external_knowledge_info_model)
+dataset_detail_fields_copy["external_retrieval_model"] = fields.Nested(external_retrieval_model, allow_null=True)
+dataset_detail_fields_copy["doc_metadata"] = fields.List(fields.Nested(doc_metadata_model))
+dataset_detail_fields_copy["icon_info"] = fields.Nested(icon_info_model)
+dataset_detail_model = _get_or_create_model("DatasetDetail", dataset_detail_fields_copy)
+
+dataset_query_detail_model = _get_or_create_model("DatasetQueryDetail", dataset_query_detail_fields)
+
+app_detail_kernel_model = _get_or_create_model("AppDetailKernel", app_detail_kernel_fields)
+related_app_list_copy = related_app_list.copy()
+related_app_list_copy["data"] = fields.List(fields.Nested(app_detail_kernel_model))
+related_app_list_model = _get_or_create_model("RelatedAppList", related_app_list_copy)
+
+
+def _validate_indexing_technique(value: str | None) -> str | None:
+ if value is None:
+ return value
+ if value not in Dataset.INDEXING_TECHNIQUE_LIST:
+ raise ValueError("Invalid indexing technique.")
+ return value
+
+
+class DatasetCreatePayload(BaseModel):
+ name: str = Field(..., min_length=1, max_length=40)
+ description: str = Field("", max_length=400)
+ indexing_technique: str | None = None
+ permission: DatasetPermissionEnum | None = DatasetPermissionEnum.ONLY_ME
+ provider: str = "vendor"
+ external_knowledge_api_id: str | None = None
+ external_knowledge_id: str | None = None
+
+ @field_validator("indexing_technique")
+ @classmethod
+ def validate_indexing(cls, value: str | None) -> str | None:
+ return _validate_indexing_technique(value)
+
+ @field_validator("provider")
+ @classmethod
+ def validate_provider(cls, value: str) -> str:
+ if value not in Dataset.PROVIDER_LIST:
+ raise ValueError("Invalid provider.")
+ return value
+
+
+class DatasetUpdatePayload(BaseModel):
+ name: str | None = Field(None, min_length=1, max_length=40)
+ description: str | None = Field(None, max_length=400)
+ permission: DatasetPermissionEnum | None = None
+ indexing_technique: str | None = None
+ embedding_model: str | None = None
+ embedding_model_provider: str | None = None
+ retrieval_model: dict[str, Any] | None = None
+ partial_member_list: list[str] | None = None
+ external_retrieval_model: dict[str, Any] | None = None
+ external_knowledge_id: str | None = None
+ external_knowledge_api_id: str | None = None
+ icon_info: dict[str, Any] | None = None
+ is_multimodal: bool | None = False
+
+ @field_validator("indexing_technique")
+ @classmethod
+ def validate_indexing(cls, value: str | None) -> str | None:
+ return _validate_indexing_technique(value)
+
+
+class IndexingEstimatePayload(BaseModel):
+ info_list: dict[str, Any]
+ process_rule: dict[str, Any]
+ indexing_technique: str
+ doc_form: str = "text_model"
+ dataset_id: str | None = None
+ doc_language: str = "English"
+
+ @field_validator("indexing_technique")
+ @classmethod
+ def validate_indexing(cls, value: str) -> str:
+ result = _validate_indexing_technique(value)
+ if result is None:
+ raise ValueError("indexing_technique is required.")
+ return result
+
+
+register_schema_models(console_ns, DatasetCreatePayload, DatasetUpdatePayload, IndexingEstimatePayload)
+
+
+def _get_retrieval_methods_by_vector_type(vector_type: str | None, is_mock: bool = False) -> dict[str, list[str]]:
+ """
+ Get supported retrieval methods based on vector database type.
+
+ Args:
+ vector_type: Vector database type, can be None
+        is_mock: Whether this is the mock endpoint; affects MILVUS handling
+
+ Returns:
+ Dictionary containing supported retrieval methods
+
+ Raises:
+ ValueError: If vector_type is None or unsupported
+ """
+ if vector_type is None:
+ raise ValueError("Vector store type is not configured.")
+
+ # Define vector database types that only support semantic search
+ semantic_only_types = {
+ VectorType.RELYT,
+ VectorType.TIDB_VECTOR,
+ VectorType.CHROMA,
+ VectorType.PGVECTO_RS,
+ VectorType.VIKINGDB,
+ VectorType.UPSTASH,
+ }
+
+ # Define vector database types that support all retrieval methods
+ full_search_types = {
+ VectorType.QDRANT,
+ VectorType.WEAVIATE,
+ VectorType.OPENSEARCH,
+ VectorType.ANALYTICDB,
+ VectorType.MYSCALE,
+ VectorType.ORACLE,
+ VectorType.ELASTICSEARCH,
+ VectorType.ELASTICSEARCH_JA,
+ VectorType.PGVECTOR,
+ VectorType.VASTBASE,
+ VectorType.TIDB_ON_QDRANT,
+ VectorType.LINDORM,
+ VectorType.COUCHBASE,
+ VectorType.OPENGAUSS,
+ VectorType.OCEANBASE,
+ VectorType.SEEKDB,
+ VectorType.TABLESTORE,
+ VectorType.HUAWEI_CLOUD,
+ VectorType.TENCENT,
+ VectorType.MATRIXONE,
+ VectorType.CLICKZETTA,
+ VectorType.BAIDU,
+ VectorType.ALIBABACLOUD_MYSQL,
+ VectorType.IRIS,
+ }
+
+ semantic_methods = {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
+ full_methods = {
+ "retrieval_method": [
+ RetrievalMethod.SEMANTIC_SEARCH.value,
+ RetrievalMethod.FULL_TEXT_SEARCH.value,
+ RetrievalMethod.HYBRID_SEARCH.value,
+ ]
+ }
+
+ if vector_type == VectorType.MILVUS:
+ return semantic_methods if is_mock else full_methods
+
+ if vector_type in semantic_only_types:
+ return semantic_methods
+ elif vector_type in full_search_types:
+ return full_methods
+ else:
+ raise ValueError(f"Unsupported vector db type {vector_type}.")
@console_ns.route("/datasets")
class DatasetListApi(Resource):
- @api.doc("get_datasets")
- @api.doc(description="Get list of datasets")
- @api.doc(
+ @console_ns.doc("get_datasets")
+ @console_ns.doc(description="Get list of datasets")
+ @console_ns.doc(
params={
"page": "Page number (default: 1)",
"limit": "Number of items per page (default: 20)",
@@ -59,12 +268,13 @@ class DatasetListApi(Resource):
"include_all": "Include all datasets (default: false)",
}
)
- @api.response(200, "Datasets retrieved successfully")
+ @console_ns.response(200, "Datasets retrieved successfully")
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
def get(self):
+ current_user, current_tenant_id = current_account_with_tenant()
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
ids = request.args.getlist("ids")
@@ -73,15 +283,15 @@ class DatasetListApi(Resource):
tag_ids = request.args.getlist("tag_ids")
include_all = request.args.get("include_all", default="false").lower() == "true"
if ids:
- datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
+ datasets, total = DatasetService.get_datasets_by_ids(ids, current_tenant_id)
else:
datasets, total = DatasetService.get_datasets(
- page, limit, current_user.current_tenant_id, current_user, search, tag_ids, include_all
+ page, limit, current_tenant_id, current_user, search, tag_ids, include_all
)
# check embedding setting
provider_manager = ProviderManager()
- configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)
+ configurations = provider_manager.get_configurations(tenant_id=current_tenant_id)
embedding_models = configurations.get_models(model_type=ModelType.TEXT_EMBEDDING, only_active=True)
@@ -111,73 +321,18 @@ class DatasetListApi(Resource):
response = {"data": data, "has_more": len(datasets) == limit, "limit": limit, "total": total, "page": page}
return response, 200
- @api.doc("create_dataset")
- @api.doc(description="Create a new dataset")
- @api.expect(
- api.model(
- "CreateDatasetRequest",
- {
- "name": fields.String(required=True, description="Dataset name (1-40 characters)"),
- "description": fields.String(description="Dataset description (max 400 characters)"),
- "indexing_technique": fields.String(description="Indexing technique"),
- "permission": fields.String(description="Dataset permission"),
- "provider": fields.String(description="Provider"),
- "external_knowledge_api_id": fields.String(description="External knowledge API ID"),
- "external_knowledge_id": fields.String(description="External knowledge ID"),
- },
- )
- )
- @api.response(201, "Dataset created successfully")
- @api.response(400, "Invalid request parameters")
+ @console_ns.doc("create_dataset")
+ @console_ns.doc(description="Create a new dataset")
+ @console_ns.expect(console_ns.models[DatasetCreatePayload.__name__])
+ @console_ns.response(201, "Dataset created successfully")
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name",
- nullable=False,
- required=True,
- help="type is required. Name must be between 1 to 40 characters.",
- type=_validate_name,
- )
- parser.add_argument(
- "description",
- type=validate_description_length,
- nullable=True,
- required=False,
- default="",
- )
- parser.add_argument(
- "indexing_technique",
- type=str,
- location="json",
- choices=Dataset.INDEXING_TECHNIQUE_LIST,
- nullable=True,
- help="Invalid indexing technique.",
- )
- parser.add_argument(
- "external_knowledge_api_id",
- type=str,
- nullable=True,
- required=False,
- )
- parser.add_argument(
- "provider",
- type=str,
- nullable=True,
- choices=Dataset.PROVIDER_LIST,
- required=False,
- default="vendor",
- )
- parser.add_argument(
- "external_knowledge_id",
- type=str,
- nullable=True,
- required=False,
- )
- args = parser.parse_args()
+ payload = DatasetCreatePayload.model_validate(console_ns.payload or {})
+ current_user, current_tenant_id = current_account_with_tenant()
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
if not current_user.is_dataset_editor:
@@ -185,15 +340,15 @@ class DatasetListApi(Resource):
try:
dataset = DatasetService.create_empty_dataset(
- tenant_id=current_user.current_tenant_id,
- name=args["name"],
- description=args["description"],
- indexing_technique=args["indexing_technique"],
- account=cast(Account, current_user),
- permission=DatasetPermissionEnum.ONLY_ME,
- provider=args["provider"],
- external_knowledge_api_id=args["external_knowledge_api_id"],
- external_knowledge_id=args["external_knowledge_id"],
+ tenant_id=current_tenant_id,
+ name=payload.name,
+ description=payload.description,
+ indexing_technique=payload.indexing_technique,
+ account=current_user,
+ permission=payload.permission or DatasetPermissionEnum.ONLY_ME,
+ provider=payload.provider,
+ external_knowledge_api_id=payload.external_knowledge_api_id,
+ external_knowledge_id=payload.external_knowledge_id,
)
except services.errors.dataset.DatasetNameDuplicateError:
raise DatasetNameDuplicateError()
@@ -203,16 +358,17 @@ class DatasetListApi(Resource):
@console_ns.route("/datasets/")
class DatasetApi(Resource):
- @api.doc("get_dataset")
- @api.doc(description="Get dataset details")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Dataset retrieved successfully", dataset_detail_fields)
- @api.response(404, "Dataset not found")
- @api.response(403, "Permission denied")
+ @console_ns.doc("get_dataset")
+ @console_ns.doc(description="Get dataset details")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Dataset retrieved successfully", dataset_detail_model)
+ @console_ns.response(404, "Dataset not found")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
+ current_user, current_tenant_id = current_account_with_tenant()
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
@@ -232,7 +388,7 @@ class DatasetApi(Resource):
# check embedding setting
provider_manager = ProviderManager()
- configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)
+ configurations = provider_manager.get_configurations(tenant_id=current_tenant_id)
embedding_models = configurations.get_models(model_type=ModelType.TEXT_EMBEDDING, only_active=True)
@@ -251,23 +407,12 @@ class DatasetApi(Resource):
return data, 200
- @api.doc("update_dataset")
- @api.doc(description="Update dataset details")
- @api.expect(
- api.model(
- "UpdateDatasetRequest",
- {
- "name": fields.String(description="Dataset name"),
- "description": fields.String(description="Dataset description"),
- "permission": fields.String(description="Dataset permission"),
- "indexing_technique": fields.String(description="Indexing technique"),
- "external_retrieval_model": fields.Raw(description="External retrieval model settings"),
- },
- )
- )
- @api.response(200, "Dataset updated successfully", dataset_detail_fields)
- @api.response(404, "Dataset not found")
- @api.response(403, "Permission denied")
+ @console_ns.doc("update_dataset")
+ @console_ns.doc(description="Update dataset details")
+ @console_ns.expect(console_ns.models[DatasetUpdatePayload.__name__])
+ @console_ns.response(200, "Dataset updated successfully", dataset_detail_model)
+ @console_ns.response(404, "Dataset not found")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
@@ -278,106 +423,36 @@ class DatasetApi(Resource):
if dataset is None:
raise NotFound("Dataset not found.")
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name",
- nullable=False,
- help="type is required. Name must be between 1 to 40 characters.",
- type=_validate_name,
- )
- parser.add_argument("description", location="json", store_missing=False, type=validate_description_length)
- parser.add_argument(
- "indexing_technique",
- type=str,
- location="json",
- choices=Dataset.INDEXING_TECHNIQUE_LIST,
- nullable=True,
- help="Invalid indexing technique.",
- )
- parser.add_argument(
- "permission",
- type=str,
- location="json",
- choices=(DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM, DatasetPermissionEnum.PARTIAL_TEAM),
- help="Invalid permission.",
- )
- parser.add_argument("embedding_model", type=str, location="json", help="Invalid embedding model.")
- parser.add_argument(
- "embedding_model_provider", type=str, location="json", help="Invalid embedding model provider."
- )
- parser.add_argument("retrieval_model", type=dict, location="json", help="Invalid retrieval model.")
- parser.add_argument("partial_member_list", type=list, location="json", help="Invalid parent user list.")
-
- parser.add_argument(
- "external_retrieval_model",
- type=dict,
- required=False,
- nullable=True,
- location="json",
- help="Invalid external retrieval model.",
- )
-
- parser.add_argument(
- "external_knowledge_id",
- type=str,
- required=False,
- nullable=True,
- location="json",
- help="Invalid external knowledge id.",
- )
-
- parser.add_argument(
- "external_knowledge_api_id",
- type=str,
- required=False,
- nullable=True,
- location="json",
- help="Invalid external knowledge api id.",
- )
-
- parser.add_argument(
- "icon_info",
- type=dict,
- required=False,
- nullable=True,
- location="json",
- help="Invalid icon info.",
- )
- args = parser.parse_args()
- data = request.get_json()
-
+ payload = DatasetUpdatePayload.model_validate(console_ns.payload or {})
+ current_user, current_tenant_id = current_account_with_tenant()
# check embedding model setting
if (
- data.get("indexing_technique") == "high_quality"
- and data.get("embedding_model_provider") is not None
- and data.get("embedding_model") is not None
+ payload.indexing_technique == "high_quality"
+ and payload.embedding_model_provider is not None
+ and payload.embedding_model is not None
):
- DatasetService.check_embedding_model_setting(
- dataset.tenant_id, data.get("embedding_model_provider"), data.get("embedding_model")
+ is_multimodal = DatasetService.check_is_multimodal_model(
+ dataset.tenant_id, payload.embedding_model_provider, payload.embedding_model
)
-
+ payload.is_multimodal = is_multimodal
+ payload_data = payload.model_dump(exclude_unset=True)
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
DatasetPermissionService.check_permission(
- current_user, dataset, data.get("permission"), data.get("partial_member_list")
+ current_user, dataset, payload.permission, payload.partial_member_list
)
- dataset = DatasetService.update_dataset(dataset_id_str, args, current_user)
+ dataset = DatasetService.update_dataset(dataset_id_str, payload_data, current_user)
if dataset is None:
raise NotFound("Dataset not found.")
result_data = cast(dict[str, Any], marshal(dataset, dataset_detail_fields))
- tenant_id = current_user.current_tenant_id
+ tenant_id = current_tenant_id
- if data.get("partial_member_list") and data.get("permission") == "partial_members":
- DatasetPermissionService.update_partial_member_list(
- tenant_id, dataset_id_str, data.get("partial_member_list")
- )
+ if payload.partial_member_list is not None and payload.permission == DatasetPermissionEnum.PARTIAL_TEAM:
+ DatasetPermissionService.update_partial_member_list(tenant_id, dataset_id_str, payload.partial_member_list)
# clear partial member list when permission is only_me or all_team_members
- elif (
- data.get("permission") == DatasetPermissionEnum.ONLY_ME
- or data.get("permission") == DatasetPermissionEnum.ALL_TEAM
- ):
+ elif payload.permission in {DatasetPermissionEnum.ONLY_ME, DatasetPermissionEnum.ALL_TEAM}:
DatasetPermissionService.clear_partial_member_list(dataset_id_str)
partial_member_list = DatasetPermissionService.get_dataset_partial_member_list(dataset_id_str)
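# --- Editor's illustrative sketch (not part of the diff) ---------------------
# model_dump(exclude_unset=True) forwards only fields the client actually sent,
# so a partial update cannot clobber untouched columns with None defaults.
# Uses DatasetUpdatePayload as defined above; the body is hypothetical.
_partial = DatasetUpdatePayload.model_validate({"name": "renamed"})
assert _partial.model_dump(exclude_unset=True) == {"name": "renamed"}
# -----------------------------------------------------------------------------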
@@ -391,9 +466,9 @@ class DatasetApi(Resource):
@cloud_edition_billing_rate_limit_check("knowledge")
def delete(self, dataset_id):
dataset_id_str = str(dataset_id)
+ current_user, _ = current_account_with_tenant()
- # The role of the current user in the ta table must be admin, owner, or editor
- if not (current_user.is_editor or current_user.is_dataset_operator):
+ if not (current_user.has_edit_permission or current_user.is_dataset_operator):
raise Forbidden()
try:
@@ -408,10 +483,10 @@ class DatasetApi(Resource):
@console_ns.route("/datasets//use-check")
class DatasetUseCheckApi(Resource):
- @api.doc("check_dataset_use")
- @api.doc(description="Check if dataset is in use")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Dataset use status retrieved successfully")
+ @console_ns.doc("check_dataset_use")
+ @console_ns.doc(description="Check if dataset is in use")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Dataset use status retrieved successfully")
@setup_required
@login_required
@account_initialization_required
@@ -424,14 +499,15 @@ class DatasetUseCheckApi(Resource):
@console_ns.route("/datasets//queries")
class DatasetQueryApi(Resource):
- @api.doc("get_dataset_queries")
- @api.doc(description="Get dataset query history")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Query history retrieved successfully", dataset_query_detail_fields)
+ @console_ns.doc("get_dataset_queries")
+ @console_ns.doc(description="Get dataset query history")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Query history retrieved successfully", dataset_query_detail_model)
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
+ current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
@@ -448,7 +524,7 @@ class DatasetQueryApi(Resource):
dataset_queries, total = DatasetService.get_dataset_queries(dataset_id=dataset.id, page=page, per_page=limit)
response = {
- "data": marshal(dataset_queries, dataset_query_detail_fields),
+ "data": marshal(dataset_queries, dataset_query_detail_model),
"has_more": len(dataset_queries) == limit,
"limit": limit,
"total": total,
@@ -459,39 +535,24 @@ class DatasetQueryApi(Resource):
@console_ns.route("/datasets/indexing-estimate")
class DatasetIndexingEstimateApi(Resource):
- @api.doc("estimate_dataset_indexing")
- @api.doc(description="Estimate dataset indexing cost")
- @api.response(200, "Indexing estimate calculated successfully")
+ @console_ns.doc("estimate_dataset_indexing")
+ @console_ns.doc(description="Estimate dataset indexing cost")
+ @console_ns.response(200, "Indexing estimate calculated successfully")
@setup_required
@login_required
@account_initialization_required
+ @console_ns.expect(console_ns.models[IndexingEstimatePayload.__name__])
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("info_list", type=dict, required=True, nullable=True, location="json")
- parser.add_argument("process_rule", type=dict, required=True, nullable=True, location="json")
- parser.add_argument(
- "indexing_technique",
- type=str,
- required=True,
- choices=Dataset.INDEXING_TECHNIQUE_LIST,
- nullable=True,
- location="json",
- )
- parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json")
- parser.add_argument("dataset_id", type=str, required=False, nullable=False, location="json")
- parser.add_argument(
- "doc_language", type=str, default="English", required=False, nullable=False, location="json"
- )
- args = parser.parse_args()
+ payload = IndexingEstimatePayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump()
+ _, current_tenant_id = current_account_with_tenant()
# validate args
DocumentService.estimate_args_validate(args)
extract_settings = []
if args["info_list"]["data_source_type"] == "upload_file":
file_ids = args["info_list"]["file_info_list"]["file_ids"]
file_details = db.session.scalars(
- select(UploadFile).where(
- UploadFile.tenant_id == current_user.current_tenant_id, UploadFile.id.in_(file_ids)
- )
+ select(UploadFile).where(UploadFile.tenant_id == current_tenant_id, UploadFile.id.in_(file_ids))
).all()
if file_details is None:
@@ -519,7 +580,7 @@ class DatasetIndexingEstimateApi(Resource):
"notion_workspace_id": workspace_id,
"notion_obj_id": page["page_id"],
"notion_page_type": page["type"],
- "tenant_id": current_user.current_tenant_id,
+ "tenant_id": current_tenant_id,
}
),
document_model=args["doc_form"],
@@ -535,7 +596,7 @@ class DatasetIndexingEstimateApi(Resource):
"provider": website_info_list["provider"],
"job_id": website_info_list["job_id"],
"url": url,
- "tenant_id": current_user.current_tenant_id,
+ "tenant_id": current_tenant_id,
"mode": "crawl",
"only_main_content": website_info_list["only_main_content"],
}
@@ -548,7 +609,7 @@ class DatasetIndexingEstimateApi(Resource):
indexing_runner = IndexingRunner()
try:
response = indexing_runner.indexing_estimate(
- current_user.current_tenant_id,
+ current_tenant_id,
extract_settings,
args["process_rule"],
args["doc_form"],
@@ -570,15 +631,16 @@ class DatasetIndexingEstimateApi(Resource):
@console_ns.route("/datasets//related-apps")
class DatasetRelatedAppListApi(Resource):
- @api.doc("get_dataset_related_apps")
- @api.doc(description="Get applications related to dataset")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Related apps retrieved successfully", related_app_list)
+ @console_ns.doc("get_dataset_related_apps")
+ @console_ns.doc(description="Get applications related to dataset")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Related apps retrieved successfully", related_app_list_model)
@setup_required
@login_required
@account_initialization_required
- @marshal_with(related_app_list)
+ @marshal_with(related_app_list_model)
def get(self, dataset_id):
+ current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
@@ -602,19 +664,18 @@ class DatasetRelatedAppListApi(Resource):
@console_ns.route("/datasets//indexing-status")
class DatasetIndexingStatusApi(Resource):
- @api.doc("get_dataset_indexing_status")
- @api.doc(description="Get dataset indexing status")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Indexing status retrieved successfully")
+ @console_ns.doc("get_dataset_indexing_status")
+ @console_ns.doc(description="Get dataset indexing status")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Indexing status retrieved successfully")
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
+ _, current_tenant_id = current_account_with_tenant()
dataset_id = str(dataset_id)
documents = db.session.scalars(
- select(Document).where(
- Document.dataset_id == dataset_id, Document.tenant_id == current_user.current_tenant_id
- )
+ select(Document).where(Document.dataset_id == dataset_id, Document.tenant_id == current_tenant_id)
).all()
documents_status = []
for document in documents:
@@ -658,38 +719,36 @@ class DatasetApiKeyApi(Resource):
token_prefix = "dataset-"
resource_type = "dataset"
- @api.doc("get_dataset_api_keys")
- @api.doc(description="Get dataset API keys")
- @api.response(200, "API keys retrieved successfully", api_key_list)
+ @console_ns.doc("get_dataset_api_keys")
+ @console_ns.doc(description="Get dataset API keys")
+ @console_ns.response(200, "API keys retrieved successfully", api_key_list_model)
@setup_required
@login_required
@account_initialization_required
- @marshal_with(api_key_list)
+ @marshal_with(api_key_list_model)
def get(self):
+ _, current_tenant_id = current_account_with_tenant()
keys = db.session.scalars(
- select(ApiToken).where(
- ApiToken.type == self.resource_type, ApiToken.tenant_id == current_user.current_tenant_id
- )
+ select(ApiToken).where(ApiToken.type == self.resource_type, ApiToken.tenant_id == current_tenant_id)
).all()
return {"items": keys}
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
- @marshal_with(api_key_fields)
+ @marshal_with(api_key_item_model)
def post(self):
- # The role of the current user in the ta table must be admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
+ _, current_tenant_id = current_account_with_tenant()
current_key_count = (
db.session.query(ApiToken)
- .where(ApiToken.type == self.resource_type, ApiToken.tenant_id == current_user.current_tenant_id)
+ .where(ApiToken.type == self.resource_type, ApiToken.tenant_id == current_tenant_id)
.count()
)
if current_key_count >= self.max_keys:
- api.abort(
+ console_ns.abort(
400,
message=f"Cannot create more than {self.max_keys} API keys for this resource type.",
code="max_keys_exceeded",
@@ -697,7 +756,7 @@ class DatasetApiKeyApi(Resource):
key = ApiToken.generate_api_key(self.token_prefix, 24)
api_token = ApiToken()
- api_token.tenant_id = current_user.current_tenant_id
+ api_token.tenant_id = current_tenant_id
api_token.token = key
api_token.type = self.resource_type
db.session.add(api_token)
@@ -709,24 +768,21 @@ class DatasetApiKeyApi(Resource):
class DatasetApiDeleteApi(Resource):
resource_type = "dataset"
- @api.doc("delete_dataset_api_key")
- @api.doc(description="Delete dataset API key")
- @api.doc(params={"api_key_id": "API key ID"})
- @api.response(204, "API key deleted successfully")
+ @console_ns.doc("delete_dataset_api_key")
+ @console_ns.doc(description="Delete dataset API key")
+ @console_ns.doc(params={"api_key_id": "API key ID"})
+ @console_ns.response(204, "API key deleted successfully")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def delete(self, api_key_id):
+ _, current_tenant_id = current_account_with_tenant()
api_key_id = str(api_key_id)
-
- # The role of the current user in the ta table must be admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
-
key = (
db.session.query(ApiToken)
.where(
- ApiToken.tenant_id == current_user.current_tenant_id,
+ ApiToken.tenant_id == current_tenant_id,
ApiToken.type == self.resource_type,
ApiToken.id == api_key_id,
)
@@ -734,7 +790,7 @@ class DatasetApiDeleteApi(Resource):
)
if key is None:
- api.abort(404, message="API key not found")
+ console_ns.abort(404, message="API key not found")
db.session.query(ApiToken).where(ApiToken.id == api_key_id).delete()
db.session.commit()
@@ -757,9 +813,9 @@ class DatasetEnableApiApi(Resource):
@console_ns.route("/datasets/api-base-info")
class DatasetApiBaseUrlApi(Resource):
- @api.doc("get_dataset_api_base_info")
- @api.doc(description="Get dataset API base information")
- @api.response(200, "API base info retrieved successfully")
+ @console_ns.doc("get_dataset_api_base_info")
+ @console_ns.doc(description="Get dataset API base information")
+ @console_ns.response(200, "API base info retrieved successfully")
@setup_required
@login_required
@account_initialization_required
@@ -769,120 +825,37 @@ class DatasetApiBaseUrlApi(Resource):
@console_ns.route("/datasets/retrieval-setting")
class DatasetRetrievalSettingApi(Resource):
- @api.doc("get_dataset_retrieval_setting")
- @api.doc(description="Get dataset retrieval settings")
- @api.response(200, "Retrieval settings retrieved successfully")
+ @console_ns.doc("get_dataset_retrieval_setting")
+ @console_ns.doc(description="Get dataset retrieval settings")
+ @console_ns.response(200, "Retrieval settings retrieved successfully")
@setup_required
@login_required
@account_initialization_required
def get(self):
vector_type = dify_config.VECTOR_STORE
- match vector_type:
- case (
- VectorType.RELYT
- | VectorType.TIDB_VECTOR
- | VectorType.CHROMA
- | VectorType.PGVECTO_RS
- | VectorType.VIKINGDB
- | VectorType.UPSTASH
- ):
- return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH]}
- case (
- VectorType.QDRANT
- | VectorType.WEAVIATE
- | VectorType.OPENSEARCH
- | VectorType.ANALYTICDB
- | VectorType.MYSCALE
- | VectorType.ORACLE
- | VectorType.ELASTICSEARCH
- | VectorType.ELASTICSEARCH_JA
- | VectorType.PGVECTOR
- | VectorType.VASTBASE
- | VectorType.TIDB_ON_QDRANT
- | VectorType.LINDORM
- | VectorType.COUCHBASE
- | VectorType.MILVUS
- | VectorType.OPENGAUSS
- | VectorType.OCEANBASE
- | VectorType.TABLESTORE
- | VectorType.HUAWEI_CLOUD
- | VectorType.TENCENT
- | VectorType.MATRIXONE
- | VectorType.CLICKZETTA
- | VectorType.BAIDU
- ):
- return {
- "retrieval_method": [
- RetrievalMethod.SEMANTIC_SEARCH,
- RetrievalMethod.FULL_TEXT_SEARCH,
- RetrievalMethod.HYBRID_SEARCH,
- ]
- }
- case _:
- raise ValueError(f"Unsupported vector db type {vector_type}.")
+ return _get_retrieval_methods_by_vector_type(vector_type, is_mock=False)
@console_ns.route("/datasets/retrieval-setting/")
class DatasetRetrievalSettingMockApi(Resource):
- @api.doc("get_dataset_retrieval_setting_mock")
- @api.doc(description="Get mock dataset retrieval settings by vector type")
- @api.doc(params={"vector_type": "Vector store type"})
- @api.response(200, "Mock retrieval settings retrieved successfully")
+ @console_ns.doc("get_dataset_retrieval_setting_mock")
+ @console_ns.doc(description="Get mock dataset retrieval settings by vector type")
+ @console_ns.doc(params={"vector_type": "Vector store type"})
+ @console_ns.response(200, "Mock retrieval settings retrieved successfully")
@setup_required
@login_required
@account_initialization_required
def get(self, vector_type):
- match vector_type:
- case (
- VectorType.MILVUS
- | VectorType.RELYT
- | VectorType.TIDB_VECTOR
- | VectorType.CHROMA
- | VectorType.PGVECTO_RS
- | VectorType.VIKINGDB
- | VectorType.UPSTASH
- ):
- return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH]}
- case (
- VectorType.QDRANT
- | VectorType.WEAVIATE
- | VectorType.OPENSEARCH
- | VectorType.ANALYTICDB
- | VectorType.MYSCALE
- | VectorType.ORACLE
- | VectorType.ELASTICSEARCH
- | VectorType.ELASTICSEARCH_JA
- | VectorType.COUCHBASE
- | VectorType.PGVECTOR
- | VectorType.VASTBASE
- | VectorType.LINDORM
- | VectorType.OPENGAUSS
- | VectorType.OCEANBASE
- | VectorType.TABLESTORE
- | VectorType.TENCENT
- | VectorType.HUAWEI_CLOUD
- | VectorType.MATRIXONE
- | VectorType.CLICKZETTA
- | VectorType.BAIDU
- ):
- return {
- "retrieval_method": [
- RetrievalMethod.SEMANTIC_SEARCH,
- RetrievalMethod.FULL_TEXT_SEARCH,
- RetrievalMethod.HYBRID_SEARCH,
- ]
- }
- case _:
- raise ValueError(f"Unsupported vector db type {vector_type}.")
+ return _get_retrieval_methods_by_vector_type(vector_type, is_mock=True)
@console_ns.route("/datasets//error-docs")
class DatasetErrorDocs(Resource):
- @api.doc("get_dataset_error_docs")
- @api.doc(description="Get dataset error documents")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Error documents retrieved successfully")
- @api.response(404, "Dataset not found")
+ @console_ns.doc("get_dataset_error_docs")
+ @console_ns.doc(description="Get dataset error documents")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Error documents retrieved successfully")
+ @console_ns.response(404, "Dataset not found")
@setup_required
@login_required
@account_initialization_required
@@ -898,16 +871,17 @@ class DatasetErrorDocs(Resource):
@console_ns.route("/datasets//permission-part-users")
class DatasetPermissionUserListApi(Resource):
- @api.doc("get_dataset_permission_users")
- @api.doc(description="Get dataset permission user list")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Permission users retrieved successfully")
- @api.response(404, "Dataset not found")
- @api.response(403, "Permission denied")
+ @console_ns.doc("get_dataset_permission_users")
+ @console_ns.doc(description="Get dataset permission user list")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Permission users retrieved successfully")
+ @console_ns.response(404, "Dataset not found")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
+ current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
@@ -926,11 +900,11 @@ class DatasetPermissionUserListApi(Resource):
@console_ns.route("/datasets//auto-disable-logs")
class DatasetAutoDisableLogApi(Resource):
- @api.doc("get_dataset_auto_disable_logs")
- @api.doc(description="Get dataset auto disable logs")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.response(200, "Auto disable logs retrieved successfully")
- @api.response(404, "Dataset not found")
+ @console_ns.doc("get_dataset_auto_disable_logs")
+ @console_ns.doc(description="Get dataset auto disable logs")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.response(200, "Auto disable logs retrieved successfully")
+ @console_ns.response(404, "Dataset not found")
@setup_required
@login_required
@account_initialization_required
diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py
index 011dacde76..6145da31a5 100644
--- a/api/controllers/console/datasets/datasets_document.py
+++ b/api/controllers/console/datasets/datasets_document.py
@@ -6,32 +6,14 @@ from typing import Literal, cast
import sqlalchemy as sa
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, fields, marshal, marshal_with, reqparse
+from flask_restx import Resource, fields, marshal, marshal_with
+from pydantic import BaseModel
from sqlalchemy import asc, desc, select
from werkzeug.exceptions import Forbidden, NotFound
import services
-from controllers.console import api, console_ns
-from controllers.console.app.error import (
- ProviderModelCurrentlyNotSupportError,
- ProviderNotInitializeError,
- ProviderQuotaExceededError,
-)
-from controllers.console.datasets.error import (
- ArchivedDocumentImmutableError,
- DocumentAlreadyFinishedError,
- DocumentIndexingError,
- IndexingEstimateError,
- InvalidActionError,
- InvalidMetadataError,
-)
-from controllers.console.wraps import (
- account_initialization_required,
- cloud_edition_billing_rate_limit_check,
- cloud_edition_billing_resource_check,
- setup_required,
-)
+from controllers.common.schema import register_schema_models
+from controllers.console import console_ns
from core.errors.error import (
LLMBadRequestError,
ModelCurrentlyNotSupportError,
@@ -46,25 +28,95 @@ from core.plugin.impl.exc import PluginDaemonClientSideError
from core.rag.extractor.entity.datasource_type import DatasourceType
from core.rag.extractor.entity.extract_setting import ExtractSetting, NotionInfo, WebsiteInfo
from extensions.ext_database import db
+from fields.dataset_fields import dataset_fields
from fields.document_fields import (
dataset_and_document_fields,
document_fields,
+ document_metadata_fields,
document_status_fields,
document_with_segments_fields,
)
from libs.datetime_utils import naive_utc_now
-from libs.login import login_required
-from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
-from models.account import Account
+from libs.login import current_account_with_tenant, login_required
+from models import DatasetProcessRule, Document, DocumentSegment, UploadFile
from models.dataset import DocumentPipelineExecutionLog
from services.dataset_service import DatasetService, DocumentService
-from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig
+from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig, ProcessRule, RetrievalModel
+
+from ..app.error import (
+ ProviderModelCurrentlyNotSupportError,
+ ProviderNotInitializeError,
+ ProviderQuotaExceededError,
+)
+from ..datasets.error import (
+ ArchivedDocumentImmutableError,
+ DocumentAlreadyFinishedError,
+ DocumentIndexingError,
+ IndexingEstimateError,
+ InvalidActionError,
+ InvalidMetadataError,
+)
+from ..wraps import (
+ account_initialization_required,
+ cloud_edition_billing_rate_limit_check,
+ cloud_edition_billing_resource_check,
+ setup_required,
+)
logger = logging.getLogger(__name__)
+def _get_or_create_model(model_name: str, field_def):
+ existing = console_ns.models.get(model_name)
+ if existing is None:
+ existing = console_ns.model(model_name, field_def)
+ return existing
+
+
+# Register models for flask_restx to avoid dict type issues in Swagger
+dataset_model = _get_or_create_model("Dataset", dataset_fields)
+
+document_metadata_model = _get_or_create_model("DocumentMetadata", document_metadata_fields)
+
+document_fields_copy = document_fields.copy()
+document_fields_copy["doc_metadata"] = fields.List(
+ fields.Nested(document_metadata_model), attribute="doc_metadata_details"
+)
+document_model = _get_or_create_model("Document", document_fields_copy)
+
+document_with_segments_fields_copy = document_with_segments_fields.copy()
+document_with_segments_fields_copy["doc_metadata"] = fields.List(
+ fields.Nested(document_metadata_model), attribute="doc_metadata_details"
+)
+document_with_segments_model = _get_or_create_model("DocumentWithSegments", document_with_segments_fields_copy)
+
+dataset_and_document_fields_copy = dataset_and_document_fields.copy()
+dataset_and_document_fields_copy["dataset"] = fields.Nested(dataset_model)
+dataset_and_document_fields_copy["documents"] = fields.List(fields.Nested(document_model))
+dataset_and_document_model = _get_or_create_model("DatasetAndDocument", dataset_and_document_fields_copy)
+
+
+class DocumentRetryPayload(BaseModel):
+ document_ids: list[str]
+
+
+class DocumentRenamePayload(BaseModel):
+ name: str
+
+
+register_schema_models(
+ console_ns,
+ KnowledgeConfig,
+ ProcessRule,
+ RetrievalModel,
+ DocumentRetryPayload,
+ DocumentRenamePayload,
+)
+
+
class DocumentResource(Resource):
def get_document(self, dataset_id: str, document_id: str) -> Document:
+ current_user, current_tenant_id = current_account_with_tenant()
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound("Dataset not found.")
@@ -79,12 +131,13 @@ class DocumentResource(Resource):
if not document:
raise NotFound("Document not found.")
- if document.tenant_id != current_user.current_tenant_id:
+ if document.tenant_id != current_tenant_id:
raise Forbidden("No permission.")
return document
def get_batch_documents(self, dataset_id: str, batch: str) -> Sequence[Document]:
+ current_user, _ = current_account_with_tenant()
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound("Dataset not found.")
@@ -104,14 +157,15 @@ class DocumentResource(Resource):
@console_ns.route("/datasets/process-rule")
class GetProcessRuleApi(Resource):
- @api.doc("get_process_rule")
- @api.doc(description="Get dataset document processing rules")
- @api.doc(params={"document_id": "Document ID (optional)"})
- @api.response(200, "Process rules retrieved successfully")
+ @console_ns.doc("get_process_rule")
+ @console_ns.doc(description="Get dataset document processing rules")
+ @console_ns.doc(params={"document_id": "Document ID (optional)"})
+ @console_ns.response(200, "Process rules retrieved successfully")
@setup_required
@login_required
@account_initialization_required
def get(self):
+ current_user, _ = current_account_with_tenant()
req_data = request.args
document_id = req_data.get("document_id")
@@ -151,9 +205,9 @@ class GetProcessRuleApi(Resource):
@console_ns.route("/datasets//documents")
class DatasetDocumentListApi(Resource):
- @api.doc("get_dataset_documents")
- @api.doc(description="Get documents in a dataset")
- @api.doc(
+ @console_ns.doc("get_dataset_documents")
+ @console_ns.doc(description="Get documents in a dataset")
+ @console_ns.doc(
params={
"dataset_id": "Dataset ID",
"page": "Page number (default: 1)",
@@ -161,18 +215,21 @@ class DatasetDocumentListApi(Resource):
"keyword": "Search keyword",
"sort": "Sort order (default: -created_at)",
"fetch": "Fetch full details (default: false)",
+ "status": "Filter documents by display status",
}
)
- @api.response(200, "Documents retrieved successfully")
+ @console_ns.response(200, "Documents retrieved successfully")
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
+ current_user, current_tenant_id = current_account_with_tenant()
dataset_id = str(dataset_id)
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
search = request.args.get("keyword", default=None, type=str)
sort = request.args.get("sort", default="-created_at", type=str)
+ status = request.args.get("status", default=None, type=str)
# "yes", "true", "t", "y", "1" convert to True, while others convert to False.
try:
fetch_val = request.args.get("fetch", default="false")
@@ -199,7 +256,10 @@ class DatasetDocumentListApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
- query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=current_user.current_tenant_id)
+ query = select(Document).filter_by(dataset_id=str(dataset_id), tenant_id=current_tenant_id)
+
+ if status:
+ query = DocumentService.apply_display_status_filter(query, status)
if search:
search = f"%{search}%"
@@ -269,10 +329,12 @@ class DatasetDocumentListApi(Resource):
@setup_required
@login_required
@account_initialization_required
- @marshal_with(dataset_and_document_fields)
+ @marshal_with(dataset_and_document_model)
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_rate_limit_check("knowledge")
+ @console_ns.expect(console_ns.models[KnowledgeConfig.__name__])
def post(self, dataset_id):
+ current_user, _ = current_account_with_tenant()
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -289,23 +351,7 @@ class DatasetDocumentListApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
- parser = reqparse.RequestParser()
- parser.add_argument(
- "indexing_technique", type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False, location="json"
- )
- parser.add_argument("data_source", type=dict, required=False, location="json")
- parser.add_argument("process_rule", type=dict, required=False, location="json")
- parser.add_argument("duplicate", type=bool, default=True, nullable=False, location="json")
- parser.add_argument("original_document_id", type=str, required=False, location="json")
- parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json")
- parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
- parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
- parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")
- parser.add_argument(
- "doc_language", type=str, default="English", required=False, nullable=False, location="json"
- )
- args = parser.parse_args()
- knowledge_config = KnowledgeConfig.model_validate(args)
+ knowledge_config = KnowledgeConfig.model_validate(console_ns.payload or {})
if not dataset.indexing_technique and not knowledge_config.indexing_technique:
raise ValueError("indexing_technique is required.")
@@ -349,64 +395,39 @@ class DatasetDocumentListApi(Resource):
@console_ns.route("/datasets/init")
class DatasetInitApi(Resource):
- @api.doc("init_dataset")
- @api.doc(description="Initialize dataset with documents")
- @api.expect(
- api.model(
- "DatasetInitRequest",
- {
- "upload_file_id": fields.String(required=True, description="Upload file ID"),
- "indexing_technique": fields.String(description="Indexing technique"),
- "process_rule": fields.Raw(description="Processing rules"),
- "data_source": fields.Raw(description="Data source configuration"),
- },
- )
- )
- @api.response(201, "Dataset initialized successfully", dataset_and_document_fields)
- @api.response(400, "Invalid request parameters")
+ @console_ns.doc("init_dataset")
+ @console_ns.doc(description="Initialize dataset with documents")
+ @console_ns.expect(console_ns.models[KnowledgeConfig.__name__])
+ @console_ns.response(201, "Dataset initialized successfully", dataset_and_document_model)
+ @console_ns.response(400, "Invalid request parameters")
@setup_required
@login_required
@account_initialization_required
- @marshal_with(dataset_and_document_fields)
+ @marshal_with(dataset_and_document_model)
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_rate_limit_check("knowledge")
def post(self):
# The role of the current user in the ta table must be admin, owner, dataset_operator, or editor
+ current_user, current_tenant_id = current_account_with_tenant()
if not current_user.is_dataset_editor:
raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument(
- "indexing_technique",
- type=str,
- choices=Dataset.INDEXING_TECHNIQUE_LIST,
- required=True,
- nullable=False,
- location="json",
- )
- parser.add_argument("data_source", type=dict, required=True, nullable=True, location="json")
- parser.add_argument("process_rule", type=dict, required=True, nullable=True, location="json")
- parser.add_argument("doc_form", type=str, default="text_model", required=False, nullable=False, location="json")
- parser.add_argument(
- "doc_language", type=str, default="English", required=False, nullable=False, location="json"
- )
- parser.add_argument("retrieval_model", type=dict, required=False, nullable=False, location="json")
- parser.add_argument("embedding_model", type=str, required=False, nullable=True, location="json")
- parser.add_argument("embedding_model_provider", type=str, required=False, nullable=True, location="json")
- args = parser.parse_args()
-
- knowledge_config = KnowledgeConfig.model_validate(args)
+ knowledge_config = KnowledgeConfig.model_validate(console_ns.payload or {})
if knowledge_config.indexing_technique == "high_quality":
if knowledge_config.embedding_model is None or knowledge_config.embedding_model_provider is None:
raise ValueError("embedding model and embedding model provider are required for high quality indexing.")
try:
model_manager = ModelManager()
model_manager.get_model_instance(
- tenant_id=current_user.current_tenant_id,
- provider=args["embedding_model_provider"],
+ tenant_id=current_tenant_id,
+ provider=knowledge_config.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
- model=args["embedding_model"],
+ model=knowledge_config.embedding_model,
)
+ is_multimodal = DatasetService.check_is_multimodal_model(
+ current_tenant_id, knowledge_config.embedding_model_provider, knowledge_config.embedding_model
+ )
+ knowledge_config.is_multimodal = is_multimodal
except InvokeAuthorizationError:
raise ProviderNotInitializeError(
"No Embedding Model available. Please configure a valid provider in the Settings -> Model Provider."
@@ -419,9 +440,9 @@ class DatasetInitApi(Resource):
try:
dataset, documents, batch = DocumentService.save_document_without_dataset_id(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
knowledge_config=knowledge_config,
- account=cast(Account, current_user),
+ account=current_user,
)
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
@@ -437,16 +458,17 @@ class DatasetInitApi(Resource):
@console_ns.route("/datasets//documents//indexing-estimate")
class DocumentIndexingEstimateApi(DocumentResource):
- @api.doc("estimate_document_indexing")
- @api.doc(description="Estimate document indexing cost")
- @api.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
- @api.response(200, "Indexing estimate calculated successfully")
- @api.response(404, "Document not found")
- @api.response(400, "Document already finished")
+ @console_ns.doc("estimate_document_indexing")
+ @console_ns.doc(description="Estimate document indexing cost")
+ @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+ @console_ns.response(200, "Indexing estimate calculated successfully")
+ @console_ns.response(404, "Document not found")
+ @console_ns.response(400, "Document already finished")
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id, document_id):
+ _, current_tenant_id = current_account_with_tenant()
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
@@ -482,7 +504,7 @@ class DocumentIndexingEstimateApi(DocumentResource):
try:
estimate_response = indexing_runner.indexing_estimate(
- current_user.current_tenant_id,
+ current_tenant_id,
[extract_setting],
data_process_rule_dict,
document.doc_form,
@@ -511,6 +533,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
@login_required
@account_initialization_required
def get(self, dataset_id, batch):
+ _, current_tenant_id = current_account_with_tenant()
dataset_id = str(dataset_id)
batch = str(batch)
documents = self.get_batch_documents(dataset_id, batch)
@@ -530,7 +553,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
file_id = data_source_info["upload_file_id"]
file_detail = (
db.session.query(UploadFile)
- .where(UploadFile.tenant_id == current_user.current_tenant_id, UploadFile.id == file_id)
+ .where(UploadFile.tenant_id == current_tenant_id, UploadFile.id == file_id)
.first()
)
@@ -553,7 +576,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
"notion_workspace_id": data_source_info["notion_workspace_id"],
"notion_obj_id": data_source_info["notion_page_id"],
"notion_page_type": data_source_info["type"],
- "tenant_id": current_user.current_tenant_id,
+ "tenant_id": current_tenant_id,
}
),
document_model=document.doc_form,
@@ -569,7 +592,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
"provider": data_source_info["provider"],
"job_id": data_source_info["job_id"],
"url": data_source_info["url"],
- "tenant_id": current_user.current_tenant_id,
+ "tenant_id": current_tenant_id,
"mode": data_source_info["mode"],
"only_main_content": data_source_info["only_main_content"],
}
@@ -583,7 +606,7 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
indexing_runner = IndexingRunner()
try:
response = indexing_runner.indexing_estimate(
- current_user.current_tenant_id,
+ current_tenant_id,
extract_settings,
data_process_rule_dict,
document.doc_form,
@@ -650,11 +673,11 @@ class DocumentBatchIndexingStatusApi(DocumentResource):
@console_ns.route("/datasets//documents//indexing-status")
class DocumentIndexingStatusApi(DocumentResource):
- @api.doc("get_document_indexing_status")
- @api.doc(description="Get document indexing status")
- @api.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
- @api.response(200, "Indexing status retrieved successfully")
- @api.response(404, "Document not found")
+ @console_ns.doc("get_document_indexing_status")
+ @console_ns.doc(description="Get document indexing status")
+ @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+ @console_ns.response(200, "Indexing status retrieved successfully")
+ @console_ns.response(404, "Document not found")
@setup_required
@login_required
@account_initialization_required
@@ -700,17 +723,17 @@ class DocumentIndexingStatusApi(DocumentResource):
class DocumentApi(DocumentResource):
METADATA_CHOICES = {"all", "only", "without"}
- @api.doc("get_document")
- @api.doc(description="Get document details")
- @api.doc(
+ @console_ns.doc("get_document")
+ @console_ns.doc(description="Get document details")
+ @console_ns.doc(
params={
"dataset_id": "Dataset ID",
"document_id": "Document ID",
"metadata": "Metadata inclusion (all/only/without)",
}
)
- @api.response(200, "Document retrieved successfully")
- @api.response(404, "Document not found")
+ @console_ns.response(200, "Document retrieved successfully")
+ @console_ns.response(404, "Document not found")
@setup_required
@login_required
@account_initialization_required
@@ -740,7 +763,7 @@ class DocumentApi(DocumentResource):
"name": document.name,
"created_from": document.created_from,
"created_by": document.created_by,
- "created_at": document.created_at.timestamp(),
+ "created_at": int(document.created_at.timestamp()),
"tokens": document.tokens,
"indexing_status": document.indexing_status,
"completed_at": int(document.completed_at.timestamp()) if document.completed_at else None,
@@ -773,7 +796,7 @@ class DocumentApi(DocumentResource):
"name": document.name,
"created_from": document.created_from,
"created_by": document.created_by,
- "created_at": document.created_at.timestamp(),
+ "created_at": int(document.created_at.timestamp()),
"tokens": document.tokens,
"indexing_status": document.indexing_status,
"completed_at": int(document.completed_at.timestamp()) if document.completed_at else None,
@@ -821,19 +844,20 @@ class DocumentApi(DocumentResource):
@console_ns.route("/datasets//documents//processing/")
class DocumentProcessingApi(DocumentResource):
- @api.doc("update_document_processing")
- @api.doc(description="Update document processing status (pause/resume)")
- @api.doc(
+ @console_ns.doc("update_document_processing")
+ @console_ns.doc(description="Update document processing status (pause/resume)")
+ @console_ns.doc(
params={"dataset_id": "Dataset ID", "document_id": "Document ID", "action": "Action to perform (pause/resume)"}
)
- @api.response(200, "Processing status updated successfully")
- @api.response(404, "Document not found")
- @api.response(400, "Invalid action")
+ @console_ns.response(200, "Processing status updated successfully")
+ @console_ns.response(404, "Document not found")
+ @console_ns.response(400, "Invalid action")
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def patch(self, dataset_id, document_id, action: Literal["pause", "resume"]):
+ current_user, _ = current_account_with_tenant()
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
@@ -865,11 +889,11 @@ class DocumentProcessingApi(DocumentResource):
@console_ns.route("/datasets//documents//metadata")
class DocumentMetadataApi(DocumentResource):
- @api.doc("update_document_metadata")
- @api.doc(description="Update document metadata")
- @api.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
- @api.expect(
- api.model(
+ @console_ns.doc("update_document_metadata")
+ @console_ns.doc(description="Update document metadata")
+ @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+ @console_ns.expect(
+ console_ns.model(
"UpdateDocumentMetadataRequest",
{
"doc_type": fields.String(description="Document type"),
@@ -877,13 +901,14 @@ class DocumentMetadataApi(DocumentResource):
},
)
)
- @api.response(200, "Document metadata updated successfully")
- @api.response(404, "Document not found")
- @api.response(403, "Permission denied")
+ @console_ns.response(200, "Document metadata updated successfully")
+ @console_ns.response(404, "Document not found")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
def put(self, dataset_id, document_id):
+ current_user, _ = current_account_with_tenant()
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
@@ -931,6 +956,7 @@ class DocumentStatusApi(DocumentResource):
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_rate_limit_check("knowledge")
def patch(self, dataset_id, action: Literal["enable", "disable", "archive", "un_archive"]):
+ current_user, _ = current_account_with_tenant()
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
if dataset is None:
@@ -1031,18 +1057,16 @@ class DocumentRetryApi(DocumentResource):
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
+ @console_ns.expect(console_ns.models[DocumentRetryPayload.__name__])
def post(self, dataset_id):
"""retry document."""
-
- parser = reqparse.RequestParser()
- parser.add_argument("document_ids", type=list, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = DocumentRetryPayload.model_validate(console_ns.payload or {})
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
retry_documents = []
if not dataset:
raise NotFound("Dataset not found.")
- for document_id in args["document_ids"]:
+ for document_id in payload.document_ids:
try:
document_id = str(document_id)
@@ -1075,20 +1099,20 @@ class DocumentRenameApi(DocumentResource):
@login_required
@account_initialization_required
@marshal_with(document_fields)
+ @console_ns.expect(console_ns.models[DocumentRenamePayload.__name__])
def post(self, dataset_id, document_id):
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
+ current_user, _ = current_account_with_tenant()
if not current_user.is_dataset_editor:
raise Forbidden()
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound("Dataset not found.")
- DatasetService.check_dataset_operator_permission(cast(Account, current_user), dataset)
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ DatasetService.check_dataset_operator_permission(current_user, dataset)
+ payload = DocumentRenamePayload.model_validate(console_ns.payload or {})
try:
- document = DocumentService.rename_document(dataset_id, document_id, args["name"])
+ document = DocumentService.rename_document(dataset_id, document_id, payload.name)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError("Cannot delete document during indexing.")
@@ -1102,6 +1126,7 @@ class WebsiteDocumentSyncApi(DocumentResource):
@account_initialization_required
def get(self, dataset_id, document_id):
"""sync website document."""
+ _, current_tenant_id = current_account_with_tenant()
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
@@ -1110,7 +1135,7 @@ class WebsiteDocumentSyncApi(DocumentResource):
document = DocumentService.get_document(dataset.id, document_id)
if not document:
raise NotFound("Document not found.")
- if document.tenant_id != current_user.current_tenant_id:
+ if document.tenant_id != current_tenant_id:
raise Forbidden("No permission.")
if document.data_source_type != "website_crawl":
raise ValueError("Document is not a website document.")
diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py
index d6bd02483d..e73abc2555 100644
--- a/api/controllers/console/datasets/datasets_segments.py
+++ b/api/controllers/console/datasets/datasets_segments.py
@@ -1,12 +1,13 @@
import uuid
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, marshal, reqparse
+from flask_restx import Resource, marshal
+from pydantic import BaseModel, Field
from sqlalchemy import select
from werkzeug.exceptions import Forbidden, NotFound
import services
+from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.app.error import ProviderNotInitializeError
from controllers.console.datasets.error import (
@@ -27,7 +28,7 @@ from core.model_runtime.entities.model_entities import ModelType
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from fields.segment_fields import child_chunk_fields, segment_fields
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.dataset import ChildChunk, DocumentSegment
from models.model import UploadFile
from services.dataset_service import DatasetService, DocumentService, SegmentService
@@ -37,12 +38,66 @@ from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingS
from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task
+class SegmentListQuery(BaseModel):
+ limit: int = Field(default=20, ge=1, le=100)
+ status: list[str] = Field(default_factory=list)
+ hit_count_gte: int | None = None
+ enabled: str = Field(default="all")
+ keyword: str | None = None
+ page: int = Field(default=1, ge=1)
+
+
+class SegmentCreatePayload(BaseModel):
+ content: str
+ answer: str | None = None
+ keywords: list[str] | None = None
+ attachment_ids: list[str] | None = None
+
+
+class SegmentUpdatePayload(BaseModel):
+ content: str
+ answer: str | None = None
+ keywords: list[str] | None = None
+ regenerate_child_chunks: bool = False
+ attachment_ids: list[str] | None = None
+
+
+class BatchImportPayload(BaseModel):
+ upload_file_id: str
+
+
+class ChildChunkCreatePayload(BaseModel):
+ content: str
+
+
+class ChildChunkUpdatePayload(BaseModel):
+ content: str
+
+
+class ChildChunkBatchUpdatePayload(BaseModel):
+ chunks: list[ChildChunkUpdateArgs]
+
+
+register_schema_models(
+ console_ns,
+ SegmentListQuery,
+ SegmentCreatePayload,
+ SegmentUpdatePayload,
+ BatchImportPayload,
+ ChildChunkCreatePayload,
+ ChildChunkUpdatePayload,
+ ChildChunkBatchUpdatePayload,
+)
+
+
@console_ns.route("/datasets//documents//segments")
class DatasetDocumentSegmentListApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id, document_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
dataset_id = str(dataset_id)
document_id = str(document_id)
dataset = DatasetService.get_dataset(dataset_id)
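
The query-string models defined above (`SegmentListQuery`) push `request.args` through Pydantic as well. Query values arrive as strings, and multi-valued parameters need `getlist()`, since `request.args.to_dict()` keeps only one value per key; a runnable sketch:

```python
# Validating query-string arguments with a Pydantic model, mirroring
# SegmentListQuery above.
from pydantic import BaseModel, Field

class SegmentListQuerySketch(BaseModel):
    limit: int = Field(default=20, ge=1, le=100)
    status: list[str] = Field(default_factory=list)
    page: int = Field(default=1, ge=1)

raw = {"limit": "50", "page": "2"}    # as from request.args.to_dict()
multi = ["enabled", "archived"]       # as from request.args.getlist("status")
args = SegmentListQuerySketch.model_validate({**raw, "status": multi})
assert args.limit == 50 and args.page == 2  # strings coerced to ints
```
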
@@ -59,27 +114,24 @@ class DatasetDocumentSegmentListApi(Resource):
if not document:
raise NotFound("Document not found.")
- parser = reqparse.RequestParser()
- parser.add_argument("limit", type=int, default=20, location="args")
- parser.add_argument("status", type=str, action="append", default=[], location="args")
- parser.add_argument("hit_count_gte", type=int, default=None, location="args")
- parser.add_argument("enabled", type=str, default="all", location="args")
- parser.add_argument("keyword", type=str, default=None, location="args")
- parser.add_argument("page", type=int, default=1, location="args")
+ args = SegmentListQuery.model_validate(
+ {
+ **request.args.to_dict(),
+ "status": request.args.getlist("status"),
+ }
+ )
- args = parser.parse_args()
-
- page = args["page"]
- limit = min(args["limit"], 100)
- status_list = args["status"]
- hit_count_gte = args["hit_count_gte"]
- keyword = args["keyword"]
+ page = args.page
+ limit = min(args.limit, 100)
+ status_list = args.status
+ hit_count_gte = args.hit_count_gte
+ keyword = args.keyword
query = (
select(DocumentSegment)
.where(
DocumentSegment.document_id == str(document_id),
- DocumentSegment.tenant_id == current_user.current_tenant_id,
+ DocumentSegment.tenant_id == current_tenant_id,
)
.order_by(DocumentSegment.position.asc())
)
@@ -93,10 +145,10 @@ class DatasetDocumentSegmentListApi(Resource):
if keyword:
query = query.where(DocumentSegment.content.ilike(f"%{keyword}%"))
- if args["enabled"].lower() != "all":
- if args["enabled"].lower() == "true":
+ if args.enabled.lower() != "all":
+ if args.enabled.lower() == "true":
query = query.where(DocumentSegment.enabled == True)
- elif args["enabled"].lower() == "false":
+ elif args.enabled.lower() == "false":
query = query.where(DocumentSegment.enabled == False)
segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)
@@ -115,6 +167,8 @@ class DatasetDocumentSegmentListApi(Resource):
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def delete(self, dataset_id, document_id):
+ current_user, _ = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -148,6 +202,8 @@ class DatasetDocumentSegmentApi(Resource):
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_rate_limit_check("knowledge")
def patch(self, dataset_id, document_id, action):
+ current_user, current_tenant_id = current_account_with_tenant()
+
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
@@ -171,7 +227,7 @@ class DatasetDocumentSegmentApi(Resource):
try:
model_manager = ModelManager()
model_manager.get_model_instance(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model,
@@ -203,7 +259,10 @@ class DatasetDocumentSegmentAddApi(Resource):
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_knowledge_limit_check("add_segment")
@cloud_edition_billing_rate_limit_check("knowledge")
+ @console_ns.expect(console_ns.models[SegmentCreatePayload.__name__])
def post(self, dataset_id, document_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -221,7 +280,7 @@ class DatasetDocumentSegmentAddApi(Resource):
try:
model_manager = ModelManager()
model_manager.get_model_instance(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model,
@@ -237,13 +296,10 @@ class DatasetDocumentSegmentAddApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# validate args
- parser = reqparse.RequestParser()
- parser.add_argument("content", type=str, required=True, nullable=False, location="json")
- parser.add_argument("answer", type=str, required=False, nullable=True, location="json")
- parser.add_argument("keywords", type=list, required=False, nullable=True, location="json")
- args = parser.parse_args()
- SegmentService.segment_create_args_validate(args, document)
- segment = SegmentService.create_segment(args, document, dataset)
+ payload = SegmentCreatePayload.model_validate(console_ns.payload or {})
+ payload_dict = payload.model_dump(exclude_none=True)
+ SegmentService.segment_create_args_validate(payload_dict, document)
+ segment = SegmentService.create_segment(payload_dict, document, dataset)
return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
@@ -254,7 +310,10 @@ class DatasetDocumentSegmentUpdateApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_rate_limit_check("knowledge")
+ @console_ns.expect(console_ns.models[SegmentUpdatePayload.__name__])
def patch(self, dataset_id, document_id, segment_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -272,7 +331,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
try:
model_manager = ModelManager()
model_manager.get_model_instance(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model,
@@ -287,7 +346,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
segment_id = str(segment_id)
segment = (
db.session.query(DocumentSegment)
- .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id)
+ .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
.first()
)
if not segment:
@@ -300,16 +359,12 @@ class DatasetDocumentSegmentUpdateApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# validate args
- parser = reqparse.RequestParser()
- parser.add_argument("content", type=str, required=True, nullable=False, location="json")
- parser.add_argument("answer", type=str, required=False, nullable=True, location="json")
- parser.add_argument("keywords", type=list, required=False, nullable=True, location="json")
- parser.add_argument(
- "regenerate_child_chunks", type=bool, required=False, nullable=True, default=False, location="json"
+ payload = SegmentUpdatePayload.model_validate(console_ns.payload or {})
+ payload_dict = payload.model_dump(exclude_none=True)
+ SegmentService.segment_create_args_validate(payload_dict, document)
+ segment = SegmentService.update_segment(
+ SegmentUpdateArgs.model_validate(payload_dict), segment, document, dataset
)
- args = parser.parse_args()
- SegmentService.segment_create_args_validate(args, document)
- segment = SegmentService.update_segment(SegmentUpdateArgs.model_validate(args), segment, document, dataset)
return {"data": marshal(segment, segment_fields), "doc_form": document.doc_form}, 200
@setup_required
@@ -317,6 +372,8 @@ class DatasetDocumentSegmentUpdateApi(Resource):
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def delete(self, dataset_id, document_id, segment_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -333,7 +390,7 @@ class DatasetDocumentSegmentUpdateApi(Resource):
segment_id = str(segment_id)
segment = (
db.session.query(DocumentSegment)
- .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id)
+ .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
.first()
)
if not segment:
@@ -360,7 +417,10 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_knowledge_limit_check("add_segment")
@cloud_edition_billing_rate_limit_check("knowledge")
+ @console_ns.expect(console_ns.models[BatchImportPayload.__name__])
def post(self, dataset_id, document_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -372,10 +432,8 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
if not document:
raise NotFound("Document not found.")
- parser = reqparse.RequestParser()
- parser.add_argument("upload_file_id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
- upload_file_id = args["upload_file_id"]
+ payload = BatchImportPayload.model_validate(console_ns.payload or {})
+ upload_file_id = payload.upload_file_id
upload_file = db.session.query(UploadFile).where(UploadFile.id == upload_file_id).first()
if not upload_file:
@@ -396,7 +454,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
upload_file_id,
dataset_id,
document_id,
- current_user.current_tenant_id,
+ current_tenant_id,
current_user.id,
)
except Exception as e:
@@ -426,7 +484,10 @@ class ChildChunkAddApi(Resource):
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_knowledge_limit_check("add_segment")
@cloud_edition_billing_rate_limit_check("knowledge")
+ @console_ns.expect(console_ns.models[ChildChunkCreatePayload.__name__])
def post(self, dataset_id, document_id, segment_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -441,7 +502,7 @@ class ChildChunkAddApi(Resource):
segment_id = str(segment_id)
segment = (
db.session.query(DocumentSegment)
- .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id)
+ .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
.first()
)
if not segment:
@@ -453,7 +514,7 @@ class ChildChunkAddApi(Resource):
try:
model_manager = ModelManager()
model_manager.get_model_instance(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=dataset.embedding_model_provider,
model_type=ModelType.TEXT_EMBEDDING,
model=dataset.embedding_model,
@@ -469,12 +530,9 @@ class ChildChunkAddApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# validate args
- parser = reqparse.RequestParser()
- parser.add_argument("content", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
try:
- content = args["content"]
- child_chunk = SegmentService.create_child_chunk(content, segment, document, dataset)
+ payload = ChildChunkCreatePayload.model_validate(console_ns.payload or {})
+ child_chunk = SegmentService.create_child_chunk(payload.content, segment, document, dataset)
except ChildChunkIndexingServiceError as e:
raise ChildChunkIndexingError(str(e))
return {"data": marshal(child_chunk, child_chunk_fields)}, 200
@@ -483,6 +541,8 @@ class ChildChunkAddApi(Resource):
@login_required
@account_initialization_required
def get(self, dataset_id, document_id, segment_id):
+ _, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -499,21 +559,22 @@ class ChildChunkAddApi(Resource):
segment_id = str(segment_id)
segment = (
db.session.query(DocumentSegment)
- .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id)
+ .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
.first()
)
if not segment:
raise NotFound("Segment not found.")
- parser = reqparse.RequestParser()
- parser.add_argument("limit", type=int, default=20, location="args")
- parser.add_argument("keyword", type=str, default=None, location="args")
- parser.add_argument("page", type=int, default=1, location="args")
+ args = SegmentListQuery.model_validate(
+ {
+ "limit": request.args.get("limit", default=20, type=int),
+ "keyword": request.args.get("keyword"),
+ "page": request.args.get("page", default=1, type=int),
+ }
+ )
- args = parser.parse_args()
-
- page = args["page"]
- limit = min(args["limit"], 100)
- keyword = args["keyword"]
+ page = args.page
+ limit = min(args.limit, 100)
+ keyword = args.keyword
child_chunks = SegmentService.get_child_chunks(segment_id, document_id, dataset_id, page, limit, keyword)
return {
@@ -530,6 +591,8 @@ class ChildChunkAddApi(Resource):
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_rate_limit_check("knowledge")
def patch(self, dataset_id, document_id, segment_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -546,7 +609,7 @@ class ChildChunkAddApi(Resource):
segment_id = str(segment_id)
segment = (
db.session.query(DocumentSegment)
- .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id)
+ .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
.first()
)
if not segment:
@@ -559,13 +622,9 @@ class ChildChunkAddApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# validate args
- parser = reqparse.RequestParser()
- parser.add_argument("chunks", type=list, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = ChildChunkBatchUpdatePayload.model_validate(console_ns.payload or {})
try:
- chunks_data = args["chunks"]
- chunks = [ChildChunkUpdateArgs.model_validate(chunk) for chunk in chunks_data]
- child_chunks = SegmentService.update_child_chunks(chunks, segment, document, dataset)
+ child_chunks = SegmentService.update_child_chunks(payload.chunks, segment, document, dataset)
except ChildChunkIndexingServiceError as e:
raise ChildChunkIndexingError(str(e))
return {"data": marshal(child_chunks, child_chunk_fields)}, 200
@@ -580,6 +639,8 @@ class ChildChunkUpdateApi(Resource):
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def delete(self, dataset_id, document_id, segment_id, child_chunk_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -596,7 +657,7 @@ class ChildChunkUpdateApi(Resource):
segment_id = str(segment_id)
segment = (
db.session.query(DocumentSegment)
- .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id)
+ .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
.first()
)
if not segment:
@@ -607,7 +668,7 @@ class ChildChunkUpdateApi(Resource):
db.session.query(ChildChunk)
.where(
ChildChunk.id == str(child_chunk_id),
- ChildChunk.tenant_id == current_user.current_tenant_id,
+ ChildChunk.tenant_id == current_tenant_id,
ChildChunk.segment_id == segment.id,
ChildChunk.document_id == document_id,
)
@@ -633,7 +694,10 @@ class ChildChunkUpdateApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check("vector_space")
@cloud_edition_billing_rate_limit_check("knowledge")
+ @console_ns.expect(console_ns.models[ChildChunkUpdatePayload.__name__])
def patch(self, dataset_id, document_id, segment_id, child_chunk_id):
+ current_user, current_tenant_id = current_account_with_tenant()
+
# check dataset
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
@@ -650,7 +714,7 @@ class ChildChunkUpdateApi(Resource):
segment_id = str(segment_id)
segment = (
db.session.query(DocumentSegment)
- .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_user.current_tenant_id)
+ .where(DocumentSegment.id == str(segment_id), DocumentSegment.tenant_id == current_tenant_id)
.first()
)
if not segment:
@@ -661,7 +725,7 @@ class ChildChunkUpdateApi(Resource):
db.session.query(ChildChunk)
.where(
ChildChunk.id == str(child_chunk_id),
- ChildChunk.tenant_id == current_user.current_tenant_id,
+ ChildChunk.tenant_id == current_tenant_id,
ChildChunk.segment_id == segment.id,
ChildChunk.document_id == document_id,
)
@@ -677,12 +741,9 @@ class ChildChunkUpdateApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# validate args
- parser = reqparse.RequestParser()
- parser.add_argument("content", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
try:
- content = args["content"]
- child_chunk = SegmentService.update_child_chunk(content, child_chunk, segment, document, dataset)
+ payload = ChildChunkUpdatePayload.model_validate(console_ns.payload or {})
+ child_chunk = SegmentService.update_child_chunk(payload.content, child_chunk, segment, document, dataset)
except ChildChunkIndexingServiceError as e:
raise ChildChunkIndexingError(str(e))
return {"data": marshal(child_chunk, child_chunk_fields)}, 200
diff --git a/api/controllers/console/datasets/external.py b/api/controllers/console/datasets/external.py
index adf9f53523..89c9fcad36 100644
--- a/api/controllers/console/datasets/external.py
+++ b/api/controllers/console/datasets/external.py
@@ -1,51 +1,135 @@
-from typing import cast
-
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, fields, marshal, reqparse
+from flask_restx import Resource, fields, marshal
+from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services
-from controllers.console import api, console_ns
+from controllers.common.schema import register_schema_models
+from controllers.console import console_ns
from controllers.console.datasets.error import DatasetNameDuplicateError
-from controllers.console.wraps import account_initialization_required, setup_required
-from fields.dataset_fields import dataset_detail_fields
-from libs.login import login_required
-from models.account import Account
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
+from fields.dataset_fields import (
+ dataset_detail_fields,
+ dataset_retrieval_model_fields,
+ doc_metadata_fields,
+ external_knowledge_info_fields,
+ external_retrieval_model_fields,
+ icon_info_fields,
+ keyword_setting_fields,
+ reranking_model_fields,
+ tag_fields,
+ vector_setting_fields,
+ weighted_score_fields,
+)
+from libs.login import current_account_with_tenant, login_required
from services.dataset_service import DatasetService
from services.external_knowledge_service import ExternalDatasetService
from services.hit_testing_service import HitTestingService
from services.knowledge_service import ExternalDatasetTestService
-def _validate_name(name: str) -> str:
- if not name or len(name) < 1 or len(name) > 100:
- raise ValueError("Name must be between 1 to 100 characters.")
- return name
+def _get_or_create_model(model_name: str, field_def):
+ existing = console_ns.models.get(model_name)
+ if existing is None:
+ existing = console_ns.model(model_name, field_def)
+ return existing
+
+
+def _build_dataset_detail_model():
+ keyword_setting_model = _get_or_create_model("DatasetKeywordSetting", keyword_setting_fields)
+ vector_setting_model = _get_or_create_model("DatasetVectorSetting", vector_setting_fields)
+
+ weighted_score_fields_copy = weighted_score_fields.copy()
+ weighted_score_fields_copy["keyword_setting"] = fields.Nested(keyword_setting_model)
+ weighted_score_fields_copy["vector_setting"] = fields.Nested(vector_setting_model)
+ weighted_score_model = _get_or_create_model("DatasetWeightedScore", weighted_score_fields_copy)
+
+ reranking_model = _get_or_create_model("DatasetRerankingModel", reranking_model_fields)
+
+ dataset_retrieval_model_fields_copy = dataset_retrieval_model_fields.copy()
+ dataset_retrieval_model_fields_copy["reranking_model"] = fields.Nested(reranking_model)
+ dataset_retrieval_model_fields_copy["weights"] = fields.Nested(weighted_score_model, allow_null=True)
+ dataset_retrieval_model = _get_or_create_model("DatasetRetrievalModel", dataset_retrieval_model_fields_copy)
+
+ tag_model = _get_or_create_model("Tag", tag_fields)
+ doc_metadata_model = _get_or_create_model("DatasetDocMetadata", doc_metadata_fields)
+ external_knowledge_info_model = _get_or_create_model("ExternalKnowledgeInfo", external_knowledge_info_fields)
+ external_retrieval_model = _get_or_create_model("ExternalRetrievalModel", external_retrieval_model_fields)
+ icon_info_model = _get_or_create_model("DatasetIconInfo", icon_info_fields)
+
+ dataset_detail_fields_copy = dataset_detail_fields.copy()
+ dataset_detail_fields_copy["retrieval_model_dict"] = fields.Nested(dataset_retrieval_model)
+ dataset_detail_fields_copy["tags"] = fields.List(fields.Nested(tag_model))
+ dataset_detail_fields_copy["external_knowledge_info"] = fields.Nested(external_knowledge_info_model)
+ dataset_detail_fields_copy["external_retrieval_model"] = fields.Nested(external_retrieval_model, allow_null=True)
+ dataset_detail_fields_copy["doc_metadata"] = fields.List(fields.Nested(doc_metadata_model))
+ dataset_detail_fields_copy["icon_info"] = fields.Nested(icon_info_model)
+ return _get_or_create_model("DatasetDetail", dataset_detail_fields_copy)
+
+
+dataset_detail_model = _build_dataset_detail_model()
+
+
+class ExternalKnowledgeApiPayload(BaseModel):
+ name: str = Field(..., min_length=1, max_length=40)
+ settings: dict[str, object]
+
+
+class ExternalDatasetCreatePayload(BaseModel):
+ external_knowledge_api_id: str
+ external_knowledge_id: str
+ name: str = Field(..., min_length=1, max_length=40)
+ description: str | None = Field(None, max_length=400)
+ external_retrieval_model: dict[str, object] | None = None
+
+
+class ExternalHitTestingPayload(BaseModel):
+ query: str
+ external_retrieval_model: dict[str, object] | None = None
+ metadata_filtering_conditions: dict[str, object] | None = None
+
+
+class BedrockRetrievalPayload(BaseModel):
+ retrieval_setting: dict[str, object]
+ query: str
+ knowledge_id: str
+
+
+register_schema_models(
+ console_ns,
+ ExternalKnowledgeApiPayload,
+ ExternalDatasetCreatePayload,
+ ExternalHitTestingPayload,
+ BedrockRetrievalPayload,
+)
@console_ns.route("/datasets/external-knowledge-api")
class ExternalApiTemplateListApi(Resource):
- @api.doc("get_external_api_templates")
- @api.doc(description="Get external knowledge API templates")
- @api.doc(
+ @console_ns.doc("get_external_api_templates")
+ @console_ns.doc(description="Get external knowledge API templates")
+ @console_ns.doc(
params={
"page": "Page number (default: 1)",
"limit": "Number of items per page (default: 20)",
"keyword": "Search keyword",
}
)
- @api.response(200, "External API templates retrieved successfully")
+ @console_ns.response(200, "External API templates retrieved successfully")
@setup_required
@login_required
@account_initialization_required
def get(self):
+ _, current_tenant_id = current_account_with_tenant()
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
search = request.args.get("keyword", default=None, type=str)
external_knowledge_apis, total = ExternalDatasetService.get_external_knowledge_apis(
- page, limit, current_user.current_tenant_id, search
+ page, limit, current_tenant_id, search
)
response = {
"data": [item.to_dict() for item in external_knowledge_apis],
@@ -59,25 +143,12 @@ class ExternalApiTemplateListApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @console_ns.expect(console_ns.models[ExternalKnowledgeApiPayload.__name__])
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name",
- nullable=False,
- required=True,
- help="Name is required. Name must be between 1 to 100 characters.",
- type=_validate_name,
- )
- parser.add_argument(
- "settings",
- type=dict,
- location="json",
- nullable=False,
- required=True,
- )
- args = parser.parse_args()
+ current_user, current_tenant_id = current_account_with_tenant()
+ payload = ExternalKnowledgeApiPayload.model_validate(console_ns.payload or {})
- ExternalDatasetService.validate_api_list(args["settings"])
+ ExternalDatasetService.validate_api_list(payload.settings)
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
if not current_user.is_dataset_editor:
@@ -85,7 +156,7 @@ class ExternalApiTemplateListApi(Resource):
try:
external_knowledge_api = ExternalDatasetService.create_external_knowledge_api(
- tenant_id=current_user.current_tenant_id, user_id=current_user.id, args=args
+ tenant_id=current_tenant_id, user_id=current_user.id, args=payload.model_dump()
)
except services.errors.dataset.DatasetNameDuplicateError:
raise DatasetNameDuplicateError()
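
`_build_dataset_detail_model` composes the flat `dataset_detail_fields` dict into a tree of registered sub-models, so Swagger renders real nested schemas instead of opaque `Raw` objects. The pattern in miniature, with illustrative field names:

```python
# Composing nested flask_restx models, as _build_dataset_detail_model does.
from flask_restx import Namespace, fields

ns = Namespace("example")

tag_model = ns.model("TagSketch", {"id": fields.String(), "name": fields.String()})

flat = {"id": fields.String(), "name": fields.String()}
detail = flat.copy()                                    # leave the source dict intact
detail["tags"] = fields.List(fields.Nested(tag_model))  # nested, not fields.Raw
detail_model = ns.model("DatasetDetailSketch", detail)
```
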
@@ -95,11 +166,11 @@ class ExternalApiTemplateListApi(Resource):
@console_ns.route("/datasets/external-knowledge-api/")
class ExternalApiTemplateApi(Resource):
- @api.doc("get_external_api_template")
- @api.doc(description="Get external knowledge API template details")
- @api.doc(params={"external_knowledge_api_id": "External knowledge API ID"})
- @api.response(200, "External API template retrieved successfully")
- @api.response(404, "Template not found")
+ @console_ns.doc("get_external_api_template")
+ @console_ns.doc(description="Get external knowledge API template details")
+ @console_ns.doc(params={"external_knowledge_api_id": "External knowledge API ID"})
+ @console_ns.response(200, "External API template retrieved successfully")
+ @console_ns.response(404, "Template not found")
@setup_required
@login_required
@account_initialization_required
@@ -114,32 +185,19 @@ class ExternalApiTemplateApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @console_ns.expect(console_ns.models[ExternalKnowledgeApiPayload.__name__])
def patch(self, external_knowledge_api_id):
+ current_user, current_tenant_id = current_account_with_tenant()
external_knowledge_api_id = str(external_knowledge_api_id)
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name",
- nullable=False,
- required=True,
- help="type is required. Name must be between 1 to 100 characters.",
- type=_validate_name,
- )
- parser.add_argument(
- "settings",
- type=dict,
- location="json",
- nullable=False,
- required=True,
- )
- args = parser.parse_args()
- ExternalDatasetService.validate_api_list(args["settings"])
+ payload = ExternalKnowledgeApiPayload.model_validate(console_ns.payload or {})
+ ExternalDatasetService.validate_api_list(payload.settings)
external_knowledge_api = ExternalDatasetService.update_external_knowledge_api(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
user_id=current_user.id,
external_knowledge_api_id=external_knowledge_api_id,
- args=args,
+ args=payload.model_dump(),
)
return external_knowledge_api.to_dict(), 200
@@ -148,22 +206,22 @@ class ExternalApiTemplateApi(Resource):
@login_required
@account_initialization_required
def delete(self, external_knowledge_api_id):
+ current_user, current_tenant_id = current_account_with_tenant()
external_knowledge_api_id = str(external_knowledge_api_id)
- # The role of the current user in the ta table must be admin, owner, or editor
- if not (current_user.is_editor or current_user.is_dataset_operator):
+ if not (current_user.has_edit_permission or current_user.is_dataset_operator):
raise Forbidden()
- ExternalDatasetService.delete_external_knowledge_api(current_user.current_tenant_id, external_knowledge_api_id)
+ ExternalDatasetService.delete_external_knowledge_api(current_tenant_id, external_knowledge_api_id)
return {"result": "success"}, 204
@console_ns.route("/datasets/external-knowledge-api//use-check")
class ExternalApiUseCheckApi(Resource):
- @api.doc("check_external_api_usage")
- @api.doc(description="Check if external knowledge API is being used")
- @api.doc(params={"external_knowledge_api_id": "External knowledge API ID"})
- @api.response(200, "Usage check completed successfully")
+ @console_ns.doc("check_external_api_usage")
+ @console_ns.doc(description="Check if external knowledge API is being used")
+ @console_ns.doc(params={"external_knowledge_api_id": "External knowledge API ID"})
+ @console_ns.response(200, "Usage check completed successfully")
@setup_required
@login_required
@account_initialization_required
@@ -178,44 +236,21 @@ class ExternalApiUseCheckApi(Resource):
@console_ns.route("/datasets/external")
class ExternalDatasetCreateApi(Resource):
- @api.doc("create_external_dataset")
- @api.doc(description="Create external knowledge dataset")
- @api.expect(
- api.model(
- "CreateExternalDatasetRequest",
- {
- "external_knowledge_api_id": fields.String(required=True, description="External knowledge API ID"),
- "external_knowledge_id": fields.String(required=True, description="External knowledge ID"),
- "name": fields.String(required=True, description="Dataset name"),
- "description": fields.String(description="Dataset description"),
- },
- )
- )
- @api.response(201, "External dataset created successfully", dataset_detail_fields)
- @api.response(400, "Invalid parameters")
- @api.response(403, "Permission denied")
+ @console_ns.doc("create_external_dataset")
+ @console_ns.doc(description="Create external knowledge dataset")
+ @console_ns.expect(console_ns.models[ExternalDatasetCreatePayload.__name__])
+ @console_ns.response(201, "External dataset created successfully", dataset_detail_model)
+ @console_ns.response(400, "Invalid parameters")
+ @console_ns.response(403, "Permission denied")
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self):
# The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("external_knowledge_api_id", type=str, required=True, nullable=False, location="json")
- parser.add_argument("external_knowledge_id", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "name",
- nullable=False,
- required=True,
- help="name is required. Name must be between 1 to 100 characters.",
- type=_validate_name,
- )
- parser.add_argument("description", type=str, required=False, nullable=True, location="json")
- parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
-
- args = parser.parse_args()
+ current_user, current_tenant_id = current_account_with_tenant()
+ payload = ExternalDatasetCreatePayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
if not current_user.is_dataset_editor:
@@ -223,7 +258,7 @@ class ExternalDatasetCreateApi(Resource):
try:
dataset = ExternalDatasetService.create_external_dataset(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
user_id=current_user.id,
args=args,
)
@@ -235,26 +270,18 @@ class ExternalDatasetCreateApi(Resource):
@console_ns.route("/datasets//external-hit-testing")
class ExternalKnowledgeHitTestingApi(Resource):
- @api.doc("test_external_knowledge_retrieval")
- @api.doc(description="Test external knowledge retrieval for dataset")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.expect(
- api.model(
- "ExternalHitTestingRequest",
- {
- "query": fields.String(required=True, description="Query text for testing"),
- "retrieval_model": fields.Raw(description="Retrieval model configuration"),
- "external_retrieval_model": fields.Raw(description="External retrieval model configuration"),
- },
- )
- )
- @api.response(200, "External hit testing completed successfully")
- @api.response(404, "Dataset not found")
- @api.response(400, "Invalid parameters")
+ @console_ns.doc("test_external_knowledge_retrieval")
+ @console_ns.doc(description="Test external knowledge retrieval for dataset")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.expect(console_ns.models[ExternalHitTestingPayload.__name__])
+ @console_ns.response(200, "External hit testing completed successfully")
+ @console_ns.response(404, "Dataset not found")
+ @console_ns.response(400, "Invalid parameters")
@setup_required
@login_required
@account_initialization_required
def post(self, dataset_id):
+ current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
@@ -265,21 +292,16 @@ class ExternalKnowledgeHitTestingApi(Resource):
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
- parser = reqparse.RequestParser()
- parser.add_argument("query", type=str, location="json")
- parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
- parser.add_argument("metadata_filtering_conditions", type=dict, required=False, location="json")
- args = parser.parse_args()
-
- HitTestingService.hit_testing_args_check(args)
+ payload = ExternalHitTestingPayload.model_validate(console_ns.payload or {})
+ HitTestingService.hit_testing_args_check(payload.model_dump())
try:
response = HitTestingService.external_retrieve(
dataset=dataset,
- query=args["query"],
- account=cast(Account, current_user),
- external_retrieval_model=args["external_retrieval_model"],
- metadata_filtering_conditions=args["metadata_filtering_conditions"],
+ query=payload.query,
+ account=current_user,
+ external_retrieval_model=payload.external_retrieval_model,
+ metadata_filtering_conditions=payload.metadata_filtering_conditions,
)
return response
@@ -290,33 +312,15 @@ class ExternalKnowledgeHitTestingApi(Resource):
@console_ns.route("/test/retrieval")
class BedrockRetrievalApi(Resource):
# this api is only for internal testing
- @api.doc("bedrock_retrieval_test")
- @api.doc(description="Bedrock retrieval test (internal use only)")
- @api.expect(
- api.model(
- "BedrockRetrievalTestRequest",
- {
- "retrieval_setting": fields.Raw(required=True, description="Retrieval settings"),
- "query": fields.String(required=True, description="Query text"),
- "knowledge_id": fields.String(required=True, description="Knowledge ID"),
- },
- )
- )
- @api.response(200, "Bedrock retrieval test completed")
+ @console_ns.doc("bedrock_retrieval_test")
+ @console_ns.doc(description="Bedrock retrieval test (internal use only)")
+ @console_ns.expect(console_ns.models[BedrockRetrievalPayload.__name__])
+ @console_ns.response(200, "Bedrock retrieval test completed")
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("retrieval_setting", nullable=False, required=True, type=dict, location="json")
- parser.add_argument(
- "query",
- nullable=False,
- required=True,
- type=str,
- )
- parser.add_argument("knowledge_id", nullable=False, required=True, type=str)
- args = parser.parse_args()
+ payload = BedrockRetrievalPayload.model_validate(console_ns.payload or {})
# Call the knowledge retrieval service
result = ExternalDatasetTestService.knowledge_retrieval(
- args["retrieval_setting"], args["query"], args["knowledge_id"]
+ payload.retrieval_setting, payload.query, payload.knowledge_id
)
return result, 200
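# --- reviewer note -------------------------------------------------------
# Every hunk in this file follows the same mechanical pattern: a Pydantic
# model replaces a RequestParser, console_ns.payload supplies the JSON body,
# and attribute access replaces dict indexing. A minimal sketch of the
# pattern (field types inferred from the removed parser arguments):
from typing import Any

from pydantic import BaseModel

class BedrockRetrievalPayload(BaseModel):
    retrieval_setting: dict[str, Any]
    query: str
    knowledge_id: str

# In the handler, console_ns.payload would supply this dict.
raw = {"retrieval_setting": {"top_k": 5}, "query": "hello", "knowledge_id": "kb-1"}
payload = BedrockRetrievalPayload.model_validate(raw)
assert payload.query == "hello" and payload.knowledge_id == "kb-1"
# -------------------------------------------------------------------------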
diff --git a/api/controllers/console/datasets/hit_testing.py b/api/controllers/console/datasets/hit_testing.py
index abaca88090..932cb4fcce 100644
--- a/api/controllers/console/datasets/hit_testing.py
+++ b/api/controllers/console/datasets/hit_testing.py
@@ -1,34 +1,28 @@
-from flask_restx import Resource, fields
+from flask_restx import Resource
-from controllers.console import api, console_ns
-from controllers.console.datasets.hit_testing_base import DatasetsHitTestingBase
-from controllers.console.wraps import (
+from controllers.common.schema import register_schema_model
+from libs.login import login_required
+
+from .. import console_ns
+from ..datasets.hit_testing_base import DatasetsHitTestingBase, HitTestingPayload
+from ..wraps import (
account_initialization_required,
cloud_edition_billing_rate_limit_check,
setup_required,
)
-from libs.login import login_required
+
+register_schema_model(console_ns, HitTestingPayload)
@console_ns.route("/datasets//hit-testing")
class HitTestingApi(Resource, DatasetsHitTestingBase):
- @api.doc("test_dataset_retrieval")
- @api.doc(description="Test dataset knowledge retrieval")
- @api.doc(params={"dataset_id": "Dataset ID"})
- @api.expect(
- api.model(
- "HitTestingRequest",
- {
- "query": fields.String(required=True, description="Query text for testing"),
- "retrieval_model": fields.Raw(description="Retrieval model configuration"),
- "top_k": fields.Integer(description="Number of top results to return"),
- "score_threshold": fields.Float(description="Score threshold for filtering results"),
- },
- )
- )
- @api.response(200, "Hit testing completed successfully")
- @api.response(404, "Dataset not found")
- @api.response(400, "Invalid parameters")
+ @console_ns.doc("test_dataset_retrieval")
+ @console_ns.doc(description="Test dataset knowledge retrieval")
+ @console_ns.doc(params={"dataset_id": "Dataset ID"})
+ @console_ns.expect(console_ns.models[HitTestingPayload.__name__])
+ @console_ns.response(200, "Hit testing completed successfully")
+ @console_ns.response(404, "Dataset not found")
+ @console_ns.response(400, "Invalid parameters")
@setup_required
@login_required
@account_initialization_required
@@ -37,7 +31,8 @@ class HitTestingApi(Resource, DatasetsHitTestingBase):
dataset_id_str = str(dataset_id)
dataset = self.get_and_validate_dataset(dataset_id_str)
- args = self.parse_args()
+ payload = HitTestingPayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
self.hit_testing_args_check(args)
return self.perform_hit_testing(dataset, args)
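# --- reviewer note -------------------------------------------------------
# Why model_dump(exclude_none=True): unset optionals are dropped from the
# resulting dict, so downstream code that does args.get(...) behaves exactly
# as it did with reqparse's absent arguments. Sketch using the model added
# below in hit_testing_base.py:
from typing import Any

from pydantic import BaseModel, Field

class HitTestingPayload(BaseModel):
    query: str = Field(max_length=250)
    retrieval_model: dict[str, Any] | None = None

args = HitTestingPayload.model_validate({"query": "tax policy"}).model_dump(exclude_none=True)
assert args == {"query": "tax policy"}  # None-valued optionals omitted
# -------------------------------------------------------------------------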
diff --git a/api/controllers/console/datasets/hit_testing_base.py b/api/controllers/console/datasets/hit_testing_base.py
index a68e337135..db7c50f422 100644
--- a/api/controllers/console/datasets/hit_testing_base.py
+++ b/api/controllers/console/datasets/hit_testing_base.py
@@ -1,8 +1,8 @@
import logging
-from typing import cast
+from typing import Any
-from flask_login import current_user
from flask_restx import marshal, reqparse
+from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services
@@ -21,6 +21,7 @@ from core.errors.error import (
)
from core.model_runtime.errors.invoke import InvokeError
from fields.hit_testing_fields import hit_testing_record_fields
+from libs.login import current_user
from models.account import Account
from services.dataset_service import DatasetService
from services.hit_testing_service import HitTestingService
@@ -28,9 +29,17 @@ from services.hit_testing_service import HitTestingService
logger = logging.getLogger(__name__)
+class HitTestingPayload(BaseModel):
+ query: str = Field(max_length=250)
+ retrieval_model: dict[str, Any] | None = None
+ external_retrieval_model: dict[str, Any] | None = None
+ attachment_ids: list[str] | None = None
+
+
class DatasetsHitTestingBase:
@staticmethod
def get_and_validate_dataset(dataset_id: str):
+ assert isinstance(current_user, Account)
dataset = DatasetService.get_dataset(dataset_id)
if dataset is None:
raise NotFound("Dataset not found.")
@@ -43,27 +52,31 @@ class DatasetsHitTestingBase:
return dataset
@staticmethod
- def hit_testing_args_check(args):
+ def hit_testing_args_check(args: dict[str, Any]):
HitTestingService.hit_testing_args_check(args)
@staticmethod
def parse_args():
- parser = reqparse.RequestParser()
-
- parser.add_argument("query", type=str, location="json")
- parser.add_argument("retrieval_model", type=dict, required=False, location="json")
- parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
+ parser = (
+ reqparse.RequestParser()
+ .add_argument("query", type=str, required=False, location="json")
+ .add_argument("attachment_ids", type=list, required=False, location="json")
+ .add_argument("retrieval_model", type=dict, required=False, location="json")
+ .add_argument("external_retrieval_model", type=dict, required=False, location="json")
+ )
return parser.parse_args()
@staticmethod
def perform_hit_testing(dataset, args):
+ assert isinstance(current_user, Account)
try:
response = HitTestingService.retrieve(
dataset=dataset,
- query=args["query"],
- account=cast(Account, current_user),
- retrieval_model=args["retrieval_model"],
- external_retrieval_model=args["external_retrieval_model"],
+ query=args.get("query"),
+ account=current_user,
+ retrieval_model=args.get("retrieval_model"),
+ external_retrieval_model=args.get("external_retrieval_model"),
+ attachment_ids=args.get("attachment_ids"),
limit=10,
)
return {"query": response["query"], "records": marshal(response["records"], hit_testing_record_fields)}
diff --git a/api/controllers/console/datasets/metadata.py b/api/controllers/console/datasets/metadata.py
index 8438458617..8eead1696a 100644
--- a/api/controllers/console/datasets/metadata.py
+++ b/api/controllers/console/datasets/metadata.py
@@ -1,13 +1,14 @@
from typing import Literal
-from flask_login import current_user
-from flask_restx import Resource, marshal_with, reqparse
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel
from werkzeug.exceptions import NotFound
+from controllers.common.schema import register_schema_model, register_schema_models
from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, enterprise_license_required, setup_required
from fields.dataset_fields import dataset_metadata_fields
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from services.dataset_service import DatasetService
from services.entities.knowledge_entities.knowledge_entities import (
MetadataArgs,
@@ -16,6 +17,14 @@ from services.entities.knowledge_entities.knowledge_entities import (
from services.metadata_service import MetadataService
+class MetadataUpdatePayload(BaseModel):
+ name: str
+
+
+register_schema_models(console_ns, MetadataArgs, MetadataOperationData)
+register_schema_model(console_ns, MetadataUpdatePayload)
+
+
@console_ns.route("/datasets//metadata")
class DatasetMetadataCreateApi(Resource):
@setup_required
@@ -23,12 +32,10 @@ class DatasetMetadataCreateApi(Resource):
@account_initialization_required
@enterprise_license_required
@marshal_with(dataset_metadata_fields)
+ @console_ns.expect(console_ns.models[MetadataArgs.__name__])
def post(self, dataset_id):
- parser = reqparse.RequestParser()
- parser.add_argument("type", type=str, required=True, nullable=False, location="json")
- parser.add_argument("name", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
- metadata_args = MetadataArgs.model_validate(args)
+ current_user, _ = current_account_with_tenant()
+ metadata_args = MetadataArgs.model_validate(console_ns.payload or {})
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
@@ -58,11 +65,11 @@ class DatasetMetadataApi(Resource):
@account_initialization_required
@enterprise_license_required
@marshal_with(dataset_metadata_fields)
+ @console_ns.expect(console_ns.models[MetadataUpdatePayload.__name__])
def patch(self, dataset_id, metadata_id):
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
- name = args["name"]
+ current_user, _ = current_account_with_tenant()
+ payload = MetadataUpdatePayload.model_validate(console_ns.payload or {})
+ name = payload.name
dataset_id_str = str(dataset_id)
metadata_id_str = str(metadata_id)
@@ -79,6 +86,7 @@ class DatasetMetadataApi(Resource):
@account_initialization_required
@enterprise_license_required
def delete(self, dataset_id, metadata_id):
+ current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
metadata_id_str = str(metadata_id)
dataset = DatasetService.get_dataset(dataset_id_str)
@@ -108,6 +116,7 @@ class DatasetMetadataBuiltInFieldActionApi(Resource):
@account_initialization_required
@enterprise_license_required
def post(self, dataset_id, action: Literal["enable", "disable"]):
+ current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
@@ -127,17 +136,16 @@ class DocumentMetadataEditApi(Resource):
@login_required
@account_initialization_required
@enterprise_license_required
+ @console_ns.expect(console_ns.models[MetadataOperationData.__name__])
def post(self, dataset_id):
+ current_user, _ = current_account_with_tenant()
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
DatasetService.check_dataset_permission(dataset, current_user)
- parser = reqparse.RequestParser()
- parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
- args = parser.parse_args()
- metadata_args = MetadataOperationData.model_validate(args)
+ metadata_args = MetadataOperationData.model_validate(console_ns.payload or {})
MetadataService.update_documents_metadata(dataset, metadata_args)
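# --- reviewer note -------------------------------------------------------
# What makes console_ns.models[MetadataArgs.__name__] resolvable in @expect:
# register_schema_model is a project helper not shown in this diff. Judging
# by the inline schema_model call in datasource_content_preview.py below, it
# plausibly reduces to the following (hypothetical reconstruction):
from flask_restx import Namespace
from pydantic import BaseModel

def register_schema_model(ns: Namespace, model: type[BaseModel]) -> None:
    # Swagger 2.0 resolves refs under #/definitions/{model}; flask-restx
    # stores the schema in ns.models keyed by the class name.
    ns.schema_model(model.__name__, model.model_json_schema(ref_template="#/definitions/{model}"))

def register_schema_models(ns: Namespace, *models: type[BaseModel]) -> None:
    for m in models:
        register_schema_model(ns, m)
# -------------------------------------------------------------------------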
diff --git a/api/controllers/console/datasets/rag_pipeline/datasource_auth.py b/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
index 53b5a0d965..1a47e226e5 100644
--- a/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_auth.py
@@ -1,34 +1,73 @@
+from typing import Any
+
from flask import make_response, redirect, request
-from flask_login import current_user
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, NotFound
from configs import dify_config
+from controllers.common.schema import register_schema_models
from controllers.console import console_ns
-from controllers.console.wraps import (
- account_initialization_required,
- setup_required,
-)
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.utils.encoders import jsonable_encoder
from core.plugin.impl.oauth import OAuthHandler
-from libs.helper import StrLen
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.provider_ids import DatasourceProviderID
from services.datasource_provider_service import DatasourceProviderService
from services.plugin.oauth_service import OAuthProxyService
+class DatasourceCredentialPayload(BaseModel):
+ name: str | None = Field(default=None, max_length=100)
+ credentials: dict[str, Any]
+
+
+class DatasourceCredentialDeletePayload(BaseModel):
+ credential_id: str
+
+
+class DatasourceCredentialUpdatePayload(BaseModel):
+ credential_id: str
+ name: str | None = Field(default=None, max_length=100)
+ credentials: dict[str, Any] | None = None
+
+
+class DatasourceCustomClientPayload(BaseModel):
+ client_params: dict[str, Any] | None = None
+ enable_oauth_custom_client: bool | None = None
+
+
+class DatasourceDefaultPayload(BaseModel):
+ id: str
+
+
+class DatasourceUpdateNamePayload(BaseModel):
+ credential_id: str
+ name: str = Field(max_length=100)
+
+
+register_schema_models(
+ console_ns,
+ DatasourceCredentialPayload,
+ DatasourceCredentialDeletePayload,
+ DatasourceCredentialUpdatePayload,
+ DatasourceCustomClientPayload,
+ DatasourceDefaultPayload,
+ DatasourceUpdateNamePayload,
+)
+
+
@console_ns.route("/oauth/plugin//datasource/get-authorization-url")
class DatasourcePluginOAuthAuthorizationUrl(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def get(self, provider_id: str):
- user = current_user
- tenant_id = user.current_tenant_id
- if not current_user.is_editor:
- raise Forbidden()
+ current_user, current_tenant_id = current_account_with_tenant()
+
+ tenant_id = current_tenant_id
credential_id = request.args.get("credential_id")
datasource_provider_id = DatasourceProviderID(provider_id)
@@ -52,7 +91,7 @@ class DatasourcePluginOAuthAuthorizationUrl(Resource):
redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{provider_id}/datasource/callback"
authorization_url_response = oauth_handler.get_authorization_url(
tenant_id=tenant_id,
- user_id=user.id,
+ user_id=current_user.id,
plugin_id=plugin_id,
provider=provider_name,
redirect_uri=redirect_uri,
@@ -127,28 +166,24 @@ class DatasourceOAuthCallback(Resource):
@console_ns.route("/auth/plugin/datasource/")
class DatasourceAuth(Resource):
+ @console_ns.expect(console_ns.models[DatasourceCredentialPayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, provider_id: str):
- if not current_user.is_editor:
- raise Forbidden()
+ _, current_tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name", type=StrLen(max_length=100), required=False, nullable=True, location="json", default=None
- )
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = DatasourceCredentialPayload.model_validate(console_ns.payload or {})
datasource_provider_id = DatasourceProviderID(provider_id)
datasource_provider_service = DatasourceProviderService()
try:
datasource_provider_service.add_datasource_api_key_provider(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider_id=datasource_provider_id,
- credentials=args["credentials"],
- name=args["name"],
+ credentials=payload.credentials,
+ name=payload.name,
)
except CredentialsValidateFailedError as ex:
raise ValueError(str(ex))
@@ -160,8 +195,10 @@ class DatasourceAuth(Resource):
def get(self, provider_id: str):
datasource_provider_id = DatasourceProviderID(provider_id)
datasource_provider_service = DatasourceProviderService()
+ _, current_tenant_id = current_account_with_tenant()
+
datasources = datasource_provider_service.list_datasource_credentials(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=datasource_provider_id.provider_name,
plugin_id=datasource_provider_id.plugin_id,
)
@@ -170,22 +207,23 @@ class DatasourceAuth(Resource):
@console_ns.route("/auth/plugin/datasource//delete")
class DatasourceAuthDeleteApi(Resource):
+ @console_ns.expect(console_ns.models[DatasourceCredentialDeletePayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, provider_id: str):
+ _, current_tenant_id = current_account_with_tenant()
+
datasource_provider_id = DatasourceProviderID(provider_id)
plugin_id = datasource_provider_id.plugin_id
provider_name = datasource_provider_id.provider_name
- if not current_user.is_editor:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("credential_id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+
+ payload = DatasourceCredentialDeletePayload.model_validate(console_ns.payload or {})
datasource_provider_service = DatasourceProviderService()
datasource_provider_service.remove_datasource_credentials(
- tenant_id=current_user.current_tenant_id,
- auth_id=args["credential_id"],
+ tenant_id=current_tenant_id,
+ auth_id=payload.credential_id,
provider=provider_name,
plugin_id=plugin_id,
)
@@ -194,26 +232,25 @@ class DatasourceAuthDeleteApi(Resource):
@console_ns.route("/auth/plugin/datasource//update")
class DatasourceAuthUpdateApi(Resource):
+ @console_ns.expect(console_ns.models[DatasourceCredentialUpdatePayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, provider_id: str):
+ _, current_tenant_id = current_account_with_tenant()
+
datasource_provider_id = DatasourceProviderID(provider_id)
- parser = reqparse.RequestParser()
- parser.add_argument("credentials", type=dict, required=False, nullable=True, location="json")
- parser.add_argument("name", type=StrLen(max_length=100), required=False, nullable=True, location="json")
- parser.add_argument("credential_id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
- if not current_user.is_editor:
- raise Forbidden()
+ payload = DatasourceCredentialUpdatePayload.model_validate(console_ns.payload or {})
+
datasource_provider_service = DatasourceProviderService()
datasource_provider_service.update_datasource_credentials(
- tenant_id=current_user.current_tenant_id,
- auth_id=args["credential_id"],
+ tenant_id=current_tenant_id,
+ auth_id=payload.credential_id,
provider=datasource_provider_id.provider_name,
plugin_id=datasource_provider_id.plugin_id,
- credentials=args.get("credentials", {}),
- name=args.get("name", None),
+ credentials=payload.credentials or {},
+ name=payload.name,
)
return {"result": "success"}, 201
@@ -224,10 +261,10 @@ class DatasourceAuthListApi(Resource):
@login_required
@account_initialization_required
def get(self):
+ _, current_tenant_id = current_account_with_tenant()
+
datasource_provider_service = DatasourceProviderService()
- datasources = datasource_provider_service.get_all_datasource_credentials(
- tenant_id=current_user.current_tenant_id
- )
+ datasources = datasource_provider_service.get_all_datasource_credentials(tenant_id=current_tenant_id)
return {"result": jsonable_encoder(datasources)}, 200
@@ -237,32 +274,31 @@ class DatasourceHardCodeAuthListApi(Resource):
@login_required
@account_initialization_required
def get(self):
+ _, current_tenant_id = current_account_with_tenant()
+
datasource_provider_service = DatasourceProviderService()
- datasources = datasource_provider_service.get_hard_code_datasource_credentials(
- tenant_id=current_user.current_tenant_id
- )
+ datasources = datasource_provider_service.get_hard_code_datasource_credentials(tenant_id=current_tenant_id)
return {"result": jsonable_encoder(datasources)}, 200
@console_ns.route("/auth/plugin/datasource//custom-client")
class DatasourceAuthOauthCustomClient(Resource):
+ @console_ns.expect(console_ns.models[DatasourceCustomClientPayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, provider_id: str):
- if not current_user.is_editor:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("client_params", type=dict, required=False, nullable=True, location="json")
- parser.add_argument("enable_oauth_custom_client", type=bool, required=False, nullable=True, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+
+ payload = DatasourceCustomClientPayload.model_validate(console_ns.payload or {})
datasource_provider_id = DatasourceProviderID(provider_id)
datasource_provider_service = DatasourceProviderService()
datasource_provider_service.setup_oauth_custom_client_params(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
datasource_provider_id=datasource_provider_id,
- client_params=args.get("client_params", {}),
- enabled=args.get("enable_oauth_custom_client", False),
+ client_params=payload.client_params or {},
+ enabled=payload.enable_oauth_custom_client or False,
)
return {"result": "success"}, 200
@@ -270,10 +306,12 @@ class DatasourceAuthOauthCustomClient(Resource):
@login_required
@account_initialization_required
def delete(self, provider_id: str):
+ _, current_tenant_id = current_account_with_tenant()
+
datasource_provider_id = DatasourceProviderID(provider_id)
datasource_provider_service = DatasourceProviderService()
datasource_provider_service.remove_oauth_custom_client_params(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
datasource_provider_id=datasource_provider_id,
)
return {"result": "success"}, 200
@@ -281,43 +319,42 @@ class DatasourceAuthOauthCustomClient(Resource):
@console_ns.route("/auth/plugin/datasource//default")
class DatasourceAuthDefaultApi(Resource):
+ @console_ns.expect(console_ns.models[DatasourceDefaultPayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, provider_id: str):
- if not current_user.is_editor:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+
+ payload = DatasourceDefaultPayload.model_validate(console_ns.payload or {})
datasource_provider_id = DatasourceProviderID(provider_id)
datasource_provider_service = DatasourceProviderService()
datasource_provider_service.set_default_datasource_provider(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
datasource_provider_id=datasource_provider_id,
- credential_id=args["id"],
+ credential_id=payload.id,
)
return {"result": "success"}, 200
@console_ns.route("/auth/plugin/datasource//update-name")
class DatasourceUpdateProviderNameApi(Resource):
+ @console_ns.expect(console_ns.models[DatasourceUpdateNamePayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def post(self, provider_id: str):
- if not current_user.is_editor:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=StrLen(max_length=100), required=True, nullable=False, location="json")
- parser.add_argument("credential_id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+
+ payload = DatasourceUpdateNamePayload.model_validate(console_ns.payload or {})
datasource_provider_id = DatasourceProviderID(provider_id)
datasource_provider_service = DatasourceProviderService()
datasource_provider_service.update_datasource_provider_name(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
datasource_provider_id=datasource_provider_id,
- name=args["name"],
- credential_id=args["credential_id"],
+ name=payload.name,
+ credential_id=payload.credential_id,
)
return {"result": "success"}, 200
diff --git a/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py b/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
index 6c04cc877a..7caf5b52ed 100644
--- a/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
+++ b/api/controllers/console/datasets/rag_pipeline/datasource_content_preview.py
@@ -1,7 +1,7 @@
from flask_restx import ( # type: ignore
Resource, # type: ignore
- reqparse,
)
+from pydantic import BaseModel
from werkzeug.exceptions import Forbidden
from controllers.console import console_ns
@@ -12,9 +12,21 @@ from models import Account
from models.dataset import Pipeline
from services.rag_pipeline.rag_pipeline import RagPipelineService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class Parser(BaseModel):
+ inputs: dict
+ datasource_type: str
+ credential_id: str | None = None
+
+
+console_ns.schema_model(Parser.__name__, Parser.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
@console_ns.route("/rag/pipelines//workflows/published/datasource/nodes//preview")
class DataSourceContentPreviewApi(Resource):
+ @console_ns.expect(console_ns.models[Parser.__name__])
@setup_required
@login_required
@account_initialization_required
@@ -26,19 +38,10 @@ class DataSourceContentPreviewApi(Resource):
if not isinstance(current_user, Account):
raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("datasource_type", type=str, required=True, location="json")
- parser.add_argument("credential_id", type=str, required=False, location="json")
- args = parser.parse_args()
-
- inputs = args.get("inputs")
- if inputs is None:
- raise ValueError("missing inputs")
- datasource_type = args.get("datasource_type")
- if datasource_type is None:
- raise ValueError("missing datasource_type")
+ args = Parser.model_validate(console_ns.payload)
+ inputs = args.inputs
+ datasource_type = args.datasource_type
rag_pipeline_service = RagPipelineService()
preview_content = rag_pipeline_service.run_datasource_node_preview(
pipeline=pipeline,
@@ -47,6 +50,6 @@ class DataSourceContentPreviewApi(Resource):
account=current_user,
datasource_type=datasource_type,
is_published=True,
- credential_id=args.get("credential_id"),
+ credential_id=args.credential_id,
)
return preview_content, 200
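# --- reviewer note -------------------------------------------------------
# One inconsistency worth flagging: this handler validates console_ns.payload
# without the `or {}` fallback used elsewhere in the PR. The difference shows
# up when the request body is absent:
from pydantic import BaseModel, ValidationError

class Parser(BaseModel):
    inputs: dict
    datasource_type: str
    credential_id: str | None = None

try:
    Parser.model_validate(None)  # missing body: a single "not a dict" error
except ValidationError:
    pass

try:
    Parser.model_validate({})  # the `or {}` variant: per-field "required" errors
except ValidationError as e:
    assert {err["loc"][0] for err in e.errors()} == {"inputs", "datasource_type"}
# -------------------------------------------------------------------------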
diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
index e021f95283..6e0cd31b8d 100644
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline.py
@@ -1,9 +1,11 @@
import logging
from flask import request
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
+from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.wraps import (
account_initialization_required,
@@ -20,18 +22,6 @@ from services.rag_pipeline.rag_pipeline import RagPipelineService
logger = logging.getLogger(__name__)
-def _validate_name(name: str) -> str:
- if not name or len(name) < 1 or len(name) > 40:
- raise ValueError("Name must be between 1 to 40 characters.")
- return name
-
-
-def _validate_description_length(description: str) -> str:
- if len(description) > 400:
- raise ValueError("Description cannot exceed 400 characters.")
- return description
-
-
@console_ns.route("/rag/pipeline/templates")
class PipelineTemplateListApi(Resource):
@setup_required
@@ -59,6 +49,15 @@ class PipelineTemplateDetailApi(Resource):
return pipeline_template, 200
+class Payload(BaseModel):
+ name: str = Field(..., min_length=1, max_length=40)
+ description: str = Field(default="", max_length=400)
+ icon_info: dict[str, object] | None = None
+
+
+register_schema_models(console_ns, Payload)
+
+
@console_ns.route("/rag/pipeline/customized/templates/")
class CustomizedPipelineTemplateApi(Resource):
@setup_required
@@ -66,29 +65,8 @@ class CustomizedPipelineTemplateApi(Resource):
@account_initialization_required
@enterprise_license_required
def patch(self, template_id: str):
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name",
- nullable=False,
- required=True,
- help="Name must be between 1 to 40 characters.",
- type=_validate_name,
- )
- parser.add_argument(
- "description",
- type=_validate_description_length,
- nullable=True,
- required=False,
- default="",
- )
- parser.add_argument(
- "icon_info",
- type=dict,
- location="json",
- nullable=True,
- )
- args = parser.parse_args()
- pipeline_template_info = PipelineTemplateInfoEntity.model_validate(args)
+ payload = Payload.model_validate(console_ns.payload or {})
+ pipeline_template_info = PipelineTemplateInfoEntity.model_validate(payload.model_dump())
RagPipelineService.update_customized_pipeline_template(template_id, pipeline_template_info)
return 200
@@ -117,34 +95,14 @@ class CustomizedPipelineTemplateApi(Resource):
@console_ns.route("/rag/pipelines//customized/publish")
class PublishCustomizedPipelineTemplateApi(Resource):
+ @console_ns.expect(console_ns.models[Payload.__name__])
@setup_required
@login_required
@account_initialization_required
@enterprise_license_required
@knowledge_pipeline_publish_enabled
def post(self, pipeline_id: str):
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name",
- nullable=False,
- required=True,
- help="Name must be between 1 to 40 characters.",
- type=_validate_name,
- )
- parser.add_argument(
- "description",
- type=_validate_description_length,
- nullable=True,
- required=False,
- default="",
- )
- parser.add_argument(
- "icon_info",
- type=dict,
- location="json",
- nullable=True,
- )
- args = parser.parse_args()
+ payload = Payload.model_validate(console_ns.payload or {})
rag_pipeline_service = RagPipelineService()
- rag_pipeline_service.publish_customized_pipeline_template(pipeline_id, args)
+ rag_pipeline_service.publish_customized_pipeline_template(pipeline_id, payload.model_dump())
return {"result": "success"}
diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
index 404aa42073..e65cb19b39 100644
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_datasets.py
@@ -1,9 +1,10 @@
-from flask_login import current_user
-from flask_restx import Resource, marshal, reqparse
+from flask_restx import Resource, marshal
+from pydantic import BaseModel
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden
import services
+from controllers.common.schema import register_schema_model
from controllers.console import console_ns
from controllers.console.datasets.error import DatasetNameDuplicateError
from controllers.console.wraps import (
@@ -13,32 +14,30 @@ from controllers.console.wraps import (
)
from extensions.ext_database import db
from fields.dataset_fields import dataset_detail_fields
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.dataset import DatasetPermissionEnum
from services.dataset_service import DatasetPermissionService, DatasetService
from services.entities.knowledge_entities.rag_pipeline_entities import IconInfo, RagPipelineDatasetCreateEntity
from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
+class RagPipelineDatasetImportPayload(BaseModel):
+ yaml_content: str
+
+
+register_schema_model(console_ns, RagPipelineDatasetImportPayload)
+
+
@console_ns.route("/rag/pipeline/dataset")
class CreateRagPipelineDatasetApi(Resource):
+ @console_ns.expect(console_ns.models[RagPipelineDatasetImportPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def post(self):
- parser = reqparse.RequestParser()
-
- parser.add_argument(
- "yaml_content",
- type=str,
- nullable=False,
- required=True,
- help="yaml_content is required.",
- )
-
- args = parser.parse_args()
-
+ payload = RagPipelineDatasetImportPayload.model_validate(console_ns.payload or {})
+ current_user, current_tenant_id = current_account_with_tenant()
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
if not current_user.is_dataset_editor:
raise Forbidden()
@@ -52,18 +51,18 @@ class CreateRagPipelineDatasetApi(Resource):
),
permission=DatasetPermissionEnum.ONLY_ME,
partial_member_list=None,
- yaml_content=args["yaml_content"],
+ yaml_content=payload.yaml_content,
)
try:
with Session(db.engine) as session:
rag_pipeline_dsl_service = RagPipelineDslService(session)
import_info = rag_pipeline_dsl_service.create_rag_pipeline_dataset(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
rag_pipeline_dataset_create_entity=rag_pipeline_dataset_create_entity,
)
if rag_pipeline_dataset_create_entity.permission == "partial_members":
DatasetPermissionService.update_partial_member_list(
- current_user.current_tenant_id,
+ current_tenant_id,
import_info["dataset_id"],
rag_pipeline_dataset_create_entity.partial_member_list,
)
@@ -81,10 +80,12 @@ class CreateEmptyRagPipelineDatasetApi(Resource):
@cloud_edition_billing_rate_limit_check("knowledge")
def post(self):
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
+ current_user, current_tenant_id = current_account_with_tenant()
+
if not current_user.is_dataset_editor:
raise Forbidden()
dataset = DatasetService.create_empty_rag_pipeline_dataset(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
rag_pipeline_dataset_create_entity=RagPipelineDatasetCreateEntity(
name="",
description="",
diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_draft_variable.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_draft_variable.py
index bef6bfd13e..720e2ce365 100644
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_draft_variable.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_draft_variable.py
@@ -1,11 +1,13 @@
import logging
-from typing import NoReturn
+from typing import Any, NoReturn
-from flask import Response
-from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse
+from flask import Response, request
+from flask_restx import Resource, fields, marshal, marshal_with
+from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden
+from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.app.error import (
DraftWorkflowNotExist,
@@ -23,7 +25,7 @@ from extensions.ext_database import db
from factories.file_factory import build_from_mapping, build_from_mappings
from factories.variable_factory import build_segment_with_type
from libs.login import current_user, login_required
-from models.account import Account
+from models import Account
from models.dataset import Pipeline
from models.workflow import WorkflowDraftVariable
from services.rag_pipeline.rag_pipeline import RagPipelineService
@@ -33,17 +35,21 @@ logger = logging.getLogger(__name__)
def _create_pagination_parser():
- parser = reqparse.RequestParser()
- parser.add_argument(
- "page",
- type=inputs.int_range(1, 100_000),
- required=False,
- default=1,
- location="args",
- help="the page of data requested",
- )
- parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args")
- return parser
+ class PaginationQuery(BaseModel):
+ page: int = Field(default=1, ge=1, le=100_000)
+ limit: int = Field(default=20, ge=1, le=100)
+
+ register_schema_models(console_ns, PaginationQuery)
+
+ return PaginationQuery
+
+
+class WorkflowDraftVariablePatchPayload(BaseModel):
+ name: str | None = None
+ value: Any | None = None
+
+
+register_schema_models(console_ns, WorkflowDraftVariablePatchPayload)
def _get_items(var_list: WorkflowDraftVariableList) -> list[WorkflowDraftVariable]:
@@ -91,8 +97,8 @@ class RagPipelineVariableCollectionApi(Resource):
"""
Get draft workflow
"""
- parser = _create_pagination_parser()
- args = parser.parse_args()
+ pagination = _create_pagination_parser()
+ query = pagination.model_validate(request.args.to_dict())
# fetch draft workflow by app_model
rag_pipeline_service = RagPipelineService()
@@ -107,8 +113,8 @@ class RagPipelineVariableCollectionApi(Resource):
)
workflow_vars = draft_var_srv.list_variables_without_values(
app_id=pipeline.id,
- page=args.page,
- limit=args.limit,
+ page=query.page,
+ limit=query.limit,
)
return workflow_vars
@@ -184,6 +190,7 @@ class RagPipelineVariableApi(Resource):
@_api_prerequisite
@marshal_with(_WORKFLOW_DRAFT_VARIABLE_FIELDS)
+ @console_ns.expect(console_ns.models[WorkflowDraftVariablePatchPayload.__name__])
def patch(self, pipeline: Pipeline, variable_id: str):
# Request payload for file types:
#
@@ -206,15 +213,11 @@ class RagPipelineVariableApi(Resource):
# "upload_file_id": "1602650a-4fe4-423c-85a2-af76c083e3c4"
# }
- parser = reqparse.RequestParser()
- parser.add_argument(self._PATCH_NAME_FIELD, type=str, required=False, nullable=True, location="json")
- # Parse 'value' field as-is to maintain its original data structure
- parser.add_argument(self._PATCH_VALUE_FIELD, type=lambda x: x, required=False, nullable=True, location="json")
-
draft_var_srv = WorkflowDraftVariableService(
session=db.session(),
)
- args = parser.parse_args(strict=True)
+ payload = WorkflowDraftVariablePatchPayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
variable = draft_var_srv.get_variable(variable_id=variable_id)
if variable is None:
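# --- reviewer note -------------------------------------------------------
# PaginationQuery.model_validate(request.args.to_dict()) leans on pydantic's
# lax mode to replace reqparse's inputs.int_range: query-string values arrive
# as strings and are coerced to int, with ge/le providing the range check.
from pydantic import BaseModel, Field, ValidationError

class PaginationQuery(BaseModel):
    page: int = Field(default=1, ge=1, le=100_000)
    limit: int = Field(default=20, ge=1, le=100)

q = PaginationQuery.model_validate({"page": "3", "limit": "50"})  # strings, as from request.args
assert (q.page, q.limit) == (3, 50)

try:
    PaginationQuery.model_validate({"limit": "500"})
except ValidationError:
    pass  # limit capped at 100, matching the old int_range(1, 100)
# -------------------------------------------------------------------------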
diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
index a82872ba2b..d43ee9a6e0 100644
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_import.py
@@ -1,60 +1,68 @@
-from typing import cast
-
-from flask_login import current_user # type: ignore
-from flask_restx import Resource, marshal_with, reqparse # type: ignore
+from flask import request
+from flask_restx import Resource, marshal_with # type: ignore
+from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
-from werkzeug.exceptions import Forbidden
+from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.datasets.wraps import get_rag_pipeline
from controllers.console.wraps import (
account_initialization_required,
+ edit_permission_required,
setup_required,
)
from extensions.ext_database import db
from fields.rag_pipeline_fields import pipeline_import_check_dependencies_fields, pipeline_import_fields
-from libs.login import login_required
-from models import Account
+from libs.login import current_account_with_tenant, login_required
from models.dataset import Pipeline
from services.app_dsl_service import ImportStatus
from services.rag_pipeline.rag_pipeline_dsl_service import RagPipelineDslService
+class RagPipelineImportPayload(BaseModel):
+ mode: str
+ yaml_content: str | None = None
+ yaml_url: str | None = None
+ name: str | None = None
+ description: str | None = None
+ icon_type: str | None = None
+ icon: str | None = None
+ icon_background: str | None = None
+ pipeline_id: str | None = None
+
+
+class IncludeSecretQuery(BaseModel):
+ include_secret: str = Field(default="false")
+
+
+register_schema_models(console_ns, RagPipelineImportPayload, IncludeSecretQuery)
+
+
@console_ns.route("/rag/pipelines/imports")
class RagPipelineImportApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@marshal_with(pipeline_import_fields)
+ @console_ns.expect(console_ns.models[RagPipelineImportPayload.__name__])
def post(self):
# Check user role first
- if not current_user.is_editor:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("mode", type=str, required=True, location="json")
- parser.add_argument("yaml_content", type=str, location="json")
- parser.add_argument("yaml_url", type=str, location="json")
- parser.add_argument("name", type=str, location="json")
- parser.add_argument("description", type=str, location="json")
- parser.add_argument("icon_type", type=str, location="json")
- parser.add_argument("icon", type=str, location="json")
- parser.add_argument("icon_background", type=str, location="json")
- parser.add_argument("pipeline_id", type=str, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ payload = RagPipelineImportPayload.model_validate(console_ns.payload or {})
# Create service with session
with Session(db.engine) as session:
import_service = RagPipelineDslService(session)
# Import app
- account = cast(Account, current_user)
+ account = current_user
result = import_service.import_rag_pipeline(
account=account,
- import_mode=args["mode"],
- yaml_content=args.get("yaml_content"),
- yaml_url=args.get("yaml_url"),
- pipeline_id=args.get("pipeline_id"),
- dataset_name=args.get("name"),
+ import_mode=payload.mode,
+ yaml_content=payload.yaml_content,
+ yaml_url=payload.yaml_url,
+ pipeline_id=payload.pipeline_id,
+ dataset_name=payload.name,
)
session.commit()
@@ -72,17 +80,16 @@ class RagPipelineImportConfirmApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@marshal_with(pipeline_import_fields)
def post(self, import_id):
- # Check user role first
- if not current_user.is_editor:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
# Create service with session
with Session(db.engine) as session:
import_service = RagPipelineDslService(session)
# Confirm import
- account = cast(Account, current_user)
+ account = current_user
result = import_service.confirm_import(import_id=import_id, account=account)
session.commit()
@@ -98,11 +105,9 @@ class RagPipelineImportCheckDependenciesApi(Resource):
@login_required
@get_rag_pipeline
@account_initialization_required
+ @edit_permission_required
@marshal_with(pipeline_import_check_dependencies_fields)
def get(self, pipeline: Pipeline):
- if not current_user.is_editor:
- raise Forbidden()
-
with Session(db.engine) as session:
import_service = RagPipelineDslService(session)
result = import_service.check_dependencies(pipeline=pipeline)
@@ -116,19 +121,15 @@ class RagPipelineExportApi(Resource):
@login_required
@get_rag_pipeline
@account_initialization_required
+ @edit_permission_required
def get(self, pipeline: Pipeline):
- if not current_user.is_editor:
- raise Forbidden()
-
- # Add include_secret params
- parser = reqparse.RequestParser()
- parser.add_argument("include_secret", type=str, default="false", location="args")
- args = parser.parse_args()
+ # Add include_secret params
+ query = IncludeSecretQuery.model_validate(request.args.to_dict())
with Session(db.engine) as session:
export_service = RagPipelineDslService(session)
result = export_service.export_rag_pipeline_dsl(
- pipeline=pipeline, include_secret=args["include_secret"] == "true"
+ pipeline=pipeline, include_secret=query.include_secret == "true"
)
return {"data": result}, 200
diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
index a75c121fbe..debe8eed97 100644
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
@@ -1,14 +1,16 @@
import json
import logging
-from typing import cast
+from typing import Any, Literal, cast
+from uuid import UUID
from flask import abort, request
-from flask_restx import Resource, inputs, marshal_with, reqparse # type: ignore # type: ignore
-from flask_restx.inputs import int_range # type: ignore
+from flask_restx import Resource, marshal_with # type: ignore
+from pydantic import BaseModel, Field
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services
+from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.app.error import (
ConversationCompletedError,
@@ -18,6 +20,7 @@ from controllers.console.app.error import (
from controllers.console.datasets.wraps import get_rag_pipeline
from controllers.console.wraps import (
account_initialization_required,
+ edit_permission_required,
setup_required,
)
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
@@ -35,9 +38,9 @@ from fields.workflow_run_fields import (
workflow_run_pagination_fields,
)
from libs import helper
-from libs.helper import TimestampField, uuid_value
-from libs.login import current_user, login_required
-from models.account import Account
+from libs.helper import TimestampField
+from libs.login import current_account_with_tenant, current_user, login_required
+from models import Account
from models.dataset import Pipeline
from models.model import EndUser
from services.errors.app import WorkflowHashNotEqualError
@@ -50,21 +53,103 @@ from services.rag_pipeline.rag_pipeline_transform_service import RagPipelineTran
logger = logging.getLogger(__name__)
+class DraftWorkflowSyncPayload(BaseModel):
+ graph: dict[str, Any]
+ hash: str | None = None
+ environment_variables: list[dict[str, Any]] | None = None
+ conversation_variables: list[dict[str, Any]] | None = None
+ rag_pipeline_variables: list[dict[str, Any]] | None = None
+ features: dict[str, Any] | None = None
+
+
+class NodeRunPayload(BaseModel):
+ inputs: dict[str, Any] | None = None
+
+
+class NodeRunRequiredPayload(BaseModel):
+ inputs: dict[str, Any]
+
+
+class DatasourceNodeRunPayload(BaseModel):
+ inputs: dict[str, Any]
+ datasource_type: str
+ credential_id: str | None = None
+
+
+class DraftWorkflowRunPayload(BaseModel):
+ inputs: dict[str, Any]
+ datasource_type: str
+ datasource_info_list: list[dict[str, Any]]
+ start_node_id: str
+
+
+class PublishedWorkflowRunPayload(DraftWorkflowRunPayload):
+ is_preview: bool = False
+ response_mode: Literal["streaming", "blocking"] = "streaming"
+ original_document_id: str | None = None
+
+
+class DefaultBlockConfigQuery(BaseModel):
+ q: str | None = None
+
+
+class WorkflowListQuery(BaseModel):
+ page: int = Field(default=1, ge=1, le=99999)
+ limit: int = Field(default=10, ge=1, le=100)
+ user_id: str | None = None
+ named_only: bool = False
+
+
+class WorkflowUpdatePayload(BaseModel):
+ marked_name: str | None = Field(default=None, max_length=20)
+ marked_comment: str | None = Field(default=None, max_length=100)
+
+
+class NodeIdQuery(BaseModel):
+ node_id: str
+
+
+class WorkflowRunQuery(BaseModel):
+ last_id: UUID | None = None
+ limit: int = Field(default=20, ge=1, le=100)
+
+
+class DatasourceVariablesPayload(BaseModel):
+ datasource_type: str
+ datasource_info: dict[str, Any]
+ start_node_id: str
+ start_node_title: str
+
+
+register_schema_models(
+ console_ns,
+ DraftWorkflowSyncPayload,
+ NodeRunPayload,
+ NodeRunRequiredPayload,
+ DatasourceNodeRunPayload,
+ DraftWorkflowRunPayload,
+ PublishedWorkflowRunPayload,
+ DefaultBlockConfigQuery,
+ WorkflowListQuery,
+ WorkflowUpdatePayload,
+ NodeIdQuery,
+ WorkflowRunQuery,
+ DatasourceVariablesPayload,
+)
+
+
@console_ns.route("/rag/pipelines//workflows/draft")
class DraftRagPipelineApi(Resource):
@setup_required
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
@marshal_with(workflow_fields)
def get(self, pipeline: Pipeline):
"""
Get draft rag pipeline's workflow
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
-
# fetch draft workflow by app_model
rag_pipeline_service = RagPipelineService()
workflow = rag_pipeline_service.get_draft_workflow(pipeline=pipeline)
@@ -79,24 +164,18 @@ class DraftRagPipelineApi(Resource):
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
def post(self, pipeline: Pipeline):
"""
Sync draft workflow
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
content_type = request.headers.get("Content-Type", "")
if "application/json" in content_type:
- parser = reqparse.RequestParser()
- parser.add_argument("graph", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("hash", type=str, required=False, location="json")
- parser.add_argument("environment_variables", type=list, required=False, location="json")
- parser.add_argument("conversation_variables", type=list, required=False, location="json")
- parser.add_argument("rag_pipeline_variables", type=list, required=False, location="json")
- args = parser.parse_args()
+ payload_dict = console_ns.payload or {}
elif "text/plain" in content_type:
try:
data = json.loads(request.data.decode("utf-8"))
@@ -106,7 +185,7 @@ class DraftRagPipelineApi(Resource):
if not isinstance(data.get("graph"), dict):
raise ValueError("graph is not a dict")
- args = {
+ payload_dict = {
"graph": data.get("graph"),
"features": data.get("features"),
"hash": data.get("hash"),
@@ -119,24 +198,26 @@ class DraftRagPipelineApi(Resource):
else:
abort(415)
+ payload = DraftWorkflowSyncPayload.model_validate(payload_dict)
+
try:
- environment_variables_list = args.get("environment_variables") or []
+ environment_variables_list = payload.environment_variables or []
environment_variables = [
variable_factory.build_environment_variable_from_mapping(obj) for obj in environment_variables_list
]
- conversation_variables_list = args.get("conversation_variables") or []
+ conversation_variables_list = payload.conversation_variables or []
conversation_variables = [
variable_factory.build_conversation_variable_from_mapping(obj) for obj in conversation_variables_list
]
rag_pipeline_service = RagPipelineService()
workflow = rag_pipeline_service.sync_draft_workflow(
pipeline=pipeline,
- graph=args["graph"],
- unique_hash=args.get("hash"),
+ graph=payload.graph,
+ unique_hash=payload.hash,
account=current_user,
environment_variables=environment_variables,
conversation_variables=conversation_variables,
- rag_pipeline_variables=args.get("rag_pipeline_variables") or [],
+ rag_pipeline_variables=payload.rag_pipeline_variables or [],
)
except WorkflowHashNotEqualError:
raise DraftWorkflowNotSync()
@@ -150,21 +231,21 @@ class DraftRagPipelineApi(Resource):
@console_ns.route("/rag/pipelines//workflows/draft/iteration/nodes//run")
class RagPipelineDraftRunIterationNodeApi(Resource):
+ @console_ns.expect(console_ns.models[NodeRunPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
def post(self, pipeline: Pipeline, node_id: str):
"""
Run draft workflow iteration node
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, location="json")
- args = parser.parse_args()
+ payload = NodeRunPayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
try:
response = PipelineGenerateService.generate_single_iteration(
@@ -185,21 +266,21 @@ class RagPipelineDraftRunIterationNodeApi(Resource):
@console_ns.route("/rag/pipelines//workflows/draft/loop/nodes//run")
class RagPipelineDraftRunLoopNodeApi(Resource):
+ @console_ns.expect(console_ns.models[NodeRunPayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
def post(self, pipeline: Pipeline, node_id: str):
"""
Run draft workflow loop node
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, location="json")
- args = parser.parse_args()
+ payload = NodeRunPayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
try:
response = PipelineGenerateService.generate_single_loop(
@@ -220,24 +301,21 @@ class RagPipelineDraftRunLoopNodeApi(Resource):
@console_ns.route("/rag/pipelines//workflows/draft/run")
class DraftRagPipelineRunApi(Resource):
+ @console_ns.expect(console_ns.models[DraftWorkflowRunPayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
def post(self, pipeline: Pipeline):
"""
Run draft workflow
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("datasource_type", type=str, required=True, location="json")
- parser.add_argument("datasource_info_list", type=list, required=True, location="json")
- parser.add_argument("start_node_id", type=str, required=True, location="json")
- args = parser.parse_args()
+ payload = DraftWorkflowRunPayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump()
try:
response = PipelineGenerateService.generate(
@@ -255,36 +333,29 @@ class DraftRagPipelineRunApi(Resource):
@console_ns.route("/rag/pipelines//workflows/published/run")
class PublishedRagPipelineRunApi(Resource):
+ @console_ns.expect(console_ns.models[PublishedWorkflowRunPayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
def post(self, pipeline: Pipeline):
"""
Run published workflow
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("datasource_type", type=str, required=True, location="json")
- parser.add_argument("datasource_info_list", type=list, required=True, location="json")
- parser.add_argument("start_node_id", type=str, required=True, location="json")
- parser.add_argument("is_preview", type=bool, required=True, location="json", default=False)
- parser.add_argument("response_mode", type=str, required=True, location="json", default="streaming")
- parser.add_argument("original_document_id", type=str, required=False, location="json")
- args = parser.parse_args()
-
- streaming = args["response_mode"] == "streaming"
+ payload = PublishedWorkflowRunPayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
+ streaming = payload.response_mode == "streaming"
try:
response = PipelineGenerateService.generate(
pipeline=pipeline,
user=current_user,
args=args,
- invoke_from=InvokeFrom.DEBUGGER if args.get("is_preview") else InvokeFrom.PUBLISHED,
+ invoke_from=InvokeFrom.DEBUGGER if payload.is_preview else InvokeFrom.PUBLISHED,
streaming=streaming,
)
@@ -303,15 +374,16 @@ class PublishedRagPipelineRunApi(Resource):
# Run rag pipeline datasource
# """
# # The role of the current user in the ta table must be admin, owner, or editor
-# if not current_user.is_editor:
+# if not current_user.has_edit_permission:
# raise Forbidden()
#
# if not isinstance(current_user, Account):
# raise Forbidden()
#
-# parser = reqparse.RequestParser()
-# parser.add_argument("job_id", type=str, required=True, nullable=False, location="json")
-# parser.add_argument("datasource_type", type=str, required=True, location="json")
+# parser = (reqparse.RequestParser()
+# .add_argument("job_id", type=str, required=True, nullable=False, location="json")
+# .add_argument("datasource_type", type=str, required=True, location="json")
+# )
# args = parser.parse_args()
#
# job_id = args.get("job_id")
@@ -344,15 +416,16 @@ class PublishedRagPipelineRunApi(Resource):
# Run rag pipeline datasource
# """
# # The role of the current user in the ta table must be admin, owner, or editor
-# if not current_user.is_editor:
+# if not current_user.has_edit_permission:
# raise Forbidden()
#
# if not isinstance(current_user, Account):
# raise Forbidden()
#
-# parser = reqparse.RequestParser()
-# parser.add_argument("job_id", type=str, required=True, nullable=False, location="json")
-# parser.add_argument("datasource_type", type=str, required=True, location="json")
+# parser = (reqparse.RequestParser()
+# .add_argument("job_id", type=str, required=True, nullable=False, location="json")
+# .add_argument("datasource_type", type=str, required=True, location="json")
+# )
# args = parser.parse_args()
#
# job_id = args.get("job_id")
@@ -376,30 +449,20 @@ class PublishedRagPipelineRunApi(Resource):
#
@console_ns.route("/rag/pipelines//workflows/published/datasource/nodes//run")
class RagPipelinePublishedDatasourceNodeRunApi(Resource):
+ @console_ns.expect(console_ns.models[DatasourceNodeRunPayload.__name__])
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
def post(self, pipeline: Pipeline, node_id: str):
"""
Run rag pipeline datasource
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("datasource_type", type=str, required=True, location="json")
- parser.add_argument("credential_id", type=str, required=False, location="json")
- args = parser.parse_args()
-
- inputs = args.get("inputs")
- if inputs is None:
- raise ValueError("missing inputs")
- datasource_type = args.get("datasource_type")
- if datasource_type is None:
- raise ValueError("missing datasource_type")
+ payload = DatasourceNodeRunPayload.model_validate(console_ns.payload or {})
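+ # Pydantic validation on DatasourceNodeRunPayload replaces the manual
+ # "missing inputs" and "missing datasource_type" checks removed above.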
rag_pipeline_service = RagPipelineService()
return helper.compact_generate_response(
@@ -407,11 +470,11 @@ class RagPipelinePublishedDatasourceNodeRunApi(Resource):
rag_pipeline_service.run_datasource_workflow_node(
pipeline=pipeline,
node_id=node_id,
- user_inputs=inputs,
+ user_inputs=payload.inputs,
account=current_user,
- datasource_type=datasource_type,
+ datasource_type=payload.datasource_type,
is_published=False,
- credential_id=args.get("credential_id"),
+ credential_id=payload.credential_id,
)
)
)
@@ -419,8 +482,10 @@ class RagPipelinePublishedDatasourceNodeRunApi(Resource):
@console_ns.route("/rag/pipelines//workflows/draft/datasource/nodes//run")
class RagPipelineDraftDatasourceNodeRunApi(Resource):
+ @console_ns.expect(console_ns.models[DatasourceNodeRunPayload.__name__])
@setup_required
@login_required
+ @edit_permission_required
@account_initialization_required
@get_rag_pipeline
def post(self, pipeline: Pipeline, node_id: str):
@@ -428,21 +493,9 @@ class RagPipelineDraftDatasourceNodeRunApi(Resource):
Run rag pipeline datasource
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("datasource_type", type=str, required=True, location="json")
- parser.add_argument("credential_id", type=str, required=False, location="json")
- args = parser.parse_args()
-
- inputs = args.get("inputs")
- if inputs is None:
- raise ValueError("missing inputs")
- datasource_type = args.get("datasource_type")
- if datasource_type is None:
- raise ValueError("missing datasource_type")
+ payload = DatasourceNodeRunPayload.model_validate(console_ns.payload or {})
rag_pipeline_service = RagPipelineService()
return helper.compact_generate_response(
@@ -450,11 +503,11 @@ class RagPipelineDraftDatasourceNodeRunApi(Resource):
rag_pipeline_service.run_datasource_workflow_node(
pipeline=pipeline,
node_id=node_id,
- user_inputs=inputs,
+ user_inputs=payload.inputs,
account=current_user,
- datasource_type=datasource_type,
+ datasource_type=payload.datasource_type,
is_published=False,
- credential_id=args.get("credential_id"),
+ credential_id=payload.credential_id,
)
)
)
@@ -462,8 +515,10 @@ class RagPipelineDraftDatasourceNodeRunApi(Resource):
@console_ns.route("/rag/pipelines//workflows/draft/nodes//run")
class RagPipelineDraftNodeRunApi(Resource):
+ @console_ns.expect(console_ns.models[NodeRunRequiredPayload.__name__])
@setup_required
@login_required
+ @edit_permission_required
@account_initialization_required
@get_rag_pipeline
@marshal_with(workflow_run_node_execution_fields)
@@ -472,16 +527,10 @@ class RagPipelineDraftNodeRunApi(Resource):
Run draft workflow node
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- args = parser.parse_args()
-
- inputs = args.get("inputs")
- if inputs == None:
- raise ValueError("missing inputs")
+ payload = NodeRunRequiredPayload.model_validate(console_ns.payload or {})
+ inputs = payload.inputs
rag_pipeline_service = RagPipelineService()
workflow_node_execution = rag_pipeline_service.run_draft_workflow_node(
@@ -498,6 +547,7 @@ class RagPipelineDraftNodeRunApi(Resource):
class RagPipelineTaskStopApi(Resource):
@setup_required
@login_required
+ @edit_permission_required
@account_initialization_required
@get_rag_pipeline
def post(self, pipeline: Pipeline, task_id: str):
@@ -505,8 +555,7 @@ class RagPipelineTaskStopApi(Resource):
Stop workflow task
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
AppQueueManager.set_stop_flag(task_id, InvokeFrom.DEBUGGER, current_user.id)
@@ -518,6 +567,7 @@ class PublishedRagPipelineApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
@marshal_with(workflow_fields)
def get(self, pipeline: Pipeline):
@@ -525,8 +575,6 @@ class PublishedRagPipelineApi(Resource):
Get published pipeline
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
if not pipeline.is_published:
return None
# fetch published workflow by pipeline
@@ -539,15 +587,14 @@ class PublishedRagPipelineApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
def post(self, pipeline: Pipeline):
"""
Publish workflow
"""
# The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
-
+ current_user, _ = current_account_with_tenant()
rag_pipeline_service = RagPipelineService()
with Session(db.engine) as session:
pipeline = session.merge(pipeline)
@@ -574,15 +621,12 @@ class DefaultRagPipelineBlockConfigsApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
def get(self, pipeline: Pipeline):
"""
Get default block config
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
-
# Get default block configs
rag_pipeline_service = RagPipelineService()
return rag_pipeline_service.get_default_block_configs()
@@ -593,25 +637,18 @@ class DefaultRagPipelineBlockConfigApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
def get(self, pipeline: Pipeline, block_type: str):
"""
Get default block config
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("q", type=str, location="args")
- args = parser.parse_args()
-
- q = args.get("q")
+ query = DefaultBlockConfigQuery.model_validate(request.args.to_dict())
filters = None
- if q:
+ if query.q:
try:
- filters = json.loads(args.get("q", ""))
+ filters = json.loads(query.q)
except json.JSONDecodeError:
raise ValueError("Invalid filters")
@@ -625,30 +662,25 @@ class PublishedAllRagPipelineApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
@marshal_with(workflow_pagination_fields)
def get(self, pipeline: Pipeline):
"""
Get published workflows
"""
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args")
- parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args")
- parser.add_argument("user_id", type=str, required=False, location="args")
- parser.add_argument("named_only", type=inputs.boolean, required=False, default=False, location="args")
- args = parser.parse_args()
- page = int(args.get("page", 1))
- limit = int(args.get("limit", 10))
- user_id = args.get("user_id")
- named_only = args.get("named_only", False)
+ query = WorkflowListQuery.model_validate(request.args.to_dict())
+
+ page = query.page
+ limit = query.limit
+ user_id = query.user_id
+ named_only = query.named_only
if user_id:
if user_id != current_user.id:
raise Forbidden()
- user_id = cast(str, user_id)
rag_pipeline_service = RagPipelineService()
with Session(db.engine) as session:
@@ -674,6 +706,7 @@ class RagPipelineByIdApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
@get_rag_pipeline
@marshal_with(workflow_fields)
def patch(self, pipeline: Pipeline, workflow_id: str):
@@ -681,27 +714,10 @@ class RagPipelineByIdApi(Resource):
Update workflow attributes
"""
# Check permission
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
+ current_user, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("marked_name", type=str, required=False, location="json")
- parser.add_argument("marked_comment", type=str, required=False, location="json")
- args = parser.parse_args()
-
- # Validate name and comment length
- if args.marked_name and len(args.marked_name) > 20:
- raise ValueError("Marked name cannot exceed 20 characters")
- if args.marked_comment and len(args.marked_comment) > 100:
- raise ValueError("Marked comment cannot exceed 100 characters")
- args = parser.parse_args()
-
- # Prepare update data
- update_data = {}
- if args.get("marked_name") is not None:
- update_data["marked_name"] = args["marked_name"]
- if args.get("marked_comment") is not None:
- update_data["marked_comment"] = args["marked_comment"]
+ payload = WorkflowUpdatePayload.model_validate(console_ns.payload or {})
+ update_data = payload.model_dump(exclude_unset=True)
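+ # exclude_unset keeps fields the client never sent out of update_data,
+ # so the "No valid fields to update" guard below still fires.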
if not update_data:
return {"message": "No valid fields to update"}, 400
@@ -733,19 +749,13 @@ class PublishedRagPipelineSecondStepApi(Resource):
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
def get(self, pipeline: Pipeline):
"""
Get second step parameters of rag pipeline
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("node_id", type=str, required=True, location="args")
- args = parser.parse_args()
- node_id = args.get("node_id")
- if not node_id:
- raise ValueError("Node ID is required")
+ query = NodeIdQuery.model_validate(request.args.to_dict())
+ node_id = query.node_id
rag_pipeline_service = RagPipelineService()
variables = rag_pipeline_service.get_second_step_parameters(pipeline=pipeline, node_id=node_id, is_draft=False)
return {
@@ -759,19 +769,13 @@ class PublishedRagPipelineFirstStepApi(Resource):
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
def get(self, pipeline: Pipeline):
"""
Get first step parameters of rag pipeline
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("node_id", type=str, required=True, location="args")
- args = parser.parse_args()
- node_id = args.get("node_id")
- if not node_id:
- raise ValueError("Node ID is required")
+ query = NodeIdQuery.model_validate(request.args.to_dict())
+ node_id = query.node_id
rag_pipeline_service = RagPipelineService()
variables = rag_pipeline_service.get_first_step_parameters(pipeline=pipeline, node_id=node_id, is_draft=False)
return {
@@ -785,19 +789,13 @@ class DraftRagPipelineFirstStepApi(Resource):
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
def get(self, pipeline: Pipeline):
"""
Get first step parameters of rag pipeline
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("node_id", type=str, required=True, location="args")
- args = parser.parse_args()
- node_id = args.get("node_id")
- if not node_id:
- raise ValueError("Node ID is required")
+ query = NodeIdQuery.model_validate(request.args.to_dict())
+ node_id = query.node_id
rag_pipeline_service = RagPipelineService()
variables = rag_pipeline_service.get_first_step_parameters(pipeline=pipeline, node_id=node_id, is_draft=True)
return {
@@ -811,19 +809,13 @@ class DraftRagPipelineSecondStepApi(Resource):
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
def get(self, pipeline: Pipeline):
"""
Get second step parameters of rag pipeline
"""
- # The role of the current user in the ta table must be admin, owner, or editor
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("node_id", type=str, required=True, location="args")
- args = parser.parse_args()
- node_id = args.get("node_id")
- if not node_id:
- raise ValueError("Node ID is required")
+ query = NodeIdQuery.model_validate(request.args.to_dict())
+ node_id = query.node_id
rag_pipeline_service = RagPipelineService()
variables = rag_pipeline_service.get_second_step_parameters(pipeline=pipeline, node_id=node_id, is_draft=True)
@@ -843,10 +835,16 @@ class RagPipelineWorkflowRunListApi(Resource):
"""
Get workflow run list
"""
- parser = reqparse.RequestParser()
- parser.add_argument("last_id", type=uuid_value, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- args = parser.parse_args()
+ query = WorkflowRunQuery.model_validate(
+ {
+ "last_id": request.args.get("last_id"),
+ "limit": request.args.get("limit", type=int, default=20),
+ }
+ )
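+ # Downstream service code expects plain strings, so the validated UUID is stringified below.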
+ args = {
+ "last_id": str(query.last_id) if query.last_id else None,
+ "limit": query.limit,
+ }
rag_pipeline_service = RagPipelineService()
result = rag_pipeline_service.get_rag_pipeline_paginate_workflow_runs(pipeline=pipeline, args=args)
@@ -880,7 +878,7 @@ class RagPipelineWorkflowRunNodeExecutionListApi(Resource):
@account_initialization_required
@get_rag_pipeline
@marshal_with(workflow_run_node_execution_list_fields)
- def get(self, pipeline: Pipeline, run_id):
+ def get(self, pipeline: Pipeline, run_id: str):
"""
Get workflow run node execution list
"""
@@ -903,14 +901,8 @@ class DatasourceListApi(Resource):
@login_required
@account_initialization_required
def get(self):
- user = current_user
- if not isinstance(user, Account):
- raise Forbidden()
- tenant_id = user.current_tenant_id
- if not tenant_id:
- raise Forbidden()
-
- return jsonable_encoder(RagPipelineManageService.list_rag_pipeline_datasources(tenant_id))
+ _, current_tenant_id = current_account_with_tenant()
+ return jsonable_encoder(RagPipelineManageService.list_rag_pipeline_datasources(current_tenant_id))
@console_ns.route("/rag/pipelines//workflows/draft/nodes//last-run")
@@ -940,9 +932,8 @@ class RagPipelineTransformApi(Resource):
@setup_required
@login_required
@account_initialization_required
- def post(self, dataset_id):
- if not isinstance(current_user, Account):
- raise Forbidden()
+ def post(self, dataset_id: str):
+ current_user, _ = current_account_with_tenant()
if not (current_user.has_edit_permission or current_user.is_dataset_operator):
raise Forbidden()
@@ -955,24 +946,19 @@ class RagPipelineTransformApi(Resource):
@console_ns.route("/rag/pipelines//workflows/draft/datasource/variables-inspect")
class RagPipelineDatasourceVariableApi(Resource):
+ @console_ns.expect(console_ns.models[DatasourceVariablesPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@get_rag_pipeline
+ @edit_permission_required
@marshal_with(workflow_run_node_execution_fields)
def post(self, pipeline: Pipeline):
"""
Set datasource variables
"""
- if not isinstance(current_user, Account) or not current_user.has_edit_permission:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("datasource_type", type=str, required=True, location="json")
- parser.add_argument("datasource_info", type=dict, required=True, location="json")
- parser.add_argument("start_node_id", type=str, required=True, location="json")
- parser.add_argument("start_node_title", type=str, required=True, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ args = DatasourceVariablesPayload.model_validate(console_ns.payload or {}).model_dump()
rag_pipeline_service = RagPipelineService()
workflow_node_execution = rag_pipeline_service.set_datasource_variables(
diff --git a/api/controllers/console/datasets/website.py b/api/controllers/console/datasets/website.py
index b9c1f65bfd..335c8f6030 100644
--- a/api/controllers/console/datasets/website.py
+++ b/api/controllers/console/datasets/website.py
@@ -1,52 +1,46 @@
-from flask_restx import Resource, fields, reqparse
+from typing import Literal
-from controllers.console import api, console_ns
+from flask import request
+from flask_restx import Resource
+from pydantic import BaseModel
+
+from controllers.common.schema import register_schema_models
+from controllers.console import console_ns
from controllers.console.datasets.error import WebsiteCrawlError
from controllers.console.wraps import account_initialization_required, setup_required
from libs.login import login_required
from services.website_service import WebsiteCrawlApiRequest, WebsiteCrawlStatusApiRequest, WebsiteService
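+# Literal on "provider" reproduces the old reqparse choices= validation declaratively.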
+class WebsiteCrawlPayload(BaseModel):
+ provider: Literal["firecrawl", "watercrawl", "jinareader"]
+ url: str
+ options: dict[str, object]
+
+
+class WebsiteCrawlStatusQuery(BaseModel):
+ provider: Literal["firecrawl", "watercrawl", "jinareader"]
+
+
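+# Registering the schemas on the namespace lets @console_ns.expect reference them by class name.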
+register_schema_models(console_ns, WebsiteCrawlPayload, WebsiteCrawlStatusQuery)
+
+
@console_ns.route("/website/crawl")
class WebsiteCrawlApi(Resource):
- @api.doc("crawl_website")
- @api.doc(description="Crawl website content")
- @api.expect(
- api.model(
- "WebsiteCrawlRequest",
- {
- "provider": fields.String(
- required=True,
- description="Crawl provider (firecrawl/watercrawl/jinareader)",
- enum=["firecrawl", "watercrawl", "jinareader"],
- ),
- "url": fields.String(required=True, description="URL to crawl"),
- "options": fields.Raw(required=True, description="Crawl options"),
- },
- )
- )
- @api.response(200, "Website crawl initiated successfully")
- @api.response(400, "Invalid crawl parameters")
+ @console_ns.doc("crawl_website")
+ @console_ns.doc(description="Crawl website content")
+ @console_ns.expect(console_ns.models[WebsiteCrawlPayload.__name__])
+ @console_ns.response(200, "Website crawl initiated successfully")
+ @console_ns.response(400, "Invalid crawl parameters")
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument(
- "provider",
- type=str,
- choices=["firecrawl", "watercrawl", "jinareader"],
- required=True,
- nullable=True,
- location="json",
- )
- parser.add_argument("url", type=str, required=True, nullable=True, location="json")
- parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
- args = parser.parse_args()
+ payload = WebsiteCrawlPayload.model_validate(console_ns.payload or {})
# Create typed request and validate
try:
- api_request = WebsiteCrawlApiRequest.from_args(args)
+ api_request = WebsiteCrawlApiRequest.from_args(payload.model_dump())
except ValueError as e:
raise WebsiteCrawlError(str(e))
@@ -60,25 +54,22 @@ class WebsiteCrawlApi(Resource):
@console_ns.route("/website/crawl/status/")
class WebsiteCrawlStatusApi(Resource):
- @api.doc("get_crawl_status")
- @api.doc(description="Get website crawl status")
- @api.doc(params={"job_id": "Crawl job ID", "provider": "Crawl provider (firecrawl/watercrawl/jinareader)"})
- @api.response(200, "Crawl status retrieved successfully")
- @api.response(404, "Crawl job not found")
- @api.response(400, "Invalid provider")
+ @console_ns.doc("get_crawl_status")
+ @console_ns.doc(description="Get website crawl status")
+ @console_ns.doc(params={"job_id": "Crawl job ID", "provider": "Crawl provider (firecrawl/watercrawl/jinareader)"})
+ @console_ns.expect(console_ns.models[WebsiteCrawlStatusQuery.__name__])
+ @console_ns.response(200, "Crawl status retrieved successfully")
+ @console_ns.response(404, "Crawl job not found")
+ @console_ns.response(400, "Invalid provider")
@setup_required
@login_required
@account_initialization_required
def get(self, job_id: str):
- parser = reqparse.RequestParser()
- parser.add_argument(
- "provider", type=str, choices=["firecrawl", "watercrawl", "jinareader"], required=True, location="args"
- )
- args = parser.parse_args()
+ args = WebsiteCrawlStatusQuery.model_validate(request.args.to_dict())
# Create typed request and validate
try:
- api_request = WebsiteCrawlStatusApiRequest.from_args(args, job_id)
+ api_request = WebsiteCrawlStatusApiRequest.from_args(args.model_dump(), job_id)
except ValueError as e:
raise WebsiteCrawlError(str(e))
diff --git a/api/controllers/console/datasets/wraps.py b/api/controllers/console/datasets/wraps.py
index 98abb3ef8d..3ef1341abc 100644
--- a/api/controllers/console/datasets/wraps.py
+++ b/api/controllers/console/datasets/wraps.py
@@ -1,46 +1,40 @@
from collections.abc import Callable
from functools import wraps
+from typing import ParamSpec, TypeVar
from controllers.console.datasets.error import PipelineNotFoundError
from extensions.ext_database import db
-from libs.login import current_user
-from models.account import Account
+from libs.login import current_account_with_tenant
from models.dataset import Pipeline
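+# ParamSpec/TypeVar preserve the wrapped view's signature for type checkers.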
+P = ParamSpec("P")
+R = TypeVar("R")
-def get_rag_pipeline(
- view: Callable | None = None,
-):
- def decorator(view_func):
- @wraps(view_func)
- def decorated_view(*args, **kwargs):
- if not kwargs.get("pipeline_id"):
- raise ValueError("missing pipeline_id in path parameters")
- if not isinstance(current_user, Account):
- raise ValueError("current_user is not an account")
+def get_rag_pipeline(view_func: Callable[P, R]):
+ @wraps(view_func)
+ def decorated_view(*args: P.args, **kwargs: P.kwargs):
+ if not kwargs.get("pipeline_id"):
+ raise ValueError("missing pipeline_id in path parameters")
- pipeline_id = kwargs.get("pipeline_id")
- pipeline_id = str(pipeline_id)
+ _, current_tenant_id = current_account_with_tenant()
- del kwargs["pipeline_id"]
+ pipeline_id = kwargs.get("pipeline_id")
+ pipeline_id = str(pipeline_id)
- pipeline = (
- db.session.query(Pipeline)
- .where(Pipeline.id == pipeline_id, Pipeline.tenant_id == current_user.current_tenant_id)
- .first()
- )
+ del kwargs["pipeline_id"]
- if not pipeline:
- raise PipelineNotFoundError()
+ pipeline = (
+ db.session.query(Pipeline)
+ .where(Pipeline.id == pipeline_id, Pipeline.tenant_id == current_tenant_id)
+ .first()
+ )
- kwargs["pipeline"] = pipeline
+ if not pipeline:
+ raise PipelineNotFoundError()
- return view_func(*args, **kwargs)
+ kwargs["pipeline"] = pipeline
- return decorated_view
+ return view_func(*args, **kwargs)
- if view is None:
- return decorator
- else:
- return decorator(view)
+ return decorated_view
diff --git a/api/controllers/console/explore/audio.py b/api/controllers/console/explore/audio.py
index 7c20fb49d8..0311db1584 100644
--- a/api/controllers/console/explore/audio.py
+++ b/api/controllers/console/explore/audio.py
@@ -1,9 +1,11 @@
import logging
from flask import request
+from pydantic import BaseModel, Field
from werkzeug.exceptions import InternalServerError
import services
+from controllers.common.schema import register_schema_model
from controllers.console.app.error import (
AppUnavailableError,
AudioTooLargeError,
@@ -31,6 +33,16 @@ from .. import console_ns
logger = logging.getLogger(__name__)
+class TextToAudioPayload(BaseModel):
+ message_id: str | None = None
+ voice: str | None = None
+ text: str | None = None
+ streaming: bool | None = Field(default=None, description="Enable streaming response")
+
+
+register_schema_model(console_ns, TextToAudioPayload)
+
+
@console_ns.route(
"/installed-apps//audio-to-text",
endpoint="installed_app_audio",
@@ -76,21 +88,15 @@ class ChatAudioApi(InstalledAppResource):
endpoint="installed_app_text",
)
class ChatTextApi(InstalledAppResource):
+ @console_ns.expect(console_ns.models[TextToAudioPayload.__name__])
def post(self, installed_app):
- from flask_restx import reqparse
-
app_model = installed_app.app
try:
- parser = reqparse.RequestParser()
- parser.add_argument("message_id", type=str, required=False, location="json")
- parser.add_argument("voice", type=str, location="json")
- parser.add_argument("text", type=str, location="json")
- parser.add_argument("streaming", type=bool, location="json")
- args = parser.parse_args()
+ payload = TextToAudioPayload.model_validate(console_ns.payload or {})
- message_id = args.get("message_id", None)
- text = args.get("text", None)
- voice = args.get("voice", None)
+ message_id = payload.message_id
+ text = payload.text
+ voice = payload.voice
response = AudioService.transcript_tts(app_model=app_model, text=text, voice=voice, message_id=message_id)
return response
diff --git a/api/controllers/console/explore/completion.py b/api/controllers/console/explore/completion.py
index 1102b815eb..5901eca915 100644
--- a/api/controllers/console/explore/completion.py
+++ b/api/controllers/console/explore/completion.py
@@ -1,9 +1,12 @@
import logging
+from typing import Any, Literal
+from uuid import UUID
-from flask_restx import reqparse
+from pydantic import BaseModel, Field, field_validator
from werkzeug.exceptions import InternalServerError, NotFound
import services
+from controllers.common.schema import register_schema_models
from controllers.console.app.error import (
AppUnavailableError,
CompletionRequestError,
@@ -15,7 +18,6 @@ from controllers.console.app.error import (
from controllers.console.explore.error import NotChatAppError, NotCompletionAppError
from controllers.console.explore.wraps import InstalledAppResource
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
-from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
ModelCurrentlyNotSupportError,
@@ -26,11 +28,11 @@ from core.model_runtime.errors.invoke import InvokeError
from extensions.ext_database import db
from libs import helper
from libs.datetime_utils import naive_utc_now
-from libs.helper import uuid_value
from libs.login import current_user
from models import Account
from models.model import AppMode
from services.app_generate_service import AppGenerateService
+from services.app_task_service import AppTaskService
from services.errors.llm import InvokeRateLimitError
from .. import console_ns
@@ -38,26 +40,56 @@ from .. import console_ns
logger = logging.getLogger(__name__)
+class CompletionMessagePayload(BaseModel):
+ inputs: dict[str, Any]
+ query: str = ""
+ files: list[dict[str, Any]] | None = None
+ response_mode: Literal["blocking", "streaming"] | None = None
+ retriever_from: str = Field(default="explore_app")
+
+
+class ChatMessagePayload(BaseModel):
+ inputs: dict[str, Any]
+ query: str
+ files: list[dict[str, Any]] | None = None
+ conversation_id: str | None = None
+ parent_message_id: str | None = None
+ retriever_from: str = Field(default="explore_app")
+
+ @field_validator("conversation_id", "parent_message_id", mode="before")
+ @classmethod
+ def normalize_uuid(cls, value: str | UUID | None) -> str | None:
+ """
+ Accept blank IDs and validate UUID format when provided.
+ """
+ if not value:
+ return None
+
+ try:
+ return helper.uuid_value(value)
+ except ValueError as exc:
+ raise ValueError("must be a valid UUID") from exc
+
+
+register_schema_models(console_ns, CompletionMessagePayload, ChatMessagePayload)
+
+
# define completion api for user
@console_ns.route(
"/installed-apps//completion-messages",
endpoint="installed_app_completion",
)
class CompletionApi(InstalledAppResource):
+ @console_ns.expect(console_ns.models[CompletionMessagePayload.__name__])
def post(self, installed_app):
app_model = installed_app.app
- if app_model.mode != "completion":
+ if app_model.mode != AppMode.COMPLETION:
raise NotCompletionAppError()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, location="json")
- parser.add_argument("query", type=str, location="json", default="")
- parser.add_argument("files", type=list, required=False, location="json")
- parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
- parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json")
- args = parser.parse_args()
+ payload = CompletionMessagePayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
- streaming = args["response_mode"] == "streaming"
+ streaming = payload.response_mode == "streaming"
args["auto_generate_name"] = False
installed_app.last_used_at = naive_utc_now()
@@ -100,12 +132,18 @@ class CompletionApi(InstalledAppResource):
class CompletionStopApi(InstalledAppResource):
def post(self, installed_app, task_id):
app_model = installed_app.app
- if app_model.mode != "completion":
+ if app_model.mode != AppMode.COMPLETION:
raise NotCompletionAppError()
if not isinstance(current_user, Account):
raise ValueError("current_user must be an Account instance")
- AppQueueManager.set_stop_flag(task_id, InvokeFrom.EXPLORE, current_user.id)
+
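+ # AppTaskService.stop_task replaces AppQueueManager.set_stop_flag and
+ # additionally receives the app mode.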
+ AppTaskService.stop_task(
+ task_id=task_id,
+ invoke_from=InvokeFrom.EXPLORE,
+ user_id=current_user.id,
+ app_mode=AppMode.value_of(app_model.mode),
+ )
return {"result": "success"}, 200
@@ -115,20 +153,15 @@ class CompletionStopApi(InstalledAppResource):
endpoint="installed_app_chat_completion",
)
class ChatApi(InstalledAppResource):
+ @console_ns.expect(console_ns.models[ChatMessagePayload.__name__])
def post(self, installed_app):
app_model = installed_app.app
app_mode = AppMode.value_of(app_model.mode)
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, location="json")
- parser.add_argument("query", type=str, required=True, location="json")
- parser.add_argument("files", type=list, required=False, location="json")
- parser.add_argument("conversation_id", type=uuid_value, location="json")
- parser.add_argument("parent_message_id", type=uuid_value, required=False, location="json")
- parser.add_argument("retriever_from", type=str, required=False, default="explore_app", location="json")
- args = parser.parse_args()
+ payload = ChatMessagePayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
args["auto_generate_name"] = False
@@ -180,6 +213,12 @@ class ChatStopApi(InstalledAppResource):
if not isinstance(current_user, Account):
raise ValueError("current_user must be an Account instance")
- AppQueueManager.set_stop_flag(task_id, InvokeFrom.EXPLORE, current_user.id)
+
+ AppTaskService.stop_task(
+ task_id=task_id,
+ invoke_from=InvokeFrom.EXPLORE,
+ user_id=current_user.id,
+ app_mode=app_mode,
+ )
return {"result": "success"}, 200
diff --git a/api/controllers/console/explore/conversation.py b/api/controllers/console/explore/conversation.py
index feabea2524..92da591ab4 100644
--- a/api/controllers/console/explore/conversation.py
+++ b/api/controllers/console/explore/conversation.py
@@ -1,14 +1,18 @@
-from flask_restx import marshal_with, reqparse
-from flask_restx.inputs import int_range
+from typing import Any
+from uuid import UUID
+
+from flask import request
+from flask_restx import marshal_with
+from pydantic import BaseModel, Field, model_validator
from sqlalchemy.orm import Session
from werkzeug.exceptions import NotFound
+from controllers.common.schema import register_schema_models
from controllers.console.explore.error import NotChatAppError
from controllers.console.explore.wraps import InstalledAppResource
from core.app.entities.app_invoke_entities import InvokeFrom
from extensions.ext_database import db
from fields.conversation_fields import conversation_infinite_scroll_pagination_fields, simple_conversation_fields
-from libs.helper import uuid_value
from libs.login import current_user
from models import Account
from models.model import AppMode
@@ -19,27 +23,51 @@ from services.web_conversation_service import WebConversationService
from .. import console_ns
+class ConversationListQuery(BaseModel):
+ last_id: UUID | None = None
+ limit: int = Field(default=20, ge=1, le=100)
+ pinned: bool | None = None
+
+
+class ConversationRenamePayload(BaseModel):
+ name: str | None = None
+ auto_generate: bool = False
+
+ @model_validator(mode="after")
+ def validate_name_requirement(self):
+ if not self.auto_generate:
+ if self.name is None or not self.name.strip():
+ raise ValueError("name is required when auto_generate is false")
+ return self
+
+
+register_schema_models(console_ns, ConversationListQuery, ConversationRenamePayload)
+
+
@console_ns.route(
"/installed-apps//conversations",
endpoint="installed_app_conversations",
)
class ConversationListApi(InstalledAppResource):
@marshal_with(conversation_infinite_scroll_pagination_fields)
+ @console_ns.expect(console_ns.models[ConversationListQuery.__name__])
def get(self, installed_app):
app_model = installed_app.app
app_mode = AppMode.value_of(app_model.mode)
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
- parser = reqparse.RequestParser()
- parser.add_argument("last_id", type=uuid_value, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- parser.add_argument("pinned", type=str, choices=["true", "false", None], location="args")
- args = parser.parse_args()
-
- pinned = None
- if "pinned" in args and args["pinned"] is not None:
- pinned = args["pinned"] == "true"
+ raw_args: dict[str, Any] = {
+ "last_id": request.args.get("last_id"),
+ "limit": request.args.get("limit", default=20, type=int),
+ "pinned": request.args.get("pinned"),
+ }
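+ # Query params arrive as strings, so "pinned" is normalized to a bool before validation.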
+ if raw_args["last_id"] is None:
+ raw_args["last_id"] = None
+ pinned_value = raw_args["pinned"]
+ if isinstance(pinned_value, str):
+ raw_args["pinned"] = pinned_value == "true"
+ args = ConversationListQuery.model_validate(raw_args)
try:
if not isinstance(current_user, Account):
@@ -49,10 +77,10 @@ class ConversationListApi(InstalledAppResource):
session=session,
app_model=app_model,
user=current_user,
- last_id=args["last_id"],
- limit=args["limit"],
+ last_id=str(args.last_id) if args.last_id else None,
+ limit=args.limit,
invoke_from=InvokeFrom.EXPLORE,
- pinned=pinned,
+ pinned=args.pinned,
)
except LastConversationNotExistsError:
raise NotFound("Last Conversation Not Exists.")
@@ -86,6 +114,7 @@ class ConversationApi(InstalledAppResource):
)
class ConversationRenameApi(InstalledAppResource):
@marshal_with(simple_conversation_fields)
+ @console_ns.expect(console_ns.models[ConversationRenamePayload.__name__])
def post(self, installed_app, c_id):
app_model = installed_app.app
app_mode = AppMode.value_of(app_model.mode)
@@ -94,16 +123,13 @@ class ConversationRenameApi(InstalledAppResource):
conversation_id = str(c_id)
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=False, location="json")
- parser.add_argument("auto_generate", type=bool, required=False, default=False, location="json")
- args = parser.parse_args()
+ payload = ConversationRenamePayload.model_validate(console_ns.payload or {})
try:
if not isinstance(current_user, Account):
raise ValueError("current_user must be an Account instance")
return ConversationService.rename(
- app_model, conversation_id, current_user, args["name"], args["auto_generate"]
+ app_model, conversation_id, current_user, payload.name, payload.auto_generate
)
except ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
diff --git a/api/controllers/console/explore/installed_app.py b/api/controllers/console/explore/installed_app.py
index c86c243c9b..3c95779475 100644
--- a/api/controllers/console/explore/installed_app.py
+++ b/api/controllers/console/explore/installed_app.py
@@ -12,10 +12,9 @@ from controllers.console.wraps import account_initialization_required, cloud_edi
from extensions.ext_database import db
from fields.installed_app_fields import installed_app_list_fields
from libs.datetime_utils import naive_utc_now
-from libs.login import current_user, login_required
-from models import Account, App, InstalledApp, RecommendedApp
+from libs.login import current_account_with_tenant, login_required
+from models import App, InstalledApp, RecommendedApp
from services.account_service import TenantService
-from services.app_service import AppService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
@@ -29,9 +28,7 @@ class InstalledAppsListApi(Resource):
@marshal_with(installed_app_list_fields)
def get(self):
app_id = request.args.get("app_id", default=None, type=str)
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
- current_tenant_id = current_user.current_tenant_id
+ current_user, current_tenant_id = current_account_with_tenant()
if app_id:
installed_apps = db.session.scalars(
@@ -69,31 +66,26 @@ class InstalledAppsListApi(Resource):
# Pre-filter out apps without setting or with sso_verified
filtered_installed_apps = []
- app_id_to_app_code = {}
for installed_app in installed_app_list:
app_id = installed_app["app"].id
webapp_setting = webapp_settings.get(app_id)
if not webapp_setting or webapp_setting.access_mode == "sso_verified":
continue
- app_code = AppService.get_app_code_by_id(str(app_id))
- app_id_to_app_code[app_id] = app_code
filtered_installed_apps.append(installed_app)
- app_codes = list(app_id_to_app_code.values())
-
# Batch permission check
+ app_ids = [installed_app["app"].id for installed_app in filtered_installed_apps]
permissions = EnterpriseService.WebAppAuth.batch_is_user_allowed_to_access_webapps(
user_id=user_id,
- app_codes=app_codes,
+ app_ids=app_ids,
)
# Keep only allowed apps
res = []
for installed_app in filtered_installed_apps:
app_id = installed_app["app"].id
- app_code = app_id_to_app_code[app_id]
- if permissions.get(app_code):
+ if permissions.get(app_id):
res.append(installed_app)
installed_app_list = res
@@ -113,17 +105,15 @@ class InstalledAppsListApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check("apps")
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("app_id", type=str, required=True, help="Invalid app_id")
+ parser = reqparse.RequestParser().add_argument("app_id", type=str, required=True, help="Invalid app_id")
args = parser.parse_args()
recommended_app = db.session.query(RecommendedApp).where(RecommendedApp.app_id == args["app_id"]).first()
if recommended_app is None:
raise NotFound("App not found")
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
- current_tenant_id = current_user.current_tenant_id
+ _, current_tenant_id = current_account_with_tenant()
+
app = db.session.query(App).where(App.id == args["app_id"]).first()
if app is None:
@@ -163,9 +153,8 @@ class InstalledAppApi(InstalledAppResource):
"""
def delete(self, installed_app):
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
- if installed_app.app_owner_tenant_id == current_user.current_tenant_id:
+ _, current_tenant_id = current_account_with_tenant()
+ if installed_app.app_owner_tenant_id == current_tenant_id:
raise BadRequest("You can't uninstall an app owned by the current tenant")
db.session.delete(installed_app)
@@ -174,8 +163,7 @@ class InstalledAppApi(InstalledAppResource):
return {"result": "success", "message": "App uninstalled successfully"}, 204
def patch(self, installed_app):
- parser = reqparse.RequestParser()
- parser.add_argument("is_pinned", type=inputs.boolean)
+ parser = reqparse.RequestParser().add_argument("is_pinned", type=inputs.boolean)
args = parser.parse_args()
commit_args = False
diff --git a/api/controllers/console/explore/message.py b/api/controllers/console/explore/message.py
index b045e47846..229b7c8865 100644
--- a/api/controllers/console/explore/message.py
+++ b/api/controllers/console/explore/message.py
@@ -1,9 +1,13 @@
import logging
+from typing import Literal
+from uuid import UUID
-from flask_restx import marshal_with, reqparse
-from flask_restx.inputs import int_range
+from flask import request
+from flask_restx import marshal_with
+from pydantic import BaseModel, Field
from werkzeug.exceptions import InternalServerError, NotFound
+from controllers.common.schema import register_schema_models
from controllers.console.app.error import (
AppMoreLikeThisDisabledError,
CompletionRequestError,
@@ -22,9 +26,7 @@ from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotIni
from core.model_runtime.errors.invoke import InvokeError
from fields.message_fields import message_infinite_scroll_pagination_fields
from libs import helper
-from libs.helper import uuid_value
-from libs.login import current_user
-from models import Account
+from libs.login import current_account_with_tenant
from models.model import AppMode
from services.app_generate_service import AppGenerateService
from services.errors.app import MoreLikeThisDisabledError
@@ -41,30 +43,47 @@ from .. import console_ns
logger = logging.getLogger(__name__)
+class MessageListQuery(BaseModel):
+ conversation_id: UUID
+ first_id: UUID | None = None
+ limit: int = Field(default=20, ge=1, le=100)
+
+
+class MessageFeedbackPayload(BaseModel):
+ rating: Literal["like", "dislike"] | None = None
+ content: str | None = None
+
+
+class MoreLikeThisQuery(BaseModel):
+ response_mode: Literal["blocking", "streaming"]
+
+
+register_schema_models(console_ns, MessageListQuery, MessageFeedbackPayload, MoreLikeThisQuery)
+
+
@console_ns.route(
"/installed-apps//messages",
endpoint="installed_app_messages",
)
class MessageListApi(InstalledAppResource):
@marshal_with(message_infinite_scroll_pagination_fields)
+ @console_ns.expect(console_ns.models[MessageListQuery.__name__])
def get(self, installed_app):
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
app_mode = AppMode.value_of(app_model.mode)
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
-
- parser = reqparse.RequestParser()
- parser.add_argument("conversation_id", required=True, type=uuid_value, location="args")
- parser.add_argument("first_id", type=uuid_value, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- args = parser.parse_args()
+ args = MessageListQuery.model_validate(request.args.to_dict())
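+ # Field(ge=1, le=100) on the model replaces the old int_range(1, 100) check.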
try:
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
return MessageService.pagination_by_first_id(
- app_model, current_user, args["conversation_id"], args["first_id"], args["limit"]
+ app_model,
+ current_user,
+ str(args.conversation_id),
+ str(args.first_id) if args.first_id else None,
+ args.limit,
)
except ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
@@ -77,25 +96,22 @@ class MessageListApi(InstalledAppResource):
endpoint="installed_app_message_feedback",
)
class MessageFeedbackApi(InstalledAppResource):
+ @console_ns.expect(console_ns.models[MessageFeedbackPayload.__name__])
def post(self, installed_app, message_id):
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
message_id = str(message_id)
- parser = reqparse.RequestParser()
- parser.add_argument("rating", type=str, choices=["like", "dislike", None], location="json")
- parser.add_argument("content", type=str, location="json")
- args = parser.parse_args()
+ payload = MessageFeedbackPayload.model_validate(console_ns.payload or {})
try:
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
MessageService.create_feedback(
app_model=app_model,
message_id=message_id,
user=current_user,
- rating=args.get("rating"),
- content=args.get("content"),
+ rating=payload.rating,
+ content=payload.content,
)
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
@@ -108,24 +124,20 @@ class MessageFeedbackApi(InstalledAppResource):
endpoint="installed_app_more_like_this",
)
class MessageMoreLikeThisApi(InstalledAppResource):
+ @console_ns.expect(console_ns.models[MoreLikeThisQuery.__name__])
def get(self, installed_app, message_id):
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
if app_model.mode != "completion":
raise NotCompletionAppError()
message_id = str(message_id)
- parser = reqparse.RequestParser()
- parser.add_argument(
- "response_mode", type=str, required=True, choices=["blocking", "streaming"], location="args"
- )
- args = parser.parse_args()
+ args = MoreLikeThisQuery.model_validate(request.args.to_dict())
- streaming = args["response_mode"] == "streaming"
+ streaming = args.response_mode == "streaming"
try:
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
response = AppGenerateService.generate_more_like_this(
app_model=app_model,
user=current_user,
@@ -159,6 +171,7 @@ class MessageMoreLikeThisApi(InstalledAppResource):
)
class MessageSuggestedQuestionApi(InstalledAppResource):
def get(self, installed_app, message_id):
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
app_mode = AppMode.value_of(app_model.mode)
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
@@ -167,8 +180,6 @@ class MessageSuggestedQuestionApi(InstalledAppResource):
message_id = str(message_id)
try:
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
questions = MessageService.get_suggested_questions_after_answer(
app_model=app_model, user=current_user, message_id=message_id, invoke_from=InvokeFrom.EXPLORE
)
diff --git a/api/controllers/console/explore/recommended_app.py b/api/controllers/console/explore/recommended_app.py
index 6d627a929a..2b2f807694 100644
--- a/api/controllers/console/explore/recommended_app.py
+++ b/api/controllers/console/explore/recommended_app.py
@@ -1,4 +1,6 @@
-from flask_restx import Resource, fields, marshal_with, reqparse
+from flask import request
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field
from constants.languages import languages
from controllers.console import console_ns
@@ -35,18 +37,26 @@ recommended_app_list_fields = {
}
+class RecommendedAppsQuery(BaseModel):
+ language: str | None = Field(default=None)
+
+
+console_ns.schema_model(
+ RecommendedAppsQuery.__name__,
+ RecommendedAppsQuery.model_json_schema(ref_template="#/definitions/{model}"),
+)
+
+
@console_ns.route("/explore/apps")
class RecommendedAppListApi(Resource):
+ @console_ns.expect(console_ns.models[RecommendedAppsQuery.__name__])
@login_required
@account_initialization_required
@marshal_with(recommended_app_list_fields)
def get(self):
# language args
- parser = reqparse.RequestParser()
- parser.add_argument("language", type=str, location="args")
- args = parser.parse_args()
-
- language = args.get("language")
+ args = RecommendedAppsQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ language = args.language
if language and language in languages:
language_prefix = language
elif current_user and current_user.interface_language:
diff --git a/api/controllers/console/explore/saved_message.py b/api/controllers/console/explore/saved_message.py
index 79e4a4339e..6a9e274a0e 100644
--- a/api/controllers/console/explore/saved_message.py
+++ b/api/controllers/console/explore/saved_message.py
@@ -1,17 +1,33 @@
-from flask_restx import fields, marshal_with, reqparse
-from flask_restx.inputs import int_range
+from uuid import UUID
+
+from flask import request
+from flask_restx import fields, marshal_with
+from pydantic import BaseModel, Field
from werkzeug.exceptions import NotFound
+from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.explore.error import NotCompletionAppError
from controllers.console.explore.wraps import InstalledAppResource
from fields.conversation_fields import message_file_fields
-from libs.helper import TimestampField, uuid_value
-from libs.login import current_user
-from models import Account
+from libs.helper import TimestampField
+from libs.login import current_account_with_tenant
from services.errors.message import MessageNotExistsError
from services.saved_message_service import SavedMessageService
+
+class SavedMessageListQuery(BaseModel):
+ last_id: UUID | None = None
+ limit: int = Field(default=20, ge=1, le=100)
+
+
+class SavedMessageCreatePayload(BaseModel):
+ message_id: UUID
+
+
+register_schema_models(console_ns, SavedMessageListQuery, SavedMessageCreatePayload)
+
+
feedback_fields = {"rating": fields.String}
message_fields = {
@@ -34,33 +50,33 @@ class SavedMessageListApi(InstalledAppResource):
}
@marshal_with(saved_message_infinite_scroll_pagination_fields)
+ @console_ns.expect(console_ns.models[SavedMessageListQuery.__name__])
def get(self, installed_app):
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
if app_model.mode != "completion":
raise NotCompletionAppError()
- parser = reqparse.RequestParser()
- parser.add_argument("last_id", type=uuid_value, location="args")
- parser.add_argument("limit", type=int_range(1, 100), required=False, default=20, location="args")
- args = parser.parse_args()
+ args = SavedMessageListQuery.model_validate(request.args.to_dict())
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
- return SavedMessageService.pagination_by_last_id(app_model, current_user, args["last_id"], args["limit"])
+ return SavedMessageService.pagination_by_last_id(
+ app_model,
+ current_user,
+ str(args.last_id) if args.last_id else None,
+ args.limit,
+ )
+ @console_ns.expect(console_ns.models[SavedMessageCreatePayload.__name__])
def post(self, installed_app):
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
if app_model.mode != "completion":
raise NotCompletionAppError()
- parser = reqparse.RequestParser()
- parser.add_argument("message_id", type=uuid_value, required=True, location="json")
- args = parser.parse_args()
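+ # ns.payload is None when the request body is empty; validating {} raises
+ # a clear "field required" error instead of an attribute error.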
+ payload = SavedMessageCreatePayload.model_validate(console_ns.payload or {})
try:
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
- SavedMessageService.save(app_model, current_user, args["message_id"])
+ SavedMessageService.save(app_model, current_user, str(payload.message_id))
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
@@ -72,6 +88,7 @@ class SavedMessageListApi(InstalledAppResource):
)
class SavedMessageApi(InstalledAppResource):
def delete(self, installed_app, message_id):
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
message_id = str(message_id)
@@ -79,8 +96,6 @@ class SavedMessageApi(InstalledAppResource):
if app_model.mode != "completion":
raise NotCompletionAppError()
- if not isinstance(current_user, Account):
- raise ValueError("current_user must be an Account instance")
SavedMessageService.delete(app_model, current_user, message_id)
return {"result": "success"}, 204
diff --git a/api/controllers/console/explore/workflow.py b/api/controllers/console/explore/workflow.py
index e32f2814eb..d679d0722d 100644
--- a/api/controllers/console/explore/workflow.py
+++ b/api/controllers/console/explore/workflow.py
@@ -1,8 +1,10 @@
import logging
+from typing import Any
-from flask_restx import reqparse
+from pydantic import BaseModel
from werkzeug.exceptions import InternalServerError
+from controllers.common.schema import register_schema_model
from controllers.console.app.error import (
CompletionRequestError,
ProviderModelCurrentlyNotSupportError,
@@ -22,7 +24,7 @@ from core.errors.error import (
from core.model_runtime.errors.invoke import InvokeError
from core.workflow.graph_engine.manager import GraphEngineManager
from libs import helper
-from libs.login import current_user
+from libs.login import current_account_with_tenant
from models.model import AppMode, InstalledApp
from services.app_generate_service import AppGenerateService
from services.errors.llm import InvokeRateLimitError
@@ -32,12 +34,22 @@ from .. import console_ns
logger = logging.getLogger(__name__)
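+# Body schema for the explore workflow-run endpoint: inputs is required,
+# files is optional.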
+class WorkflowRunPayload(BaseModel):
+ inputs: dict[str, Any]
+ files: list[dict[str, Any]] | None = None
+
+
+register_schema_model(console_ns, WorkflowRunPayload)
+
+
@console_ns.route("/installed-apps//workflows/run")
class InstalledAppWorkflowRunApi(InstalledAppResource):
+ @console_ns.expect(console_ns.models[WorkflowRunPayload.__name__])
def post(self, installed_app: InstalledApp):
"""
Run workflow
"""
+ current_user, _ = current_account_with_tenant()
app_model = installed_app.app
if not app_model:
raise NotWorkflowAppError()
@@ -45,11 +57,8 @@ class InstalledAppWorkflowRunApi(InstalledAppResource):
if app_mode != AppMode.WORKFLOW:
raise NotWorkflowAppError()
- parser = reqparse.RequestParser()
- parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("files", type=list, required=False, location="json")
- args = parser.parse_args()
- assert current_user is not None
+ payload = WorkflowRunPayload.model_validate(console_ns.payload or {})
+ args = payload.model_dump(exclude_none=True)
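+ # exclude_none drops optional keys (e.g. "files") that were not supplied,
+ # so downstream code only sees the arguments the client actually sent.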
try:
response = AppGenerateService.generate(
app_model=app_model, user=current_user, args=args, invoke_from=InvokeFrom.EXPLORE, streaming=True
@@ -85,7 +94,6 @@ class InstalledAppWorkflowTaskStopApi(InstalledAppResource):
app_mode = AppMode.value_of(app_model.mode)
if app_mode != AppMode.WORKFLOW:
raise NotWorkflowAppError()
- assert current_user is not None
# Stop using both mechanisms for backward compatibility
# Legacy stop flag mechanism (without user check)
diff --git a/api/controllers/console/explore/wraps.py b/api/controllers/console/explore/wraps.py
index 3a8ba64a03..2a97d312aa 100644
--- a/api/controllers/console/explore/wraps.py
+++ b/api/controllers/console/explore/wraps.py
@@ -2,16 +2,14 @@ from collections.abc import Callable
from functools import wraps
from typing import Concatenate, ParamSpec, TypeVar
-from flask_login import current_user
from flask_restx import Resource
from werkzeug.exceptions import NotFound
from controllers.console.explore.error import AppAccessDeniedError
from controllers.console.wraps import account_initialization_required
from extensions.ext_database import db
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models import InstalledApp
-from services.app_service import AppService
from services.enterprise.enterprise_service import EnterpriseService
from services.feature_service import FeatureService
@@ -24,11 +22,10 @@ def installed_app_required(view: Callable[Concatenate[InstalledApp, P], R] | Non
def decorator(view: Callable[Concatenate[InstalledApp, P], R]):
@wraps(view)
def decorated(installed_app_id: str, *args: P.args, **kwargs: P.kwargs):
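+ # Resolve the authenticated account's tenant first; the lookup below is
+ # scoped to that tenant so apps from other workspaces stay invisible.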
+ _, current_tenant_id = current_account_with_tenant()
installed_app = (
db.session.query(InstalledApp)
- .where(
- InstalledApp.id == str(installed_app_id), InstalledApp.tenant_id == current_user.current_tenant_id
- )
+ .where(InstalledApp.id == str(installed_app_id), InstalledApp.tenant_id == current_tenant_id)
.first()
)
@@ -54,13 +51,13 @@ def user_allowed_to_access_app(view: Callable[Concatenate[InstalledApp, P], R] |
def decorator(view: Callable[Concatenate[InstalledApp, P], R]):
@wraps(view)
def decorated(installed_app: InstalledApp, *args: P.args, **kwargs: P.kwargs):
+ current_user, _ = current_account_with_tenant()
feature = FeatureService.get_system_features()
if feature.webapp_auth.enabled:
app_id = installed_app.app_id
- app_code = AppService.get_app_code_by_id(app_id)
res = EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
user_id=str(current_user.id),
- app_code=app_code,
+ app_id=app_id,
)
if not res:
raise AppAccessDeniedError()
diff --git a/api/controllers/console/extension.py b/api/controllers/console/extension.py
index 57f5ab191e..08f29b4655 100644
--- a/api/controllers/console/extension.py
+++ b/api/controllers/console/extension.py
@@ -1,27 +1,32 @@
-from flask_login import current_user
from flask_restx import Resource, fields, marshal_with, reqparse
from constants import HIDDEN_VALUE
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, setup_required
from fields.api_based_extension_fields import api_based_extension_fields
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.api_based_extension import APIBasedExtension
from services.api_based_extension_service import APIBasedExtensionService
from services.code_based_extension_service import CodeBasedExtensionService
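+# Register the marshal fields as a namespace model once so the same definition
+# backs both @marshal_with and the documented responses below.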
+api_based_extension_model = console_ns.model("ApiBasedExtensionModel", api_based_extension_fields)
+
+api_based_extension_list_model = fields.List(fields.Nested(api_based_extension_model))
+
@console_ns.route("/code-based-extension")
class CodeBasedExtensionAPI(Resource):
- @api.doc("get_code_based_extension")
- @api.doc(description="Get code-based extension data by module name")
- @api.expect(
- api.parser().add_argument("module", type=str, required=True, location="args", help="Extension module name")
+ @console_ns.doc("get_code_based_extension")
+ @console_ns.doc(description="Get code-based extension data by module name")
+ @console_ns.expect(
+ console_ns.parser().add_argument(
+ "module", type=str, required=True, location="args", help="Extension module name"
+ )
)
- @api.response(
+ @console_ns.response(
200,
"Success",
- api.model(
+ console_ns.model(
"CodeBasedExtensionResponse",
{"module": fields.String(description="Module name"), "data": fields.Raw(description="Extension data")},
),
@@ -30,8 +35,7 @@ class CodeBasedExtensionAPI(Resource):
@login_required
@account_initialization_required
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument("module", type=str, required=True, location="args")
+ parser = reqparse.RequestParser().add_argument("module", type=str, required=True, location="args")
args = parser.parse_args()
return {"module": args["module"], "data": CodeBasedExtensionService.get_code_based_extension(args["module"])}
@@ -39,21 +43,21 @@ class CodeBasedExtensionAPI(Resource):
@console_ns.route("/api-based-extension")
class APIBasedExtensionAPI(Resource):
- @api.doc("get_api_based_extensions")
- @api.doc(description="Get all API-based extensions for current tenant")
- @api.response(200, "Success", fields.List(fields.Nested(api_based_extension_fields)))
+ @console_ns.doc("get_api_based_extensions")
+ @console_ns.doc(description="Get all API-based extensions for current tenant")
+ @console_ns.response(200, "Success", api_based_extension_list_model)
@setup_required
@login_required
@account_initialization_required
- @marshal_with(api_based_extension_fields)
+ @marshal_with(api_based_extension_model)
def get(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return APIBasedExtensionService.get_all_by_tenant_id(tenant_id)
- @api.doc("create_api_based_extension")
- @api.doc(description="Create a new API-based extension")
- @api.expect(
- api.model(
+ @console_ns.doc("create_api_based_extension")
+ @console_ns.doc(description="Create a new API-based extension")
+ @console_ns.expect(
+ console_ns.model(
"CreateAPIBasedExtensionRequest",
{
"name": fields.String(required=True, description="Extension name"),
@@ -62,20 +66,17 @@ class APIBasedExtensionAPI(Resource):
},
)
)
- @api.response(201, "Extension created successfully", api_based_extension_fields)
+ @console_ns.response(201, "Extension created successfully", api_based_extension_model)
@setup_required
@login_required
@account_initialization_required
- @marshal_with(api_based_extension_fields)
+ @marshal_with(api_based_extension_model)
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- parser.add_argument("api_endpoint", type=str, required=True, location="json")
- parser.add_argument("api_key", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = console_ns.payload
+ _, current_tenant_id = current_account_with_tenant()
extension_data = APIBasedExtension(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
name=args["name"],
api_endpoint=args["api_endpoint"],
api_key=args["api_key"],
@@ -86,25 +87,25 @@ class APIBasedExtensionAPI(Resource):
@console_ns.route("/api-based-extension/")
class APIBasedExtensionDetailAPI(Resource):
- @api.doc("get_api_based_extension")
- @api.doc(description="Get API-based extension by ID")
- @api.doc(params={"id": "Extension ID"})
- @api.response(200, "Success", api_based_extension_fields)
+ @console_ns.doc("get_api_based_extension")
+ @console_ns.doc(description="Get API-based extension by ID")
+ @console_ns.doc(params={"id": "Extension ID"})
+ @console_ns.response(200, "Success", api_based_extension_model)
@setup_required
@login_required
@account_initialization_required
- @marshal_with(api_based_extension_fields)
+ @marshal_with(api_based_extension_model)
def get(self, id):
api_based_extension_id = str(id)
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return APIBasedExtensionService.get_with_tenant_id(tenant_id, api_based_extension_id)
- @api.doc("update_api_based_extension")
- @api.doc(description="Update API-based extension")
- @api.doc(params={"id": "Extension ID"})
- @api.expect(
- api.model(
+ @console_ns.doc("update_api_based_extension")
+ @console_ns.doc(description="Update API-based extension")
+ @console_ns.doc(params={"id": "Extension ID"})
+ @console_ns.expect(
+ console_ns.model(
"UpdateAPIBasedExtensionRequest",
{
"name": fields.String(required=True, description="Extension name"),
@@ -113,22 +114,18 @@ class APIBasedExtensionDetailAPI(Resource):
},
)
)
- @api.response(200, "Extension updated successfully", api_based_extension_fields)
+ @console_ns.response(200, "Extension updated successfully", api_based_extension_model)
@setup_required
@login_required
@account_initialization_required
- @marshal_with(api_based_extension_fields)
+ @marshal_with(api_based_extension_model)
def post(self, id):
api_based_extension_id = str(id)
- tenant_id = current_user.current_tenant_id
+ _, current_tenant_id = current_account_with_tenant()
- extension_data_from_db = APIBasedExtensionService.get_with_tenant_id(tenant_id, api_based_extension_id)
+ extension_data_from_db = APIBasedExtensionService.get_with_tenant_id(current_tenant_id, api_based_extension_id)
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- parser.add_argument("api_endpoint", type=str, required=True, location="json")
- parser.add_argument("api_key", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = console_ns.payload
extension_data_from_db.name = args["name"]
extension_data_from_db.api_endpoint = args["api_endpoint"]
@@ -138,18 +135,18 @@ class APIBasedExtensionDetailAPI(Resource):
return APIBasedExtensionService.save(extension_data_from_db)
- @api.doc("delete_api_based_extension")
- @api.doc(description="Delete API-based extension")
- @api.doc(params={"id": "Extension ID"})
- @api.response(204, "Extension deleted successfully")
+ @console_ns.doc("delete_api_based_extension")
+ @console_ns.doc(description="Delete API-based extension")
+ @console_ns.doc(params={"id": "Extension ID"})
+ @console_ns.response(204, "Extension deleted successfully")
@setup_required
@login_required
@account_initialization_required
def delete(self, id):
api_based_extension_id = str(id)
- tenant_id = current_user.current_tenant_id
+ _, current_tenant_id = current_account_with_tenant()
- extension_data_from_db = APIBasedExtensionService.get_with_tenant_id(tenant_id, api_based_extension_id)
+ extension_data_from_db = APIBasedExtensionService.get_with_tenant_id(current_tenant_id, api_based_extension_id)
APIBasedExtensionService.delete(extension_data_from_db)
diff --git a/api/controllers/console/feature.py b/api/controllers/console/feature.py
index d43b839291..6951c906e9 100644
--- a/api/controllers/console/feature.py
+++ b/api/controllers/console/feature.py
@@ -1,21 +1,20 @@
-from flask_login import current_user
from flask_restx import Resource, fields
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from services.feature_service import FeatureService
-from . import api, console_ns
+from . import console_ns
from .wraps import account_initialization_required, cloud_utm_record, setup_required
@console_ns.route("/features")
class FeatureApi(Resource):
- @api.doc("get_tenant_features")
- @api.doc(description="Get feature configuration for current tenant")
- @api.response(
+ @console_ns.doc("get_tenant_features")
+ @console_ns.doc(description="Get feature configuration for current tenant")
+ @console_ns.response(
200,
"Success",
- api.model("FeatureResponse", {"features": fields.Raw(description="Feature configuration object")}),
+ console_ns.model("FeatureResponse", {"features": fields.Raw(description="Feature configuration object")}),
)
@setup_required
@login_required
@@ -23,17 +22,21 @@ class FeatureApi(Resource):
@cloud_utm_record
def get(self):
"""Get feature configuration for current tenant"""
- return FeatureService.get_features(current_user.current_tenant_id).model_dump()
+ _, current_tenant_id = current_account_with_tenant()
+
+ return FeatureService.get_features(current_tenant_id).model_dump()
@console_ns.route("/system-features")
class SystemFeatureApi(Resource):
- @api.doc("get_system_features")
- @api.doc(description="Get system-wide feature configuration")
- @api.response(
+ @console_ns.doc("get_system_features")
+ @console_ns.doc(description="Get system-wide feature configuration")
+ @console_ns.response(
200,
"Success",
- api.model("SystemFeatureResponse", {"features": fields.Raw(description="System feature configuration object")}),
+ console_ns.model(
+ "SystemFeatureResponse", {"features": fields.Raw(description="System feature configuration object")}
+ ),
)
def get(self):
"""Get system-wide feature configuration"""
diff --git a/api/controllers/console/files.py b/api/controllers/console/files.py
index 34f186e2f0..29417dc896 100644
--- a/api/controllers/console/files.py
+++ b/api/controllers/console/files.py
@@ -1,7 +1,6 @@
from typing import Literal
from flask import request
-from flask_login import current_user
from flask_restx import Resource, marshal_with
from werkzeug.exceptions import Forbidden
@@ -9,6 +8,7 @@ import services
from configs import dify_config
from constants import DOCUMENT_EXTENSIONS
from controllers.common.errors import (
+ BlockedFileExtensionError,
FilenameNotExistsError,
FileTooLargeError,
NoFileUploadedError,
@@ -22,8 +22,7 @@ from controllers.console.wraps import (
)
from extensions.ext_database import db
from fields.file_fields import file_fields, upload_config_fields
-from libs.login import login_required
-from models import Account
+from libs.login import current_account_with_tenant, login_required
from services.file_service import FileService
from . import console_ns
@@ -41,10 +40,14 @@ class FileApi(Resource):
return {
"file_size_limit": dify_config.UPLOAD_FILE_SIZE_LIMIT,
"batch_count_limit": dify_config.UPLOAD_FILE_BATCH_LIMIT,
+ "file_upload_limit": dify_config.BATCH_UPLOAD_LIMIT,
"image_file_size_limit": dify_config.UPLOAD_IMAGE_FILE_SIZE_LIMIT,
"video_file_size_limit": dify_config.UPLOAD_VIDEO_FILE_SIZE_LIMIT,
"audio_file_size_limit": dify_config.UPLOAD_AUDIO_FILE_SIZE_LIMIT,
"workflow_file_upload_limit": dify_config.WORKFLOW_FILE_UPLOAD_LIMIT,
+ "image_file_batch_limit": dify_config.IMAGE_FILE_BATCH_LIMIT,
+ "single_chunk_attachment_limit": dify_config.SINGLE_CHUNK_ATTACHMENT_LIMIT,
+ "attachment_image_file_size_limit": dify_config.ATTACHMENT_IMAGE_FILE_SIZE_LIMIT,
}, 200
@setup_required
@@ -53,6 +56,7 @@ class FileApi(Resource):
@marshal_with(file_fields)
@cloud_edition_billing_resource_check("documents")
def post(self):
+ current_user, _ = current_account_with_tenant()
source_str = request.form.get("source")
source: Literal["datasets"] | None = "datasets" if source_str == "datasets" else None
@@ -65,16 +69,12 @@ class FileApi(Resource):
if not file.filename:
raise FilenameNotExistsError
-
if source == "datasets" and not current_user.is_dataset_editor:
raise Forbidden()
if source not in ("datasets", None):
source = None
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
-
try:
upload_file = FileService(db.engine).upload_file(
filename=file.filename,
@@ -87,6 +87,8 @@ class FileApi(Resource):
raise FileTooLargeError(file_too_large_error.description)
except services.errors.file.UnsupportedFileTypeError:
raise UnsupportedFileTypeError()
+ except services.errors.file.BlockedFileExtensionError as blocked_extension_error:
+ raise BlockedFileExtensionError(blocked_extension_error.description)
return upload_file, 201
@@ -108,4 +110,4 @@ class FileSupportTypeApi(Resource):
@login_required
@account_initialization_required
def get(self):
- return {"allowed_extensions": DOCUMENT_EXTENSIONS}
+ return {"allowed_extensions": list(DOCUMENT_EXTENSIONS)}
diff --git a/api/controllers/console/init_validate.py b/api/controllers/console/init_validate.py
index 30b53458b2..2bebe79eac 100644
--- a/api/controllers/console/init_validate.py
+++ b/api/controllers/console/init_validate.py
@@ -1,29 +1,41 @@
import os
from flask import session
-from flask_restx import Resource, fields, reqparse
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy.orm import Session
from configs import dify_config
from extensions.ext_database import db
-from libs.helper import StrLen
from models.model import DifySetup
from services.account_service import TenantService
-from . import api, console_ns
+from . import console_ns
from .error import AlreadySetupError, InitValidateFailedError
from .wraps import only_edition_self_hosted
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
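+# Pydantic emits "#/$defs/{model}" refs by default; Swagger 2.0 documents
+# expect "#/definitions/{model}", hence the explicit ref_template.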
+
+
+class InitValidatePayload(BaseModel):
+ password: str = Field(..., max_length=30)
+
+
+console_ns.schema_model(
+ InitValidatePayload.__name__,
+ InitValidatePayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+
@console_ns.route("/init")
class InitValidateAPI(Resource):
- @api.doc("get_init_status")
- @api.doc(description="Get initialization validation status")
- @api.response(
+ @console_ns.doc("get_init_status")
+ @console_ns.doc(description="Get initialization validation status")
+ @console_ns.response(
200,
"Success",
- model=api.model(
+ model=console_ns.model(
"InitStatusResponse",
{"status": fields.String(description="Initialization status", enum=["finished", "not_started"])},
),
@@ -35,20 +47,15 @@ class InitValidateAPI(Resource):
return {"status": "finished"}
return {"status": "not_started"}
- @api.doc("validate_init_password")
- @api.doc(description="Validate initialization password for self-hosted edition")
- @api.expect(
- api.model(
- "InitValidateRequest",
- {"password": fields.String(required=True, description="Initialization password", max_length=30)},
- )
- )
- @api.response(
+ @console_ns.doc("validate_init_password")
+ @console_ns.doc(description="Validate initialization password for self-hosted edition")
+ @console_ns.expect(console_ns.models[InitValidatePayload.__name__])
+ @console_ns.response(
201,
"Success",
- model=api.model("InitValidateResponse", {"result": fields.String(description="Operation result")}),
+ model=console_ns.model("InitValidateResponse", {"result": fields.String(description="Operation result")}),
)
- @api.response(400, "Already setup or validation failed")
+ @console_ns.response(400, "Already setup or validation failed")
@only_edition_self_hosted
def post(self):
"""Validate initialization password"""
@@ -57,9 +64,8 @@ class InitValidateAPI(Resource):
if tenant_count > 0:
raise AlreadySetupError()
- parser = reqparse.RequestParser()
- parser.add_argument("password", type=StrLen(30), required=True, location="json")
- input_password = parser.parse_args()["password"]
+ payload = InitValidatePayload.model_validate(console_ns.payload)
+ input_password = payload.password
if input_password != os.environ.get("INIT_PASSWORD"):
session["is_init_validated"] = False
diff --git a/api/controllers/console/ping.py b/api/controllers/console/ping.py
index 29f49b99de..25a3d80522 100644
--- a/api/controllers/console/ping.py
+++ b/api/controllers/console/ping.py
@@ -1,16 +1,16 @@
from flask_restx import Resource, fields
-from . import api, console_ns
+from . import console_ns
@console_ns.route("/ping")
class PingApi(Resource):
- @api.doc("health_check")
- @api.doc(description="Health check endpoint for connection testing")
- @api.response(
+ @console_ns.doc("health_check")
+ @console_ns.doc(description="Health check endpoint for connection testing")
+ @console_ns.response(
200,
"Success",
- api.model("PingResponse", {"result": fields.String(description="Health check result", example="pong")}),
+ console_ns.model("PingResponse", {"result": fields.String(description="Health check result", example="pong")}),
)
def get(self):
"""Health check endpoint for connection testing"""
diff --git a/api/controllers/console/remote_files.py b/api/controllers/console/remote_files.py
index 7aaf807fb0..47eef7eb7e 100644
--- a/api/controllers/console/remote_files.py
+++ b/api/controllers/console/remote_files.py
@@ -1,9 +1,8 @@
import urllib.parse
-from typing import cast
import httpx
-from flask_login import current_user
-from flask_restx import Resource, marshal_with, reqparse
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel, Field
import services
from controllers.common import helpers
@@ -16,7 +15,7 @@ from core.file import helpers as file_helpers
from core.helper import ssrf_proxy
from extensions.ext_database import db
from fields.file_fields import file_fields_with_signed_url, remote_file_info_fields
-from models.account import Account
+from libs.login import current_account_with_tenant
from services.file_service import FileService
from . import console_ns
@@ -38,15 +37,23 @@ class RemoteFileInfoApi(Resource):
}
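+# Body schema for remote file upload; the URL is fetched through the SSRF
+# proxy in the handler below before the content is stored.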
+class RemoteFileUploadPayload(BaseModel):
+ url: str = Field(..., description="URL to fetch")
+
+
+console_ns.schema_model(
+ RemoteFileUploadPayload.__name__,
+ RemoteFileUploadPayload.model_json_schema(ref_template="#/definitions/{model}"),
+)
+
+
@console_ns.route("/remote-files/upload")
class RemoteFileUploadApi(Resource):
+ @console_ns.expect(console_ns.models[RemoteFileUploadPayload.__name__])
@marshal_with(file_fields_with_signed_url)
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("url", type=str, required=True, help="URL is required")
- args = parser.parse_args()
-
- url = args["url"]
+ args = RemoteFileUploadPayload.model_validate(console_ns.payload)
+ url = args.url
try:
resp = ssrf_proxy.head(url=url)
@@ -65,7 +72,7 @@ class RemoteFileUploadApi(Resource):
content = resp.content if resp.request.method == "GET" else ssrf_proxy.get(url).content
try:
- user = cast(Account, current_user)
+ user, _ = current_account_with_tenant()
upload_file = FileService(db.engine).upload_file(
filename=file_info.filename,
content=content,
diff --git a/api/controllers/console/setup.py b/api/controllers/console/setup.py
index bff5fc1651..7fa02ae280 100644
--- a/api/controllers/console/setup.py
+++ b/api/controllers/console/setup.py
@@ -1,26 +1,47 @@
from flask import request
-from flask_restx import Resource, fields, reqparse
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field, field_validator
from configs import dify_config
-from libs.helper import StrLen, email, extract_remote_ip
+from libs.helper import EmailStr, extract_remote_ip
from libs.password import valid_password
from models.model import DifySetup, db
from services.account_service import RegisterService, TenantService
-from . import api, console_ns
+from . import console_ns
from .error import AlreadySetupError, NotInitValidateError
from .init_validate import get_init_validate_status
from .wraps import only_edition_self_hosted
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
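+# Setup payload mirrors the old reqparse arguments; password strength is
+# still enforced by valid_password via a field validator.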
+class SetupRequestPayload(BaseModel):
+ email: EmailStr = Field(..., description="Admin email address")
+ name: str = Field(..., max_length=30, description="Admin name (max 30 characters)")
+ password: str = Field(..., description="Admin password")
+ language: str | None = Field(default=None, description="Admin language")
+
+ @field_validator("password")
+ @classmethod
+ def validate_password(cls, value: str) -> str:
+ return valid_password(value)
+
+
+console_ns.schema_model(
+ SetupRequestPayload.__name__,
+ SetupRequestPayload.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
+)
+
@console_ns.route("/setup")
class SetupApi(Resource):
- @api.doc("get_setup_status")
- @api.doc(description="Get system setup status")
- @api.response(
+ @console_ns.doc("get_setup_status")
+ @console_ns.doc(description="Get system setup status")
+ @console_ns.response(
200,
"Success",
- api.model(
+ console_ns.model(
"SetupStatusResponse",
{
"step": fields.String(description="Setup step status", enum=["not_started", "finished"]),
@@ -40,20 +61,13 @@ class SetupApi(Resource):
return {"step": "not_started"}
return {"step": "finished"}
- @api.doc("setup_system")
- @api.doc(description="Initialize system setup with admin account")
- @api.expect(
- api.model(
- "SetupRequest",
- {
- "email": fields.String(required=True, description="Admin email address"),
- "name": fields.String(required=True, description="Admin name (max 30 characters)"),
- "password": fields.String(required=True, description="Admin password"),
- },
- )
+ @console_ns.doc("setup_system")
+ @console_ns.doc(description="Initialize system setup with admin account")
+ @console_ns.expect(console_ns.models[SetupRequestPayload.__name__])
+ @console_ns.response(
+ 201, "Success", console_ns.model("SetupResponse", {"result": fields.String(description="Setup result")})
)
- @api.response(201, "Success", api.model("SetupResponse", {"result": fields.String(description="Setup result")}))
- @api.response(400, "Already setup or validation failed")
+ @console_ns.response(400, "Already setup or validation failed")
@only_edition_self_hosted
def post(self):
"""Initialize system setup with admin account"""
@@ -69,15 +83,15 @@ class SetupApi(Resource):
if not get_init_validate_status():
raise NotInitValidateError()
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("name", type=StrLen(30), required=True, location="json")
- parser.add_argument("password", type=valid_password, required=True, location="json")
- args = parser.parse_args()
+ args = SetupRequestPayload.model_validate(console_ns.payload)
# setup
RegisterService.setup(
- email=args["email"], name=args["name"], password=args["password"], ip_address=extract_remote_ip(request)
+ email=args.email,
+ name=args.name,
+ password=args.password,
+ ip_address=extract_remote_ip(request),
+ language=args.language,
)
return {"result": "success"}, 201
diff --git a/api/controllers/console/tag/tags.py b/api/controllers/console/tag/tags.py
index 3d29b3ee61..17cfc3ff4b 100644
--- a/api/controllers/console/tag/tags.py
+++ b/api/controllers/console/tag/tags.py
@@ -1,12 +1,11 @@
from flask import request
-from flask_login import current_user
from flask_restx import Resource, marshal_with, reqparse
from werkzeug.exceptions import Forbidden
from controllers.console import console_ns
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, edit_permission_required, setup_required
from fields.tag_fields import dataset_tag_fields
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.model import Tag
from services.tag_service import TagService
@@ -17,6 +16,19 @@ def _validate_name(name):
return name
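+# Parsers are built once at module level so they can feed both
+# @console_ns.expect (for the Swagger docs) and parse_args() in the handlers.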
+parser_tags = (
+ reqparse.RequestParser()
+ .add_argument(
+ "name",
+ nullable=False,
+ required=True,
+ help="Name must be between 1 to 50 characters.",
+ type=_validate_name,
+ )
+ .add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.")
+)
+
+
@console_ns.route("/tags")
class TagListApi(Resource):
@setup_required
@@ -24,28 +36,24 @@ class TagListApi(Resource):
@account_initialization_required
@marshal_with(dataset_tag_fields)
def get(self):
+ _, current_tenant_id = current_account_with_tenant()
tag_type = request.args.get("type", type=str, default="")
keyword = request.args.get("keyword", default=None, type=str)
- tags = TagService.get_tags(tag_type, current_user.current_tenant_id, keyword)
+ tags = TagService.get_tags(tag_type, current_tenant_id, keyword)
return tags, 200
+ @console_ns.expect(parser_tags)
@setup_required
@login_required
@account_initialization_required
def post(self):
+ current_user, _ = current_account_with_tenant()
# The role of the current user in the ta table must be admin, owner, or editor
- if not (current_user.is_editor or current_user.is_dataset_editor):
+ if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name", nullable=False, required=True, help="Name must be between 1 to 50 characters.", type=_validate_name
- )
- parser.add_argument(
- "type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type."
- )
- args = parser.parse_args()
+ args = parser_tags.parse_args()
tag = TagService.save_tags(args)
response = {"id": tag.id, "name": tag.name, "type": tag.type, "binding_count": 0}
@@ -53,22 +61,25 @@ class TagListApi(Resource):
return response, 200
+parser_tag_id = reqparse.RequestParser().add_argument(
+ "name", nullable=False, required=True, help="Name must be between 1 to 50 characters.", type=_validate_name
+)
+
+
@console_ns.route("/tags/")
class TagUpdateDeleteApi(Resource):
+ @console_ns.expect(parser_tag_id)
@setup_required
@login_required
@account_initialization_required
def patch(self, tag_id):
+ current_user, _ = current_account_with_tenant()
tag_id = str(tag_id)
# The role of the current user in the ta table must be admin, owner, or editor
- if not (current_user.is_editor or current_user.is_dataset_editor):
+ if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument(
- "name", nullable=False, required=True, help="Name must be between 1 to 50 characters.", type=_validate_name
- )
- args = parser.parse_args()
+ args = parser_tag_id.parse_args()
tag = TagService.update_tags(args, tag_id)
binding_count = TagService.get_tag_binding_count(tag_id)
@@ -80,60 +91,62 @@ class TagUpdateDeleteApi(Resource):
@setup_required
@login_required
@account_initialization_required
+ @edit_permission_required
def delete(self, tag_id):
tag_id = str(tag_id)
- # The role of the current user in the ta table must be admin, owner, or editor
- if not current_user.is_editor:
- raise Forbidden()
TagService.delete_tag(tag_id)
return 204
+parser_create = (
+ reqparse.RequestParser()
+ .add_argument("tag_ids", type=list, nullable=False, required=True, location="json", help="Tag IDs is required.")
+ .add_argument("target_id", type=str, nullable=False, required=True, location="json", help="Target ID is required.")
+ .add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.")
+)
+
+
@console_ns.route("/tag-bindings/create")
class TagBindingCreateApi(Resource):
+ @console_ns.expect(parser_create)
@setup_required
@login_required
@account_initialization_required
def post(self):
+ current_user, _ = current_account_with_tenant()
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
- if not (current_user.is_editor or current_user.is_dataset_editor):
+ if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument(
- "tag_ids", type=list, nullable=False, required=True, location="json", help="Tag IDs is required."
- )
- parser.add_argument(
- "target_id", type=str, nullable=False, required=True, location="json", help="Target ID is required."
- )
- parser.add_argument(
- "type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type."
- )
- args = parser.parse_args()
+ args = parser_create.parse_args()
TagService.save_tag_binding(args)
return {"result": "success"}, 200
+parser_remove = (
+ reqparse.RequestParser()
+ .add_argument("tag_id", type=str, nullable=False, required=True, help="Tag ID is required.")
+ .add_argument("target_id", type=str, nullable=False, required=True, help="Target ID is required.")
+ .add_argument("type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type.")
+)
+
+
@console_ns.route("/tag-bindings/remove")
class TagBindingDeleteApi(Resource):
+ @console_ns.expect(parser_remove)
@setup_required
@login_required
@account_initialization_required
def post(self):
+ current_user, _ = current_account_with_tenant()
# The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
- if not (current_user.is_editor or current_user.is_dataset_editor):
+ if not (current_user.has_edit_permission or current_user.is_dataset_editor):
raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("tag_id", type=str, nullable=False, required=True, help="Tag ID is required.")
- parser.add_argument("target_id", type=str, nullable=False, required=True, help="Target ID is required.")
- parser.add_argument(
- "type", type=str, location="json", choices=Tag.TAG_TYPE_LIST, nullable=True, help="Invalid tag type."
- )
- args = parser.parse_args()
+ args = parser_remove.parse_args()
TagService.delete_tag_binding(args)
return {"result": "success"}, 200
diff --git a/api/controllers/console/version.py b/api/controllers/console/version.py
index 965a520f70..419261ba2a 100644
--- a/api/controllers/console/version.py
+++ b/api/controllers/console/version.py
@@ -2,29 +2,37 @@ import json
import logging
import httpx
-from flask_restx import Resource, fields, reqparse
+from flask import request
+from flask_restx import Resource, fields
from packaging import version
+from pydantic import BaseModel, Field
from configs import dify_config
-from . import api, console_ns
+from . import console_ns
logger = logging.getLogger(__name__)
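+# Query schema for the update check; current_version is required so the
+# endpoint can compare it against the latest released version.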
+class VersionQuery(BaseModel):
+ current_version: str = Field(..., description="Current application version")
+
+
+console_ns.schema_model(
+ VersionQuery.__name__,
+ VersionQuery.model_json_schema(ref_template="#/definitions/{model}"),
+)
+
+
@console_ns.route("/version")
class VersionApi(Resource):
- @api.doc("check_version_update")
- @api.doc(description="Check for application version updates")
- @api.expect(
- api.parser().add_argument(
- "current_version", type=str, required=True, location="args", help="Current application version"
- )
- )
- @api.response(
+ @console_ns.doc("check_version_update")
+ @console_ns.doc(description="Check for application version updates")
+ @console_ns.expect(console_ns.models[VersionQuery.__name__])
+ @console_ns.response(
200,
"Success",
- api.model(
+ console_ns.model(
"VersionResponse",
{
"version": fields.String(description="Latest version number"),
@@ -37,9 +45,7 @@ class VersionApi(Resource):
)
def get(self):
"""Check for application version updates"""
- parser = reqparse.RequestParser()
- parser.add_argument("current_version", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = VersionQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
check_update_url = dify_config.CHECK_UPDATE_URL
result = {
@@ -59,16 +65,16 @@ class VersionApi(Resource):
try:
response = httpx.get(
check_update_url,
- params={"current_version": args["current_version"]},
- timeout=httpx.Timeout(connect=3, read=10),
+ params={"current_version": args.current_version},
+ timeout=httpx.Timeout(timeout=10.0, connect=3.0),
)
except Exception as error:
logger.warning("Check update version error: %s.", str(error))
- result["version"] = args["current_version"]
+ result["version"] = args.current_version
return result
content = json.loads(response.content)
- if _has_new_version(latest_version=content["version"], current_version=f"{args['current_version']}"):
+ if _has_new_version(latest_version=content["version"], current_version=f"{args.current_version}"):
result["version"] = content["version"]
result["release_date"] = content["releaseDate"]
result["release_notes"] = content["releaseNotes"]
diff --git a/api/controllers/console/workspace/__init__.py b/api/controllers/console/workspace/__init__.py
index 4a048f3c5e..876e2301f2 100644
--- a/api/controllers/console/workspace/__init__.py
+++ b/api/controllers/console/workspace/__init__.py
@@ -2,11 +2,11 @@ from collections.abc import Callable
from functools import wraps
from typing import ParamSpec, TypeVar
-from flask_login import current_user
from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden
from extensions.ext_database import db
+from libs.login import current_account_with_tenant
from models.account import TenantPluginPermission
P = ParamSpec("P")
@@ -20,8 +20,9 @@ def plugin_permission_required(
def interceptor(view: Callable[P, R]):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
+ current_user, current_tenant_id = current_account_with_tenant()
user = current_user
- tenant_id = user.current_tenant_id
+ tenant_id = current_tenant_id
with Session(db.engine) as session:
permission = (
diff --git a/api/controllers/console/workspace/account.py b/api/controllers/console/workspace/account.py
index e2b0e3f84d..55eaa2f09f 100644
--- a/api/controllers/console/workspace/account.py
+++ b/api/controllers/console/workspace/account.py
@@ -1,9 +1,10 @@
from datetime import datetime
+from typing import Literal
import pytz
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, fields, marshal_with, reqparse
+from flask_restx import Resource, fields, marshal_with
+from pydantic import BaseModel, Field, field_validator, model_validator
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -36,45 +37,162 @@ from controllers.console.wraps import (
from extensions.ext_database import db
from fields.member_fields import account_fields
from libs.datetime_utils import naive_utc_now
-from libs.helper import TimestampField, email, extract_remote_ip, timezone
-from libs.login import login_required
-from models import AccountIntegrate, InvitationCode
-from models.account import Account
+from libs.helper import EmailStr, TimestampField, extract_remote_ip, timezone
+from libs.login import current_account_with_tenant, login_required
+from models import Account, AccountIntegrate, InvitationCode
from services.account_service import AccountService
from services.billing_service import BillingService
from services.errors.account import CurrentPasswordIncorrectError as ServiceCurrentPasswordIncorrectError
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
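+# One payload model per account endpoint; the field validators reuse the same
+# helpers (supported_language, timezone) the old reqparse types called.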
+class AccountInitPayload(BaseModel):
+ interface_language: str
+ timezone: str
+ invitation_code: str | None = None
+
+ @field_validator("interface_language")
+ @classmethod
+ def validate_language(cls, value: str) -> str:
+ return supported_language(value)
+
+ @field_validator("timezone")
+ @classmethod
+ def validate_timezone(cls, value: str) -> str:
+ return timezone(value)
+
+
+class AccountNamePayload(BaseModel):
+ name: str = Field(min_length=3, max_length=30)
+
+
+class AccountAvatarPayload(BaseModel):
+ avatar: str
+
+
+class AccountInterfaceLanguagePayload(BaseModel):
+ interface_language: str
+
+ @field_validator("interface_language")
+ @classmethod
+ def validate_language(cls, value: str) -> str:
+ return supported_language(value)
+
+
+class AccountInterfaceThemePayload(BaseModel):
+ interface_theme: Literal["light", "dark"]
+
+
+class AccountTimezonePayload(BaseModel):
+ timezone: str
+
+ @field_validator("timezone")
+ @classmethod
+ def validate_timezone(cls, value: str) -> str:
+ return timezone(value)
+
+
+class AccountPasswordPayload(BaseModel):
+ password: str | None = None
+ new_password: str
+ repeat_new_password: str
+
+ @model_validator(mode="after")
+ def check_passwords_match(self) -> "AccountPasswordPayload":
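+ # Pydantic v2 propagates non-ValueError exceptions raised in validators,
+ # so the API-level error below reaches the caller unchanged.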
+ if self.new_password != self.repeat_new_password:
+ raise RepeatPasswordNotMatchError()
+ return self
+
+
+class AccountDeletePayload(BaseModel):
+ token: str
+ code: str
+
+
+class AccountDeletionFeedbackPayload(BaseModel):
+ email: EmailStr
+ feedback: str
+
+
+class EducationActivatePayload(BaseModel):
+ token: str
+ institution: str
+ role: str
+
+
+class EducationAutocompleteQuery(BaseModel):
+ keywords: str
+ page: int = 0
+ limit: int = 20
+
+
+class ChangeEmailSendPayload(BaseModel):
+ email: EmailStr
+ language: str | None = None
+ phase: str | None = None
+ token: str | None = None
+
+
+class ChangeEmailValidityPayload(BaseModel):
+ email: EmailStr
+ code: str
+ token: str
+
+
+class ChangeEmailResetPayload(BaseModel):
+ new_email: EmailStr
+ token: str
+
+
+class CheckEmailUniquePayload(BaseModel):
+ email: EmailStr
+
+
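+# Helper to register each payload schema on the namespace under its class name.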
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(AccountInitPayload)
+reg(AccountNamePayload)
+reg(AccountAvatarPayload)
+reg(AccountInterfaceLanguagePayload)
+reg(AccountInterfaceThemePayload)
+reg(AccountTimezonePayload)
+reg(AccountPasswordPayload)
+reg(AccountDeletePayload)
+reg(AccountDeletionFeedbackPayload)
+reg(EducationActivatePayload)
+reg(EducationAutocompleteQuery)
+reg(ChangeEmailSendPayload)
+reg(ChangeEmailValidityPayload)
+reg(ChangeEmailResetPayload)
+reg(CheckEmailUniquePayload)
+
@console_ns.route("/account/init")
class AccountInitApi(Resource):
+ @console_ns.expect(console_ns.models[AccountInitPayload.__name__])
@setup_required
@login_required
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- account = current_user
+ account, _ = current_account_with_tenant()
if account.status == "active":
raise AccountAlreadyInitedError()
- parser = reqparse.RequestParser()
+ payload = console_ns.payload or {}
+ args = AccountInitPayload.model_validate(payload)
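+ # invitation_code stays optional in the schema; the CLOUD-only requirement
+ # is enforced below because it depends on the running edition.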
if dify_config.EDITION == "CLOUD":
- parser.add_argument("invitation_code", type=str, location="json")
-
- parser.add_argument("interface_language", type=supported_language, required=True, location="json")
- parser.add_argument("timezone", type=timezone, required=True, location="json")
- args = parser.parse_args()
-
- if dify_config.EDITION == "CLOUD":
- if not args["invitation_code"]:
+ if not args.invitation_code:
raise ValueError("invitation_code is required")
# check invitation code
invitation_code = (
db.session.query(InvitationCode)
.where(
- InvitationCode.code == args["invitation_code"],
+ InvitationCode.code == args.invitation_code,
InvitationCode.status == "unused",
)
.first()
@@ -88,8 +206,8 @@ class AccountInitApi(Resource):
invitation_code.used_by_tenant_id = account.current_tenant_id
invitation_code.used_by_account_id = account.id
- account.interface_language = args["interface_language"]
- account.timezone = args["timezone"]
+ account.interface_language = args.interface_language
+ account.timezone = args.timezone
account.interface_theme = "light"
account.status = "active"
account.initialized_at = naive_utc_now()
@@ -106,129 +224,108 @@ class AccountProfileApi(Resource):
@marshal_with(account_fields)
@enterprise_license_required
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
return current_user
@console_ns.route("/account/name")
class AccountNameApi(Resource):
+ @console_ns.expect(console_ns.models[AccountNamePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- args = parser.parse_args()
-
- # Validate account name length
- if len(args["name"]) < 3 or len(args["name"]) > 30:
- raise ValueError("Account name must be between 3 and 30 characters.")
-
- updated_account = AccountService.update_account(current_user, name=args["name"])
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = AccountNamePayload.model_validate(payload)
+ updated_account = AccountService.update_account(current_user, name=args.name)
return updated_account
@console_ns.route("/account/avatar")
class AccountAvatarApi(Resource):
+ @console_ns.expect(console_ns.models[AccountAvatarPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("avatar", type=str, required=True, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = AccountAvatarPayload.model_validate(payload)
- updated_account = AccountService.update_account(current_user, avatar=args["avatar"])
+ updated_account = AccountService.update_account(current_user, avatar=args.avatar)
return updated_account
@console_ns.route("/account/interface-language")
class AccountInterfaceLanguageApi(Resource):
+ @console_ns.expect(console_ns.models[AccountInterfaceLanguagePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("interface_language", type=supported_language, required=True, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = AccountInterfaceLanguagePayload.model_validate(payload)
- updated_account = AccountService.update_account(current_user, interface_language=args["interface_language"])
+ updated_account = AccountService.update_account(current_user, interface_language=args.interface_language)
return updated_account
@console_ns.route("/account/interface-theme")
class AccountInterfaceThemeApi(Resource):
+ @console_ns.expect(console_ns.models[AccountInterfaceThemePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("interface_theme", type=str, choices=["light", "dark"], required=True, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = AccountInterfaceThemePayload.model_validate(payload)
- updated_account = AccountService.update_account(current_user, interface_theme=args["interface_theme"])
+ updated_account = AccountService.update_account(current_user, interface_theme=args.interface_theme)
return updated_account
@console_ns.route("/account/timezone")
class AccountTimezoneApi(Resource):
+ @console_ns.expect(console_ns.models[AccountTimezonePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("timezone", type=str, required=True, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = AccountTimezonePayload.model_validate(payload)
- # Validate timezone string, e.g. America/New_York, Asia/Shanghai
- if args["timezone"] not in pytz.all_timezones:
- raise ValueError("Invalid timezone string.")
-
- updated_account = AccountService.update_account(current_user, timezone=args["timezone"])
+ updated_account = AccountService.update_account(current_user, timezone=args.timezone)
return updated_account
@console_ns.route("/account/password")
class AccountPasswordApi(Resource):
+ @console_ns.expect(console_ns.models[AccountPasswordPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("password", type=str, required=False, location="json")
- parser.add_argument("new_password", type=str, required=True, location="json")
- parser.add_argument("repeat_new_password", type=str, required=True, location="json")
- args = parser.parse_args()
-
- if args["new_password"] != args["repeat_new_password"]:
- raise RepeatPasswordNotMatchError()
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = AccountPasswordPayload.model_validate(payload)
try:
- AccountService.update_account_password(current_user, args["password"], args["new_password"])
+ AccountService.update_account_password(current_user, args.password, args.new_password)
except ServiceCurrentPasswordIncorrectError:
raise CurrentPasswordIncorrectError()
@@ -253,9 +350,7 @@ class AccountIntegrateApi(Resource):
@account_initialization_required
@marshal_with(integrate_list_fields)
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- account = current_user
+ account, _ = current_account_with_tenant()
account_integrates = db.session.scalars(
select(AccountIntegrate).where(AccountIntegrate.account_id == account.id)
@@ -298,9 +393,7 @@ class AccountDeleteVerifyApi(Resource):
@login_required
@account_initialization_required
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- account = current_user
+ account, _ = current_account_with_tenant()
token, code = AccountService.generate_account_deletion_verification_code(account)
AccountService.send_account_deletion_verification_email(account, code)
@@ -310,20 +403,17 @@ class AccountDeleteVerifyApi(Resource):
@console_ns.route("/account/delete")
class AccountDeleteApi(Resource):
+ @console_ns.expect(console_ns.models[AccountDeletePayload.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("token", type=str, required=True, location="json")
- parser.add_argument("code", type=str, required=True, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = AccountDeletePayload.model_validate(payload)
- if not AccountService.verify_account_deletion_code(args["token"], args["code"]):
+ if not AccountService.verify_account_deletion_code(args.token, args.code):
raise InvalidAccountDeletionCodeError()
AccountService.delete_account(account)
@@ -333,14 +423,13 @@ class AccountDeleteApi(Resource):
@console_ns.route("/account/delete/feedback")
class AccountDeleteUpdateFeedbackApi(Resource):
+ @console_ns.expect(console_ns.models[AccountDeletionFeedbackPayload.__name__])
@setup_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=str, required=True, location="json")
- parser.add_argument("feedback", type=str, required=True, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = AccountDeletionFeedbackPayload.model_validate(payload)
- BillingService.update_account_deletion_feedback(args["email"], args["feedback"])
+ BillingService.update_account_deletion_feedback(args.email, args.feedback)
return {"result": "success"}
@@ -358,9 +447,7 @@ class EducationVerifyApi(Resource):
@cloud_edition_billing_enabled
@marshal_with(verify_fields)
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- account = current_user
+ account, _ = current_account_with_tenant()
return BillingService.EducationIdentity.verify(account.id, account.email)
@@ -374,23 +461,19 @@ class EducationApi(Resource):
"allow_refresh": fields.Boolean,
}
+ @console_ns.expect(console_ns.models[EducationActivatePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@only_edition_cloud
@cloud_edition_billing_enabled
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- account = current_user
+ account, _ = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("token", type=str, required=True, location="json")
- parser.add_argument("institution", type=str, required=True, location="json")
- parser.add_argument("role", type=str, required=True, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = EducationActivatePayload.model_validate(payload)
- return BillingService.EducationIdentity.activate(account, args["token"], args["institution"], args["role"])
+ return BillingService.EducationIdentity.activate(account, args.token, args.institution, args.role)
@setup_required
@login_required
@@ -399,9 +482,7 @@ class EducationApi(Resource):
@cloud_edition_billing_enabled
@marshal_with(status_fields)
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- account = current_user
+ account, _ = current_account_with_tenant()
res = BillingService.EducationIdentity.status(account.id)
# convert expire_at from ISO format to a UTC timestamp
@@ -418,6 +499,7 @@ class EducationAutoCompleteApi(Resource):
"has_next": fields.Boolean,
}
+ @console_ns.expect(console_ns.models[EducationAutocompleteQuery.__name__])
@setup_required
@login_required
@account_initialization_required
@@ -425,141 +507,130 @@ class EducationAutoCompleteApi(Resource):
@cloud_edition_billing_enabled
@marshal_with(data_fields)
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument("keywords", type=str, required=True, location="args")
- parser.add_argument("page", type=int, required=False, location="args", default=0)
- parser.add_argument("limit", type=int, required=False, location="args", default=20)
- args = parser.parse_args()
+ payload = request.args.to_dict(flat=True) # type: ignore
+ args = EducationAutocompleteQuery.model_validate(payload)
- return BillingService.EducationIdentity.autocomplete(args["keywords"], args["page"], args["limit"])
+ return BillingService.EducationIdentity.autocomplete(args.keywords, args.page, args.limit)
@console_ns.route("/account/change-email")
class ChangeEmailSendEmailApi(Resource):
+ @console_ns.expect(console_ns.models[ChangeEmailSendPayload.__name__])
@enable_change_email
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("language", type=str, required=False, location="json")
- parser.add_argument("phase", type=str, required=False, location="json")
- parser.add_argument("token", type=str, required=False, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = ChangeEmailSendPayload.model_validate(payload)
ip_address = extract_remote_ip(request)
if AccountService.is_email_send_ip_limit(ip_address):
raise EmailSendIpLimitError()
- if args["language"] is not None and args["language"] == "zh-Hans":
+ if args.language is not None and args.language == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
account = None
- user_email = args["email"]
- if args["phase"] is not None and args["phase"] == "new_email":
- if args["token"] is None:
+ user_email = args.email
+ if args.phase is not None and args.phase == "new_email":
+ if args.token is None:
raise InvalidTokenError()
- reset_data = AccountService.get_change_email_data(args["token"])
+ reset_data = AccountService.get_change_email_data(args.token)
if reset_data is None:
raise InvalidTokenError()
user_email = reset_data.get("email", "")
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
if user_email != current_user.email:
raise InvalidEmailError()
else:
with Session(db.engine) as session:
- account = session.execute(select(Account).filter_by(email=args["email"])).scalar_one_or_none()
+ account = session.execute(select(Account).filter_by(email=args.email)).scalar_one_or_none()
if account is None:
raise AccountNotFound()
token = AccountService.send_change_email_email(
- account=account, email=args["email"], old_email=user_email, language=language, phase=args["phase"]
+ account=account, email=args.email, old_email=user_email, language=language, phase=args.phase
)
return {"result": "success", "data": token}
@console_ns.route("/account/change-email/validity")
class ChangeEmailCheckApi(Resource):
+ @console_ns.expect(console_ns.models[ChangeEmailValidityPayload.__name__])
@enable_change_email
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- parser.add_argument("code", type=str, required=True, location="json")
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = ChangeEmailValidityPayload.model_validate(payload)
- user_email = args["email"]
+ user_email = args.email
- is_change_email_error_rate_limit = AccountService.is_change_email_error_rate_limit(args["email"])
+ is_change_email_error_rate_limit = AccountService.is_change_email_error_rate_limit(args.email)
if is_change_email_error_rate_limit:
raise EmailChangeLimitError()
- token_data = AccountService.get_change_email_data(args["token"])
+ token_data = AccountService.get_change_email_data(args.token)
if token_data is None:
raise InvalidTokenError()
if user_email != token_data.get("email"):
raise InvalidEmailError()
- if args["code"] != token_data.get("code"):
- AccountService.add_change_email_error_rate_limit(args["email"])
+ if args.code != token_data.get("code"):
+ AccountService.add_change_email_error_rate_limit(args.email)
raise EmailCodeError()
# Verified, revoke the first token
- AccountService.revoke_change_email_token(args["token"])
+ AccountService.revoke_change_email_token(args.token)
# Refresh token data by generating a new token
_, new_token = AccountService.generate_change_email_token(
- user_email, code=args["code"], old_email=token_data.get("old_email"), additional_data={}
+ user_email, code=args.code, old_email=token_data.get("old_email"), additional_data={}
)
- AccountService.reset_change_email_error_rate_limit(args["email"])
+ AccountService.reset_change_email_error_rate_limit(args.email)
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
@console_ns.route("/account/change-email/reset")
class ChangeEmailResetApi(Resource):
+ @console_ns.expect(console_ns.models[ChangeEmailResetPayload.__name__])
@enable_change_email
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("new_email", type=email, required=True, location="json")
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = ChangeEmailResetPayload.model_validate(payload)
- if AccountService.is_account_in_freeze(args["new_email"]):
+ if AccountService.is_account_in_freeze(args.new_email):
raise AccountInFreezeError()
- if not AccountService.check_email_unique(args["new_email"]):
+ if not AccountService.check_email_unique(args.new_email):
raise EmailAlreadyInUseError()
- reset_data = AccountService.get_change_email_data(args["token"])
+ reset_data = AccountService.get_change_email_data(args.token)
if not reset_data:
raise InvalidTokenError()
- AccountService.revoke_change_email_token(args["token"])
+ AccountService.revoke_change_email_token(args.token)
old_email = reset_data.get("old_email", "")
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
if current_user.email != old_email:
raise AccountNotFound()
- updated_account = AccountService.update_account_email(current_user, email=args["new_email"])
+ updated_account = AccountService.update_account_email(current_user, email=args.new_email)
AccountService.send_change_email_completed_notify_email(
- email=args["new_email"],
+ email=args.new_email,
)
return updated_account
@@ -567,13 +638,13 @@ class ChangeEmailResetApi(Resource):
@console_ns.route("/account/change-email/check-email-unique")
class CheckEmailUnique(Resource):
+ @console_ns.expect(console_ns.models[CheckEmailUniquePayload.__name__])
@setup_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("email", type=email, required=True, location="json")
- args = parser.parse_args()
- if AccountService.is_account_in_freeze(args["email"]):
+ payload = console_ns.payload or {}
+ args = CheckEmailUniquePayload.model_validate(payload)
+ if AccountService.is_account_in_freeze(args.email):
raise AccountInFreezeError()
- if not AccountService.check_email_unique(args["email"]):
+ if not AccountService.check_email_unique(args.email):
raise EmailAlreadyInUseError()
return {"result": "success"}
diff --git a/api/controllers/console/workspace/agent_providers.py b/api/controllers/console/workspace/agent_providers.py
index 0a2c8fcfb4..9527fe782e 100644
--- a/api/controllers/console/workspace/agent_providers.py
+++ b/api/controllers/console/workspace/agent_providers.py
@@ -1,18 +1,17 @@
-from flask_login import current_user
from flask_restx import Resource, fields
-from controllers.console import api, console_ns
+from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, setup_required
from core.model_runtime.utils.encoders import jsonable_encoder
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from services.agent_service import AgentService
@console_ns.route("/workspaces/current/agent-providers")
class AgentProviderListApi(Resource):
- @api.doc("list_agent_providers")
- @api.doc(description="Get list of available agent providers")
- @api.response(
+ @console_ns.doc("list_agent_providers")
+ @console_ns.doc(description="Get list of available agent providers")
+ @console_ns.response(
200,
"Success",
fields.List(fields.Raw(description="Agent provider information")),
@@ -21,20 +20,21 @@ class AgentProviderListApi(Resource):
@login_required
@account_initialization_required
def get(self):
+ current_user, current_tenant_id = current_account_with_tenant()
user = current_user
user_id = user.id
- tenant_id = user.current_tenant_id
+ tenant_id = current_tenant_id
return jsonable_encoder(AgentService.list_agent_providers(user_id, tenant_id))
@console_ns.route("/workspaces/current/agent-provider/")
class AgentProviderApi(Resource):
- @api.doc("get_agent_provider")
- @api.doc(description="Get specific agent provider details")
- @api.doc(params={"provider_name": "Agent provider name"})
- @api.response(
+ @console_ns.doc("get_agent_provider")
+ @console_ns.doc(description="Get specific agent provider details")
+ @console_ns.doc(params={"provider_name": "Agent provider name"})
+ @console_ns.response(
200,
"Success",
fields.Raw(description="Agent provider details"),
@@ -43,7 +43,5 @@ class AgentProviderApi(Resource):
@login_required
@account_initialization_required
def get(self, provider_name: str):
- user = current_user
- user_id = user.id
- tenant_id = user.current_tenant_id
- return jsonable_encoder(AgentService.get_agent_provider(user_id, tenant_id, provider_name))
+ current_user, current_tenant_id = current_account_with_tenant()
+ return jsonable_encoder(AgentService.get_agent_provider(current_user.id, current_tenant_id, provider_name))
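
Note: `current_account_with_tenant()` is imported from `libs.login` but not defined in this diff. Judging from its call sites, it returns the authenticated `Account` together with a non-null tenant id, centralizing the `isinstance`/`None` checks the old code repeated inline. A hypothetical sketch of that contract; the real helper may differ.

```python
# Hypothetical sketch of libs.login.current_account_with_tenant, inferred
# from its call sites in this diff; the actual implementation may differ.
from flask_login import current_user

from models.account import Account  # import path shown elsewhere in this diff


def current_account_with_tenant() -> tuple[Account, str]:
    if not isinstance(current_user, Account):
        raise ValueError("Invalid user account")
    tenant_id = current_user.current_tenant_id
    if tenant_id is None:
        raise ValueError("No current tenant")
    return current_user, tenant_id
```
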
diff --git a/api/controllers/console/workspace/endpoint.py b/api/controllers/console/workspace/endpoint.py
index 0657b764cc..bfd9fc6c29 100644
--- a/api/controllers/console/workspace/endpoint.py
+++ b/api/controllers/console/workspace/endpoint.py
@@ -1,61 +1,82 @@
-from flask_login import current_user
-from flask_restx import Resource, fields, reqparse
-from werkzeug.exceptions import Forbidden
+from typing import Any
-from controllers.console import api, console_ns
-from controllers.console.wraps import account_initialization_required, setup_required
+from flask import request
+from flask_restx import Resource, fields
+from pydantic import BaseModel, Field
+
+from controllers.console import console_ns
+from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required
from core.model_runtime.utils.encoders import jsonable_encoder
from core.plugin.impl.exc import PluginPermissionDeniedError
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from services.plugin.endpoint_service import EndpointService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class EndpointCreatePayload(BaseModel):
+ plugin_unique_identifier: str
+ settings: dict[str, Any]
+ name: str = Field(min_length=1)
+
+
+class EndpointIdPayload(BaseModel):
+ endpoint_id: str
+
+
+class EndpointUpdatePayload(EndpointIdPayload):
+ settings: dict[str, Any]
+ name: str = Field(min_length=1)
+
+
+class EndpointListQuery(BaseModel):
+ page: int = Field(ge=1)
+ page_size: int = Field(gt=0)
+
+
+class EndpointListForPluginQuery(EndpointListQuery):
+ plugin_id: str
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(EndpointCreatePayload)
+reg(EndpointIdPayload)
+reg(EndpointUpdatePayload)
+reg(EndpointListQuery)
+reg(EndpointListForPluginQuery)
+
@console_ns.route("/workspaces/current/endpoints/create")
class EndpointCreateApi(Resource):
- @api.doc("create_endpoint")
- @api.doc(description="Create a new plugin endpoint")
- @api.expect(
- api.model(
- "EndpointCreateRequest",
- {
- "plugin_unique_identifier": fields.String(required=True, description="Plugin unique identifier"),
- "settings": fields.Raw(required=True, description="Endpoint settings"),
- "name": fields.String(required=True, description="Endpoint name"),
- },
- )
- )
- @api.response(
+ @console_ns.doc("create_endpoint")
+ @console_ns.doc(description="Create a new plugin endpoint")
+ @console_ns.expect(console_ns.models[EndpointCreatePayload.__name__])
+ @console_ns.response(
200,
"Endpoint created successfully",
- api.model("EndpointCreateResponse", {"success": fields.Boolean(description="Operation success")}),
+ console_ns.model("EndpointCreateResponse", {"success": fields.Boolean(description="Operation success")}),
)
- @api.response(403, "Admin privileges required")
+ @console_ns.response(403, "Admin privileges required")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
- if not user.is_admin_or_owner:
- raise Forbidden()
+ user, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("plugin_unique_identifier", type=str, required=True)
- parser.add_argument("settings", type=dict, required=True)
- parser.add_argument("name", type=str, required=True)
- args = parser.parse_args()
-
- plugin_unique_identifier = args["plugin_unique_identifier"]
- settings = args["settings"]
- name = args["name"]
+ args = EndpointCreatePayload.model_validate(console_ns.payload)
try:
return {
"success": EndpointService.create_endpoint(
- tenant_id=user.current_tenant_id,
+ tenant_id=tenant_id,
user_id=user.id,
- plugin_unique_identifier=plugin_unique_identifier,
- name=name,
- settings=settings,
+ plugin_unique_identifier=args.plugin_unique_identifier,
+ name=args.name,
+ settings=args.settings,
)
}
except PluginPermissionDeniedError as e:
@@ -64,36 +85,31 @@ class EndpointCreateApi(Resource):
@console_ns.route("/workspaces/current/endpoints/list")
class EndpointListApi(Resource):
- @api.doc("list_endpoints")
- @api.doc(description="List plugin endpoints with pagination")
- @api.expect(
- api.parser()
- .add_argument("page", type=int, required=True, location="args", help="Page number")
- .add_argument("page_size", type=int, required=True, location="args", help="Page size")
- )
- @api.response(
+ @console_ns.doc("list_endpoints")
+ @console_ns.doc(description="List plugin endpoints with pagination")
+ @console_ns.expect(console_ns.models[EndpointListQuery.__name__])
+ @console_ns.response(
200,
"Success",
- api.model("EndpointListResponse", {"endpoints": fields.List(fields.Raw(description="Endpoint information"))}),
+ console_ns.model(
+ "EndpointListResponse", {"endpoints": fields.List(fields.Raw(description="Endpoint information"))}
+ ),
)
@setup_required
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=int, required=True, location="args")
- parser.add_argument("page_size", type=int, required=True, location="args")
- args = parser.parse_args()
+ args = EndpointListQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- page = args["page"]
- page_size = args["page_size"]
+ page = args.page
+ page_size = args.page_size
return jsonable_encoder(
{
"endpoints": EndpointService.list_endpoints(
- tenant_id=user.current_tenant_id,
+ tenant_id=tenant_id,
user_id=user.id,
page=page,
page_size=page_size,
@@ -104,18 +120,13 @@ class EndpointListApi(Resource):
@console_ns.route("/workspaces/current/endpoints/list/plugin")
class EndpointListForSinglePluginApi(Resource):
- @api.doc("list_plugin_endpoints")
- @api.doc(description="List endpoints for a specific plugin")
- @api.expect(
- api.parser()
- .add_argument("page", type=int, required=True, location="args", help="Page number")
- .add_argument("page_size", type=int, required=True, location="args", help="Page size")
- .add_argument("plugin_id", type=str, required=True, location="args", help="Plugin ID")
- )
- @api.response(
+ @console_ns.doc("list_plugin_endpoints")
+ @console_ns.doc(description="List endpoints for a specific plugin")
+ @console_ns.expect(console_ns.models[EndpointListForPluginQuery.__name__])
+ @console_ns.response(
200,
"Success",
- api.model(
+ console_ns.model(
"PluginEndpointListResponse", {"endpoints": fields.List(fields.Raw(description="Endpoint information"))}
),
)
@@ -123,22 +134,18 @@ class EndpointListForSinglePluginApi(Resource):
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=int, required=True, location="args")
- parser.add_argument("page_size", type=int, required=True, location="args")
- parser.add_argument("plugin_id", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = EndpointListForPluginQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- page = args["page"]
- page_size = args["page_size"]
- plugin_id = args["plugin_id"]
+ page = args.page
+ page_size = args.page_size
+ plugin_id = args.plugin_id
return jsonable_encoder(
{
"endpoints": EndpointService.list_endpoints_for_single_plugin(
- tenant_id=user.current_tenant_id,
+ tenant_id=tenant_id,
user_id=user.id,
plugin_id=plugin_id,
page=page,
@@ -150,154 +157,111 @@ class EndpointListForSinglePluginApi(Resource):
@console_ns.route("/workspaces/current/endpoints/delete")
class EndpointDeleteApi(Resource):
- @api.doc("delete_endpoint")
- @api.doc(description="Delete a plugin endpoint")
- @api.expect(
- api.model("EndpointDeleteRequest", {"endpoint_id": fields.String(required=True, description="Endpoint ID")})
- )
- @api.response(
+ @console_ns.doc("delete_endpoint")
+ @console_ns.doc(description="Delete a plugin endpoint")
+ @console_ns.expect(console_ns.models[EndpointIdPayload.__name__])
+ @console_ns.response(
200,
"Endpoint deleted successfully",
- api.model("EndpointDeleteResponse", {"success": fields.Boolean(description="Operation success")}),
+ console_ns.model("EndpointDeleteResponse", {"success": fields.Boolean(description="Operation success")}),
)
- @api.response(403, "Admin privileges required")
+ @console_ns.response(403, "Admin privileges required")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("endpoint_id", type=str, required=True)
- args = parser.parse_args()
-
- if not user.is_admin_or_owner:
- raise Forbidden()
-
- endpoint_id = args["endpoint_id"]
+ args = EndpointIdPayload.model_validate(console_ns.payload)
return {
"success": EndpointService.delete_endpoint(
- tenant_id=user.current_tenant_id, user_id=user.id, endpoint_id=endpoint_id
+ tenant_id=tenant_id, user_id=user.id, endpoint_id=args.endpoint_id
)
}
@console_ns.route("/workspaces/current/endpoints/update")
class EndpointUpdateApi(Resource):
- @api.doc("update_endpoint")
- @api.doc(description="Update a plugin endpoint")
- @api.expect(
- api.model(
- "EndpointUpdateRequest",
- {
- "endpoint_id": fields.String(required=True, description="Endpoint ID"),
- "settings": fields.Raw(required=True, description="Updated settings"),
- "name": fields.String(required=True, description="Updated name"),
- },
- )
- )
- @api.response(
+ @console_ns.doc("update_endpoint")
+ @console_ns.doc(description="Update a plugin endpoint")
+ @console_ns.expect(console_ns.models[EndpointUpdatePayload.__name__])
+ @console_ns.response(
200,
"Endpoint updated successfully",
- api.model("EndpointUpdateResponse", {"success": fields.Boolean(description="Operation success")}),
+ console_ns.model("EndpointUpdateResponse", {"success": fields.Boolean(description="Operation success")}),
)
- @api.response(403, "Admin privileges required")
+ @console_ns.response(403, "Admin privileges required")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("endpoint_id", type=str, required=True)
- parser.add_argument("settings", type=dict, required=True)
- parser.add_argument("name", type=str, required=True)
- args = parser.parse_args()
-
- endpoint_id = args["endpoint_id"]
- settings = args["settings"]
- name = args["name"]
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ args = EndpointUpdatePayload.model_validate(console_ns.payload)
return {
"success": EndpointService.update_endpoint(
- tenant_id=user.current_tenant_id,
+ tenant_id=tenant_id,
user_id=user.id,
- endpoint_id=endpoint_id,
- name=name,
- settings=settings,
+ endpoint_id=args.endpoint_id,
+ name=args.name,
+ settings=args.settings,
)
}
@console_ns.route("/workspaces/current/endpoints/enable")
class EndpointEnableApi(Resource):
- @api.doc("enable_endpoint")
- @api.doc(description="Enable a plugin endpoint")
- @api.expect(
- api.model("EndpointEnableRequest", {"endpoint_id": fields.String(required=True, description="Endpoint ID")})
- )
- @api.response(
+ @console_ns.doc("enable_endpoint")
+ @console_ns.doc(description="Enable a plugin endpoint")
+ @console_ns.expect(console_ns.models[EndpointIdPayload.__name__])
+ @console_ns.response(
200,
"Endpoint enabled successfully",
- api.model("EndpointEnableResponse", {"success": fields.Boolean(description="Operation success")}),
+ console_ns.model("EndpointEnableResponse", {"success": fields.Boolean(description="Operation success")}),
)
- @api.response(403, "Admin privileges required")
+ @console_ns.response(403, "Admin privileges required")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("endpoint_id", type=str, required=True)
- args = parser.parse_args()
-
- endpoint_id = args["endpoint_id"]
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ args = EndpointIdPayload.model_validate(console_ns.payload)
return {
"success": EndpointService.enable_endpoint(
- tenant_id=user.current_tenant_id, user_id=user.id, endpoint_id=endpoint_id
+ tenant_id=tenant_id, user_id=user.id, endpoint_id=args.endpoint_id
)
}
@console_ns.route("/workspaces/current/endpoints/disable")
class EndpointDisableApi(Resource):
- @api.doc("disable_endpoint")
- @api.doc(description="Disable a plugin endpoint")
- @api.expect(
- api.model("EndpointDisableRequest", {"endpoint_id": fields.String(required=True, description="Endpoint ID")})
- )
- @api.response(
+ @console_ns.doc("disable_endpoint")
+ @console_ns.doc(description="Disable a plugin endpoint")
+ @console_ns.expect(console_ns.models[EndpointIdPayload.__name__])
+ @console_ns.response(
200,
"Endpoint disabled successfully",
- api.model("EndpointDisableResponse", {"success": fields.Boolean(description="Operation success")}),
+ console_ns.model("EndpointDisableResponse", {"success": fields.Boolean(description="Operation success")}),
)
- @api.response(403, "Admin privileges required")
+ @console_ns.response(403, "Admin privileges required")
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("endpoint_id", type=str, required=True)
- args = parser.parse_args()
-
- endpoint_id = args["endpoint_id"]
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ args = EndpointIdPayload.model_validate(console_ns.payload)
return {
"success": EndpointService.disable_endpoint(
- tenant_id=user.current_tenant_id, user_id=user.id, endpoint_id=endpoint_id
+ tenant_id=tenant_id, user_id=user.id, endpoint_id=args.endpoint_id
)
}
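
Note: the list endpoints above validate `request.args.to_dict(flat=True)` with Pydantic. Every query value arrives as a string, so the models rely on Pydantic's lax coercion to turn `"2"` into `2` while still enforcing the `Field` bounds. A small check of that behavior, reusing the `EndpointListQuery` shape defined above:

```python
# Query values are strings; Pydantic coerces them to the declared types and
# enforces the Field constraints (page >= 1, page_size > 0).
from pydantic import BaseModel, Field, ValidationError


class EndpointListQuery(BaseModel):
    page: int = Field(ge=1)
    page_size: int = Field(gt=0)


args = EndpointListQuery.model_validate({"page": "2", "page_size": "20"})
assert args.page == 2 and args.page_size == 20

try:
    EndpointListQuery.model_validate({"page": "0", "page_size": "20"})
except ValidationError:
    pass  # rejected: page must be >= 1
```
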
diff --git a/api/controllers/console/workspace/load_balancing_config.py b/api/controllers/console/workspace/load_balancing_config.py
index 99a1c1f032..9bf393ea2e 100644
--- a/api/controllers/console/workspace/load_balancing_config.py
+++ b/api/controllers/console/workspace/load_balancing_config.py
@@ -5,8 +5,8 @@ from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, setup_required
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
-from libs.login import current_user, login_required
-from models.account import Account, TenantAccountRole
+from libs.login import current_account_with_tenant, login_required
+from models import TenantAccountRole
from services.model_load_balancing_service import ModelLoadBalancingService
@@ -18,24 +18,25 @@ class LoadBalancingCredentialsValidateApi(Resource):
@login_required
@account_initialization_required
def post(self, provider: str):
- assert isinstance(current_user, Account)
+ current_user, current_tenant_id = current_account_with_tenant()
if not TenantAccountRole.is_privileged_role(current_user.current_role):
raise Forbidden()
- tenant_id = current_user.current_tenant_id
- assert tenant_id is not None
+ tenant_id = current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
+ parser = (
+ reqparse.RequestParser()
+ .add_argument("model", type=str, required=True, nullable=False, location="json")
+ .add_argument(
+ "model_type",
+ type=str,
+ required=True,
+ nullable=False,
+ choices=[mt.value for mt in ModelType],
+ location="json",
+ )
+ .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
)
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
args = parser.parse_args()
# validate model load balancing credentials
@@ -72,24 +73,25 @@ class LoadBalancingConfigCredentialsValidateApi(Resource):
@login_required
@account_initialization_required
def post(self, provider: str, config_id: str):
- assert isinstance(current_user, Account)
+ current_user, current_tenant_id = current_account_with_tenant()
if not TenantAccountRole.is_privileged_role(current_user.current_role):
raise Forbidden()
- tenant_id = current_user.current_tenant_id
- assert tenant_id is not None
+ tenant_id = current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
+ parser = (
+ reqparse.RequestParser()
+ .add_argument("model", type=str, required=True, nullable=False, location="json")
+ .add_argument(
+ "model_type",
+ type=str,
+ required=True,
+ nullable=False,
+ choices=[mt.value for mt in ModelType],
+ location="json",
+ )
+ .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
)
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
args = parser.parse_args()
# validate model load balancing config credentials
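
Note: this file keeps `reqparse` but switches to the fluent style. That works because flask-restx's `RequestParser.add_argument` returns the parser itself, so the chained form builds the same parser as the statement-per-argument form it replaces:

```python
# RequestParser.add_argument returns the parser, so chaining is equivalent
# to calling add_argument once per statement.
from flask_restx import reqparse

parser = (
    reqparse.RequestParser()
    .add_argument("model", type=str, required=True, nullable=False, location="json")
    .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
)
```
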
diff --git a/api/controllers/console/workspace/members.py b/api/controllers/console/workspace/members.py
index 8b89853bd9..0142e14fb0 100644
--- a/api/controllers/console/workspace/members.py
+++ b/api/controllers/console/workspace/members.py
@@ -1,8 +1,8 @@
from urllib import parse
from flask import abort, request
-from flask_login import current_user
-from flask_restx import Resource, marshal_with, reqparse
+from flask_restx import Resource, marshal_with
+from pydantic import BaseModel, Field
import services
from configs import dify_config
@@ -26,12 +26,48 @@ from controllers.console.wraps import (
from extensions.ext_database import db
from fields.member_fields import account_with_role_list_fields
from libs.helper import extract_remote_ip
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.account import Account, TenantAccountRole
from services.account_service import AccountService, RegisterService, TenantService
from services.errors.account import AccountAlreadyInTenantError
from services.feature_service import FeatureService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class MemberInvitePayload(BaseModel):
+ emails: list[str] = Field(default_factory=list)
+ role: TenantAccountRole
+ language: str | None = None
+
+
+class MemberRoleUpdatePayload(BaseModel):
+ role: str
+
+
+class OwnerTransferEmailPayload(BaseModel):
+ language: str | None = None
+
+
+class OwnerTransferCheckPayload(BaseModel):
+ code: str
+ token: str
+
+
+class OwnerTransferPayload(BaseModel):
+ token: str
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(MemberInvitePayload)
+reg(MemberRoleUpdatePayload)
+reg(OwnerTransferEmailPayload)
+reg(OwnerTransferCheckPayload)
+reg(OwnerTransferPayload)
+
@console_ns.route("/workspaces/current/members")
class MemberListApi(Resource):
@@ -42,8 +78,7 @@ class MemberListApi(Resource):
@account_initialization_required
@marshal_with(account_with_role_list_fields)
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
if not current_user.current_tenant:
raise ValueError("No current tenant")
members = TenantService.get_tenant_members(current_user.current_tenant)
@@ -54,25 +89,21 @@ class MemberListApi(Resource):
class MemberInviteEmailApi(Resource):
"""Invite a new member by email."""
+ @console_ns.expect(console_ns.models[MemberInvitePayload.__name__])
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("members")
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("emails", type=list, required=True, location="json")
- parser.add_argument("role", type=str, required=True, default="admin", location="json")
- parser.add_argument("language", type=str, required=False, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = MemberInvitePayload.model_validate(payload)
- invitee_emails = args["emails"]
- invitee_role = args["role"]
- interface_language = args["language"]
+ invitee_emails = args.emails
+ invitee_role = args.role
+ interface_language = args.language
if not TenantAccountRole.is_non_owner_role(invitee_role):
return {"code": "invalid-role", "message": "Invalid role"}, 400
-
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
inviter = current_user
if not inviter.current_tenant:
raise ValueError("No current tenant")
@@ -121,8 +152,7 @@ class MemberCancelInviteApi(Resource):
@login_required
@account_initialization_required
def delete(self, member_id):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
if not current_user.current_tenant:
raise ValueError("No current tenant")
member = db.session.query(Account).where(Account.id == str(member_id)).first()
@@ -150,20 +180,18 @@ class MemberCancelInviteApi(Resource):
class MemberUpdateRoleApi(Resource):
"""Update member role."""
+ @console_ns.expect(console_ns.models[MemberRoleUpdatePayload.__name__])
@setup_required
@login_required
@account_initialization_required
def put(self, member_id):
- parser = reqparse.RequestParser()
- parser.add_argument("role", type=str, required=True, location="json")
- args = parser.parse_args()
- new_role = args["role"]
+ payload = console_ns.payload or {}
+ args = MemberRoleUpdatePayload.model_validate(payload)
+ new_role = args.role
if not TenantAccountRole.is_valid_role(new_role):
return {"code": "invalid-role", "message": "Invalid role"}, 400
-
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
if not current_user.current_tenant:
raise ValueError("No current tenant")
member = db.session.get(Account, str(member_id))
@@ -190,8 +218,7 @@ class DatasetOperatorMemberListApi(Resource):
@account_initialization_required
@marshal_with(account_with_role_list_fields)
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
if not current_user.current_tenant:
raise ValueError("No current tenant")
members = TenantService.get_dataset_operator_members(current_user.current_tenant)
@@ -202,27 +229,25 @@ class DatasetOperatorMemberListApi(Resource):
class SendOwnerTransferEmailApi(Resource):
"""Send owner transfer email."""
+ @console_ns.expect(console_ns.models[OwnerTransferEmailPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@is_allow_transfer_owner
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("language", type=str, required=False, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = OwnerTransferEmailPayload.model_validate(payload)
ip_address = extract_remote_ip(request)
if AccountService.is_email_send_ip_limit(ip_address):
raise EmailSendIpLimitError()
-
+ current_user, _ = current_account_with_tenant()
# check if the current user is the owner of the workspace
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
if not current_user.current_tenant:
raise ValueError("No current tenant")
if not TenantService.is_owner(current_user, current_user.current_tenant):
raise NotOwnerError()
- if args["language"] is not None and args["language"] == "zh-Hans":
+ if args.language is not None and args.language == "zh-Hans":
language = "zh-Hans"
else:
language = "en-US"
@@ -241,18 +266,16 @@ class SendOwnerTransferEmailApi(Resource):
@console_ns.route("/workspaces/current/members/owner-transfer-check")
class OwnerTransferCheckApi(Resource):
+ @console_ns.expect(console_ns.models[OwnerTransferCheckPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@is_allow_transfer_owner
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("code", type=str, required=True, location="json")
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = OwnerTransferCheckPayload.model_validate(payload)
# check if the current user is the owner of the workspace
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
if not current_user.current_tenant:
raise ValueError("No current tenant")
if not TenantService.is_owner(current_user, current_user.current_tenant):
@@ -264,22 +287,22 @@ class OwnerTransferCheckApi(Resource):
if is_owner_transfer_error_rate_limit:
raise OwnerTransferLimitError()
- token_data = AccountService.get_owner_transfer_data(args["token"])
+ token_data = AccountService.get_owner_transfer_data(args.token)
if token_data is None:
raise InvalidTokenError()
if user_email != token_data.get("email"):
raise InvalidEmailError()
- if args["code"] != token_data.get("code"):
+ if args.code != token_data.get("code"):
AccountService.add_owner_transfer_error_rate_limit(user_email)
raise EmailCodeError()
# Verified, revoke the first token
- AccountService.revoke_owner_transfer_token(args["token"])
+ AccountService.revoke_owner_transfer_token(args.token)
# Refresh token data by generating a new token
- _, new_token = AccountService.generate_owner_transfer_token(user_email, code=args["code"], additional_data={})
+ _, new_token = AccountService.generate_owner_transfer_token(user_email, code=args.code, additional_data={})
AccountService.reset_owner_transfer_error_rate_limit(user_email)
return {"is_valid": True, "email": token_data.get("email"), "token": new_token}
@@ -287,18 +310,17 @@ class OwnerTransferCheckApi(Resource):
@console_ns.route("/workspaces/current/members//owner-transfer")
class OwnerTransfer(Resource):
+ @console_ns.expect(console_ns.models[OwnerTransferPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@is_allow_transfer_owner
def post(self, member_id):
- parser = reqparse.RequestParser()
- parser.add_argument("token", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = OwnerTransferPayload.model_validate(payload)
# check if the current user is the owner of the workspace
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
if not current_user.current_tenant:
raise ValueError("No current tenant")
if not TenantService.is_owner(current_user, current_user.current_tenant):
@@ -307,14 +329,14 @@ class OwnerTransfer(Resource):
if current_user.id == str(member_id):
raise CannotTransferOwnerToSelfError()
- transfer_token_data = AccountService.get_owner_transfer_data(args["token"])
+ transfer_token_data = AccountService.get_owner_transfer_data(args.token)
if not transfer_token_data:
raise InvalidTokenError()
if transfer_token_data.get("email") != current_user.email:
raise InvalidEmailError()
- AccountService.revoke_owner_transfer_token(args["token"])
+ AccountService.revoke_owner_transfer_token(args.token)
member = db.session.get(Account, str(member_id))
if not member:
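
Note: `MemberInvitePayload.role` is typed as `TenantAccountRole`, so Pydantic both parses the incoming string into the enum and rejects unknown roles before the handler runs. A stand-in demonstration, assuming `TenantAccountRole` behaves like a string enum:

```python
# Stand-in demonstration of enum-typed payload fields; Dify's
# TenantAccountRole is assumed to behave like a string enum.
from enum import Enum

from pydantic import BaseModel, Field, ValidationError


class Role(str, Enum):
    ADMIN = "admin"
    NORMAL = "normal"


class InvitePayload(BaseModel):
    emails: list[str] = Field(default_factory=list)
    role: Role


payload = InvitePayload.model_validate({"emails": ["a@example.com"], "role": "admin"})
assert payload.role is Role.ADMIN

try:
    InvitePayload.model_validate({"role": "owner"})
except ValidationError:
    pass  # rejected: not a valid Role
```
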
diff --git a/api/controllers/console/workspace/model_providers.py b/api/controllers/console/workspace/model_providers.py
index 7012580362..7bada2fa12 100644
--- a/api/controllers/console/workspace/model_providers.py
+++ b/api/controllers/console/workspace/model_providers.py
@@ -1,153 +1,196 @@
import io
+from typing import Any, Literal
-from flask import send_file
-from flask_login import current_user
-from flask_restx import Resource, reqparse
-from werkzeug.exceptions import Forbidden
+from flask import request, send_file
+from flask_restx import Resource
+from pydantic import BaseModel, Field, field_validator
from controllers.console import console_ns
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.utils.encoders import jsonable_encoder
-from libs.helper import StrLen, uuid_value
-from libs.login import login_required
-from models.account import Account
+from libs.helper import uuid_value
+from libs.login import current_account_with_tenant, login_required
from services.billing_service import BillingService
from services.model_provider_service import ModelProviderService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ParserModelList(BaseModel):
+ model_type: ModelType | None = None
+
+
+class ParserCredentialId(BaseModel):
+ credential_id: str | None = None
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_optional_credential_id(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+class ParserCredentialCreate(BaseModel):
+ credentials: dict[str, Any]
+ name: str | None = Field(default=None, max_length=30)
+
+
+class ParserCredentialUpdate(BaseModel):
+ credential_id: str
+ credentials: dict[str, Any]
+ name: str | None = Field(default=None, max_length=30)
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_update_credential_id(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+class ParserCredentialDelete(BaseModel):
+ credential_id: str
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_delete_credential_id(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+class ParserCredentialSwitch(BaseModel):
+ credential_id: str
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_switch_credential_id(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+class ParserCredentialValidate(BaseModel):
+ credentials: dict[str, Any]
+
+
+class ParserPreferredProviderType(BaseModel):
+ preferred_provider_type: Literal["system", "custom"]
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(ParserModelList)
+reg(ParserCredentialId)
+reg(ParserCredentialCreate)
+reg(ParserCredentialUpdate)
+reg(ParserCredentialDelete)
+reg(ParserCredentialSwitch)
+reg(ParserCredentialValidate)
+reg(ParserPreferredProviderType)
+
@console_ns.route("/workspaces/current/model-providers")
class ModelProviderListApi(Resource):
+ @console_ns.expect(console_ns.models[ParserModelList.__name__])
@setup_required
@login_required
@account_initialization_required
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
- tenant_id = current_user.current_tenant_id
+ _, current_tenant_id = current_account_with_tenant()
+ tenant_id = current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument(
- "model_type",
- type=str,
- required=False,
- nullable=True,
- choices=[mt.value for mt in ModelType],
- location="args",
- )
- args = parser.parse_args()
+ payload = request.args.to_dict(flat=True) # type: ignore
+ args = ParserModelList.model_validate(payload)
model_provider_service = ModelProviderService()
- provider_list = model_provider_service.get_provider_list(tenant_id=tenant_id, model_type=args.get("model_type"))
+ provider_list = model_provider_service.get_provider_list(tenant_id=tenant_id, model_type=args.model_type)
return jsonable_encoder({"data": provider_list})
@console_ns.route("/workspaces/current/model-providers//credentials")
class ModelProviderCredentialApi(Resource):
+ @console_ns.expect(console_ns.models[ParserCredentialId.__name__])
@setup_required
@login_required
@account_initialization_required
def get(self, provider: str):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
- tenant_id = current_user.current_tenant_id
+ _, current_tenant_id = current_account_with_tenant()
+ tenant_id = current_tenant_id
# if credential_id is not provided, return the currently used credential
- parser = reqparse.RequestParser()
- parser.add_argument("credential_id", type=uuid_value, required=False, nullable=True, location="args")
- args = parser.parse_args()
+ payload = request.args.to_dict(flat=True) # type: ignore
+ args = ParserCredentialId.model_validate(payload)
model_provider_service = ModelProviderService()
credentials = model_provider_service.get_provider_credential(
- tenant_id=tenant_id, provider=provider, credential_id=args.get("credential_id")
+ tenant_id=tenant_id, provider=provider, credential_id=args.credential_id
)
return {"credentials": credentials}
+ @console_ns.expect(console_ns.models[ParserCredentialCreate.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider: str):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- if not current_user.is_admin_or_owner:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("name", type=StrLen(30), required=False, nullable=True, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = ParserCredentialCreate.model_validate(payload)
model_provider_service = ModelProviderService()
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
try:
model_provider_service.create_provider_credential(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=provider,
- credentials=args["credentials"],
- credential_name=args["name"],
+ credentials=args.credentials,
+ credential_name=args.name,
)
except CredentialsValidateFailedError as ex:
raise ValueError(str(ex))
return {"result": "success"}, 201
+ @console_ns.expect(console_ns.models[ParserCredentialUpdate.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def put(self, provider: str):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- if not current_user.is_admin_or_owner:
- raise Forbidden()
+ _, current_tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json")
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("name", type=StrLen(30), required=False, nullable=True, location="json")
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = ParserCredentialUpdate.model_validate(payload)
model_provider_service = ModelProviderService()
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
try:
model_provider_service.update_provider_credential(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=provider,
- credentials=args["credentials"],
- credential_id=args["credential_id"],
- credential_name=args["name"],
+ credentials=args.credentials,
+ credential_id=args.credential_id,
+ credential_name=args.name,
)
except CredentialsValidateFailedError as ex:
raise ValueError(str(ex))
return {"result": "success"}
+ @console_ns.expect(console_ns.models[ParserCredentialDelete.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def delete(self, provider: str):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- if not current_user.is_admin_or_owner:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = ParserCredentialDelete.model_validate(payload)
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
model_provider_service = ModelProviderService()
model_provider_service.remove_provider_credential(
- tenant_id=current_user.current_tenant_id, provider=provider, credential_id=args["credential_id"]
+ tenant_id=current_tenant_id, provider=provider, credential_id=args.credential_id
)
return {"result": "success"}, 204
@@ -155,44 +198,37 @@ class ModelProviderCredentialApi(Resource):
@console_ns.route("/workspaces/current/model-providers//credentials/switch")
class ModelProviderCredentialSwitchApi(Resource):
+ @console_ns.expect(console_ns.models[ParserCredentialSwitch.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider: str):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- if not current_user.is_admin_or_owner:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("credential_id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = ParserCredentialSwitch.model_validate(payload)
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
service = ModelProviderService()
service.switch_active_provider_credential(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=provider,
- credential_id=args["credential_id"],
+ credential_id=args.credential_id,
)
return {"result": "success"}
@console_ns.route("/workspaces/current/model-providers//credentials/validate")
class ModelProviderValidateApi(Resource):
+ @console_ns.expect(console_ns.models[ParserCredentialValidate.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self, provider: str):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = ParserCredentialValidate.model_validate(payload)
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
- tenant_id = current_user.current_tenant_id
+ tenant_id = current_tenant_id
model_provider_service = ModelProviderService()
@@ -201,7 +237,7 @@ class ModelProviderValidateApi(Resource):
try:
model_provider_service.validate_provider_credentials(
- tenant_id=tenant_id, provider=provider, credentials=args["credentials"]
+ tenant_id=tenant_id, provider=provider, credentials=args.credentials
)
except CredentialsValidateFailedError as ex:
result = False
@@ -236,33 +272,22 @@ class ModelProviderIconApi(Resource):
@console_ns.route("/workspaces/current/model-providers//preferred-provider-type")
class PreferredProviderTypeUpdateApi(Resource):
+ @console_ns.expect(console_ns.models[ParserPreferredProviderType.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider: str):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- if not current_user.is_admin_or_owner:
- raise Forbidden()
+ _, current_tenant_id = current_account_with_tenant()
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
- tenant_id = current_user.current_tenant_id
+ tenant_id = current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument(
- "preferred_provider_type",
- type=str,
- required=True,
- nullable=False,
- choices=["system", "custom"],
- location="json",
- )
- args = parser.parse_args()
+ payload = console_ns.payload or {}
+ args = ParserPreferredProviderType.model_validate(payload)
model_provider_service = ModelProviderService()
model_provider_service.switch_preferred_provider(
- tenant_id=tenant_id, provider=provider, preferred_provider_type=args["preferred_provider_type"]
+ tenant_id=tenant_id, provider=provider, preferred_provider_type=args.preferred_provider_type
)
return {"result": "success"}
@@ -276,14 +301,11 @@ class ModelProviderPaymentCheckoutUrlApi(Resource):
def get(self, provider: str):
if provider != "anthropic":
raise ValueError(f"provider name {provider} is invalid")
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, current_tenant_id = current_account_with_tenant()
BillingService.is_tenant_owner_or_admin(current_user)
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
data = BillingService.get_model_provider_payment_link(
provider_name=provider,
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
account_id=current_user.id,
prefilled_email=current_user.email,
)
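
Editorial note, not part of this diff: the `credential_id` validators in this file (and in `models.py` below) repeat the same `uuid_value` check per model. Pydantic's `Annotated` plus `AfterValidator` could express it once; a sketch with a stubbed `uuid_value` standing in for the helper from `libs.helper`:

```python
# Editorial sketch, not part of this diff: one reusable UUID-validated type
# instead of a field_validator per model. uuid_value stands in for the
# helper imported from libs.helper.
import uuid
from typing import Annotated

from pydantic import AfterValidator, BaseModel


def uuid_value(value: str) -> str:
    return str(uuid.UUID(value))  # raises ValueError on malformed input


UUIDStr = Annotated[str, AfterValidator(uuid_value)]


class CredentialDelete(BaseModel):
    credential_id: UUIDStr


class CredentialSwitch(BaseModel):
    credential_id: UUIDStr
```
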
diff --git a/api/controllers/console/workspace/models.py b/api/controllers/console/workspace/models.py
index d38bb16ea7..2def57ed7b 100644
--- a/api/controllers/console/workspace/models.py
+++ b/api/controllers/console/workspace/models.py
@@ -1,85 +1,170 @@
import logging
+from typing import Any, cast
-from flask_login import current_user
-from flask_restx import Resource, reqparse
-from werkzeug.exceptions import Forbidden
+from flask import request
+from flask_restx import Resource
+from pydantic import BaseModel, Field, field_validator
from controllers.console import console_ns
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.utils.encoders import jsonable_encoder
-from libs.helper import StrLen, uuid_value
-from libs.login import login_required
+from libs.helper import uuid_value
+from libs.login import current_account_with_tenant, login_required
from services.model_load_balancing_service import ModelLoadBalancingService
from services.model_provider_service import ModelProviderService
logger = logging.getLogger(__name__)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ParserGetDefault(BaseModel):
+ model_type: ModelType
+
+
+class ParserPostDefault(BaseModel):
+ class Inner(BaseModel):
+ model_type: ModelType
+ model: str | None = None
+ provider: str | None = None
+
+ model_settings: list[Inner]
+
+
+class ParserDeleteModels(BaseModel):
+ model: str
+ model_type: ModelType
+
+
+class LoadBalancingPayload(BaseModel):
+ configs: list[dict[str, Any]] | None = None
+ enabled: bool | None = None
+
+
+class ParserPostModels(BaseModel):
+ model: str
+ model_type: ModelType
+ load_balancing: LoadBalancingPayload | None = None
+ config_from: str | None = None
+ credential_id: str | None = None
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_credential_id(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+class ParserGetCredentials(BaseModel):
+ model: str
+ model_type: ModelType
+ config_from: str | None = None
+ credential_id: str | None = None
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_get_credential_id(cls, value: str | None) -> str | None:
+ if value is None:
+ return value
+ return uuid_value(value)
+
+
+class ParserCredentialBase(BaseModel):
+ model: str
+ model_type: ModelType
+
+
+class ParserCreateCredential(ParserCredentialBase):
+ name: str | None = Field(default=None, max_length=30)
+ credentials: dict[str, Any]
+
+
+class ParserUpdateCredential(ParserCredentialBase):
+ credential_id: str
+ credentials: dict[str, Any]
+ name: str | None = Field(default=None, max_length=30)
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_update_credential_id(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+class ParserDeleteCredential(ParserCredentialBase):
+ credential_id: str
+
+ @field_validator("credential_id")
+ @classmethod
+ def validate_delete_credential_id(cls, value: str) -> str:
+ return uuid_value(value)
+
+
+class ParserParameter(BaseModel):
+ model: str
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(ParserGetDefault)
+reg(ParserPostDefault)
+reg(ParserDeleteModels)
+reg(ParserPostModels)
+reg(ParserGetCredentials)
+reg(ParserCreateCredential)
+reg(ParserUpdateCredential)
+reg(ParserDeleteCredential)
+reg(ParserParameter)
@console_ns.route("/workspaces/current/default-model")
class DefaultModelApi(Resource):
+ @console_ns.expect(console_ns.models[ParserGetDefault.__name__])
@setup_required
@login_required
@account_initialization_required
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="args",
- )
- args = parser.parse_args()
+ _, tenant_id = current_account_with_tenant()
- tenant_id = current_user.current_tenant_id
+ args = ParserGetDefault.model_validate(request.args.to_dict(flat=True)) # type: ignore
model_provider_service = ModelProviderService()
default_model_entity = model_provider_service.get_default_model_of_model_type(
- tenant_id=tenant_id, model_type=args["model_type"]
+ tenant_id=tenant_id, model_type=args.model_type
)
return jsonable_encoder({"data": default_model_entity})
+ @console_ns.expect(console_ns.models[ParserPostDefault.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- if not current_user.is_admin_or_owner:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("model_settings", type=list, required=True, nullable=False, location="json")
- args = parser.parse_args()
-
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
+ args = ParserPostDefault.model_validate(console_ns.payload)
model_provider_service = ModelProviderService()
- model_settings = args["model_settings"]
+ model_settings = args.model_settings
for model_setting in model_settings:
- if "model_type" not in model_setting or model_setting["model_type"] not in [mt.value for mt in ModelType]:
- raise ValueError("invalid model type")
-
- if "provider" not in model_setting:
+ if model_setting.provider is None:
continue
- if "model" not in model_setting:
- raise ValueError("invalid model")
-
try:
model_provider_service.update_default_model_of_model_type(
tenant_id=tenant_id,
- model_type=model_setting["model_type"],
- provider=model_setting["provider"],
- model=model_setting["model"],
+ model_type=model_setting.model_type,
+ provider=model_setting.provider,
+                    model=model_setting.model,
)
except Exception as ex:
logger.exception(
"Failed to update default model, model type: %s, model: %s",
- model_setting["model_type"],
- model_setting.get("model"),
+ model_setting.model_type,
+ model_setting.model,
)
raise ex
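A hedged sketch of what `ParserPostDefault.model_validate` does for this handler: the string `model_type` values are coerced into `ModelType` members, and unknown values raise `ValidationError`, which is what the removed manual membership check guarded against. `ModelType` is stubbed as an assumption:

```python
# Sketch only; ModelType is stubbed, the real enum lives in
# core.model_runtime.entities.model_entities.
from enum import Enum

from pydantic import BaseModel, ValidationError


class ModelType(str, Enum):
    LLM = "llm"


class Inner(BaseModel):
    model_type: ModelType
    model: str | None = None
    provider: str | None = None


class ParserPostDefault(BaseModel):
    model_settings: list[Inner]


ok = ParserPostDefault.model_validate(
    {"model_settings": [{"model_type": "llm", "provider": "openai", "model": "gpt-4o"}]}
)
assert ok.model_settings[0].model_type is ModelType.LLM  # coerced to the enum member

try:
    ParserPostDefault.model_validate({"model_settings": [{"model_type": "bogus"}]})
except ValidationError as e:
    print(e.error_count(), "validation error(s)")  # replaces the manual membership check
```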
@@ -92,98 +177,72 @@ class ModelProviderModelApi(Resource):
@login_required
@account_initialization_required
def get(self, provider):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
model_provider_service = ModelProviderService()
models = model_provider_service.get_models_by_provider(tenant_id=tenant_id, provider=provider)
return jsonable_encoder({"data": models})
+ @console_ns.expect(console_ns.models[ParserPostModels.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider: str):
# To save the model's load balance configs
- if not current_user.is_admin_or_owner:
- raise Forbidden()
+ _, tenant_id = current_account_with_tenant()
+ args = ParserPostModels.model_validate(console_ns.payload)
- tenant_id = current_user.current_tenant_id
-
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- parser.add_argument("load_balancing", type=dict, required=False, nullable=True, location="json")
- parser.add_argument("config_from", type=str, required=False, nullable=True, location="json")
- parser.add_argument("credential_id", type=uuid_value, required=False, nullable=True, location="json")
- args = parser.parse_args()
-
- if args.get("config_from", "") == "custom-model":
- if not args.get("credential_id"):
+ if args.config_from == "custom-model":
+ if not args.credential_id:
raise ValueError("credential_id is required when configuring a custom-model")
service = ModelProviderService()
service.switch_active_custom_model_credential(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=tenant_id,
provider=provider,
- model_type=args["model_type"],
- model=args["model"],
- credential_id=args["credential_id"],
+ model_type=args.model_type,
+ model=args.model,
+ credential_id=args.credential_id,
)
model_load_balancing_service = ModelLoadBalancingService()
- if "load_balancing" in args and args["load_balancing"] and "configs" in args["load_balancing"]:
+ if args.load_balancing and args.load_balancing.configs:
# save load balancing configs
model_load_balancing_service.update_load_balancing_configs(
tenant_id=tenant_id,
provider=provider,
- model=args["model"],
- model_type=args["model_type"],
- configs=args["load_balancing"]["configs"],
- config_from=args.get("config_from", ""),
+ model=args.model,
+ model_type=args.model_type,
+ configs=args.load_balancing.configs,
+ config_from=args.config_from or "",
)
- if args.get("load_balancing", {}).get("enabled"):
+ if args.load_balancing.enabled:
model_load_balancing_service.enable_model_load_balancing(
- tenant_id=tenant_id, provider=provider, model=args["model"], model_type=args["model_type"]
+ tenant_id=tenant_id, provider=provider, model=args.model, model_type=args.model_type
)
else:
model_load_balancing_service.disable_model_load_balancing(
- tenant_id=tenant_id, provider=provider, model=args["model"], model_type=args["model_type"]
+ tenant_id=tenant_id, provider=provider, model=args.model, model_type=args.model_type
)
return {"result": "success"}, 200
+ @console_ns.expect(console_ns.models[ParserDeleteModels.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def delete(self, provider: str):
- if not current_user.is_admin_or_owner:
- raise Forbidden()
+ _, tenant_id = current_account_with_tenant()
- tenant_id = current_user.current_tenant_id
-
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- args = parser.parse_args()
+ args = ParserDeleteModels.model_validate(console_ns.payload)
model_provider_service = ModelProviderService()
model_provider_service.remove_model(
- tenant_id=tenant_id, provider=provider, model=args["model"], model_type=args["model_type"]
+ tenant_id=tenant_id, provider=provider, model=args.model, model_type=args.model_type
)
return {"result": "success"}, 204
@@ -191,52 +250,42 @@ class ModelProviderModelApi(Resource):
@console_ns.route("/workspaces/current/model-providers//models/credentials")
class ModelProviderModelCredentialApi(Resource):
+ @console_ns.expect(console_ns.models[ParserGetCredentials.__name__])
@setup_required
@login_required
@account_initialization_required
def get(self, provider: str):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="args")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="args",
- )
- parser.add_argument("config_from", type=str, required=False, nullable=True, location="args")
- parser.add_argument("credential_id", type=uuid_value, required=False, nullable=True, location="args")
- args = parser.parse_args()
+ args = ParserGetCredentials.model_validate(request.args.to_dict(flat=True)) # type: ignore
model_provider_service = ModelProviderService()
current_credential = model_provider_service.get_model_credential(
tenant_id=tenant_id,
provider=provider,
- model_type=args["model_type"],
- model=args["model"],
- credential_id=args.get("credential_id"),
+ model_type=args.model_type,
+ model=args.model,
+ credential_id=args.credential_id,
)
model_load_balancing_service = ModelLoadBalancingService()
is_load_balancing_enabled, load_balancing_configs = model_load_balancing_service.get_load_balancing_configs(
tenant_id=tenant_id,
provider=provider,
- model=args["model"],
- model_type=args["model_type"],
- config_from=args.get("config_from", ""),
+ model=args.model,
+ model_type=args.model_type,
+ config_from=args.config_from or "",
)
- if args.get("config_from", "") == "predefined-model":
+ if args.config_from == "predefined-model":
available_credentials = model_provider_service.provider_manager.get_provider_available_credentials(
tenant_id=tenant_id, provider_name=provider
)
else:
- model_type = ModelType.value_of(args["model_type"]).to_origin_model_type()
+ # Normalize model_type to the origin value stored in DB (e.g., "text-generation" for LLM)
+ normalized_model_type = args.model_type.to_origin_model_type()
available_credentials = model_provider_service.provider_manager.get_provider_model_available_credentials(
- tenant_id=tenant_id, provider_name=provider, model_type=model_type, model_name=args["model"]
+ tenant_id=tenant_id, provider_name=provider, model_type=normalized_model_type, model_name=args.model
)
return jsonable_encoder(
@@ -253,148 +302,114 @@ class ModelProviderModelCredentialApi(Resource):
}
)
+ @console_ns.expect(console_ns.models[ParserCreateCredential.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider: str):
- if not current_user.is_admin_or_owner:
- raise Forbidden()
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- parser.add_argument("name", type=StrLen(30), required=False, nullable=True, location="json")
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ args = ParserCreateCredential.model_validate(console_ns.payload)
- tenant_id = current_user.current_tenant_id
model_provider_service = ModelProviderService()
try:
model_provider_service.create_model_credential(
tenant_id=tenant_id,
provider=provider,
- model=args["model"],
- model_type=args["model_type"],
- credentials=args["credentials"],
- credential_name=args["name"],
+ model=args.model,
+ model_type=args.model_type,
+ credentials=args.credentials,
+ credential_name=args.name,
)
except CredentialsValidateFailedError as ex:
logger.exception(
"Failed to save model credentials, tenant_id: %s, model: %s, model_type: %s",
tenant_id,
- args.get("model"),
- args.get("model_type"),
+ args.model,
+ args.model_type,
)
raise ValueError(str(ex))
return {"result": "success"}, 201
+ @console_ns.expect(console_ns.models[ParserUpdateCredential.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def put(self, provider: str):
- if not current_user.is_admin_or_owner:
- raise Forbidden()
-
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- parser.add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json")
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("name", type=StrLen(30), required=False, nullable=True, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ args = ParserUpdateCredential.model_validate(console_ns.payload)
model_provider_service = ModelProviderService()
try:
model_provider_service.update_model_credential(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=provider,
- model_type=args["model_type"],
- model=args["model"],
- credentials=args["credentials"],
- credential_id=args["credential_id"],
- credential_name=args["name"],
+ model_type=args.model_type,
+ model=args.model,
+ credentials=args.credentials,
+ credential_id=args.credential_id,
+ credential_name=args.name,
)
except CredentialsValidateFailedError as ex:
raise ValueError(str(ex))
return {"result": "success"}
+ @console_ns.expect(console_ns.models[ParserDeleteCredential.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def delete(self, provider: str):
- if not current_user.is_admin_or_owner:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- parser.add_argument("credential_id", type=uuid_value, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ args = ParserDeleteCredential.model_validate(console_ns.payload)
model_provider_service = ModelProviderService()
model_provider_service.remove_model_credential(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=provider,
- model_type=args["model_type"],
- model=args["model"],
- credential_id=args["credential_id"],
+ model_type=args.model_type,
+ model=args.model,
+ credential_id=args.credential_id,
)
return {"result": "success"}, 204
+class ParserSwitch(BaseModel):
+ model: str
+ model_type: ModelType
+ credential_id: str
+
+
+console_ns.schema_model(
+ ParserSwitch.__name__, ParserSwitch.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
+
@console_ns.route("/workspaces/current/model-providers//models/credentials/switch")
class ModelProviderModelCredentialSwitchApi(Resource):
+ @console_ns.expect(console_ns.models[ParserSwitch.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider: str):
- if not current_user.is_admin_or_owner:
- raise Forbidden()
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- parser.add_argument("credential_id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ args = ParserSwitch.model_validate(console_ns.payload)
service = ModelProviderService()
service.add_model_credential_to_model_list(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
provider=provider,
- model_type=args["model_type"],
- model=args["model"],
- credential_id=args["credential_id"],
+ model_type=args.model_type,
+ model=args.model,
+ credential_id=args.credential_id,
)
return {"result": "success"}
@@ -403,27 +418,18 @@ class ModelProviderModelCredentialSwitchApi(Resource):
"/workspaces/current/model-providers//models/enable", endpoint="model-provider-model-enable"
)
class ModelProviderModelEnableApi(Resource):
+ @console_ns.expect(console_ns.models[ParserDeleteModels.__name__])
@setup_required
@login_required
@account_initialization_required
def patch(self, provider: str):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- args = parser.parse_args()
+ args = ParserDeleteModels.model_validate(console_ns.payload)
model_provider_service = ModelProviderService()
model_provider_service.enable_model(
- tenant_id=tenant_id, provider=provider, model=args["model"], model_type=args["model_type"]
+ tenant_id=tenant_id, provider=provider, model=args.model, model_type=args.model_type
)
return {"result": "success"}
@@ -433,52 +439,43 @@ class ModelProviderModelEnableApi(Resource):
"/workspaces/current/model-providers//models/disable", endpoint="model-provider-model-disable"
)
class ModelProviderModelDisableApi(Resource):
+ @console_ns.expect(console_ns.models[ParserDeleteModels.__name__])
@setup_required
@login_required
@account_initialization_required
def patch(self, provider: str):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- args = parser.parse_args()
+ args = ParserDeleteModels.model_validate(console_ns.payload)
model_provider_service = ModelProviderService()
model_provider_service.disable_model(
- tenant_id=tenant_id, provider=provider, model=args["model"], model_type=args["model_type"]
+ tenant_id=tenant_id, provider=provider, model=args.model, model_type=args.model_type
)
return {"result": "success"}
+class ParserValidate(BaseModel):
+ model: str
+ model_type: ModelType
+    credentials: dict[str, Any]
+
+
+console_ns.schema_model(
+ ParserValidate.__name__, ParserValidate.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
+
@console_ns.route("/workspaces/current/model-providers//models/credentials/validate")
class ModelProviderModelValidateApi(Resource):
+ @console_ns.expect(console_ns.models[ParserValidate.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self, provider: str):
- tenant_id = current_user.current_tenant_id
-
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="json")
- parser.add_argument(
- "model_type",
- type=str,
- required=True,
- nullable=False,
- choices=[mt.value for mt in ModelType],
- location="json",
- )
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ _, tenant_id = current_account_with_tenant()
+ args = ParserValidate.model_validate(console_ns.payload)
model_provider_service = ModelProviderService()
@@ -489,9 +486,9 @@ class ModelProviderModelValidateApi(Resource):
model_provider_service.validate_model_credentials(
tenant_id=tenant_id,
provider=provider,
- model=args["model"],
- model_type=args["model_type"],
- credentials=args["credentials"],
+ model=args.model,
+ model_type=args.model_type,
+ credentials=args.credentials,
)
except CredentialsValidateFailedError as ex:
result = False
@@ -507,19 +504,17 @@ class ModelProviderModelValidateApi(Resource):
@console_ns.route("/workspaces/current/model-providers//models/parameter-rules")
class ModelProviderModelParameterRuleApi(Resource):
+ @console_ns.expect(console_ns.models[ParserParameter.__name__])
@setup_required
@login_required
@account_initialization_required
def get(self, provider: str):
- parser = reqparse.RequestParser()
- parser.add_argument("model", type=str, required=True, nullable=False, location="args")
- args = parser.parse_args()
-
- tenant_id = current_user.current_tenant_id
+ args = ParserParameter.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ _, tenant_id = current_account_with_tenant()
model_provider_service = ModelProviderService()
parameter_rules = model_provider_service.get_model_parameter_rules(
- tenant_id=tenant_id, provider=provider, model=args["model"]
+ tenant_id=tenant_id, provider=provider, model=args.model
)
return jsonable_encoder({"data": parameter_rules})
@@ -531,8 +526,7 @@ class ModelProviderAvailableModelApi(Resource):
@login_required
@account_initialization_required
def get(self, model_type):
- tenant_id = current_user.current_tenant_id
-
+ _, tenant_id = current_account_with_tenant()
model_provider_service = ModelProviderService()
models = model_provider_service.get_models_by_model_type(tenant_id=tenant_id, model_type=model_type)
diff --git a/api/controllers/console/workspace/plugin.py b/api/controllers/console/workspace/plugin.py
index 7c70fb8aa0..805058ba5a 100644
--- a/api/controllers/console/workspace/plugin.py
+++ b/api/controllers/console/workspace/plugin.py
@@ -1,23 +1,30 @@
import io
+from typing import Literal
from flask import request, send_file
-from flask_login import current_user
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden
from configs import dify_config
from controllers.console import console_ns
from controllers.console.workspace import plugin_permission_required
-from controllers.console.wraps import account_initialization_required, setup_required
+from controllers.console.wraps import account_initialization_required, is_admin_or_owner_required, setup_required
from core.model_runtime.utils.encoders import jsonable_encoder
from core.plugin.impl.exc import PluginDaemonClientSideError
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.account import TenantPluginAutoUpgradeStrategy, TenantPluginPermission
from services.plugin.plugin_auto_upgrade_service import PluginAutoUpgradeService
from services.plugin.plugin_parameter_service import PluginParameterService
from services.plugin.plugin_permission_service import PluginPermissionService
from services.plugin.plugin_service import PluginService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
@console_ns.route("/workspaces/current/plugin/debugging-key")
class PluginDebuggingKeyApi(Resource):
@@ -26,7 +33,7 @@ class PluginDebuggingKeyApi(Resource):
@account_initialization_required
@plugin_permission_required(debug_required=True)
def get(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
try:
return {
@@ -38,37 +45,160 @@ class PluginDebuggingKeyApi(Resource):
raise ValueError(e)
+class ParserList(BaseModel):
+ page: int = Field(default=1, ge=1, description="Page number")
+ page_size: int = Field(default=256, ge=1, le=256, description="Page size (1-256)")
+
+
+reg(ParserList)
+
+
@console_ns.route("/workspaces/current/plugin/list")
class PluginListApi(Resource):
+ @console_ns.expect(console_ns.models[ParserList.__name__])
@setup_required
@login_required
@account_initialization_required
def get(self):
- tenant_id = current_user.current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=int, required=False, location="args", default=1)
- parser.add_argument("page_size", type=int, required=False, location="args", default=256)
- args = parser.parse_args()
+ _, tenant_id = current_account_with_tenant()
+ args = ParserList.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
- plugins_with_total = PluginService.list_with_total(tenant_id, args["page"], args["page_size"])
+ plugins_with_total = PluginService.list_with_total(tenant_id, args.page, args.page_size)
except PluginDaemonClientSideError as e:
raise ValueError(e)
return jsonable_encoder({"plugins": plugins_with_total.list, "total": plugins_with_total.total})
+class ParserLatest(BaseModel):
+ plugin_ids: list[str]
+
+
+class ParserIcon(BaseModel):
+ tenant_id: str
+ filename: str
+
+
+class ParserAsset(BaseModel):
+ plugin_unique_identifier: str
+ file_name: str
+
+
+class ParserGithubUpload(BaseModel):
+ repo: str
+ version: str
+ package: str
+
+
+class ParserPluginIdentifiers(BaseModel):
+ plugin_unique_identifiers: list[str]
+
+
+class ParserGithubInstall(BaseModel):
+ plugin_unique_identifier: str
+ repo: str
+ version: str
+ package: str
+
+
+class ParserPluginIdentifierQuery(BaseModel):
+ plugin_unique_identifier: str
+
+
+class ParserTasks(BaseModel):
+ page: int = Field(default=1, ge=1, description="Page number")
+ page_size: int = Field(default=256, ge=1, le=256, description="Page size (1-256)")
+
+
+class ParserMarketplaceUpgrade(BaseModel):
+ original_plugin_unique_identifier: str
+ new_plugin_unique_identifier: str
+
+
+class ParserGithubUpgrade(BaseModel):
+ original_plugin_unique_identifier: str
+ new_plugin_unique_identifier: str
+ repo: str
+ version: str
+ package: str
+
+
+class ParserUninstall(BaseModel):
+ plugin_installation_id: str
+
+
+class ParserPermissionChange(BaseModel):
+ install_permission: TenantPluginPermission.InstallPermission
+ debug_permission: TenantPluginPermission.DebugPermission
+
+
+class ParserDynamicOptions(BaseModel):
+ plugin_id: str
+ provider: str
+ action: str
+ parameter: str
+ credential_id: str | None = None
+ provider_type: Literal["tool", "trigger"]
+
+
+class PluginPermissionSettingsPayload(BaseModel):
+ install_permission: TenantPluginPermission.InstallPermission = TenantPluginPermission.InstallPermission.EVERYONE
+ debug_permission: TenantPluginPermission.DebugPermission = TenantPluginPermission.DebugPermission.EVERYONE
+
+
+class PluginAutoUpgradeSettingsPayload(BaseModel):
+ strategy_setting: TenantPluginAutoUpgradeStrategy.StrategySetting = (
+ TenantPluginAutoUpgradeStrategy.StrategySetting.FIX_ONLY
+ )
+ upgrade_time_of_day: int = 0
+ upgrade_mode: TenantPluginAutoUpgradeStrategy.UpgradeMode = TenantPluginAutoUpgradeStrategy.UpgradeMode.EXCLUDE
+ exclude_plugins: list[str] = Field(default_factory=list)
+ include_plugins: list[str] = Field(default_factory=list)
+
+
+class ParserPreferencesChange(BaseModel):
+ permission: PluginPermissionSettingsPayload
+ auto_upgrade: PluginAutoUpgradeSettingsPayload
+
+
+class ParserExcludePlugin(BaseModel):
+ plugin_id: str
+
+
+class ParserReadme(BaseModel):
+ plugin_unique_identifier: str
+ language: str = Field(default="en-US")
+
+
+reg(ParserLatest)
+reg(ParserIcon)
+reg(ParserAsset)
+reg(ParserGithubUpload)
+reg(ParserPluginIdentifiers)
+reg(ParserGithubInstall)
+reg(ParserPluginIdentifierQuery)
+reg(ParserTasks)
+reg(ParserMarketplaceUpgrade)
+reg(ParserGithubUpgrade)
+reg(ParserUninstall)
+reg(ParserPermissionChange)
+reg(ParserDynamicOptions)
+reg(ParserPreferencesChange)
+reg(ParserExcludePlugin)
+reg(ParserReadme)
+
+
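The handlers below consistently split input sourcing: JSON bodies come from `console_ns.payload` (flask-restx's shorthand for `request.get_json()`), query strings from `request.args.to_dict(flat=True)`. A minimal sketch with a hypothetical namespace:

```python
# Sketch of where these handlers read raw input from; requires an active
# request context in real use.
from flask import request
from flask_restx import Namespace

ns = Namespace("sketch")


def post_style() -> dict:
    # flask-restx shorthand for request.get_json(); used by the POST handlers
    return ns.payload


def get_style() -> dict:
    # first value per repeated key; used by the GET handlers
    return request.args.to_dict(flat=True)
```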
@console_ns.route("/workspaces/current/plugin/list/latest-versions")
class PluginListLatestVersionsApi(Resource):
+ @console_ns.expect(console_ns.models[ParserLatest.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self):
- req = reqparse.RequestParser()
- req.add_argument("plugin_ids", type=list, required=True, location="json")
- args = req.parse_args()
+ args = ParserLatest.model_validate(console_ns.payload)
try:
- versions = PluginService.list_latest_versions(args["plugin_ids"])
+ versions = PluginService.list_latest_versions(args.plugin_ids)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -77,18 +207,17 @@ class PluginListLatestVersionsApi(Resource):
@console_ns.route("/workspaces/current/plugin/list/installations/ids")
class PluginListInstallationsFromIdsApi(Resource):
+ @console_ns.expect(console_ns.models[ParserLatest.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("plugin_ids", type=list, required=True, location="json")
- args = parser.parse_args()
+ args = ParserLatest.model_validate(console_ns.payload)
try:
- plugins = PluginService.list_installations_from_ids(tenant_id, args["plugin_ids"])
+ plugins = PluginService.list_installations_from_ids(tenant_id, args.plugin_ids)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -97,15 +226,13 @@ class PluginListInstallationsFromIdsApi(Resource):
@console_ns.route("/workspaces/current/plugin/icon")
class PluginIconApi(Resource):
+ @console_ns.expect(console_ns.models[ParserIcon.__name__])
@setup_required
def get(self):
- req = reqparse.RequestParser()
- req.add_argument("tenant_id", type=str, required=True, location="args")
- req.add_argument("filename", type=str, required=True, location="args")
- args = req.parse_args()
+ args = ParserIcon.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
- icon_bytes, mimetype = PluginService.get_asset(args["tenant_id"], args["filename"])
+ icon_bytes, mimetype = PluginService.get_asset(args.tenant_id, args.filename)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -113,6 +240,23 @@ class PluginIconApi(Resource):
return send_file(io.BytesIO(icon_bytes), mimetype=mimetype, max_age=icon_cache_max_age)
+@console_ns.route("/workspaces/current/plugin/asset")
+class PluginAssetApi(Resource):
+ @console_ns.expect(console_ns.models[ParserAsset.__name__])
+ @setup_required
+ @login_required
+ @account_initialization_required
+ def get(self):
+ args = ParserAsset.model_validate(request.args.to_dict(flat=True)) # type: ignore
+
+ _, tenant_id = current_account_with_tenant()
+ try:
+ binary = PluginService.extract_asset(tenant_id, args.plugin_unique_identifier, args.file_name)
+ return send_file(io.BytesIO(binary), mimetype="application/octet-stream")
+ except PluginDaemonClientSideError as e:
+ raise ValueError(e)
+
+
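A hypothetical client-side call against the new asset endpoint; the host, token, and identifier values are placeholders, not part of this patch:

```python
# Hypothetical client-side call against the new asset endpoint; host, token,
# and identifier values are placeholders, not part of this patch.
import requests

resp = requests.get(
    "http://localhost:5001/console/api/workspaces/current/plugin/asset",
    params={
        "plugin_unique_identifier": "langgenius/example-plugin:0.0.1@abc123",
        "file_name": "_assets/icon.svg",
    },
    headers={"Authorization": "Bearer <console-access-token>"},
    timeout=10,
)
resp.raise_for_status()
with open("icon.svg", "wb") as f:
    f.write(resp.content)  # served as application/octet-stream
```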
@console_ns.route("/workspaces/current/plugin/upload/pkg")
class PluginUploadFromPkgApi(Resource):
@setup_required
@@ -120,7 +264,7 @@ class PluginUploadFromPkgApi(Resource):
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
file = request.files["pkg"]
@@ -139,21 +283,18 @@ class PluginUploadFromPkgApi(Resource):
@console_ns.route("/workspaces/current/plugin/upload/github")
class PluginUploadFromGithubApi(Resource):
+ @console_ns.expect(console_ns.models[ParserGithubUpload.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("repo", type=str, required=True, location="json")
- parser.add_argument("version", type=str, required=True, location="json")
- parser.add_argument("package", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = ParserGithubUpload.model_validate(console_ns.payload)
try:
- response = PluginService.upload_pkg_from_github(tenant_id, args["repo"], args["version"], args["package"])
+ response = PluginService.upload_pkg_from_github(tenant_id, args.repo, args.version, args.package)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -167,7 +308,7 @@ class PluginUploadFromBundleApi(Resource):
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
file = request.files["bundle"]
@@ -186,24 +327,17 @@ class PluginUploadFromBundleApi(Resource):
@console_ns.route("/workspaces/current/plugin/install/pkg")
class PluginInstallFromPkgApi(Resource):
+ @console_ns.expect(console_ns.models[ParserPluginIdentifiers.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
-
- parser = reqparse.RequestParser()
- parser.add_argument("plugin_unique_identifiers", type=list, required=True, location="json")
- args = parser.parse_args()
-
- # check if all plugin_unique_identifiers are valid string
- for plugin_unique_identifier in args["plugin_unique_identifiers"]:
- if not isinstance(plugin_unique_identifier, str):
- raise ValueError("Invalid plugin unique identifier")
+ _, tenant_id = current_account_with_tenant()
+ args = ParserPluginIdentifiers.model_validate(console_ns.payload)
try:
- response = PluginService.install_from_local_pkg(tenant_id, args["plugin_unique_identifiers"])
+ response = PluginService.install_from_local_pkg(tenant_id, args.plugin_unique_identifiers)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -212,27 +346,23 @@ class PluginInstallFromPkgApi(Resource):
@console_ns.route("/workspaces/current/plugin/install/github")
class PluginInstallFromGithubApi(Resource):
+ @console_ns.expect(console_ns.models[ParserGithubInstall.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("repo", type=str, required=True, location="json")
- parser.add_argument("version", type=str, required=True, location="json")
- parser.add_argument("package", type=str, required=True, location="json")
- parser.add_argument("plugin_unique_identifier", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = ParserGithubInstall.model_validate(console_ns.payload)
try:
response = PluginService.install_from_github(
tenant_id,
- args["plugin_unique_identifier"],
- args["repo"],
- args["version"],
- args["package"],
+ args.plugin_unique_identifier,
+ args.repo,
+ args.version,
+ args.package,
)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -242,24 +372,18 @@ class PluginInstallFromGithubApi(Resource):
@console_ns.route("/workspaces/current/plugin/install/marketplace")
class PluginInstallFromMarketplaceApi(Resource):
+ @console_ns.expect(console_ns.models[ParserPluginIdentifiers.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("plugin_unique_identifiers", type=list, required=True, location="json")
- args = parser.parse_args()
-
- # check if all plugin_unique_identifiers are valid string
- for plugin_unique_identifier in args["plugin_unique_identifiers"]:
- if not isinstance(plugin_unique_identifier, str):
- raise ValueError("Invalid plugin unique identifier")
+ args = ParserPluginIdentifiers.model_validate(console_ns.payload)
try:
- response = PluginService.install_from_marketplace_pkg(tenant_id, args["plugin_unique_identifiers"])
+ response = PluginService.install_from_marketplace_pkg(tenant_id, args.plugin_unique_identifiers)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -268,23 +392,21 @@ class PluginInstallFromMarketplaceApi(Resource):
@console_ns.route("/workspaces/current/plugin/marketplace/pkg")
class PluginFetchMarketplacePkgApi(Resource):
+ @console_ns.expect(console_ns.models[ParserPluginIdentifierQuery.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def get(self):
- tenant_id = current_user.current_tenant_id
-
- parser = reqparse.RequestParser()
- parser.add_argument("plugin_unique_identifier", type=str, required=True, location="args")
- args = parser.parse_args()
+ _, tenant_id = current_account_with_tenant()
+ args = ParserPluginIdentifierQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
return jsonable_encoder(
{
"manifest": PluginService.fetch_marketplace_pkg(
tenant_id,
- args["plugin_unique_identifier"],
+ args.plugin_unique_identifier,
)
}
)
@@ -294,24 +416,19 @@ class PluginFetchMarketplacePkgApi(Resource):
@console_ns.route("/workspaces/current/plugin/fetch-manifest")
class PluginFetchManifestApi(Resource):
+ @console_ns.expect(console_ns.models[ParserPluginIdentifierQuery.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def get(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("plugin_unique_identifier", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = ParserPluginIdentifierQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
return jsonable_encoder(
- {
- "manifest": PluginService.fetch_plugin_manifest(
- tenant_id, args["plugin_unique_identifier"]
- ).model_dump()
- }
+ {"manifest": PluginService.fetch_plugin_manifest(tenant_id, args.plugin_unique_identifier).model_dump()}
)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -319,22 +436,18 @@ class PluginFetchManifestApi(Resource):
@console_ns.route("/workspaces/current/plugin/tasks")
class PluginFetchInstallTasksApi(Resource):
+ @console_ns.expect(console_ns.models[ParserTasks.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def get(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=int, required=True, location="args")
- parser.add_argument("page_size", type=int, required=True, location="args")
- args = parser.parse_args()
+ args = ParserTasks.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
- return jsonable_encoder(
- {"tasks": PluginService.fetch_install_tasks(tenant_id, args["page"], args["page_size"])}
- )
+ return jsonable_encoder({"tasks": PluginService.fetch_install_tasks(tenant_id, args.page, args.page_size)})
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -346,7 +459,7 @@ class PluginFetchInstallTaskApi(Resource):
@account_initialization_required
@plugin_permission_required(install_required=True)
def get(self, task_id: str):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
try:
return jsonable_encoder({"task": PluginService.fetch_install_task(tenant_id, task_id)})
@@ -361,7 +474,7 @@ class PluginDeleteInstallTaskApi(Resource):
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self, task_id: str):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
try:
return {"success": PluginService.delete_install_task(tenant_id, task_id)}
@@ -376,7 +489,7 @@ class PluginDeleteAllInstallTaskItemsApi(Resource):
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
try:
return {"success": PluginService.delete_all_install_task_items(tenant_id)}
@@ -391,7 +504,7 @@ class PluginDeleteInstallTaskItemApi(Resource):
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self, task_id: str, identifier: str):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
try:
return {"success": PluginService.delete_install_task_item(tenant_id, task_id, identifier)}
@@ -401,22 +514,20 @@ class PluginDeleteInstallTaskItemApi(Resource):
@console_ns.route("/workspaces/current/plugin/upgrade/marketplace")
class PluginUpgradeFromMarketplaceApi(Resource):
+ @console_ns.expect(console_ns.models[ParserMarketplaceUpgrade.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("original_plugin_unique_identifier", type=str, required=True, location="json")
- parser.add_argument("new_plugin_unique_identifier", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = ParserMarketplaceUpgrade.model_validate(console_ns.payload)
try:
return jsonable_encoder(
PluginService.upgrade_plugin_with_marketplace(
- tenant_id, args["original_plugin_unique_identifier"], args["new_plugin_unique_identifier"]
+ tenant_id, args.original_plugin_unique_identifier, args.new_plugin_unique_identifier
)
)
except PluginDaemonClientSideError as e:
@@ -425,30 +536,25 @@ class PluginUpgradeFromMarketplaceApi(Resource):
@console_ns.route("/workspaces/current/plugin/upgrade/github")
class PluginUpgradeFromGithubApi(Resource):
+ @console_ns.expect(console_ns.models[ParserGithubUpgrade.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- parser = reqparse.RequestParser()
- parser.add_argument("original_plugin_unique_identifier", type=str, required=True, location="json")
- parser.add_argument("new_plugin_unique_identifier", type=str, required=True, location="json")
- parser.add_argument("repo", type=str, required=True, location="json")
- parser.add_argument("version", type=str, required=True, location="json")
- parser.add_argument("package", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = ParserGithubUpgrade.model_validate(console_ns.payload)
try:
return jsonable_encoder(
PluginService.upgrade_plugin_with_github(
tenant_id,
- args["original_plugin_unique_identifier"],
- args["new_plugin_unique_identifier"],
- args["repo"],
- args["version"],
- args["package"],
+ args.original_plugin_unique_identifier,
+ args.new_plugin_unique_identifier,
+ args.repo,
+ args.version,
+ args.package,
)
)
except PluginDaemonClientSideError as e:
@@ -457,44 +563,43 @@ class PluginUpgradeFromGithubApi(Resource):
@console_ns.route("/workspaces/current/plugin/uninstall")
class PluginUninstallApi(Resource):
+ @console_ns.expect(console_ns.models[ParserUninstall.__name__])
@setup_required
@login_required
@account_initialization_required
@plugin_permission_required(install_required=True)
def post(self):
- req = reqparse.RequestParser()
- req.add_argument("plugin_installation_id", type=str, required=True, location="json")
- args = req.parse_args()
+ args = ParserUninstall.model_validate(console_ns.payload)
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
try:
- return {"success": PluginService.uninstall(tenant_id, args["plugin_installation_id"])}
+ return {"success": PluginService.uninstall(tenant_id, args.plugin_installation_id)}
except PluginDaemonClientSideError as e:
raise ValueError(e)
@console_ns.route("/workspaces/current/plugin/permission/change")
class PluginChangePermissionApi(Resource):
+ @console_ns.expect(console_ns.models[ParserPermissionChange.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self):
+ current_user, current_tenant_id = current_account_with_tenant()
user = current_user
if not user.is_admin_or_owner:
raise Forbidden()
- req = reqparse.RequestParser()
- req.add_argument("install_permission", type=str, required=True, location="json")
- req.add_argument("debug_permission", type=str, required=True, location="json")
- args = req.parse_args()
+ args = ParserPermissionChange.model_validate(console_ns.payload)
- install_permission = TenantPluginPermission.InstallPermission(args["install_permission"])
- debug_permission = TenantPluginPermission.DebugPermission(args["debug_permission"])
+ tenant_id = current_tenant_id
- tenant_id = user.current_tenant_id
-
- return {"success": PluginPermissionService.change_permission(tenant_id, install_permission, debug_permission)}
+ return {
+ "success": PluginPermissionService.change_permission(
+ tenant_id, args.install_permission, args.debug_permission
+ )
+ }
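Declaring the enum types on `ParserPermissionChange` lets pydantic perform the `TenantPluginPermission.InstallPermission(...)` conversion that the removed lines did by hand. A standalone sketch with stand-in enums (member values are illustrative assumptions):

```python
# Stand-in enums for models.account.TenantPluginPermission; the member values
# here are illustrative assumptions.
from enum import Enum

from pydantic import BaseModel


class InstallPermission(str, Enum):
    EVERYONE = "everyone"
    ADMINS = "admins"


class DebugPermission(str, Enum):
    EVERYONE = "everyone"
    NOBODY = "noone"


class ParserPermissionChange(BaseModel):
    install_permission: InstallPermission
    debug_permission: DebugPermission


args = ParserPermissionChange.model_validate(
    {"install_permission": "everyone", "debug_permission": "noone"}
)
assert args.debug_permission is DebugPermission.NOBODY  # coerced, no manual Enum(...) call
```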
@console_ns.route("/workspaces/current/plugin/permission/fetch")
@@ -503,7 +608,7 @@ class PluginFetchPermissionApi(Resource):
@login_required
@account_initialization_required
def get(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
permission = PluginPermissionService.get_permission(tenant_id)
if not permission:
@@ -524,34 +629,27 @@ class PluginFetchPermissionApi(Resource):
@console_ns.route("/workspaces/current/plugin/parameters/dynamic-options")
class PluginFetchDynamicSelectOptionsApi(Resource):
+ @console_ns.expect(console_ns.models[ParserDynamicOptions.__name__])
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def get(self):
- # check if the user is admin or owner
- if not current_user.is_admin_or_owner:
- raise Forbidden()
-
- tenant_id = current_user.current_tenant_id
+ current_user, tenant_id = current_account_with_tenant()
user_id = current_user.id
- parser = reqparse.RequestParser()
- parser.add_argument("plugin_id", type=str, required=True, location="args")
- parser.add_argument("provider", type=str, required=True, location="args")
- parser.add_argument("action", type=str, required=True, location="args")
- parser.add_argument("parameter", type=str, required=True, location="args")
- parser.add_argument("provider_type", type=str, required=True, location="args")
- args = parser.parse_args()
+ args = ParserDynamicOptions.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
options = PluginParameterService.get_dynamic_select_options(
- tenant_id,
- user_id,
- args["plugin_id"],
- args["provider"],
- args["action"],
- args["parameter"],
- args["provider_type"],
+ tenant_id=tenant_id,
+ user_id=user_id,
+ plugin_id=args.plugin_id,
+ provider=args.provider,
+ action=args.action,
+ parameter=args.parameter,
+ credential_id=args.credential_id,
+ provider_type=args.provider_type,
)
except PluginDaemonClientSideError as e:
raise ValueError(e)
@@ -561,35 +659,29 @@ class PluginFetchDynamicSelectOptionsApi(Resource):
@console_ns.route("/workspaces/current/plugin/preferences/change")
class PluginChangePreferencesApi(Resource):
+ @console_ns.expect(console_ns.models[ParserPreferencesChange.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
if not user.is_admin_or_owner:
raise Forbidden()
- req = reqparse.RequestParser()
- req.add_argument("permission", type=dict, required=True, location="json")
- req.add_argument("auto_upgrade", type=dict, required=True, location="json")
- args = req.parse_args()
+ args = ParserPreferencesChange.model_validate(console_ns.payload)
- tenant_id = user.current_tenant_id
+ permission = args.permission
- permission = args["permission"]
+ install_permission = permission.install_permission
+ debug_permission = permission.debug_permission
- install_permission = TenantPluginPermission.InstallPermission(permission.get("install_permission", "everyone"))
- debug_permission = TenantPluginPermission.DebugPermission(permission.get("debug_permission", "everyone"))
+ auto_upgrade = args.auto_upgrade
- auto_upgrade = args["auto_upgrade"]
-
- strategy_setting = TenantPluginAutoUpgradeStrategy.StrategySetting(
- auto_upgrade.get("strategy_setting", "fix_only")
- )
- upgrade_time_of_day = auto_upgrade.get("upgrade_time_of_day", 0)
- upgrade_mode = TenantPluginAutoUpgradeStrategy.UpgradeMode(auto_upgrade.get("upgrade_mode", "exclude"))
- exclude_plugins = auto_upgrade.get("exclude_plugins", [])
- include_plugins = auto_upgrade.get("include_plugins", [])
+ strategy_setting = auto_upgrade.strategy_setting
+ upgrade_time_of_day = auto_upgrade.upgrade_time_of_day
+ upgrade_mode = auto_upgrade.upgrade_mode
+ exclude_plugins = auto_upgrade.exclude_plugins
+ include_plugins = auto_upgrade.include_plugins
# set permission
set_permission_result = PluginPermissionService.change_permission(
@@ -621,7 +713,7 @@ class PluginFetchPreferencesApi(Resource):
@login_required
@account_initialization_required
def get(self):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
permission = PluginPermissionService.get_permission(tenant_id)
permission_dict = {
@@ -656,15 +748,28 @@ class PluginFetchPreferencesApi(Resource):
@console_ns.route("/workspaces/current/plugin/preferences/autoupgrade/exclude")
class PluginAutoUpgradeExcludePluginApi(Resource):
+ @console_ns.expect(console_ns.models[ParserExcludePlugin.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self):
# exclude one single plugin
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- req = reqparse.RequestParser()
- req.add_argument("plugin_id", type=str, required=True, location="json")
- args = req.parse_args()
+ args = ParserExcludePlugin.model_validate(console_ns.payload)
- return jsonable_encoder({"success": PluginAutoUpgradeService.exclude_plugin(tenant_id, args["plugin_id"])})
+ return jsonable_encoder({"success": PluginAutoUpgradeService.exclude_plugin(tenant_id, args.plugin_id)})
+
+
+@console_ns.route("/workspaces/current/plugin/readme")
+class PluginReadmeApi(Resource):
+ @console_ns.expect(console_ns.models[ParserReadme.__name__])
+ @setup_required
+ @login_required
+ @account_initialization_required
+ def get(self):
+ _, tenant_id = current_account_with_tenant()
+ args = ParserReadme.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ return jsonable_encoder(
+ {"readme": PluginService.fetch_plugin_readme(tenant_id, args.plugin_unique_identifier, args.language)}
+ )
diff --git a/api/controllers/console/workspace/tool_providers.py b/api/controllers/console/workspace/tool_providers.py
index 9285577f72..2c54aa5a20 100644
--- a/api/controllers/console/workspace/tool_providers.py
+++ b/api/controllers/console/workspace/tool_providers.py
@@ -2,11 +2,11 @@ import io
from urllib.parse import urlparse
from flask import make_response, redirect, request, send_file
-from flask_login import current_user
from flask_restx import (
Resource,
reqparse,
)
+from sqlalchemy.orm import Session
from werkzeug.exceptions import Forbidden
from configs import dify_config
@@ -14,22 +14,26 @@ from controllers.console import console_ns
from controllers.console.wraps import (
account_initialization_required,
enterprise_license_required,
+ is_admin_or_owner_required,
setup_required,
)
+from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration
from core.mcp.auth.auth_flow import auth, handle_callback
-from core.mcp.auth.auth_provider import OAuthClientProvider
-from core.mcp.error import MCPAuthError, MCPError
+from core.mcp.error import MCPAuthError, MCPError, MCPRefreshTokenError
from core.mcp.mcp_client import MCPClient
from core.model_runtime.utils.encoders import jsonable_encoder
+from core.plugin.entities.plugin_daemon import CredentialType
from core.plugin.impl.oauth import OAuthHandler
-from core.tools.entities.tool_entities import CredentialType
+from extensions.ext_database import db
from libs.helper import StrLen, alphanumeric, uuid_value
-from libs.login import login_required
+from libs.login import current_account_with_tenant, login_required
from models.provider_ids import ToolProviderID
from services.plugin.oauth_service import OAuthProxyService
from services.tools.api_tools_manage_service import ApiToolManageService
from services.tools.builtin_tools_manage_service import BuiltinToolManageService
-from services.tools.mcp_tools_manage_service import MCPToolManageService
+from services.tools.mcp_tools_manage_service import MCPToolManageService, OAuthDataType
from services.tools.tool_labels_service import ToolLabelsService
from services.tools.tools_manage_service import ToolCommonService
from services.tools.tools_transform_service import ToolTransformService
@@ -43,31 +47,34 @@ def is_valid_url(url: str) -> bool:
try:
parsed = urlparse(url)
return all([parsed.scheme, parsed.netloc]) and parsed.scheme in ["http", "https"]
- except Exception:
+ except (ValueError, TypeError):
+ # ValueError: Invalid URL format
+ # TypeError: url is not a string
return False
+parser_tool = reqparse.RequestParser().add_argument(
+ "type",
+ type=str,
+ choices=["builtin", "model", "api", "workflow", "mcp"],
+ required=False,
+ nullable=True,
+ location="args",
+)
+
+
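Unlike the two files above, tool_providers.py keeps reqparse but hoists each parser to module level, so one object serves both `@console_ns.expect(...)` (for the generated docs) and `parse_args()` inside the handler. A minimal flask-restx sketch of the pattern, with hypothetical names:

```python
# Minimal flask-restx sketch of the hoisted-parser pattern; the app, namespace,
# and route here are hypothetical.
from flask import Flask
from flask_restx import Api, Namespace, Resource, reqparse

app = Flask(__name__)
api = Api(app)
ns = Namespace("demo")
api.add_namespace(ns)

# Defined once at module level: documented via expect() and reused per request.
parser_demo = reqparse.RequestParser().add_argument(
    "type", type=str, choices=["builtin", "api"], required=False, location="args"
)


@ns.route("/tools")
class ToolsApi(Resource):
    @ns.expect(parser_demo)
    def get(self):
        args = parser_demo.parse_args()
        return {"type": args.get("type")}
```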
@console_ns.route("/workspaces/current/tool-providers")
class ToolProviderListApi(Resource):
+ @console_ns.expect(parser_tool)
@setup_required
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- req = reqparse.RequestParser()
- req.add_argument(
- "type",
- type=str,
- choices=["builtin", "model", "api", "workflow", "mcp"],
- required=False,
- nullable=True,
- location="args",
- )
- args = req.parse_args()
+ args = parser_tool.parse_args()
return ToolCommonService.list_tool_providers(user_id, tenant_id, args.get("type", None))
@@ -78,9 +85,7 @@ class ToolBuiltinProviderListToolsApi(Resource):
@login_required
@account_initialization_required
def get(self, provider):
- user = current_user
-
- tenant_id = user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return jsonable_encoder(
BuiltinToolManageService.list_builtin_tool_provider_tools(
@@ -96,27 +101,27 @@ class ToolBuiltinProviderInfoApi(Resource):
@login_required
@account_initialization_required
def get(self, provider):
- user = current_user
-
- tenant_id = user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return jsonable_encoder(BuiltinToolManageService.get_builtin_tool_provider_info(tenant_id, provider))
+parser_delete = reqparse.RequestParser().add_argument(
+ "credential_id", type=str, required=True, nullable=False, location="json"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/builtin//delete")
class ToolBuiltinProviderDeleteApi(Resource):
+ @console_ns.expect(parser_delete)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider):
- user = current_user
- if not user.is_admin_or_owner:
- raise Forbidden()
+ _, tenant_id = current_account_with_tenant()
- tenant_id = user.current_tenant_id
- req = reqparse.RequestParser()
- req.add_argument("credential_id", type=str, required=True, nullable=False, location="json")
- args = req.parse_args()
+ args = parser_delete.parse_args()
return BuiltinToolManageService.delete_builtin_tool_provider(
tenant_id,
@@ -125,22 +130,26 @@ class ToolBuiltinProviderDeleteApi(Resource):
)
+parser_add = (
+ reqparse.RequestParser()
+ .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+ .add_argument("name", type=StrLen(30), required=False, nullable=False, location="json")
+ .add_argument("type", type=str, required=True, nullable=False, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/builtin//add")
class ToolBuiltinProviderAddApi(Resource):
+ @console_ns.expect(parser_add)
@setup_required
@login_required
@account_initialization_required
def post(self, provider):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("name", type=StrLen(30), required=False, nullable=False, location="json")
- parser.add_argument("type", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ args = parser_add.parse_args()
if args["type"] not in CredentialType.values():
raise ValueError(f"Invalid credential type: {args['type']}")
@@ -155,26 +164,26 @@ class ToolBuiltinProviderAddApi(Resource):
)
+parser_update = (
+ reqparse.RequestParser()
+ .add_argument("credential_id", type=str, required=True, nullable=False, location="json")
+ .add_argument("credentials", type=dict, required=False, nullable=True, location="json")
+ .add_argument("name", type=StrLen(30), required=False, nullable=True, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/builtin//update")
class ToolBuiltinProviderUpdateApi(Resource):
+ @console_ns.expect(parser_update)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self, provider):
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
-
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("credential_id", type=str, required=True, nullable=False, location="json")
- parser.add_argument("credentials", type=dict, required=False, nullable=True, location="json")
- parser.add_argument("name", type=StrLen(30), required=False, nullable=True, location="json")
-
- args = parser.parse_args()
+ args = parser_update.parse_args()
result = BuiltinToolManageService.update_builtin_tool_provider(
user_id=user_id,
@@ -193,7 +202,7 @@ class ToolBuiltinProviderGetCredentialsApi(Resource):
@login_required
@account_initialization_required
def get(self, provider):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return jsonable_encoder(
BuiltinToolManageService.get_builtin_tool_provider_credentials(
@@ -212,31 +221,32 @@ class ToolBuiltinProviderIconApi(Resource):
return send_file(io.BytesIO(icon_bytes), mimetype=mimetype, max_age=icon_cache_max_age)
+parser_api_add = (
+ reqparse.RequestParser()
+ .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+ .add_argument("schema_type", type=str, required=True, nullable=False, location="json")
+ .add_argument("schema", type=str, required=True, nullable=False, location="json")
+ .add_argument("provider", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon", type=dict, required=True, nullable=False, location="json")
+ .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json")
+ .add_argument("labels", type=list[str], required=False, nullable=True, location="json", default=[])
+ .add_argument("custom_disclaimer", type=str, required=False, nullable=True, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/api/add")
class ToolApiProviderAddApi(Resource):
+ @console_ns.expect(parser_api_add)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("schema_type", type=str, required=True, nullable=False, location="json")
- parser.add_argument("schema", type=str, required=True, nullable=False, location="json")
- parser.add_argument("provider", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("privacy_policy", type=str, required=False, nullable=True, location="json")
- parser.add_argument("labels", type=list[str], required=False, nullable=True, location="json", default=[])
- parser.add_argument("custom_disclaimer", type=str, required=False, nullable=True, location="json")
-
- args = parser.parse_args()
+ args = parser_api_add.parse_args()
return ApiToolManageService.create_api_tool_provider(
user_id,
@@ -252,22 +262,21 @@ class ToolApiProviderAddApi(Resource):
)
+parser_remote = reqparse.RequestParser().add_argument("url", type=str, required=True, nullable=False, location="args")
+
+
@console_ns.route("/workspaces/current/tool-provider/api/remote")
class ToolApiProviderGetRemoteSchemaApi(Resource):
+ @console_ns.expect(parser_remote)
@setup_required
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
-
- parser.add_argument("url", type=str, required=True, nullable=False, location="args")
-
- args = parser.parse_args()
+ args = parser_remote.parse_args()
return ApiToolManageService.get_api_tool_provider_remote_schema(
user_id,
@@ -276,22 +285,23 @@ class ToolApiProviderGetRemoteSchemaApi(Resource):
)
+parser_tools = reqparse.RequestParser().add_argument(
+ "provider", type=str, required=True, nullable=False, location="args"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/api/tools")
class ToolApiProviderListToolsApi(Resource):
+ @console_ns.expect(parser_tools)
@setup_required
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
-
- parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
-
- args = parser.parse_args()
+ args = parser_tools.parse_args()
return jsonable_encoder(
ApiToolManageService.list_api_tool_provider_tools(
@@ -302,32 +312,33 @@ class ToolApiProviderListToolsApi(Resource):
)
+parser_api_update = (
+ reqparse.RequestParser()
+ .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+ .add_argument("schema_type", type=str, required=True, nullable=False, location="json")
+ .add_argument("schema", type=str, required=True, nullable=False, location="json")
+ .add_argument("provider", type=str, required=True, nullable=False, location="json")
+ .add_argument("original_provider", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon", type=dict, required=True, nullable=False, location="json")
+ .add_argument("privacy_policy", type=str, required=True, nullable=True, location="json")
+ .add_argument("labels", type=list[str], required=False, nullable=True, location="json")
+ .add_argument("custom_disclaimer", type=str, required=True, nullable=True, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/api/update")
class ToolApiProviderUpdateApi(Resource):
+ @console_ns.expect(parser_api_update)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("schema_type", type=str, required=True, nullable=False, location="json")
- parser.add_argument("schema", type=str, required=True, nullable=False, location="json")
- parser.add_argument("provider", type=str, required=True, nullable=False, location="json")
- parser.add_argument("original_provider", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("privacy_policy", type=str, required=True, nullable=True, location="json")
- parser.add_argument("labels", type=list[str], required=False, nullable=True, location="json")
- parser.add_argument("custom_disclaimer", type=str, required=True, nullable=True, location="json")
-
- args = parser.parse_args()
+ args = parser_api_update.parse_args()
return ApiToolManageService.update_api_tool_provider(
user_id,
@@ -344,25 +355,24 @@ class ToolApiProviderUpdateApi(Resource):
)
+parser_api_delete = reqparse.RequestParser().add_argument(
+ "provider", type=str, required=True, nullable=False, location="json"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/api/delete")
class ToolApiProviderDeleteApi(Resource):
+ @console_ns.expect(parser_api_delete)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
-
- parser.add_argument("provider", type=str, required=True, nullable=False, location="json")
-
- args = parser.parse_args()
+ args = parser_api_delete.parse_args()
return ApiToolManageService.delete_api_tool_provider(
user_id,
@@ -371,22 +381,21 @@ class ToolApiProviderDeleteApi(Resource):
)
+parser_get = reqparse.RequestParser().add_argument("provider", type=str, required=True, nullable=False, location="args")
+
+
@console_ns.route("/workspaces/current/tool-provider/api/get")
class ToolApiProviderGetApi(Resource):
+ @console_ns.expect(parser_get)
@setup_required
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
-
- parser.add_argument("provider", type=str, required=True, nullable=False, location="args")
-
- args = parser.parse_args()
+ args = parser_get.parse_args()
return ApiToolManageService.get_api_tool_provider(
user_id,
@@ -401,8 +410,7 @@ class ToolBuiltinProviderCredentialsSchemaApi(Resource):
@login_required
@account_initialization_required
def get(self, provider, credential_type):
- user = current_user
- tenant_id = user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return jsonable_encoder(
BuiltinToolManageService.list_builtin_provider_credentials_schema(
@@ -411,42 +419,47 @@ class ToolBuiltinProviderCredentialsSchemaApi(Resource):
)
+parser_schema = reqparse.RequestParser().add_argument(
+ "schema", type=str, required=True, nullable=False, location="json"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/api/schema")
class ToolApiProviderSchemaApi(Resource):
+ @console_ns.expect(parser_schema)
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
-
- parser.add_argument("schema", type=str, required=True, nullable=False, location="json")
-
- args = parser.parse_args()
+ args = parser_schema.parse_args()
return ApiToolManageService.parser_api_schema(
schema=args["schema"],
)
+parser_pre = (
+ reqparse.RequestParser()
+ .add_argument("tool_name", type=str, required=True, nullable=False, location="json")
+ .add_argument("provider_name", type=str, required=False, nullable=False, location="json")
+ .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
+ .add_argument("parameters", type=dict, required=True, nullable=False, location="json")
+ .add_argument("schema_type", type=str, required=True, nullable=False, location="json")
+ .add_argument("schema", type=str, required=True, nullable=False, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/api/test/pre")
class ToolApiProviderPreviousTestApi(Resource):
+ @console_ns.expect(parser_pre)
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
-
- parser.add_argument("tool_name", type=str, required=True, nullable=False, location="json")
- parser.add_argument("provider_name", type=str, required=False, nullable=False, location="json")
- parser.add_argument("credentials", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("parameters", type=dict, required=True, nullable=False, location="json")
- parser.add_argument("schema_type", type=str, required=True, nullable=False, location="json")
- parser.add_argument("schema", type=str, required=True, nullable=False, location="json")
-
- args = parser.parse_args()
-
+ args = parser_pre.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
return ApiToolManageService.test_api_tool_preview(
- current_user.current_tenant_id,
+ current_tenant_id,
args["provider_name"] or "",
args["tool_name"],
args["credentials"],
@@ -456,31 +469,32 @@ class ToolApiProviderPreviousTestApi(Resource):
)
+parser_create = (
+ reqparse.RequestParser()
+ .add_argument("workflow_app_id", type=uuid_value, required=True, nullable=False, location="json")
+ .add_argument("name", type=alphanumeric, required=True, nullable=False, location="json")
+ .add_argument("label", type=str, required=True, nullable=False, location="json")
+ .add_argument("description", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon", type=dict, required=True, nullable=False, location="json")
+ .add_argument("parameters", type=list[dict], required=True, nullable=False, location="json")
+ .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="")
+ .add_argument("labels", type=list[str], required=False, nullable=True, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/workflow/create")
class ToolWorkflowProviderCreateApi(Resource):
+ @console_ns.expect(parser_create)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- reqparser = reqparse.RequestParser()
- reqparser.add_argument("workflow_app_id", type=uuid_value, required=True, nullable=False, location="json")
- reqparser.add_argument("name", type=alphanumeric, required=True, nullable=False, location="json")
- reqparser.add_argument("label", type=str, required=True, nullable=False, location="json")
- reqparser.add_argument("description", type=str, required=True, nullable=False, location="json")
- reqparser.add_argument("icon", type=dict, required=True, nullable=False, location="json")
- reqparser.add_argument("parameters", type=list[dict], required=True, nullable=False, location="json")
- reqparser.add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="")
- reqparser.add_argument("labels", type=list[str], required=False, nullable=True, location="json")
-
- args = reqparser.parse_args()
+ args = parser_create.parse_args()
return WorkflowToolManageService.create_workflow_tool(
user_id=user_id,
@@ -496,31 +510,31 @@ class ToolWorkflowProviderCreateApi(Resource):
)
+parser_workflow_update = (
+ reqparse.RequestParser()
+ .add_argument("workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json")
+ .add_argument("name", type=alphanumeric, required=True, nullable=False, location="json")
+ .add_argument("label", type=str, required=True, nullable=False, location="json")
+ .add_argument("description", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon", type=dict, required=True, nullable=False, location="json")
+ .add_argument("parameters", type=list[dict], required=True, nullable=False, location="json")
+ .add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="")
+ .add_argument("labels", type=list[str], required=False, nullable=True, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/workflow/update")
class ToolWorkflowProviderUpdateApi(Resource):
+ @console_ns.expect(parser_workflow_update)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
-
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- reqparser = reqparse.RequestParser()
- reqparser.add_argument("workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json")
- reqparser.add_argument("name", type=alphanumeric, required=True, nullable=False, location="json")
- reqparser.add_argument("label", type=str, required=True, nullable=False, location="json")
- reqparser.add_argument("description", type=str, required=True, nullable=False, location="json")
- reqparser.add_argument("icon", type=dict, required=True, nullable=False, location="json")
- reqparser.add_argument("parameters", type=list[dict], required=True, nullable=False, location="json")
- reqparser.add_argument("privacy_policy", type=str, required=False, nullable=True, location="json", default="")
- reqparser.add_argument("labels", type=list[str], required=False, nullable=True, location="json")
-
- args = reqparser.parse_args()
+ args = parser_workflow_update.parse_args()
if not args["workflow_tool_id"]:
raise ValueError("incorrect workflow_tool_id")
@@ -539,24 +553,24 @@ class ToolWorkflowProviderUpdateApi(Resource):
)
+parser_workflow_delete = reqparse.RequestParser().add_argument(
+ "workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/workflow/delete")
class ToolWorkflowProviderDeleteApi(Resource):
+ @console_ns.expect(parser_workflow_delete)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def post(self):
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- reqparser = reqparse.RequestParser()
- reqparser.add_argument("workflow_tool_id", type=uuid_value, required=True, nullable=False, location="json")
-
- args = reqparser.parse_args()
+ args = parser_workflow_delete.parse_args()
return WorkflowToolManageService.delete_workflow_tool(
user_id,
@@ -565,22 +579,25 @@ class ToolWorkflowProviderDeleteApi(Resource):
)
+parser_wf_get = (
+ reqparse.RequestParser()
+ .add_argument("workflow_tool_id", type=uuid_value, required=False, nullable=True, location="args")
+ .add_argument("workflow_app_id", type=uuid_value, required=False, nullable=True, location="args")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/workflow/get")
class ToolWorkflowProviderGetApi(Resource):
+ @console_ns.expect(parser_wf_get)
@setup_required
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("workflow_tool_id", type=uuid_value, required=False, nullable=True, location="args")
- parser.add_argument("workflow_app_id", type=uuid_value, required=False, nullable=True, location="args")
-
- args = parser.parse_args()
+ args = parser_wf_get.parse_args()
if args.get("workflow_tool_id"):
tool = WorkflowToolManageService.get_workflow_tool_by_tool_id(
@@ -600,21 +617,23 @@ class ToolWorkflowProviderGetApi(Resource):
return jsonable_encoder(tool)
+parser_wf_tools = reqparse.RequestParser().add_argument(
+ "workflow_tool_id", type=uuid_value, required=True, nullable=False, location="args"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/workflow/tools")
class ToolWorkflowProviderListToolApi(Resource):
+ @console_ns.expect(parser_wf_tools)
@setup_required
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
- parser = reqparse.RequestParser()
- parser.add_argument("workflow_tool_id", type=uuid_value, required=True, nullable=False, location="args")
-
- args = parser.parse_args()
+ args = parser_wf_tools.parse_args()
return jsonable_encoder(
WorkflowToolManageService.list_single_workflow_tools(
@@ -631,10 +650,9 @@ class ToolBuiltinListApi(Resource):
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
return jsonable_encoder(
[
@@ -653,8 +671,7 @@ class ToolApiListApi(Resource):
@login_required
@account_initialization_required
def get(self):
- user = current_user
- tenant_id = user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return jsonable_encoder(
[
@@ -672,10 +689,9 @@ class ToolWorkflowListApi(Resource):
@login_required
@account_initialization_required
def get(self):
- user = current_user
+ user, tenant_id = current_account_with_tenant()
user_id = user.id
- tenant_id = user.current_tenant_id
return jsonable_encoder(
[
@@ -702,26 +718,22 @@ class ToolLabelsApi(Resource):
class ToolPluginOAuthApi(Resource):
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
def get(self, provider):
tool_provider = ToolProviderID(provider)
plugin_id = tool_provider.plugin_id
provider_name = tool_provider.provider_name
- # todo check permission
- user = current_user
+ user, tenant_id = current_account_with_tenant()
- if not user.is_admin_or_owner:
- raise Forbidden()
-
- tenant_id = user.current_tenant_id
oauth_client_params = BuiltinToolManageService.get_oauth_client(tenant_id=tenant_id, provider=provider)
if oauth_client_params is None:
raise Forbidden("no oauth available client config found for this tool provider")
oauth_handler = OAuthHandler()
context_id = OAuthProxyService.create_proxy_context(
- user_id=current_user.id, tenant_id=tenant_id, plugin_id=plugin_id, provider=provider_name
+ user_id=user.id, tenant_id=tenant_id, plugin_id=plugin_id, provider=provider_name
)
redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{provider}/tool/callback"
authorization_url_response = oauth_handler.get_authorization_url(
@@ -794,38 +806,46 @@ class ToolOAuthCallback(Resource):
return redirect(f"{dify_config.CONSOLE_WEB_URL}/oauth-callback")
+parser_default_cred = reqparse.RequestParser().add_argument(
+ "id", type=str, required=True, nullable=False, location="json"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/builtin//default-credential")
class ToolBuiltinProviderSetDefaultApi(Resource):
+ @console_ns.expect(parser_default_cred)
@setup_required
@login_required
@account_initialization_required
def post(self, provider):
- parser = reqparse.RequestParser()
- parser.add_argument("id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
+ current_user, current_tenant_id = current_account_with_tenant()
+ args = parser_default_cred.parse_args()
return BuiltinToolManageService.set_default_provider(
- tenant_id=current_user.current_tenant_id, user_id=current_user.id, provider=provider, id=args["id"]
+ tenant_id=current_tenant_id, user_id=current_user.id, provider=provider, id=args["id"]
)
+parser_custom = (
+ reqparse.RequestParser()
+ .add_argument("client_params", type=dict, required=False, nullable=True, location="json")
+ .add_argument("enable_oauth_custom_client", type=bool, required=False, nullable=True, location="json")
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/builtin//oauth/custom-client")
class ToolOAuthCustomClient(Resource):
+ @console_ns.expect(parser_custom)
@setup_required
@login_required
+ @is_admin_or_owner_required
@account_initialization_required
- def post(self, provider):
- parser = reqparse.RequestParser()
- parser.add_argument("client_params", type=dict, required=False, nullable=True, location="json")
- parser.add_argument("enable_oauth_custom_client", type=bool, required=False, nullable=True, location="json")
- args = parser.parse_args()
+ def post(self, provider: str):
+ args = parser_custom.parse_args()
- user = current_user
-
- if not user.is_admin_or_owner:
- raise Forbidden()
+ _, tenant_id = current_account_with_tenant()
return BuiltinToolManageService.save_custom_oauth_client_params(
- tenant_id=user.current_tenant_id,
+ tenant_id=tenant_id,
provider=provider,
client_params=args.get("client_params", {}),
enable_oauth_custom_client=args.get("enable_oauth_custom_client", True),
@@ -835,20 +855,18 @@ class ToolOAuthCustomClient(Resource):
@login_required
@account_initialization_required
def get(self, provider):
+ _, current_tenant_id = current_account_with_tenant()
return jsonable_encoder(
- BuiltinToolManageService.get_custom_oauth_client_params(
- tenant_id=current_user.current_tenant_id, provider=provider
- )
+ BuiltinToolManageService.get_custom_oauth_client_params(tenant_id=current_tenant_id, provider=provider)
)
@setup_required
@login_required
@account_initialization_required
def delete(self, provider):
+ _, current_tenant_id = current_account_with_tenant()
return jsonable_encoder(
- BuiltinToolManageService.delete_custom_oauth_client_params(
- tenant_id=current_user.current_tenant_id, provider=provider
- )
+ BuiltinToolManageService.delete_custom_oauth_client_params(tenant_id=current_tenant_id, provider=provider)
)
@@ -858,9 +876,10 @@ class ToolBuiltinProviderGetOauthClientSchemaApi(Resource):
@login_required
@account_initialization_required
def get(self, provider):
+ _, current_tenant_id = current_account_with_tenant()
return jsonable_encoder(
BuiltinToolManageService.get_builtin_tool_provider_oauth_client_schema(
- tenant_id=current_user.current_tenant_id, provider_name=provider
+ tenant_id=current_tenant_id, provider_name=provider
)
)
@@ -871,7 +890,7 @@ class ToolBuiltinProviderGetCredentialInfoApi(Resource):
@login_required
@account_initialization_required
def get(self, provider):
- tenant_id = current_user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
return jsonable_encoder(
BuiltinToolManageService.get_builtin_tool_provider_credential_info(
@@ -881,134 +900,191 @@ class ToolBuiltinProviderGetCredentialInfoApi(Resource):
)
+parser_mcp = (
+ reqparse.RequestParser()
+ .add_argument("server_url", type=str, required=True, nullable=False, location="json")
+ .add_argument("name", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon_type", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon_background", type=str, required=False, nullable=True, location="json", default="")
+ .add_argument("server_identifier", type=str, required=True, nullable=False, location="json")
+ .add_argument("configuration", type=dict, required=False, nullable=True, location="json", default={})
+ .add_argument("headers", type=dict, required=False, nullable=True, location="json", default={})
+ .add_argument("authentication", type=dict, required=False, nullable=True, location="json", default={})
+)
+parser_mcp_put = (
+ reqparse.RequestParser()
+ .add_argument("server_url", type=str, required=True, nullable=False, location="json")
+ .add_argument("name", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon_type", type=str, required=True, nullable=False, location="json")
+ .add_argument("icon_background", type=str, required=False, nullable=True, location="json")
+ .add_argument("provider_id", type=str, required=True, nullable=False, location="json")
+ .add_argument("server_identifier", type=str, required=True, nullable=False, location="json")
+ .add_argument("configuration", type=dict, required=False, nullable=True, location="json", default={})
+ .add_argument("headers", type=dict, required=False, nullable=True, location="json", default={})
+ .add_argument("authentication", type=dict, required=False, nullable=True, location="json", default={})
+)
+parser_mcp_delete = reqparse.RequestParser().add_argument(
+ "provider_id", type=str, required=True, nullable=False, location="json"
+)
+
+
@console_ns.route("/workspaces/current/tool-provider/mcp")
class ToolProviderMCPApi(Resource):
+ @console_ns.expect(parser_mcp)
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("server_url", type=str, required=True, nullable=False, location="json")
- parser.add_argument("name", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon_type", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon_background", type=str, required=False, nullable=True, location="json", default="")
- parser.add_argument("server_identifier", type=str, required=True, nullable=False, location="json")
- parser.add_argument("timeout", type=float, required=False, nullable=False, location="json", default=30)
- parser.add_argument(
- "sse_read_timeout", type=float, required=False, nullable=False, location="json", default=300
- )
- parser.add_argument("headers", type=dict, required=False, nullable=True, location="json", default={})
- args = parser.parse_args()
- user = current_user
- if not is_valid_url(args["server_url"]):
- raise ValueError("Server URL is not valid.")
- return jsonable_encoder(
- MCPToolManageService.create_mcp_provider(
- tenant_id=user.current_tenant_id,
+ args = parser_mcp.parse_args()
+ user, tenant_id = current_account_with_tenant()
+
+ # Parse and validate models
+ configuration = MCPConfiguration.model_validate(args["configuration"])
+ authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None
+
+ # Create provider
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ result = service.create_provider(
+ tenant_id=tenant_id,
+ user_id=user.id,
server_url=args["server_url"],
name=args["name"],
icon=args["icon"],
icon_type=args["icon_type"],
icon_background=args["icon_background"],
- user_id=user.id,
server_identifier=args["server_identifier"],
- timeout=args["timeout"],
- sse_read_timeout=args["sse_read_timeout"],
headers=args["headers"],
+ configuration=configuration,
+ authentication=authentication,
)
- )
+ return jsonable_encoder(result)
+ @console_ns.expect(parser_mcp_put)
@setup_required
@login_required
@account_initialization_required
def put(self):
- parser = reqparse.RequestParser()
- parser.add_argument("server_url", type=str, required=True, nullable=False, location="json")
- parser.add_argument("name", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon_type", type=str, required=True, nullable=False, location="json")
- parser.add_argument("icon_background", type=str, required=False, nullable=True, location="json")
- parser.add_argument("provider_id", type=str, required=True, nullable=False, location="json")
- parser.add_argument("server_identifier", type=str, required=True, nullable=False, location="json")
- parser.add_argument("timeout", type=float, required=False, nullable=True, location="json")
- parser.add_argument("sse_read_timeout", type=float, required=False, nullable=True, location="json")
- parser.add_argument("headers", type=dict, required=False, nullable=True, location="json")
- args = parser.parse_args()
- if not is_valid_url(args["server_url"]):
- if "[__HIDDEN__]" in args["server_url"]:
- pass
- else:
- raise ValueError("Server URL is not valid.")
- MCPToolManageService.update_mcp_provider(
- tenant_id=current_user.current_tenant_id,
- provider_id=args["provider_id"],
- server_url=args["server_url"],
- name=args["name"],
- icon=args["icon"],
- icon_type=args["icon_type"],
- icon_background=args["icon_background"],
- server_identifier=args["server_identifier"],
- timeout=args.get("timeout"),
- sse_read_timeout=args.get("sse_read_timeout"),
- headers=args.get("headers"),
- )
- return {"result": "success"}
+ args = parser_mcp_put.parse_args()
+ configuration = MCPConfiguration.model_validate(args["configuration"])
+ authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None
+ _, current_tenant_id = current_account_with_tenant()
+ # Step 1: Validate server URL change if needed (includes URL format validation and network operation)
+ validation_result = None
+ with Session(db.engine) as session:
+ service = MCPToolManageService(session=session)
+ validation_result = service.validate_server_url_change(
+ tenant_id=current_tenant_id, provider_id=args["provider_id"], new_server_url=args["server_url"]
+ )
+
+        # No error check needed here; validate_server_url_change raises on failure
+
+ # Step 2: Perform database update in a transaction
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ service.update_provider(
+ tenant_id=current_tenant_id,
+ provider_id=args["provider_id"],
+ server_url=args["server_url"],
+ name=args["name"],
+ icon=args["icon"],
+ icon_type=args["icon_type"],
+ icon_background=args["icon_background"],
+ server_identifier=args["server_identifier"],
+ headers=args["headers"],
+ configuration=configuration,
+ authentication=authentication,
+ validation_result=validation_result,
+ )
+ return {"result": "success"}
+
+ @console_ns.expect(parser_mcp_delete)
@setup_required
@login_required
@account_initialization_required
def delete(self):
- parser = reqparse.RequestParser()
- parser.add_argument("provider_id", type=str, required=True, nullable=False, location="json")
- args = parser.parse_args()
- MCPToolManageService.delete_mcp_tool(tenant_id=current_user.current_tenant_id, provider_id=args["provider_id"])
- return {"result": "success"}
+ args = parser_mcp_delete.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ service.delete_provider(tenant_id=current_tenant_id, provider_id=args["provider_id"])
+ return {"result": "success"}
+
+
+parser_auth = (
+ reqparse.RequestParser()
+ .add_argument("provider_id", type=str, required=True, nullable=False, location="json")
+ .add_argument("authorization_code", type=str, required=False, nullable=True, location="json")
+)
@console_ns.route("/workspaces/current/tool-provider/mcp/auth")
class ToolMCPAuthApi(Resource):
+ @console_ns.expect(parser_auth)
@setup_required
@login_required
@account_initialization_required
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("provider_id", type=str, required=True, nullable=False, location="json")
- parser.add_argument("authorization_code", type=str, required=False, nullable=True, location="json")
- args = parser.parse_args()
+ args = parser_auth.parse_args()
provider_id = args["provider_id"]
- tenant_id = current_user.current_tenant_id
- provider = MCPToolManageService.get_mcp_provider_by_provider_id(provider_id, tenant_id)
- if not provider:
- raise ValueError("provider not found")
- try:
- with MCPClient(
- provider.decrypted_server_url,
- provider_id,
- tenant_id,
- authed=False,
- authorization_code=args["authorization_code"],
- for_list=True,
- headers=provider.decrypted_headers,
- timeout=provider.timeout,
- sse_read_timeout=provider.sse_read_timeout,
- ):
- MCPToolManageService.update_mcp_provider_credentials(
- mcp_provider=provider,
- credentials=provider.decrypted_credentials,
- authed=True,
- )
- return {"result": "success"}
+ _, tenant_id = current_account_with_tenant()
- except MCPAuthError:
- auth_provider = OAuthClientProvider(provider_id, tenant_id, for_list=True)
- return auth(auth_provider, provider.decrypted_server_url, args["authorization_code"])
- except MCPError as e:
- MCPToolManageService.update_mcp_provider_credentials(
- mcp_provider=provider,
- credentials={},
- authed=False,
- )
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ db_provider = service.get_provider(provider_id=provider_id, tenant_id=tenant_id)
+ if not db_provider:
+ raise ValueError("provider not found")
+
+ # Convert to entity
+ provider_entity = db_provider.to_entity()
+ server_url = provider_entity.decrypt_server_url()
+ headers = provider_entity.decrypt_authentication()
+
+ # Try to connect without active transaction
+ try:
+            # Connect with MCPClient; an MCPAuthError below falls through to the OAuth flow
+ with MCPClient(
+ server_url=server_url,
+ headers=headers,
+ timeout=provider_entity.timeout,
+ sse_read_timeout=provider_entity.sse_read_timeout,
+ ):
+ # Update credentials in new transaction
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ service.update_provider_credentials(
+ provider_id=provider_id,
+ tenant_id=tenant_id,
+ credentials=provider_entity.credentials,
+ authed=True,
+ )
+ return {"result": "success"}
+ except MCPAuthError as e:
+ try:
+ # Pass the extracted OAuth metadata hints to auth()
+ auth_result = auth(
+ provider_entity,
+ args.get("authorization_code"),
+ resource_metadata_url=e.resource_metadata_url,
+ scope_hint=e.scope_hint,
+ )
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ response = service.execute_auth_actions(auth_result)
+ return response
+ except MCPRefreshTokenError as e:
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id)
+ raise ValueError(f"Failed to refresh token, please try to authorize again: {e}") from e
+ except (MCPError, ValueError) as e:
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id)
raise ValueError(f"Failed to connect to MCP server: {e}") from e
@@ -1018,9 +1094,11 @@ class ToolMCPDetailApi(Resource):
@login_required
@account_initialization_required
def get(self, provider_id):
- user = current_user
- provider = MCPToolManageService.get_mcp_provider_by_provider_id(provider_id, user.current_tenant_id)
- return jsonable_encoder(ToolTransformService.mcp_provider_to_user_provider(provider, for_list=True))
+ _, tenant_id = current_account_with_tenant()
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ provider = service.get_provider(provider_id=provider_id, tenant_id=tenant_id)
+ return jsonable_encoder(ToolTransformService.mcp_provider_to_user_provider(provider, for_list=True))
@console_ns.route("/workspaces/current/tools/mcp")
@@ -1029,12 +1107,14 @@ class ToolMCPListAllApi(Resource):
@login_required
@account_initialization_required
def get(self):
- user = current_user
- tenant_id = user.current_tenant_id
+ _, tenant_id = current_account_with_tenant()
- tools = MCPToolManageService.retrieve_mcp_tools(tenant_id=tenant_id)
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ # Skip sensitive data decryption for list view to improve performance
+ tools = service.list_providers(tenant_id=tenant_id, include_sensitive=False)
- return [tool.to_dict() for tool in tools]
+ return [tool.to_dict() for tool in tools]
@console_ns.route("/workspaces/current/tool-provider/mcp/update/")
@@ -1043,22 +1123,39 @@ class ToolMCPUpdateApi(Resource):
@login_required
@account_initialization_required
def get(self, provider_id):
- tenant_id = current_user.current_tenant_id
- tools = MCPToolManageService.list_mcp_tool_from_remote_server(
- tenant_id=tenant_id,
- provider_id=provider_id,
- )
- return jsonable_encoder(tools)
+ _, tenant_id = current_account_with_tenant()
+ with Session(db.engine) as session, session.begin():
+ service = MCPToolManageService(session=session)
+ tools = service.list_provider_tools(
+ tenant_id=tenant_id,
+ provider_id=provider_id,
+ )
+ return jsonable_encoder(tools)
+
+
+parser_cb = (
+ reqparse.RequestParser()
+ .add_argument("code", type=str, required=True, nullable=False, location="args")
+ .add_argument("state", type=str, required=True, nullable=False, location="args")
+)
@console_ns.route("/mcp/oauth/callback")
class ToolMCPCallbackApi(Resource):
+ @console_ns.expect(parser_cb)
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument("code", type=str, required=True, nullable=False, location="args")
- parser.add_argument("state", type=str, required=True, nullable=False, location="args")
- args = parser.parse_args()
+ args = parser_cb.parse_args()
state_key = args["state"]
authorization_code = args["code"]
- handle_callback(state_key, authorization_code)
+
+    # Open a session so the service layer can persist the returned tokens
+ with Session(db.engine) as session, session.begin():
+ mcp_service = MCPToolManageService(session=session)
+ # handle_callback now returns state data and tokens
+ state_data, tokens = handle_callback(state_key, authorization_code)
+ # Save tokens using the service layer
+ mcp_service.save_oauth_data(
+ state_data.provider_id, state_data.tenant_id, tokens.model_dump(), OAuthDataType.TOKENS
+ )
+
return redirect(f"{dify_config.CONSOLE_WEB_URL}/oauth-callback")
diff --git a/api/controllers/console/workspace/trigger_providers.py b/api/controllers/console/workspace/trigger_providers.py
new file mode 100644
index 0000000000..268473d6d1
--- /dev/null
+++ b/api/controllers/console/workspace/trigger_providers.py
@@ -0,0 +1,578 @@
+import logging
+
+from flask import make_response, redirect, request
+from flask_restx import Resource, reqparse
+from sqlalchemy.orm import Session
+from werkzeug.exceptions import BadRequest, Forbidden
+
+from configs import dify_config
+from controllers.web.error import NotFoundError
+from core.model_runtime.utils.encoders import jsonable_encoder
+from core.plugin.entities.plugin_daemon import CredentialType
+from core.plugin.impl.oauth import OAuthHandler
+from core.trigger.entities.entities import SubscriptionBuilderUpdater
+from core.trigger.trigger_manager import TriggerManager
+from extensions.ext_database import db
+from libs.login import current_user, login_required
+from models.account import Account
+from models.provider_ids import TriggerProviderID
+from services.plugin.oauth_service import OAuthProxyService
+from services.trigger.trigger_provider_service import TriggerProviderService
+from services.trigger.trigger_subscription_builder_service import TriggerSubscriptionBuilderService
+from services.trigger.trigger_subscription_operator_service import TriggerSubscriptionOperatorService
+
+from .. import console_ns
+from ..wraps import (
+ account_initialization_required,
+ edit_permission_required,
+ is_admin_or_owner_required,
+ setup_required,
+)
+
+logger = logging.getLogger(__name__)
+
+
+@console_ns.route("/workspaces/current/trigger-provider//icon")
+class TriggerProviderIconApi(Resource):
+ @setup_required
+ @login_required
+ @account_initialization_required
+ def get(self, provider):
+ user = current_user
+ assert isinstance(user, Account)
+ assert user.current_tenant_id is not None
+
+ return TriggerManager.get_trigger_plugin_icon(tenant_id=user.current_tenant_id, provider_id=provider)
+
+
+@console_ns.route("/workspaces/current/triggers")
+class TriggerProviderListApi(Resource):
+ @setup_required
+ @login_required
+ @account_initialization_required
+ def get(self):
+ """List all trigger providers for the current tenant"""
+ user = current_user
+ assert isinstance(user, Account)
+ assert user.current_tenant_id is not None
+ return jsonable_encoder(TriggerProviderService.list_trigger_providers(user.current_tenant_id))
+
+
+@console_ns.route("/workspaces/current/trigger-provider//info")
+class TriggerProviderInfoApi(Resource):
+ @setup_required
+ @login_required
+ @account_initialization_required
+ def get(self, provider):
+ """Get info for a trigger provider"""
+ user = current_user
+ assert isinstance(user, Account)
+ assert user.current_tenant_id is not None
+ return jsonable_encoder(
+ TriggerProviderService.get_trigger_provider(user.current_tenant_id, TriggerProviderID(provider))
+ )
+
+
+@console_ns.route("/workspaces/current/trigger-provider//subscriptions/list")
+class TriggerSubscriptionListApi(Resource):
+ @setup_required
+ @login_required
+ @edit_permission_required
+ @account_initialization_required
+ def get(self, provider):
+ """List all trigger subscriptions for the current tenant's provider"""
+ user = current_user
+ assert user.current_tenant_id is not None
+
+ try:
+ return jsonable_encoder(
+ TriggerProviderService.list_trigger_provider_subscriptions(
+ tenant_id=user.current_tenant_id, provider_id=TriggerProviderID(provider)
+ )
+ )
+ except ValueError as e:
+ return jsonable_encoder({"error": str(e)}), 404
+ except Exception as e:
+ logger.exception("Error listing trigger providers", exc_info=e)
+ raise
+
+
+parser = reqparse.RequestParser().add_argument(
+ "credential_type", type=str, required=False, nullable=True, location="json"
+)
+
+
+@console_ns.route(
+ "/workspaces/current/trigger-provider//subscriptions/builder/create",
+)
+class TriggerSubscriptionBuilderCreateApi(Resource):
+ @console_ns.expect(parser)
+ @setup_required
+ @login_required
+ @edit_permission_required
+ @account_initialization_required
+ def post(self, provider):
+ """Add a new subscription instance for a trigger provider"""
+ user = current_user
+ assert user.current_tenant_id is not None
+
+ args = parser.parse_args()
+
+ try:
+ credential_type = CredentialType.of(args.get("credential_type") or CredentialType.UNAUTHORIZED.value)
+ subscription_builder = TriggerSubscriptionBuilderService.create_trigger_subscription_builder(
+ tenant_id=user.current_tenant_id,
+ user_id=user.id,
+ provider_id=TriggerProviderID(provider),
+ credential_type=credential_type,
+ )
+ return jsonable_encoder({"subscription_builder": subscription_builder})
+ except Exception as e:
+ logger.exception("Error adding provider credential", exc_info=e)
+ raise
+
+
+@console_ns.route(
+ "/workspaces/current/trigger-provider//subscriptions/builder/",
+)
+class TriggerSubscriptionBuilderGetApi(Resource):
+ @setup_required
+ @login_required
+ @edit_permission_required
+ @account_initialization_required
+ def get(self, provider, subscription_builder_id):
+ """Get a subscription instance for a trigger provider"""
+ return jsonable_encoder(
+ TriggerSubscriptionBuilderService.get_subscription_builder_by_id(subscription_builder_id)
+ )
+
+
+parser_api = (
+ reqparse.RequestParser()
+ # The credentials of the subscription builder
+ .add_argument("credentials", type=dict, required=False, nullable=True, location="json")
+)
+
+
+@console_ns.route(
+ "/workspaces/current/trigger-provider//subscriptions/builder/verify/",
+)
+class TriggerSubscriptionBuilderVerifyApi(Resource):
+ @console_ns.expect(parser_api)
+ @setup_required
+ @login_required
+ @edit_permission_required
+ @account_initialization_required
+ def post(self, provider, subscription_builder_id):
+ """Verify a subscription instance for a trigger provider"""
+ user = current_user
+ assert user.current_tenant_id is not None
+
+ args = parser_api.parse_args()
+
+ try:
+ # Use atomic update_and_verify to prevent race conditions
+ return TriggerSubscriptionBuilderService.update_and_verify_builder(
+ tenant_id=user.current_tenant_id,
+ user_id=user.id,
+ provider_id=TriggerProviderID(provider),
+ subscription_builder_id=subscription_builder_id,
+ subscription_builder_updater=SubscriptionBuilderUpdater(
+ credentials=args.get("credentials", None),
+ ),
+ )
+ except Exception as e:
+ logger.exception("Error verifying provider credential", exc_info=e)
+ raise ValueError(str(e)) from e
+
+
+parser_update_api = (
+ reqparse.RequestParser()
+ # The name of the subscription builder
+ .add_argument("name", type=str, required=False, nullable=True, location="json")
+ # The parameters of the subscription builder
+ .add_argument("parameters", type=dict, required=False, nullable=True, location="json")
+ # The properties of the subscription builder
+ .add_argument("properties", type=dict, required=False, nullable=True, location="json")
+ # The credentials of the subscription builder
+ .add_argument("credentials", type=dict, required=False, nullable=True, location="json")
+)
+
+
+@console_ns.route(
+ "/workspaces/current/trigger-provider//subscriptions/builder/update/",
+)
+class TriggerSubscriptionBuilderUpdateApi(Resource):
+ @console_ns.expect(parser_update_api)
+ @setup_required
+ @login_required
+ @edit_permission_required
+ @account_initialization_required
+ def post(self, provider, subscription_builder_id):
+ """Update a subscription instance for a trigger provider"""
+ user = current_user
+ assert isinstance(user, Account)
+ assert user.current_tenant_id is not None
+
+ args = parser_update_api.parse_args()
+ try:
+ return jsonable_encoder(
+ TriggerSubscriptionBuilderService.update_trigger_subscription_builder(
+ tenant_id=user.current_tenant_id,
+ provider_id=TriggerProviderID(provider),
+ subscription_builder_id=subscription_builder_id,
+ subscription_builder_updater=SubscriptionBuilderUpdater(
+ name=args.get("name", None),
+ parameters=args.get("parameters", None),
+ properties=args.get("properties", None),
+ credentials=args.get("credentials", None),
+ ),
+ )
+ )
+ except Exception as e:
+ logger.exception("Error updating provider credential", exc_info=e)
+ raise
+
+
+@console_ns.route(
+ "/workspaces/current/trigger-provider//subscriptions/builder/logs/",
+)
+class TriggerSubscriptionBuilderLogsApi(Resource):
+ @setup_required
+ @login_required
+ @edit_permission_required
+ @account_initialization_required
+ def get(self, provider, subscription_builder_id):
+ """Get the request logs for a subscription instance for a trigger provider"""
+ user = current_user
+ assert isinstance(user, Account)
+ assert user.current_tenant_id is not None
+
+ try:
+ logs = TriggerSubscriptionBuilderService.list_logs(subscription_builder_id)
+ return jsonable_encoder({"logs": [log.model_dump(mode="json") for log in logs]})
+ except Exception as e:
+ logger.exception("Error getting request logs for subscription builder", exc_info=e)
+ raise
+
+
+@console_ns.route(
+ "/workspaces/current/trigger-provider//subscriptions/builder/build/",
+)
+class TriggerSubscriptionBuilderBuildApi(Resource):
+ @console_ns.expect(parser_update_api)
+ @setup_required
+ @login_required
+ @edit_permission_required
+ @account_initialization_required
+ def post(self, provider, subscription_builder_id):
+ """Build a subscription instance for a trigger provider"""
+ user = current_user
+ assert user.current_tenant_id is not None
+ args = parser_update_api.parse_args()
+ try:
+ # Use atomic update_and_build to prevent race conditions
+ TriggerSubscriptionBuilderService.update_and_build_builder(
+ tenant_id=user.current_tenant_id,
+ user_id=user.id,
+ provider_id=TriggerProviderID(provider),
+ subscription_builder_id=subscription_builder_id,
+ subscription_builder_updater=SubscriptionBuilderUpdater(
+ name=args.get("name", None),
+ parameters=args.get("parameters", None),
+ properties=args.get("properties", None),
+ ),
+ )
+ return 200
+ except Exception as e:
+ logger.exception("Error building provider credential", exc_info=e)
+ raise ValueError(str(e)) from e
+
+
+@console_ns.route(
+ "/workspaces/current/trigger-provider//subscriptions/delete",
+)
+class TriggerSubscriptionDeleteApi(Resource):
+ @setup_required
+ @login_required
+ @is_admin_or_owner_required
+ @account_initialization_required
+ def post(self, subscription_id: str):
+ """Delete a subscription instance"""
+ user = current_user
+ assert user.current_tenant_id is not None
+
+ try:
+ with Session(db.engine) as session:
+ # Delete trigger provider subscription
+ TriggerProviderService.delete_trigger_provider(
+ session=session,
+ tenant_id=user.current_tenant_id,
+ subscription_id=subscription_id,
+ )
+ # Delete plugin triggers
+ TriggerSubscriptionOperatorService.delete_plugin_trigger_by_subscription(
+ session=session,
+ tenant_id=user.current_tenant_id,
+ subscription_id=subscription_id,
+ )
+ session.commit()
+ return {"result": "success"}
+ except ValueError as e:
+ raise BadRequest(str(e))
+ except Exception as e:
+ logger.exception("Error deleting provider credential", exc_info=e)
+ raise
+
+
+@console_ns.route("/workspaces/current/trigger-provider//subscriptions/oauth/authorize")
+class TriggerOAuthAuthorizeApi(Resource):
+ @setup_required
+ @login_required
+ @account_initialization_required
+ def get(self, provider):
+ """Initiate OAuth authorization flow for a trigger provider"""
+ user = current_user
+ assert isinstance(user, Account)
+ assert user.current_tenant_id is not None
+
+ try:
+ provider_id = TriggerProviderID(provider)
+ plugin_id = provider_id.plugin_id
+ provider_name = provider_id.provider_name
+ tenant_id = user.current_tenant_id
+
+ # Get OAuth client configuration
+ oauth_client_params = TriggerProviderService.get_oauth_client(
+ tenant_id=tenant_id,
+ provider_id=provider_id,
+ )
+
+ if oauth_client_params is None:
+ raise NotFoundError("No OAuth client configuration found for this trigger provider")
+
+ # Create subscription builder
+ subscription_builder = TriggerSubscriptionBuilderService.create_trigger_subscription_builder(
+ tenant_id=tenant_id,
+ user_id=user.id,
+ provider_id=provider_id,
+ credential_type=CredentialType.OAUTH2,
+ )
+
+ # Create OAuth handler and proxy context
+ oauth_handler = OAuthHandler()
+ context_id = OAuthProxyService.create_proxy_context(
+ user_id=user.id,
+ tenant_id=tenant_id,
+ plugin_id=plugin_id,
+ provider=provider_name,
+ extra_data={
+ "subscription_builder_id": subscription_builder.id,
+ },
+ )
+
+ # Build redirect URI for callback
+ redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{provider}/trigger/callback"
+
+ # Get authorization URL
+ authorization_url_response = oauth_handler.get_authorization_url(
+ tenant_id=tenant_id,
+ user_id=user.id,
+ plugin_id=plugin_id,
+ provider=provider_name,
+ redirect_uri=redirect_uri,
+ system_credentials=oauth_client_params,
+ )
+
+ # Create response with cookie
+ response = make_response(
+ jsonable_encoder(
+ {
+ "authorization_url": authorization_url_response.authorization_url,
+ "subscription_builder_id": subscription_builder.id,
+ "subscription_builder": subscription_builder,
+ }
+ )
+ )
+ response.set_cookie(
+ "context_id",
+ context_id,
+ httponly=True,
+ samesite="Lax",
+ max_age=OAuthProxyService.__MAX_AGE__,
+ )
+
+ return response
+
+ except Exception as e:
+ logger.exception("Error initiating OAuth flow", exc_info=e)
+ raise
+
+
+@console_ns.route("/oauth/plugin//trigger/callback")
+class TriggerOAuthCallbackApi(Resource):
+ @setup_required
+ def get(self, provider):
+ """Handle OAuth callback for trigger provider"""
+ context_id = request.cookies.get("context_id")
+ if not context_id:
+ raise Forbidden("context_id not found")
+
+ # Use and validate proxy context
+ context = OAuthProxyService.use_proxy_context(context_id)
+ if context is None:
+ raise Forbidden("Invalid context_id")
+
+ # Parse provider ID
+ provider_id = TriggerProviderID(provider)
+ plugin_id = provider_id.plugin_id
+ provider_name = provider_id.provider_name
+ user_id = context.get("user_id")
+ tenant_id = context.get("tenant_id")
+ subscription_builder_id = context.get("subscription_builder_id")
+
+ # Get OAuth client configuration
+ oauth_client_params = TriggerProviderService.get_oauth_client(
+ tenant_id=tenant_id,
+ provider_id=provider_id,
+ )
+
+ if oauth_client_params is None:
+ raise Forbidden("No OAuth client configuration found for this trigger provider")
+
+ # Get OAuth credentials from callback
+ oauth_handler = OAuthHandler()
+ redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{provider}/trigger/callback"
+
+ credentials_response = oauth_handler.get_credentials(
+ tenant_id=tenant_id,
+ user_id=user_id,
+ plugin_id=plugin_id,
+ provider=provider_name,
+ redirect_uri=redirect_uri,
+ system_credentials=oauth_client_params,
+ request=request,
+ )
+
+ credentials = credentials_response.credentials
+ expires_at = credentials_response.expires_at
+
+ if not credentials:
+ raise ValueError("Failed to get OAuth credentials from the provider.")
+
+ # Update subscription builder
+ TriggerSubscriptionBuilderService.update_trigger_subscription_builder(
+ tenant_id=tenant_id,
+ provider_id=provider_id,
+ subscription_builder_id=subscription_builder_id,
+ subscription_builder_updater=SubscriptionBuilderUpdater(
+ credentials=credentials,
+ credential_expires_at=expires_at,
+ ),
+ )
+ # Redirect to OAuth callback page
+ return redirect(f"{dify_config.CONSOLE_WEB_URL}/oauth-callback")
+
+
+parser_oauth_client = (
+ reqparse.RequestParser()
+ .add_argument("client_params", type=dict, required=False, nullable=True, location="json")
+ .add_argument("enabled", type=bool, required=False, nullable=True, location="json")
+)
+
+
+@console_ns.route("/workspaces/current/trigger-provider//oauth/client")
+class TriggerOAuthClientManageApi(Resource):
+ @setup_required
+ @login_required
+ @is_admin_or_owner_required
+ @account_initialization_required
+ def get(self, provider):
+ """Get OAuth client configuration for a provider"""
+ user = current_user
+ assert user.current_tenant_id is not None
+
+ try:
+ provider_id = TriggerProviderID(provider)
+
+ # Get custom OAuth client params if exists
+ custom_params = TriggerProviderService.get_custom_oauth_client_params(
+ tenant_id=user.current_tenant_id,
+ provider_id=provider_id,
+ )
+
+ # Check if custom client is enabled
+ is_custom_enabled = TriggerProviderService.is_oauth_custom_client_enabled(
+ tenant_id=user.current_tenant_id,
+ provider_id=provider_id,
+ )
+ system_client_exists = TriggerProviderService.is_oauth_system_client_exists(
+ tenant_id=user.current_tenant_id,
+ provider_id=provider_id,
+ )
+ provider_controller = TriggerManager.get_trigger_provider(user.current_tenant_id, provider_id)
+ redirect_uri = f"{dify_config.CONSOLE_API_URL}/console/api/oauth/plugin/{provider}/trigger/callback"
+ return jsonable_encoder(
+ {
+ "configured": bool(custom_params or system_client_exists),
+ "system_configured": system_client_exists,
+ "custom_configured": bool(custom_params),
+ "oauth_client_schema": provider_controller.get_oauth_client_schema(),
+ "custom_enabled": is_custom_enabled,
+ "redirect_uri": redirect_uri,
+ "params": custom_params or {},
+ }
+ )
+
+ except Exception as e:
+ logger.exception("Error getting OAuth client", exc_info=e)
+ raise
+
+ @console_ns.expect(parser_oauth_client)
+ @setup_required
+ @login_required
+ @is_admin_or_owner_required
+ @account_initialization_required
+ def post(self, provider):
+ """Configure custom OAuth client for a provider"""
+ user = current_user
+ assert user.current_tenant_id is not None
+
+ args = parser_oauth_client.parse_args()
+
+ try:
+ provider_id = TriggerProviderID(provider)
+ return TriggerProviderService.save_custom_oauth_client_params(
+ tenant_id=user.current_tenant_id,
+ provider_id=provider_id,
+ client_params=args.get("client_params"),
+ enabled=args.get("enabled"),
+ )
+
+ except ValueError as e:
+ raise BadRequest(str(e))
+ except Exception as e:
+ logger.exception("Error configuring OAuth client", exc_info=e)
+ raise
+
+ @setup_required
+ @login_required
+ @is_admin_or_owner_required
+ @account_initialization_required
+ def delete(self, provider):
+ """Remove custom OAuth client configuration"""
+ user = current_user
+ assert user.current_tenant_id is not None
+
+ try:
+ provider_id = TriggerProviderID(provider)
+
+ return TriggerProviderService.delete_custom_oauth_client_params(
+ tenant_id=user.current_tenant_id,
+ provider_id=provider_id,
+ )
+ except ValueError as e:
+ raise BadRequest(str(e))
+ except Exception as e:
+ logger.exception("Error removing OAuth client", exc_info=e)
+ raise
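The two trigger endpoints above communicate through an opaque `context_id` cookie: the initiation handler sets it, and the callback reads and validates it. `OAuthProxyService` itself is outside this hunk; a minimal in-memory sketch of the contract it appears to implement (the class name, storage, and single-use semantics are assumptions for illustration only):

```python
import secrets


class InMemoryOAuthProxyService:
    """Illustrative stand-in for OAuthProxyService; not the real implementation."""

    __MAX_AGE__ = 300  # seconds; should match the cookie max_age set on initiation

    _store: dict[str, dict] = {}

    @classmethod
    def create_proxy_context(cls, user_id: str, tenant_id: str, subscription_builder_id: str) -> str:
        # Opaque, unguessable handle stored server-side; only the handle goes in the cookie.
        context_id = secrets.token_urlsafe(32)
        cls._store[context_id] = {
            "user_id": user_id,
            "tenant_id": tenant_id,
            "subscription_builder_id": subscription_builder_id,
        }
        return context_id

    @classmethod
    def use_proxy_context(cls, context_id: str) -> dict | None:
        # Single-use: pop so a replayed callback cannot reuse the context.
        return cls._store.pop(context_id, None)
```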
diff --git a/api/controllers/console/workspace/workspace.py b/api/controllers/console/workspace/workspace.py
index bc748ac3d2..909a5ce201 100644
--- a/api/controllers/console/workspace/workspace.py
+++ b/api/controllers/console/workspace/workspace.py
@@ -1,8 +1,8 @@
import logging
from flask import request
-from flask_login import current_user
-from flask_restx import Resource, fields, inputs, marshal, marshal_with, reqparse
+from flask_restx import Resource, fields, marshal, marshal_with
+from pydantic import BaseModel, Field
from sqlalchemy import select
from werkzeug.exceptions import Unauthorized
@@ -22,18 +22,47 @@ from controllers.console.wraps import (
cloud_edition_billing_resource_check,
setup_required,
)
+from enums.cloud_plan import CloudPlan
from extensions.ext_database import db
from libs.helper import TimestampField
-from libs.login import login_required
-from models.account import Account, Tenant, TenantStatus
+from libs.login import current_account_with_tenant, login_required
+from models.account import Tenant, TenantStatus
from services.account_service import TenantService
from services.feature_service import FeatureService
from services.file_service import FileService
from services.workspace_service import WorkspaceService
logger = logging.getLogger(__name__)
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class WorkspaceListQuery(BaseModel):
+ page: int = Field(default=1, ge=1, le=99999)
+ limit: int = Field(default=20, ge=1, le=100)
+
+
+class SwitchWorkspacePayload(BaseModel):
+ tenant_id: str
+
+
+class WorkspaceCustomConfigPayload(BaseModel):
+ remove_webapp_brand: bool | None = None
+ replace_webapp_logo: str | None = None
+
+
+class WorkspaceInfoPayload(BaseModel):
+ name: str
+
+
+def reg(cls: type[BaseModel]):
+ console_ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))
+
+
+reg(WorkspaceListQuery)
+reg(SwitchWorkspacePayload)
+reg(WorkspaceCustomConfigPayload)
+reg(WorkspaceInfoPayload)
+
provider_fields = {
"provider_name": fields.String,
"provider_type": fields.String,
@@ -71,8 +100,7 @@ class TenantListApi(Resource):
@login_required
@account_initialization_required
def get(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, current_tenant_id = current_account_with_tenant()
tenants = TenantService.get_join_tenants(current_user)
tenant_dicts = []
@@ -85,8 +113,8 @@ class TenantListApi(Resource):
"name": tenant.name,
"status": tenant.status,
"created_at": tenant.created_at,
- "plan": features.billing.subscription.plan if features.billing.enabled else "sandbox",
- "current": tenant.id == current_user.current_tenant_id if current_user.current_tenant_id else False,
+ "plan": features.billing.subscription.plan if features.billing.enabled else CloudPlan.SANDBOX,
+ "current": tenant.id == current_tenant_id if current_tenant_id else False,
}
tenant_dicts.append(tenant_dict)
@@ -96,16 +124,15 @@ class TenantListApi(Resource):
@console_ns.route("/all-workspaces")
class WorkspaceListApi(Resource):
+ @console_ns.expect(console_ns.models[WorkspaceListQuery.__name__])
@setup_required
@admin_required
def get(self):
- parser = reqparse.RequestParser()
- parser.add_argument("page", type=inputs.int_range(1, 99999), required=False, default=1, location="args")
- parser.add_argument("limit", type=inputs.int_range(1, 100), required=False, default=20, location="args")
- args = parser.parse_args()
+ payload = request.args.to_dict(flat=True) # type: ignore
+ args = WorkspaceListQuery.model_validate(payload)
stmt = select(Tenant).order_by(Tenant.created_at.desc())
- tenants = db.paginate(select=stmt, page=args["page"], per_page=args["limit"], error_out=False)
+ tenants = db.paginate(select=stmt, page=args.page, per_page=args.limit, error_out=False)
has_more = False
if tenants.has_next:
@@ -114,8 +141,8 @@ class WorkspaceListApi(Resource):
return {
"data": marshal(tenants.items, workspace_fields),
"has_more": has_more,
- "limit": args["limit"],
- "page": args["page"],
+ "limit": args.limit,
+ "page": args.page,
"total": tenants.total,
}, 200
@@ -127,12 +154,11 @@ class TenantApi(Resource):
@login_required
@account_initialization_required
@marshal_with(tenant_fields)
- def get(self):
+ def post(self):
if request.path == "/info":
logger.warning("Deprecated URL /info was used.")
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
tenant = current_user.current_tenant
if not tenant:
raise ValueError("No current tenant")
@@ -152,23 +178,22 @@ class TenantApi(Resource):
@console_ns.route("/workspaces/switch")
class SwitchWorkspaceApi(Resource):
+ @console_ns.expect(console_ns.models[SwitchWorkspacePayload.__name__])
@setup_required
@login_required
@account_initialization_required
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("tenant_id", type=str, required=True, location="json")
- args = parser.parse_args()
+ current_user, _ = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = SwitchWorkspacePayload.model_validate(payload)
# check if tenant_id is valid, 403 if not
try:
- TenantService.switch_tenant(current_user, args["tenant_id"])
+ TenantService.switch_tenant(current_user, args.tenant_id)
except Exception:
raise AccountNotLinkTenantError("Account not link tenant")
- new_tenant = db.session.query(Tenant).get(args["tenant_id"]) # Get new tenant
+ new_tenant = db.session.query(Tenant).get(args.tenant_id) # Get new tenant
if new_tenant is None:
raise ValueError("Tenant not found")
@@ -177,26 +202,21 @@ class SwitchWorkspaceApi(Resource):
@console_ns.route("/workspaces/custom-config")
class CustomConfigWorkspaceApi(Resource):
+ @console_ns.expect(console_ns.models[WorkspaceCustomConfigPayload.__name__])
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("workspace_custom")
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("remove_webapp_brand", type=bool, location="json")
- parser.add_argument("replace_webapp_logo", type=str, location="json")
- args = parser.parse_args()
-
- if not current_user.current_tenant_id:
- raise ValueError("No current tenant")
- tenant = db.get_or_404(Tenant, current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = WorkspaceCustomConfigPayload.model_validate(payload)
+ tenant = db.get_or_404(Tenant, current_tenant_id)
custom_config_dict = {
- "remove_webapp_brand": args["remove_webapp_brand"],
- "replace_webapp_logo": args["replace_webapp_logo"]
- if args["replace_webapp_logo"] is not None
+ "remove_webapp_brand": args.remove_webapp_brand,
+ "replace_webapp_logo": args.replace_webapp_logo
+ if args.replace_webapp_logo is not None
else tenant.custom_config_dict.get("replace_webapp_logo"),
}
@@ -213,8 +233,7 @@ class WebappLogoWorkspaceApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check("workspace_custom")
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
+ current_user, _ = current_account_with_tenant()
# check file
if "file" not in request.files:
raise NoFileUploadedError()
@@ -249,21 +268,20 @@ class WebappLogoWorkspaceApi(Resource):
@console_ns.route("/workspaces/info")
class WorkspaceInfoApi(Resource):
+ @console_ns.expect(console_ns.models[WorkspaceInfoPayload.__name__])
@setup_required
@login_required
@account_initialization_required
# Change workspace name
def post(self):
- if not isinstance(current_user, Account):
- raise ValueError("Invalid user account")
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- args = parser.parse_args()
+ _, current_tenant_id = current_account_with_tenant()
+ payload = console_ns.payload or {}
+ args = WorkspaceInfoPayload.model_validate(payload)
- if not current_user.current_tenant_id:
+ if not current_tenant_id:
raise ValueError("No current tenant")
- tenant = db.get_or_404(Tenant, current_user.current_tenant_id)
- tenant.name = args["name"]
+ tenant = db.get_or_404(Tenant, current_tenant_id)
+ tenant.name = args.name
db.session.commit()
return {"result": "success", "tenant": marshal(WorkspaceService.get_tenant_info(tenant), tenant_fields)}
diff --git a/api/controllers/console/wraps.py b/api/controllers/console/wraps.py
index 914d386c78..4654650c77 100644
--- a/api/controllers/console/wraps.py
+++ b/api/controllers/console/wraps.py
@@ -7,12 +7,13 @@ from functools import wraps
from typing import ParamSpec, TypeVar
from flask import abort, request
-from flask_login import current_user
from configs import dify_config
from controllers.console.workspace.error import AccountNotInitializedError
+from enums.cloud_plan import CloudPlan
from extensions.ext_database import db
from extensions.ext_redis import redis_client
+from libs.login import current_account_with_tenant
from models.account import AccountStatus
from models.dataset import RateLimitLog
from models.model import DifySetup
@@ -29,9 +30,8 @@ def account_initialization_required(view: Callable[P, R]):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
# check account initialization
- account = current_user
-
- if account.status == AccountStatus.UNINITIALIZED:
+ current_user, _ = current_account_with_tenant()
+ if current_user.status == AccountStatus.UNINITIALIZED:
raise AccountNotInitializedError()
return view(*args, **kwargs)
@@ -75,7 +75,8 @@ def only_edition_self_hosted(view: Callable[P, R]):
def cloud_edition_billing_enabled(view: Callable[P, R]):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
- features = FeatureService.get_features(current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ features = FeatureService.get_features(current_tenant_id)
if not features.billing.enabled:
abort(403, "Billing feature is not enabled.")
return view(*args, **kwargs)
@@ -87,7 +88,8 @@ def cloud_edition_billing_resource_check(resource: str):
def interceptor(view: Callable[P, R]):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
- features = FeatureService.get_features(current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ features = FeatureService.get_features(current_tenant_id)
if features.billing.enabled:
members = features.members
apps = features.apps
@@ -128,10 +130,11 @@ def cloud_edition_billing_knowledge_limit_check(resource: str):
def interceptor(view: Callable[P, R]):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
- features = FeatureService.get_features(current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ features = FeatureService.get_features(current_tenant_id)
if features.billing.enabled:
if resource == "add_segment":
- if features.billing.subscription.plan == "sandbox":
+ if features.billing.subscription.plan == CloudPlan.SANDBOX:
abort(
403,
"To unlock this feature and elevate your Dify experience, please upgrade to a paid plan.",
@@ -151,10 +154,11 @@ def cloud_edition_billing_rate_limit_check(resource: str):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
if resource == "knowledge":
- knowledge_rate_limit = FeatureService.get_knowledge_rate_limit(current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ knowledge_rate_limit = FeatureService.get_knowledge_rate_limit(current_tenant_id)
if knowledge_rate_limit.enabled:
current_time = int(time.time() * 1000)
- key = f"rate_limit_{current_user.current_tenant_id}"
+ key = f"rate_limit_{current_tenant_id}"
redis_client.zadd(key, {current_time: current_time})
@@ -165,7 +169,7 @@ def cloud_edition_billing_rate_limit_check(resource: str):
if request_count > knowledge_rate_limit.limit:
# add ratelimit record
rate_limit_log = RateLimitLog(
- tenant_id=current_user.current_tenant_id,
+ tenant_id=current_tenant_id,
subscription_plan=knowledge_rate_limit.subscription_plan,
operation="knowledge",
)
@@ -185,14 +189,15 @@ def cloud_utm_record(view: Callable[P, R]):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
with contextlib.suppress(Exception):
- features = FeatureService.get_features(current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ features = FeatureService.get_features(current_tenant_id)
if features.billing.enabled:
utm_info = request.cookies.get("utm_info")
if utm_info:
utm_info_dict: dict = json.loads(utm_info)
- OperationService.record_utm(current_user.current_tenant_id, utm_info_dict)
+ OperationService.record_utm(current_tenant_id, utm_info_dict)
return view(*args, **kwargs)
@@ -242,9 +247,9 @@ def email_password_login_enabled(view: Callable[P, R]):
return decorated
-def email_register_enabled(view):
+def email_register_enabled(view: Callable[P, R]):
@wraps(view)
- def decorated(*args, **kwargs):
+ def decorated(*args: P.args, **kwargs: P.kwargs):
features = FeatureService.get_system_features()
if features.is_allow_register:
return view(*args, **kwargs)
@@ -271,7 +276,8 @@ def enable_change_email(view: Callable[P, R]):
def is_allow_transfer_owner(view: Callable[P, R]):
@wraps(view)
def decorated(*args: P.args, **kwargs: P.kwargs):
- features = FeatureService.get_features(current_user.current_tenant_id)
+ _, current_tenant_id = current_account_with_tenant()
+ features = FeatureService.get_features(current_tenant_id)
if features.is_allow_transfer_workspace:
return view(*args, **kwargs)
@@ -281,12 +287,135 @@ def is_allow_transfer_owner(view: Callable[P, R]):
return decorated
-def knowledge_pipeline_publish_enabled(view):
+def knowledge_pipeline_publish_enabled(view: Callable[P, R]):
@wraps(view)
- def decorated(*args, **kwargs):
- features = FeatureService.get_features(current_user.current_tenant_id)
+ def decorated(*args: P.args, **kwargs: P.kwargs):
+ _, current_tenant_id = current_account_with_tenant()
+ features = FeatureService.get_features(current_tenant_id)
if features.knowledge_pipeline.publish_enabled:
return view(*args, **kwargs)
abort(403)
return decorated
+
+
+def edit_permission_required(f: Callable[P, R]):
+ @wraps(f)
+ def decorated_function(*args: P.args, **kwargs: P.kwargs):
+ from werkzeug.exceptions import Forbidden
+
+ from libs.login import current_user
+ from models import Account
+
+ user = current_user._get_current_object() # type: ignore
+ if not isinstance(user, Account):
+ raise Forbidden()
+ if not current_user.has_edit_permission:
+ raise Forbidden()
+ return f(*args, **kwargs)
+
+ return decorated_function
+
+
+def is_admin_or_owner_required(f: Callable[P, R]):
+ @wraps(f)
+ def decorated_function(*args: P.args, **kwargs: P.kwargs):
+ from werkzeug.exceptions import Forbidden
+
+ from libs.login import current_user
+ from models import Account
+
+ user = current_user._get_current_object()
+ if not isinstance(user, Account) or not user.is_admin_or_owner:
+ raise Forbidden()
+ return f(*args, **kwargs)
+
+ return decorated_function
+
+
+def annotation_import_rate_limit(view: Callable[P, R]):
+ """
+ Rate limiting decorator for annotation import operations.
+
+ Implements sliding window rate limiting with two tiers:
+ - Short-term: Configurable requests per minute (default: 5)
+ - Long-term: Configurable requests per hour (default: 20)
+
+ Uses Redis ZSET for distributed rate limiting across multiple instances.
+ """
+
+ @wraps(view)
+ def decorated(*args: P.args, **kwargs: P.kwargs):
+ _, current_tenant_id = current_account_with_tenant()
+ current_time = int(time.time() * 1000)
+
+ # Check per-minute rate limit
+ minute_key = f"annotation_import_rate_limit:{current_tenant_id}:1min"
+ redis_client.zadd(minute_key, {current_time: current_time})
+ redis_client.zremrangebyscore(minute_key, 0, current_time - 60000)
+ minute_count = redis_client.zcard(minute_key)
+ redis_client.expire(minute_key, 120) # 2 minutes TTL
+
+ if minute_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE:
+ abort(
+ 429,
+ f"Too many annotation import requests. Maximum {dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_MINUTE} "
+ f"requests per minute allowed. Please try again later.",
+ )
+
+ # Check per-hour rate limit
+ hour_key = f"annotation_import_rate_limit:{current_tenant_id}:1hour"
+ redis_client.zadd(hour_key, {current_time: current_time})
+ redis_client.zremrangebyscore(hour_key, 0, current_time - 3600000)
+ hour_count = redis_client.zcard(hour_key)
+ redis_client.expire(hour_key, 7200) # 2 hours TTL
+
+ if hour_count > dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR:
+ abort(
+ 429,
+ f"Too many annotation import requests. Maximum {dify_config.ANNOTATION_IMPORT_RATE_LIMIT_PER_HOUR} "
+ f"requests per hour allowed. Please try again later.",
+ )
+
+ return view(*args, **kwargs)
+
+ return decorated
+
+
+def annotation_import_concurrency_limit(view: Callable[P, R]):
+ """
+ Concurrency control decorator for annotation import operations.
+
+ Limits the number of concurrent import tasks per tenant to prevent
+ resource exhaustion and ensure fair resource allocation.
+
+ Uses Redis ZSET to track active import jobs with automatic cleanup
+ of stale entries (jobs older than 2 minutes).
+ """
+
+ @wraps(view)
+ def decorated(*args: P.args, **kwargs: P.kwargs):
+ _, current_tenant_id = current_account_with_tenant()
+ current_time = int(time.time() * 1000)
+
+ active_jobs_key = f"annotation_import_active:{current_tenant_id}"
+
+ # Clean up stale entries (jobs that should have completed or timed out)
+ stale_threshold = current_time - 120000 # 2 minutes ago
+ redis_client.zremrangebyscore(active_jobs_key, 0, stale_threshold)
+
+ # Check current active job count
+ active_count = redis_client.zcard(active_jobs_key)
+
+ if active_count >= dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT:
+ abort(
+ 429,
+ f"Too many concurrent import tasks. Maximum {dify_config.ANNOTATION_IMPORT_MAX_CONCURRENT} "
+ f"concurrent imports allowed per workspace. Please wait for existing imports to complete.",
+ )
+
+ # Allow the request to proceed
+ # The actual job registration will happen in the service layer
+ return view(*args, **kwargs)
+
+ return decorated
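Both annotation decorators are built on the same Redis ZSET primitive: record the current millisecond, trim everything outside the window, count what remains, and refresh the key's TTL. Pulled out as a standalone helper (illustrative; the diff inlines it per tier):

```python
import time


def sliding_window_count(redis_client, key: str, window_ms: int, ttl_seconds: int) -> int:
    """Record one hit under `key` and return the hit count within the trailing window."""
    now_ms = int(time.time() * 1000)
    redis_client.zadd(key, {now_ms: now_ms})                   # record this request
    redis_client.zremrangebyscore(key, 0, now_ms - window_ms)  # drop hits outside the window
    count = redis_client.zcard(key)                            # hits still inside the window
    redis_client.expire(key, ttl_seconds)                      # let idle keys expire
    return count


# Usage mirroring the per-minute tier above:
#   if sliding_window_count(redis, f"annotation_import_rate_limit:{tenant_id}:1min",
#                           window_ms=60_000, ttl_seconds=120) > per_minute_limit:
#       abort(429, "...")
# The concurrency decorator uses the same ZSET trick but only trims and counts;
# registering the active job is deferred to the service layer.
```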
diff --git a/api/controllers/files/image_preview.py b/api/controllers/files/image_preview.py
index 0efee0c377..64f47f426a 100644
--- a/api/controllers/files/image_preview.py
+++ b/api/controllers/files/image_preview.py
@@ -1,7 +1,8 @@
from urllib.parse import quote
from flask import Response, request
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field
from werkzeug.exceptions import NotFound
import services
@@ -11,22 +12,55 @@ from extensions.ext_database import db
from services.account_service import TenantService
from services.file_service import FileService
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class FileSignatureQuery(BaseModel):
+ timestamp: str = Field(..., description="Unix timestamp used in the signature")
+ nonce: str = Field(..., description="Random string for signature")
+ sign: str = Field(..., description="HMAC signature")
+
+
+class FilePreviewQuery(FileSignatureQuery):
+ as_attachment: bool = Field(default=False, description="Whether to download as attachment")
+
+
+files_ns.schema_model(
+ FileSignatureQuery.__name__, FileSignatureQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+files_ns.schema_model(
+ FilePreviewQuery.__name__, FilePreviewQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
@files_ns.route("//image-preview")
class ImagePreviewApi(Resource):
- """
- Deprecated
- """
+ """Deprecated endpoint for retrieving image previews."""
+ @files_ns.doc("get_image_preview")
+ @files_ns.doc(description="Retrieve a signed image preview for a file")
+ @files_ns.doc(
+ params={
+ "file_id": "ID of the file to preview",
+ "timestamp": "Unix timestamp used in the signature",
+ "nonce": "Random string used in the signature",
+ "sign": "HMAC signature verifying the request",
+ }
+ )
+ @files_ns.doc(
+ responses={
+ 200: "Image preview returned successfully",
+ 400: "Missing or invalid signature parameters",
+ 415: "Unsupported file type",
+ }
+ )
def get(self, file_id):
file_id = str(file_id)
- timestamp = request.args.get("timestamp")
- nonce = request.args.get("nonce")
- sign = request.args.get("sign")
-
- if not timestamp or not nonce or not sign:
- return {"content": "Invalid request."}, 400
+ args = FileSignatureQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
+ timestamp = args.timestamp
+ nonce = args.nonce
+ sign = args.sign
try:
generator, mimetype = FileService(db.engine).get_image_preview(
@@ -43,26 +77,36 @@ class ImagePreviewApi(Resource):
@files_ns.route("//file-preview")
class FilePreviewApi(Resource):
+ @files_ns.doc("get_file_preview")
+ @files_ns.doc(description="Download a file preview or attachment using signed parameters")
+ @files_ns.doc(
+ params={
+ "file_id": "ID of the file to preview",
+ "timestamp": "Unix timestamp used in the signature",
+ "nonce": "Random string used in the signature",
+ "sign": "HMAC signature verifying the request",
+ "as_attachment": "Whether to download the file as an attachment",
+ }
+ )
+ @files_ns.doc(
+ responses={
+ 200: "File stream returned successfully",
+ 400: "Missing or invalid signature parameters",
+ 404: "File not found",
+ 415: "Unsupported file type",
+ }
+ )
def get(self, file_id):
file_id = str(file_id)
- parser = reqparse.RequestParser()
- parser.add_argument("timestamp", type=str, required=True, location="args")
- parser.add_argument("nonce", type=str, required=True, location="args")
- parser.add_argument("sign", type=str, required=True, location="args")
- parser.add_argument("as_attachment", type=bool, required=False, default=False, location="args")
-
- args = parser.parse_args()
-
- if not args["timestamp"] or not args["nonce"] or not args["sign"]:
- return {"content": "Invalid request."}, 400
+ args = FilePreviewQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
try:
generator, upload_file = FileService(db.engine).get_file_generator_by_file_id(
file_id=file_id,
- timestamp=args["timestamp"],
- nonce=args["nonce"],
- sign=args["sign"],
+ timestamp=args.timestamp,
+ nonce=args.nonce,
+ sign=args.sign,
)
except services.errors.file.UnsupportedFileTypeError:
raise UnsupportedFileTypeError()
@@ -89,7 +133,7 @@ class FilePreviewApi(Resource):
response.headers["Accept-Ranges"] = "bytes"
if upload_file.size > 0:
response.headers["Content-Length"] = str(upload_file.size)
- if args["as_attachment"]:
+ if args.as_attachment:
encoded_filename = quote(upload_file.name)
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
response.headers["Content-Type"] = "application/octet-stream"
@@ -99,6 +143,20 @@ class FilePreviewApi(Resource):
@files_ns.route("/workspaces//webapp-logo")
class WorkspaceWebappLogoApi(Resource):
+ @files_ns.doc("get_workspace_webapp_logo")
+ @files_ns.doc(description="Fetch the custom webapp logo for a workspace")
+ @files_ns.doc(
+ params={
+ "workspace_id": "Workspace identifier",
+ }
+ )
+ @files_ns.doc(
+ responses={
+ 200: "Logo returned successfully",
+ 404: "Webapp logo not configured",
+ 415: "Unsupported file type",
+ }
+ )
def get(self, workspace_id):
workspace_id = str(workspace_id)
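One behavioral note: the old handlers answered missing `timestamp`/`nonce`/`sign` with a hand-rolled `400`, while `model_validate` raises `pydantic.ValidationError`. This presumes an error handler upstream translates the exception into a 400; a minimal sketch of such a handler (assumed, not shown in this diff):

```python
from pydantic import ValidationError


@files_ns.errorhandler(ValidationError)
def handle_validation_error(error: ValidationError):
    # Mirror the old hand-rolled response for missing or invalid query params.
    return {"content": "Invalid request."}, 400
```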
diff --git a/api/controllers/files/tool_files.py b/api/controllers/files/tool_files.py
index 42207b878c..c487a0a915 100644
--- a/api/controllers/files/tool_files.py
+++ b/api/controllers/files/tool_files.py
@@ -1,7 +1,8 @@
from urllib.parse import quote
-from flask import Response
-from flask_restx import Resource, reqparse
+from flask import Response, request
+from flask_restx import Resource
+from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, NotFound
from controllers.common.errors import UnsupportedFileTypeError
@@ -10,23 +11,48 @@ from core.tools.signature import verify_tool_file_signature
from core.tools.tool_file_manager import ToolFileManager
from extensions.ext_database import db as global_db
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class ToolFileQuery(BaseModel):
+ timestamp: str = Field(..., description="Unix timestamp")
+ nonce: str = Field(..., description="Random nonce")
+ sign: str = Field(..., description="HMAC signature")
+ as_attachment: bool = Field(default=False, description="Download as attachment")
+
+
+files_ns.schema_model(
+ ToolFileQuery.__name__, ToolFileQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
+)
+
@files_ns.route("/tools/.")
class ToolFileApi(Resource):
+ @files_ns.doc("get_tool_file")
+ @files_ns.doc(description="Download a tool file by ID using signed parameters")
+ @files_ns.doc(
+ params={
+ "file_id": "Tool file identifier",
+ "extension": "Expected file extension",
+ "timestamp": "Unix timestamp used in the signature",
+ "nonce": "Random string used in the signature",
+ "sign": "HMAC signature verifying the request",
+ "as_attachment": "Whether to download the file as an attachment",
+ }
+ )
+ @files_ns.doc(
+ responses={
+ 200: "Tool file stream returned successfully",
+ 403: "Forbidden - invalid signature",
+ 404: "File not found",
+ 415: "Unsupported file type",
+ }
+ )
def get(self, file_id, extension):
file_id = str(file_id)
- parser = reqparse.RequestParser()
-
- parser.add_argument("timestamp", type=str, required=True, location="args")
- parser.add_argument("nonce", type=str, required=True, location="args")
- parser.add_argument("sign", type=str, required=True, location="args")
- parser.add_argument("as_attachment", type=bool, required=False, default=False, location="args")
-
- args = parser.parse_args()
- if not verify_tool_file_signature(
- file_id=file_id, timestamp=args["timestamp"], nonce=args["nonce"], sign=args["sign"]
- ):
+ args = ToolFileQuery.model_validate(request.args.to_dict())
+ if not verify_tool_file_signature(file_id=file_id, timestamp=args.timestamp, nonce=args.nonce, sign=args.sign):
raise Forbidden("Invalid request.")
try:
@@ -48,7 +74,7 @@ class ToolFileApi(Resource):
)
if tool_file.size > 0:
response.headers["Content-Length"] = str(tool_file.size)
- if args["as_attachment"]:
+ if args.as_attachment:
encoded_filename = quote(tool_file.name)
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
diff --git a/api/controllers/files/upload.py b/api/controllers/files/upload.py
index 206a5d1cc2..6096a87c56 100644
--- a/api/controllers/files/upload.py
+++ b/api/controllers/files/upload.py
@@ -1,42 +1,45 @@
from mimetypes import guess_extension
-from flask_restx import Resource, reqparse
+from flask import request
+from flask_restx import Resource
from flask_restx.api import HTTPStatus
+from pydantic import BaseModel, Field
from werkzeug.datastructures import FileStorage
from werkzeug.exceptions import Forbidden
import services
-from controllers.common.errors import (
- FileTooLargeError,
- UnsupportedFileTypeError,
-)
-from controllers.console.wraps import setup_required
-from controllers.files import files_ns
-from controllers.inner_api.plugin.wraps import get_user
from core.file.helpers import verify_plugin_file_signature
from core.tools.tool_file_manager import ToolFileManager
from fields.file_fields import build_file_model
-# Define parser for both documentation and validation
-upload_parser = reqparse.RequestParser()
-upload_parser.add_argument("file", location="files", type=FileStorage, required=True, help="File to upload")
-upload_parser.add_argument(
- "timestamp", type=str, required=True, location="args", help="Unix timestamp for signature verification"
+from ..common.errors import (
+ FileTooLargeError,
+ UnsupportedFileTypeError,
)
-upload_parser.add_argument(
- "nonce", type=str, required=True, location="args", help="Random string for signature verification"
+from ..console.wraps import setup_required
+from ..files import files_ns
+from ..inner_api.plugin.wraps import get_user
+
+DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
+
+
+class PluginUploadQuery(BaseModel):
+ timestamp: str = Field(..., description="Unix timestamp for signature verification")
+ nonce: str = Field(..., description="Random nonce for signature verification")
+ sign: str = Field(..., description="HMAC signature")
+ tenant_id: str = Field(..., description="Tenant identifier")
+ user_id: str | None = Field(default=None, description="User identifier")
+
+
+files_ns.schema_model(
+ PluginUploadQuery.__name__, PluginUploadQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
)
-upload_parser.add_argument(
- "sign", type=str, required=True, location="args", help="HMAC signature for request validation"
-)
-upload_parser.add_argument("tenant_id", type=str, required=True, location="args", help="Tenant identifier")
-upload_parser.add_argument("user_id", type=str, required=False, location="args", help="User identifier")
@files_ns.route("/upload/for-plugin")
class PluginUploadFileApi(Resource):
@setup_required
- @files_ns.expect(upload_parser)
+ @files_ns.expect(files_ns.models[PluginUploadQuery.__name__])
@files_ns.doc("upload_plugin_file")
@files_ns.doc(description="Upload a file for plugin usage with signature verification")
@files_ns.doc(
@@ -64,15 +67,17 @@ class PluginUploadFileApi(Resource):
FileTooLargeError: File exceeds size limit
UnsupportedFileTypeError: File type not supported
"""
- # Parse and validate all arguments
- args = upload_parser.parse_args()
+ args = PluginUploadQuery.model_validate(request.args.to_dict(flat=True)) # type: ignore
- file: FileStorage = args["file"]
- timestamp: str = args["timestamp"]
- nonce: str = args["nonce"]
- sign: str = args["sign"]
- tenant_id: str = args["tenant_id"]
- user_id: str | None = args.get("user_id")
+ file: FileStorage | None = request.files.get("file")
+ if file is None:
+ raise Forbidden("File is required.")
+
+ timestamp = args.timestamp
+ nonce = args.nonce
+ sign = args.sign
+ tenant_id = args.tenant_id
+ user_id = args.user_id
user = get_user(tenant_id, user_id)
filename: str | None = file.filename
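The plugin upload now splits its inputs: the binary part comes from `request.files`, while the signed parameters stay in the query string and are validated as `PluginUploadQuery`. A rough client-side call under those assumptions (URL and values illustrative):

```python
import requests  # any HTTP client works; requests is used for brevity

resp = requests.post(
    "https://example.com/files/upload/for-plugin",
    params={
        "timestamp": "1717000000",
        "nonce": "random-nonce",
        "sign": "hmac-signature",
        "tenant_id": "tenant-id",
    },
    files={"file": ("report.pdf", b"%PDF-1.7 ...", "application/pdf")},
)
resp.raise_for_status()
```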
diff --git a/api/controllers/inner_api/mail.py b/api/controllers/inner_api/mail.py
index 0b2be03e43..885ab7b78d 100644
--- a/api/controllers/inner_api/mail.py
+++ b/api/controllers/inner_api/mail.py
@@ -1,27 +1,38 @@
-from flask_restx import Resource, reqparse
+from typing import Any
+from flask_restx import Resource
+from pydantic import BaseModel, Field
+
+from controllers.common.schema import register_schema_model
from controllers.console.wraps import setup_required
from controllers.inner_api import inner_api_ns
from controllers.inner_api.wraps import billing_inner_api_only, enterprise_inner_api_only
from tasks.mail_inner_task import send_inner_email_task
-_mail_parser = reqparse.RequestParser()
-_mail_parser.add_argument("to", type=str, action="append", required=True)
-_mail_parser.add_argument("subject", type=str, required=True)
-_mail_parser.add_argument("body", type=str, required=True)
-_mail_parser.add_argument("substitutions", type=dict, required=False)
+
+class InnerMailPayload(BaseModel):
+ to: list[str] = Field(description="Recipient email addresses", min_length=1)
+ subject: str
+ body: str
+ substitutions: dict[str, Any] | None = None
+
+
+register_schema_model(inner_api_ns, InnerMailPayload)
class BaseMail(Resource):
"""Shared logic for sending an inner email."""
+ @inner_api_ns.doc("send_inner_mail")
+ @inner_api_ns.doc(description="Send internal email")
+ @inner_api_ns.expect(inner_api_ns.models[InnerMailPayload.__name__])
def post(self):
- args = _mail_parser.parse_args()
+ args = InnerMailPayload.model_validate(inner_api_ns.payload or {})
send_inner_email_task.delay(
- to=args["to"],
- subject=args["subject"],
- body=args["body"],
- substitutions=args["substitutions"],
+ to=args.to,
+ subject=args.subject,
+ body=args.body,
+ substitutions=args.substitutions, # type: ignore
)
return {"message": "success"}, 200
@@ -32,7 +43,7 @@ class EnterpriseMail(BaseMail):
@inner_api_ns.doc("send_enterprise_mail")
@inner_api_ns.doc(description="Send internal email for enterprise features")
- @inner_api_ns.expect(_mail_parser)
+ @inner_api_ns.expect(inner_api_ns.models[InnerMailPayload.__name__])
@inner_api_ns.doc(
responses={200: "Email sent successfully", 401: "Unauthorized - invalid API key", 404: "Service not available"}
)
@@ -54,7 +65,7 @@ class BillingMail(BaseMail):
@inner_api_ns.doc("send_billing_mail")
@inner_api_ns.doc(description="Send internal email for billing notifications")
- @inner_api_ns.expect(_mail_parser)
+ @inner_api_ns.expect(inner_api_ns.models[InnerMailPayload.__name__])
@inner_api_ns.doc(
responses={200: "Email sent successfully", 401: "Unauthorized - invalid API key", 404: "Service not available"}
)
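`register_schema_model`/`register_schema_models` (from `controllers/common/schema.py`) are referenced here but not shown in this diff. Given the inline `reg()` helper in `workspace.py`, a plausible shape is:

```python
from flask_restx import Namespace
from pydantic import BaseModel

DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"


def register_schema_model(ns: Namespace, cls: type[BaseModel]) -> None:
    ns.schema_model(cls.__name__, cls.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0))


def register_schema_models(ns: Namespace, *classes: type[BaseModel]) -> None:
    for cls in classes:
        register_schema_model(ns, cls)
```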
diff --git a/api/controllers/inner_api/plugin/plugin.py b/api/controllers/inner_api/plugin/plugin.py
index deab50076d..e4fe8d44bf 100644
--- a/api/controllers/inner_api/plugin/plugin.py
+++ b/api/controllers/inner_api/plugin/plugin.py
@@ -31,7 +31,7 @@ from core.plugin.entities.request import (
)
from core.tools.entities.tool_entities import ToolProviderType
from libs.helper import length_prefixed_response
-from models.account import Account, Tenant
+from models import Account, Tenant
from models.model import EndUser
diff --git a/api/controllers/inner_api/plugin/wraps.py b/api/controllers/inner_api/plugin/wraps.py
index 1f588bedce..edf3ac393c 100644
--- a/api/controllers/inner_api/plugin/wraps.py
+++ b/api/controllers/inner_api/plugin/wraps.py
@@ -1,10 +1,9 @@
from collections.abc import Callable
from functools import wraps
-from typing import ParamSpec, TypeVar, cast
+from typing import ParamSpec, TypeVar
from flask import current_app, request
from flask_login import user_logged_in
-from flask_restx import reqparse
from pydantic import BaseModel
from sqlalchemy.orm import Session
@@ -17,6 +16,11 @@ P = ParamSpec("P")
R = TypeVar("R")
+class TenantUserPayload(BaseModel):
+ tenant_id: str
+ user_id: str
+
+
def get_user(tenant_id: str, user_id: str | None) -> EndUser:
"""
Get current user
@@ -67,56 +71,45 @@ def get_user(tenant_id: str, user_id: str | None) -> EndUser:
return user_model
-def get_user_tenant(view: Callable[P, R] | None = None):
- def decorator(view_func: Callable[P, R]):
- @wraps(view_func)
- def decorated_view(*args: P.args, **kwargs: P.kwargs):
- # fetch json body
- parser = reqparse.RequestParser()
- parser.add_argument("tenant_id", type=str, required=True, location="json")
- parser.add_argument("user_id", type=str, required=True, location="json")
+def get_user_tenant(view_func: Callable[P, R]):
+ @wraps(view_func)
+ def decorated_view(*args: P.args, **kwargs: P.kwargs):
+ payload = TenantUserPayload.model_validate(request.get_json(silent=True) or {})
- p = parser.parse_args()
+ user_id = payload.user_id
+ tenant_id = payload.tenant_id
- user_id = cast(str, p.get("user_id"))
- tenant_id = cast(str, p.get("tenant_id"))
+ if not tenant_id:
+ raise ValueError("tenant_id is required")
- if not tenant_id:
- raise ValueError("tenant_id is required")
+ if not user_id:
+ user_id = DefaultEndUserSessionID.DEFAULT_SESSION_ID
- if not user_id:
- user_id = DefaultEndUserSessionID.DEFAULT_SESSION_ID
-
- try:
- tenant_model = (
- db.session.query(Tenant)
- .where(
- Tenant.id == tenant_id,
- )
- .first()
+ try:
+ tenant_model = (
+ db.session.query(Tenant)
+ .where(
+ Tenant.id == tenant_id,
)
- except Exception:
- raise ValueError("tenant not found")
+ .first()
+ )
+ except Exception:
+ raise ValueError("tenant not found")
- if not tenant_model:
- raise ValueError("tenant not found")
+ if not tenant_model:
+ raise ValueError("tenant not found")
- kwargs["tenant_model"] = tenant_model
+ kwargs["tenant_model"] = tenant_model
- user = get_user(tenant_id, user_id)
- kwargs["user_model"] = user
+ user = get_user(tenant_id, user_id)
+ kwargs["user_model"] = user
- current_app.login_manager._update_request_context_with_user(user) # type: ignore
- user_logged_in.send(current_app._get_current_object(), user=current_user) # type: ignore
+ current_app.login_manager._update_request_context_with_user(user) # type: ignore
+ user_logged_in.send(current_app._get_current_object(), user=current_user) # type: ignore
- return view_func(*args, **kwargs)
+ return view_func(*args, **kwargs)
- return decorated_view
-
- if view is None:
- return decorator
- else:
- return decorator(view)
+ return decorated_view
def plugin_data(view: Callable[P, R] | None = None, *, payload_type: type[BaseModel]):
diff --git a/api/controllers/inner_api/workspace/workspace.py b/api/controllers/inner_api/workspace/workspace.py
index 47f0240cd2..a5746abafa 100644
--- a/api/controllers/inner_api/workspace/workspace.py
+++ b/api/controllers/inner_api/workspace/workspace.py
@@ -1,22 +1,37 @@
import json
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel
+from controllers.common.schema import register_schema_models
from controllers.console.wraps import setup_required
from controllers.inner_api import inner_api_ns
from controllers.inner_api.wraps import enterprise_inner_api_only
from events.tenant_event import tenant_was_created
from extensions.ext_database import db
-from models.account import Account
+from models import Account
from services.account_service import TenantService
+class WorkspaceCreatePayload(BaseModel):
+ name: str
+ owner_email: str
+
+
+class WorkspaceOwnerlessPayload(BaseModel):
+ name: str
+
+
+register_schema_models(inner_api_ns, WorkspaceCreatePayload, WorkspaceOwnerlessPayload)
+
+
@inner_api_ns.route("/enterprise/workspace")
class EnterpriseWorkspace(Resource):
@setup_required
@enterprise_inner_api_only
@inner_api_ns.doc("create_enterprise_workspace")
@inner_api_ns.doc(description="Create a new enterprise workspace with owner assignment")
+ @inner_api_ns.expect(inner_api_ns.models[WorkspaceCreatePayload.__name__])
@inner_api_ns.doc(
responses={
200: "Workspace created successfully",
@@ -25,16 +40,13 @@ class EnterpriseWorkspace(Resource):
}
)
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- parser.add_argument("owner_email", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = WorkspaceCreatePayload.model_validate(inner_api_ns.payload or {})
- account = db.session.query(Account).filter_by(email=args["owner_email"]).first()
+ account = db.session.query(Account).filter_by(email=args.owner_email).first()
if account is None:
return {"message": "owner account not found."}, 404
- tenant = TenantService.create_tenant(args["name"], is_from_dashboard=True)
+ tenant = TenantService.create_tenant(args.name, is_from_dashboard=True)
TenantService.create_tenant_member(tenant, account, role="owner")
tenant_was_created.send(tenant)
@@ -60,6 +72,7 @@ class EnterpriseWorkspaceNoOwnerEmail(Resource):
@enterprise_inner_api_only
@inner_api_ns.doc("create_enterprise_workspace_ownerless")
@inner_api_ns.doc(description="Create a new enterprise workspace without initial owner assignment")
+ @inner_api_ns.expect(inner_api_ns.models[WorkspaceOwnerlessPayload.__name__])
@inner_api_ns.doc(
responses={
200: "Workspace created successfully",
@@ -68,11 +81,9 @@ class EnterpriseWorkspaceNoOwnerEmail(Resource):
}
)
def post(self):
- parser = reqparse.RequestParser()
- parser.add_argument("name", type=str, required=True, location="json")
- args = parser.parse_args()
+ args = WorkspaceOwnerlessPayload.model_validate(inner_api_ns.payload or {})
- tenant = TenantService.create_tenant(args["name"], is_from_dashboard=True)
+ tenant = TenantService.create_tenant(args.name, is_from_dashboard=True)
tenant_was_created.send(tenant)
diff --git a/api/controllers/mcp/mcp.py b/api/controllers/mcp/mcp.py
index a8629dca20..90137a10ba 100644
--- a/api/controllers/mcp/mcp.py
+++ b/api/controllers/mcp/mcp.py
@@ -1,10 +1,11 @@
-from typing import Union
+from typing import Any, Union
from flask import Response
-from flask_restx import Resource, reqparse
-from pydantic import ValidationError
+from flask_restx import Resource
+from pydantic import BaseModel, Field, ValidationError
from sqlalchemy.orm import Session
+from controllers.common.schema import register_schema_model
from controllers.console.app.mcp_server import AppMCPServerStatus
from controllers.mcp import mcp_ns
from core.app.app_config.entities import VariableEntity
@@ -24,29 +25,19 @@ class MCPRequestError(Exception):
super().__init__(message)
-def int_or_str(value):
- """Validate that a value is either an integer or string."""
- if isinstance(value, (int, str)):
- return value
- else:
- return None
+class MCPRequestPayload(BaseModel):
+ jsonrpc: str = Field(description="JSON-RPC version (should be '2.0')")
+ method: str = Field(description="The method to invoke")
+ params: dict[str, Any] | None = Field(default=None, description="Parameters for the method")
+ id: int | str | None = Field(default=None, description="Request ID for tracking responses")
-# Define parser for both documentation and validation
-mcp_request_parser = reqparse.RequestParser()
-mcp_request_parser.add_argument(
- "jsonrpc", type=str, required=True, location="json", help="JSON-RPC version (should be '2.0')"
-)
-mcp_request_parser.add_argument("method", type=str, required=True, location="json", help="The method to invoke")
-mcp_request_parser.add_argument("params", type=dict, required=False, location="json", help="Parameters for the method")
-mcp_request_parser.add_argument(
- "id", type=int_or_str, required=False, location="json", help="Request ID for tracking responses"
-)
+register_schema_model(mcp_ns, MCPRequestPayload)
@mcp_ns.route("/server//mcp")
class MCPAppApi(Resource):
- @mcp_ns.expect(mcp_request_parser)
+ @mcp_ns.expect(mcp_ns.models[MCPRequestPayload.__name__])
@mcp_ns.doc("handle_mcp_request")
@mcp_ns.doc(description="Handle Model Context Protocol (MCP) requests for a specific server")
@mcp_ns.doc(params={"server_code": "Unique identifier for the MCP server"})
@@ -72,9 +63,9 @@ class MCPAppApi(Resource):
Raises:
ValidationError: Invalid request format or parameters
"""
- args = mcp_request_parser.parse_args()
- request_id: Union[int, str] | None = args.get("id")
- mcp_request = self._parse_mcp_request(args)
+ args = MCPRequestPayload.model_validate(mcp_ns.payload or {})
+ request_id: Union[int, str] | None = args.id
+ mcp_request = self._parse_mcp_request(args.model_dump(exclude_none=True))
with Session(db.engine, expire_on_commit=False) as session:
# Get MCP server and app
@@ -195,15 +186,16 @@ class MCPAppApi(Resource):
except ValidationError as e:
raise MCPRequestError(mcp_types.INVALID_PARAMS, f"Invalid MCP request: {str(e)}")
- def _retrieve_end_user(self, tenant_id: str, mcp_server_id: str, session: Session) -> EndUser | None:
- """Get end user from existing session - optimized query"""
- return (
- session.query(EndUser)
- .where(EndUser.tenant_id == tenant_id)
- .where(EndUser.session_id == mcp_server_id)
- .where(EndUser.type == "mcp")
- .first()
- )
+ def _retrieve_end_user(self, tenant_id: str, mcp_server_id: str) -> EndUser | None:
+ """Get end user - manages its own database session"""
+ with Session(db.engine, expire_on_commit=False) as session, session.begin():
+ return (
+ session.query(EndUser)
+ .where(EndUser.tenant_id == tenant_id)
+ .where(EndUser.session_id == mcp_server_id)
+ .where(EndUser.type == "mcp")
+ .first()
+ )
def _create_end_user(
self, client_name: str, tenant_id: str, app_id: str, mcp_server_id: str, session: Session
@@ -231,7 +223,7 @@ class MCPAppApi(Resource):
request_id: Union[int, str],
) -> mcp_types.JSONRPCResponse | mcp_types.JSONRPCError | None:
"""Handle MCP request and return response"""
- end_user = self._retrieve_end_user(mcp_server.tenant_id, mcp_server.id, session)
+ end_user = self._retrieve_end_user(mcp_server.tenant_id, mcp_server.id)
if not end_user and isinstance(mcp_request.root, mcp_types.InitializeRequest):
client_info = mcp_request.root.params.clientInfo
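The new model makes the old `int_or_str` shim unnecessary: `id: int | str | None` validates both JSON-RPC id styles and rejects anything else. A standalone check (the model is redeclared here for self-containment):

```python
from typing import Any

from pydantic import BaseModel, Field


class MCPRequestPayload(BaseModel):  # as added in the diff above
    jsonrpc: str = Field(description="JSON-RPC version (should be '2.0')")
    method: str = Field(description="The method to invoke")
    params: dict[str, Any] | None = None
    id: int | str | None = None


req = MCPRequestPayload.model_validate({"jsonrpc": "2.0", "method": "ping", "id": "abc-1"})
assert req.id == "abc-1"  # string ids pass through unchanged
req = MCPRequestPayload.model_validate({"jsonrpc": "2.0", "method": "ping", "id": 7})
assert req.id == 7        # integer ids too; smart union keeps the original type
```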
diff --git a/api/controllers/service_api/app/annotation.py b/api/controllers/service_api/app/annotation.py
index ad1bdc7334..63c373b50f 100644
--- a/api/controllers/service_api/app/annotation.py
+++ b/api/controllers/service_api/app/annotation.py
@@ -1,39 +1,37 @@
from typing import Literal
from flask import request
-from flask_restx import Api, Namespace, Resource, fields, reqparse
+from flask_restx import Api, Namespace, Resource, fields
from flask_restx.api import HTTPStatus
-from werkzeug.exceptions import Forbidden
+from pydantic import BaseModel, Field
+from controllers.common.schema import register_schema_models
+from controllers.console.wraps import edit_permission_required
from controllers.service_api import service_api_ns
from controllers.service_api.wraps import validate_app_token
from extensions.ext_redis import redis_client
from fields.annotation_fields import annotation_fields, build_annotation_model
-from libs.login import current_user
-from models.account import Account
from models.model import App
from services.annotation_service import AppAnnotationService
-# Define parsers for annotation API
-annotation_create_parser = reqparse.RequestParser()
-annotation_create_parser.add_argument("question", required=True, type=str, location="json", help="Annotation question")
-annotation_create_parser.add_argument("answer", required=True, type=str, location="json", help="Annotation answer")
-annotation_reply_action_parser = reqparse.RequestParser()
-annotation_reply_action_parser.add_argument(
- "score_threshold", required=True, type=float, location="json", help="Score threshold for annotation matching"
-)
-annotation_reply_action_parser.add_argument(
- "embedding_provider_name", required=True, type=str, location="json", help="Embedding provider name"
-)
-annotation_reply_action_parser.add_argument(
- "embedding_model_name", required=True, type=str, location="json", help="Embedding model name"
-)
+class AnnotationCreatePayload(BaseModel):
+ question: str = Field(description="Annotation question")
+ answer: str = Field(description="Annotation answer")
+
+
+class AnnotationReplyActionPayload(BaseModel):
+ score_threshold: float = Field(description="Score threshold for annotation matching")
+ embedding_provider_name: str = Field(description="Embedding provider name")
+ embedding_model_name: str = Field(description="Embedding model name")
+
+
+register_schema_models(service_api_ns, AnnotationCreatePayload, AnnotationReplyActionPayload)
@service_api_ns.route("/apps/annotation-reply/")
class AnnotationReplyActionApi(Resource):
- @service_api_ns.expect(annotation_reply_action_parser)
+ @service_api_ns.expect(service_api_ns.models[AnnotationReplyActionPayload.__name__])
@service_api_ns.doc("annotation_reply_action")
@service_api_ns.doc(description="Enable or disable annotation reply feature")
@service_api_ns.doc(params={"action": "Action to perform: 'enable' or 'disable'"})
@@ -46,7 +44,7 @@ class AnnotationReplyActionApi(Resource):
@validate_app_token
def post(self, app_model: App, action: Literal["enable", "disable"]):
"""Enable or disable annotation reply feature."""
- args = annotation_reply_action_parser.parse_args()
+ args = AnnotationReplyActionPayload.model_validate(service_api_ns.payload or {}).model_dump()
if action == "enable":
result = AppAnnotationService.enable_app_annotation(args, app_model.id)
elif action == "disable":
@@ -128,7 +126,7 @@ class AnnotationListApi(Resource):
"page": page,
}
- @service_api_ns.expect(annotation_create_parser)
+ @service_api_ns.expect(service_api_ns.models[AnnotationCreatePayload.__name__])
@service_api_ns.doc("create_annotation")
@service_api_ns.doc(description="Create a new annotation")
@service_api_ns.doc(
@@ -141,14 +139,14 @@ class AnnotationListApi(Resource):
@service_api_ns.marshal_with(build_annotation_model(service_api_ns), code=HTTPStatus.CREATED)
def post(self, app_model: App):
"""Create a new annotation."""
- args = annotation_create_parser.parse_args()
+ args = AnnotationCreatePayload.model_validate(service_api_ns.payload or {}).model_dump()
annotation = AppAnnotationService.insert_app_annotation_directly(args, app_model.id)
return annotation, 201
@service_api_ns.route("/apps/annotations/")
class AnnotationUpdateDeleteApi(Resource):
- @service_api_ns.expect(annotation_create_parser)
+ @service_api_ns.expect(service_api_ns.models[AnnotationCreatePayload.__name__])
@service_api_ns.doc("update_annotation")
@service_api_ns.doc(description="Update an existing annotation")
@service_api_ns.doc(params={"annotation_id": "Annotation ID"})
@@ -161,15 +159,11 @@ class AnnotationUpdateDeleteApi(Resource):
}
)
@validate_app_token
+ @edit_permission_required
@service_api_ns.marshal_with(build_annotation_model(service_api_ns))
- def put(self, app_model: App, annotation_id):
+ def put(self, app_model: App, annotation_id: str):
"""Update an existing annotation."""
- assert isinstance(current_user, Account)
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- annotation_id = str(annotation_id)
- args = annotation_create_parser.parse_args()
+ args = AnnotationCreatePayload.model_validate(service_api_ns.payload or {}).model_dump()
annotation = AppAnnotationService.update_app_annotation_directly(args, app_model.id, annotation_id)
return annotation
@@ -185,13 +179,8 @@ class AnnotationUpdateDeleteApi(Resource):
}
)
@validate_app_token
- def delete(self, app_model: App, annotation_id):
+ @edit_permission_required
+ def delete(self, app_model: App, annotation_id: str):
"""Delete an annotation."""
- assert isinstance(current_user, Account)
-
- if not current_user.has_edit_permission:
- raise Forbidden()
-
- annotation_id = str(annotation_id)
AppAnnotationService.delete_app_annotation(app_model.id, annotation_id)
return {"result": "success"}, 204
diff --git a/api/controllers/service_api/app/audio.py b/api/controllers/service_api/app/audio.py
index 33035123d7..e383920460 100644
--- a/api/controllers/service_api/app/audio.py
+++ b/api/controllers/service_api/app/audio.py
@@ -1,10 +1,12 @@
import logging
from flask import request
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field
from werkzeug.exceptions import InternalServerError
import services
+from controllers.common.schema import register_schema_model
from controllers.service_api import service_api_ns
from controllers.service_api.app.error import (
AppUnavailableError,
@@ -84,17 +86,19 @@ class AudioApi(Resource):
raise InternalServerError()
-# Define parser for text-to-audio API
-text_to_audio_parser = reqparse.RequestParser()
-text_to_audio_parser.add_argument("message_id", type=str, required=False, location="json", help="Message ID")
-text_to_audio_parser.add_argument("voice", type=str, location="json", help="Voice to use for TTS")
-text_to_audio_parser.add_argument("text", type=str, location="json", help="Text to convert to audio")
-text_to_audio_parser.add_argument("streaming", type=bool, location="json", help="Enable streaming response")
+class TextToAudioPayload(BaseModel):
+ message_id: str | None = Field(default=None, description="Message ID")
+ voice: str | None = Field(default=None, description="Voice to use for TTS")
+ text: str | None = Field(default=None, description="Text to convert to audio")
+ streaming: bool | None = Field(default=None, description="Enable streaming response")
+
+
+register_schema_model(service_api_ns, TextToAudioPayload)
@service_api_ns.route("/text-to-audio")
class TextApi(Resource):
- @service_api_ns.expect(text_to_audio_parser)
+ @service_api_ns.expect(service_api_ns.models[TextToAudioPayload.__name__])
@service_api_ns.doc("text_to_audio")
@service_api_ns.doc(description="Convert text to audio using text-to-speech")
@service_api_ns.doc(
@@ -112,11 +116,11 @@ class TextApi(Resource):
Converts the provided text to audio using the specified voice.
"""
try:
- args = text_to_audio_parser.parse_args()
+ payload = TextToAudioPayload.model_validate(service_api_ns.payload or {})
- message_id = args.get("message_id", None)
- text = args.get("text", None)
- voice = args.get("voice", None)
+ message_id = payload.message_id
+ text = payload.text
+ voice = payload.voice
response = AudioService.transcript_tts(
app_model=app_model, text=text, voice=voice, end_user=end_user.external_user_id, message_id=message_id
)
diff --git a/api/controllers/service_api/app/completion.py b/api/controllers/service_api/app/completion.py
index 22428ee0ab..b3836f3a47 100644
--- a/api/controllers/service_api/app/completion.py
+++ b/api/controllers/service_api/app/completion.py
@@ -1,10 +1,14 @@
import logging
+from typing import Any, Literal
+from uuid import UUID
from flask import request
-from flask_restx import Resource, reqparse
+from flask_restx import Resource
+from pydantic import BaseModel, Field, field_validator
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
import services
+from controllers.common.schema import register_schema_models
from controllers.service_api import service_api_ns
from controllers.service_api.app.error import (
AppUnavailableError,
@@ -17,7 +21,6 @@ from controllers.service_api.app.error import (
)
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
-from core.app.apps.base_app_queue_manager import AppQueueManager
from core.app.entities.app_invoke_entities import InvokeFrom
from core.errors.error import (
ModelCurrentlyNotSupportError,
@@ -27,55 +30,55 @@ from core.errors.error import (
from core.helper.trace_id_helper import get_external_trace_id
from core.model_runtime.errors.invoke import InvokeError
from libs import helper
-from libs.helper import uuid_value
from models.model import App, AppMode, EndUser
from services.app_generate_service import AppGenerateService
+from services.app_task_service import AppTaskService
from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
from services.errors.llm import InvokeRateLimitError
logger = logging.getLogger(__name__)
-# Define parser for completion API
-completion_parser = reqparse.RequestParser()
-completion_parser.add_argument(
- "inputs", type=dict, required=True, location="json", help="Input parameters for completion"
-)
-completion_parser.add_argument("query", type=str, location="json", default="", help="The query string")
-completion_parser.add_argument("files", type=list, required=False, location="json", help="List of file attachments")
-completion_parser.add_argument(
- "response_mode", type=str, choices=["blocking", "streaming"], location="json", help="Response mode"
-)
-completion_parser.add_argument(
- "retriever_from", type=str, required=False, default="dev", location="json", help="Retriever source"
-)
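+# Pydantic equivalents of the old reqparse rules: required=True becomes a
+# field with no default, choices=[...] becomes Literal[...], and every
+# location="json" argument maps to a body field.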
+class CompletionRequestPayload(BaseModel):
+ inputs: dict[str, Any]
+ query: str = Field(default="")
+ files: list[dict[str, Any]] | None = None
+ response_mode: Literal["blocking", "streaming"] | None = None
+ retriever_from: str = Field(default="dev")
-# Define parser for chat API
-chat_parser = reqparse.RequestParser()
-chat_parser.add_argument("inputs", type=dict, required=True, location="json", help="Input parameters for chat")
-chat_parser.add_argument("query", type=str, required=True, location="json", help="The chat query")
-chat_parser.add_argument("files", type=list, required=False, location="json", help="List of file attachments")
-chat_parser.add_argument(
- "response_mode", type=str, choices=["blocking", "streaming"], location="json", help="Response mode"
-)
-chat_parser.add_argument("conversation_id", type=uuid_value, location="json", help="Existing conversation ID")
-chat_parser.add_argument(
- "retriever_from", type=str, required=False, default="dev", location="json", help="Retriever source"
-)
-chat_parser.add_argument(
- "auto_generate_name",
- type=bool,
- required=False,
- default=True,
- location="json",
- help="Auto generate conversation name",
-)
-chat_parser.add_argument("workflow_id", type=str, required=False, location="json", help="Workflow ID for advanced chat")
+
+class ChatRequestPayload(BaseModel):
+ inputs: dict[str, Any]
+ query: str
+ files: list[dict[str, Any]] | None = None
+ response_mode: Literal["blocking", "streaming"] | None = None
+ conversation_id: str | None = Field(default=None, description="Conversation UUID")
+ retriever_from: str = Field(default="dev")
+ auto_generate_name: bool = Field(default=True, description="Auto generate conversation name")
+ workflow_id: str | None = Field(default=None, description="Workflow ID for advanced chat")
+
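+ # mode="before" runs ahead of Pydantic's type coercion, so blank or
+ # whitespace-only strings can be normalized to None before any UUID
+ # validation fires.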
+ @field_validator("conversation_id", mode="before")
+ @classmethod
+ def normalize_conversation_id(cls, value: str | UUID | None) -> str | None:
+ """Allow missing or blank conversation IDs; enforce UUID format when provided."""
+ if isinstance(value, str):
+ value = value.strip()
+
+ if not value:
+ return None
+
+ try:
+ return helper.uuid_value(value)
+ except ValueError as exc:
+ raise ValueError("conversation_id must be a valid UUID") from exc
+
+
+register_schema_models(service_api_ns, CompletionRequestPayload, ChatRequestPayload)
@service_api_ns.route("/completion-messages")
class CompletionApi(Resource):
- @service_api_ns.expect(completion_parser)
+ @service_api_ns.expect(service_api_ns.models[CompletionRequestPayload.__name__])
@service_api_ns.doc("create_completion")
@service_api_ns.doc(description="Create a completion for the given prompt")
@service_api_ns.doc(
@@ -94,15 +97,16 @@ class CompletionApi(Resource):
This endpoint generates a completion based on the provided inputs and query.
Supports both blocking and streaming response modes.
"""
- if app_model.mode != "completion":
+ if app_model.mode != AppMode.COMPLETION:
raise AppUnavailableError()
- args = completion_parser.parse_args()
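+ # model_validate raises pydantic.ValidationError on malformed input,
+ # mirroring parse_args()'s 400 behavior (assuming a ValidationError
+ # handler is registered on the API).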
+ payload = CompletionRequestPayload.model_validate(service_api_ns.payload or {})
external_trace_id = get_external_trace_id(request)
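+ # exclude_none drops every None-valued field (unset optionals and
+ # explicit nulls alike), so downstream code sees only concrete values.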
+ args = payload.model_dump(exclude_none=True)
if external_trace_id:
args["external_trace_id"] = external_trace_id
- streaming = args["response_mode"] == "streaming"
+ streaming = payload.response_mode == "streaming"
args["auto_generate_name"] = False
@@ -153,17 +157,22 @@ class CompletionStopApi(Resource):
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
def post(self, app_model: App, end_user: EndUser, task_id: str):
"""Stop a running completion task."""
- if app_model.mode != "completion":
+ if app_model.mode != AppMode.COMPLETION:
raise AppUnavailableError()
- AppQueueManager.set_stop_flag(task_id, InvokeFrom.SERVICE_API, end_user.id)
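+ # AppTaskService.stop_task supersedes AppQueueManager.set_stop_flag; the
+ # extra app_mode argument presumably lets the service route the stop
+ # signal differently for workflow and chat apps.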
+ AppTaskService.stop_task(
+ task_id=task_id,
+ invoke_from=InvokeFrom.SERVICE_API,
+ user_id=end_user.id,
+ app_mode=AppMode.value_of(app_model.mode),
+ )
return {"result": "success"}, 200
@service_api_ns.route("/chat-messages")
class ChatApi(Resource):
- @service_api_ns.expect(chat_parser)
+ @service_api_ns.expect(service_api_ns.models[ChatRequestPayload.__name__])
@service_api_ns.doc("create_chat_message")
@service_api_ns.doc(description="Send a message in a chat conversation")
@service_api_ns.doc(
@@ -187,13 +196,14 @@ class ChatApi(Resource):
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
- args = chat_parser.parse_args()
+ payload = ChatRequestPayload.model_validate(service_api_ns.payload or {})
external_trace_id = get_external_trace_id(request)
+ args = payload.model_dump(exclude_none=True)
if external_trace_id:
args["external_trace_id"] = external_trace_id
- streaming = args["response_mode"] == "streaming"
+ streaming = payload.response_mode == "streaming"
try:
response = AppGenerateService.generate(
@@ -250,6 +260,11 @@ class ChatStopApi(Resource):
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
- AppQueueManager.set_stop_flag(task_id, InvokeFrom.SERVICE_API, end_user.id)
+ AppTaskService.stop_task(
+ task_id=task_id,
+ invoke_from=InvokeFrom.SERVICE_API,
+ user_id=end_user.id,
+ app_mode=app_mode,
+ )
return {"result": "success"}, 200
diff --git a/api/controllers/service_api/app/conversation.py b/api/controllers/service_api/app/conversation.py
index 711dd5704c..be6d837032 100644
--- a/api/controllers/service_api/app/conversation.py
+++ b/api/controllers/service_api/app/conversation.py
@@ -1,10 +1,15 @@
-from flask_restx import Resource, reqparse
+from typing import Any, Literal
+from uuid import UUID
+
+from flask import request
+from flask_restx import Resource
from flask_restx._http import HTTPStatus
-from flask_restx.inputs import int_range
+from pydantic import BaseModel, Field, model_validator
from sqlalchemy.orm import Session
from werkzeug.exceptions import BadRequest, NotFound
import services
+from controllers.common.schema import register_schema_models
from controllers.service_api import service_api_ns
from controllers.service_api.app.error import NotChatAppError
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
@@ -19,59 +24,51 @@ from fields.conversation_variable_fields import (
build_conversation_variable_infinite_scroll_pagination_model,
build_conversation_variable_model,
)
-from libs.helper import uuid_value
from models.model import App, AppMode, EndUser
from services.conversation_service import ConversationService
-# Define parsers for conversation APIs
-conversation_list_parser = reqparse.RequestParser()
-conversation_list_parser.add_argument(
- "last_id", type=uuid_value, location="args", help="Last conversation ID for pagination"
-)
-conversation_list_parser.add_argument(
- "limit",
- type=int_range(1, 100),
- required=False,
- default=20,
- location="args",
- help="Number of conversations to return",
-)
-conversation_list_parser.add_argument(
- "sort_by",
- type=str,
- choices=["created_at", "-created_at", "updated_at", "-updated_at"],
- required=False,
- default="-updated_at",
- location="args",
- help="Sort order for conversations",
-)
-conversation_rename_parser = reqparse.RequestParser()
-conversation_rename_parser.add_argument("name", type=str, required=False, location="json", help="New conversation name")
-conversation_rename_parser.add_argument(
- "auto_generate", type=bool, required=False, default=False, location="json", help="Auto-generate conversation name"
-)
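+# Query-string models: these are validated from request.args rather than the
+# JSON body. Field(ge=1, le=100) replaces the old int_range(1, 100) check.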
+class ConversationListQuery(BaseModel):
+ last_id: UUID | None = Field(default=None, description="Last conversation ID for pagination")
+ limit: int = Field(default=20, ge=1, le=100, description="Number of conversations to return")
+ sort_by: Literal["created_at", "-created_at", "updated_at", "-updated_at"] = Field(
+ default="-updated_at", description="Sort order for conversations"
+ )
-conversation_variables_parser = reqparse.RequestParser()
-conversation_variables_parser.add_argument(
- "last_id", type=uuid_value, location="args", help="Last variable ID for pagination"
-)
-conversation_variables_parser.add_argument(
- "limit", type=int_range(1, 100), required=False, default=20, location="args", help="Number of variables to return"
-)
-conversation_variable_update_parser = reqparse.RequestParser()
-# using lambda is for passing the already-typed value without modification
-# if no lambda, it will be converted to string
-# the string cannot be converted using json.loads
-conversation_variable_update_parser.add_argument(
- "value", required=True, location="json", type=lambda x: x, help="New value for the conversation variable"
+class ConversationRenamePayload(BaseModel):
+ name: str | None = Field(default=None, description="New conversation name (required if auto_generate is false)")
+ auto_generate: bool = Field(default=False, description="Auto-generate conversation name")
+
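+ # mode="after" sees the fully parsed model, so the cross-field rule
+ # "name is required unless auto_generate" can be enforced in one place.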
+ @model_validator(mode="after")
+ def validate_name_requirement(self):
+ if not self.auto_generate:
+ if self.name is None or not self.name.strip():
+ raise ValueError("name is required when auto_generate is false")
+ return self
+
+
+class ConversationVariablesQuery(BaseModel):
+ last_id: UUID | None = Field(default=None, description="Last variable ID for pagination")
+ limit: int = Field(default=20, ge=1, le=100, description="Number of variables to return")
+
+
+class ConversationVariableUpdatePayload(BaseModel):
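+ # Any passes the already-typed JSON value through untouched, replacing
+ # the old `type=lambda x: x` workaround in reqparse.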
+ value: Any
+
+
+register_schema_models(
+ service_api_ns,
+ ConversationListQuery,
+ ConversationRenamePayload,
+ ConversationVariablesQuery,
+ ConversationVariableUpdatePayload,
)
@service_api_ns.route("/conversations")
class ConversationApi(Resource):
- @service_api_ns.expect(conversation_list_parser)
+ @service_api_ns.expect(service_api_ns.models[ConversationListQuery.__name__])
@service_api_ns.doc("list_conversations")
@service_api_ns.doc(description="List all conversations for the current user")
@service_api_ns.doc(
@@ -92,7 +89,8 @@ class ConversationApi(Resource):
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
- args = conversation_list_parser.parse_args()
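+ # request.args.to_dict() flattens the query string; Pydantic then coerces
+ # last_id to UUID and range-checks limit.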
+ query_args = ConversationListQuery.model_validate(request.args.to_dict())
+ last_id = str(query_args.last_id) if query_args.last_id else None
try:
with Session(db.engine) as session:
@@ -100,10 +98,10 @@ class ConversationApi(Resource):
session=session,
app_model=app_model,
user=end_user,
- last_id=args["last_id"],
- limit=args["limit"],
+ last_id=last_id,
+ limit=query_args.limit,
invoke_from=InvokeFrom.SERVICE_API,
- sort_by=args["sort_by"],
+ sort_by=query_args.sort_by,
)
except services.errors.conversation.LastConversationNotExistsError:
raise NotFound("Last Conversation Not Exists.")
@@ -140,7 +138,7 @@ class ConversationDetailApi(Resource):
@service_api_ns.route("/conversations//name")
class ConversationRenameApi(Resource):
- @service_api_ns.expect(conversation_rename_parser)
+ @service_api_ns.expect(service_api_ns.models[ConversationRenamePayload.__name__])
@service_api_ns.doc("rename_conversation")
@service_api_ns.doc(description="Rename a conversation or auto-generate a name")
@service_api_ns.doc(params={"c_id": "Conversation ID"})
@@ -161,17 +159,17 @@ class ConversationRenameApi(Resource):
conversation_id = str(c_id)
- args = conversation_rename_parser.parse_args()
+ payload = ConversationRenamePayload.model_validate(service_api_ns.payload or {})
try:
- return ConversationService.rename(app_model, conversation_id, end_user, args["name"], args["auto_generate"])
+ return ConversationService.rename(app_model, conversation_id, end_user, payload.name, payload.auto_generate)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
@service_api_ns.route("/conversations//variables")
class ConversationVariablesApi(Resource):
- @service_api_ns.expect(conversation_variables_parser)
+ @service_api_ns.expect(service_api_ns.models[ConversationVariablesQuery.__name__])
@service_api_ns.doc("list_conversation_variables")
@service_api_ns.doc(description="List all variables for a conversation")
@service_api_ns.doc(params={"c_id": "Conversation ID"})
@@ -196,11 +194,12 @@ class ConversationVariablesApi(Resource):
conversation_id = str(c_id)
- args = conversation_variables_parser.parse_args()
+ query_args = ConversationVariablesQuery.model_validate(request.args.to_dict())
+ last_id = str(query_args.last_id) if query_args.last_id else None
try:
return ConversationService.get_conversational_variable(
- app_model, conversation_id, end_user, args["limit"], args["last_id"]
+ app_model, conversation_id, end_user, query_args.limit, last_id
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
@@ -208,7 +207,7 @@ class ConversationVariablesApi(Resource):
@service_api_ns.route("/conversations//variables/