mirror of https://github.com/langgenius/dify.git
Compare commits
85 Commits
| SHA1 |
|---|
| ee1d0df927 |
| 184077c37c |
| 3015e9be73 |
| 2bb1e24fb4 |
| cad7101534 |
| e856287b65 |
| 27be89c984 |
| fa69cce1e7 |
| f28a08a696 |
| 8129b04143 |
| 1b8e80a722 |
| 0421387672 |
| 2aaaa4bd34 |
| 64dc98e607 |
| 9007109a6b |
| 925168383b |
| e6f3528bb0 |
| fb5edd0bf6 |
| de53c78125 |
| 3a59ae9617 |
| 69589807fd |
| 6ca44eea28 |
| bf76f10653 |
| c1af6a7127 |
| 1873b5a766 |
| 9fbc7fa379 |
| 2399d00d86 |
| 3505516e8e |
| faef04cdf7 |
| 0ba9b9e6b5 |
| 30dd50ff83 |
| 5338cf85b1 |
| 673209d086 |
| 43758ec85d |
| 20944e7e1a |
| 7a5d2728a1 |
| 14bff10201 |
| 9a6b4147bc |
| 2c919efa69 |
| 6d0e36479b |
| 09be869f58 |
| 0b1439fee4 |
| dfd2dd5c68 |
| 3ae7788933 |
| 446df6b50d |
| d9cecabe93 |
| b71a0d3f04 |
| d546d525b4 |
| a46dc2f37e |
| 8b38e3f79d |
| 44ab8a3376 |
| 1e86535c4a |
| 5b1c08c19c |
| 6202c566e9 |
| a00ac1b5b1 |
| bf56c2e9db |
| 543ce38a6c |
| 1f2c85c916 |
| 2b01f85d61 |
| d8010a7fbc |
| b067ad2f0a |
| b85564cae5 |
| c393d7a2dc |
| f610f6895f |
| d20a8d5b77 |
| 8611301722 |
| 6044f0666a |
| 8d26e6ab28 |
| 61d255a6e6 |
| f0d02b4b91 |
| d100354851 |
| 93d1b2fc32 |
| fa1009b938 |
| fd64156f9d |
| bdd8a35b9d |
| b892906d71 |
| 7e06225ce2 |
| f08d847c20 |
| 44fc0c614c |
| 0f3ffbee2c |
| 08d5eee993 |
| 9885e92854 |
| f2555b0bb1 |
| c3bb95d71d |
| 996c7d9e16 |

@@ -0,0 +1,8 @@
{
  "enabledPlugins": {
    "feature-dev@claude-plugins-official": true,
    "context7@claude-plugins-official": true,
    "typescript-lsp@claude-plugins-official": true,
    "pyright-lsp@claude-plugins-official": true
  }
}

@@ -1,19 +0,0 @@
{
  "permissions": {
    "allow": [],
    "deny": []
  },
  "env": {
    "__comment": "Environment variables for MCP servers. Override in .claude/settings.local.json with actual values.",
    "GITHUB_PERSONAL_ACCESS_TOKEN": "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
  },
  "enabledMcpjsonServers": [
    "context7",
    "sequential-thinking",
    "github",
    "fetch",
    "playwright",
    "ide"
  ],
  "enableAllProjectMcpServers": true
}

@@ -0,0 +1,483 @@
---
name: component-refactoring
description: Refactor high-complexity React components in Dify frontend. Use when `pnpm analyze-component --json` shows complexity > 50 or lineCount > 300, when the user asks for code splitting, hook extraction, or complexity reduction, or when `pnpm analyze-component` warns to refactor before testing; avoid for simple/well-structured components, third-party wrappers, or when the user explicitly wants testing without refactoring.
---

# Dify Component Refactoring Skill

Refactor high-complexity React components in the Dify frontend codebase with the patterns and workflow below.

> **Complexity Threshold**: Components with complexity > 50 (measured by `pnpm analyze-component`) should be refactored before testing.

## Quick Reference

### Commands (run from `web/`)

Use paths relative to `web/` (e.g., `app/components/...`).
Use `refactor-component` for refactoring prompts and `analyze-component` for testing prompts and metrics.

```bash
cd web

# Generate refactoring prompt
pnpm refactor-component <path>

# Output refactoring analysis as JSON
pnpm refactor-component <path> --json

# Generate testing prompt (after refactoring)
pnpm analyze-component <path>

# Output testing analysis as JSON
pnpm analyze-component <path> --json
```

### Complexity Analysis

```bash
# Analyze component complexity
pnpm analyze-component <path> --json

# Key metrics to check:
# - complexity: normalized score 0-100 (target < 50)
# - maxComplexity: highest single function complexity
# - lineCount: total lines (target < 300)
```

### Complexity Score Interpretation

| Score | Level | Action |
|-------|-------|--------|
| 0-25 | 🟢 Simple | Ready for testing |
| 26-50 | 🟡 Medium | Consider minor refactoring |
| 51-75 | 🟠 Complex | **Refactor before testing** |
| 76-100 | 🔴 Very Complex | **Must refactor** |

## Core Refactoring Patterns

### Pattern 1: Extract Custom Hooks

**When**: Component has complex state management, multiple `useState`/`useEffect`, or business logic mixed with UI.

**Dify Convention**: Place hooks in a `hooks/` subdirectory or alongside the component as `use-<feature>.ts`.

```typescript
// ❌ Before: Complex state logic in component
const Configuration: FC = () => {
  const [modelConfig, setModelConfig] = useState<ModelConfig>(...)
  const [datasetConfigs, setDatasetConfigs] = useState<DatasetConfigs>(...)
  const [completionParams, setCompletionParams] = useState<FormValue>({})

  // 50+ lines of state management logic...

  return <div>...</div>
}

// ✅ After: Extract to custom hook
// hooks/use-model-config.ts
export const useModelConfig = (appId: string) => {
  const [modelConfig, setModelConfig] = useState<ModelConfig>(...)
  const [completionParams, setCompletionParams] = useState<FormValue>({})

  // Related state management logic here

  return { modelConfig, setModelConfig, completionParams, setCompletionParams }
}

// Component becomes cleaner
const Configuration: FC = () => {
  const { modelConfig, setModelConfig } = useModelConfig(appId)
  return <div>...</div>
}
```

**Dify Examples**:

- `web/app/components/app/configuration/hooks/use-advanced-prompt-config.ts`
- `web/app/components/app/configuration/debug/hooks.tsx`
- `web/app/components/workflow/hooks/use-workflow.ts`

### Pattern 2: Extract Sub-Components

**When**: Single component has multiple UI sections, conditional rendering blocks, or repeated patterns.

**Dify Convention**: Place sub-components in subdirectories or as separate files in the same directory.

```typescript
// ❌ Before: Monolithic JSX with multiple sections
const AppInfo = () => {
  return (
    <div>
      {/* 100 lines of header UI */}
      {/* 100 lines of operations UI */}
      {/* 100 lines of modals */}
    </div>
  )
}

// ✅ After: Split into focused components
// app-info/
// ├── index.tsx (orchestration only)
// ├── app-header.tsx (header UI)
// ├── app-operations.tsx (operations UI)
// └── app-modals.tsx (modal management)

const AppInfo = () => {
  const { showModal, setShowModal } = useAppInfoModals()

  return (
    <div>
      <AppHeader appDetail={appDetail} />
      <AppOperations onAction={handleAction} />
      <AppModals show={showModal} onClose={() => setShowModal(null)} />
    </div>
  )
}
```

**Dify Examples**:

- `web/app/components/app/configuration/` directory structure
- `web/app/components/workflow/nodes/` per-node organization

### Pattern 3: Simplify Conditional Logic

**When**: Deep nesting (> 3 levels), complex ternaries, or multiple `if/else` chains.

```typescript
// ❌ Before: Deeply nested conditionals
const Template = useMemo(() => {
  if (appDetail?.mode === AppModeEnum.CHAT) {
    switch (locale) {
      case LanguagesSupported[1]:
        return <TemplateChatZh />
      case LanguagesSupported[7]:
        return <TemplateChatJa />
      default:
        return <TemplateChatEn />
    }
  }
  if (appDetail?.mode === AppModeEnum.ADVANCED_CHAT) {
    // Another 15 lines...
  }
  // More conditions...
}, [appDetail, locale])

// ✅ After: Use lookup tables + early returns
const TEMPLATE_MAP = {
  [AppModeEnum.CHAT]: {
    [LanguagesSupported[1]]: TemplateChatZh,
    [LanguagesSupported[7]]: TemplateChatJa,
    default: TemplateChatEn,
  },
  [AppModeEnum.ADVANCED_CHAT]: {
    [LanguagesSupported[1]]: TemplateAdvancedChatZh,
    // ...
  },
}

const Template = useMemo(() => {
  const modeTemplates = TEMPLATE_MAP[appDetail?.mode]
  if (!modeTemplates) return null

  const TemplateComponent = modeTemplates[locale] || modeTemplates.default
  return <TemplateComponent appDetail={appDetail} />
}, [appDetail, locale])
```

### Pattern 4: Extract API/Data Logic

**When**: Component directly handles API calls, data transformation, or complex async operations.

**Dify Convention**: Use `@tanstack/react-query` hooks from `web/service/use-*.ts` or create custom data hooks.

```typescript
// ❌ Before: API logic in component
const MCPServiceCard = () => {
  const [basicAppConfig, setBasicAppConfig] = useState({})

  useEffect(() => {
    if (isBasicApp && appId) {
      (async () => {
        const res = await fetchAppDetail({ url: '/apps', id: appId })
        setBasicAppConfig(res?.model_config || {})
      })()
    }
  }, [appId, isBasicApp])

  // More API-related logic...
}

// ✅ After: Extract to data hook using React Query
// use-app-config.ts
import { useQuery } from '@tanstack/react-query'
import { get } from '@/service/base'

const NAME_SPACE = 'appConfig'

export const useAppConfig = (appId: string, isBasicApp: boolean) => {
  return useQuery({
    enabled: isBasicApp && !!appId,
    queryKey: [NAME_SPACE, 'detail', appId],
    queryFn: () => get<AppDetailResponse>(`/apps/${appId}`),
    select: data => data?.model_config || {},
  })
}

// Component becomes cleaner
const MCPServiceCard = () => {
  const { data: config, isLoading } = useAppConfig(appId, isBasicApp)
  // UI only
}
```

**React Query Best Practices in Dify**:

- Define `NAME_SPACE` for query key organization
- Use the `enabled` option for conditional fetching
- Use `select` for data transformation
- Export invalidation hooks: `useInvalidXxx`

**Dify Examples**:

- `web/service/use-workflow.ts`
- `web/service/use-common.ts`
- `web/service/knowledge/use-dataset.ts`
- `web/service/knowledge/use-document.ts`

### Pattern 5: Extract Modal/Dialog Management

**When**: Component manages multiple modals with complex open/close states.

**Dify Convention**: Modals should be extracted together with their state management.

```typescript
// ❌ Before: Multiple modal states in component
const AppInfo = () => {
  const [showEditModal, setShowEditModal] = useState(false)
  const [showDuplicateModal, setShowDuplicateModal] = useState(false)
  const [showConfirmDelete, setShowConfirmDelete] = useState(false)
  const [showSwitchModal, setShowSwitchModal] = useState(false)
  const [showImportDSLModal, setShowImportDSLModal] = useState(false)
  // 5+ more modal states...
}

// ✅ After: Extract to modal management hook
type ModalType = 'edit' | 'duplicate' | 'delete' | 'switch' | 'import' | null

const useAppInfoModals = () => {
  const [activeModal, setActiveModal] = useState<ModalType>(null)

  const openModal = useCallback((type: ModalType) => setActiveModal(type), [])
  const closeModal = useCallback(() => setActiveModal(null), [])

  return {
    activeModal,
    openModal,
    closeModal,
    isOpen: (type: ModalType) => activeModal === type,
  }
}
```
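
Because all open/close state funnels through one `activeModal` value, call sites shrink to a single trigger per modal. A minimal usage sketch (the `Button`, `EditModal`, and `DuplicateModal` names stand in for whatever the component actually renders):

```typescript
const AppInfo = () => {
  const { openModal, closeModal, isOpen } = useAppInfoModals()

  return (
    <div>
      <Button onClick={() => openModal('edit')}>Edit</Button>
      <Button onClick={() => openModal('duplicate')}>Duplicate</Button>

      {/* At most one modal can be active at a time */}
      {isOpen('edit') && <EditModal onClose={closeModal} />}
      {isOpen('duplicate') && <DuplicateModal onClose={closeModal} />}
    </div>
  )
}
```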

### Pattern 6: Extract Form Logic

**When**: Complex form validation, submission handling, or field transformation.

**Dify Convention**: Use `@tanstack/react-form` patterns from `web/app/components/base/form/`.

```typescript
// ✅ Use existing form infrastructure
import { useAppForm } from '@/app/components/base/form'

const ConfigForm = () => {
  const form = useAppForm({
    defaultValues: { name: '', description: '' },
    onSubmit: handleSubmit,
  })

  return <form.Provider>...</form.Provider>
}
```

## Dify-Specific Refactoring Guidelines

### 1. Context Provider Extraction

**When**: Component provides complex context values with multiple states.

```typescript
// ❌ Before: Large context value object
const value = {
  appId, isAPIKeySet, isTrailFinished, mode, modelModeType,
  promptMode, isAdvancedMode, isAgent, isOpenAI, isFunctionCall,
  // 50+ more properties...
}
return <ConfigContext.Provider value={value}>...</ConfigContext.Provider>

// ✅ After: Split into domain-specific contexts
<ModelConfigProvider value={modelConfigValue}>
  <DatasetConfigProvider value={datasetConfigValue}>
    <UIConfigProvider value={uiConfigValue}>
      {children}
    </UIConfigProvider>
  </DatasetConfigProvider>
</ModelConfigProvider>
```

**Dify Reference**: `web/context/` directory structure
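
A minimal sketch of one such domain provider, assuming the `ModelConfigProvider` name from the snippet above (illustrative, not an existing module in `web/context/`):

```typescript
import type { FC, ReactNode } from 'react'
import { createContext, useContext } from 'react'
import type { ModelConfig } from '@/models/debug'

type ModelConfigValue = {
  modelConfig: ModelConfig
  setModelConfig: (config: ModelConfig) => void
}

const ModelConfigContext = createContext<ModelConfigValue | null>(null)

export const ModelConfigProvider: FC<{ value: ModelConfigValue; children: ReactNode }> = ({ value, children }) => (
  <ModelConfigContext.Provider value={value}>{children}</ModelConfigContext.Provider>
)

// Consumers subscribe only to the model slice, so dataset/UI updates no longer re-render them
export const useModelConfigContext = () => {
  const ctx = useContext(ModelConfigContext)
  if (!ctx)
    throw new Error('useModelConfigContext must be used within ModelConfigProvider')
  return ctx
}
```

The payoff is render isolation: a component that only reads model state stops re-rendering when an unrelated dataset or UI value changes.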

### 2. Workflow Node Components

**When**: Refactoring workflow node components (`web/app/components/workflow/nodes/`).

**Conventions**:

- Keep node logic in `use-interactions.ts`
- Extract panel UI to separate files
- Use `_base` components for common patterns

```
nodes/<node-type>/
├── index.tsx             # Node registration
├── node.tsx              # Node visual component
├── panel.tsx             # Configuration panel
├── use-interactions.ts   # Node-specific hooks
└── types.ts              # Type definitions
```
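
A skeletal sketch of how those files relate, with hypothetical `MyNodeData` and `useInteractions` names (the real registration wiring in `index.tsx` follows the `_base` conventions and is omitted here):

```typescript
// types.ts - the data this node stores in the workflow graph
export type MyNodeData = {
  title: string
  variable: string
}

// node.tsx - compact visual shown on the canvas
const MyNode: FC<{ data: MyNodeData }> = ({ data }) => (
  <div>{data.title}</div>
)

// panel.tsx - configuration panel; mutation logic stays in use-interactions.ts
const MyNodePanel: FC<{ id: string; data: MyNodeData }> = ({ id, data }) => {
  const { handleVariableChange } = useInteractions(id)

  return (
    <input
      value={data.variable}
      onChange={e => handleVariableChange(e.target.value)}
    />
  )
}
```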

### 3. Configuration Components

**When**: Refactoring app configuration components.

**Conventions**:

- Separate config sections into subdirectories
- Use existing patterns from `web/app/components/app/configuration/`
- Keep feature toggles in dedicated components (see the sketch below)
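
A minimal sketch of such a dedicated toggle component (the `FeatureToggle` name and the `Switch` props are illustrative assumptions):

```typescript
type FeatureToggleProps = {
  title: string
  enabled: boolean
  onChange: (enabled: boolean) => void
}

// The toggle owns its row UI; the parent config page only wires up state
const FeatureToggle: FC<FeatureToggleProps> = ({ title, enabled, onChange }) => (
  <div className="feature-toggle">
    <span>{title}</span>
    <Switch checked={enabled} onChange={onChange} />
  </div>
)
```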

### 4. Tool/Plugin Components

**When**: Refactoring tool-related components (`web/app/components/tools/`).

**Conventions**:

- Follow existing modal patterns
- Use service hooks from `web/service/use-tools.ts`
- Keep provider-specific logic isolated

## Refactoring Workflow

### Step 1: Generate Refactoring Prompt

```bash
pnpm refactor-component <path>
```

This command will:

- Analyze component complexity and features
- Identify specific refactoring actions needed
- Generate a prompt for an AI assistant (auto-copied to clipboard on macOS)
- Provide detailed requirements based on detected patterns

### Step 2: Analyze Details

```bash
pnpm analyze-component <path> --json
```

Identify:

- Total complexity score
- Max function complexity
- Line count
- Features detected (state, effects, API, etc.)

### Step 3: Plan

Create a refactoring plan based on the detected features; a small script that maps the JSON report to these actions is sketched after the table.

| Detected Feature | Refactoring Action |
|------------------|-------------------|
| `hasState: true` + `hasEffects: true` | Extract custom hook |
| `hasAPI: true` | Extract data/service hook |
| `hasEvents: true` (many) | Extract event handlers |
| `lineCount > 300` | Split into sub-components |
| `maxComplexity > 50` | Simplify conditional logic |
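
The field names in this sketch are assumed from the table above and may need adjusting to the tool's actual `--json` output:

```typescript
// plan-refactor.ts - usage: npx tsx plan-refactor.ts report.json
import { readFileSync } from 'node:fs'

type Report = {
  complexity: number
  maxComplexity: number
  lineCount: number
  features: { hasState: boolean; hasEffects: boolean; hasAPI: boolean; hasEvents: boolean }
}

const report: Report = JSON.parse(readFileSync(process.argv[2], 'utf8'))
const actions: string[] = []

if (report.features.hasState && report.features.hasEffects)
  actions.push('Extract custom hook')
if (report.features.hasAPI)
  actions.push('Extract data/service hook')
if (report.features.hasEvents)
  actions.push('Extract event handlers')
if (report.lineCount > 300)
  actions.push('Split into sub-components')
if (report.maxComplexity > 50)
  actions.push('Simplify conditional logic')

console.log(actions.length ? actions.join('\n') : 'Component is within targets')
```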

### Step 4: Execute Incrementally

1. **Extract one piece at a time**
2. **Run lint, type-check, and tests after each extraction**
3. **Verify functionality before the next step**

```
For each extraction:
┌────────────────────────────────────────┐
│ 1. Extract code                        │
│ 2. Run: pnpm lint:fix                  │
│ 3. Run: pnpm type-check:tsgo           │
│ 4. Run: pnpm test                      │
│ 5. Test functionality manually         │
│ 6. PASS? → Next extraction             │
│    FAIL? → Fix before continuing       │
└────────────────────────────────────────┘
```

### Step 5: Verify

After refactoring:

```bash
# Re-run refactor command to verify improvements
pnpm refactor-component <path>

# If complexity < 25 and lines < 200, you'll see:
# ✅ COMPONENT IS WELL-STRUCTURED

# For detailed metrics:
pnpm analyze-component <path> --json

# Target metrics:
# - complexity < 50
# - lineCount < 300
# - maxComplexity < 30
```

## Common Mistakes to Avoid

### ❌ Over-Engineering

```typescript
// ❌ Too many tiny hooks
const useButtonText = () => useState('Click')
const useButtonDisabled = () => useState(false)
const useButtonLoading = () => useState(false)

// ✅ Cohesive hook with related state
const useButtonState = () => {
  const [text, setText] = useState('Click')
  const [disabled, setDisabled] = useState(false)
  const [loading, setLoading] = useState(false)
  return { text, setText, disabled, setDisabled, loading, setLoading }
}
```

### ❌ Breaking Existing Patterns

- Follow existing directory structures
- Maintain naming conventions
- Preserve export patterns for compatibility

### ❌ Premature Abstraction

- Only extract when there's a clear complexity benefit
- Don't create abstractions for single-use code
- Keep refactored code in the same domain area

## References

### Dify Codebase Examples

- **Hook extraction**: `web/app/components/app/configuration/hooks/`
- **Component splitting**: `web/app/components/app/configuration/`
- **Service hooks**: `web/service/use-*.ts`
- **Workflow patterns**: `web/app/components/workflow/hooks/`
- **Form patterns**: `web/app/components/base/form/`

### Related Skills

- `frontend-testing` - For testing refactored components
- `web/testing/testing.md` - Testing specification

@@ -0,0 +1,493 @@
# Complexity Reduction Patterns

This document provides patterns for reducing cognitive complexity in Dify React components.

## Understanding Complexity

### SonarJS Cognitive Complexity

The `pnpm analyze-component` tool uses SonarJS cognitive complexity metrics:

- **Total Complexity**: Sum of all functions' complexity in the file
- **Max Complexity**: Highest single function complexity

### What Increases Complexity

The table below summarizes the main contributors; an annotated example follows it.

| Pattern | Complexity Impact |
|---------|-------------------|
| `if/else` | +1 per branch |
| Nested conditions | +1 per nesting level |
| `switch/case` | +1 per case |
| `for/while/do` | +1 per loop |
| `&&`/`\|\|` chains | +1 per operator |
| Nested callbacks | +1 per nesting level |
| `try/catch` | +1 per catch |
| Ternary expressions | +1 per nesting |
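
An annotated sketch of how a single function accumulates a score under these rules (the per-line increments approximate SonarJS scoring; run the analyzer for exact numbers):

```typescript
function firstPositive(items: Item[]) {   // running total: 0
  for (const item of items) {             // +1 (loop)
    if (item.active) {                    // +2 (condition +1, nesting +1)
      if (item.value > 0 && item.valid)   // +4 (condition +1, nesting +2, `&&` +1)
        return item.value
    }
  }
  return 0                                // total: ~7
}
```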

## Pattern 1: Replace Conditionals with Lookup Tables

**Before** (complexity: ~15):

```typescript
const Template = useMemo(() => {
  if (appDetail?.mode === AppModeEnum.CHAT) {
    switch (locale) {
      case LanguagesSupported[1]:
        return <TemplateChatZh appDetail={appDetail} />
      case LanguagesSupported[7]:
        return <TemplateChatJa appDetail={appDetail} />
      default:
        return <TemplateChatEn appDetail={appDetail} />
    }
  }
  if (appDetail?.mode === AppModeEnum.ADVANCED_CHAT) {
    switch (locale) {
      case LanguagesSupported[1]:
        return <TemplateAdvancedChatZh appDetail={appDetail} />
      case LanguagesSupported[7]:
        return <TemplateAdvancedChatJa appDetail={appDetail} />
      default:
        return <TemplateAdvancedChatEn appDetail={appDetail} />
    }
  }
  if (appDetail?.mode === AppModeEnum.WORKFLOW) {
    // Similar pattern...
  }
  return null
}, [appDetail, locale])
```

**After** (complexity: ~3):

```typescript
// Define lookup table outside component
const TEMPLATE_MAP: Record<AppModeEnum, Record<string, FC<TemplateProps>>> = {
  [AppModeEnum.CHAT]: {
    [LanguagesSupported[1]]: TemplateChatZh,
    [LanguagesSupported[7]]: TemplateChatJa,
    default: TemplateChatEn,
  },
  [AppModeEnum.ADVANCED_CHAT]: {
    [LanguagesSupported[1]]: TemplateAdvancedChatZh,
    [LanguagesSupported[7]]: TemplateAdvancedChatJa,
    default: TemplateAdvancedChatEn,
  },
  [AppModeEnum.WORKFLOW]: {
    [LanguagesSupported[1]]: TemplateWorkflowZh,
    [LanguagesSupported[7]]: TemplateWorkflowJa,
    default: TemplateWorkflowEn,
  },
  // ...
}

// Clean component logic
const Template = useMemo(() => {
  if (!appDetail?.mode) return null

  const templates = TEMPLATE_MAP[appDetail.mode]
  if (!templates) return null

  const TemplateComponent = templates[locale] ?? templates.default
  return <TemplateComponent appDetail={appDetail} />
}, [appDetail, locale])
```

## Pattern 2: Use Early Returns

**Before** (complexity: ~10):

```typescript
const handleSubmit = () => {
  if (isValid) {
    if (hasChanges) {
      if (isConnected) {
        submitData()
      } else {
        showConnectionError()
      }
    } else {
      showNoChangesMessage()
    }
  } else {
    showValidationError()
  }
}
```

**After** (complexity: ~4):

```typescript
const handleSubmit = () => {
  if (!isValid) {
    showValidationError()
    return
  }

  if (!hasChanges) {
    showNoChangesMessage()
    return
  }

  if (!isConnected) {
    showConnectionError()
    return
  }

  submitData()
}
```

## Pattern 3: Extract Complex Conditions

**Before** (complexity: high):

```typescript
const canPublish = (() => {
  if (mode !== AppModeEnum.COMPLETION) {
    if (!isAdvancedMode)
      return true

    if (modelModeType === ModelModeType.completion) {
      if (!hasSetBlockStatus.history || !hasSetBlockStatus.query)
        return false
      return true
    }
    return true
  }
  return !promptEmpty
})()
```

**After** (complexity: lower):

```typescript
// Extract to named functions
const canPublishInCompletionMode = () => !promptEmpty

const canPublishInChatMode = () => {
  if (!isAdvancedMode) return true
  if (modelModeType !== ModelModeType.completion) return true
  return hasSetBlockStatus.history && hasSetBlockStatus.query
}

// Clean main logic
const canPublish = mode === AppModeEnum.COMPLETION
  ? canPublishInCompletionMode()
  : canPublishInChatMode()
```

## Pattern 4: Replace Chained Ternaries

**Before** (complexity: ~5):

```typescript
const statusText = serverActivated
  ? t('status.running')
  : serverPublished
    ? t('status.inactive')
    : appUnpublished
      ? t('status.unpublished')
      : t('status.notConfigured')
```

**After** (complexity: ~2):

```typescript
const getStatusText = () => {
  if (serverActivated) return t('status.running')
  if (serverPublished) return t('status.inactive')
  if (appUnpublished) return t('status.unpublished')
  return t('status.notConfigured')
}

const statusText = getStatusText()
```

Or use a lookup:

```typescript
const STATUS_TEXT_MAP = {
  running: 'status.running',
  inactive: 'status.inactive',
  unpublished: 'status.unpublished',
  notConfigured: 'status.notConfigured',
} as const

const getStatusKey = (): keyof typeof STATUS_TEXT_MAP => {
  if (serverActivated) return 'running'
  if (serverPublished) return 'inactive'
  if (appUnpublished) return 'unpublished'
  return 'notConfigured'
}

const statusText = t(STATUS_TEXT_MAP[getStatusKey()])
```

## Pattern 5: Flatten Nested Loops

**Before** (complexity: high):

```typescript
const processData = (items: Item[]) => {
  const results: ProcessedItem[] = []

  for (const item of items) {
    if (item.isValid) {
      for (const child of item.children) {
        if (child.isActive) {
          for (const prop of child.properties) {
            if (prop.value !== null) {
              results.push({
                itemId: item.id,
                childId: child.id,
                propValue: prop.value,
              })
            }
          }
        }
      }
    }
  }

  return results
}
```

**After** (complexity: lower):

```typescript
// Use a functional approach
const processData = (items: Item[]) => {
  return items
    .filter(item => item.isValid)
    .flatMap(item =>
      item.children
        .filter(child => child.isActive)
        .flatMap(child =>
          child.properties
            .filter(prop => prop.value !== null)
            .map(prop => ({
              itemId: item.id,
              childId: child.id,
              propValue: prop.value,
            })),
        ),
    )
}
```

## Pattern 6: Extract Event Handler Logic

**Before** (complexity: high in component):

```typescript
const Component = () => {
  const handleSelect = (data: DataSet[]) => {
    if (isEqual(data.map(item => item.id), dataSets.map(item => item.id))) {
      hideSelectDataSet()
      return
    }

    formattingChangedDispatcher()
    let newDatasets = data
    if (data.find(item => !item.name)) {
      const newSelected = produce(data, (draft) => {
        data.forEach((item, index) => {
          if (!item.name) {
            const newItem = dataSets.find(i => i.id === item.id)
            if (newItem)
              draft[index] = newItem
          }
        })
      })
      setDataSets(newSelected)
      newDatasets = newSelected
    }
    else {
      setDataSets(data)
    }
    hideSelectDataSet()

    // 40 more lines of logic...
  }

  return <div>...</div>
}
```

**After** (complexity: lower):

```typescript
// Extract to a hook or utility
const useDatasetSelection = (dataSets: DataSet[], setDataSets: SetState<DataSet[]>) => {
  const normalizeSelection = (data: DataSet[]) => {
    const hasUnloadedItem = data.some(item => !item.name)
    if (!hasUnloadedItem) return data

    return produce(data, (draft) => {
      data.forEach((item, index) => {
        if (!item.name) {
          const existing = dataSets.find(i => i.id === item.id)
          if (existing) draft[index] = existing
        }
      })
    })
  }

  const hasSelectionChanged = (newData: DataSet[]) => {
    return !isEqual(
      newData.map(item => item.id),
      dataSets.map(item => item.id),
    )
  }

  return { normalizeSelection, hasSelectionChanged }
}

// Component becomes cleaner
const Component = () => {
  const { normalizeSelection, hasSelectionChanged } = useDatasetSelection(dataSets, setDataSets)

  const handleSelect = (data: DataSet[]) => {
    if (!hasSelectionChanged(data)) {
      hideSelectDataSet()
      return
    }

    formattingChangedDispatcher()
    const normalized = normalizeSelection(data)
    setDataSets(normalized)
    hideSelectDataSet()
  }

  return <div>...</div>
}
```

## Pattern 7: Reduce Boolean Logic Complexity

**Before** (complexity: ~8):

```typescript
const toggleDisabled = hasInsufficientPermissions
  || appUnpublished
  || missingStartNode
  || triggerModeDisabled
  || (isAdvancedApp && !currentWorkflow?.graph)
  || (isBasicApp && !basicAppConfig.updated_at)
```

**After** (complexity: ~3):

```typescript
// Extract meaningful boolean functions
const isAppReady = () => {
  if (isAdvancedApp) return !!currentWorkflow?.graph
  return !!basicAppConfig.updated_at
}

const hasRequiredPermissions = () => {
  return isCurrentWorkspaceEditor && !hasInsufficientPermissions
}

const canToggle = () => {
  if (!hasRequiredPermissions()) return false
  if (!isAppReady()) return false
  if (missingStartNode) return false
  if (triggerModeDisabled) return false
  return true
}

const toggleDisabled = !canToggle()
```

## Pattern 8: Simplify useMemo/useCallback Dependencies

**Before** (complexity: multiple recalculations):

```typescript
const payload = useMemo(() => {
  let parameters: Parameter[] = []
  let outputParameters: OutputParameter[] = []

  if (!published) {
    parameters = (inputs || []).map(item => ({
      name: item.variable,
      description: '',
      form: 'llm',
      required: item.required,
      type: item.type,
    }))
    outputParameters = (outputs || []).map(item => ({
      name: item.variable,
      description: '',
      type: item.value_type,
    }))
  }
  else if (detail && detail.tool) {
    parameters = (inputs || []).map(item => ({
      // Complex transformation...
    }))
    outputParameters = (outputs || []).map(item => ({
      // Complex transformation...
    }))
  }

  return {
    icon: detail?.icon || icon,
    label: detail?.label || name,
    // ...more fields
  }
}, [detail, published, workflowAppId, icon, name, description, inputs, outputs])
```

**After** (complexity: separated concerns):

```typescript
// Separate transformations
const useParameterTransform = (inputs: InputVar[], detail?: ToolDetail, published?: boolean) => {
  return useMemo(() => {
    if (!published) {
      return inputs.map(item => ({
        name: item.variable,
        description: '',
        form: 'llm',
        required: item.required,
        type: item.type,
      }))
    }

    if (!detail?.tool) return []

    return inputs.map(item => ({
      name: item.variable,
      required: item.required,
      type: item.type === 'paragraph' ? 'string' : item.type,
      description: detail.tool.parameters.find(p => p.name === item.variable)?.llm_description || '',
      form: detail.tool.parameters.find(p => p.name === item.variable)?.form || 'llm',
    }))
  }, [inputs, detail, published])
}

// Component uses hooks
const parameters = useParameterTransform(inputs, detail, published)
const outputParameters = useOutputTransform(outputs, detail, published)

const payload = useMemo(() => ({
  icon: detail?.icon || icon,
  label: detail?.label || name,
  parameters,
  outputParameters,
  // ...
}), [detail, icon, name, parameters, outputParameters])
```

## Target Metrics After Refactoring

| Metric | Target |
|--------|--------|
| Total Complexity | < 50 |
| Max Function Complexity | < 30 |
| Function Length | < 30 lines |
| Nesting Depth | ≤ 3 levels |
| Conditional Chains | ≤ 3 conditions |

@@ -0,0 +1,477 @@
# Component Splitting Patterns

This document provides detailed guidance on splitting large components into smaller, focused components in Dify.

## When to Split Components

Split a component when you identify:

1. **Multiple UI sections** - Distinct visual areas with minimal coupling that can be composed independently
2. **Conditional rendering blocks** - Large `{condition && <JSX />}` blocks
3. **Repeated patterns** - Similar UI structures used multiple times
4. **300+ lines** - Component exceeds manageable size
5. **Modal clusters** - Multiple modals rendered in one component

## Splitting Strategies

### Strategy 1: Section-Based Splitting

Identify visual sections and extract each as a component.

```typescript
// ❌ Before: Monolithic component (500+ lines)
const ConfigurationPage = () => {
  return (
    <div>
      {/* Header Section - 50 lines */}
      <div className="header">
        <h1>{t('configuration.title')}</h1>
        <div className="actions">
          {isAdvancedMode && <Badge>Advanced</Badge>}
          <ModelParameterModal ... />
          <AppPublisher ... />
        </div>
      </div>

      {/* Config Section - 200 lines */}
      <div className="config">
        <Config />
      </div>

      {/* Debug Section - 150 lines */}
      <div className="debug">
        <Debug ... />
      </div>

      {/* Modals Section - 100 lines */}
      {showSelectDataSet && <SelectDataSet ... />}
      {showHistoryModal && <EditHistoryModal ... />}
      {showUseGPT4Confirm && <Confirm ... />}
    </div>
  )
}

// ✅ After: Split into focused components
// configuration/
// ├── index.tsx (orchestration)
// ├── configuration-header.tsx
// ├── configuration-content.tsx
// ├── configuration-debug.tsx
// └── configuration-modals.tsx

// configuration-header.tsx
interface ConfigurationHeaderProps {
  isAdvancedMode: boolean
  onPublish: () => void
}

const ConfigurationHeader: FC<ConfigurationHeaderProps> = ({
  isAdvancedMode,
  onPublish,
}) => {
  const { t } = useTranslation()

  return (
    <div className="header">
      <h1>{t('configuration.title')}</h1>
      <div className="actions">
        {isAdvancedMode && <Badge>Advanced</Badge>}
        <ModelParameterModal ... />
        <AppPublisher onPublish={onPublish} />
      </div>
    </div>
  )
}

// index.tsx (orchestration only)
const ConfigurationPage = () => {
  const { modelConfig, setModelConfig } = useModelConfig()
  const { activeModal, openModal, closeModal } = useModalState()

  return (
    <div>
      <ConfigurationHeader
        isAdvancedMode={isAdvancedMode}
        onPublish={handlePublish}
      />
      <ConfigurationContent
        modelConfig={modelConfig}
        onConfigChange={setModelConfig}
      />
      {!isMobile && (
        <ConfigurationDebug
          inputs={inputs}
          onSetting={handleSetting}
        />
      )}
      <ConfigurationModals
        activeModal={activeModal}
        onClose={closeModal}
      />
    </div>
  )
}
```

### Strategy 2: Conditional Block Extraction

Extract large conditional rendering blocks.

```typescript
// ❌ Before: Large conditional blocks
const AppInfo = () => {
  return (
    <div>
      {expand ? (
        <div className="expanded">
          {/* 100 lines of expanded view */}
        </div>
      ) : (
        <div className="collapsed">
          {/* 50 lines of collapsed view */}
        </div>
      )}
    </div>
  )
}

// ✅ After: Separate view components
const AppInfoExpanded: FC<AppInfoViewProps> = ({ appDetail, onAction }) => {
  return (
    <div className="expanded">
      {/* Clean, focused expanded view */}
    </div>
  )
}

const AppInfoCollapsed: FC<AppInfoViewProps> = ({ appDetail, onAction }) => {
  return (
    <div className="collapsed">
      {/* Clean, focused collapsed view */}
    </div>
  )
}

const AppInfo = () => {
  return (
    <div>
      {expand
        ? <AppInfoExpanded appDetail={appDetail} onAction={handleAction} />
        : <AppInfoCollapsed appDetail={appDetail} onAction={handleAction} />
      }
    </div>
  )
}
```

### Strategy 3: Modal Extraction

Extract modals with their trigger logic.

```typescript
// ❌ Before: Multiple modals in one component
const AppInfo = () => {
  const [showEdit, setShowEdit] = useState(false)
  const [showDuplicate, setShowDuplicate] = useState(false)
  const [showDelete, setShowDelete] = useState(false)
  const [showSwitch, setShowSwitch] = useState(false)

  const onEdit = async (data) => { /* 20 lines */ }
  const onDuplicate = async (data) => { /* 20 lines */ }
  const onDelete = async () => { /* 15 lines */ }

  return (
    <div>
      {/* Main content */}

      {showEdit && <EditModal onConfirm={onEdit} onClose={() => setShowEdit(false)} />}
      {showDuplicate && <DuplicateModal onConfirm={onDuplicate} onClose={() => setShowDuplicate(false)} />}
      {showDelete && <DeleteConfirm onConfirm={onDelete} onClose={() => setShowDelete(false)} />}
      {showSwitch && <SwitchModal ... />}
    </div>
  )
}

// ✅ After: Modal manager component
// app-info-modals.tsx
type ModalType = 'edit' | 'duplicate' | 'delete' | 'switch' | null

interface AppInfoModalsProps {
  appDetail: AppDetail
  activeModal: ModalType
  onClose: () => void
  onSuccess: () => void
}

const AppInfoModals: FC<AppInfoModalsProps> = ({
  appDetail,
  activeModal,
  onClose,
  onSuccess,
}) => {
  const handleEdit = async (data) => { /* logic */ }
  const handleDuplicate = async (data) => { /* logic */ }
  const handleDelete = async () => { /* logic */ }

  return (
    <>
      {activeModal === 'edit' && (
        <EditModal
          appDetail={appDetail}
          onConfirm={handleEdit}
          onClose={onClose}
        />
      )}
      {activeModal === 'duplicate' && (
        <DuplicateModal
          appDetail={appDetail}
          onConfirm={handleDuplicate}
          onClose={onClose}
        />
      )}
      {activeModal === 'delete' && (
        <DeleteConfirm
          onConfirm={handleDelete}
          onClose={onClose}
        />
      )}
      {activeModal === 'switch' && (
        <SwitchModal
          appDetail={appDetail}
          onClose={onClose}
        />
      )}
    </>
  )
}

// Parent component
const AppInfo = () => {
  const { activeModal, openModal, closeModal } = useModalState()

  return (
    <div>
      {/* Main content with openModal triggers */}
      <Button onClick={() => openModal('edit')}>Edit</Button>

      <AppInfoModals
        appDetail={appDetail}
        activeModal={activeModal}
        onClose={closeModal}
        onSuccess={handleSuccess}
      />
    </div>
  )
}
```

### Strategy 4: List Item Extraction

Extract repeated item rendering.

```typescript
// ❌ Before: Inline item rendering
const OperationsList = () => {
  return (
    <div>
      {operations.map(op => (
        <div key={op.id} className="operation-item">
          <span className="icon">{op.icon}</span>
          <span className="title">{op.title}</span>
          <span className="description">{op.description}</span>
          <button onClick={() => op.onClick()}>
            {op.actionLabel}
          </button>
          {op.badge && <Badge>{op.badge}</Badge>}
          {/* More complex rendering... */}
        </div>
      ))}
    </div>
  )
}

// ✅ After: Extracted item component
interface OperationItemProps {
  operation: Operation
  onAction: (id: string) => void
}

const OperationItem: FC<OperationItemProps> = ({ operation, onAction }) => {
  return (
    <div className="operation-item">
      <span className="icon">{operation.icon}</span>
      <span className="title">{operation.title}</span>
      <span className="description">{operation.description}</span>
      <button onClick={() => onAction(operation.id)}>
        {operation.actionLabel}
      </button>
      {operation.badge && <Badge>{operation.badge}</Badge>}
    </div>
  )
}

const OperationsList = () => {
  const handleAction = useCallback((id: string) => {
    const op = operations.find(o => o.id === id)
    op?.onClick()
  }, [operations])

  return (
    <div>
      {operations.map(op => (
        <OperationItem
          key={op.id}
          operation={op}
          onAction={handleAction}
        />
      ))}
    </div>
  )
}
```

## Directory Structure Patterns

### Pattern A: Flat Structure (Simple Components)

For components with 2-3 sub-components:

```
component-name/
├── index.tsx            # Main component
├── sub-component-a.tsx
├── sub-component-b.tsx
└── types.ts             # Shared types
```

### Pattern B: Nested Structure (Complex Components)

For components with many sub-components:

```
component-name/
├── index.tsx            # Main orchestration
├── types.ts             # Shared types
├── hooks/
│   ├── use-feature-a.ts
│   └── use-feature-b.ts
├── components/
│   ├── header/
│   │   └── index.tsx
│   ├── content/
│   │   └── index.tsx
│   └── modals/
│       └── index.tsx
└── utils/
    └── helpers.ts
```

### Pattern C: Feature-Based Structure (Dify Standard)

Following Dify's existing patterns:

```
configuration/
├── index.tsx            # Main page component
├── base/                # Base/shared components
│   ├── feature-panel/
│   ├── group-name/
│   └── operation-btn/
├── config/              # Config section
│   ├── index.tsx
│   ├── agent/
│   └── automatic/
├── dataset-config/      # Dataset section
│   ├── index.tsx
│   ├── card-item/
│   └── params-config/
├── debug/               # Debug section
│   ├── index.tsx
│   └── hooks.tsx
└── hooks/               # Shared hooks
    └── use-advanced-prompt-config.ts
```

## Props Design

### Minimal Props Principle

Pass only what's needed:

```typescript
// ❌ Bad: Passing entire objects when only some fields needed
<ConfigHeader appDetail={appDetail} modelConfig={modelConfig} />

// ✅ Good: Destructure to minimum required
<ConfigHeader
  appName={appDetail.name}
  isAdvancedMode={modelConfig.isAdvanced}
  onPublish={handlePublish}
/>
```

### Callback Props Pattern

Use callbacks for child-to-parent communication:

```typescript
// Parent
const Parent = () => {
  const [value, setValue] = useState('')

  return (
    <Child
      value={value}
      onChange={setValue}
      onSubmit={handleSubmit}
    />
  )
}

// Child
interface ChildProps {
  value: string
  onChange: (value: string) => void
  onSubmit: () => void
}

const Child: FC<ChildProps> = ({ value, onChange, onSubmit }) => {
  return (
    <div>
      <input value={value} onChange={e => onChange(e.target.value)} />
      <button onClick={onSubmit}>Submit</button>
    </div>
  )
}
```

### Render Props for Flexibility

When sub-components need parent context:

```typescript
interface ListProps<T> {
  items: T[]
  renderItem: (item: T, index: number) => React.ReactNode
  renderEmpty?: () => React.ReactNode
}

function List<T>({ items, renderItem, renderEmpty }: ListProps<T>) {
  if (items.length === 0 && renderEmpty) {
    return <>{renderEmpty()}</>
  }

  return (
    <div>
      {items.map((item, index) => renderItem(item, index))}
    </div>
  )
}

// Usage
<List
  items={operations}
  renderItem={(op, i) => <OperationItem key={i} operation={op} />}
  renderEmpty={() => <EmptyState message="No operations" />}
/>
```

@@ -0,0 +1,317 @@
# Hook Extraction Patterns

This document provides detailed guidance on extracting custom hooks from complex components in Dify.

## When to Extract Hooks

Extract a custom hook when you identify:

1. **Coupled state groups** - Multiple `useState` hooks that are always used together
2. **Complex effects** - `useEffect` with multiple dependencies or cleanup logic
3. **Business logic** - Data transformations, validations, or calculations
4. **Reusable patterns** - Logic that appears in multiple components

## Extraction Process

### Step 1: Identify State Groups

Look for state variables that are logically related:

```typescript
// ❌ These belong together - extract to a hook
const [modelConfig, setModelConfig] = useState<ModelConfig>(...)
const [completionParams, setCompletionParams] = useState<FormValue>({})
const [modelModeType, setModelModeType] = useState<ModelModeType>(...)

// These are model-related state that should live in useModelConfig()
```

### Step 2: Identify Related Effects

Find effects that modify the grouped state:

```typescript
// ❌ These effects belong with the state above
useEffect(() => {
  if (hasFetchedDetail && !modelModeType) {
    const mode = currModel?.model_properties.mode
    if (mode) {
      const newModelConfig = produce(modelConfig, (draft) => {
        draft.mode = mode
      })
      setModelConfig(newModelConfig)
    }
  }
}, [textGenerationModelList, hasFetchedDetail, modelModeType, currModel])
```

### Step 3: Create the Hook

```typescript
// hooks/use-model-config.ts
import type { FormValue } from '@/app/components/header/account-setting/model-provider-page/declarations'
import type { ModelConfig } from '@/models/debug'
import { produce } from 'immer'
import { useEffect, useState } from 'react'
import { ModelModeType } from '@/types/app'

interface UseModelConfigParams {
  initialConfig?: Partial<ModelConfig>
  currModel?: { model_properties?: { mode?: ModelModeType } }
  hasFetchedDetail: boolean
}

interface UseModelConfigReturn {
  modelConfig: ModelConfig
  setModelConfig: (config: ModelConfig) => void
  completionParams: FormValue
  setCompletionParams: (params: FormValue) => void
  modelModeType: ModelModeType
}

export const useModelConfig = ({
  initialConfig,
  currModel,
  hasFetchedDetail,
}: UseModelConfigParams): UseModelConfigReturn => {
  const [modelConfig, setModelConfig] = useState<ModelConfig>({
    provider: 'langgenius/openai/openai',
    model_id: 'gpt-3.5-turbo',
    mode: ModelModeType.unset,
    // ... default values
    ...initialConfig,
  })

  const [completionParams, setCompletionParams] = useState<FormValue>({})

  const modelModeType = modelConfig.mode

  // Fill old app data missing model mode
  useEffect(() => {
    if (hasFetchedDetail && !modelModeType) {
      const mode = currModel?.model_properties?.mode
      if (mode) {
        setModelConfig(produce(modelConfig, (draft) => {
          draft.mode = mode
        }))
      }
    }
  }, [hasFetchedDetail, modelModeType, currModel])

  return {
    modelConfig,
    setModelConfig,
    completionParams,
    setCompletionParams,
    modelModeType,
  }
}
```

### Step 4: Update Component

```typescript
// Before: 50+ lines of state management
const Configuration: FC = () => {
  const [modelConfig, setModelConfig] = useState<ModelConfig>(...)
  // ... lots of related state and effects
}

// After: Clean component
const Configuration: FC = () => {
  const {
    modelConfig,
    setModelConfig,
    completionParams,
    setCompletionParams,
    modelModeType,
  } = useModelConfig({
    currModel,
    hasFetchedDetail,
  })

  // Component now focuses on UI
}
```

## Naming Conventions

### Hook Names

- Use the `use` prefix: `useModelConfig`, `useDatasetConfig`
- Be specific: `useAdvancedPromptConfig`, not `usePrompt`
- Include the domain: `useWorkflowVariables`, `useMCPServer`

### File Names

- Kebab-case: `use-model-config.ts`
- Place in a `hooks/` subdirectory when multiple hooks exist
- Place alongside the component for single-use hooks

### Return Type Names

- Suffix with `Return`: `UseModelConfigReturn`
- Suffix params with `Params`: `UseModelConfigParams`

## Common Hook Patterns in Dify

### 1. Data Fetching Hook (React Query)

```typescript
// Pattern: Use @tanstack/react-query for data fetching
import { useQuery } from '@tanstack/react-query'
import { get } from '@/service/base'
import { useInvalid } from '@/service/use-base'

const NAME_SPACE = 'appConfig'

// Query keys for cache management
export const appConfigQueryKeys = {
  detail: (appId: string) => [NAME_SPACE, 'detail', appId] as const,
}

// Main data hook
export const useAppConfig = (appId: string) => {
  return useQuery({
    enabled: !!appId,
    queryKey: appConfigQueryKeys.detail(appId),
    queryFn: () => get<AppDetailResponse>(`/apps/${appId}`),
    select: data => data?.model_config || null,
  })
}

// Invalidation hook for refreshing data
export const useInvalidAppConfig = () => {
  return useInvalid([NAME_SPACE])
}

// Usage in component
const Component = () => {
  const { data: config, isLoading, error, refetch } = useAppConfig(appId)
  const invalidAppConfig = useInvalidAppConfig()

  const handleRefresh = () => {
    invalidAppConfig() // Invalidates the cache and triggers a refetch
  }

  return <div>...</div>
}
```

### 2. Form State Hook

```typescript
// Pattern: Form state + validation + submission
export const useConfigForm = (initialValues: ConfigFormValues) => {
  const [values, setValues] = useState(initialValues)
  const [errors, setErrors] = useState<Record<string, string>>({})
  const [isSubmitting, setIsSubmitting] = useState(false)

  const validate = useCallback(() => {
    const newErrors: Record<string, string> = {}
    if (!values.name) newErrors.name = 'Name is required'
    setErrors(newErrors)
    return Object.keys(newErrors).length === 0
  }, [values])

  const handleChange = useCallback((field: string, value: any) => {
    setValues(prev => ({ ...prev, [field]: value }))
  }, [])

  const handleSubmit = useCallback(async (onSubmit: (values: ConfigFormValues) => Promise<void>) => {
    if (!validate()) return
    setIsSubmitting(true)
    try {
      await onSubmit(values)
    } finally {
      setIsSubmitting(false)
    }
  }, [values, validate])

  return { values, errors, isSubmitting, handleChange, handleSubmit }
}
```

### 3. Modal State Hook

```typescript
// Pattern: Multiple modal management
type ModalType = 'edit' | 'delete' | 'duplicate' | null

export const useModalState = () => {
  const [activeModal, setActiveModal] = useState<ModalType>(null)
  const [modalData, setModalData] = useState<any>(null)

  const openModal = useCallback((type: ModalType, data?: any) => {
    setActiveModal(type)
    setModalData(data)
  }, [])

  const closeModal = useCallback(() => {
    setActiveModal(null)
    setModalData(null)
  }, [])

  return {
    activeModal,
    modalData,
    openModal,
    closeModal,
    isOpen: useCallback((type: ModalType) => activeModal === type, [activeModal]),
  }
}
```

### 4. Toggle/Boolean Hook

```typescript
// Pattern: Boolean state with convenience methods
export const useToggle = (initialValue = false) => {
  const [value, setValue] = useState(initialValue)

  const toggle = useCallback(() => setValue(v => !v), [])
  const setTrue = useCallback(() => setValue(true), [])
  const setFalse = useCallback(() => setValue(false), [])

  return [value, { toggle, setTrue, setFalse, set: setValue }] as const
}

// Usage
const [isExpanded, { toggle, setTrue: expand, setFalse: collapse }] = useToggle()
```

## Testing Extracted Hooks

After extraction, test hooks in isolation:

```typescript
// use-model-config.spec.ts
import { renderHook, act } from '@testing-library/react'
import { ModelModeType } from '@/types/app'
import { useModelConfig } from './use-model-config'

describe('useModelConfig', () => {
  it('should initialize with default values', () => {
    const { result } = renderHook(() => useModelConfig({
      hasFetchedDetail: false,
    }))

    expect(result.current.modelConfig.provider).toBe('langgenius/openai/openai')
    expect(result.current.modelModeType).toBe(ModelModeType.unset)
  })

  it('should update model config', () => {
    const { result } = renderHook(() => useModelConfig({
      hasFetchedDetail: true,
    }))

    act(() => {
      result.current.setModelConfig({
        ...result.current.modelConfig,
        model_id: 'gpt-4',
      })
    })

    expect(result.current.modelConfig.model_id).toBe('gpt-4')
  })
})
```

@@ -318,5 +318,5 @@ For more detailed information, refer to:

 - `web/vitest.config.ts` - Vitest configuration
 - `web/vitest.setup.ts` - Test environment setup
-- `web/testing/analyze-component.js` - Component analysis tool
+- `web/scripts/analyze-component.js` - Component analysis tool
 - Modules are not mocked automatically. Global mocks live in `web/vitest.setup.ts` (for example `react-i18next`, `next/image`); mock other modules like `ky` or `mime` locally in test files.
@ -28,17 +28,14 @@ import userEvent from '@testing-library/user-event'

// i18n (automatically mocked)
// WHY: Global mock in web/vitest.setup.ts is auto-loaded by Vitest setup
// No explicit mock needed - it returns translation keys as-is
// The global mock provides: useTranslation, Trans, useMixedTranslation, useGetLanguage
// No explicit mock needed for most tests
//
// Override only if custom translations are required:
// vi.mock('react-i18next', () => ({
//   useTranslation: () => ({
//     t: (key: string) => {
//       const customTranslations: Record<string, string> = {
//         'my.custom.key': 'Custom Translation',
//       }
//       return customTranslations[key] || key
//     },
//   }),
// import { createReactI18nextMock } from '@/test/i18n-mock'
// vi.mock('react-i18next', () => createReactI18nextMock({
//   'my.custom.key': 'Custom Translation',
//   'button.save': 'Save',
// }))

// Router (if component uses useRouter, usePathname, useSearchParams)

@ -52,23 +52,29 @@ Modules are not mocked automatically. Use `vi.mock` in test files, or add global

### 1. i18n (Auto-loaded via Global Mock)

A global mock is defined in `web/vitest.setup.ts` and is auto-loaded by Vitest setup.
**No explicit mock needed** for most tests - it returns translation keys as-is.

For tests requiring custom translations, override the mock:
The global mock provides:

- `useTranslation` - returns translation keys with namespace prefix
- `Trans` component - renders i18nKey and components
- `useMixedTranslation` (from `@/app/components/plugins/marketplace/hooks`)
- `useGetLanguage` (from `@/context/i18n`) - returns `'en-US'`

**Default behavior**: Most tests should use the global mock (no local override needed).

**For custom translations**: Use the helper function from `@/test/i18n-mock`:

```typescript
vi.mock('react-i18next', () => ({
  useTranslation: () => ({
    t: (key: string) => {
      const translations: Record<string, string> = {
        'my.custom.key': 'Custom translation',
      }
      return translations[key] || key
    },
  }),
import { createReactI18nextMock } from '@/test/i18n-mock'

vi.mock('react-i18next', () => createReactI18nextMock({
  'my.custom.key': 'Custom translation',
  'button.save': 'Save',
}))
```

**Avoid**: Manually defining `useTranslation` mocks that just return the key - the global mock already does this.

### 2. Next.js Router

```typescript

@ -22,12 +22,12 @@ jobs:

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup UV and Python
        uses: astral-sh/setup-uv@v6
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

@ -57,7 +57,7 @@ jobs:
        run: sh .github/workflows/expose_service_ports.sh

      - name: Set up Sandbox
        uses: hoverkraft-tech/compose-action@v2.0.2
        uses: hoverkraft-tech/compose-action@v2
        with:
          compose-file: |
            docker/docker-compose.middleware.yaml

@ -12,7 +12,7 @@ jobs:
    if: github.repository == 'langgenius/dify'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/checkout@v6

      - name: Check Docker Compose inputs
        id: docker-compose-changes

@ -27,7 +27,7 @@ jobs:
        with:
          python-version: "3.11"

      - uses: astral-sh/setup-uv@v6
      - uses: astral-sh/setup-uv@v7

      - name: Generate Docker Compose
        if: steps.docker-compose-changes.outputs.any_changed == 'true'

@ -90,7 +90,7 @@ jobs:
          touch "/tmp/digests/${sanitized_digest}"

      - name: Upload digest
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v6
        with:
          name: digests-${{ matrix.context }}-${{ env.PLATFORM_PAIR }}
          path: /tmp/digests/*

@ -13,13 +13,13 @@ jobs:

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Setup UV and Python
        uses: astral-sh/setup-uv@v6
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: "3.12"

@ -63,13 +63,13 @@ jobs:

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Setup UV and Python
        uses: astral-sh/setup-uv@v6
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: "3.12"

@ -27,7 +27,7 @@ jobs:
      vdb-changed: ${{ steps.changes.outputs.vdb }}
      migration-changed: ${{ steps.changes.outputs.migration }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/checkout@v6
      - uses: dorny/paths-filter@v3
        id: changes
        with:

@ -38,6 +38,7 @@ jobs:
            - '.github/workflows/api-tests.yml'
          web:
            - 'web/**'
            - '.github/workflows/web-tests.yml'
          vdb:
            - 'api/core/rag/datasource/**'
            - 'docker/**'

@ -19,13 +19,13 @@ jobs:

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v46
        uses: tj-actions/changed-files@v47
        with:
          files: |
            api/**

@ -33,7 +33,7 @@ jobs:

      - name: Setup UV and Python
        if: steps.changed-files.outputs.any_changed == 'true'
        uses: astral-sh/setup-uv@v6
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: false
          python-version: "3.12"

@ -68,15 +68,17 @@ jobs:

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v46
        uses: tj-actions/changed-files@v47
        with:
          files: web/**
          files: |
            web/**
            .github/workflows/style.yml

      - name: Install pnpm
        uses: pnpm/action-setup@v4

@ -85,7 +87,7 @@ jobs:
          run_install: false

      - name: Setup NodeJS
        uses: actions/setup-node@v4
        uses: actions/setup-node@v6
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
          node-version: 22

@ -108,20 +110,30 @@ jobs:
        working-directory: ./web
        run: pnpm run type-check:tsgo

      - name: Web dead code check
        if: steps.changed-files.outputs.any_changed == 'true'
        working-directory: ./web
        run: pnpm run knip

      - name: Web build check
        if: steps.changed-files.outputs.any_changed == 'true'
        working-directory: ./web
        run: pnpm run build

  superlinter:
    name: SuperLinter
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
          persist-credentials: false

      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v46
        uses: tj-actions/changed-files@v47
        with:
          files: |
            **.sh

@ -25,12 +25,12 @@ jobs:
        working-directory: sdks/nodejs-client

    steps:
      - uses: actions/checkout@v4
      - uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v4
        uses: actions/setup-node@v6
        with:
          node-version: ${{ matrix.node-version }}
          cache: ''

@ -4,7 +4,7 @@ on:
  push:
    branches: [main]
    paths:
      - 'web/i18n/en-US/*.ts'
      - 'web/i18n/en-US/*.json'

permissions:
  contents: write

@ -18,7 +18,7 @@ jobs:
      run:
        working-directory: web
    steps:
      - uses: actions/checkout@v4
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}

@ -28,13 +28,13 @@ jobs:
        run: |
          git fetch origin "${{ github.event.before }}" || true
          git fetch origin "${{ github.sha }}" || true
          changed_files=$(git diff --name-only "${{ github.event.before }}" "${{ github.sha }}" -- 'i18n/en-US/*.ts')
          changed_files=$(git diff --name-only "${{ github.event.before }}" "${{ github.sha }}" -- 'i18n/en-US/*.json')
          echo "Changed files: $changed_files"
          if [ -n "$changed_files" ]; then
            echo "FILES_CHANGED=true" >> $GITHUB_ENV
            file_args=""
            for file in $changed_files; do
              filename=$(basename "$file" .ts)
              filename=$(basename "$file" .json)
              file_args="$file_args --file $filename"
            done
            echo "FILE_ARGS=$file_args" >> $GITHUB_ENV

@ -51,7 +51,7 @@ jobs:

      - name: Set up Node.js
        if: env.FILES_CHANGED == 'true'
        uses: actions/setup-node@v4
        uses: actions/setup-node@v6
        with:
          node-version: 'lts/*'
          cache: pnpm

@ -65,7 +65,7 @@ jobs:
      - name: Generate i18n translations
        if: env.FILES_CHANGED == 'true'
        working-directory: ./web
        run: pnpm run auto-gen-i18n ${{ env.FILE_ARGS }}
        run: pnpm run i18n:gen ${{ env.FILE_ARGS }}

      - name: Create Pull Request
        if: env.FILES_CHANGED == 'true'

@ -19,19 +19,19 @@ jobs:

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Free Disk Space
        uses: endersonmenezes/free-disk-space@v2
        uses: endersonmenezes/free-disk-space@v3
        with:
          remove_dotnet: true
          remove_haskell: true
          remove_tool_cache: true

      - name: Setup UV and Python
        uses: astral-sh/setup-uv@v6
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true
          python-version: ${{ matrix.python-version }}

@ -18,7 +18,7 @@ jobs:

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        uses: actions/checkout@v6
        with:
          persist-credentials: false

@ -29,7 +29,7 @@ jobs:
          run_install: false

      - name: Setup Node.js
        uses: actions/setup-node@v4
        uses: actions/setup-node@v6
        with:
          node-version: 22
          cache: pnpm

@ -360,7 +360,7 @@ jobs:

      - name: Upload Coverage Artifact
        if: steps.coverage-summary.outputs.has_coverage == 'true'
        uses: actions/upload-artifact@v4
        uses: actions/upload-artifact@v6
        with:
          name: web-coverage-report
          path: web/coverage

.mcp.json

@ -1,34 +0,0 @@
{
  "mcpServers": {
    "context7": {
      "type": "http",
      "url": "https://mcp.context7.com/mcp"
    },
    "sequential-thinking": {
      "type": "stdio",
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-sequential-thinking"],
      "env": {}
    },
    "github": {
      "type": "stdio",
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-github"],
      "env": {
        "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_PERSONAL_ACCESS_TOKEN}"
      }
    },
    "fetch": {
      "type": "stdio",
      "command": "uvx",
      "args": ["mcp-server-fetch"],
      "env": {}
    },
    "playwright": {
      "type": "stdio",
      "command": "npx",
      "args": ["-y", "@playwright/mcp@latest"],
      "env": {}
    }
  }
}

@ -101,6 +101,15 @@ S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region

# Workflow run and Conversation archive storage (S3-compatible)
ARCHIVE_STORAGE_ENABLED=false
ARCHIVE_STORAGE_ENDPOINT=
ARCHIVE_STORAGE_ARCHIVE_BUCKET=
ARCHIVE_STORAGE_EXPORT_BUCKET=
ARCHIVE_STORAGE_ACCESS_KEY=
ARCHIVE_STORAGE_SECRET_KEY=
ARCHIVE_STORAGE_REGION=auto

# Azure Blob Storage configuration
AZURE_BLOB_ACCOUNT_NAME=your-account-name
AZURE_BLOB_ACCOUNT_KEY=your-account-key

@ -128,6 +137,7 @@ TENCENT_COS_SECRET_KEY=your-secret-key
TENCENT_COS_SECRET_ID=your-secret-id
TENCENT_COS_REGION=your-region
TENCENT_COS_SCHEME=your-scheme
TENCENT_COS_CUSTOM_DOMAIN=your-custom-domain

# Huawei OBS Storage Configuration
HUAWEI_OBS_BUCKET_NAME=your-bucket-name

@ -1,4 +1,8 @@
exclude = ["migrations/*"]
exclude = [
    "migrations/*",
    ".git",
    ".git/**",
]
line-length = 120

[format]

@ -1,9 +1,11 @@
from configs.extra.archive_config import ArchiveStorageConfig
from configs.extra.notion_config import NotionConfig
from configs.extra.sentry_config import SentryConfig


class ExtraServiceConfig(
    # place the configs in alphabet order
    ArchiveStorageConfig,
    NotionConfig,
    SentryConfig,
):

@ -0,0 +1,43 @@
from pydantic import Field
from pydantic_settings import BaseSettings


class ArchiveStorageConfig(BaseSettings):
    """
    Configuration settings for workflow run logs archiving storage.
    """

    ARCHIVE_STORAGE_ENABLED: bool = Field(
        description="Enable workflow run logs archiving to S3-compatible storage",
        default=False,
    )

    ARCHIVE_STORAGE_ENDPOINT: str | None = Field(
        description="URL of the S3-compatible storage endpoint (e.g., 'https://storage.example.com')",
        default=None,
    )

    ARCHIVE_STORAGE_ARCHIVE_BUCKET: str | None = Field(
        description="Name of the bucket to store archived workflow logs",
        default=None,
    )

    ARCHIVE_STORAGE_EXPORT_BUCKET: str | None = Field(
        description="Name of the bucket to store exported workflow runs",
        default=None,
    )

    ARCHIVE_STORAGE_ACCESS_KEY: str | None = Field(
        description="Access key ID for authenticating with storage",
        default=None,
    )

    ARCHIVE_STORAGE_SECRET_KEY: str | None = Field(
        description="Secret access key for authenticating with storage",
        default=None,
    )

    ARCHIVE_STORAGE_REGION: str = Field(
        description="Region for storage (use 'auto' if the provider supports it)",
        default="auto",
    )

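For context on how these settings behave: `pydantic-settings` populates each `Field` from an environment variable of the same name, falling back to the declared default. A minimal standalone sketch (not Dify code; it just mirrors two of the fields from the hunk above):

```python
import os

from pydantic import Field
from pydantic_settings import BaseSettings


class ArchiveStorageConfig(BaseSettings):
    ARCHIVE_STORAGE_ENABLED: bool = Field(default=False)
    ARCHIVE_STORAGE_REGION: str = Field(default="auto")


# Values come from the process environment; unset fields keep their defaults.
os.environ["ARCHIVE_STORAGE_ENABLED"] = "true"
config = ArchiveStorageConfig()
assert config.ARCHIVE_STORAGE_ENABLED is True
assert config.ARCHIVE_STORAGE_REGION == "auto"
```

This matches the `.env.example` additions earlier in the diff: each `ARCHIVE_STORAGE_*` variable maps one-to-one onto a field here.
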
@ -31,3 +31,8 @@ class TencentCloudCOSStorageConfig(BaseSettings):
        description="Protocol scheme for COS requests: 'https' (recommended) or 'http'",
        default=None,
    )

    TENCENT_COS_CUSTOM_DOMAIN: str | None = Field(
        description="Tencent Cloud COS custom domain setting",
        default=None,
    )

@ -1,3 +1,4 @@
import re
import uuid
from typing import Literal


@ -73,6 +74,48 @@ class AppListQuery(BaseModel):
            raise ValueError("Invalid UUID format in tag_ids.") from exc


# XSS prevention: patterns that could lead to XSS attacks
# Includes: script tags, iframe tags, javascript: protocol, SVG with onload, etc.
_XSS_PATTERNS = [
    r"<script[^>]*>.*?</script>",  # Script tags
    r"<iframe\b[^>]*?(?:/>|>.*?</iframe>)",  # Iframe tags (including self-closing)
    r"javascript:",  # JavaScript protocol
    r"<svg[^>]*?\s+onload\s*=[^>]*>",  # SVG with onload handler (attribute-aware, flexible whitespace)
    r"<.*?on\s*\w+\s*=",  # Event handlers like onclick, onerror, etc.
    r"<object\b[^>]*(?:\s*/>|>.*?</object\s*>)",  # Object tags (opening tag)
    r"<embed[^>]*>",  # Embed tags (self-closing)
    r"<link[^>]*>",  # Link tags with javascript
]


def _validate_xss_safe(value: str | None, field_name: str = "Field") -> str | None:
    """
    Validate that a string value doesn't contain potential XSS payloads.

    Args:
        value: The string value to validate
        field_name: Name of the field for error messages

    Returns:
        The original value if safe

    Raises:
        ValueError: If the value contains XSS patterns
    """
    if value is None:
        return None

    value_lower = value.lower()
    for pattern in _XSS_PATTERNS:
        if re.search(pattern, value_lower, re.DOTALL | re.IGNORECASE):
            raise ValueError(
                f"{field_name} contains invalid characters or patterns. "
                "HTML tags, JavaScript, and other potentially dangerous content are not allowed."
            )

    return value


class CreateAppPayload(BaseModel):
    name: str = Field(..., min_length=1, description="App name")
    description: str | None = Field(default=None, description="App description (max 400 chars)", max_length=400)

@ -81,6 +124,11 @@ class CreateAppPayload(BaseModel):
    icon: str | None = Field(default=None, description="Icon")
    icon_background: str | None = Field(default=None, description="Icon background color")

    @field_validator("name", "description", mode="before")
    @classmethod
    def validate_xss_safe(cls, value: str | None, info) -> str | None:
        return _validate_xss_safe(value, info.field_name)


class UpdateAppPayload(BaseModel):
    name: str = Field(..., min_length=1, description="App name")

@ -91,6 +139,11 @@ class UpdateAppPayload(BaseModel):
    use_icon_as_answer_icon: bool | None = Field(default=None, description="Use icon as answer icon")
    max_active_requests: int | None = Field(default=None, description="Maximum active requests")

    @field_validator("name", "description", mode="before")
    @classmethod
    def validate_xss_safe(cls, value: str | None, info) -> str | None:
        return _validate_xss_safe(value, info.field_name)


class CopyAppPayload(BaseModel):
    name: str | None = Field(default=None, description="Name for the copied app")

@ -99,6 +152,11 @@ class CopyAppPayload(BaseModel):
    icon: str | None = Field(default=None, description="Icon")
    icon_background: str | None = Field(default=None, description="Icon background color")

    @field_validator("name", "description", mode="before")
    @classmethod
    def validate_xss_safe(cls, value: str | None, info) -> str | None:
        return _validate_xss_safe(value, info.field_name)


class AppExportQuery(BaseModel):
    include_secret: bool = Field(default=False, description="Include secrets in export")

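The mechanics are plain `re.search` over the lowercased input: the first matching pattern rejects the whole value. A minimal self-contained sketch of the same idea (one short pattern list only; the real list in the hunk above is longer):

```python
import re

_PATTERNS = [r"<script[^>]*>.*?</script>", r"javascript:"]


def validate_xss_safe(value: str, field_name: str = "Field") -> str:
    lowered = value.lower()
    for pattern in _PATTERNS:
        if re.search(pattern, lowered, re.DOTALL | re.IGNORECASE):
            raise ValueError(f"{field_name} contains potentially dangerous content.")
    return value


validate_xss_safe("My App")  # passes unchanged
try:
    validate_xss_safe('<script>alert("x")</script>', "name")
except ValueError as exc:
    print(exc)  # name contains potentially dangerous content.
```

Because the validators run with `mode="before"`, the check happens on the raw input string before any other field coercion.
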
@ -124,7 +124,7 @@ class OAuthCallback(Resource):
            return redirect(f"{dify_config.CONSOLE_WEB_URL}/signin/invite-settings?invite_token={invite_token}")

        try:
            account = _generate_account(provider, user_info)
            account, oauth_new_user = _generate_account(provider, user_info)
        except AccountNotFoundError:
            return redirect(f"{dify_config.CONSOLE_WEB_URL}/signin?message=Account not found.")
        except (WorkSpaceNotFoundError, WorkSpaceNotAllowedCreateError):

@ -159,7 +159,10 @@ class OAuthCallback(Resource):
            ip_address=extract_remote_ip(request),
        )

        response = redirect(f"{dify_config.CONSOLE_WEB_URL}")
        base_url = dify_config.CONSOLE_WEB_URL
        query_char = "&" if "?" in base_url else "?"
        target_url = f"{base_url}{query_char}oauth_new_user={str(oauth_new_user).lower()}"
        response = redirect(target_url)

        set_access_token_to_cookie(request, response, token_pair.access_token)
        set_refresh_token_to_cookie(request, response, token_pair.refresh_token)

@ -177,9 +180,10 @@ def _get_account_by_openid_or_email(provider: str, user_info: OAuthUserInfo) ->
    return account


def _generate_account(provider: str, user_info: OAuthUserInfo):
def _generate_account(provider: str, user_info: OAuthUserInfo) -> tuple[Account, bool]:
    # Get account by openid or email.
    account = _get_account_by_openid_or_email(provider, user_info)
    oauth_new_user = False

    if account:
        tenants = TenantService.get_join_tenants(account)

@ -193,6 +197,7 @@ def _generate_account(provider: str, user_info: OAuthUserInfo):
            tenant_was_created.send(new_tenant)

    if not account:
        oauth_new_user = True
        if not FeatureService.get_system_features().is_allow_register:
            if dify_config.BILLING_ENABLED and BillingService.is_email_in_freeze(user_info.email):
                raise AccountRegisterError(

@ -220,4 +225,4 @@ def _generate_account(provider: str, user_info: OAuthUserInfo):
    # Link account
    AccountService.link_account_integrate(provider, user_info.id, account)

    return account
    return account, oauth_new_user

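The `query_char` dance exists because `CONSOLE_WEB_URL` may already carry a query string. A hedged sketch of the same logic in isolation (URLs here are placeholders, not real Dify endpoints):

```python
def append_param(base_url: str, key: str, value: str) -> str:
    # Use '&' when the URL already has a query string, '?' otherwise.
    query_char = "&" if "?" in base_url else "?"
    return f"{base_url}{query_char}{key}={value}"


print(append_param("https://console.example.com", "oauth_new_user", "true"))
# https://console.example.com?oauth_new_user=true
print(append_param("https://console.example.com?lang=en", "oauth_new_user", "true"))
# https://console.example.com?lang=en&oauth_new_user=true
```
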
@ -1,8 +1,9 @@
import base64
from typing import Literal

from flask import request
from flask_restx import Resource, fields
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field
from werkzeug.exceptions import BadRequest

from controllers.console import console_ns

@ -15,22 +16,8 @@ DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"


class SubscriptionQuery(BaseModel):
    plan: str = Field(..., description="Subscription plan")
    interval: str = Field(..., description="Billing interval")

    @field_validator("plan")
    @classmethod
    def validate_plan(cls, value: str) -> str:
        if value not in [CloudPlan.PROFESSIONAL, CloudPlan.TEAM]:
            raise ValueError("Invalid plan")
        return value

    @field_validator("interval")
    @classmethod
    def validate_interval(cls, value: str) -> str:
        if value not in {"month", "year"}:
            raise ValueError("Invalid interval")
        return value
    plan: Literal[CloudPlan.PROFESSIONAL, CloudPlan.TEAM] = Field(..., description="Subscription plan")
    interval: Literal["month", "year"] = Field(..., description="Billing interval")


class PartnerTenantsPayload(BaseModel):

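Declaring the fields as `Literal[...]` lets pydantic reject bad values at parse time, which is exactly what the two deleted `field_validator`s did by hand. A minimal sketch of the behavior:

```python
from typing import Literal

from pydantic import BaseModel, ValidationError


class SubscriptionQuery(BaseModel):
    interval: Literal["month", "year"]


SubscriptionQuery(interval="month")  # accepted
try:
    SubscriptionQuery(interval="week")
except ValidationError as exc:
    print(exc.errors()[0]["type"])  # literal_error
```

The `Literal` form also surfaces the allowed values in the generated OpenAPI schema, which string fields plus custom validators do not.
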
@ -3,10 +3,12 @@ import uuid
from flask import request
from flask_restx import Resource, marshal
from pydantic import BaseModel, Field
from sqlalchemy import select
from sqlalchemy import String, cast, func, or_, select
from sqlalchemy.dialects.postgresql import JSONB
from werkzeug.exceptions import Forbidden, NotFound

import services
from configs import dify_config
from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.app.error import ProviderNotInitializeError

@ -143,7 +145,29 @@ class DatasetDocumentSegmentListApi(Resource):
            query = query.where(DocumentSegment.hit_count >= hit_count_gte)

        if keyword:
            query = query.where(DocumentSegment.content.ilike(f"%{keyword}%"))
            # Search in both content and keywords fields
            # Use database-specific methods for JSON array search
            if dify_config.SQLALCHEMY_DATABASE_URI_SCHEME == "postgresql":
                # PostgreSQL: Use jsonb_array_elements_text to properly handle Unicode/Chinese text
                keywords_condition = func.array_to_string(
                    func.array(
                        select(func.jsonb_array_elements_text(cast(DocumentSegment.keywords, JSONB)))
                        .correlate(DocumentSegment)
                        .scalar_subquery()
                    ),
                    ",",
                ).ilike(f"%{keyword}%")
            else:
                # MySQL: Cast JSON to string for pattern matching
                # MySQL stores Chinese text directly in JSON without Unicode escaping
                keywords_condition = cast(DocumentSegment.keywords, String).ilike(f"%{keyword}%")

            query = query.where(
                or_(
                    DocumentSegment.content.ilike(f"%{keyword}%"),
                    keywords_condition,
                )
            )

        if args.enabled.lower() != "all":
            if args.enabled.lower() == "true":

@ -1,6 +1,5 @@
import logging
from typing import Literal
from uuid import UUID

from flask import request
from flask_restx import marshal_with

@ -26,6 +25,7 @@ from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotIni
from core.model_runtime.errors.invoke import InvokeError
from fields.message_fields import message_infinite_scroll_pagination_fields
from libs import helper
from libs.helper import UUIDStrOrEmpty
from libs.login import current_account_with_tenant
from models.model import AppMode
from services.app_generate_service import AppGenerateService

@ -44,8 +44,8 @@ logger = logging.getLogger(__name__)


class MessageListQuery(BaseModel):
    conversation_id: UUID
    first_id: UUID | None = None
    conversation_id: UUIDStrOrEmpty
    first_id: UUIDStrOrEmpty | None = None
    limit: int = Field(default=20, ge=1, le=100)

@ -1,5 +1,3 @@
from uuid import UUID

from flask import request
from flask_restx import fields, marshal_with
from pydantic import BaseModel, Field

@ -10,19 +8,19 @@ from controllers.console import console_ns
from controllers.console.explore.error import NotCompletionAppError
from controllers.console.explore.wraps import InstalledAppResource
from fields.conversation_fields import message_file_fields
from libs.helper import TimestampField
from libs.helper import TimestampField, UUIDStrOrEmpty
from libs.login import current_account_with_tenant
from services.errors.message import MessageNotExistsError
from services.saved_message_service import SavedMessageService


class SavedMessageListQuery(BaseModel):
    last_id: UUID | None = None
    last_id: UUIDStrOrEmpty | None = None
    limit: int = Field(default=20, ge=1, le=100)


class SavedMessageCreatePayload(BaseModel):
    message_id: UUID
    message_id: UUIDStrOrEmpty


register_schema_models(console_ns, SavedMessageListQuery, SavedMessageCreatePayload)

@ -1,6 +1,8 @@
from flask_restx import Resource, reqparse
from flask_restx import Resource
from pydantic import BaseModel
from werkzeug.exceptions import Forbidden

from controllers.common.schema import register_schema_models
from controllers.console import console_ns
from controllers.console.wraps import account_initialization_required, setup_required
from core.model_runtime.entities.model_entities import ModelType

@ -10,10 +12,20 @@ from models import TenantAccountRole
from services.model_load_balancing_service import ModelLoadBalancingService


class LoadBalancingCredentialPayload(BaseModel):
    model: str
    model_type: ModelType
    credentials: dict[str, object]


register_schema_models(console_ns, LoadBalancingCredentialPayload)


@console_ns.route(
    "/workspaces/current/model-providers/<path:provider>/models/load-balancing-configs/credentials-validate"
)
class LoadBalancingCredentialsValidateApi(Resource):
    @console_ns.expect(console_ns.models[LoadBalancingCredentialPayload.__name__])
    @setup_required
    @login_required
    @account_initialization_required

@ -24,20 +36,7 @@ class LoadBalancingCredentialsValidateApi(Resource):

        tenant_id = current_tenant_id

        parser = (
            reqparse.RequestParser()
            .add_argument("model", type=str, required=True, nullable=False, location="json")
            .add_argument(
                "model_type",
                type=str,
                required=True,
                nullable=False,
                choices=[mt.value for mt in ModelType],
                location="json",
            )
            .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
        )
        args = parser.parse_args()
        payload = LoadBalancingCredentialPayload.model_validate(console_ns.payload or {})

        # validate model load balancing credentials
        model_load_balancing_service = ModelLoadBalancingService()

@ -49,9 +48,9 @@ class LoadBalancingCredentialsValidateApi(Resource):
            model_load_balancing_service.validate_load_balancing_credentials(
                tenant_id=tenant_id,
                provider=provider,
                model=args["model"],
                model_type=args["model_type"],
                credentials=args["credentials"],
                model=payload.model,
                model_type=payload.model_type,
                credentials=payload.credentials,
            )
        except CredentialsValidateFailedError as ex:
            result = False

@ -69,6 +68,7 @@ class LoadBalancingCredentialsValidateApi(Resource):
    "/workspaces/current/model-providers/<path:provider>/models/load-balancing-configs/<string:config_id>/credentials-validate"
)
class LoadBalancingConfigCredentialsValidateApi(Resource):
    @console_ns.expect(console_ns.models[LoadBalancingCredentialPayload.__name__])
    @setup_required
    @login_required
    @account_initialization_required

@ -79,20 +79,7 @@ class LoadBalancingConfigCredentialsValidateApi(Resource):

        tenant_id = current_tenant_id

        parser = (
            reqparse.RequestParser()
            .add_argument("model", type=str, required=True, nullable=False, location="json")
            .add_argument(
                "model_type",
                type=str,
                required=True,
                nullable=False,
                choices=[mt.value for mt in ModelType],
                location="json",
            )
            .add_argument("credentials", type=dict, required=True, nullable=False, location="json")
        )
        args = parser.parse_args()
        payload = LoadBalancingCredentialPayload.model_validate(console_ns.payload or {})

        # validate model load balancing config credentials
        model_load_balancing_service = ModelLoadBalancingService()

@ -104,9 +91,9 @@ class LoadBalancingConfigCredentialsValidateApi(Resource):
            model_load_balancing_service.validate_load_balancing_credentials(
                tenant_id=tenant_id,
                provider=provider,
                model=args["model"],
                model_type=args["model_type"],
                credentials=args["credentials"],
                model=payload.model,
                model_type=payload.model_type,
                credentials=payload.credentials,
                config_id=config_id,
            )
        except CredentialsValidateFailedError as ex:

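The pattern in this hunk, swapping `reqparse` for a pydantic model plus `model_validate` on the raw payload, gives typed attributes and one validation error path instead of hand-built argument parsers. A standalone sketch (the `ModelType` enum here is a stand-in for the real one in `core.model_runtime`):

```python
from enum import StrEnum

from pydantic import BaseModel, ConfigDict, ValidationError


class ModelType(StrEnum):  # stand-in for core.model_runtime's enum
    LLM = "llm"
    TEXT_EMBEDDING = "text-embedding"


class LoadBalancingCredentialPayload(BaseModel):
    # Allow field names starting with 'model' without pydantic's namespace warning.
    model_config = ConfigDict(protected_namespaces=())

    model: str
    model_type: ModelType
    credentials: dict[str, object]


payload = LoadBalancingCredentialPayload.model_validate(
    {"model": "gpt-4", "model_type": "llm", "credentials": {"api_key": "..."}}
)
print(payload.model_type)  # ModelType.LLM

try:
    LoadBalancingCredentialPayload.model_validate({"model": "gpt-4"})
except ValidationError as exc:
    print(len(exc.errors()))  # 2 (model_type and credentials are missing)
```
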
@ -1,4 +1,5 @@
import io
import logging
from urllib.parse import urlparse

from flask import make_response, redirect, request, send_file

@ -17,8 +18,8 @@ from controllers.console.wraps import (
    is_admin_or_owner_required,
    setup_required,
)
from core.db.session_factory import session_factory
from core.entities.mcp_provider import MCPAuthentication, MCPConfiguration
from core.helper.tool_provider_cache import ToolProviderListCache
from core.mcp.auth.auth_flow import auth, handle_callback
from core.mcp.error import MCPAuthError, MCPError, MCPRefreshTokenError
from core.mcp.mcp_client import MCPClient

@ -40,6 +41,8 @@ from services.tools.tools_manage_service import ToolCommonService
from services.tools.tools_transform_service import ToolTransformService
from services.tools.workflow_tools_manage_service import WorkflowToolManageService

logger = logging.getLogger(__name__)


def is_valid_url(url: str) -> bool:
    if not url:

@ -945,8 +948,8 @@ class ToolProviderMCPApi(Resource):
        configuration = MCPConfiguration.model_validate(args["configuration"])
        authentication = MCPAuthentication.model_validate(args["authentication"]) if args["authentication"] else None

        # Create provider in transaction
        with Session(db.engine) as session, session.begin():
        # 1) Create provider in a short transaction (no network I/O inside)
        with session_factory.create_session() as session, session.begin():
            service = MCPToolManageService(session=session)
            result = service.create_provider(
                tenant_id=tenant_id,

@ -962,8 +965,26 @@ class ToolProviderMCPApi(Resource):
                authentication=authentication,
            )

        # Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations
        ToolProviderListCache.invalidate_cache(tenant_id)
        # 2) Try to fetch tools immediately after creation so they appear without a second save.
        # Perform network I/O outside any DB session to avoid holding locks.
        try:
            reconnect = MCPToolManageService.reconnect_with_url(
                server_url=args["server_url"],
                headers=args.get("headers") or {},
                timeout=configuration.timeout,
                sse_read_timeout=configuration.sse_read_timeout,
            )
            # Update just-created provider with authed/tools in a new short transaction
            with session_factory.create_session() as session, session.begin():
                service = MCPToolManageService(session=session)
                db_provider = service.get_provider(provider_id=result.id, tenant_id=tenant_id)
                db_provider.authed = reconnect.authed
                db_provider.tools = reconnect.tools

                result = ToolTransformService.mcp_provider_to_user_provider(db_provider, for_list=True)
        except Exception:
            # Best-effort: if initial fetch fails (e.g., auth required), return created provider as-is
            logger.warning("Failed to fetch MCP tools after creation", exc_info=True)

        return jsonable_encoder(result)

@ -1011,9 +1032,6 @@ class ToolProviderMCPApi(Resource):
            validation_result=validation_result,
        )

        # Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations
        ToolProviderListCache.invalidate_cache(current_tenant_id)

        return {"result": "success"}

    @console_ns.expect(parser_mcp_delete)

@ -1028,9 +1046,6 @@ class ToolProviderMCPApi(Resource):
            service = MCPToolManageService(session=session)
            service.delete_provider(tenant_id=current_tenant_id, provider_id=args["provider_id"])

        # Invalidate cache AFTER transaction commits to avoid holding locks during Redis operations
        ToolProviderListCache.invalidate_cache(current_tenant_id)

        return {"result": "success"}


@ -1081,8 +1096,6 @@ class ToolMCPAuthApi(Resource):
                    credentials=provider_entity.credentials,
                    authed=True,
                )
                # Invalidate cache after updating credentials
                ToolProviderListCache.invalidate_cache(tenant_id)
                return {"result": "success"}
            except MCPAuthError as e:
                try:

@ -1096,22 +1109,16 @@ class ToolMCPAuthApi(Resource):
                with Session(db.engine) as session, session.begin():
                    service = MCPToolManageService(session=session)
                    response = service.execute_auth_actions(auth_result)
                # Invalidate cache after auth actions may have updated provider state
                ToolProviderListCache.invalidate_cache(tenant_id)
                return response
            except MCPRefreshTokenError as e:
                with Session(db.engine) as session, session.begin():
                    service = MCPToolManageService(session=session)
                    service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id)
                # Invalidate cache after clearing credentials
                ToolProviderListCache.invalidate_cache(tenant_id)
                raise ValueError(f"Failed to refresh token, please try to authorize again: {e}") from e
            except (MCPError, ValueError) as e:
                with Session(db.engine) as session, session.begin():
                    service = MCPToolManageService(session=session)
                    service.clear_provider_credentials(provider_id=provider_id, tenant_id=tenant_id)
                # Invalidate cache after clearing credentials
                ToolProviderListCache.invalidate_cache(tenant_id)
                raise ValueError(f"Failed to connect to MCP server: {e}") from e

@ -13,7 +13,6 @@ from controllers.service_api.dataset.error import DatasetInUseError, DatasetName
from controllers.service_api.wraps import (
    DatasetApiResource,
    cloud_edition_billing_rate_limit_check,
    validate_dataset_token,
)
from core.model_runtime.entities.model_entities import ModelType
from core.provider_manager import ProviderManager

@ -460,9 +459,8 @@ class DatasetTagsApi(DatasetApiResource):
            401: "Unauthorized - invalid API token",
        }
    )
    @validate_dataset_token
    @service_api_ns.marshal_with(build_dataset_tag_fields(service_api_ns))
    def get(self, _, dataset_id):
    def get(self, _):
        """Get all knowledge type tags."""
        assert isinstance(current_user, Account)
        cid = current_user.current_tenant_id

@ -482,8 +480,7 @@ class DatasetTagsApi(DatasetApiResource):
        }
    )
    @service_api_ns.marshal_with(build_dataset_tag_fields(service_api_ns))
    @validate_dataset_token
    def post(self, _, dataset_id):
    def post(self, _):
        """Add a knowledge type tag."""
        assert isinstance(current_user, Account)
        if not (current_user.has_edit_permission or current_user.is_dataset_editor):

@ -506,8 +503,7 @@ class DatasetTagsApi(DatasetApiResource):
        }
    )
    @service_api_ns.marshal_with(build_dataset_tag_fields(service_api_ns))
    @validate_dataset_token
    def patch(self, _, dataset_id):
    def patch(self, _):
        assert isinstance(current_user, Account)
        if not (current_user.has_edit_permission or current_user.is_dataset_editor):
            raise Forbidden()

@ -533,9 +529,8 @@ class DatasetTagsApi(DatasetApiResource):
            403: "Forbidden - insufficient permissions",
        }
    )
    @validate_dataset_token
    @edit_permission_required
    def delete(self, _, dataset_id):
    def delete(self, _):
        """Delete a knowledge type tag."""
        payload = TagDeletePayload.model_validate(service_api_ns.payload or {})
        TagService.delete_tag(payload.tag_id)

@ -555,8 +550,7 @@ class DatasetTagBindingApi(DatasetApiResource):
            403: "Forbidden - insufficient permissions",
        }
    )
    @validate_dataset_token
    def post(self, _, dataset_id):
    def post(self, _):
        # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
        assert isinstance(current_user, Account)
        if not (current_user.has_edit_permission or current_user.is_dataset_editor):

@ -580,8 +574,7 @@ class DatasetTagUnbindingApi(DatasetApiResource):
            403: "Forbidden - insufficient permissions",
        }
    )
    @validate_dataset_token
    def post(self, _, dataset_id):
    def post(self, _):
        # The role of the current user in the ta table must be admin, owner, editor, or dataset_operator
        assert isinstance(current_user, Account)
        if not (current_user.has_edit_permission or current_user.is_dataset_editor):

@ -604,7 +597,6 @@ class DatasetTagsBindingStatusApi(DatasetApiResource):
            401: "Unauthorized - invalid API token",
        }
    )
    @validate_dataset_token
    def get(self, _, *args, **kwargs):
        """Get all knowledge type tags."""
        dataset_id = kwargs.get("dataset_id")

@ -90,7 +90,7 @@ class AppQueueManager:
        """
        self._clear_task_belong_cache()
        self._q.put(None)
        self._graph_runtime_state = None  # Release reference to allow GC to reclaim memory

    def _clear_task_belong_cache(self) -> None:
        """

@ -1,9 +1,14 @@
from collections.abc import Mapping
from textwrap import dedent
from typing import Any

from core.helper.code_executor.template_transformer import TemplateTransformer


class Jinja2TemplateTransformer(TemplateTransformer):
    # Use separate placeholder for base64-encoded template to avoid confusion
    _template_b64_placeholder: str = "{{template_b64}}"

    @classmethod
    def transform_response(cls, response: str):
        """

@ -13,18 +18,35 @@ class Jinja2TemplateTransformer(TemplateTransformer):
        """
        return {"result": cls.extract_result_str_from_response(response)}

    @classmethod
    def assemble_runner_script(cls, code: str, inputs: Mapping[str, Any]) -> str:
        """
        Override base class to use base64 encoding for template code.
        This prevents issues with special characters (quotes, newlines) in templates
        breaking the generated Python script. Fixes #26818.
        """
        script = cls.get_runner_script()
        # Encode template as base64 to safely embed any content including quotes
        code_b64 = cls.serialize_code(code)
        script = script.replace(cls._template_b64_placeholder, code_b64)
        inputs_str = cls.serialize_inputs(inputs)
        script = script.replace(cls._inputs_placeholder, inputs_str)
        return script

    @classmethod
    def get_runner_script(cls) -> str:
        runner_script = dedent(f"""
            # declare main function
            def main(**inputs):
                import jinja2
                template = jinja2.Template('''{cls._code_placeholder}''')
                return template.render(**inputs)

            import jinja2
            import json
            from base64 import b64decode

            # declare main function
            def main(**inputs):
                # Decode base64-encoded template to handle special characters safely
                template_code = b64decode('{cls._template_b64_placeholder}').decode('utf-8')
                template = jinja2.Template(template_code)
                return template.render(**inputs)

            # decode and prepare input dict
            inputs_obj = json.loads(b64decode('{cls._inputs_placeholder}').decode('utf-8'))

@ -13,6 +13,15 @@ class TemplateTransformer(ABC):
    _inputs_placeholder: str = "{{inputs}}"
    _result_tag: str = "<<RESULT>>"

    @classmethod
    def serialize_code(cls, code: str) -> str:
        """
        Serialize template code to base64 to safely embed in generated script.
        This prevents issues with special characters like quotes breaking the script.
        """
        code_bytes = code.encode("utf-8")
        return b64encode(code_bytes).decode("utf-8")

    @classmethod
    def transform_caller(cls, code: str, inputs: Mapping[str, Any]) -> tuple[str, str]:
        """

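Why base64? Splicing raw template text into a generated script breaks as soon as the template contains `'''` or backslashes; the base64 alphabet contains no quote characters, so the encoded string can be embedded inside single quotes unconditionally. A standalone sketch of the round trip (not the Dify runner itself, just the technique):

```python
from base64 import b64decode, b64encode

template = "Hello '''{{ name }}''' - quotes\nand newlines survive"

# Producer side: embed only the base64 string in the generated script.
encoded = b64encode(template.encode("utf-8")).decode("utf-8")
script = f"template_code = __import__('base64').b64decode('{encoded}').decode('utf-8')"

# Consumer side: the runner decodes and gets the template back verbatim.
namespace: dict = {}
exec(script, namespace)
assert namespace["template_code"] == template
```
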
@ -1,56 +0,0 @@
import json
import logging
from typing import Any

from core.tools.entities.api_entities import ToolProviderTypeApiLiteral
from extensions.ext_redis import redis_client, redis_fallback

logger = logging.getLogger(__name__)


class ToolProviderListCache:
    """Cache for tool provider lists"""

    CACHE_TTL = 300  # 5 minutes

    @staticmethod
    def _generate_cache_key(tenant_id: str, typ: ToolProviderTypeApiLiteral = None) -> str:
        """Generate cache key for tool providers list"""
        type_filter = typ or "all"
        return f"tool_providers:tenant_id:{tenant_id}:type:{type_filter}"

    @staticmethod
    @redis_fallback(default_return=None)
    def get_cached_providers(tenant_id: str, typ: ToolProviderTypeApiLiteral = None) -> list[dict[str, Any]] | None:
        """Get cached tool providers"""
        cache_key = ToolProviderListCache._generate_cache_key(tenant_id, typ)
        cached_data = redis_client.get(cache_key)
        if cached_data:
            try:
                return json.loads(cached_data.decode("utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                logger.warning("Failed to decode cached tool providers data")
                return None
        return None

    @staticmethod
    @redis_fallback()
    def set_cached_providers(tenant_id: str, typ: ToolProviderTypeApiLiteral, providers: list[dict[str, Any]]):
        """Cache tool providers"""
        cache_key = ToolProviderListCache._generate_cache_key(tenant_id, typ)
        redis_client.setex(cache_key, ToolProviderListCache.CACHE_TTL, json.dumps(providers))

    @staticmethod
    @redis_fallback()
    def invalidate_cache(tenant_id: str, typ: ToolProviderTypeApiLiteral = None):
        """Invalidate cache for tool providers"""
        if typ:
            # Invalidate specific type cache
            cache_key = ToolProviderListCache._generate_cache_key(tenant_id, typ)
            redis_client.delete(cache_key)
        else:
            # Invalidate all caches for this tenant
            pattern = f"tool_providers:tenant_id:{tenant_id}:*"
            keys = list(redis_client.scan_iter(pattern))
            if keys:
                redis_client.delete(*keys)

@ -313,17 +313,20 @@ class StreamableHTTPTransport:
        if is_initialization:
            self._maybe_extract_session_id_from_response(response)

        content_type = cast(str, response.headers.get(CONTENT_TYPE, "").lower())
        # Per https://modelcontextprotocol.io/specification/2025-06-18/basic#notifications:
        # The server MUST NOT send a response to notifications.
        if isinstance(message.root, JSONRPCRequest):
            content_type = cast(str, response.headers.get(CONTENT_TYPE, "").lower())

        if content_type.startswith(JSON):
            self._handle_json_response(response, ctx.server_to_client_queue)
        elif content_type.startswith(SSE):
            self._handle_sse_response(response, ctx)
        else:
            self._handle_unexpected_content_type(
                content_type,
                ctx.server_to_client_queue,
            )
            if content_type.startswith(JSON):
                self._handle_json_response(response, ctx.server_to_client_queue)
            elif content_type.startswith(SSE):
                self._handle_sse_response(response, ctx)
            else:
                self._handle_unexpected_content_type(
                    content_type,
                    ctx.server_to_client_queue,
                )

    def _handle_json_response(
        self,

@ -76,7 +76,7 @@ class PluginParameter(BaseModel):
    auto_generate: PluginParameterAutoGenerate | None = None
    template: PluginParameterTemplate | None = None
    required: bool = False
    default: Union[float, int, str, bool] | None = None
    default: Union[float, int, str, bool, list, dict] | None = None
    min: Union[float, int] | None = None
    max: Union[float, int] | None = None
    precision: int | None = None

@ -27,26 +27,44 @@ class CleanProcessor:
        pattern = r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)"
        text = re.sub(pattern, "", text)

        # Remove URL but keep Markdown image URLs
        # First, temporarily replace Markdown image URLs with a placeholder
        markdown_image_pattern = r"!\[.*?\]\((https?://[^\s)]+)\)"
        placeholders: list[str] = []
        # Remove URL but keep Markdown image URLs and link URLs
        # Replace the ENTIRE markdown link/image with a single placeholder to protect
        # the link text (which might also be a URL) from being removed
        markdown_link_pattern = r"\[([^\]]*)\]\((https?://[^)]+)\)"
        markdown_image_pattern = r"!\[.*?\]\((https?://[^)]+)\)"
        placeholders: list[tuple[str, str, str]] = []  # (type, text, url)

        def replace_with_placeholder(match, placeholders=placeholders):
        def replace_markdown_with_placeholder(match, placeholders=placeholders):
            link_type = "link"
            link_text = match.group(1)
            url = match.group(2)
            placeholder = f"__MARKDOWN_PLACEHOLDER_{len(placeholders)}__"
            placeholders.append((link_type, link_text, url))
            return placeholder

        def replace_image_with_placeholder(match, placeholders=placeholders):
            link_type = "image"
            url = match.group(1)
            placeholder = f"__MARKDOWN_IMAGE_URL_{len(placeholders)}__"
            placeholders.append(url)
            return f"![image]({placeholder})"
            placeholder = f"__MARKDOWN_PLACEHOLDER_{len(placeholders)}__"
            placeholders.append((link_type, "image", url))
            return placeholder

        text = re.sub(markdown_image_pattern, replace_with_placeholder, text)
        # Protect markdown links first
        text = re.sub(markdown_link_pattern, replace_markdown_with_placeholder, text)
        # Then protect markdown images
        text = re.sub(markdown_image_pattern, replace_image_with_placeholder, text)

        # Now remove all remaining URLs
        url_pattern = r"https?://[^\s)]+"
        url_pattern = r"https?://\S+"
        text = re.sub(url_pattern, "", text)

        # Finally, restore the Markdown image URLs
        for i, url in enumerate(placeholders):
            text = text.replace(f"__MARKDOWN_IMAGE_URL_{i}__", url)
        # Restore the Markdown links and images
        for i, (link_type, text_or_alt, url) in enumerate(placeholders):
            placeholder = f"__MARKDOWN_PLACEHOLDER_{i}__"
            if link_type == "link":
                text = text.replace(placeholder, f"[{text_or_alt}]({url})")
            else:  # image
                text = text.replace(placeholder, f"![{text_or_alt}]({url})")
        return text

    def filter_string(self, text):

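The hunk above is the classic protect/strip/restore pattern: swap every construct you want to keep for an inert placeholder, run the destructive pass, then swap back. A condensed, self-contained sketch of the same idea:

```python
import re

text = "See [docs](https://example.com/docs) and stray link https://junk.example"
saved: list[str] = []


def protect(match: re.Match) -> str:
    saved.append(match.group(0))  # keep the whole markdown link intact
    return f"__PLACEHOLDER_{len(saved) - 1}__"


# 1) Protect markdown links, 2) strip bare URLs, 3) restore the links.
text = re.sub(r"\[([^\]]*)\]\((https?://[^)]+)\)", protect, text)
text = re.sub(r"https?://\S+", "", text)
for i, original in enumerate(saved):
    text = text.replace(f"__PLACEHOLDER_{i}__", original)

print(text)  # See [docs](https://example.com/docs) and stray link
```

Replacing the entire match (rather than only the URL) is the key fix in the diff: it protects link text that is itself a URL from the bare-URL strip in step 2.
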
@ -1,4 +1,5 @@
import concurrent.futures
import logging
from concurrent.futures import ThreadPoolExecutor
from typing import Any


@ -13,7 +14,7 @@ from core.model_runtime.entities.model_entities import ModelType
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.embedding.retrieval import RetrievalSegments
from core.rag.embedding.retrieval import RetrievalChildChunk, RetrievalSegments
from core.rag.entities.metadata_entities import MetadataCondition
from core.rag.index_processor.constant.doc_type import DocType
from core.rag.index_processor.constant.index_type import IndexStructureType

@ -36,6 +37,8 @@ default_retrieval_model = {
    "score_threshold_enabled": False,
}

logger = logging.getLogger(__name__)


class RetrievalService:
    # Cache precompiled regular expressions to avoid repeated compilation

@ -106,7 +109,12 @@ class RetrievalService:
                    )
                )

            concurrent.futures.wait(futures, timeout=3600, return_when=concurrent.futures.ALL_COMPLETED)
            if futures:
                for future in concurrent.futures.as_completed(futures, timeout=3600):
                    if exceptions:
                        for f in futures:
                            f.cancel()
                        break

        if exceptions:
            raise ValueError(";\n".join(exceptions))

@ -210,6 +218,7 @@ class RetrievalService:
            )
            all_documents.extend(documents)
        except Exception as e:
            logger.error(e, exc_info=True)
            exceptions.append(str(e))

    @classmethod

@ -303,6 +312,7 @@ class RetrievalService:
            else:
                all_documents.extend(documents)
        except Exception as e:
            logger.error(e, exc_info=True)
            exceptions.append(str(e))

    @classmethod

@ -351,6 +361,7 @@ class RetrievalService:
            else:
                all_documents.extend(documents)
        except Exception as e:
            logger.error(e, exc_info=True)
            exceptions.append(str(e))

    @staticmethod

@ -381,10 +392,9 @@ class RetrievalService:
        records = []
        include_segment_ids = set()
        segment_child_map = {}
        segment_file_map = {}

        valid_dataset_documents = {}
        image_doc_ids = []
        image_doc_ids: list[Any] = []
        child_index_node_ids = []
        index_node_ids = []
        doc_to_document_map = {}

@ -417,28 +427,39 @@ class RetrievalService:
        child_index_node_ids = [i for i in child_index_node_ids if i]
        index_node_ids = [i for i in index_node_ids if i]

        segment_ids = []
        segment_ids: list[str] = []
        index_node_segments: list[DocumentSegment] = []
        segments: list[DocumentSegment] = []
        attachment_map = {}
        child_chunk_map = {}
        doc_segment_map = {}
        attachment_map: dict[str, list[dict[str, Any]]] = {}
        child_chunk_map: dict[str, list[ChildChunk]] = {}
        doc_segment_map: dict[str, list[str]] = {}

        with session_factory.create_session() as session:
            attachments = cls.get_segment_attachment_infos(image_doc_ids, session)

            for attachment in attachments:
                segment_ids.append(attachment["segment_id"])
                attachment_map[attachment["segment_id"]] = attachment
                doc_segment_map[attachment["segment_id"]] = attachment["attachment_id"]

                if attachment["segment_id"] in attachment_map:
                    attachment_map[attachment["segment_id"]].append(attachment["attachment_info"])
                else:
                    attachment_map[attachment["segment_id"]] = [attachment["attachment_info"]]
                if attachment["segment_id"] in doc_segment_map:
                    doc_segment_map[attachment["segment_id"]].append(attachment["attachment_id"])
                else:
                    doc_segment_map[attachment["segment_id"]] = [attachment["attachment_id"]]
            child_chunk_stmt = select(ChildChunk).where(ChildChunk.index_node_id.in_(child_index_node_ids))
            child_index_nodes = session.execute(child_chunk_stmt).scalars().all()

            for i in child_index_nodes:
                segment_ids.append(i.segment_id)
                child_chunk_map[i.segment_id] = i
                doc_segment_map[i.segment_id] = i.index_node_id
                if i.segment_id in child_chunk_map:
                    child_chunk_map[i.segment_id].append(i)
                else:
                    child_chunk_map[i.segment_id] = [i]
                if i.segment_id in doc_segment_map:
                    doc_segment_map[i.segment_id].append(i.index_node_id)
                else:
                    doc_segment_map[i.segment_id] = [i.index_node_id]

            if index_node_ids:
                document_segment_stmt = select(DocumentSegment).where(

@ -448,7 +469,7 @@ class RetrievalService:
                )
                index_node_segments = session.execute(document_segment_stmt).scalars().all()  # type: ignore
                for index_node_segment in index_node_segments:
                    doc_segment_map[index_node_segment.id] = index_node_segment.index_node_id
                    doc_segment_map[index_node_segment.id] = [index_node_segment.index_node_id]
            if segment_ids:
                document_segment_stmt = select(DocumentSegment).where(
                    DocumentSegment.enabled == True,

@ -461,95 +482,86 @@ class RetrievalService:
                segments.extend(index_node_segments)

            for segment in segments:
                doc_id = doc_segment_map.get(segment.id)
                child_chunk = child_chunk_map.get(segment.id)
                attachment_info = attachment_map.get(segment.id)
                child_chunks: list[ChildChunk] = child_chunk_map.get(segment.id, [])
                attachment_infos: list[dict[str, Any]] = attachment_map.get(segment.id, [])
                ds_dataset_document: DatasetDocument | None = valid_dataset_documents.get(segment.document_id)

                if doc_id:
                    document = doc_to_document_map[doc_id]
                    ds_dataset_document: DatasetDocument | None = valid_dataset_documents.get(
                        document.metadata.get("document_id")
                    )

                    if ds_dataset_document and ds_dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
                        if segment.id not in include_segment_ids:
                            include_segment_ids.add(segment.id)
                            if child_chunk:
                if ds_dataset_document and ds_dataset_document.doc_form == IndexStructureType.PARENT_CHILD_INDEX:
                    if segment.id not in include_segment_ids:
                        include_segment_ids.add(segment.id)
                        if child_chunks or attachment_infos:
                            child_chunk_details = []
                            max_score = 0.0
                            for child_chunk in child_chunks:
                                document = doc_to_document_map[child_chunk.index_node_id]
                                child_chunk_detail = {
                                    "id": child_chunk.id,
                                    "content": child_chunk.content,
                                    "position": child_chunk.position,
                                    "score": document.metadata.get("score", 0.0) if document else 0.0,
                                }
                                map_detail = {
                                    "max_score": document.metadata.get("score", 0.0) if document else 0.0,
                                    "child_chunks": [child_chunk_detail],
                                }
                                segment_child_map[segment.id] = map_detail
                                record = {
                                    "segment": segment,
                                child_chunk_details.append(child_chunk_detail)
                                max_score = max(max_score, document.metadata.get("score", 0.0) if document else 0.0)
                            for attachment_info in attachment_infos:
                                file_document = doc_to_document_map[attachment_info["id"]]
                                max_score = max(
                                    max_score, file_document.metadata.get("score", 0.0) if file_document else 0.0
                                )

                            map_detail = {
                                "max_score": max_score,
                                "child_chunks": child_chunk_details,
                            }
                            if attachment_info:
                                segment_file_map[segment.id] = [attachment_info]
                            records.append(record)
                        else:
                            if child_chunk:
                                child_chunk_detail = {
                                    "id": child_chunk.id,
                                    "content": child_chunk.content,
                                    "position": child_chunk.position,
                                    "score": document.metadata.get("score", 0.0),
                                }
                                if segment.id in segment_child_map:
                                    segment_child_map[segment.id]["child_chunks"].append(child_chunk_detail)  # type: ignore
                                    segment_child_map[segment.id]["max_score"] = max(
                                        segment_child_map[segment.id]["max_score"],
                                        document.metadata.get("score", 0.0) if document else 0.0,
                                    )
                                else:
                                    segment_child_map[segment.id] = {
                                        "max_score": document.metadata.get("score", 0.0) if document else 0.0,
                                        "child_chunks": [child_chunk_detail],
                                    }
                            if attachment_info:
                                if segment.id in segment_file_map:
                                    segment_file_map[segment.id].append(attachment_info)
                                else:
                                    segment_file_map[segment.id] = [attachment_info]
                else:
                    if segment.id not in include_segment_ids:
                        include_segment_ids.add(segment.id)
                        record = {
                            "segment": segment,
                            "score": document.metadata.get("score", 0.0),  # type: ignore
                        }
                        if attachment_info:
                            segment_file_map[segment.id] = [attachment_info]
                        records.append(record)
|
||||
else:
|
||||
if attachment_info:
|
||||
attachment_infos = segment_file_map.get(segment.id, [])
|
||||
if attachment_info not in attachment_infos:
|
||||
attachment_infos.append(attachment_info)
|
||||
segment_file_map[segment.id] = attachment_infos
|
||||
segment_child_map[segment.id] = map_detail
|
||||
record: dict[str, Any] = {
|
||||
"segment": segment,
|
||||
}
|
||||
records.append(record)
|
||||
else:
|
||||
if segment.id not in include_segment_ids:
|
||||
include_segment_ids.add(segment.id)
|
||||
max_score = 0.0
|
||||
segment_document = doc_to_document_map.get(segment.index_node_id)
|
||||
if segment_document:
|
||||
max_score = max(max_score, segment_document.metadata.get("score", 0.0))
|
||||
for attachment_info in attachment_infos:
|
||||
file_doc = doc_to_document_map.get(attachment_info["id"])
|
||||
if file_doc:
|
||||
max_score = max(max_score, file_doc.metadata.get("score", 0.0))
|
||||
record = {
|
||||
"segment": segment,
|
||||
"score": max_score,
|
||||
}
|
||||
records.append(record)
|
||||
|
||||
# Add child chunks information to records
|
||||
for record in records:
|
||||
if record["segment"].id in segment_child_map:
|
||||
record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks") # type: ignore
|
||||
record["score"] = segment_child_map[record["segment"].id]["max_score"] # type: ignore
|
||||
if record["segment"].id in segment_file_map:
|
||||
record["files"] = segment_file_map[record["segment"].id] # type: ignore[assignment]
|
||||
if record["segment"].id in attachment_map:
|
||||
record["files"] = attachment_map[record["segment"].id] # type: ignore[assignment]
|
||||
|
||||
result = []
|
||||
result: list[RetrievalSegments] = []
|
||||
for record in records:
|
||||
# Extract segment
|
||||
segment = record["segment"]
|
||||
|
||||
# Extract child_chunks, ensuring it's a list or None
|
||||
child_chunks = record.get("child_chunks")
|
||||
if not isinstance(child_chunks, list):
|
||||
child_chunks = None
|
||||
raw_child_chunks = record.get("child_chunks")
|
||||
child_chunks_list: list[RetrievalChildChunk] | None = None
|
||||
if isinstance(raw_child_chunks, list):
|
||||
# Sort by score descending
|
||||
sorted_chunks = sorted(raw_child_chunks, key=lambda x: x.get("score", 0.0), reverse=True)
|
||||
child_chunks_list = [
|
||||
RetrievalChildChunk(
|
||||
id=chunk["id"],
|
||||
content=chunk["content"],
|
||||
score=chunk.get("score", 0.0),
|
||||
position=chunk["position"],
|
||||
)
|
||||
for chunk in sorted_chunks
|
||||
]
|
||||
|
||||
# Extract files, ensuring it's a list or None
|
||||
files = record.get("files")
|
||||
|
|
@ -566,11 +578,11 @@ class RetrievalService:
|
|||
|
||||
# Create RetrievalSegments object
|
||||
retrieval_segment = RetrievalSegments(
|
||||
segment=segment, child_chunks=child_chunks, score=score, files=files
|
||||
segment=segment, child_chunks=child_chunks_list, score=score, files=files
|
||||
)
|
||||
result.append(retrieval_segment)
|
||||
|
||||
return result
|
||||
return sorted(result, key=lambda x: x.score if x.score is not None else 0.0, reverse=True)
|
||||
except Exception as e:
|
||||
db.session.rollback()
|
||||
raise e
|
||||
|
|
@ -662,7 +674,14 @@ class RetrievalService:
|
|||
document_ids_filter=document_ids_filter,
|
||||
)
|
||||
)
|
||||
concurrent.futures.wait(futures, timeout=300, return_when=concurrent.futures.ALL_COMPLETED)
|
||||
# Use as_completed for early error propagation - cancel remaining futures on first error
|
||||
if futures:
|
||||
for future in concurrent.futures.as_completed(futures, timeout=300):
|
||||
if future.exception():
|
||||
# Cancel remaining futures to avoid unnecessary waiting
|
||||
for f in futures:
|
||||
f.cancel()
|
||||
break
|
||||
|
||||
if exceptions:
|
||||
raise ValueError(";\n".join(exceptions))
|
||||
|
|
|
|||
|
|
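The retrieval hunk above swaps `concurrent.futures.wait(..., ALL_COMPLETED)` for an `as_completed` loop. A minimal standalone sketch of that fail-fast pattern, with a hypothetical `_task` standing in for the per-dataset retrieval call:

```python
import concurrent.futures
import time

def _task(n: int) -> int:
    # Hypothetical worker standing in for a per-dataset retrieval call.
    if n == 2:
        raise ValueError("retrieval failed")
    time.sleep(n)
    return n

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(_task, n) for n in range(4)]
    # Iterate in completion order and stop on the first error, instead of
    # blocking until every future has finished.
    for future in concurrent.futures.as_completed(futures, timeout=300):
        if future.exception():
            for f in futures:
                f.cancel()  # skips still-pending futures; no-op for running ones
            break
```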
@@ -255,7 +255,10 @@ class PGVector(BaseVector):
             return

         with self._get_cursor() as cur:
-            cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
+            cur.execute("SELECT 1 FROM pg_extension WHERE extname = 'vector'")
+            if not cur.fetchone():
+                cur.execute("CREATE EXTENSION vector")
+
             cur.execute(SQL_CREATE_TABLE.format(table_name=self.table_name, dimension=dimension))
             # PG hnsw index only support 2000 dimension or less
             # ref: https://github.com/pgvector/pgvector?tab=readme-ov-file#indexing
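The PGVector change replaces an unconditional `CREATE EXTENSION IF NOT EXISTS` with a catalog probe, so the privileged DDL statement only runs when the extension is genuinely missing. A sketch of the same pattern, assuming a psycopg2 connection (the real class goes through its own `_get_cursor` helper):

```python
import psycopg2  # assumed driver; the vector store wraps its own cursor helper

conn = psycopg2.connect("dbname=dify user=postgres")  # hypothetical DSN
with conn.cursor() as cur:
    # A plain catalog SELECT needs no special privileges, so the privileged
    # CREATE EXTENSION only runs when pgvector is truly absent.
    cur.execute("SELECT 1 FROM pg_extension WHERE extname = 'vector'")
    if not cur.fetchone():
        cur.execute("CREATE EXTENSION vector")
conn.commit()
```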
@@ -112,7 +112,7 @@ class ExtractProcessor:
                 if file_extension in {".xlsx", ".xls"}:
                     extractor = ExcelExtractor(file_path)
                 elif file_extension == ".pdf":
-                    extractor = PdfExtractor(file_path)
+                    extractor = PdfExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
                 elif file_extension in {".md", ".markdown", ".mdx"}:
                     extractor = (
                         UnstructuredMarkdownExtractor(file_path, unstructured_api_url, unstructured_api_key)

@@ -148,7 +148,7 @@ class ExtractProcessor:
                 if file_extension in {".xlsx", ".xls"}:
                     extractor = ExcelExtractor(file_path)
                 elif file_extension == ".pdf":
-                    extractor = PdfExtractor(file_path)
+                    extractor = PdfExtractor(file_path, upload_file.tenant_id, upload_file.created_by)
                 elif file_extension in {".md", ".markdown", ".mdx"}:
                     extractor = MarkdownExtractor(file_path, autodetect_encoding=True)
                 elif file_extension in {".htm", ".html"}:
@@ -1,25 +1,57 @@
 """Abstract interface for document loader implementations."""

+import contextlib
+import io
+import logging
+import uuid
 from collections.abc import Iterator

+import pypdfium2
+import pypdfium2.raw as pdfium_c
+
+from configs import dify_config
 from core.rag.extractor.blob.blob import Blob
 from core.rag.extractor.extractor_base import BaseExtractor
 from core.rag.models.document import Document
+from extensions.ext_database import db
+from extensions.ext_storage import storage
+from libs.datetime_utils import naive_utc_now
+from models.enums import CreatorUserRole
+from models.model import UploadFile
+
+logger = logging.getLogger(__name__)


 class PdfExtractor(BaseExtractor):
-    """Load pdf files.
+    """
+    PdfExtractor is used to extract text and images from PDF files.

     Args:
-        file_path: Path to the file to load.
+        file_path: Path to the PDF file.
+        tenant_id: Workspace ID.
+        user_id: ID of the user performing the extraction.
+        file_cache_key: Optional cache key for the extracted text.
     """

-    def __init__(self, file_path: str, file_cache_key: str | None = None):
-        """Initialize with file path."""
+    # Magic bytes for image format detection: (magic_bytes, extension, mime_type)
+    IMAGE_FORMATS = [
+        (b"\xff\xd8\xff", "jpg", "image/jpeg"),
+        (b"\x89PNG\r\n\x1a\n", "png", "image/png"),
+        (b"\x00\x00\x00\x0c\x6a\x50\x20\x20\x0d\x0a\x87\x0a", "jp2", "image/jp2"),
+        (b"GIF8", "gif", "image/gif"),
+        (b"BM", "bmp", "image/bmp"),
+        (b"II*\x00", "tiff", "image/tiff"),
+        (b"MM\x00*", "tiff", "image/tiff"),
+        (b"II+\x00", "tiff", "image/tiff"),
+        (b"MM\x00+", "tiff", "image/tiff"),
+    ]
+    MAX_MAGIC_LEN = max(len(m) for m, _, _ in IMAGE_FORMATS)
+
+    def __init__(self, file_path: str, tenant_id: str, user_id: str, file_cache_key: str | None = None):
+        """Initialize PdfExtractor."""
         self._file_path = file_path
+        self._tenant_id = tenant_id
+        self._user_id = user_id
         self._file_cache_key = file_cache_key

     def extract(self) -> list[Document]:

@@ -50,7 +82,6 @@ class PdfExtractor(BaseExtractor):

     def parse(self, blob: Blob) -> Iterator[Document]:
         """Lazily parse the blob."""
-        import pypdfium2  # type: ignore
-
         with blob.as_bytes_io() as file_path:
             pdf_reader = pypdfium2.PdfDocument(file_path, autoclose=True)

@@ -59,8 +90,87 @@ class PdfExtractor(BaseExtractor):
                     text_page = page.get_textpage()
                     content = text_page.get_text_range()
                     text_page.close()
+
+                    image_content = self._extract_images(page)
+                    if image_content:
+                        content += "\n" + image_content
+
                     page.close()
                     metadata = {"source": blob.source, "page": page_number}
                     yield Document(page_content=content, metadata=metadata)
         finally:
             pdf_reader.close()

+    def _extract_images(self, page) -> str:
+        """
+        Extract images from a PDF page, save them to storage and database,
+        and return markdown image links.
+
+        Args:
+            page: pypdfium2 page object.
+
+        Returns:
+            Markdown string containing links to the extracted images.
+        """
+        image_content = []
+        upload_files = []
+        base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
+
+        try:
+            image_objects = page.get_objects(filter=(pdfium_c.FPDF_PAGEOBJ_IMAGE,))
+            for obj in image_objects:
+                try:
+                    # Extract image bytes
+                    img_byte_arr = io.BytesIO()
+                    # Extract DCTDecode (JPEG) and JPXDecode (JPEG 2000) images directly
+                    # Fallback to png for other formats
+                    obj.extract(img_byte_arr, fb_format="png")
+                    img_bytes = img_byte_arr.getvalue()
+
+                    if not img_bytes:
+                        continue
+
+                    header = img_bytes[: self.MAX_MAGIC_LEN]
+                    image_ext = None
+                    mime_type = None
+                    for magic, ext, mime in self.IMAGE_FORMATS:
+                        if header.startswith(magic):
+                            image_ext = ext
+                            mime_type = mime
+                            break
+
+                    if not image_ext or not mime_type:
+                        continue
+
+                    file_uuid = str(uuid.uuid4())
+                    file_key = "image_files/" + self._tenant_id + "/" + file_uuid + "." + image_ext
+
+                    storage.save(file_key, img_bytes)
+
+                    # save file to db
+                    upload_file = UploadFile(
+                        tenant_id=self._tenant_id,
+                        storage_type=dify_config.STORAGE_TYPE,
+                        key=file_key,
+                        name=file_key,
+                        size=len(img_bytes),
+                        extension=image_ext,
+                        mime_type=mime_type,
+                        created_by=self._user_id,
+                        created_by_role=CreatorUserRole.ACCOUNT,
+                        created_at=naive_utc_now(),
+                        used=True,
+                        used_by=self._user_id,
+                        used_at=naive_utc_now(),
+                    )
+                    upload_files.append(upload_file)
+                    image_content.append(f"")
+                except Exception as e:
+                    logger.warning("Failed to extract image from PDF: %s", e)
+                    continue
+        except Exception as e:
+            logger.warning("Failed to get objects from PDF page: %s", e)
+        if upload_files:
+            db.session.add_all(upload_files)
+            db.session.commit()
+        return "\n".join(image_content)
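The extractor identifies image formats by magic bytes rather than trusting the PDF's filter names. A self-contained sketch of that sniffing table, using a subset of the formats listed above:

```python
from typing import Optional

# Same table shape as PdfExtractor.IMAGE_FORMATS above (subset).
IMAGE_FORMATS = [
    (b"\xff\xd8\xff", "jpg", "image/jpeg"),
    (b"\x89PNG\r\n\x1a\n", "png", "image/png"),
    (b"GIF8", "gif", "image/gif"),
    (b"BM", "bmp", "image/bmp"),
]
MAX_MAGIC_LEN = max(len(m) for m, _, _ in IMAGE_FORMATS)

def sniff_image(data: bytes) -> Optional[tuple[str, str]]:
    """Return (extension, mime_type) if the leading bytes match a known format."""
    header = data[:MAX_MAGIC_LEN]
    for magic, ext, mime in IMAGE_FORMATS:
        if header.startswith(magic):
            return ext, mime
    return None

assert sniff_image(b"\x89PNG\r\n\x1a\n" + b"\x00" * 8) == ("png", "image/png")
assert sniff_image(b"not an image") is None
```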
@@ -7,7 +7,7 @@ from collections.abc import Generator, Mapping
 from typing import Any, Union, cast

 from flask import Flask, current_app
-from sqlalchemy import and_, or_, select
+from sqlalchemy import and_, literal, or_, select
 from sqlalchemy.orm import Session

 from core.app.app_config.entities import (

@@ -516,6 +516,9 @@ class DatasetRetrieval:
                 ].embedding_model_provider
                 weights["vector_setting"]["embedding_model_name"] = available_datasets[0].embedding_model
         with measure_time() as timer:
+            cancel_event = threading.Event()
+            thread_exceptions: list[Exception] = []
+
             if query:
                 query_thread = threading.Thread(
                     target=self._multiple_retrieve_thread,

@@ -534,6 +537,8 @@ class DatasetRetrieval:
                         "score_threshold": score_threshold,
                         "query": query,
                         "attachment_id": None,
+                        "cancel_event": cancel_event,
+                        "thread_exceptions": thread_exceptions,
                     },
                 )
                 all_threads.append(query_thread)

@@ -557,12 +562,25 @@ class DatasetRetrieval:
                         "score_threshold": score_threshold,
                         "query": None,
                         "attachment_id": attachment_id,
+                        "cancel_event": cancel_event,
+                        "thread_exceptions": thread_exceptions,
                     },
                 )
                 all_threads.append(attachment_thread)
                 attachment_thread.start()
-            for thread in all_threads:
-                thread.join()
+
+            # Poll threads with short timeout to detect errors quickly (fail-fast)
+            while any(t.is_alive() for t in all_threads):
+                for thread in all_threads:
+                    thread.join(timeout=0.1)
+                    if thread_exceptions:
+                        cancel_event.set()
+                        break
+                if thread_exceptions:
+                    break
+
+            if thread_exceptions:
+                raise thread_exceptions[0]
         self._on_query(query, attachment_ids, dataset_ids, app_id, user_from, user_id)

         if all_documents:

@@ -1036,7 +1054,7 @@ class DatasetRetrieval:
         if automatic_metadata_filters:
             conditions = []
             for sequence, filter in enumerate(automatic_metadata_filters):
-                self._process_metadata_filter_func(
+                self.process_metadata_filter_func(
                     sequence,
                     filter.get("condition"),  # type: ignore
                     filter.get("metadata_name"),  # type: ignore

@@ -1072,7 +1090,7 @@ class DatasetRetrieval:
                             value=expected_value,
                         )
                     )
-            filters = self._process_metadata_filter_func(
+            filters = self.process_metadata_filter_func(
                 sequence,
                 condition.comparison_operator,
                 metadata_name,

@@ -1168,8 +1186,9 @@ class DatasetRetrieval:
             return None
         return automatic_metadata_filters

-    def _process_metadata_filter_func(
-        self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list
+    @classmethod
+    def process_metadata_filter_func(
+        cls, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list
     ):
         if value is None and condition not in ("empty", "not empty"):
             return filters

@@ -1218,6 +1237,20 @@ class DatasetRetrieval:

             case "≥" | ">=":
                 filters.append(DatasetDocument.doc_metadata[metadata_name].as_float() >= value)
+            case "in" | "not in":
+                if isinstance(value, str):
+                    value_list = [v.strip() for v in value.split(",") if v.strip()]
+                elif isinstance(value, (list, tuple)):
+                    value_list = [str(v) for v in value if v is not None]
+                else:
+                    value_list = [str(value)] if value is not None else []
+
+                if not value_list:
+                    # `field in []` is False, `field not in []` is True
+                    filters.append(literal(condition == "not in"))
+                else:
+                    op = json_field.in_ if condition == "in" else json_field.notin_
+                    filters.append(op(value_list))
             case _:
                 pass

@@ -1389,40 +1422,53 @@ class DatasetRetrieval:
         score_threshold: float,
         query: str | None,
         attachment_id: str | None,
+        cancel_event: threading.Event | None = None,
+        thread_exceptions: list[Exception] | None = None,
     ):
-        with flask_app.app_context():
-            threads = []
-            all_documents_item: list[Document] = []
-            index_type = None
-            for dataset in available_datasets:
-                index_type = dataset.indexing_technique
-                document_ids_filter = None
-                if dataset.provider != "external":
-                    if metadata_condition and not metadata_filter_document_ids:
-                        continue
-                    if metadata_filter_document_ids:
-                        document_ids = metadata_filter_document_ids.get(dataset.id, [])
-                        if document_ids:
-                            document_ids_filter = document_ids
-                        else:
+        try:
+            with flask_app.app_context():
+                threads = []
+                all_documents_item: list[Document] = []
+                index_type = None
+                for dataset in available_datasets:
+                    # Check for cancellation signal
+                    if cancel_event and cancel_event.is_set():
+                        break
+                    index_type = dataset.indexing_technique
+                    document_ids_filter = None
+                    if dataset.provider != "external":
+                        if metadata_condition and not metadata_filter_document_ids:
                             continue
-                retrieval_thread = threading.Thread(
-                    target=self._retriever,
-                    kwargs={
-                        "flask_app": flask_app,
-                        "dataset_id": dataset.id,
-                        "query": query,
-                        "top_k": top_k,
-                        "all_documents": all_documents_item,
-                        "document_ids_filter": document_ids_filter,
-                        "metadata_condition": metadata_condition,
-                        "attachment_ids": [attachment_id] if attachment_id else None,
-                    },
-                )
-                threads.append(retrieval_thread)
-                retrieval_thread.start()
-            for thread in threads:
-                thread.join()
+                        if metadata_filter_document_ids:
+                            document_ids = metadata_filter_document_ids.get(dataset.id, [])
+                            if document_ids:
+                                document_ids_filter = document_ids
+                            else:
+                                continue
+                    retrieval_thread = threading.Thread(
+                        target=self._retriever,
+                        kwargs={
+                            "flask_app": flask_app,
+                            "dataset_id": dataset.id,
+                            "query": query,
+                            "top_k": top_k,
+                            "all_documents": all_documents_item,
+                            "document_ids_filter": document_ids_filter,
+                            "metadata_condition": metadata_condition,
+                            "attachment_ids": [attachment_id] if attachment_id else None,
+                        },
+                    )
+                    threads.append(retrieval_thread)
+                    retrieval_thread.start()
+
+                # Poll threads with short timeout to respond quickly to cancellation
+                while any(t.is_alive() for t in threads):
+                    for thread in threads:
+                        thread.join(timeout=0.1)
+                        if cancel_event and cancel_event.is_set():
+                            break
+                    if cancel_event and cancel_event.is_set():
+                        break

                 if reranking_enable:
                     # do rerank for searched documents

@@ -1455,3 +1501,8 @@ class DatasetRetrieval:
                 all_documents_item = all_documents_item[:top_k] if top_k else all_documents_item
                 if all_documents_item:
                     all_documents.extend(all_documents_item)
+        except Exception as e:
+            if cancel_event:
+                cancel_event.set()
+            if thread_exceptions is not None:
+                thread_exceptions.append(e)
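The new `cancel_event`/`thread_exceptions` plumbing gives the coordinator a way to stop sibling threads as soon as one fails. A minimal sketch of the polling-join pattern, with a hypothetical `worker` in place of the retrieval threads:

```python
import threading
import time

def worker(i: int, cancel_event: threading.Event, errors: list[Exception]) -> None:
    try:
        if i == 1:
            raise RuntimeError(f"worker {i} failed")  # simulate one failing thread
        for _ in range(100):                # long-running work, done in slices
            if cancel_event.is_set():       # cooperative cancellation point
                return
            time.sleep(0.05)
    except Exception as e:
        cancel_event.set()                  # signal siblings to stop
        errors.append(e)                    # surface the error to the coordinator

cancel_event = threading.Event()
errors: list[Exception] = []
threads = [threading.Thread(target=worker, args=(i, cancel_event, errors)) for i in range(3)]
for t in threads:
    t.start()

# join(timeout=0.1) keeps the coordinator responsive: it re-checks the shared
# error list every 100 ms instead of blocking on one thread indefinitely.
while any(t.is_alive() for t in threads):
    for t in threads:
        t.join(timeout=0.1)
        if errors:
            cancel_event.set()
            break
    if errors:
        break

if errors:
    print(f"fail fast: {errors[0]}")
```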
@@ -6,7 +6,15 @@ from typing import Any

 from core.mcp.auth_client import MCPClientWithAuthRetry
 from core.mcp.error import MCPConnectionError
-from core.mcp.types import AudioContent, CallToolResult, ImageContent, TextContent
+from core.mcp.types import (
+    AudioContent,
+    BlobResourceContents,
+    CallToolResult,
+    EmbeddedResource,
+    ImageContent,
+    TextContent,
+    TextResourceContents,
+)
 from core.tools.__base.tool import Tool
 from core.tools.__base.tool_runtime import ToolRuntime
 from core.tools.entities.tool_entities import ToolEntity, ToolInvokeMessage, ToolProviderType

@@ -53,10 +61,19 @@ class MCPTool(Tool):
         for content in result.content:
             if isinstance(content, TextContent):
                 yield from self._process_text_content(content)
-            elif isinstance(content, ImageContent):
-                yield self._process_image_content(content)
-            elif isinstance(content, AudioContent):
-                yield self._process_audio_content(content)
+            elif isinstance(content, ImageContent | AudioContent):
+                yield self.create_blob_message(
+                    blob=base64.b64decode(content.data), meta={"mime_type": content.mimeType}
+                )
+            elif isinstance(content, EmbeddedResource):
+                resource = content.resource
+                if isinstance(resource, TextResourceContents):
+                    yield self.create_text_message(resource.text)
+                elif isinstance(resource, BlobResourceContents):
+                    mime_type = resource.mimeType or "application/octet-stream"
+                    yield self.create_blob_message(blob=base64.b64decode(resource.blob), meta={"mime_type": mime_type})
+                else:
+                    raise ToolInvokeError(f"Unsupported embedded resource type: {type(resource)}")
             else:
                 logger.warning("Unsupported content type=%s", type(content))

@@ -101,14 +118,6 @@ class MCPTool(Tool):
         for item in json_list:
             yield self.create_json_message(item)

-    def _process_image_content(self, content: ImageContent) -> ToolInvokeMessage:
-        """Process image content and return a blob message."""
-        return self.create_blob_message(blob=base64.b64decode(content.data), meta={"mime_type": content.mimeType})
-
-    def _process_audio_content(self, content: AudioContent) -> ToolInvokeMessage:
-        """Process audio content and return a blob message."""
-        return self.create_blob_message(blob=base64.b64decode(content.data), meta={"mime_type": content.mimeType})
-
     def fork_tool_runtime(self, runtime: ToolRuntime) -> "MCPTool":
         return MCPTool(
             entity=self.entity,
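The MCP change collapses the separate image and audio branches into one `isinstance(content, ImageContent | AudioContent)` check, which works because both types carry the same `data`/`mimeType` fields. A sketch of that dispatch shape with stand-in dataclasses (the real classes live in `core.mcp.types`):

```python
import base64
from dataclasses import dataclass

# Hypothetical stand-ins for the core.mcp.types content classes.
@dataclass
class TextContent:
    text: str

@dataclass
class ImageContent:
    data: str       # base64 payload
    mimeType: str

@dataclass
class AudioContent:
    data: str
    mimeType: str

def dispatch(content) -> tuple[str, object]:
    if isinstance(content, TextContent):
        return ("text", content.text)
    # Python 3.10+ union syntax in isinstance: one branch now covers both
    # binary content types, since they share the data/mimeType fields.
    if isinstance(content, ImageContent | AudioContent):
        return ("blob", (base64.b64decode(content.data), content.mimeType))
    raise TypeError(f"Unsupported content type: {type(content)}")

kind, _ = dispatch(ImageContent(data=base64.b64encode(b"\x89PNG").decode(), mimeType="image/png"))
assert kind == "blob"
```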
@@ -378,7 +378,7 @@ class ApiBasedToolSchemaParser:
     @staticmethod
     def auto_parse_to_tool_bundle(
         content: str, extra_info: dict | None = None, warning: dict | None = None
-    ) -> tuple[list[ApiToolBundle], str]:
+    ) -> tuple[list[ApiToolBundle], ApiProviderSchemaType]:
         """
         auto parse to tool bundle
@@ -4,6 +4,7 @@ import re
 def remove_leading_symbols(text: str) -> str:
     """
     Remove leading punctuation or symbols from the given text.
+    Preserves markdown links like [text](url) at the start.

     Args:
         text (str): The input text to process.

@@ -11,6 +12,11 @@ def remove_leading_symbols(text: str) -> str:
     Returns:
         str: The text with leading punctuation or symbols removed.
     """
+    # Check if text starts with a markdown link - preserve it
+    markdown_link_pattern = r"^\[([^\]]+)\]\((https?://[^)]+)\)"
+    if re.match(markdown_link_pattern, text):
+        return text
+
     # Match Unicode ranges for punctuation and symbols
     # FIXME this pattern is confused quick fix for #11868 maybe refactor it later
     pattern = r'^[\[\]\u2000-\u2025\u2027-\u206F\u2E00-\u2E7F\u3000-\u300F\u3011-\u303F"#$%&\'()*+,./:;<=>?@^_`~]+'
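The guard added to `remove_leading_symbols` exists because `[` sits inside the stripping character class, so a leading markdown link would otherwise lose its opening bracket. A usage sketch mirroring the patched function:

```python
import re

def remove_leading_symbols(text: str) -> str:
    # Mirrors the patched function above: keep a leading markdown link intact,
    # otherwise strip leading punctuation/symbols.
    markdown_link_pattern = r"^\[([^\]]+)\]\((https?://[^)]+)\)"
    if re.match(markdown_link_pattern, text):
        return text
    pattern = r'^[\[\]\u2000-\u2025\u2027-\u206F\u2E00-\u2E7F\u3000-\u300F\u3011-\u303F"#$%&\'()*+,./:;<=>?@^_`~]+'
    return re.sub(pattern, "", text)

# A leading markdown link survives; bare leading symbols are still stripped.
assert remove_leading_symbols("[Dify](https://dify.ai) docs") == "[Dify](https://dify.ai) docs"
assert remove_leading_symbols('"#Hello') == "Hello"
```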
@@ -5,6 +5,7 @@ from sqlalchemy.orm import Session

 from core.app.app_config.entities import VariableEntity, VariableEntityType
 from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager
+from core.db.session_factory import session_factory
 from core.plugin.entities.parameters import PluginParameterOption
 from core.tools.__base.tool_provider import ToolProviderController
 from core.tools.__base.tool_runtime import ToolRuntime

@@ -47,33 +48,29 @@ class WorkflowToolProviderController(ToolProviderController):

     @classmethod
     def from_db(cls, db_provider: WorkflowToolProvider) -> "WorkflowToolProviderController":
-        with Session(db.engine, expire_on_commit=False) as session, session.begin():
-            provider = session.get(WorkflowToolProvider, db_provider.id) if db_provider.id else None
-            if not provider:
-                raise ValueError("workflow provider not found")
-            app = session.get(App, provider.app_id)
+        with session_factory.create_session() as session, session.begin():
+            app = session.get(App, db_provider.app_id)
             if not app:
                 raise ValueError("app not found")

-            user = session.get(Account, provider.user_id) if provider.user_id else None
-
+            user = session.get(Account, db_provider.user_id) if db_provider.user_id else None
             controller = WorkflowToolProviderController(
                 entity=ToolProviderEntity(
                     identity=ToolProviderIdentity(
                         author=user.name if user else "",
-                        name=provider.label,
-                        label=I18nObject(en_US=provider.label, zh_Hans=provider.label),
-                        description=I18nObject(en_US=provider.description, zh_Hans=provider.description),
-                        icon=provider.icon,
+                        name=db_provider.label,
+                        label=I18nObject(en_US=db_provider.label, zh_Hans=db_provider.label),
+                        description=I18nObject(en_US=db_provider.description, zh_Hans=db_provider.description),
+                        icon=db_provider.icon,
                     ),
                     credentials_schema=[],
                     plugin_id=None,
                 ),
-                provider_id=provider.id or "",
+                provider_id=db_provider.id,
             )

             controller.tools = [
-                controller._get_db_provider_tool(provider, app, session=session, user=user),
+                controller._get_db_provider_tool(db_provider, app, session=session, user=user),
             ]

             return controller
@@ -60,6 +60,7 @@ class SkipPropagator:
         if edge_states["has_taken"]:
             # Enqueue node
             self._state_manager.enqueue_node(downstream_node_id)
+            self._state_manager.start_execution(downstream_node_id)
             return

         # All edges are skipped, propagate skip to this node
@@ -6,7 +6,7 @@ from collections import defaultdict
 from collections.abc import Mapping, Sequence
 from typing import TYPE_CHECKING, Any, cast

-from sqlalchemy import and_, func, literal, or_, select
+from sqlalchemy import and_, func, or_, select
 from sqlalchemy.orm import sessionmaker

 from core.app.app_config.entities import DatasetRetrieveConfigEntity

@@ -460,7 +460,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeData]):
         if automatic_metadata_filters:
             conditions = []
             for sequence, filter in enumerate(automatic_metadata_filters):
-                self._process_metadata_filter_func(
+                DatasetRetrieval.process_metadata_filter_func(
                     sequence,
                     filter.get("condition", ""),
                     filter.get("metadata_name", ""),

@@ -504,7 +504,7 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeData]):
                             value=expected_value,
                         )
                     )
-            filters = self._process_metadata_filter_func(
+            filters = DatasetRetrieval.process_metadata_filter_func(
                 sequence,
                 condition.comparison_operator,
                 metadata_name,

@@ -603,87 +603,6 @@ class KnowledgeRetrievalNode(LLMUsageTrackingMixin, Node[KnowledgeRetrievalNodeData]):
             return [], usage
         return automatic_metadata_filters, usage

-    def _process_metadata_filter_func(
-        self, sequence: int, condition: str, metadata_name: str, value: Any, filters: list[Any]
-    ) -> list[Any]:
-        if value is None and condition not in ("empty", "not empty"):
-            return filters
-
-        json_field = Document.doc_metadata[metadata_name].as_string()
-
-        match condition:
-            case "contains":
-                filters.append(json_field.like(f"%{value}%"))
-
-            case "not contains":
-                filters.append(json_field.notlike(f"%{value}%"))
-
-            case "start with":
-                filters.append(json_field.like(f"{value}%"))
-
-            case "end with":
-                filters.append(json_field.like(f"%{value}"))
-            case "in":
-                if isinstance(value, str):
-                    value_list = [v.strip() for v in value.split(",") if v.strip()]
-                elif isinstance(value, (list, tuple)):
-                    value_list = [str(v) for v in value if v is not None]
-                else:
-                    value_list = [str(value)] if value is not None else []
-
-                if not value_list:
-                    filters.append(literal(False))
-                else:
-                    filters.append(json_field.in_(value_list))
-
-            case "not in":
-                if isinstance(value, str):
-                    value_list = [v.strip() for v in value.split(",") if v.strip()]
-                elif isinstance(value, (list, tuple)):
-                    value_list = [str(v) for v in value if v is not None]
-                else:
-                    value_list = [str(value)] if value is not None else []
-
-                if not value_list:
-                    filters.append(literal(True))
-                else:
-                    filters.append(json_field.notin_(value_list))
-
-            case "is" | "=":
-                if isinstance(value, str):
-                    filters.append(json_field == value)
-                elif isinstance(value, (int, float)):
-                    filters.append(Document.doc_metadata[metadata_name].as_float() == value)
-
-            case "is not" | "≠":
-                if isinstance(value, str):
-                    filters.append(json_field != value)
-                elif isinstance(value, (int, float)):
-                    filters.append(Document.doc_metadata[metadata_name].as_float() != value)
-
-            case "empty":
-                filters.append(Document.doc_metadata[metadata_name].is_(None))
-
-            case "not empty":
-                filters.append(Document.doc_metadata[metadata_name].isnot(None))
-
-            case "before" | "<":
-                filters.append(Document.doc_metadata[metadata_name].as_float() < value)
-
-            case "after" | ">":
-                filters.append(Document.doc_metadata[metadata_name].as_float() > value)
-
-            case "≤" | "<=":
-                filters.append(Document.doc_metadata[metadata_name].as_float() <= value)
-
-            case "≥" | ">=":
-                filters.append(Document.doc_metadata[metadata_name].as_float() >= value)
-
-            case _:
-                pass
-
-        return filters
-
     @classmethod
     def _extract_variable_selector_to_variable_mapping(
         cls,
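Both the removed node-local helper and the shared `process_metadata_filter_func` fold an empty value list into a boolean constant, `literal(False)` for `in` and `literal(True)` for `not in` (the shared version writes this as `literal(condition == "not in")`), because `x IN ()` is not valid SQL. A small SQLAlchemy sketch of that corner case:

```python
from sqlalchemy import String, column, literal

field = column("doc_author", String)

def in_filter(field, values: list[str], negate: bool):
    # `x IN ()` is not valid SQL, so an empty list folds to a constant:
    # nothing is "in" an empty set, everything is "not in" it.
    if not values:
        return literal(negate)
    return field.notin_(values) if negate else field.in_(values)

print(in_filter(field, ["a", "b"], negate=False))  # doc_author IN (...)
print(in_filter(field, [], negate=True))           # constant true
```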
@@ -12,9 +12,8 @@ from dify_app import DifyApp

 def _get_celery_ssl_options() -> dict[str, Any] | None:
     """Get SSL configuration for Celery broker/backend connections."""
-    # Use REDIS_USE_SSL for consistency with the main Redis client
     # Only apply SSL if we're using Redis as broker/backend
-    if not dify_config.REDIS_USE_SSL:
+    if not dify_config.BROKER_USE_SSL:
         return None

     # Check if Celery is actually using Redis
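The Celery SSL gate now keys off `BROKER_USE_SSL` instead of the main client's `REDIS_USE_SSL`, so broker encryption can differ from the primary Redis connection. A hedged sketch of the gating shape; the option-building body here is an assumption, not the module's actual code:

```python
import ssl

def get_celery_ssl_options(broker_use_ssl: bool, ca_certs: str | None) -> dict | None:
    # Gate on the broker's own flag: returning None disables SSL entirely.
    if not broker_use_ssl:
        return None
    return {
        "ssl_cert_reqs": ssl.CERT_REQUIRED if ca_certs else ssl.CERT_NONE,
        "ssl_ca_certs": ca_certs,
    }

assert get_celery_ssl_options(False, "/etc/ssl/ca.pem") is None
assert get_celery_ssl_options(True, None)["ssl_cert_reqs"] == ssl.CERT_NONE
```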
@@ -13,12 +13,20 @@ class TencentCosStorage(BaseStorage):
         super().__init__()

         self.bucket_name = dify_config.TENCENT_COS_BUCKET_NAME
-        config = CosConfig(
-            Region=dify_config.TENCENT_COS_REGION,
-            SecretId=dify_config.TENCENT_COS_SECRET_ID,
-            SecretKey=dify_config.TENCENT_COS_SECRET_KEY,
-            Scheme=dify_config.TENCENT_COS_SCHEME,
-        )
+        if dify_config.TENCENT_COS_CUSTOM_DOMAIN:
+            config = CosConfig(
+                Domain=dify_config.TENCENT_COS_CUSTOM_DOMAIN,
+                SecretId=dify_config.TENCENT_COS_SECRET_ID,
+                SecretKey=dify_config.TENCENT_COS_SECRET_KEY,
+                Scheme=dify_config.TENCENT_COS_SCHEME,
+            )
+        else:
+            config = CosConfig(
+                Region=dify_config.TENCENT_COS_REGION,
+                SecretId=dify_config.TENCENT_COS_SECRET_ID,
+                SecretKey=dify_config.TENCENT_COS_SECRET_KEY,
+                Scheme=dify_config.TENCENT_COS_SCHEME,
+            )
         self.client = CosS3Client(config)

     def save(self, filename, data):
@@ -0,0 +1,347 @@
"""
Archive Storage Client for S3-compatible storage.

This module provides a dedicated storage client for archiving or exporting logs
to S3-compatible object storage.
"""

import base64
import datetime
import gzip
import hashlib
import logging
from collections.abc import Generator
from typing import Any, cast

import boto3
import orjson
from botocore.client import Config
from botocore.exceptions import ClientError

from configs import dify_config

logger = logging.getLogger(__name__)


class ArchiveStorageError(Exception):
    """Base exception for archive storage operations."""

    pass


class ArchiveStorageNotConfiguredError(ArchiveStorageError):
    """Raised when archive storage is not properly configured."""

    pass


class ArchiveStorage:
    """
    S3-compatible storage client for archiving or exporting.

    This client provides methods for storing and retrieving archived data in JSONL+gzip format.
    """

    def __init__(self, bucket: str):
        if not dify_config.ARCHIVE_STORAGE_ENABLED:
            raise ArchiveStorageNotConfiguredError("Archive storage is not enabled")

        if not bucket:
            raise ArchiveStorageNotConfiguredError("Archive storage bucket is not configured")
        if not all(
            [
                dify_config.ARCHIVE_STORAGE_ENDPOINT,
                bucket,
                dify_config.ARCHIVE_STORAGE_ACCESS_KEY,
                dify_config.ARCHIVE_STORAGE_SECRET_KEY,
            ]
        ):
            raise ArchiveStorageNotConfiguredError(
                "Archive storage configuration is incomplete. "
                "Required: ARCHIVE_STORAGE_ENDPOINT, ARCHIVE_STORAGE_ACCESS_KEY, "
                "ARCHIVE_STORAGE_SECRET_KEY, and a bucket name"
            )

        self.bucket = bucket
        self.client = boto3.client(
            "s3",
            endpoint_url=dify_config.ARCHIVE_STORAGE_ENDPOINT,
            aws_access_key_id=dify_config.ARCHIVE_STORAGE_ACCESS_KEY,
            aws_secret_access_key=dify_config.ARCHIVE_STORAGE_SECRET_KEY,
            region_name=dify_config.ARCHIVE_STORAGE_REGION,
            config=Config(s3={"addressing_style": "path"}),
        )

        # Verify bucket accessibility
        try:
            self.client.head_bucket(Bucket=self.bucket)
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code")
            if error_code == "404":
                raise ArchiveStorageNotConfiguredError(f"Archive bucket '{self.bucket}' does not exist")
            elif error_code == "403":
                raise ArchiveStorageNotConfiguredError(f"Access denied to archive bucket '{self.bucket}'")
            else:
                raise ArchiveStorageError(f"Failed to access archive bucket: {e}")

    def put_object(self, key: str, data: bytes) -> str:
        """
        Upload an object to the archive storage.

        Args:
            key: Object key (path) within the bucket
            data: Binary data to upload

        Returns:
            MD5 checksum of the uploaded data

        Raises:
            ArchiveStorageError: If upload fails
        """
        checksum = hashlib.md5(data).hexdigest()
        try:
            self.client.put_object(
                Bucket=self.bucket,
                Key=key,
                Body=data,
                ContentMD5=self._content_md5(data),
            )
            logger.debug("Uploaded object: %s (size=%d, checksum=%s)", key, len(data), checksum)
            return checksum
        except ClientError as e:
            raise ArchiveStorageError(f"Failed to upload object '{key}': {e}")

    def get_object(self, key: str) -> bytes:
        """
        Download an object from the archive storage.

        Args:
            key: Object key (path) within the bucket

        Returns:
            Binary data of the object

        Raises:
            ArchiveStorageError: If download fails
            FileNotFoundError: If object does not exist
        """
        try:
            response = self.client.get_object(Bucket=self.bucket, Key=key)
            return response["Body"].read()
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code")
            if error_code == "NoSuchKey":
                raise FileNotFoundError(f"Archive object not found: {key}")
            raise ArchiveStorageError(f"Failed to download object '{key}': {e}")

    def get_object_stream(self, key: str) -> Generator[bytes, None, None]:
        """
        Stream an object from the archive storage.

        Args:
            key: Object key (path) within the bucket

        Yields:
            Chunks of binary data

        Raises:
            ArchiveStorageError: If download fails
            FileNotFoundError: If object does not exist
        """
        try:
            response = self.client.get_object(Bucket=self.bucket, Key=key)
            yield from response["Body"].iter_chunks()
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code")
            if error_code == "NoSuchKey":
                raise FileNotFoundError(f"Archive object not found: {key}")
            raise ArchiveStorageError(f"Failed to stream object '{key}': {e}")

    def object_exists(self, key: str) -> bool:
        """
        Check if an object exists in the archive storage.

        Args:
            key: Object key (path) within the bucket

        Returns:
            True if object exists, False otherwise
        """
        try:
            self.client.head_object(Bucket=self.bucket, Key=key)
            return True
        except ClientError:
            return False

    def delete_object(self, key: str) -> None:
        """
        Delete an object from the archive storage.

        Args:
            key: Object key (path) within the bucket

        Raises:
            ArchiveStorageError: If deletion fails
        """
        try:
            self.client.delete_object(Bucket=self.bucket, Key=key)
            logger.debug("Deleted object: %s", key)
        except ClientError as e:
            raise ArchiveStorageError(f"Failed to delete object '{key}': {e}")

    def generate_presigned_url(self, key: str, expires_in: int = 3600) -> str:
        """
        Generate a pre-signed URL for downloading an object.

        Args:
            key: Object key (path) within the bucket
            expires_in: URL validity duration in seconds (default: 1 hour)

        Returns:
            Pre-signed URL string.

        Raises:
            ArchiveStorageError: If generation fails
        """
        try:
            return self.client.generate_presigned_url(
                ClientMethod="get_object",
                Params={"Bucket": self.bucket, "Key": key},
                ExpiresIn=expires_in,
            )
        except ClientError as e:
            raise ArchiveStorageError(f"Failed to generate pre-signed URL for '{key}': {e}")

    def list_objects(self, prefix: str) -> list[str]:
        """
        List objects under a given prefix.

        Args:
            prefix: Object key prefix to filter by

        Returns:
            List of object keys matching the prefix
        """
        keys = []
        paginator = self.client.get_paginator("list_objects_v2")

        try:
            for page in paginator.paginate(Bucket=self.bucket, Prefix=prefix):
                for obj in page.get("Contents", []):
                    keys.append(obj["Key"])
        except ClientError as e:
            raise ArchiveStorageError(f"Failed to list objects with prefix '{prefix}': {e}")

        return keys

    @staticmethod
    def _content_md5(data: bytes) -> str:
        """Calculate base64-encoded MD5 for Content-MD5 header."""
        return base64.b64encode(hashlib.md5(data).digest()).decode()

    @staticmethod
    def serialize_to_jsonl_gz(records: list[dict[str, Any]]) -> bytes:
        """
        Serialize records to gzipped JSONL format.

        Args:
            records: List of dictionaries to serialize

        Returns:
            Gzipped JSONL bytes
        """
        lines = []
        for record in records:
            # Convert datetime objects to ISO format strings
            serialized = ArchiveStorage._serialize_record(record)
            lines.append(orjson.dumps(serialized))

        jsonl_content = b"\n".join(lines)
        if jsonl_content:
            jsonl_content += b"\n"

        return gzip.compress(jsonl_content)

    @staticmethod
    def deserialize_from_jsonl_gz(data: bytes) -> list[dict[str, Any]]:
        """
        Deserialize gzipped JSONL data to records.

        Args:
            data: Gzipped JSONL bytes

        Returns:
            List of dictionaries
        """
        jsonl_content = gzip.decompress(data)
        records = []

        for line in jsonl_content.splitlines():
            if line:
                records.append(orjson.loads(line))

        return records

    @staticmethod
    def _serialize_record(record: dict[str, Any]) -> dict[str, Any]:
        """Serialize a single record, converting special types."""

        def _serialize(item: Any) -> Any:
            if isinstance(item, datetime.datetime):
                return item.isoformat()
            if isinstance(item, dict):
                return {key: _serialize(value) for key, value in item.items()}
            if isinstance(item, list):
                return [_serialize(value) for value in item]
            return item

        return cast(dict[str, Any], _serialize(record))

    @staticmethod
    def compute_checksum(data: bytes) -> str:
        """Compute MD5 checksum of data."""
        return hashlib.md5(data).hexdigest()


# Singleton instances (lazy initialization)
_archive_storage: ArchiveStorage | None = None
_export_storage: ArchiveStorage | None = None


def get_archive_storage() -> ArchiveStorage:
    """
    Get the archive storage singleton instance.

    Returns:
        ArchiveStorage instance

    Raises:
        ArchiveStorageNotConfiguredError: If archive storage is not configured
    """
    global _archive_storage
    if _archive_storage is None:
        archive_bucket = dify_config.ARCHIVE_STORAGE_ARCHIVE_BUCKET
        if not archive_bucket:
            raise ArchiveStorageNotConfiguredError(
                "Archive storage bucket is not configured. Required: ARCHIVE_STORAGE_ARCHIVE_BUCKET"
            )
        _archive_storage = ArchiveStorage(bucket=archive_bucket)
    return _archive_storage


def get_export_storage() -> ArchiveStorage:
    """
    Get the export storage singleton instance.

    Returns:
        ArchiveStorage instance
    """
    global _export_storage
    if _export_storage is None:
        export_bucket = dify_config.ARCHIVE_STORAGE_EXPORT_BUCKET
        if not export_bucket:
            raise ArchiveStorageNotConfiguredError(
                "Archive export bucket is not configured. Required: ARCHIVE_STORAGE_EXPORT_BUCKET"
            )
        _export_storage = ArchiveStorage(bucket=export_bucket)
    return _export_storage
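The archive format is one gzipped JSON document per line, with datetimes flattened to ISO strings, which is what `serialize_to_jsonl_gz`/`deserialize_from_jsonl_gz` implement. A round-trip sketch of the same encoding:

```python
import datetime
import gzip

import orjson

records = [
    {"id": 1, "created_at": datetime.datetime(2024, 5, 1, 12, 0)},
    {"id": 2, "created_at": datetime.datetime(2024, 5, 2, 8, 30)},
]

def to_jsonl_gz(rows):
    # One JSON document per line, datetimes converted to ISO strings first,
    # mirroring _serialize_record above.
    lines = [orjson.dumps({**r, "created_at": r["created_at"].isoformat()}) for r in rows]
    return gzip.compress(b"\n".join(lines) + b"\n")

def from_jsonl_gz(blob):
    return [orjson.loads(line) for line in gzip.decompress(blob).splitlines() if line]

blob = to_jsonl_gz(records)
assert from_jsonl_gz(blob)[0]["created_at"] == "2024-05-01T12:00:00"
```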
@@ -16,6 +16,11 @@ celery_redis = Redis(
     port=redis_config.get("port") or 6379,
     password=redis_config.get("password") or None,
     db=int(redis_config.get("virtual_host")) if redis_config.get("virtual_host") else 1,
+    ssl=bool(dify_config.BROKER_USE_SSL),
+    ssl_ca_certs=dify_config.REDIS_SSL_CA_CERTS if dify_config.BROKER_USE_SSL else None,
+    ssl_cert_reqs=getattr(dify_config, "REDIS_SSL_CERT_REQS", None) if dify_config.BROKER_USE_SSL else None,
+    ssl_certfile=getattr(dify_config, "REDIS_SSL_CERTFILE", None) if dify_config.BROKER_USE_SSL else None,
+    ssl_keyfile=getattr(dify_config, "REDIS_SSL_KEYFILE", None) if dify_config.BROKER_USE_SSL else None,
 )

 logger = logging.getLogger(__name__)
@@ -14,7 +14,8 @@ from enums.quota_type import QuotaType, unlimited
 from extensions.otel import AppGenerateHandler, trace_span
 from models.model import Account, App, AppMode, EndUser
 from models.workflow import Workflow
-from services.errors.app import InvokeRateLimitError, QuotaExceededError, WorkflowIdFormatError, WorkflowNotFoundError
+from services.errors.app import QuotaExceededError, WorkflowIdFormatError, WorkflowNotFoundError
+from services.errors.llm import InvokeRateLimitError
 from services.workflow_service import WorkflowService
@@ -21,7 +21,7 @@ from models.model import App, EndUser
 from models.trigger import WorkflowTriggerLog
 from models.workflow import Workflow
 from repositories.sqlalchemy_workflow_trigger_log_repository import SQLAlchemyWorkflowTriggerLogRepository
-from services.errors.app import InvokeRateLimitError, QuotaExceededError, WorkflowNotFoundError
+from services.errors.app import QuotaExceededError, WorkflowNotFoundError, WorkflowQuotaLimitError
 from services.workflow.entities import AsyncTriggerResponse, TriggerData, WorkflowTaskData
 from services.workflow.queue_dispatcher import QueueDispatcherManager, QueuePriority
 from services.workflow_service import WorkflowService

@@ -141,7 +141,7 @@ class AsyncWorkflowService:
                 trigger_log_repo.update(trigger_log)
                 session.commit()

-                raise InvokeRateLimitError(
+                raise WorkflowQuotaLimitError(
                     f"Workflow execution quota limit reached for tenant {trigger_data.tenant_id}"
                 ) from e
@@ -1,3 +1,4 @@
+import json
 import logging
 import os
 from collections.abc import Sequence

@@ -31,6 +32,11 @@ class BillingService:

     compliance_download_rate_limiter = RateLimiter("compliance_download_rate_limiter", 4, 60)

+    # Redis key prefix for tenant plan cache
+    _PLAN_CACHE_KEY_PREFIX = "tenant_plan:"
+    # Cache TTL: 10 minutes
+    _PLAN_CACHE_TTL = 600
+
     @classmethod
     def get_info(cls, tenant_id: str):
         params = {"tenant_id": tenant_id}

@@ -272,14 +278,110 @@ class BillingService:
                 data = resp.get("data", {})

                 for tenant_id, plan in data.items():
-                    subscription_plan = subscription_adapter.validate_python(plan)
-                    results[tenant_id] = subscription_plan
+                    try:
+                        subscription_plan = subscription_adapter.validate_python(plan)
+                        results[tenant_id] = subscription_plan
+                    except Exception:
+                        logger.exception(
+                            "get_plan_bulk: failed to validate subscription plan for tenant(%s)", tenant_id
+                        )
+                        continue
             except Exception:
-                logger.exception("Failed to fetch billing info batch for tenants: %s", chunk)
+                logger.exception("get_plan_bulk: failed to fetch billing info batch for tenants: %s", chunk)
                 continue

         return results

+    @classmethod
+    def _make_plan_cache_key(cls, tenant_id: str) -> str:
+        return f"{cls._PLAN_CACHE_KEY_PREFIX}{tenant_id}"
+
+    @classmethod
+    def get_plan_bulk_with_cache(cls, tenant_ids: Sequence[str]) -> dict[str, SubscriptionPlan]:
+        """
+        Bulk fetch billing subscription plan with cache to reduce billing API loads in batch job scenarios.
+
+        NOTE: if you need high data consistency, use get_plan_bulk instead.
+
+        Returns:
+            Mapping of tenant_id -> {plan: str, expiration_date: int}
+        """
+        tenant_plans: dict[str, SubscriptionPlan] = {}
+
+        if not tenant_ids:
+            return tenant_plans
+
+        subscription_adapter = TypeAdapter(SubscriptionPlan)
+
+        # Step 1: Batch fetch from Redis cache using mget
+        redis_keys = [cls._make_plan_cache_key(tenant_id) for tenant_id in tenant_ids]
+        try:
+            cached_values = redis_client.mget(redis_keys)
+
+            if len(cached_values) != len(tenant_ids):
+                raise Exception(
+                    "get_plan_bulk_with_cache: unexpected error: redis mget failed: cached values length mismatch"
+                )
+
+            # Map cached values back to tenant_ids
+            cache_misses: list[str] = []
+
+            for tenant_id, cached_value in zip(tenant_ids, cached_values):
+                if cached_value:
+                    try:
+                        # Redis returns bytes, decode to string and parse JSON
+                        json_str = cached_value.decode("utf-8") if isinstance(cached_value, bytes) else cached_value
+                        plan_dict = json.loads(json_str)
+                        subscription_plan = subscription_adapter.validate_python(plan_dict)
+                        tenant_plans[tenant_id] = subscription_plan
+                    except Exception:
+                        logger.exception(
+                            "get_plan_bulk_with_cache: process tenant(%s) failed, add to cache misses", tenant_id
+                        )
+                        cache_misses.append(tenant_id)
+                else:
+                    cache_misses.append(tenant_id)
+
+            logger.info(
+                "get_plan_bulk_with_cache: cache hits=%s, cache misses=%s",
+                len(tenant_plans),
+                len(cache_misses),
+            )
+        except Exception:
+            logger.exception("get_plan_bulk_with_cache: redis mget failed, falling back to API")
+            cache_misses = list(tenant_ids)
+
+        # Step 2: Fetch missing plans from billing API
+        if cache_misses:
+            bulk_plans = BillingService.get_plan_bulk(cache_misses)
+
+            if bulk_plans:
+                plans_to_cache: dict[str, SubscriptionPlan] = {}
+
+                for tenant_id, subscription_plan in bulk_plans.items():
+                    tenant_plans[tenant_id] = subscription_plan
+                    plans_to_cache[tenant_id] = subscription_plan
+
+                # Step 3: Batch update Redis cache using pipeline
+                if plans_to_cache:
+                    try:
+                        pipe = redis_client.pipeline()
+                        for tenant_id, subscription_plan in plans_to_cache.items():
+                            redis_key = cls._make_plan_cache_key(tenant_id)
+                            # Serialize dict to JSON string
+                            json_str = json.dumps(subscription_plan)
+                            pipe.setex(redis_key, cls._PLAN_CACHE_TTL, json_str)
+                        pipe.execute()
+
+                        logger.info(
+                            "get_plan_bulk_with_cache: cached %s new tenant plans to Redis",
+                            len(plans_to_cache),
+                        )
+                    except Exception:
+                        logger.exception("get_plan_bulk_with_cache: redis pipeline failed")
+
+        return tenant_plans
+
     @classmethod
     def get_expired_subscription_cleanup_whitelist(cls) -> Sequence[str]:
         resp = cls._send_request("GET", "/subscription/cleanup/whitelist")
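`get_plan_bulk_with_cache` is a cache-aside pattern batched at both ends: one `MGET` round trip for reads and one pipelined `SETEX` batch for writes. A condensed sketch, assuming a local Redis in place of the shared `redis_client`:

```python
import json

import redis

r = redis.Redis()  # assumes a local Redis; the service uses the shared redis_client

def get_plans_cached(tenant_ids, fetch_from_api, ttl=600):
    """Cache-aside bulk lookup: one MGET for hits, one pipelined SETEX batch for misses."""
    keys = [f"tenant_plan:{t}" for t in tenant_ids]
    cached = r.mget(keys)  # single round trip for all keys

    plans, misses = {}, []
    for tenant_id, raw in zip(tenant_ids, cached):
        if raw:
            plans[tenant_id] = json.loads(raw)
        else:
            misses.append(tenant_id)

    if misses:
        fresh = fetch_from_api(misses)  # e.g. a bulk billing API call
        pipe = r.pipeline()
        for tenant_id, plan in fresh.items():
            pipe.setex(f"tenant_plan:{tenant_id}", ttl, json.dumps(plan))
        pipe.execute()  # single round trip for all writes
        plans.update(fresh)
    return plans
```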
@@ -110,5 +110,5 @@ class EnterpriseService:
         if not app_id:
             raise ValueError("app_id must be provided.")

-        body = {"appId": app_id}
-        EnterpriseRequest.send_request("DELETE", "/webapp/clean", json=body)
+        params = {"appId": app_id}
+        EnterpriseRequest.send_request("DELETE", "/webapp/clean", params=params)
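Moving `appId` from the JSON body to query parameters matters because many HTTP servers and intermediaries ignore or drop DELETE request bodies. Illustrated with `requests` against a hypothetical endpoint:

```python
import requests

BASE = "https://enterprise.example.com"  # hypothetical endpoint

# Before: app id in the JSON body - some servers/proxies ignore DELETE bodies.
requests.delete(f"{BASE}/webapp/clean", json={"appId": "app-123"})

# After: app id in the query string, which every intermediary preserves.
requests.delete(f"{BASE}/webapp/clean", params={"appId": "app-123"})
```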
@@ -18,8 +18,8 @@ class WorkflowIdFormatError(Exception):
     pass


-class InvokeRateLimitError(Exception):
-    """Raised when rate limit is exceeded for workflow invocations."""
+class WorkflowQuotaLimitError(Exception):
+    """Raised when workflow execution quota is exceeded (for async/background workflows)."""

     pass
@@ -146,7 +146,7 @@ class PluginParameterService:
                 provider,
                 action,
                 resolved_credentials,
-                CredentialType.API_KEY.value,
+                original_subscription.credential_type or CredentialType.UNAUTHORIZED.value,
                 parameter,
             )
             .options
```diff
@@ -7,7 +7,6 @@ from httpx import get
 from sqlalchemy import select

 from core.entities.provider_entities import ProviderConfig
-from core.helper.tool_provider_cache import ToolProviderListCache
 from core.model_runtime.utils.encoders import jsonable_encoder
 from core.tools.__base.tool_runtime import ToolRuntime
 from core.tools.custom_tool.provider import ApiToolProviderController
@@ -86,7 +85,9 @@ class ApiToolManageService:
             raise ValueError(f"invalid schema: {str(e)}")

     @staticmethod
-    def convert_schema_to_tool_bundles(schema: str, extra_info: dict | None = None) -> tuple[list[ApiToolBundle], str]:
+    def convert_schema_to_tool_bundles(
+        schema: str, extra_info: dict | None = None
+    ) -> tuple[list[ApiToolBundle], ApiProviderSchemaType]:
         """
         convert schema to tool bundles
@@ -104,7 +105,7 @@ class ApiToolManageService:
         provider_name: str,
         icon: dict,
         credentials: dict,
-        schema_type: str,
+        schema_type: ApiProviderSchemaType,
         schema: str,
         privacy_policy: str,
         custom_disclaimer: str,
@@ -113,9 +114,6 @@ class ApiToolManageService:
         """
        create api tool provider
        """
-        if schema_type not in [member.value for member in ApiProviderSchemaType]:
-            raise ValueError(f"invalid schema type {schema}")
-
         provider_name = provider_name.strip()

         # check if the provider exists
@@ -178,9 +176,6 @@ class ApiToolManageService:
         # update labels
         ToolLabelManager.update_tool_labels(provider_controller, labels)

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id)
-
         return {"result": "success"}

     @staticmethod
@@ -245,18 +240,15 @@ class ApiToolManageService:
         original_provider: str,
         icon: dict,
         credentials: dict,
-        schema_type: str,
+        _schema_type: ApiProviderSchemaType,
         schema: str,
-        privacy_policy: str,
+        privacy_policy: str | None,
         custom_disclaimer: str,
         labels: list[str],
     ):
         """
        update api tool provider
        """
-        if schema_type not in [member.value for member in ApiProviderSchemaType]:
-            raise ValueError(f"invalid schema type {schema}")
-
         provider_name = provider_name.strip()

         # check if the provider exists
@@ -281,7 +273,7 @@ class ApiToolManageService:
         provider.icon = json.dumps(icon)
         provider.schema = schema
         provider.description = extra_info.get("description", "")
-        provider.schema_type_str = ApiProviderSchemaType.OPENAPI
+        provider.schema_type_str = schema_type
         provider.tools_str = json.dumps(jsonable_encoder(tool_bundles))
         provider.privacy_policy = privacy_policy
         provider.custom_disclaimer = custom_disclaimer
@@ -322,9 +314,6 @@ class ApiToolManageService:
         # update labels
         ToolLabelManager.update_tool_labels(provider_controller, labels)

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id)
-
         return {"result": "success"}

     @staticmethod
@@ -347,9 +336,6 @@ class ApiToolManageService:
         db.session.delete(provider)
         db.session.commit()

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id)
-
         return {"result": "success"}

     @staticmethod
@@ -366,7 +352,7 @@ class ApiToolManageService:
         tool_name: str,
         credentials: dict,
         parameters: dict,
-        schema_type: str,
+        schema_type: ApiProviderSchemaType,
         schema: str,
     ):
         """
```
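The net effect of this diff is that `schema_type` crosses the service boundary as an `ApiProviderSchemaType` enum instead of a raw string, which is why the per-method membership checks could be deleted. A minimal sketch of the boundary conversion this relies on; only the `OPENAPI` member appears in the diff, the rest is an assumption:

```python
from enum import Enum


class ApiProviderSchemaType(str, Enum):
    # Only OPENAPI appears in the diff above; other members are illustrative guesses.
    OPENAPI = "openapi"
    SWAGGER = "swagger"


def parse_schema_type(raw: str) -> ApiProviderSchemaType:
    """Convert an untrusted string once, at the API boundary."""
    try:
        return ApiProviderSchemaType(raw)
    except ValueError:
        # Same failure mode the deleted membership checks used to provide.
        raise ValueError(f"invalid schema type {raw}") from None


assert parse_schema_type("openapi") is ApiProviderSchemaType.OPENAPI
```

With the conversion done once at the edge, every downstream method can trust the type instead of re-validating a string.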
```diff
@@ -12,7 +12,6 @@ from constants import HIDDEN_VALUE, UNKNOWN_VALUE
 from core.helper.name_generator import generate_incremental_name
 from core.helper.position_helper import is_filtered
 from core.helper.provider_cache import NoOpProviderCredentialCache, ToolProviderCredentialsCache
-from core.helper.tool_provider_cache import ToolProviderListCache
 from core.plugin.entities.plugin_daemon import CredentialType
 from core.tools.builtin_tool.provider import BuiltinToolProviderController
 from core.tools.builtin_tool.providers._positions import BuiltinToolProviderSort
@@ -205,9 +204,6 @@ class BuiltinToolManageService:
                 db_provider.name = name

                 session.commit()
-
-                # Invalidate tool providers cache
-                ToolProviderListCache.invalidate_cache(tenant_id)
             except Exception as e:
                 session.rollback()
                 raise ValueError(str(e))
@@ -290,8 +286,6 @@ class BuiltinToolManageService:
             session.rollback()
             raise ValueError(str(e))

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id, "builtin")
         return {"result": "success"}

     @staticmethod
@@ -409,9 +403,6 @@ class BuiltinToolManageService:
         )
         cache.delete()

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id)
-
         return {"result": "success"}

     @staticmethod
@@ -434,8 +425,6 @@ class BuiltinToolManageService:
             target_provider.is_default = True
             session.commit()

-            # Invalidate tool providers cache
-            ToolProviderListCache.invalidate_cache(tenant_id)
             return {"result": "success"}

     @staticmethod
```
```diff
@@ -319,8 +319,14 @@ class MCPToolManageService:
         except MCPError as e:
             raise ValueError(f"Failed to connect to MCP server: {e}")

-        # Update database with retrieved tools
-        db_provider.tools = json.dumps([tool.model_dump() for tool in tools])
+        # Update database with retrieved tools (ensure description is a non-null string)
+        tools_payload = []
+        for tool in tools:
+            data = tool.model_dump()
+            if data.get("description") is None:
+                data["description"] = ""
+            tools_payload.append(data)
+        db_provider.tools = json.dumps(tools_payload)
         db_provider.authed = True
         db_provider.updated_at = datetime.now()
         self._session.flush()
@@ -620,6 +626,21 @@ class MCPToolManageService:
             server_url_hash=new_server_url_hash,
         )

+    @staticmethod
+    def reconnect_with_url(
+        *,
+        server_url: str,
+        headers: dict[str, str],
+        timeout: float | None,
+        sse_read_timeout: float | None,
+    ) -> ReconnectResult:
+        return MCPToolManageService._reconnect_with_url(
+            server_url=server_url,
+            headers=headers,
+            timeout=timeout,
+            sse_read_timeout=sse_read_timeout,
+        )
+
     @staticmethod
     def _reconnect_with_url(
         *,
@@ -642,9 +663,16 @@ class MCPToolManageService:
                 sse_read_timeout=sse_read_timeout,
             ) as mcp_client:
                 tools = mcp_client.list_tools()
+                # Ensure tool descriptions are non-null in payload
+                tools_payload = []
+                for t in tools:
+                    d = t.model_dump()
+                    if d.get("description") is None:
+                        d["description"] = ""
+                    tools_payload.append(d)
                 return ReconnectResult(
                     authed=True,
-                    tools=json.dumps([tool.model_dump() for tool in tools]),
+                    tools=json.dumps(tools_payload),
                     encrypted_credentials=EMPTY_CREDENTIALS_JSON,
                 )
         except MCPAuthError:
```
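The same description-normalization loop now appears in both hunks above. A minimal sketch of how it could be factored into one shared helper; the `normalize_tool_payload` name and the `Protocol` stub are hypothetical, only `model_dump()` and the null-description rule come from the diff:

```python
from typing import Any, Protocol


class DumpableTool(Protocol):
    """Stand-in for the MCP tool entity; only model_dump() is relied on."""

    def model_dump(self) -> dict[str, Any]: ...


def normalize_tool_payload(tools: list[DumpableTool]) -> list[dict[str, Any]]:
    """Serialize tools, coercing a missing description to an empty string."""
    payload: list[dict[str, Any]] = []
    for tool in tools:
        data = tool.model_dump()
        if data.get("description") is None:
            data["description"] = ""
        payload.append(data)
    return payload
```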
```diff
@@ -1,6 +1,5 @@
 import logging

-from core.helper.tool_provider_cache import ToolProviderListCache
 from core.tools.entities.api_entities import ToolProviderTypeApiLiteral
 from core.tools.tool_manager import ToolManager
 from services.tools.tools_transform_service import ToolTransformService
@@ -16,14 +15,6 @@ class ToolCommonService:

         :return: the list of tool providers
         """
-        # Try to get from cache first
-        cached_result = ToolProviderListCache.get_cached_providers(tenant_id, typ)
-        if cached_result is not None:
-            logger.debug("Returning cached tool providers for tenant %s, type %s", tenant_id, typ)
-            return cached_result
-
-        # Cache miss - fetch from database
-        logger.debug("Cache miss for tool providers, fetching from database for tenant %s, type %s", tenant_id, typ)
         providers = ToolManager.list_providers_from_api(user_id, tenant_id, typ)

         # add icon
@@ -32,7 +23,4 @@ class ToolCommonService:

         result = [provider.to_dict() for provider in providers]

-        # Cache the result
-        ToolProviderListCache.set_cached_providers(tenant_id, typ, result)
-
         return result
```
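For context, the lines removed above implemented a classic read-through cache. A generic sketch of that pattern, assuming a plain redis-py client and a hypothetical key layout (the actual `ToolProviderListCache` internals are not shown in this diff):

```python
import json

import redis

redis_client = redis.Redis()  # assumed connection; Dify wires this via extensions.ext_redis
CACHE_TTL_SECONDS = 600  # illustrative TTL


def list_providers_cached(tenant_id: str, typ: str, fetch_from_db):
    """Read-through: serve from cache when possible, else fetch and store."""
    key = f"tool_providers:{tenant_id}:{typ}"  # hypothetical key layout
    cached = redis_client.get(key)
    if cached is not None:
        return json.loads(cached)
    result = fetch_from_db(tenant_id, typ)  # cache miss: hit the database
    redis_client.setex(key, CACHE_TTL_SECONDS, json.dumps(result))
    return result
```

Removing the caching layer here, together with deleting every `invalidate_cache` call in the manage services, eliminates the stale-read risk that the invalidation calls were trying to paper over.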
```diff
@@ -7,7 +7,6 @@ from typing import Any
 from sqlalchemy import or_, select
 from sqlalchemy.orm import Session

-from core.helper.tool_provider_cache import ToolProviderListCache
 from core.model_runtime.utils.encoders import jsonable_encoder
 from core.tools.__base.tool_provider import ToolProviderController
 from core.tools.entities.api_entities import ToolApiEntity, ToolProviderApiEntity
@@ -68,34 +67,31 @@ class WorkflowToolManageService:
         if workflow is None:
             raise ValueError(f"Workflow not found for app {workflow_app_id}")

-        with Session(db.engine, expire_on_commit=False) as session, session.begin():
-            workflow_tool_provider = WorkflowToolProvider(
-                tenant_id=tenant_id,
-                user_id=user_id,
-                app_id=workflow_app_id,
-                name=name,
-                label=label,
-                icon=json.dumps(icon),
-                description=description,
-                parameter_configuration=json.dumps(parameters),
-                privacy_policy=privacy_policy,
-                version=workflow.version,
-            )
-            session.add(workflow_tool_provider)
+        workflow_tool_provider = WorkflowToolProvider(
+            tenant_id=tenant_id,
+            user_id=user_id,
+            app_id=workflow_app_id,
+            name=name,
+            label=label,
+            icon=json.dumps(icon),
+            description=description,
+            parameter_configuration=json.dumps(parameters),
+            privacy_policy=privacy_policy,
+            version=workflow.version,
+        )

         try:
             WorkflowToolProviderController.from_db(workflow_tool_provider)
         except Exception as e:
             raise ValueError(str(e))

+        with Session(db.engine, expire_on_commit=False) as session, session.begin():
+            session.add(workflow_tool_provider)
+
         if labels is not None:
             ToolLabelManager.update_tool_labels(
                 ToolTransformService.workflow_provider_to_controller(workflow_tool_provider), labels
             )

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id)
-
         return {"result": "success"}

     @classmethod
@@ -183,9 +179,6 @@ class WorkflowToolManageService:
                 ToolTransformService.workflow_provider_to_controller(workflow_tool_provider), labels
             )

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id)
-
         return {"result": "success"}

     @classmethod
@@ -248,9 +241,6 @@ class WorkflowToolManageService:

         db.session.commit()

-        # Invalidate tool providers cache
-        ToolProviderListCache.invalidate_cache(tenant_id)
-
         return {"result": "success"}

     @classmethod
```
```diff
@@ -868,48 +868,111 @@ class TriggerProviderService:
         if not provider_controller:
             raise ValueError(f"Provider {provider_id} not found")

-        subscription = TriggerProviderService.get_subscription_by_id(
-            tenant_id=tenant_id,
-            subscription_id=subscription_id,
-        )
-        if not subscription:
-            raise ValueError(f"Subscription {subscription_id} not found")
+        # Use distributed lock to prevent race conditions on the same subscription
+        lock_key = f"trigger_subscription_rebuild_lock:{tenant_id}_{subscription_id}"
+        with redis_client.lock(lock_key, timeout=20):
+            with Session(db.engine, expire_on_commit=False) as session:
+                try:
+                    # Get subscription within the transaction
+                    subscription: TriggerSubscription | None = (
+                        session.query(TriggerSubscription).filter_by(tenant_id=tenant_id, id=subscription_id).first()
+                    )
+                    if not subscription:
+                        raise ValueError(f"Subscription {subscription_id} not found")

-        credential_type = CredentialType.of(subscription.credential_type)
-        if credential_type not in [CredentialType.OAUTH2, CredentialType.API_KEY]:
-            raise ValueError("Credential type not supported for rebuild")
+                    credential_type = CredentialType.of(subscription.credential_type)
+                    if credential_type not in [CredentialType.OAUTH2, CredentialType.API_KEY]:
+                        raise ValueError("Credential type not supported for rebuild")

-        # TODO: Trying to invoke update api of the plugin trigger provider
+                    # Decrypt existing credentials for merging
+                    credential_encrypter, _ = create_trigger_provider_encrypter_for_subscription(
+                        tenant_id=tenant_id,
+                        controller=provider_controller,
+                        subscription=subscription,
+                    )
+                    decrypted_credentials = dict(credential_encrypter.decrypt(subscription.credentials))

-        # FALLBACK: If the update api is not implemented, delete the previous subscription and create a new one
+                    # Merge credentials: if caller passed HIDDEN_VALUE, retain existing decrypted value
+                    merged_credentials: dict[str, Any] = {
+                        key: value if value != HIDDEN_VALUE else decrypted_credentials.get(key, UNKNOWN_VALUE)
+                        for key, value in credentials.items()
+                    }

-        # Delete the previous subscription
-        user_id = subscription.user_id
-        TriggerManager.unsubscribe_trigger(
-            tenant_id=tenant_id,
-            user_id=user_id,
-            provider_id=provider_id,
-            subscription=subscription.to_entity(),
-            credentials=subscription.credentials,
-            credential_type=credential_type,
-        )
+                    user_id = subscription.user_id

-        # Create a new subscription with the same subscription_id and endpoint_id
-        new_subscription: TriggerSubscriptionEntity = TriggerManager.subscribe_trigger(
-            tenant_id=tenant_id,
-            user_id=user_id,
-            provider_id=provider_id,
-            endpoint=generate_plugin_trigger_endpoint_url(subscription.endpoint_id),
-            parameters=parameters,
-            credentials=credentials,
-            credential_type=credential_type,
-        )
-        TriggerProviderService.update_trigger_subscription(
-            tenant_id=tenant_id,
-            subscription_id=subscription.id,
-            name=name,
-            parameters=parameters,
-            credentials=credentials,
-            properties=new_subscription.properties,
-            expires_at=new_subscription.expires_at,
-        )
+                    # TODO: Trying to invoke update api of the plugin trigger provider
+
+                    # FALLBACK: If the update api is not implemented,
+                    # delete the previous subscription and create a new one
+
+                    # Unsubscribe the previous subscription (external call, but we'll handle errors)
+                    try:
+                        TriggerManager.unsubscribe_trigger(
+                            tenant_id=tenant_id,
+                            user_id=user_id,
+                            provider_id=provider_id,
+                            subscription=subscription.to_entity(),
+                            credentials=decrypted_credentials,
+                            credential_type=credential_type,
+                        )
+                    except Exception as e:
+                        logger.exception("Error unsubscribing trigger during rebuild", exc_info=e)
+                        # Continue anyway - the subscription might already be deleted externally
+
+                    # Create a new subscription with the same subscription_id and endpoint_id (external call)
+                    new_subscription: TriggerSubscriptionEntity = TriggerManager.subscribe_trigger(
+                        tenant_id=tenant_id,
+                        user_id=user_id,
+                        provider_id=provider_id,
+                        endpoint=generate_plugin_trigger_endpoint_url(subscription.endpoint_id),
+                        parameters=parameters,
+                        credentials=merged_credentials,
+                        credential_type=credential_type,
+                    )
+
+                    # Update the subscription in the same transaction
+                    # Inline update logic to reuse the same session
+                    if name is not None and name != subscription.name:
+                        existing = (
+                            session.query(TriggerSubscription)
+                            .filter_by(tenant_id=tenant_id, provider_id=str(provider_id), name=name)
+                            .first()
+                        )
+                        if existing and existing.id != subscription.id:
+                            raise ValueError(f"Subscription name '{name}' already exists for this provider")
+                        subscription.name = name
+
+                    # Update parameters
+                    subscription.parameters = dict(parameters)
+
+                    # Update credentials with merged (and encrypted) values
+                    subscription.credentials = dict(credential_encrypter.encrypt(merged_credentials))
+
+                    # Update properties
+                    if new_subscription.properties:
+                        properties_encrypter, _ = create_provider_encrypter(
+                            tenant_id=tenant_id,
+                            config=provider_controller.get_properties_schema(),
+                            cache=NoOpProviderCredentialCache(),
+                        )
+                        subscription.properties = dict(properties_encrypter.encrypt(dict(new_subscription.properties)))
+
+                    # Update expiration timestamp
+                    if new_subscription.expires_at is not None:
+                        subscription.expires_at = new_subscription.expires_at
+
+                    # Commit the transaction
+                    session.commit()
+
+                    # Clear subscription cache
+                    delete_cache_for_subscription(
+                        tenant_id=tenant_id,
+                        provider_id=subscription.provider_id,
+                        subscription_id=subscription.id,
+                    )
+
+                except Exception as e:
+                    # Rollback on any error
+                    session.rollback()
+                    logger.exception("Failed to rebuild trigger subscription", exc_info=e)
+                    raise
```
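The heart of this rebuild change is the credential merge: values the client sends back as the masked sentinel are swapped for the stored, decrypted ones. A standalone sketch of that comprehension with assumed sentinel literals (Dify imports the real values from `constants`):

```python
HIDDEN_VALUE = "[__HIDDEN__]"  # assumed sentinel; the real literal lives in constants
UNKNOWN_VALUE = "[__UNKNOWN__]"  # assumed sentinel for masked keys with no stored value


def merge_credentials(incoming: dict[str, str], stored: dict[str, str]) -> dict[str, str]:
    """Replace masked values from the client with the decrypted stored ones."""
    return {
        key: value if value != HIDDEN_VALUE else stored.get(key, UNKNOWN_VALUE)
        for key, value in incoming.items()
    }


stored = {"api_key": "original-key"}
incoming = {"api_key": HIDDEN_VALUE, "api_secret": "new-secret"}
assert merge_credentials(incoming, stored) == {
    "api_key": "original-key",
    "api_secret": "new-secret",
}
```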
```diff
@@ -863,10 +863,18 @@ class WebhookService:
                 not_found_in_cache.append(node_id)
                 continue

-        with Session(db.engine) as session:
-            try:
-                # lock the concurrent webhook trigger creation
-                redis_client.lock(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:apps:{app.id}:lock", timeout=10)
+        lock_key = f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:apps:{app.id}:lock"
+        lock = redis_client.lock(lock_key, timeout=10)
+        lock_acquired = False
+
+        try:
+            # acquire the lock with blocking and timeout
+            lock_acquired = lock.acquire(blocking=True, blocking_timeout=10)
+            if not lock_acquired:
+                logger.warning("Failed to acquire lock for webhook sync, app %s", app.id)
+                raise RuntimeError("Failed to acquire lock for webhook trigger synchronization")
+
+            with Session(db.engine) as session:
                 # fetch the non-cached nodes from DB
                 all_records = session.scalars(
                     select(WorkflowWebhookTrigger).where(
@@ -903,11 +911,16 @@ class WebhookService:
                         session.delete(nodes_id_in_db[node_id])
                         redis_client.delete(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:{app.id}:{node_id}")
                 session.commit()
-            except Exception:
-                logger.exception("Failed to sync webhook relationships for app %s", app.id)
-                raise
-            finally:
-                redis_client.delete(f"{cls.__WEBHOOK_NODE_CACHE_KEY__}:apps:{app.id}:lock")
+        except Exception:
+            logger.exception("Failed to sync webhook relationships for app %s", app.id)
+            raise
+        finally:
+            # release the lock only if it was acquired
+            if lock_acquired:
+                try:
+                    lock.release()
+                except Exception:
+                    logger.exception("Failed to release lock for webhook sync, app %s", app.id)

     @classmethod
     def generate_webhook_id(cls) -> str:
```
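The fix above replaces a lock object that was created but never acquired (and then unconditionally deleted) with a proper acquire/release cycle. A minimal sketch of that redis-py locking pattern, with a hypothetical key and helper name:

```python
import logging

import redis

logger = logging.getLogger(__name__)
redis_client = redis.Redis()  # assumed connection


def run_with_sync_lock(app_id: str, critical_section) -> None:
    """Acquire a redis-py Lock with a bounded wait; release it only if held."""
    lock = redis_client.lock(f"webhook_sync:apps:{app_id}:lock", timeout=10)  # hypothetical key
    acquired = lock.acquire(blocking=True, blocking_timeout=10)
    if not acquired:
        raise RuntimeError("Failed to acquire lock for webhook trigger synchronization")
    try:
        critical_section()
    finally:
        try:
            lock.release()
        except Exception:
            logger.exception("Failed to release lock for app %s", app_id)
```

Releasing via `lock.release()` instead of deleting the key also avoids clobbering a lock that another worker has since acquired after this one's `timeout` expired.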
```diff
@@ -7,11 +7,14 @@ CODE_LANGUAGE = CodeLanguage.JINJA2


 def test_jinja2():
+    """Test basic Jinja2 template rendering."""
     template = "Hello {{template}}"
+    # Template must be base64 encoded to match the new safe embedding approach
+    template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")
     inputs = base64.b64encode(b'{"template": "World"}').decode("utf-8")
     code = (
         Jinja2TemplateTransformer.get_runner_script()
-        .replace(Jinja2TemplateTransformer._code_placeholder, template)
+        .replace(Jinja2TemplateTransformer._template_b64_placeholder, template_b64)
         .replace(Jinja2TemplateTransformer._inputs_placeholder, inputs)
     )
     result = CodeExecutor.execute_code(
@@ -21,6 +24,7 @@ def test_jinja2():


 def test_jinja2_with_code_template():
+    """Test template rendering via the high-level workflow API."""
     result = CodeExecutor.execute_workflow_code_template(
         language=CODE_LANGUAGE, code="Hello {{template}}", inputs={"template": "World"}
     )
@@ -28,7 +32,64 @@ def test_jinja2_with_code_template():


 def test_jinja2_get_runner_script():
+    """Test that runner script contains required placeholders."""
     runner_script = Jinja2TemplateTransformer.get_runner_script()
-    assert runner_script.count(Jinja2TemplateTransformer._code_placeholder) == 1
+    assert runner_script.count(Jinja2TemplateTransformer._template_b64_placeholder) == 1
     assert runner_script.count(Jinja2TemplateTransformer._inputs_placeholder) == 1
     assert runner_script.count(Jinja2TemplateTransformer._result_tag) == 2
+
+
+def test_jinja2_template_with_special_characters():
+    """
+    Test that templates with special characters (quotes, newlines) render correctly.
+    This is a regression test for issue #26818 where textarea pre-fill values
+    containing special characters would break template rendering.
+    """
+    # Template with triple quotes, single quotes, double quotes, and newlines
+    template = """<html>
+<body>
+<input value="{{ task.get('Task ID', '') }}"/>
+<textarea>{{ task.get('Issues', 'No issues reported') }}</textarea>
+<p>Status: "{{ status }}"</p>
+<pre>'''code block'''</pre>
+</body>
+</html>"""
+    inputs = {"task": {"Task ID": "TASK-123", "Issues": "Line 1\nLine 2\nLine 3"}, "status": "completed"}
+
+    result = CodeExecutor.execute_workflow_code_template(language=CODE_LANGUAGE, code=template, inputs=inputs)
+
+    # Verify the template rendered correctly with all special characters
+    output = result["result"]
+    assert 'value="TASK-123"' in output
+    assert "<textarea>Line 1\nLine 2\nLine 3</textarea>" in output
+    assert 'Status: "completed"' in output
+    assert "'''code block'''" in output
+
+
+def test_jinja2_template_with_html_textarea_prefill():
+    """
+    Specific test for HTML textarea with Jinja2 variable pre-fill.
+    Verifies fix for issue #26818.
+    """
+    template = "<textarea name='notes'>{{ notes }}</textarea>"
+    notes_content = "This is a multi-line note.\nWith special chars: 'single' and \"double\" quotes."
+    inputs = {"notes": notes_content}
+
+    result = CodeExecutor.execute_workflow_code_template(language=CODE_LANGUAGE, code=template, inputs=inputs)
+
+    expected_output = f"<textarea name='notes'>{notes_content}</textarea>"
+    assert result["result"] == expected_output
+
+
+def test_jinja2_assemble_runner_script_encodes_template():
+    """Test that assemble_runner_script properly base64 encodes the template."""
+    template = "Hello {{ name }}!"
+    inputs = {"name": "World"}
+
+    script = Jinja2TemplateTransformer.assemble_runner_script(template, inputs)
+
+    # The template should be base64 encoded in the script
+    template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")
+    assert template_b64 in script
+    # The raw template should NOT appear in the script (it's encoded)
+    assert "Hello {{ name }}!" not in script
```
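These tests pin down the new embedding strategy: the template travels into the runner script as base64 text, never as raw source, so quotes and newlines cannot break the generated code. A minimal sketch of the round trip the tests exercise:

```python
import base64

template = "She said \"hello\"\nwith '''triple quotes''' intact"

# Host side: encode once, then splice only the base64 text into the runner script.
template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")

# Sandbox side: decode before rendering, so no raw quote or newline is ever
# embedded directly in the generated source.
recovered = base64.b64decode(template_b64).decode("utf-8")
assert recovered == template
```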
@@ -0,0 +1,365 @@

```python
import json
from unittest.mock import patch

import pytest

from extensions.ext_redis import redis_client
from services.billing_service import BillingService


class TestBillingServiceGetPlanBulkWithCache:
    """
    Comprehensive integration tests for get_plan_bulk_with_cache using testcontainers.

    This test class covers all major scenarios:
    - Cache hit/miss scenarios
    - Redis operation failures and fallback behavior
    - Invalid cache data handling
    - TTL expiration handling
    - Error recovery and logging
    """

    @pytest.fixture(autouse=True)
    def setup_redis_cleanup(self, flask_app_with_containers):
        """Clean up Redis cache before and after each test."""
        with flask_app_with_containers.app_context():
            # Clean up before test
            yield
            # Clean up after test
            # Delete all test cache keys
            pattern = f"{BillingService._PLAN_CACHE_KEY_PREFIX}*"
            keys = redis_client.keys(pattern)
            if keys:
                redis_client.delete(*keys)

    def _create_test_plan_data(self, plan: str = "sandbox", expiration_date: int = 1735689600):
        """Helper to create test SubscriptionPlan data."""
        return {"plan": plan, "expiration_date": expiration_date}

    def _set_cache(self, tenant_id: str, plan_data: dict, ttl: int = 600):
        """Helper to set cache data in Redis."""
        cache_key = BillingService._make_plan_cache_key(tenant_id)
        json_str = json.dumps(plan_data)
        redis_client.setex(cache_key, ttl, json_str)

    def _get_cache(self, tenant_id: str):
        """Helper to get cache data from Redis."""
        cache_key = BillingService._make_plan_cache_key(tenant_id)
        value = redis_client.get(cache_key)
        if value:
            if isinstance(value, bytes):
                return value.decode("utf-8")
            return value
        return None

    def test_get_plan_bulk_with_cache_all_cache_hit(self, flask_app_with_containers):
        """Test bulk plan retrieval when all tenants are in cache."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2", "tenant-3"]
            expected_plans = {
                "tenant-1": self._create_test_plan_data("sandbox", 1735689600),
                "tenant-2": self._create_test_plan_data("professional", 1767225600),
                "tenant-3": self._create_test_plan_data("team", 1798761600),
            }

            # Pre-populate cache
            for tenant_id, plan_data in expected_plans.items():
                self._set_cache(tenant_id, plan_data)

            # Act
            with patch.object(BillingService, "get_plan_bulk") as mock_get_plan_bulk:
                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert
            assert len(result) == 3
            assert result["tenant-1"]["plan"] == "sandbox"
            assert result["tenant-1"]["expiration_date"] == 1735689600
            assert result["tenant-2"]["plan"] == "professional"
            assert result["tenant-2"]["expiration_date"] == 1767225600
            assert result["tenant-3"]["plan"] == "team"
            assert result["tenant-3"]["expiration_date"] == 1798761600

            # Verify API was not called
            mock_get_plan_bulk.assert_not_called()

    def test_get_plan_bulk_with_cache_all_cache_miss(self, flask_app_with_containers):
        """Test bulk plan retrieval when all tenants are not in cache."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2"]
            expected_plans = {
                "tenant-1": self._create_test_plan_data("sandbox", 1735689600),
                "tenant-2": self._create_test_plan_data("professional", 1767225600),
            }

            # Act
            with patch.object(BillingService, "get_plan_bulk", return_value=expected_plans) as mock_get_plan_bulk:
                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert
            assert len(result) == 2
            assert result["tenant-1"]["plan"] == "sandbox"
            assert result["tenant-2"]["plan"] == "professional"

            # Verify API was called with correct tenant_ids
            mock_get_plan_bulk.assert_called_once_with(tenant_ids)

            # Verify data was written to cache
            cached_1 = self._get_cache("tenant-1")
            cached_2 = self._get_cache("tenant-2")
            assert cached_1 is not None
            assert cached_2 is not None

            # Verify cache content
            cached_data_1 = json.loads(cached_1)
            cached_data_2 = json.loads(cached_2)
            assert cached_data_1 == expected_plans["tenant-1"]
            assert cached_data_2 == expected_plans["tenant-2"]

            # Verify TTL is set
            cache_key_1 = BillingService._make_plan_cache_key("tenant-1")
            ttl_1 = redis_client.ttl(cache_key_1)
            assert ttl_1 > 0
            assert ttl_1 <= 600  # Should be <= 600 seconds

    def test_get_plan_bulk_with_cache_partial_cache_hit(self, flask_app_with_containers):
        """Test bulk plan retrieval when some tenants are in cache, some are not."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2", "tenant-3"]
            # Pre-populate cache for tenant-1 and tenant-2
            self._set_cache("tenant-1", self._create_test_plan_data("sandbox", 1735689600))
            self._set_cache("tenant-2", self._create_test_plan_data("professional", 1767225600))

            # tenant-3 is not in cache
            missing_plan = {"tenant-3": self._create_test_plan_data("team", 1798761600)}

            # Act
            with patch.object(BillingService, "get_plan_bulk", return_value=missing_plan) as mock_get_plan_bulk:
                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert
            assert len(result) == 3
            assert result["tenant-1"]["plan"] == "sandbox"
            assert result["tenant-2"]["plan"] == "professional"
            assert result["tenant-3"]["plan"] == "team"

            # Verify API was called only for missing tenant
            mock_get_plan_bulk.assert_called_once_with(["tenant-3"])

            # Verify tenant-3 data was written to cache
            cached_3 = self._get_cache("tenant-3")
            assert cached_3 is not None
            cached_data_3 = json.loads(cached_3)
            assert cached_data_3 == missing_plan["tenant-3"]

    def test_get_plan_bulk_with_cache_redis_mget_failure(self, flask_app_with_containers):
        """Test fallback to API when Redis mget fails."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2"]
            expected_plans = {
                "tenant-1": self._create_test_plan_data("sandbox", 1735689600),
                "tenant-2": self._create_test_plan_data("professional", 1767225600),
            }

            # Act
            with (
                patch.object(redis_client, "mget", side_effect=Exception("Redis connection error")),
                patch.object(BillingService, "get_plan_bulk", return_value=expected_plans) as mock_get_plan_bulk,
            ):
                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert
            assert len(result) == 2
            assert result["tenant-1"]["plan"] == "sandbox"
            assert result["tenant-2"]["plan"] == "professional"

            # Verify API was called for all tenants (fallback)
            mock_get_plan_bulk.assert_called_once_with(tenant_ids)

            # Verify data was written to cache after fallback
            cached_1 = self._get_cache("tenant-1")
            cached_2 = self._get_cache("tenant-2")
            assert cached_1 is not None
            assert cached_2 is not None

    def test_get_plan_bulk_with_cache_invalid_json_in_cache(self, flask_app_with_containers):
        """Test fallback to API when cache contains invalid JSON."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2", "tenant-3"]

            # Set valid cache for tenant-1
            self._set_cache("tenant-1", self._create_test_plan_data("sandbox", 1735689600))

            # Set invalid JSON for tenant-2
            cache_key_2 = BillingService._make_plan_cache_key("tenant-2")
            redis_client.setex(cache_key_2, 600, "invalid json {")

            # tenant-3 is not in cache
            expected_plans = {
                "tenant-2": self._create_test_plan_data("professional", 1767225600),
                "tenant-3": self._create_test_plan_data("team", 1798761600),
            }

            # Act
            with patch.object(BillingService, "get_plan_bulk", return_value=expected_plans) as mock_get_plan_bulk:
                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert
            assert len(result) == 3
            assert result["tenant-1"]["plan"] == "sandbox"  # From cache
            assert result["tenant-2"]["plan"] == "professional"  # From API (fallback)
            assert result["tenant-3"]["plan"] == "team"  # From API

            # Verify API was called for tenant-2 and tenant-3
            mock_get_plan_bulk.assert_called_once_with(["tenant-2", "tenant-3"])

            # Verify tenant-2's invalid JSON was replaced with correct data in cache
            cached_2 = self._get_cache("tenant-2")
            assert cached_2 is not None
            cached_data_2 = json.loads(cached_2)
            assert cached_data_2 == expected_plans["tenant-2"]
            assert cached_data_2["plan"] == "professional"
            assert cached_data_2["expiration_date"] == 1767225600

            # Verify tenant-2 cache has correct TTL
            cache_key_2_new = BillingService._make_plan_cache_key("tenant-2")
            ttl_2 = redis_client.ttl(cache_key_2_new)
            assert ttl_2 > 0
            assert ttl_2 <= 600

            # Verify tenant-3 data was also written to cache
            cached_3 = self._get_cache("tenant-3")
            assert cached_3 is not None
            cached_data_3 = json.loads(cached_3)
            assert cached_data_3 == expected_plans["tenant-3"]

    def test_get_plan_bulk_with_cache_invalid_plan_data_in_cache(self, flask_app_with_containers):
        """Test fallback to API when cache data doesn't match SubscriptionPlan schema."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2", "tenant-3"]

            # Set valid cache for tenant-1
            self._set_cache("tenant-1", self._create_test_plan_data("sandbox", 1735689600))

            # Set invalid plan data for tenant-2 (missing expiration_date)
            cache_key_2 = BillingService._make_plan_cache_key("tenant-2")
            invalid_data = json.dumps({"plan": "professional"})  # Missing expiration_date
            redis_client.setex(cache_key_2, 600, invalid_data)

            # tenant-3 is not in cache
            expected_plans = {
                "tenant-2": self._create_test_plan_data("professional", 1767225600),
                "tenant-3": self._create_test_plan_data("team", 1798761600),
            }

            # Act
            with patch.object(BillingService, "get_plan_bulk", return_value=expected_plans) as mock_get_plan_bulk:
                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert
            assert len(result) == 3
            assert result["tenant-1"]["plan"] == "sandbox"  # From cache
            assert result["tenant-2"]["plan"] == "professional"  # From API (fallback)
            assert result["tenant-3"]["plan"] == "team"  # From API

            # Verify API was called for tenant-2 and tenant-3
            mock_get_plan_bulk.assert_called_once_with(["tenant-2", "tenant-3"])

    def test_get_plan_bulk_with_cache_redis_pipeline_failure(self, flask_app_with_containers):
        """Test that pipeline failure doesn't affect return value."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2"]
            expected_plans = {
                "tenant-1": self._create_test_plan_data("sandbox", 1735689600),
                "tenant-2": self._create_test_plan_data("professional", 1767225600),
            }

            # Act
            with (
                patch.object(BillingService, "get_plan_bulk", return_value=expected_plans),
                patch.object(redis_client, "pipeline") as mock_pipeline,
            ):
                # Create a mock pipeline that fails on execute
                mock_pipe = mock_pipeline.return_value
                mock_pipe.execute.side_effect = Exception("Pipeline execution failed")

                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert - Function should still return correct result despite pipeline failure
            assert len(result) == 2
            assert result["tenant-1"]["plan"] == "sandbox"
            assert result["tenant-2"]["plan"] == "professional"

            # Verify pipeline was attempted
            mock_pipeline.assert_called_once()

    def test_get_plan_bulk_with_cache_empty_tenant_ids(self, flask_app_with_containers):
        """Test with empty tenant_ids list."""
        with flask_app_with_containers.app_context():
            # Act
            with patch.object(BillingService, "get_plan_bulk") as mock_get_plan_bulk:
                result = BillingService.get_plan_bulk_with_cache([])

            # Assert
            assert result == {}
            assert len(result) == 0

            # Verify no API calls
            mock_get_plan_bulk.assert_not_called()

            # Verify no Redis operations (mget with empty list would return empty list)
            # But we should check that mget was not called at all
            # Since we can't easily verify this without more mocking, we just verify the result

    def test_get_plan_bulk_with_cache_ttl_expired(self, flask_app_with_containers):
        """Test that expired cache keys are treated as cache misses."""
        with flask_app_with_containers.app_context():
            # Arrange
            tenant_ids = ["tenant-1", "tenant-2"]

            # Set cache for tenant-1 with very short TTL (1 second) to simulate expiration
            self._set_cache("tenant-1", self._create_test_plan_data("sandbox", 1735689600), ttl=1)

            # Wait for TTL to expire (key will be deleted by Redis)
            import time

            time.sleep(2)

            # Verify cache is expired (key doesn't exist)
            cache_key_1 = BillingService._make_plan_cache_key("tenant-1")
            exists = redis_client.exists(cache_key_1)
            assert exists == 0  # Key doesn't exist (expired)

            # tenant-2 is not in cache
            expected_plans = {
                "tenant-1": self._create_test_plan_data("sandbox", 1735689600),
                "tenant-2": self._create_test_plan_data("professional", 1767225600),
            }

            # Act
            with patch.object(BillingService, "get_plan_bulk", return_value=expected_plans) as mock_get_plan_bulk:
                result = BillingService.get_plan_bulk_with_cache(tenant_ids)

            # Assert
            assert len(result) == 2
            assert result["tenant-1"]["plan"] == "sandbox"
            assert result["tenant-2"]["plan"] == "professional"

            # Verify API was called for both tenants (tenant-1 expired, tenant-2 missing)
            mock_get_plan_bulk.assert_called_once_with(tenant_ids)

            # Verify both were written to cache with correct TTL
            cache_key_1_new = BillingService._make_plan_cache_key("tenant-1")
            cache_key_2 = BillingService._make_plan_cache_key("tenant-2")
            ttl_1_new = redis_client.ttl(cache_key_1_new)
            ttl_2 = redis_client.ttl(cache_key_2)
            assert ttl_1_new > 0
            assert ttl_1_new <= 600
            assert ttl_2 > 0
            assert ttl_2 <= 600
```
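Taken together, the tests above constrain the shape of `get_plan_bulk_with_cache`: a single `mget` for the bulk read, per-tenant JSON decode with fallback to `get_plan_bulk` for misses and bad entries, and a best-effort pipeline write-back with a TTL of at most 600 seconds. A sketch consistent with those tests; the key prefix, TTL constant, and injected `fetch_bulk` callable are assumptions, and the real method also validates entries against the SubscriptionPlan schema:

```python
import json

import redis

redis_client = redis.Redis()  # assumed connection
PLAN_CACHE_KEY_PREFIX = "billing:plan:"  # assumed; tests use BillingService._PLAN_CACHE_KEY_PREFIX
PLAN_CACHE_TTL = 600  # the tests assert 0 < ttl <= 600


def get_plan_bulk_with_cache(tenant_ids: list[str], fetch_bulk) -> dict[str, dict]:
    """Bulk read-through: mget first, fetch only the misses, best-effort write-back."""
    if not tenant_ids:
        return {}
    plans: dict[str, dict] = {}
    missing = list(tenant_ids)
    try:
        cached = redis_client.mget([PLAN_CACHE_KEY_PREFIX + t for t in tenant_ids])
        missing = []
        for tenant_id, raw in zip(tenant_ids, cached):
            try:
                plans[tenant_id] = json.loads(raw)  # TypeError on None, ValueError on bad JSON
            except (TypeError, ValueError):
                missing.append(tenant_id)
    except Exception:
        plans = {}
        missing = list(tenant_ids)  # Redis read failure: fall back to the API for everyone
    if missing:
        fetched = fetch_bulk(missing)
        plans.update(fetched)
        try:
            pipe = redis_client.pipeline()
            for tenant_id, plan in fetched.items():
                pipe.setex(PLAN_CACHE_KEY_PREFIX + tenant_id, PLAN_CACHE_TTL, json.dumps(plan))
            pipe.execute()
        except Exception:
            pass  # write-back failure must not affect the returned result
    return plans
```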
@@ -0,0 +1,682 @@

```python
from unittest.mock import MagicMock, patch

import pytest
from faker import Faker

from constants import HIDDEN_VALUE, UNKNOWN_VALUE
from core.plugin.entities.plugin_daemon import CredentialType
from core.trigger.entities.entities import Subscription as TriggerSubscriptionEntity
from extensions.ext_database import db
from models.provider_ids import TriggerProviderID
from models.trigger import TriggerSubscription
from services.trigger.trigger_provider_service import TriggerProviderService


class TestTriggerProviderService:
    """Integration tests for TriggerProviderService using testcontainers."""

    @pytest.fixture
    def mock_external_service_dependencies(self):
        """Mock setup for external service dependencies."""
        with (
            patch("services.trigger.trigger_provider_service.TriggerManager") as mock_trigger_manager,
            patch("services.trigger.trigger_provider_service.redis_client") as mock_redis_client,
            patch("services.trigger.trigger_provider_service.delete_cache_for_subscription") as mock_delete_cache,
            patch("services.account_service.FeatureService") as mock_account_feature_service,
        ):
            # Setup default mock returns
            mock_provider_controller = MagicMock()
            mock_provider_controller.get_credential_schema_config.return_value = MagicMock()
            mock_provider_controller.get_properties_schema.return_value = MagicMock()
            mock_trigger_manager.get_trigger_provider.return_value = mock_provider_controller

            # Mock redis lock
            mock_lock = MagicMock()
            mock_lock.__enter__ = MagicMock(return_value=None)
            mock_lock.__exit__ = MagicMock(return_value=None)
            mock_redis_client.lock.return_value = mock_lock

            # Setup account feature service mock
            mock_account_feature_service.get_system_features.return_value.is_allow_register = True

            yield {
                "trigger_manager": mock_trigger_manager,
                "redis_client": mock_redis_client,
                "delete_cache": mock_delete_cache,
                "provider_controller": mock_provider_controller,
                "account_feature_service": mock_account_feature_service,
            }

    def _create_test_account_and_tenant(self, db_session_with_containers, mock_external_service_dependencies):
        """
        Helper method to create a test account and tenant for testing.

        Args:
            db_session_with_containers: Database session from testcontainers infrastructure
            mock_external_service_dependencies: Mock dependencies

        Returns:
            tuple: (account, tenant) - Created account and tenant instances
        """
        fake = Faker()

        from services.account_service import AccountService, TenantService

        # Setup mocks for account creation
        mock_external_service_dependencies[
            "account_feature_service"
        ].get_system_features.return_value.is_allow_register = True
        mock_external_service_dependencies[
            "trigger_manager"
        ].get_trigger_provider.return_value = mock_external_service_dependencies["provider_controller"]

        # Create account and tenant
        account = AccountService.create_account(
            email=fake.email(),
            name=fake.name(),
            interface_language="en-US",
            password=fake.password(length=12),
        )
        TenantService.create_owner_tenant_if_not_exist(account, name=fake.company())
        tenant = account.current_tenant

        return account, tenant

    def _create_test_subscription(
        self,
        db_session_with_containers,
        tenant_id,
        user_id,
        provider_id,
        credential_type,
        credentials,
        mock_external_service_dependencies,
    ):
        """
        Helper method to create a test trigger subscription.

        Args:
            db_session_with_containers: Database session
            tenant_id: Tenant ID
            user_id: User ID
            provider_id: Provider ID
            credential_type: Credential type
            credentials: Credentials dict
            mock_external_service_dependencies: Mock dependencies

        Returns:
            TriggerSubscription: Created subscription instance
        """
        fake = Faker()
        from core.helper.provider_cache import NoOpProviderCredentialCache
        from core.helper.provider_encryption import create_provider_encrypter

        # Use mock provider controller to encrypt credentials
        provider_controller = mock_external_service_dependencies["provider_controller"]

        # Create encrypter for credentials
        credential_encrypter, _ = create_provider_encrypter(
            tenant_id=tenant_id,
            config=provider_controller.get_credential_schema_config(credential_type),
            cache=NoOpProviderCredentialCache(),
        )

        subscription = TriggerSubscription(
            name=fake.word(),
            tenant_id=tenant_id,
            user_id=user_id,
            provider_id=str(provider_id),
            endpoint_id=fake.uuid4(),
            parameters={"param1": "value1"},
            properties={"prop1": "value1"},
            credentials=dict(credential_encrypter.encrypt(credentials)),
            credential_type=credential_type.value,
            credential_expires_at=-1,
            expires_at=-1,
        )

        db.session.add(subscription)
        db.session.commit()
        db.session.refresh(subscription)

        return subscription

    def test_rebuild_trigger_subscription_success_with_merged_credentials(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test successful rebuild with credential merging (HIDDEN_VALUE handling).

        This test verifies:
        - Credentials are properly merged (HIDDEN_VALUE replaced with existing values)
        - Single transaction wraps all operations
        - Merged credentials are used for subscribe and update
        - Database state is correctly updated
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
        credential_type = CredentialType.API_KEY

        # Create initial subscription with credentials
        original_credentials = {"api_key": "original-secret-key", "api_secret": "original-secret"}
        subscription = self._create_test_subscription(
            db_session_with_containers,
            tenant.id,
            account.id,
            provider_id,
            credential_type,
            original_credentials,
            mock_external_service_dependencies,
        )

        # Prepare new credentials with HIDDEN_VALUE for api_key (should keep original)
        # and new value for api_secret (should update)
        new_credentials = {
            "api_key": HIDDEN_VALUE,  # Should be replaced with original
            "api_secret": "new-secret-value",  # Should be updated
        }

        # Mock subscribe_trigger to return a new subscription entity
        new_subscription_entity = TriggerSubscriptionEntity(
            endpoint=subscription.endpoint_id,
            parameters={"param1": "value1"},
            properties={"prop1": "new_prop_value"},
            expires_at=1234567890,
        )
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.return_value = new_subscription_entity

        # Mock unsubscribe_trigger
        mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.return_value = MagicMock()

        # Execute rebuild
        TriggerProviderService.rebuild_trigger_subscription(
            tenant_id=tenant.id,
            provider_id=provider_id,
            subscription_id=subscription.id,
            credentials=new_credentials,
            parameters={"param1": "updated_value"},
            name="updated_name",
        )

        # Verify unsubscribe was called with decrypted original credentials
        mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.assert_called_once()
        unsubscribe_call_args = mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.call_args
        assert unsubscribe_call_args.kwargs["tenant_id"] == tenant.id
        assert unsubscribe_call_args.kwargs["provider_id"] == provider_id
        assert unsubscribe_call_args.kwargs["credential_type"] == credential_type

        # Verify subscribe was called with merged credentials (api_key from original, api_secret new)
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.assert_called_once()
        subscribe_call_args = mock_external_service_dependencies["trigger_manager"].subscribe_trigger.call_args
        subscribe_credentials = subscribe_call_args.kwargs["credentials"]
        assert subscribe_credentials["api_key"] == original_credentials["api_key"]  # Merged from original
        assert subscribe_credentials["api_secret"] == "new-secret-value"  # New value

        # Verify database state was updated
        db.session.refresh(subscription)
        assert subscription.name == "updated_name"
        assert subscription.parameters == {"param1": "updated_value"}

        # Verify credentials in DB were updated with merged values (decrypt to check)
        from core.helper.provider_cache import NoOpProviderCredentialCache
        from core.helper.provider_encryption import create_provider_encrypter

        # Use mock provider controller to decrypt credentials
        provider_controller = mock_external_service_dependencies["provider_controller"]
        credential_encrypter, _ = create_provider_encrypter(
            tenant_id=tenant.id,
            config=provider_controller.get_credential_schema_config(credential_type),
            cache=NoOpProviderCredentialCache(),
        )
        decrypted_db_credentials = dict(credential_encrypter.decrypt(subscription.credentials))
        assert decrypted_db_credentials["api_key"] == original_credentials["api_key"]
        assert decrypted_db_credentials["api_secret"] == "new-secret-value"

        # Verify cache was cleared
        mock_external_service_dependencies["delete_cache"].assert_called_once_with(
            tenant_id=tenant.id,
            provider_id=subscription.provider_id,
            subscription_id=subscription.id,
        )

    def test_rebuild_trigger_subscription_with_all_new_credentials(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test rebuild when all credentials are new (no HIDDEN_VALUE).

        This test verifies:
        - All new credentials are used when no HIDDEN_VALUE is present
        - Merged credentials contain only new values
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
        credential_type = CredentialType.API_KEY

        # Create initial subscription
        original_credentials = {"api_key": "original-key", "api_secret": "original-secret"}
        subscription = self._create_test_subscription(
            db_session_with_containers,
            tenant.id,
            account.id,
            provider_id,
            credential_type,
            original_credentials,
            mock_external_service_dependencies,
        )

        # All new credentials (no HIDDEN_VALUE)
        new_credentials = {
            "api_key": "completely-new-key",
            "api_secret": "completely-new-secret",
        }

        new_subscription_entity = TriggerSubscriptionEntity(
            endpoint=subscription.endpoint_id,
            parameters={},
            properties={},
            expires_at=-1,
        )
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.return_value = new_subscription_entity
        mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.return_value = MagicMock()

        # Execute rebuild
        TriggerProviderService.rebuild_trigger_subscription(
            tenant_id=tenant.id,
            provider_id=provider_id,
            subscription_id=subscription.id,
            credentials=new_credentials,
            parameters={},
        )

        # Verify subscribe was called with all new credentials
        subscribe_call_args = mock_external_service_dependencies["trigger_manager"].subscribe_trigger.call_args
        subscribe_credentials = subscribe_call_args.kwargs["credentials"]
        assert subscribe_credentials["api_key"] == "completely-new-key"
        assert subscribe_credentials["api_secret"] == "completely-new-secret"

    def test_rebuild_trigger_subscription_with_all_hidden_values(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test rebuild when all credentials are HIDDEN_VALUE (preserve all existing).

        This test verifies:
        - All HIDDEN_VALUE credentials are replaced with existing values
        - Original credentials are preserved
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
        credential_type = CredentialType.API_KEY

        original_credentials = {"api_key": "original-key", "api_secret": "original-secret"}
        subscription = self._create_test_subscription(
            db_session_with_containers,
            tenant.id,
            account.id,
            provider_id,
            credential_type,
            original_credentials,
            mock_external_service_dependencies,
        )

        # All HIDDEN_VALUE (should preserve all original)
        new_credentials = {
            "api_key": HIDDEN_VALUE,
            "api_secret": HIDDEN_VALUE,
        }

        new_subscription_entity = TriggerSubscriptionEntity(
            endpoint=subscription.endpoint_id,
            parameters={},
            properties={},
            expires_at=-1,
        )
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.return_value = new_subscription_entity
        mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.return_value = MagicMock()

        # Execute rebuild
        TriggerProviderService.rebuild_trigger_subscription(
            tenant_id=tenant.id,
            provider_id=provider_id,
            subscription_id=subscription.id,
            credentials=new_credentials,
            parameters={},
        )

        # Verify subscribe was called with all original credentials
        subscribe_call_args = mock_external_service_dependencies["trigger_manager"].subscribe_trigger.call_args
        subscribe_credentials = subscribe_call_args.kwargs["credentials"]
        assert subscribe_credentials["api_key"] == original_credentials["api_key"]
        assert subscribe_credentials["api_secret"] == original_credentials["api_secret"]

    def test_rebuild_trigger_subscription_with_missing_key_uses_unknown_value(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test rebuild when HIDDEN_VALUE is used for a key that doesn't exist in original.

        This test verifies:
        - UNKNOWN_VALUE is used when HIDDEN_VALUE key doesn't exist in original credentials
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
        credential_type = CredentialType.API_KEY

        # Original has only api_key
        original_credentials = {"api_key": "original-key"}
        subscription = self._create_test_subscription(
            db_session_with_containers,
            tenant.id,
            account.id,
            provider_id,
            credential_type,
            original_credentials,
            mock_external_service_dependencies,
        )

        # HIDDEN_VALUE for non-existent key should use UNKNOWN_VALUE
        new_credentials = {
            "api_key": HIDDEN_VALUE,
            "non_existent_key": HIDDEN_VALUE,  # This key doesn't exist in original
        }

        new_subscription_entity = TriggerSubscriptionEntity(
            endpoint=subscription.endpoint_id,
            parameters={},
            properties={},
            expires_at=-1,
        )
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.return_value = new_subscription_entity
        mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.return_value = MagicMock()

        # Execute rebuild
        TriggerProviderService.rebuild_trigger_subscription(
            tenant_id=tenant.id,
            provider_id=provider_id,
            subscription_id=subscription.id,
            credentials=new_credentials,
            parameters={},
        )

        # Verify subscribe was called with original api_key and UNKNOWN_VALUE for missing key
        subscribe_call_args = mock_external_service_dependencies["trigger_manager"].subscribe_trigger.call_args
        subscribe_credentials = subscribe_call_args.kwargs["credentials"]
        assert subscribe_credentials["api_key"] == original_credentials["api_key"]
        assert subscribe_credentials["non_existent_key"] == UNKNOWN_VALUE

    def test_rebuild_trigger_subscription_rollback_on_error(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test that transaction is rolled back on error.

        This test verifies:
        - Database transaction is rolled back when an error occurs
        - Original subscription state is preserved
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
        credential_type = CredentialType.API_KEY

        original_credentials = {"api_key": "original-key"}
        subscription = self._create_test_subscription(
            db_session_with_containers,
            tenant.id,
            account.id,
            provider_id,
            credential_type,
            original_credentials,
            mock_external_service_dependencies,
        )

        original_name = subscription.name
        original_parameters = subscription.parameters.copy()

        # Make subscribe_trigger raise an error
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.side_effect = ValueError(
            "Subscribe failed"
        )
        mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.return_value = MagicMock()

        # Execute rebuild and expect error
        with pytest.raises(ValueError, match="Subscribe failed"):
            TriggerProviderService.rebuild_trigger_subscription(
                tenant_id=tenant.id,
                provider_id=provider_id,
                subscription_id=subscription.id,
                credentials={"api_key": "new-key"},
                parameters={},
            )

        # Verify subscription state was not changed (rolled back)
        db.session.refresh(subscription)
        assert subscription.name == original_name
        assert subscription.parameters == original_parameters

    def test_rebuild_trigger_subscription_unsubscribe_error_continues(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test that unsubscribe errors are handled gracefully and operation continues.

        This test verifies:
        - Unsubscribe errors are caught and logged but don't stop the rebuild
        - Rebuild continues even if unsubscribe fails
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
        credential_type = CredentialType.API_KEY

        original_credentials = {"api_key": "original-key"}
        subscription = self._create_test_subscription(
            db_session_with_containers,
            tenant.id,
            account.id,
            provider_id,
            credential_type,
            original_credentials,
            mock_external_service_dependencies,
        )

        # Make unsubscribe_trigger raise an error (should be caught and continue)
        mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.side_effect = ValueError(
            "Unsubscribe failed"
        )

        new_subscription_entity = TriggerSubscriptionEntity(
            endpoint=subscription.endpoint_id,
            parameters={},
            properties={},
            expires_at=-1,
        )
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.return_value = new_subscription_entity

        # Execute rebuild - should succeed despite unsubscribe error
        TriggerProviderService.rebuild_trigger_subscription(
            tenant_id=tenant.id,
            provider_id=provider_id,
            subscription_id=subscription.id,
            credentials={"api_key": "new-key"},
            parameters={},
        )

        # Verify subscribe was still called (operation continued)
        mock_external_service_dependencies["trigger_manager"].subscribe_trigger.assert_called_once()

        # Verify subscription was updated
        db.session.refresh(subscription)
        assert subscription.parameters == {}

    def test_rebuild_trigger_subscription_subscription_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test error when subscription is not found.

        This test verifies:
        - Proper error is raised when subscription doesn't exist
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
        fake_subscription_id = fake.uuid4()

        with pytest.raises(ValueError, match="not found"):
            TriggerProviderService.rebuild_trigger_subscription(
                tenant_id=tenant.id,
                provider_id=provider_id,
                subscription_id=fake_subscription_id,
                credentials={},
                parameters={},
            )

    def test_rebuild_trigger_subscription_provider_not_found(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test error when provider is not found.

        This test verifies:
        - Proper error is raised when provider doesn't exist
        """
        fake = Faker()
        account, tenant = self._create_test_account_and_tenant(
            db_session_with_containers, mock_external_service_dependencies
        )

        provider_id = TriggerProviderID("non_existent_org/non_existent_plugin/non_existent_provider")

        # Make get_trigger_provider return None
        mock_external_service_dependencies["trigger_manager"].get_trigger_provider.return_value = None

        with pytest.raises(ValueError, match="Provider.*not found"):
            TriggerProviderService.rebuild_trigger_subscription(
                tenant_id=tenant.id,
                provider_id=provider_id,
                subscription_id=fake.uuid4(),
                credentials={},
                parameters={},
            )

    def test_rebuild_trigger_subscription_unsupported_credential_type(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test error when credential type is not supported for rebuild.

        This test verifies:
```
|
||||
- Proper error is raised for unsupported credential types (not OAUTH2 or API_KEY)
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = self._create_test_account_and_tenant(
|
||||
db_session_with_containers, mock_external_service_dependencies
|
||||
)
|
||||
|
||||
provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
|
||||
credential_type = CredentialType.UNAUTHORIZED # Not supported
|
||||
|
||||
subscription = self._create_test_subscription(
|
||||
db_session_with_containers,
|
||||
tenant.id,
|
||||
account.id,
|
||||
provider_id,
|
||||
credential_type,
|
||||
{},
|
||||
mock_external_service_dependencies,
|
||||
)
|
||||
|
||||
with pytest.raises(ValueError, match="Credential type not supported for rebuild"):
|
||||
TriggerProviderService.rebuild_trigger_subscription(
|
||||
tenant_id=tenant.id,
|
||||
provider_id=provider_id,
|
||||
subscription_id=subscription.id,
|
||||
credentials={},
|
||||
parameters={},
|
||||
)
|
||||
|
||||
def test_rebuild_trigger_subscription_name_uniqueness_check(
|
||||
self, db_session_with_containers, mock_external_service_dependencies
|
||||
):
|
||||
"""
|
||||
Test that name uniqueness is checked when updating name.
|
||||
|
||||
This test verifies:
|
||||
- Error is raised when new name conflicts with existing subscription
|
||||
"""
|
||||
fake = Faker()
|
||||
account, tenant = self._create_test_account_and_tenant(
|
||||
db_session_with_containers, mock_external_service_dependencies
|
||||
)
|
||||
|
||||
provider_id = TriggerProviderID("test_org/test_plugin/test_provider")
|
||||
credential_type = CredentialType.API_KEY
|
||||
|
||||
# Create first subscription
|
||||
subscription1 = self._create_test_subscription(
|
||||
db_session_with_containers,
|
||||
tenant.id,
|
||||
account.id,
|
||||
provider_id,
|
||||
credential_type,
|
||||
{"api_key": "key1"},
|
||||
mock_external_service_dependencies,
|
||||
)
|
||||
|
||||
# Create second subscription with different name
|
||||
subscription2 = self._create_test_subscription(
|
||||
db_session_with_containers,
|
||||
tenant.id,
|
||||
account.id,
|
||||
provider_id,
|
||||
credential_type,
|
||||
{"api_key": "key2"},
|
||||
mock_external_service_dependencies,
|
||||
)
|
||||
|
||||
new_subscription_entity = TriggerSubscriptionEntity(
|
||||
endpoint=subscription2.endpoint_id,
|
||||
parameters={},
|
||||
properties={},
|
||||
expires_at=-1,
|
||||
)
|
||||
mock_external_service_dependencies["trigger_manager"].subscribe_trigger.return_value = new_subscription_entity
|
||||
mock_external_service_dependencies["trigger_manager"].unsubscribe_trigger.return_value = MagicMock()
|
||||
|
||||
# Try to rename subscription2 to subscription1's name (should fail)
|
||||
with pytest.raises(ValueError, match="already exists"):
|
||||
TriggerProviderService.rebuild_trigger_subscription(
|
||||
tenant_id=tenant.id,
|
||||
provider_id=provider_id,
|
||||
subscription_id=subscription2.id,
|
||||
credentials={"api_key": "new-key"},
|
||||
parameters={},
|
||||
name=subscription1.name, # Conflicting name
|
||||
)
|
||||
|
|
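> **Note**: taken together, the rebuild tests above pin down an error-handling contract: unsubscribe failures are swallowed, subscribe failures roll the transaction back. A minimal sketch of that contract, with simplified names (the real `TriggerProviderService.rebuild_trigger_subscription` also merges credentials, checks name uniqueness, and re-encrypts):

```python
import logging

logger = logging.getLogger(__name__)


def rebuild_subscription_sketch(session, manager, subscription, credentials, parameters):
    # Best-effort teardown: a failing unsubscribe must not block the rebuild.
    try:
        manager.unsubscribe_trigger(subscription)
    except Exception:
        logger.warning("unsubscribe failed for %s, continuing rebuild", subscription.id)

    # Re-subscribe and persist atomically: any failure here must leave the
    # original subscription row untouched.
    try:
        new_entity = manager.subscribe_trigger(credentials=credentials, parameters=parameters)
        subscription.parameters = new_entity.parameters
        session.commit()
    except Exception:
        session.rollback()
        raise
```
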
@@ -705,3 +705,207 @@ class TestWorkflowToolManageService:
        db.session.refresh(created_tool)
        assert created_tool.name == first_tool_name
        assert created_tool.updated_at is not None

    def test_create_workflow_tool_with_file_parameter_default(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test workflow tool creation with a FILE parameter having a file object as default.

        This test verifies:
        - FILE parameters can have file object defaults
        - The default value (dict with id/base64Url) is properly handled
        - Tool creation succeeds without Pydantic validation errors

        Related issue: Array[File] default value causes Pydantic validation errors.
        """
        fake = Faker()

        # Create test data
        app, account, workflow = self._create_test_app_and_account(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Create workflow graph with a FILE variable that has a default value
        workflow_graph = {
            "nodes": [
                {
                    "id": "start_node",
                    "data": {
                        "type": "start",
                        "variables": [
                            {
                                "variable": "document",
                                "label": "Document",
                                "type": "file",
                                "required": False,
                                "default": {"id": fake.uuid4(), "base64Url": ""},
                            }
                        ],
                    },
                }
            ]
        }
        workflow.graph = json.dumps(workflow_graph)

        # Setup workflow tool parameters with FILE type
        file_parameters = [
            {
                "name": "document",
                "description": "Upload a document",
                "form": "form",
                "type": "file",
                "required": False,
            }
        ]

        # Execute the method under test
        # Note: from_db is mocked, so this test primarily validates the parameter configuration
        result = WorkflowToolManageService.create_workflow_tool(
            user_id=account.id,
            tenant_id=account.current_tenant.id,
            workflow_app_id=app.id,
            name=fake.word(),
            label=fake.word(),
            icon={"type": "emoji", "emoji": "📄"},
            description=fake.text(max_nb_chars=200),
            parameters=file_parameters,
        )

        # Verify the result
        assert result == {"result": "success"}

    def test_create_workflow_tool_with_files_parameter_default(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test workflow tool creation with a FILES (Array[File]) parameter having file objects as default.

        This test verifies:
        - FILES parameters can have a list of file objects as default
        - The default value (list of dicts with id/base64Url) is properly handled
        - Tool creation succeeds without Pydantic validation errors

        Related issue: Array[File] default value causes 4 Pydantic validation errors
        because PluginParameter.default only accepts Union[float, int, str, bool] | None.
        """
        fake = Faker()

        # Create test data
        app, account, workflow = self._create_test_app_and_account(
            db_session_with_containers, mock_external_service_dependencies
        )

        # Create workflow graph with a FILE_LIST variable that has a default value
        workflow_graph = {
            "nodes": [
                {
                    "id": "start_node",
                    "data": {
                        "type": "start",
                        "variables": [
                            {
                                "variable": "documents",
                                "label": "Documents",
                                "type": "file-list",
                                "required": False,
                                "default": [
                                    {"id": fake.uuid4(), "base64Url": ""},
                                    {"id": fake.uuid4(), "base64Url": ""},
                                ],
                            }
                        ],
                    },
                }
            ]
        }
        workflow.graph = json.dumps(workflow_graph)

        # Setup workflow tool parameters with FILES type
        files_parameters = [
            {
                "name": "documents",
                "description": "Upload multiple documents",
                "form": "form",
                "type": "files",
                "required": False,
            }
        ]

        # Execute the method under test
        # Note: from_db is mocked, so this test primarily validates the parameter configuration
        result = WorkflowToolManageService.create_workflow_tool(
            user_id=account.id,
            tenant_id=account.current_tenant.id,
            workflow_app_id=app.id,
            name=fake.word(),
            label=fake.word(),
            icon={"type": "emoji", "emoji": "📁"},
            description=fake.text(max_nb_chars=200),
            parameters=files_parameters,
        )

        # Verify the result
        assert result == {"result": "success"}

    def test_create_workflow_tool_db_commit_before_validation(
        self, db_session_with_containers, mock_external_service_dependencies
    ):
        """
        Test that the database commit happens before validation, causing DB pollution on validation failure.

        This test verifies the second bug:
        - WorkflowToolProvider is committed to the database BEFORE from_db validation
        - If validation fails, the record remains in the database
        - Subsequent attempts fail with a "Tool already exists" error

        This demonstrates why we need to validate BEFORE the database commit.
        """

        fake = Faker()

        # Create test data
        app, account, workflow = self._create_test_app_and_account(
            db_session_with_containers, mock_external_service_dependencies
        )

        tool_name = fake.word()

        # Mock from_db to raise a validation error
        mock_external_service_dependencies["workflow_tool_provider_controller"].from_db.side_effect = ValueError(
            "Validation failed: default parameter type mismatch"
        )

        # Attempt to create the workflow tool (will fail at the validation stage)
        with pytest.raises(ValueError) as exc_info:
            WorkflowToolManageService.create_workflow_tool(
                user_id=account.id,
                tenant_id=account.current_tenant.id,
                workflow_app_id=app.id,
                name=tool_name,
                label=fake.word(),
                icon={"type": "emoji", "emoji": "🔧"},
                description=fake.text(max_nb_chars=200),
                parameters=self._create_test_workflow_tool_parameters(),
            )

        assert "Validation failed" in str(exc_info.value)

        # Verify the tool was NOT created in the database
        # This is the expected behavior (no pollution)
        from extensions.ext_database import db

        tool_count = (
            db.session.query(WorkflowToolProvider)
            .where(
                WorkflowToolProvider.tenant_id == account.current_tenant.id,
                WorkflowToolProvider.name == tool_name,
            )
            .count()
        )

        # The record should NOT exist because the transaction should be rolled back
        # Currently, due to the bug, the record might exist (this test documents the bug)
        # After the fix, this should always be 0
        # For now, we document that the record may exist, demonstrating the bug
        # assert tool_count == 0  # Expected after fix

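> **Note**: the two FILE/FILES tests above exist because a parameter default typed as `Union[float, int, str, bool] | None` rejects dict and list-of-dict file defaults. A sketch of the failure mode and one possible widening (illustrative only, not necessarily the shipped fix):

```python
from typing import Union

from pydantic import BaseModel


class NarrowParameter(BaseModel):
    # Mirrors the constraint described in the docstrings above: a file object
    # default like {"id": ..., "base64Url": ""} fails validation here.
    default: Union[float, int, str, bool, None] = None


class WidenedParameter(BaseModel):
    # One possible relaxation that admits FILE (dict) and FILES (list) defaults.
    default: Union[float, int, str, bool, dict, list, None] = None


# NarrowParameter(default={"id": "x", "base64Url": ""})  # raises ValidationError
WidenedParameter(default=[{"id": "x", "base64Url": ""}])  # accepted
```
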
@@ -12,10 +12,12 @@ class TestJinja2CodeExecutor(CodeExecutorTestMixin):
        _, Jinja2TemplateTransformer = self.jinja2_imports

        template = "Hello {{template}}"
        # Template must be base64 encoded to match the new safe embedding approach
        template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")
        inputs = base64.b64encode(b'{"template": "World"}').decode("utf-8")
        code = (
            Jinja2TemplateTransformer.get_runner_script()
            .replace(Jinja2TemplateTransformer._code_placeholder, template)
            .replace(Jinja2TemplateTransformer._template_b64_placeholder, template_b64)
            .replace(Jinja2TemplateTransformer._inputs_placeholder, inputs)
        )
        result = CodeExecutor.execute_code(

@@ -37,6 +39,34 @@ class TestJinja2CodeExecutor(CodeExecutorTestMixin):
        _, Jinja2TemplateTransformer = self.jinja2_imports

        runner_script = Jinja2TemplateTransformer.get_runner_script()
        assert runner_script.count(Jinja2TemplateTransformer._code_placeholder) == 1
        assert runner_script.count(Jinja2TemplateTransformer._template_b64_placeholder) == 1
        assert runner_script.count(Jinja2TemplateTransformer._inputs_placeholder) == 1
        assert runner_script.count(Jinja2TemplateTransformer._result_tag) == 2

    def test_jinja2_template_with_special_characters(self, flask_app_with_containers):
        """
        Test that templates with special characters (quotes, newlines) render correctly.
        This is a regression test for issue #26818 where textarea pre-fill values
        containing special characters would break template rendering.
        """
        CodeExecutor, CodeLanguage = self.code_executor_imports

        # Template with triple quotes, single quotes, double quotes, and newlines
        template = """<html>
<body>
<input value="{{ task.get('Task ID', '') }}"/>
<textarea>{{ task.get('Issues', 'No issues reported') }}</textarea>
<p>Status: "{{ status }}"</p>
<pre>'''code block'''</pre>
</body>
</html>"""
        inputs = {"task": {"Task ID": "TASK-123", "Issues": "Line 1\nLine 2\nLine 3"}, "status": "completed"}

        result = CodeExecutor.execute_workflow_code_template(language=CodeLanguage.JINJA2, code=template, inputs=inputs)

        # Verify the template rendered correctly with all special characters
        output = result["result"]
        assert 'value="TASK-123"' in output
        assert "<textarea>Line 1\nLine 2\nLine 3</textarea>" in output
        assert 'Status: "completed"' in output
        assert "'''code block'''" in output

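> **Note**: the two hunks above move from interpolating the user template directly into the runner script to embedding it as base64. A minimal sketch of that approach, with hypothetical placeholder names (the real constants live on `Jinja2TemplateTransformer`):

```python
import base64
import json

# The generated script decodes the template and inputs at runtime.
RUNNER_SCRIPT = """\
import base64, json
from jinja2 import Template

template = base64.b64decode("__TEMPLATE_B64__").decode("utf-8")
inputs = json.loads(base64.b64decode("__INPUTS_B64__").decode("utf-8"))
print(Template(template).render(**inputs))
"""


def build_runner(template: str, inputs: dict) -> str:
    # Encoding the template keeps quotes, newlines, and triple quotes in user
    # content from breaking the generated script, which naive string
    # interpolation cannot guarantee.
    template_b64 = base64.b64encode(template.encode("utf-8")).decode("utf-8")
    inputs_b64 = base64.b64encode(json.dumps(inputs).encode("utf-8")).decode("utf-8")
    return RUNNER_SCRIPT.replace("__TEMPLATE_B64__", template_b64).replace("__INPUTS_B64__", inputs_b64)
```
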
@@ -0,0 +1,254 @@
"""
Unit tests for XSS prevention in App payloads.

This test module validates that HTML tags, JavaScript, and other potentially
dangerous content are rejected in App names and descriptions.
"""

import pytest

from controllers.console.app.app import CopyAppPayload, CreateAppPayload, UpdateAppPayload


class TestXSSPreventionUnit:
    """Unit tests for XSS prevention in App payloads."""

    def test_create_app_valid_names(self):
        """Test CreateAppPayload with valid app names."""
        # Normal app names should be valid
        valid_names = [
            "My App",
            "Test App 123",
            "App with - dash",
            "App with _ underscore",
            "App with + plus",
            "App with () parentheses",
            "App with [] brackets",
            "App with {} braces",
            "App with ! exclamation",
            "App with @ at",
            "App with # hash",
            "App with $ dollar",
            "App with % percent",
            "App with ^ caret",
            "App with & ampersand",
            "App with * asterisk",
            "Unicode: 测试应用",
            "Emoji: 🤖",
            "Mixed: Test 测试 123",
        ]

        for name in valid_names:
            payload = CreateAppPayload(
                name=name,
                mode="chat",
            )
            assert payload.name == name

    def test_create_app_xss_script_tags(self):
        """Test CreateAppPayload rejects script tags."""
        xss_payloads = [
            "<script>alert(document.cookie)</script>",
            "<Script>alert(1)</Script>",
            "<SCRIPT>alert('XSS')</SCRIPT>",
            "<script>alert(String.fromCharCode(88,83,83))</script>",
            "<script src='evil.js'></script>",
            "<script>document.location='http://evil.com'</script>",
        ]

        for name in xss_payloads:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_xss_iframe_tags(self):
        """Test CreateAppPayload rejects iframe tags."""
        xss_payloads = [
            "<iframe src='evil.com'></iframe>",
            "<Iframe srcdoc='<script>alert(1)</script>'></iframe>",
            "<IFRAME src='javascript:alert(1)'></iframe>",
        ]

        for name in xss_payloads:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_xss_javascript_protocol(self):
        """Test CreateAppPayload rejects javascript: protocol."""
        xss_payloads = [
            "javascript:alert(1)",
            "JAVASCRIPT:alert(1)",
            "JavaScript:alert(document.cookie)",
            "javascript:void(0)",
            "javascript://comment%0Aalert(1)",
        ]

        for name in xss_payloads:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_xss_svg_onload(self):
        """Test CreateAppPayload rejects SVG with onload."""
        xss_payloads = [
            "<svg onload=alert(1)>",
            "<SVG ONLOAD=alert(1)>",
            "<svg/x/onload=alert(1)>",
        ]

        for name in xss_payloads:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_xss_event_handlers(self):
        """Test CreateAppPayload rejects HTML event handlers."""
        xss_payloads = [
            "<div onclick=alert(1)>",
            "<img onerror=alert(1)>",
            "<body onload=alert(1)>",
            "<input onfocus=alert(1)>",
            "<a onmouseover=alert(1)>",
            "<DIV ONCLICK=alert(1)>",
            "<img src=x onerror=alert(1)>",
        ]

        for name in xss_payloads:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_xss_object_embed(self):
        """Test CreateAppPayload rejects object and embed tags."""
        xss_payloads = [
            "<object data='evil.swf'></object>",
            "<embed src='evil.swf'>",
            "<OBJECT data='javascript:alert(1)'></OBJECT>",
        ]

        for name in xss_payloads:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_xss_link_javascript(self):
        """Test CreateAppPayload rejects link tags with javascript."""
        xss_payloads = [
            "<link href='javascript:alert(1)'>",
            "<LINK HREF='javascript:alert(1)'>",
        ]

        for name in xss_payloads:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_xss_in_description(self):
        """Test CreateAppPayload rejects XSS in description."""
        xss_descriptions = [
            "<script>alert(1)</script>",
            "javascript:alert(1)",
            "<img onerror=alert(1)>",
        ]

        for description in xss_descriptions:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(
                    name="Valid Name",
                    mode="chat",
                    description=description,
                )
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_create_app_valid_descriptions(self):
        """Test CreateAppPayload with valid descriptions."""
        valid_descriptions = [
            "A simple description",
            "Description with < and > symbols",
            "Description with & ampersand",
            "Description with 'quotes' and \"double quotes\"",
            "Description with / slashes",
            "Description with \\ backslashes",
            "Description with ; semicolons",
            "Unicode: 这是一个描述",
            "Emoji: 🎉🚀",
        ]

        for description in valid_descriptions:
            payload = CreateAppPayload(
                name="Valid App Name",
                mode="chat",
                description=description,
            )
            assert payload.description == description

    def test_create_app_none_description(self):
        """Test CreateAppPayload with None description."""
        payload = CreateAppPayload(
            name="Valid App Name",
            mode="chat",
            description=None,
        )
        assert payload.description is None

    def test_update_app_xss_prevention(self):
        """Test UpdateAppPayload also prevents XSS."""
        xss_names = [
            "<script>alert(1)</script>",
            "javascript:alert(1)",
            "<img onerror=alert(1)>",
        ]

        for name in xss_names:
            with pytest.raises(ValueError) as exc_info:
                UpdateAppPayload(name=name)
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_update_app_valid_names(self):
        """Test UpdateAppPayload with valid names."""
        payload = UpdateAppPayload(name="Valid Updated Name")
        assert payload.name == "Valid Updated Name"

    def test_copy_app_xss_prevention(self):
        """Test CopyAppPayload also prevents XSS."""
        xss_names = [
            "<script>alert(1)</script>",
            "javascript:alert(1)",
            "<img onerror=alert(1)>",
        ]

        for name in xss_names:
            with pytest.raises(ValueError) as exc_info:
                CopyAppPayload(name=name)
            assert "invalid characters or patterns" in str(exc_info.value).lower()

    def test_copy_app_valid_names(self):
        """Test CopyAppPayload with valid names."""
        payload = CopyAppPayload(name="Valid Copy Name")
        assert payload.name == "Valid Copy Name"

    def test_copy_app_none_name(self):
        """Test CopyAppPayload with None name (should be allowed)."""
        payload = CopyAppPayload(name=None)
        assert payload.name is None

    def test_edge_case_angle_brackets_content(self):
        """Test that angle brackets with actual content are rejected."""
        # Angle brackets without valid HTML-like patterns should be checked
        # The regex pattern <.*?on\w+\s*= should catch event handlers
        # But let's verify other patterns too

        # Valid: angle brackets used as symbols (not matched by our patterns)
        # Our patterns specifically look for dangerous constructs

        # Invalid: actual HTML tags with event handlers
        invalid_names = [
            "<div onclick=xss>",
            "<img src=x onerror=alert(1)>",
        ]

        for name in invalid_names:
            with pytest.raises(ValueError) as exc_info:
                CreateAppPayload(name=name, mode="chat")
            assert "invalid characters or patterns" in str(exc_info.value).lower()

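> **Note**: this file asserts only on the error message, so the validator itself is not shown in the diff. A minimal sketch of a Pydantic v2 validator consistent with these assertions; only the `<.*?on\w+\s*=` event-handler pattern is quoted by the tests themselves, the other patterns here are illustrative approximations of `controllers.console.app.app`:

```python
import re

from pydantic import BaseModel, field_validator

_DANGEROUS_PATTERNS = [
    re.compile(r"<\s*script", re.IGNORECASE),
    re.compile(r"<\s*(iframe|object|embed|link|svg)", re.IGNORECASE),
    re.compile(r"javascript\s*:", re.IGNORECASE),
    re.compile(r"<.*?on\w+\s*=", re.IGNORECASE | re.DOTALL),
]


class ExamplePayload(BaseModel):
    """Stand-in for CreateAppPayload/UpdateAppPayload/CopyAppPayload."""

    name: str

    @field_validator("name")
    @classmethod
    def _reject_dangerous_patterns(cls, value: str) -> str:
        for pattern in _DANGEROUS_PATTERNS:
            if pattern.search(value):
                # Pydantic surfaces this as a ValidationError (a ValueError
                # subclass), matching the assertions above.
                raise ValueError("Name contains invalid characters or patterns")
        return value
```
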
@@ -171,7 +171,7 @@ class TestOAuthCallback:
    ):
        mock_config.CONSOLE_WEB_URL = "http://localhost:3000"
        mock_get_providers.return_value = {"github": oauth_setup["provider"]}
        mock_generate_account.return_value = oauth_setup["account"]
        mock_generate_account.return_value = (oauth_setup["account"], True)
        mock_account_service.login.return_value = oauth_setup["token_pair"]

        with app.test_request_context("/auth/oauth/github/callback?code=test_code"):

@@ -179,7 +179,7 @@ class TestOAuthCallback:

        oauth_setup["provider"].get_access_token.assert_called_once_with("test_code")
        oauth_setup["provider"].get_user_info.assert_called_once_with("access_token")
        mock_redirect.assert_called_once_with("http://localhost:3000")
        mock_redirect.assert_called_once_with("http://localhost:3000?oauth_new_user=true")

    @pytest.mark.parametrize(
        ("exception", "expected_error"),

@@ -223,7 +223,7 @@ class TestOAuthCallback:
            # This documents actual behavior. See test_defensive_check_for_closed_account_status for details
            (
                AccountStatus.CLOSED.value,
                "http://localhost:3000",
                "http://localhost:3000?oauth_new_user=false",
            ),
        ],
    )

@@ -260,7 +260,7 @@ class TestOAuthCallback:
        account = MagicMock()
        account.status = account_status
        account.id = "123"
        mock_generate_account.return_value = account
        mock_generate_account.return_value = (account, False)

        # Mock login for CLOSED status
        mock_token_pair = MagicMock()

@@ -296,7 +296,7 @@ class TestOAuthCallback:

        mock_account = MagicMock()
        mock_account.status = AccountStatus.PENDING
        mock_generate_account.return_value = mock_account
        mock_generate_account.return_value = (mock_account, False)

        mock_token_pair = MagicMock()
        mock_token_pair.access_token = "jwt_access_token"

@@ -360,7 +360,7 @@ class TestOAuthCallback:
        closed_account.status = AccountStatus.CLOSED
        closed_account.id = "123"
        closed_account.name = "Closed Account"
        mock_generate_account.return_value = closed_account
        mock_generate_account.return_value = (closed_account, False)

        # Mock successful login (current behavior)
        mock_token_pair = MagicMock()

@@ -374,7 +374,7 @@ class TestOAuthCallback:
        resource.get("github")

        # Verify current behavior: login succeeds (this is NOT ideal)
        mock_redirect.assert_called_once_with("http://localhost:3000")
        mock_redirect.assert_called_once_with("http://localhost:3000?oauth_new_user=false")
        mock_account_service.login.assert_called_once()

        # Document expected behavior in comments:

@@ -458,8 +458,9 @@ class TestAccountGeneration:
            with pytest.raises(AccountRegisterError):
                _generate_account("github", user_info)
        else:
            result = _generate_account("github", user_info)
            result, oauth_new_user = _generate_account("github", user_info)
            assert result == mock_account
            assert oauth_new_user == should_create

            if should_create:
                mock_register_service.register.assert_called_once_with(

@@ -490,9 +491,10 @@ class TestAccountGeneration:
        mock_tenant_service.create_tenant.return_value = mock_new_tenant

        with app.test_request_context(headers={"Accept-Language": "en-US,en;q=0.9"}):
            result = _generate_account("github", user_info)
            result, oauth_new_user = _generate_account("github", user_info)

        assert result == mock_account
        assert oauth_new_user is False
        mock_tenant_service.create_tenant.assert_called_once_with("Test User's Workspace")
        mock_tenant_service.create_tenant_member.assert_called_once_with(
            mock_new_tenant, mock_account, role="owner"

@@ -0,0 +1,145 @@
"""Unit tests for load balancing credential validation APIs."""

from __future__ import annotations

import builtins
import importlib
import sys
from types import SimpleNamespace
from unittest.mock import MagicMock

import pytest
from flask import Flask
from flask.views import MethodView
from werkzeug.exceptions import Forbidden

from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError

if not hasattr(builtins, "MethodView"):
    builtins.MethodView = MethodView  # type: ignore[attr-defined]

from models.account import TenantAccountRole


@pytest.fixture
def app() -> Flask:
    app = Flask(__name__)
    app.config["TESTING"] = True
    return app


@pytest.fixture
def load_balancing_module(monkeypatch: pytest.MonkeyPatch):
    """Reload controller module with lightweight decorators for testing."""

    from controllers.console import console_ns, wraps
    from libs import login

    def _noop(func):
        return func

    monkeypatch.setattr(login, "login_required", _noop)
    monkeypatch.setattr(wraps, "setup_required", _noop)
    monkeypatch.setattr(wraps, "account_initialization_required", _noop)

    def _noop_route(*args, **kwargs):  # type: ignore[override]
        def _decorator(cls):
            return cls

        return _decorator

    monkeypatch.setattr(console_ns, "route", _noop_route)

    module_name = "controllers.console.workspace.load_balancing_config"
    sys.modules.pop(module_name, None)
    module = importlib.import_module(module_name)
    return module


def _mock_user(role: TenantAccountRole) -> SimpleNamespace:
    return SimpleNamespace(current_role=role)


def _prepare_context(module, monkeypatch: pytest.MonkeyPatch, role=TenantAccountRole.OWNER):
    user = _mock_user(role)
    monkeypatch.setattr(module, "current_account_with_tenant", lambda: (user, "tenant-123"))
    mock_service = MagicMock()
    monkeypatch.setattr(module, "ModelLoadBalancingService", lambda: mock_service)
    return mock_service


def _request_payload():
    return {"model": "gpt-4o", "model_type": ModelType.LLM, "credentials": {"api_key": "sk-***"}}


def test_validate_credentials_success(app: Flask, load_balancing_module, monkeypatch: pytest.MonkeyPatch):
    service = _prepare_context(load_balancing_module, monkeypatch)

    with app.test_request_context(
        "/workspaces/current/model-providers/openai/models/load-balancing-configs/credentials-validate",
        method="POST",
        json=_request_payload(),
    ):
        response = load_balancing_module.LoadBalancingCredentialsValidateApi().post(provider="openai")

    assert response == {"result": "success"}
    service.validate_load_balancing_credentials.assert_called_once_with(
        tenant_id="tenant-123",
        provider="openai",
        model="gpt-4o",
        model_type=ModelType.LLM,
        credentials={"api_key": "sk-***"},
    )


def test_validate_credentials_returns_error_message(app: Flask, load_balancing_module, monkeypatch: pytest.MonkeyPatch):
    service = _prepare_context(load_balancing_module, monkeypatch)
    service.validate_load_balancing_credentials.side_effect = CredentialsValidateFailedError("invalid credentials")

    with app.test_request_context(
        "/workspaces/current/model-providers/openai/models/load-balancing-configs/credentials-validate",
        method="POST",
        json=_request_payload(),
    ):
        response = load_balancing_module.LoadBalancingCredentialsValidateApi().post(provider="openai")

    assert response == {"result": "error", "error": "invalid credentials"}


def test_validate_credentials_requires_privileged_role(
    app: Flask, load_balancing_module, monkeypatch: pytest.MonkeyPatch
):
    _prepare_context(load_balancing_module, monkeypatch, role=TenantAccountRole.NORMAL)

    with app.test_request_context(
        "/workspaces/current/model-providers/openai/models/load-balancing-configs/credentials-validate",
        method="POST",
        json=_request_payload(),
    ):
        api = load_balancing_module.LoadBalancingCredentialsValidateApi()
        with pytest.raises(Forbidden):
            api.post(provider="openai")


def test_validate_credentials_with_config_id(app: Flask, load_balancing_module, monkeypatch: pytest.MonkeyPatch):
    service = _prepare_context(load_balancing_module, monkeypatch)

    with app.test_request_context(
        "/workspaces/current/model-providers/openai/models/load-balancing-configs/cfg-1/credentials-validate",
        method="POST",
        json=_request_payload(),
    ):
        response = load_balancing_module.LoadBalancingConfigCredentialsValidateApi().post(
            provider="openai", config_id="cfg-1"
        )

    assert response == {"result": "success"}
    service.validate_load_balancing_credentials.assert_called_once_with(
        tenant_id="tenant-123",
        provider="openai",
        model="gpt-4o",
        model_type=ModelType.LLM,
        credentials={"api_key": "sk-***"},
        config_id="cfg-1",
    )

@@ -0,0 +1,100 @@
import json
from unittest.mock import MagicMock, patch

import pytest
from flask import Flask
from flask_restx import Api

from controllers.console.workspace.tool_providers import ToolProviderMCPApi
from core.db.session_factory import configure_session_factory
from extensions.ext_database import db
from services.tools.mcp_tools_manage_service import ReconnectResult


# Backward-compat fixtures referenced by @pytest.mark.usefixtures in this file.
# They are intentionally no-ops because the test already patches the required
# behaviors explicitly via @patch and context managers below.
@pytest.fixture
def _mock_cache():
    return


@pytest.fixture
def _mock_user_tenant():
    return


@pytest.fixture
def client():
    app = Flask(__name__)
    app.config["TESTING"] = True
    app.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///:memory:"
    api = Api(app)
    api.add_resource(ToolProviderMCPApi, "/console/api/workspaces/current/tool-provider/mcp")
    db.init_app(app)
    # Configure the session factory used by controller code
    with app.app_context():
        configure_session_factory(db.engine)
    return app.test_client()


@patch(
    "controllers.console.workspace.tool_providers.current_account_with_tenant", return_value=(MagicMock(id="u1"), "t1")
)
@patch("controllers.console.workspace.tool_providers.Session")
@patch("controllers.console.workspace.tool_providers.MCPToolManageService._reconnect_with_url")
@pytest.mark.usefixtures("_mock_cache", "_mock_user_tenant")
def test_create_mcp_provider_populates_tools(mock_reconnect, mock_session, mock_current_account_with_tenant, client):
    # Arrange: reconnect returns tools immediately
    mock_reconnect.return_value = ReconnectResult(
        authed=True,
        tools=json.dumps(
            [{"name": "ping", "description": "ok", "inputSchema": {"type": "object"}, "outputSchema": {}}]
        ),
        encrypted_credentials="{}",
    )

    # Fake service.create_provider -> returns an object with an id for reload
    svc = MagicMock()
    create_result = MagicMock()
    create_result.id = "provider-1"
    svc.create_provider.return_value = create_result
    svc.get_provider.return_value = MagicMock(id="provider-1", tenant_id="t1")  # used by the reload path
    mock_session.return_value.__enter__.return_value = MagicMock()
    # Patch the MCPToolManageService constructed inside the controller
    with patch("controllers.console.workspace.tool_providers.MCPToolManageService", return_value=svc):
        payload = {
            "server_url": "http://example.com/mcp",
            "name": "demo",
            "icon": "😀",
            "icon_type": "emoji",
            "icon_background": "#000",
            "server_identifier": "demo-sid",
            "configuration": {"timeout": 5, "sse_read_timeout": 30},
            "headers": {},
            "authentication": {},
        }
        # Act
        with (
            patch("controllers.console.wraps.dify_config.EDITION", "CLOUD"),  # bypass setup_required DB check
            patch("controllers.console.wraps.current_account_with_tenant", return_value=(MagicMock(id="u1"), "t1")),
            patch("libs.login.check_csrf_token", return_value=None),  # bypass CSRF in login_required
            patch("libs.login._get_user", return_value=MagicMock(id="u1", is_authenticated=True)),  # login
            patch(
                "services.tools.tools_transform_service.ToolTransformService.mcp_provider_to_user_provider",
                return_value={"id": "provider-1", "tools": [{"name": "ping"}]},
            ),
        ):
            resp = client.post(
                "/console/api/workspaces/current/tool-provider/mcp",
                data=json.dumps(payload),
                content_type="application/json",
            )

    # Assert
    assert resp.status_code == 200
    body = resp.get_json()
    assert body.get("id") == "provider-1"
    # If the transformed body contains a tools field, make sure it is non-empty
    assert isinstance(body.get("tools"), list)
    assert body["tools"]

@@ -1,129 +0,0 @@
import json
from unittest.mock import patch

import pytest
from redis.exceptions import RedisError

from core.helper.tool_provider_cache import ToolProviderListCache
from core.tools.entities.api_entities import ToolProviderTypeApiLiteral


@pytest.fixture
def mock_redis_client():
    """Fixture: Mock Redis client"""
    with patch("core.helper.tool_provider_cache.redis_client") as mock:
        yield mock


class TestToolProviderListCache:
    """Test class for ToolProviderListCache"""

    def test_generate_cache_key(self):
        """Test cache key generation logic"""
        # Scenario 1: Specify typ (valid literal value)
        tenant_id = "tenant_123"
        typ: ToolProviderTypeApiLiteral = "builtin"
        expected_key = f"tool_providers:tenant_id:{tenant_id}:type:{typ}"
        assert ToolProviderListCache._generate_cache_key(tenant_id, typ) == expected_key

        # Scenario 2: typ is None (defaults to "all")
        expected_key_all = f"tool_providers:tenant_id:{tenant_id}:type:all"
        assert ToolProviderListCache._generate_cache_key(tenant_id) == expected_key_all

    def test_get_cached_providers_hit(self, mock_redis_client):
        """Test get cached providers - cache hit and successful decoding"""
        tenant_id = "tenant_123"
        typ: ToolProviderTypeApiLiteral = "api"
        mock_providers = [{"id": "tool", "name": "test_provider"}]
        mock_redis_client.get.return_value = json.dumps(mock_providers).encode("utf-8")

        result = ToolProviderListCache.get_cached_providers(tenant_id, typ)

        mock_redis_client.get.assert_called_once_with(ToolProviderListCache._generate_cache_key(tenant_id, typ))
        assert result == mock_providers

    def test_get_cached_providers_decode_error(self, mock_redis_client):
        """Test get cached providers - cache hit but decoding failed"""
        tenant_id = "tenant_123"
        mock_redis_client.get.return_value = b"invalid_json_data"

        result = ToolProviderListCache.get_cached_providers(tenant_id)

        assert result is None
        mock_redis_client.get.assert_called_once()

    def test_get_cached_providers_miss(self, mock_redis_client):
        """Test get cached providers - cache miss"""
        tenant_id = "tenant_123"
        mock_redis_client.get.return_value = None

        result = ToolProviderListCache.get_cached_providers(tenant_id)

        assert result is None
        mock_redis_client.get.assert_called_once()

    def test_set_cached_providers(self, mock_redis_client):
        """Test set cached providers"""
        tenant_id = "tenant_123"
        typ: ToolProviderTypeApiLiteral = "builtin"
        mock_providers = [{"id": "tool", "name": "test_provider"}]
        cache_key = ToolProviderListCache._generate_cache_key(tenant_id, typ)

        ToolProviderListCache.set_cached_providers(tenant_id, typ, mock_providers)

        mock_redis_client.setex.assert_called_once_with(
            cache_key, ToolProviderListCache.CACHE_TTL, json.dumps(mock_providers)
        )

    def test_invalidate_cache_specific_type(self, mock_redis_client):
        """Test invalidate cache - specific type"""
        tenant_id = "tenant_123"
        typ: ToolProviderTypeApiLiteral = "workflow"
        cache_key = ToolProviderListCache._generate_cache_key(tenant_id, typ)

        ToolProviderListCache.invalidate_cache(tenant_id, typ)

        mock_redis_client.delete.assert_called_once_with(cache_key)

    def test_invalidate_cache_all_types(self, mock_redis_client):
        """Test invalidate cache - clear all tenant cache"""
        tenant_id = "tenant_123"
        mock_keys = [
            b"tool_providers:tenant_id:tenant_123:type:all",
            b"tool_providers:tenant_id:tenant_123:type:builtin",
        ]
        mock_redis_client.scan_iter.return_value = mock_keys

        ToolProviderListCache.invalidate_cache(tenant_id)

        mock_redis_client.scan_iter.assert_called_once_with(f"tool_providers:tenant_id:{tenant_id}:*")
        mock_redis_client.delete.assert_called_once_with(*mock_keys)

    def test_invalidate_cache_no_keys(self, mock_redis_client):
        """Test invalidate cache - no cache keys for tenant"""
        tenant_id = "tenant_123"
        mock_redis_client.scan_iter.return_value = []

        ToolProviderListCache.invalidate_cache(tenant_id)

        mock_redis_client.delete.assert_not_called()

    def test_redis_fallback_default_return(self, mock_redis_client):
        """Test redis_fallback decorator - default return value (Redis error)"""
        mock_redis_client.get.side_effect = RedisError("Redis connection error")

        result = ToolProviderListCache.get_cached_providers("tenant_123")

        assert result is None
        mock_redis_client.get.assert_called_once()

    def test_redis_fallback_no_default(self, mock_redis_client):
        """Test redis_fallback decorator - no default return value (Redis error)"""
        mock_redis_client.setex.side_effect = RedisError("Redis connection error")

        try:
            ToolProviderListCache.set_cached_providers("tenant_123", "mcp", [])
        except RedisError:
            pytest.fail("set_cached_providers should not raise RedisError (handled by fallback)")

        mock_redis_client.setex.assert_called_once()

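> **Note**: the last two tests in the deleted file pin down the `redis_fallback` decorator contract: reads degrade to a default value and writes never raise when Redis is down. A self-contained sketch of a decorator that satisfies that contract (the real helper lives in `core.helper` and may differ in detail):

```python
import functools
import logging

from redis.exceptions import RedisError

logger = logging.getLogger(__name__)


def redis_fallback(default_return=None):
    """Swallow RedisError and return a default, so a cache outage degrades
    to a cache miss instead of failing the request."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except RedisError:
                logger.warning("Redis unavailable in %s, falling back", func.__name__)
                return default_return

        return wrapper

    return decorator
```
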
@@ -0,0 +1,213 @@
from core.rag.cleaner.clean_processor import CleanProcessor


class TestCleanProcessor:
    """Test cases for CleanProcessor.clean method."""

    def test_clean_default_removal_of_invalid_symbols(self):
        """Test default cleaning removes invalid symbols."""
        # Test <| replacement
        assert CleanProcessor.clean("text<|with<|invalid", None) == "text<with<invalid"

        # Test |> replacement
        assert CleanProcessor.clean("text|>with|>invalid", None) == "text>with>invalid"

        # Test removal of control characters
        text_with_control = "normal\x00text\x1fwith\x07control\x7fchars"
        expected = "normaltextwithcontrolchars"
        assert CleanProcessor.clean(text_with_control, None) == expected

        # Test U+FFFE removal
        text_with_ufffe = "normal\ufffepadding"
        expected = "normalpadding"
        assert CleanProcessor.clean(text_with_ufffe, None) == expected

    def test_clean_with_none_process_rule(self):
        """Test cleaning with None process_rule - only default cleaning applied."""
        text = "Hello<|World\x00"
        expected = "Hello<World"
        assert CleanProcessor.clean(text, None) == expected

    def test_clean_with_empty_process_rule(self):
        """Test cleaning with empty process_rule dict - only default cleaning applied."""
        text = "Hello<|World\x00"
        expected = "Hello<World"
        assert CleanProcessor.clean(text, {}) == expected

    def test_clean_with_empty_rules(self):
        """Test cleaning with empty rules - only default cleaning applied."""
        text = "Hello<|World\x00"
        expected = "Hello<World"
        assert CleanProcessor.clean(text, {"rules": {}}) == expected

    def test_clean_remove_extra_spaces_enabled(self):
        """Test remove_extra_spaces rule when enabled."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_extra_spaces", "enabled": True}]}}

        # Test multiple newlines reduced to two
        text = "Line1\n\n\n\n\nLine2"
        expected = "Line1\n\nLine2"
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test various whitespace characters reduced to single space
        text = "word1\u2000\u2001\t\t \u3000word2"
        expected = "word1 word2"
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test combination of newlines and spaces
        text = "Line1\n\n\n\n \t Line2"
        expected = "Line1\n\n Line2"
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_remove_extra_spaces_disabled(self):
        """Test remove_extra_spaces rule when disabled."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_extra_spaces", "enabled": False}]}}

        text = "Line1\n\n\n\n\nLine2 with spaces"
        # Should only apply default cleaning (no invalid symbols here)
        assert CleanProcessor.clean(text, process_rule) == text

    def test_clean_remove_urls_emails_enabled(self):
        """Test remove_urls_emails rule when enabled."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}}

        # Test email removal
        text = "Contact us at test@example.com for more info"
        expected = "Contact us at for more info"
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test URL removal
        text = "Visit https://example.com or http://test.org"
        expected = "Visit or "
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test both email and URL
        text = "Email me@test.com and visit https://site.com"
        expected = "Email and visit "
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_preserve_markdown_links_and_images(self):
        """Test that markdown links and images are preserved when removing URLs."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}}

        # Test markdown link preservation
        text = "Check [Google](https://google.com) for info"
        expected = "Check [Google](https://google.com) for info"
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test markdown image preservation
        text = "Image: "
        expected = "Image: "
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test both link and image preservation
        text = "[Link](https://link.com) and "
        expected = "[Link](https://link.com) and "
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test that non-markdown URLs are still removed
        text = "Check [Link](https://keep.com) but remove https://remove.com"
        expected = "Check [Link](https://keep.com) but remove "
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test email removal alongside markdown preservation
        text = "Email: test@test.com, link: [Click](https://site.com)"
        expected = "Email: , link: [Click](https://site.com)"
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_remove_urls_emails_disabled(self):
        """Test remove_urls_emails rule when disabled."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": False}]}}

        text = "Email test@example.com visit https://example.com"
        # Should only apply default cleaning
        assert CleanProcessor.clean(text, process_rule) == text

    def test_clean_both_rules_enabled(self):
        """Test both pre-processing rules enabled together."""
        process_rule = {
            "rules": {
                "pre_processing_rules": [
                    {"id": "remove_extra_spaces", "enabled": True},
                    {"id": "remove_urls_emails", "enabled": True},
                ]
            }
        }

        text = "Hello\n\n\n\n World test@example.com \n\n\nhttps://example.com"
        expected = "Hello\n\n World \n\n"
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_with_markdown_link_and_extra_spaces(self):
        """Test markdown link preservation with extra spaces removal."""
        process_rule = {
            "rules": {
                "pre_processing_rules": [
                    {"id": "remove_extra_spaces", "enabled": True},
                    {"id": "remove_urls_emails", "enabled": True},
                ]
            }
        }

        text = "[Link](https://example.com)\n\n\n\n Text https://remove.com"
        expected = "[Link](https://example.com)\n\n Text "
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_unknown_rule_id_ignored(self):
        """Test that unknown rule IDs are silently ignored."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "unknown_rule", "enabled": True}]}}

        text = "Hello<|World\x00"
        expected = "Hello<World"
        # Only default cleaning should be applied
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_empty_text(self):
        """Test cleaning empty text."""
        assert CleanProcessor.clean("", None) == ""
        assert CleanProcessor.clean("", {}) == ""
        assert CleanProcessor.clean("", {"rules": {}}) == ""

    def test_clean_text_with_only_invalid_symbols(self):
        """Test text containing only invalid symbols."""
        text = "<|<|\x00\x01\x02\ufffe|>|>"
        # <| becomes <, |> becomes >, control chars and U+FFFE are removed
        assert CleanProcessor.clean(text, None) == "<<>>"

    def test_clean_multiple_markdown_links_preserved(self):
        """Test multiple markdown links are all preserved."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}}

        text = "[One](https://one.com) [Two](http://two.org) [Three](https://three.net)"
        expected = "[One](https://one.com) [Two](http://two.org) [Three](https://three.net)"
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_markdown_link_text_as_url(self):
        """Test markdown link where the link text itself is a URL."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}}

        # Link text that looks like a URL should be preserved
        text = "[https://text-url.com](https://actual-url.com)"
        expected = "[https://text-url.com](https://actual-url.com)"
        assert CleanProcessor.clean(text, process_rule) == expected

        # Text URL without markdown should be removed
        text = "https://text-url.com https://actual-url.com"
        expected = " "
        assert CleanProcessor.clean(text, process_rule) == expected

    def test_clean_complex_markdown_link_content(self):
        """Test markdown links with complex content - known limitation with brackets in link text."""
        process_rule = {"rules": {"pre_processing_rules": [{"id": "remove_urls_emails", "enabled": True}]}}

        # Note: The regex pattern [^\]]* cannot handle ] within link text
        # This is a known limitation - the pattern stops at the first ]
        text = "[Text with [brackets] and (parens)](https://example.com)"
        # Actual behavior: only matches up to first ], URL gets removed
        expected = "[Text with [brackets] and (parens)]("
        assert CleanProcessor.clean(text, process_rule) == expected

        # Test that properly formatted markdown links work
        text = "[Text with (parens) and symbols](https://example.com)"
        expected = "[Text with (parens) and symbols](https://example.com)"
        assert CleanProcessor.clean(text, process_rule) == expected

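> **Note**: the markdown tests above imply a protect-strip-restore scheme: shield markdown links/images, strip bare URLs and emails, then restore the shielded spans. A minimal sketch under that assumption; CleanProcessor's real regexes may differ (its link pattern uses `[^\]]*`, hence the documented bracket limitation), and exact whitespace handling is not reproduced here:

```python
import re

MARKDOWN_LINK = re.compile(r"!?\[[^\]]*\]\([^)]*\)")  # links and images
URL = re.compile(r"https?://[^\s)]+")
EMAIL = re.compile(r"[\w.-]+@[\w.-]+\.[A-Za-z]{2,}")


def remove_urls_emails(text: str) -> str:
    placeholders: list[str] = []

    def protect(match: re.Match) -> str:
        # Swap each markdown span for an unguessable sentinel token.
        placeholders.append(match.group(0))
        return f"\x00{len(placeholders) - 1}\x00"

    text = MARKDOWN_LINK.sub(protect, text)  # shield links/images first
    text = EMAIL.sub("", text)
    text = URL.sub("", text)
    for i, original in enumerate(placeholders):  # restore shielded spans
        text = text.replace(f"\x00{i}\x00", original)
    return text
```
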
@@ -0,0 +1,327 @@
import unittest
from unittest.mock import MagicMock, patch

import pytest

from core.rag.datasource.vdb.pgvector.pgvector import (
    PGVector,
    PGVectorConfig,
)


class TestPGVector(unittest.TestCase):
    def setUp(self):
        self.config = PGVectorConfig(
            host="localhost",
            port=5432,
            user="test_user",
            password="test_password",
            database="test_db",
            min_connection=1,
            max_connection=5,
            pg_bigm=False,
        )
        self.collection_name = "test_collection"

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    def test_init(self, mock_pool_class):
        """Test PGVector initialization."""
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        pgvector = PGVector(self.collection_name, self.config)

        assert pgvector._collection_name == self.collection_name
        assert pgvector.table_name == f"embedding_{self.collection_name}"
        assert pgvector.get_type() == "pgvector"
        assert pgvector.pool is not None
        assert pgvector.pg_bigm is False
        assert pgvector.index_hash is not None

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    def test_init_with_pg_bigm(self, mock_pool_class):
        """Test PGVector initialization with pg_bigm enabled."""
        config = PGVectorConfig(
            host="localhost",
            port=5432,
            user="test_user",
            password="test_password",
            database="test_db",
            min_connection=1,
            max_connection=5,
            pg_bigm=True,
        )
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        pgvector = PGVector(self.collection_name, config)

        assert pgvector.pg_bigm is True

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client")
    def test_create_collection_basic(self, mock_redis, mock_pool_class):
        """Test basic collection creation."""
        # Mock Redis operations
        mock_lock = MagicMock()
        mock_lock.__enter__ = MagicMock()
        mock_lock.__exit__ = MagicMock()
        mock_redis.lock.return_value = mock_lock
        mock_redis.get.return_value = None
        mock_redis.set.return_value = None

        # Mock the connection pool
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        # Mock connection and cursor
        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_pool.getconn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor
        mock_cursor.fetchone.return_value = [1]  # vector extension exists

        pgvector = PGVector(self.collection_name, self.config)
        pgvector._create_collection(1536)

        # Verify SQL execution calls
        assert mock_cursor.execute.called

        # Check that CREATE TABLE was called with correct dimension
        create_table_calls = [call for call in mock_cursor.execute.call_args_list if "CREATE TABLE" in str(call)]
        assert len(create_table_calls) == 1
        assert "vector(1536)" in create_table_calls[0][0][0]

        # Check that CREATE INDEX was called (dimension <= 2000)
        create_index_calls = [
            call for call in mock_cursor.execute.call_args_list if "CREATE INDEX" in str(call) and "hnsw" in str(call)
        ]
        assert len(create_index_calls) == 1

        # Verify Redis cache was set
        mock_redis.set.assert_called_once()

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client")
    def test_create_collection_with_large_dimension(self, mock_redis, mock_pool_class):
        """Test collection creation with dimension > 2000 (no HNSW index)."""
        # Mock Redis operations
        mock_lock = MagicMock()
        mock_lock.__enter__ = MagicMock()
        mock_lock.__exit__ = MagicMock()
        mock_redis.lock.return_value = mock_lock
        mock_redis.get.return_value = None
        mock_redis.set.return_value = None

        # Mock the connection pool
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        # Mock connection and cursor
        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_pool.getconn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor
        mock_cursor.fetchone.return_value = [1]  # vector extension exists

        pgvector = PGVector(self.collection_name, self.config)
        pgvector._create_collection(3072)  # Dimension > 2000

        # Check that CREATE TABLE was called
        create_table_calls = [call for call in mock_cursor.execute.call_args_list if "CREATE TABLE" in str(call)]
        assert len(create_table_calls) == 1
        assert "vector(3072)" in create_table_calls[0][0][0]

        # Check that HNSW index was NOT created (dimension > 2000)
        hnsw_index_calls = [call for call in mock_cursor.execute.call_args_list if "hnsw" in str(call)]
        assert len(hnsw_index_calls) == 0

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client")
    def test_create_collection_with_pg_bigm(self, mock_redis, mock_pool_class):
        """Test collection creation with pg_bigm enabled."""
        config = PGVectorConfig(
            host="localhost",
            port=5432,
            user="test_user",
            password="test_password",
            database="test_db",
            min_connection=1,
            max_connection=5,
            pg_bigm=True,
        )

        # Mock Redis operations
        mock_lock = MagicMock()
        mock_lock.__enter__ = MagicMock()
        mock_lock.__exit__ = MagicMock()
        mock_redis.lock.return_value = mock_lock
        mock_redis.get.return_value = None
        mock_redis.set.return_value = None

        # Mock the connection pool
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        # Mock connection and cursor
        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_pool.getconn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor
        mock_cursor.fetchone.return_value = [1]  # vector extension exists

        pgvector = PGVector(self.collection_name, config)
        pgvector._create_collection(1536)

        # Check that pg_bigm index was created
        bigm_index_calls = [call for call in mock_cursor.execute.call_args_list if "gin_bigm_ops" in str(call)]
        assert len(bigm_index_calls) == 1

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client")
    def test_create_collection_creates_vector_extension(self, mock_redis, mock_pool_class):
        """Test that vector extension is created if it doesn't exist."""
        # Mock Redis operations
        mock_lock = MagicMock()
        mock_lock.__enter__ = MagicMock()
        mock_lock.__exit__ = MagicMock()
        mock_redis.lock.return_value = mock_lock
        mock_redis.get.return_value = None
        mock_redis.set.return_value = None

        # Mock the connection pool
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        # Mock connection and cursor
        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_pool.getconn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor
        # First call: vector extension doesn't exist
        mock_cursor.fetchone.return_value = None

        pgvector = PGVector(self.collection_name, self.config)
        pgvector._create_collection(1536)

        # Check that CREATE EXTENSION was called
        create_extension_calls = [
            call for call in mock_cursor.execute.call_args_list if "CREATE EXTENSION vector" in str(call)
        ]
        assert len(create_extension_calls) == 1

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client")
    def test_create_collection_with_cache_hit(self, mock_redis, mock_pool_class):
        """Test that collection creation is skipped when cache exists."""
        # Mock Redis operations - cache exists
        mock_lock = MagicMock()
        mock_lock.__enter__ = MagicMock()
        mock_lock.__exit__ = MagicMock()
        mock_redis.lock.return_value = mock_lock
        mock_redis.get.return_value = 1  # Cache exists

        # Mock the connection pool
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        # Mock connection and cursor
        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_pool.getconn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor

        pgvector = PGVector(self.collection_name, self.config)
        pgvector._create_collection(1536)

        # Check that no SQL was executed (early return due to cache)
        assert mock_cursor.execute.call_count == 0

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    @patch("core.rag.datasource.vdb.pgvector.pgvector.redis_client")
    def test_create_collection_with_redis_lock(self, mock_redis, mock_pool_class):
        """Test that Redis lock is used during collection creation."""
        # Mock Redis operations
        mock_lock = MagicMock()
        mock_lock.__enter__ = MagicMock()
        mock_lock.__exit__ = MagicMock()
        mock_redis.lock.return_value = mock_lock
        mock_redis.get.return_value = None
        mock_redis.set.return_value = None

        # Mock the connection pool
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        # Mock connection and cursor
        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_pool.getconn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor
        mock_cursor.fetchone.return_value = [1]  # vector extension exists

        pgvector = PGVector(self.collection_name, self.config)
        pgvector._create_collection(1536)

        # Verify Redis lock was acquired with correct lock name
        mock_redis.lock.assert_called_once_with("vector_indexing_test_collection_lock", timeout=20)

        # Verify lock context manager was entered and exited
        mock_lock.__enter__.assert_called_once()
        mock_lock.__exit__.assert_called_once()

    @patch("core.rag.datasource.vdb.pgvector.pgvector.psycopg2.pool.SimpleConnectionPool")
    def test_get_cursor_context_manager(self, mock_pool_class):
        """Test that _get_cursor properly manages connection lifecycle."""
        mock_pool = MagicMock()
        mock_pool_class.return_value = mock_pool

        mock_conn = MagicMock()
        mock_cursor = MagicMock()
        mock_pool.getconn.return_value = mock_conn
        mock_conn.cursor.return_value = mock_cursor

        pgvector = PGVector(self.collection_name, self.config)

        with pgvector._get_cursor() as cur:
            assert cur == mock_cursor

        # Verify connection lifecycle methods were called
        mock_pool.getconn.assert_called_once()
        mock_cursor.close.assert_called_once()
        mock_conn.commit.assert_called_once()
        mock_pool.putconn.assert_called_once_with(mock_conn)


@pytest.mark.parametrize(
    "invalid_config_override",
    [
        {"host": ""},  # Test empty host
        {"port": 0},  # Test invalid port
        {"user": ""},  # Test empty user
        {"password": ""},  # Test empty password
        {"database": ""},  # Test empty database
        {"min_connection": 0},  # Test invalid min_connection
        {"max_connection": 0},  # Test invalid max_connection
        {"min_connection": 10, "max_connection": 5},  # Test min > max
    ],
)
def test_config_validation_parametrized(invalid_config_override):
    """Test configuration validation for various invalid inputs using parametrize."""
    config = {
        "host": "localhost",
        "port": 5432,
        "user": "test_user",
        "password": "test_password",
        "database": "test_db",
        "min_connection": 1,
        "max_connection": 5,
    }
    config.update(invalid_config_override)

    with pytest.raises(ValueError):
        PGVectorConfig(**config)


if __name__ == "__main__":
    unittest.main()

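A detail worth calling out in the tests above: stacked `@patch` decorators are applied bottom-up, so mock arguments arrive in the reverse order of the decorator lines. That is why `test_create_collection_basic(self, mock_redis, mock_pool_class)` receives `mock_redis` first even though the pool patch is written first. A self-contained illustration, using `os.getpid`/`os.getcwd` purely as stand-in patch targets:

```python
import os
from unittest.mock import MagicMock, patch


@patch("os.getcwd")  # applied second -> injected as the second argument
@patch("os.getpid")  # applied first  -> injected as the first argument
def demo(mock_getpid: MagicMock, mock_getcwd: MagicMock) -> None:
    mock_getpid.return_value = 42
    mock_getcwd.return_value = "/tmp"
    assert os.getpid() == 42
    assert os.getcwd() == "/tmp"


demo()
```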
@@ -0,0 +1,186 @@
from types import SimpleNamespace
from unittest.mock import MagicMock, patch

import pytest

import core.rag.extractor.pdf_extractor as pe


@pytest.fixture
def mock_dependencies(monkeypatch):
    # Mock storage
    saves = []

    def save(key, data):
        saves.append((key, data))

    monkeypatch.setattr(pe, "storage", SimpleNamespace(save=save))

    # Mock db
    class DummySession:
        def __init__(self):
            self.added = []
            self.committed = False

        def add(self, obj):
            self.added.append(obj)

        def add_all(self, objs):
            self.added.extend(objs)

        def commit(self):
            self.committed = True

    db_stub = SimpleNamespace(session=DummySession())
    monkeypatch.setattr(pe, "db", db_stub)

    # Mock UploadFile
    class FakeUploadFile:
        DEFAULT_ID = "test_file_id"

        def __init__(self, **kwargs):
            # Assign id from DEFAULT_ID, allow override via kwargs if needed
            self.id = self.DEFAULT_ID
            for k, v in kwargs.items():
                setattr(self, k, v)

    monkeypatch.setattr(pe, "UploadFile", FakeUploadFile)

    # Mock config
    monkeypatch.setattr(pe.dify_config, "FILES_URL", "http://files.local")
    monkeypatch.setattr(pe.dify_config, "INTERNAL_FILES_URL", None)
    monkeypatch.setattr(pe.dify_config, "STORAGE_TYPE", "local")

    return SimpleNamespace(saves=saves, db=db_stub, UploadFile=FakeUploadFile)


@pytest.mark.parametrize(
    ("image_bytes", "expected_mime", "expected_ext", "file_id"),
    [
        (b"\xff\xd8\xff some jpeg", "image/jpeg", "jpg", "test_file_id_jpeg"),
        (b"\x89PNG\r\n\x1a\n some png", "image/png", "png", "test_file_id_png"),
    ],
)
def test_extract_images_formats(mock_dependencies, monkeypatch, image_bytes, expected_mime, expected_ext, file_id):
    saves = mock_dependencies.saves
    db_stub = mock_dependencies.db

    # Customize FakeUploadFile id for this test case.
    # Using monkeypatch ensures the class attribute is reset between parameter sets.
    monkeypatch.setattr(mock_dependencies.UploadFile, "DEFAULT_ID", file_id)

    # Mock page and image objects
    mock_page = MagicMock()
    mock_image_obj = MagicMock()

    def mock_extract(buf, fb_format=None):
        buf.write(image_bytes)

    mock_image_obj.extract.side_effect = mock_extract

    mock_page.get_objects.return_value = [mock_image_obj]

    extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1")

    # We need to handle the import inside _extract_images
    with patch("pypdfium2.raw") as mock_raw:
        mock_raw.FPDF_PAGEOBJ_IMAGE = 1
        result = extractor._extract_images(mock_page)

    assert f"" in result
    assert len(saves) == 1
    assert saves[0][1] == image_bytes
    assert len(db_stub.session.added) == 1
    assert db_stub.session.added[0].tenant_id == "t1"
    assert db_stub.session.added[0].size == len(image_bytes)
    assert db_stub.session.added[0].mime_type == expected_mime
    assert db_stub.session.added[0].extension == expected_ext
    assert db_stub.session.committed is True


@pytest.mark.parametrize(
    ("get_objects_side_effect", "get_objects_return_value"),
    [
        (None, []),  # Empty list
        (None, None),  # None returned
        (Exception("Failed to get objects"), None),  # Exception raised
    ],
)
def test_extract_images_get_objects_scenarios(mock_dependencies, get_objects_side_effect, get_objects_return_value):
    mock_page = MagicMock()
    if get_objects_side_effect:
        mock_page.get_objects.side_effect = get_objects_side_effect
    else:
        mock_page.get_objects.return_value = get_objects_return_value

    extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1")

    with patch("pypdfium2.raw") as mock_raw:
        mock_raw.FPDF_PAGEOBJ_IMAGE = 1
        result = extractor._extract_images(mock_page)

    assert result == ""


def test_extract_calls_extract_images(mock_dependencies, monkeypatch):
    # Mock pypdfium2
    mock_pdf_doc = MagicMock()
    mock_page = MagicMock()
    mock_pdf_doc.__iter__.return_value = [mock_page]

    # Mock text extraction
    mock_text_page = MagicMock()
    mock_text_page.get_text_range.return_value = "Page text content"
    mock_page.get_textpage.return_value = mock_text_page

    with patch("pypdfium2.PdfDocument", return_value=mock_pdf_doc):
        # Mock Blob
        mock_blob = MagicMock()
        mock_blob.source = "test.pdf"
        with patch("core.rag.extractor.pdf_extractor.Blob.from_path", return_value=mock_blob):
            extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1")

            # Mock _extract_images to return a known string
            monkeypatch.setattr(extractor, "_extract_images", lambda p: "")

            documents = list(extractor.extract())

            assert len(documents) == 1
            assert "Page text content" in documents[0].page_content
            assert "" in documents[0].page_content
            assert documents[0].metadata["page"] == 0


def test_extract_images_failures(mock_dependencies):
    saves = mock_dependencies.saves
    db_stub = mock_dependencies.db

    # Mock page and image objects
    mock_page = MagicMock()
    mock_image_obj_fail = MagicMock()
    mock_image_obj_ok = MagicMock()

    # First image raises exception
    mock_image_obj_fail.extract.side_effect = Exception("Extraction failure")

    # Second image is OK (JPEG)
    jpeg_bytes = b"\xff\xd8\xff some image data"

    def mock_extract(buf, fb_format=None):
        buf.write(jpeg_bytes)

    mock_image_obj_ok.extract.side_effect = mock_extract

    mock_page.get_objects.return_value = [mock_image_obj_fail, mock_image_obj_ok]

    extractor = pe.PdfExtractor(file_path="test.pdf", tenant_id="t1", user_id="u1")

    with patch("pypdfium2.raw") as mock_raw:
        mock_raw.FPDF_PAGEOBJ_IMAGE = 1
        result = extractor._extract_images(mock_page)

    # Should have one success
    assert "" in result
    assert len(saves) == 1
    assert saves[0][1] == jpeg_bytes
    assert db_stub.session.committed is True

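The parametrized format test leans on file-signature sniffing: JPEG streams begin with the bytes `\xff\xd8\xff` and PNG streams with `\x89PNG\r\n\x1a\n`, which is why the fixtures above prefix their payloads that way. A minimal helper in the same spirit (an illustration of the technique, not the extractor's actual code):

```python
def sniff_image_type(data: bytes) -> tuple[str, str]:
    """Infer (mime_type, extension) from leading magic bytes; a minimal sketch."""
    if data.startswith(b"\xff\xd8\xff"):  # JPEG SOI marker
        return "image/jpeg", "jpg"
    if data.startswith(b"\x89PNG\r\n\x1a\n"):  # PNG signature
        return "image/png", "png"
    return "application/octet-stream", "bin"


assert sniff_image_type(b"\xff\xd8\xff some jpeg") == ("image/jpeg", "jpg")
assert sniff_image_type(b"\x89PNG\r\n\x1a\n some png") == ("image/png", "png")
```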
@@ -421,7 +421,18 @@ class TestRetrievalService:
-        # In real code, this waits for all futures to complete
-        # In tests, futures complete immediately, so wait is a no-op
-        with patch("core.rag.datasource.retrieval_service.concurrent.futures.wait"):
-            yield mock_executor
+        # Mock concurrent.futures.as_completed for early error propagation
+        # In real code, this yields futures as they complete
+        # In tests, we yield all futures immediately since they're already done
+        def mock_as_completed(futures_list, timeout=None):
+            """Mock as_completed that yields futures immediately."""
+            yield from futures_list
+
+        with patch(
+            "core.rag.datasource.retrieval_service.concurrent.futures.as_completed",
+            side_effect=mock_as_completed,
+        ):
+            yield mock_executor
 
     # ==================== Vector Search Tests ====================
 
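The replacement fixture works because futures created inside a test are already resolved by the time the code under test iterates them, so yielding them in submission order is observationally equivalent to yielding on completion, while still letting the first stored exception surface. A standalone sketch of the same trick (names here are illustrative):

```python
import concurrent.futures
from unittest.mock import patch


def fake_as_completed(futures_list, timeout=None):
    # Futures created in tests are already resolved, so yielding them in
    # submission order is equivalent to yielding on completion.
    yield from futures_list


def first_error(futures_list):
    for future in concurrent.futures.as_completed(futures_list):
        future.result()  # re-raises the first failure we encounter


with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
    done = [pool.submit(lambda: 1), pool.submit(lambda: 1 / 0)]

with patch("concurrent.futures.as_completed", side_effect=fake_as_completed):
    try:
        first_error(done)
    except ZeroDivisionError:
        pass  # the failing future's exception propagates eagerly
```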
@@ -0,0 +1,873 @@
"""
Unit tests for DatasetRetrieval.process_metadata_filter_func.

This module provides comprehensive test coverage for the process_metadata_filter_func
method in the DatasetRetrieval class, which is responsible for building SQLAlchemy
filter expressions based on metadata filtering conditions.

Conditions Tested:
==================
1. **String Conditions**: contains, not contains, start with, end with
2. **Equality Conditions**: is / =, is not / ≠
3. **Null Conditions**: empty, not empty
4. **Numeric Comparisons**: before / <, after / >, ≤ / <=, ≥ / >=
5. **List Conditions**: in
6. **Edge Cases**: None values, different data types (str, int, float)

Test Architecture:
==================
- Direct instantiation of DatasetRetrieval
- Mocking of DatasetDocument model attributes
- Verification of SQLAlchemy filter expressions
- Follows Arrange-Act-Assert (AAA) pattern

Running Tests:
==============
# Run all tests in this module
uv run --project api pytest \
    api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py -v

# Run a specific test
uv run --project api pytest \
    api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py::\
TestProcessMetadataFilterFunc::test_contains_condition_string_value -v
"""

from unittest.mock import MagicMock

import pytest

from core.rag.retrieval.dataset_retrieval import DatasetRetrieval


class TestProcessMetadataFilterFunc:
    """
    Comprehensive test suite for the process_metadata_filter_func method.

    This test class validates all metadata filtering conditions supported by
    the DatasetRetrieval class, including string operations, numeric comparisons,
    null checks, and list operations.

    Method Signature:
    ==================
    def process_metadata_filter_func(
        self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list
    ) -> list:

    The method builds SQLAlchemy filter expressions by:
    1. Validating that value is not None (except for the empty/not empty conditions)
    2. Using DatasetDocument.doc_metadata JSON field operations
    3. Adding appropriate SQLAlchemy expressions to the filters list
    4. Returning the updated filters list

    Mocking Strategy:
    ==================
    - Mock DatasetDocument.doc_metadata to avoid database dependencies
    - Verify filter expressions are created correctly
    - Test with various data types (str, int, float, list)
    """

    @pytest.fixture
    def retrieval(self):
        """
        Create a DatasetRetrieval instance for testing.

        Returns:
            DatasetRetrieval: Instance to test process_metadata_filter_func
        """
        return DatasetRetrieval()
    @pytest.fixture
    def mock_doc_metadata(self):
        """
        Mock the DatasetDocument.doc_metadata JSON field.

        The method uses DatasetDocument.doc_metadata[metadata_name] to access
        JSON fields. We mock this to avoid database dependencies.

        Returns:
            Mock: Mocked doc_metadata attribute
        """
        mock_metadata_field = MagicMock()

        # Create mock for string access
        mock_string_access = MagicMock()
        mock_string_access.like = MagicMock()
        mock_string_access.notlike = MagicMock()
        mock_string_access.__eq__ = MagicMock(return_value=MagicMock())
        mock_string_access.__ne__ = MagicMock(return_value=MagicMock())
        mock_string_access.in_ = MagicMock(return_value=MagicMock())

        # Create mock for float access (for numeric comparisons)
        mock_float_access = MagicMock()
        mock_float_access.__eq__ = MagicMock(return_value=MagicMock())
        mock_float_access.__ne__ = MagicMock(return_value=MagicMock())
        mock_float_access.__lt__ = MagicMock(return_value=MagicMock())
        mock_float_access.__gt__ = MagicMock(return_value=MagicMock())
        mock_float_access.__le__ = MagicMock(return_value=MagicMock())
        mock_float_access.__ge__ = MagicMock(return_value=MagicMock())

        # Create mock for null checks
        mock_null_access = MagicMock()
        mock_null_access.is_ = MagicMock(return_value=MagicMock())
        mock_null_access.isnot = MagicMock(return_value=MagicMock())

        # Setup __getitem__ to return the appropriate mock based on usage
        def getitem_side_effect(name):
            if name in ["author", "title", "category"]:
                return mock_string_access
            elif name in ["year", "price", "rating"]:
                return mock_float_access
            else:
                return mock_string_access

        mock_metadata_field.__getitem__ = MagicMock(side_effect=getitem_side_effect)
        mock_metadata_field.as_string.return_value = mock_string_access
        mock_metadata_field.as_float.return_value = mock_float_access
        # Null checks go through the item-access mock, so wire the null-check
        # helpers onto the shared string-access mock returned by __getitem__.
        mock_string_access.is_ = mock_null_access.is_
        mock_string_access.isnot = mock_null_access.isnot

        return mock_metadata_field
    # ==================== String Condition Tests ====================

    def test_contains_condition_string_value(self, retrieval):
        """
        Test 'contains' condition with string value.

        Verifies:
        - Filters list is populated with LIKE expression
        - Pattern matching uses %value% syntax
        """
        filters = []
        sequence = 0
        condition = "contains"
        metadata_name = "author"
        value = "John"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_not_contains_condition(self, retrieval):
        """
        Test 'not contains' condition.

        Verifies:
        - Filters list is populated with NOT LIKE expression
        - Pattern matching uses %value% syntax with negation
        """
        filters = []
        sequence = 0
        condition = "not contains"
        metadata_name = "title"
        value = "banned"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_start_with_condition(self, retrieval):
        """
        Test 'start with' condition.

        Verifies:
        - Filters list is populated with LIKE expression
        - Pattern matching uses value% syntax
        """
        filters = []
        sequence = 0
        condition = "start with"
        metadata_name = "category"
        value = "tech"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_end_with_condition(self, retrieval):
        """
        Test 'end with' condition.

        Verifies:
        - Filters list is populated with LIKE expression
        - Pattern matching uses %value syntax
        """
        filters = []
        sequence = 0
        condition = "end with"
        metadata_name = "filename"
        value = ".pdf"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    # ==================== Equality Condition Tests ====================

    def test_is_condition_with_string_value(self, retrieval):
        """
        Test 'is' (=) condition with string value.

        Verifies:
        - Filters list is populated with equality expression
        - String comparison is used
        """
        filters = []
        sequence = 0
        condition = "is"
        metadata_name = "author"
        value = "Jane Doe"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_equals_condition_with_string_value(self, retrieval):
        """
        Test '=' condition with string value.

        Verifies:
        - Same behavior as 'is' condition
        - String comparison is used
        """
        filters = []
        sequence = 0
        condition = "="
        metadata_name = "category"
        value = "technology"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_is_condition_with_int_value(self, retrieval):
        """
        Test 'is' condition with integer value.

        Verifies:
        - Numeric comparison is used
        - as_float() is called on the metadata field
        """
        filters = []
        sequence = 0
        condition = "is"
        metadata_name = "year"
        value = 2023

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_is_condition_with_float_value(self, retrieval):
        """
        Test 'is' condition with float value.

        Verifies:
        - Numeric comparison is used
        - as_float() is called on the metadata field
        """
        filters = []
        sequence = 0
        condition = "is"
        metadata_name = "price"
        value = 19.99

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_is_not_condition_with_string_value(self, retrieval):
        """
        Test 'is not' (≠) condition with string value.

        Verifies:
        - Filters list is populated with inequality expression
        - String comparison is used
        """
        filters = []
        sequence = 0
        condition = "is not"
        metadata_name = "author"
        value = "Unknown"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_not_equals_condition(self, retrieval):
        """
        Test '≠' condition with string value.

        Verifies:
        - Same behavior as 'is not' condition
        - Inequality expression is used
        """
        filters = []
        sequence = 0
        condition = "≠"
        metadata_name = "category"
        value = "archived"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_is_not_condition_with_numeric_value(self, retrieval):
        """
        Test 'is not' condition with numeric value.

        Verifies:
        - Numeric inequality comparison is used
        - as_float() is called on the metadata field
        """
        filters = []
        sequence = 0
        condition = "is not"
        metadata_name = "year"
        value = 2000

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    # ==================== Null Condition Tests ====================

    def test_empty_condition(self, retrieval):
        """
        Test 'empty' condition (null check).

        Verifies:
        - Filters list is populated with IS NULL expression
        - Value can be None for this condition
        """
        filters = []
        sequence = 0
        condition = "empty"
        metadata_name = "author"
        value = None

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_not_empty_condition(self, retrieval):
        """
        Test 'not empty' condition (not null check).

        Verifies:
        - Filters list is populated with IS NOT NULL expression
        - Value can be None for this condition
        """
        filters = []
        sequence = 0
        condition = "not empty"
        metadata_name = "description"
        value = None

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    # ==================== Numeric Comparison Tests ====================

    def test_before_condition(self, retrieval):
        """
        Test 'before' (<) condition.

        Verifies:
        - Filters list is populated with less than expression
        - Numeric comparison is used
        """
        filters = []
        sequence = 0
        condition = "before"
        metadata_name = "year"
        value = 2020

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_less_than_condition(self, retrieval):
        """
        Test '<' condition.

        Verifies:
        - Same behavior as 'before' condition
        - Less than expression is used
        """
        filters = []
        sequence = 0
        condition = "<"
        metadata_name = "price"
        value = 100.0

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_after_condition(self, retrieval):
        """
        Test 'after' (>) condition.

        Verifies:
        - Filters list is populated with greater than expression
        - Numeric comparison is used
        """
        filters = []
        sequence = 0
        condition = "after"
        metadata_name = "year"
        value = 2020

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_greater_than_condition(self, retrieval):
        """
        Test '>' condition.

        Verifies:
        - Same behavior as 'after' condition
        - Greater than expression is used
        """
        filters = []
        sequence = 0
        condition = ">"
        metadata_name = "rating"
        value = 4.5

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_less_than_or_equal_condition_unicode(self, retrieval):
        """
        Test '≤' condition.

        Verifies:
        - Filters list is populated with less than or equal expression
        - Numeric comparison is used
        """
        filters = []
        sequence = 0
        condition = "≤"
        metadata_name = "price"
        value = 50.0

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_less_than_or_equal_condition_ascii(self, retrieval):
        """
        Test '<=' condition.

        Verifies:
        - Same behavior as '≤' condition
        - Less than or equal expression is used
        """
        filters = []
        sequence = 0
        condition = "<="
        metadata_name = "year"
        value = 2023

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_greater_than_or_equal_condition_unicode(self, retrieval):
        """
        Test '≥' condition.

        Verifies:
        - Filters list is populated with greater than or equal expression
        - Numeric comparison is used
        """
        filters = []
        sequence = 0
        condition = "≥"
        metadata_name = "rating"
        value = 3.5

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_greater_than_or_equal_condition_ascii(self, retrieval):
        """
        Test '>=' condition.

        Verifies:
        - Same behavior as '≥' condition
        - Greater than or equal expression is used
        """
        filters = []
        sequence = 0
        condition = ">="
        metadata_name = "year"
        value = 2000

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    # ==================== List/In Condition Tests ====================

    def test_in_condition_with_comma_separated_string(self, retrieval):
        """
        Test 'in' condition with comma-separated string value.

        Verifies:
        - String is split into list
        - Whitespace is trimmed from each value
        - IN expression is created
        """
        filters = []
        sequence = 0
        condition = "in"
        metadata_name = "category"
        value = "tech, science, AI "

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_in_condition_with_list_value(self, retrieval):
        """
        Test 'in' condition with list value.

        Verifies:
        - List is processed correctly
        - None values are filtered out
        - IN expression is created with valid values
        """
        filters = []
        sequence = 0
        condition = "in"
        metadata_name = "tags"
        value = ["python", "javascript", None, "golang"]

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_in_condition_with_tuple_value(self, retrieval):
        """
        Test 'in' condition with tuple value.

        Verifies:
        - Tuple is processed like a list
        - IN expression is created
        """
        filters = []
        sequence = 0
        condition = "in"
        metadata_name = "category"
        value = ("tech", "science", "ai")

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_in_condition_with_empty_string(self, retrieval):
        """
        Test 'in' condition with empty string value.

        Verifies:
        - Empty string results in literal(False) filter
        - No valid values to match
        """
        filters = []
        sequence = 0
        condition = "in"
        metadata_name = "category"
        value = ""

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1
        # Verify it's a literal(False) expression
        # This is a bit tricky to test without access to the actual expression

    def test_in_condition_with_only_whitespace(self, retrieval):
        """
        Test 'in' condition with whitespace-only string value.

        Verifies:
        - Whitespace-only string results in literal(False) filter
        - All values are stripped and filtered out
        """
        filters = []
        sequence = 0
        condition = "in"
        metadata_name = "category"
        value = " , , "

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_in_condition_with_single_string(self, retrieval):
        """
        Test 'in' condition with single non-comma string.

        Verifies:
        - Single string is treated as single-item list
        - IN expression is created with one value
        """
        filters = []
        sequence = 0
        condition = "in"
        metadata_name = "category"
        value = "technology"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    # ==================== Edge Case Tests ====================

    def test_none_value_with_non_empty_condition(self, retrieval):
        """
        Test None value with conditions that require a value.

        Verifies:
        - Original filters list is returned unchanged
        - No filter is added for None values (except empty/not empty)
        """
        filters = []
        sequence = 0
        condition = "contains"
        metadata_name = "author"
        value = None

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 0  # No filter added

    def test_none_value_with_equals_condition(self, retrieval):
        """
        Test None value with 'is' (=) condition.

        Verifies:
        - Original filters list is returned unchanged
        - No filter is added for None values
        """
        filters = []
        sequence = 0
        condition = "is"
        metadata_name = "author"
        value = None

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 0

    def test_none_value_with_numeric_condition(self, retrieval):
        """
        Test None value with numeric comparison condition.

        Verifies:
        - Original filters list is returned unchanged
        - No filter is added for None values
        """
        filters = []
        sequence = 0
        condition = ">"
        metadata_name = "year"
        value = None

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 0

    def test_existing_filters_preserved(self, retrieval):
        """
        Test that existing filters are preserved.

        Verifies:
        - Existing filters in the list are not removed
        - New filters are appended to the list
        """
        existing_filter = MagicMock()
        filters = [existing_filter]
        sequence = 0
        condition = "contains"
        metadata_name = "author"
        value = "test"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 2
        assert filters[0] == existing_filter

    def test_multiple_filters_accumulated(self, retrieval):
        """
        Test multiple calls to accumulate filters.

        Verifies:
        - Each call adds a new filter to the list
        - All filters are preserved across calls
        """
        filters = []

        # First filter
        retrieval.process_metadata_filter_func(0, "contains", "author", "John", filters)
        assert len(filters) == 1

        # Second filter
        retrieval.process_metadata_filter_func(1, ">", "year", 2020, filters)
        assert len(filters) == 2

        # Third filter
        retrieval.process_metadata_filter_func(2, "is", "category", "tech", filters)
        assert len(filters) == 3

    def test_unknown_condition(self, retrieval):
        """
        Test unknown/unsupported condition.

        Verifies:
        - Original filters list is returned unchanged
        - No filter is added for unknown conditions
        """
        filters = []
        sequence = 0
        condition = "unknown_condition"
        metadata_name = "author"
        value = "test"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 0

    def test_empty_string_value_with_contains(self, retrieval):
        """
        Test empty string value with 'contains' condition.

        Verifies:
        - Filter is added even with empty string
        - LIKE expression is created
        """
        filters = []
        sequence = 0
        condition = "contains"
        metadata_name = "author"
        value = ""

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_special_characters_in_value(self, retrieval):
        """
        Test special characters in value string.

        Verifies:
        - Special characters are handled in value
        - LIKE expression is created correctly
        """
        filters = []
        sequence = 0
        condition = "contains"
        metadata_name = "title"
        value = "C++ & Python's features"

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_zero_value_with_numeric_condition(self, retrieval):
        """
        Test zero value with numeric comparison condition.

        Verifies:
        - Zero is treated as a valid value
        - Numeric comparison is performed
        """
        filters = []
        sequence = 0
        condition = ">"
        metadata_name = "price"
        value = 0

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_negative_value_with_numeric_condition(self, retrieval):
        """
        Test negative value with numeric comparison condition.

        Verifies:
        - Negative numbers are handled correctly
        - Numeric comparison is performed
        """
        filters = []
        sequence = 0
        condition = "<"
        metadata_name = "temperature"
        value = -10.5

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

    def test_float_value_with_integer_comparison(self, retrieval):
        """
        Test float value with numeric comparison condition.

        Verifies:
        - Float values work correctly
        - Numeric comparison is performed
        """
        filters = []
        sequence = 0
        condition = ">="
        metadata_name = "rating"
        value = 4.5

        result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters)

        assert result == filters
        assert len(filters) == 1

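For context, the expressions these tests count are built from SQLAlchemy's JSON path comparators (`as_string()`, `as_float()`, `is_()`). A minimal sketch of the three main shapes, using a hypothetical `Doc` model in place of `DatasetDocument` (table and column names here are illustrative):

```python
from sqlalchemy import JSON, Column, Integer, select
from sqlalchemy.dialects import postgresql
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Doc(Base):
    """Hypothetical stand-in for DatasetDocument, for illustration only."""

    __tablename__ = "docs"
    id = Column(Integer, primary_key=True)
    doc_metadata = Column(JSON)


filters = []
# 'contains' on a string field compiles to LIKE '%John%'
filters.append(Doc.doc_metadata["author"].as_string().like("%John%"))
# '>' on a numeric field casts the JSON element before comparing
filters.append(Doc.doc_metadata["year"].as_float() > 2020)
# 'empty' becomes an IS NULL check on the JSON element
filters.append(Doc.doc_metadata["category"].is_(None))

stmt = select(Doc).where(*filters)
print(stmt.compile(dialect=postgresql.dialect()))  # renders the accumulated WHERE clause
```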
@@ -0,0 +1 @@
"""Tests for graph traversal components."""

@ -0,0 +1,307 @@
|
|||
"""Unit tests for skip propagator."""
|
||||
|
||||
from unittest.mock import MagicMock, create_autospec
|
||||
|
||||
from core.workflow.graph import Edge, Graph
|
||||
from core.workflow.graph_engine.graph_state_manager import GraphStateManager
|
||||
from core.workflow.graph_engine.graph_traversal.skip_propagator import SkipPropagator
|
||||
|
||||
|
||||
class TestSkipPropagator:
|
||||
"""Test suite for SkipPropagator."""
|
||||
|
||||
def test_propagate_skip_from_edge_with_unknown_edges_stops_processing(self) -> None:
|
||||
"""When there are unknown incoming edges, propagation should stop."""
|
||||
# Arrange
|
||||
mock_graph = create_autospec(Graph)
|
||||
mock_state_manager = create_autospec(GraphStateManager)
|
||||
|
||||
# Create a mock edge
|
||||
mock_edge = MagicMock(spec=Edge)
|
||||
mock_edge.id = "edge_1"
|
||||
mock_edge.head = "node_2"
|
||||
|
||||
# Setup graph edges dict
|
||||
mock_graph.edges = {"edge_1": mock_edge}
|
||||
|
||||
# Setup incoming edges
|
||||
incoming_edges = [MagicMock(spec=Edge), MagicMock(spec=Edge)]
|
||||
mock_graph.get_incoming_edges.return_value = incoming_edges
|
||||
|
||||
# Setup state manager to return has_unknown=True
|
||||
mock_state_manager.analyze_edge_states.return_value = {
|
||||
"has_unknown": True,
|
||||
"has_taken": False,
|
||||
"all_skipped": False,
|
||||
}
|
||||
|
||||
propagator = SkipPropagator(mock_graph, mock_state_manager)
|
||||
|
||||
# Act
|
||||
propagator.propagate_skip_from_edge("edge_1")
|
||||
|
||||
# Assert
|
||||
mock_graph.get_incoming_edges.assert_called_once_with("node_2")
|
||||
mock_state_manager.analyze_edge_states.assert_called_once_with(incoming_edges)
|
||||
# Should not call any other state manager methods
|
||||
mock_state_manager.enqueue_node.assert_not_called()
|
||||
mock_state_manager.start_execution.assert_not_called()
|
||||
mock_state_manager.mark_node_skipped.assert_not_called()
|
||||
|
||||
def test_propagate_skip_from_edge_with_taken_edge_enqueues_node(self) -> None:
|
||||
"""When there is at least one taken edge, node should be enqueued."""
|
||||
# Arrange
|
||||
mock_graph = create_autospec(Graph)
|
||||
mock_state_manager = create_autospec(GraphStateManager)
|
||||
|
||||
# Create a mock edge
|
||||
mock_edge = MagicMock(spec=Edge)
|
||||
mock_edge.id = "edge_1"
|
||||
mock_edge.head = "node_2"
|
||||
|
||||
mock_graph.edges = {"edge_1": mock_edge}
|
||||
incoming_edges = [MagicMock(spec=Edge)]
|
||||
mock_graph.get_incoming_edges.return_value = incoming_edges
|
||||
|
||||
# Setup state manager to return has_taken=True
|
||||
mock_state_manager.analyze_edge_states.return_value = {
|
||||
"has_unknown": False,
|
||||
"has_taken": True,
|
||||
"all_skipped": False,
|
||||
}
|
||||
|
||||
propagator = SkipPropagator(mock_graph, mock_state_manager)
|
||||
|
||||
# Act
|
||||
propagator.propagate_skip_from_edge("edge_1")
|
||||
|
||||
# Assert
|
||||
mock_state_manager.enqueue_node.assert_called_once_with("node_2")
|
||||
mock_state_manager.start_execution.assert_called_once_with("node_2")
|
||||
mock_state_manager.mark_node_skipped.assert_not_called()
|
||||
|
||||
def test_propagate_skip_from_edge_with_all_skipped_propagates_to_node(self) -> None:
|
||||
"""When all incoming edges are skipped, should propagate skip to node."""
|
||||
# Arrange
|
||||
mock_graph = create_autospec(Graph)
|
||||
mock_state_manager = create_autospec(GraphStateManager)
|
||||
|
||||
# Create a mock edge
|
||||
mock_edge = MagicMock(spec=Edge)
|
||||
mock_edge.id = "edge_1"
|
||||
mock_edge.head = "node_2"
|
||||
|
||||
mock_graph.edges = {"edge_1": mock_edge}
|
||||
incoming_edges = [MagicMock(spec=Edge)]
|
||||
mock_graph.get_incoming_edges.return_value = incoming_edges
|
||||
|
||||
# Setup state manager to return all_skipped=True
|
||||
mock_state_manager.analyze_edge_states.return_value = {
|
||||
"has_unknown": False,
|
||||
"has_taken": False,
|
||||
"all_skipped": True,
|
||||
}
|
||||
|
||||
propagator = SkipPropagator(mock_graph, mock_state_manager)
|
||||
|
||||
# Act
|
||||
propagator.propagate_skip_from_edge("edge_1")
|
||||
|
||||
# Assert
|
||||
mock_state_manager.mark_node_skipped.assert_called_once_with("node_2")
|
||||
mock_state_manager.enqueue_node.assert_not_called()
|
||||
mock_state_manager.start_execution.assert_not_called()
|
||||
|
||||
def test_propagate_skip_to_node_marks_node_and_outgoing_edges_skipped(self) -> None:
|
||||
"""_propagate_skip_to_node should mark node and all outgoing edges as skipped."""
|
||||
# Arrange
|
||||
mock_graph = create_autospec(Graph)
|
||||
mock_state_manager = create_autospec(GraphStateManager)
|
||||
|
||||
# Create outgoing edges
|
||||
edge1 = MagicMock(spec=Edge)
|
||||
edge1.id = "edge_2"
|
||||
edge1.head = "node_downstream_1" # Set head for propagate_skip_from_edge
|
||||
|
||||
edge2 = MagicMock(spec=Edge)
|
||||
edge2.id = "edge_3"
|
||||
edge2.head = "node_downstream_2"
|
||||
|
||||
# Setup graph edges dict for propagate_skip_from_edge
|
||||
mock_graph.edges = {"edge_2": edge1, "edge_3": edge2}
|
||||
mock_graph.get_outgoing_edges.return_value = [edge1, edge2]
|
||||
|
||||
# Setup get_incoming_edges to return empty list to stop recursion
|
||||
mock_graph.get_incoming_edges.return_value = []
|
||||
|
||||
propagator = SkipPropagator(mock_graph, mock_state_manager)
|
||||
|
||||
# Use mock to call private method
|
||||
# Act
|
||||
propagator._propagate_skip_to_node("node_1")
|
||||
|
||||
# Assert
|
||||
mock_state_manager.mark_node_skipped.assert_called_once_with("node_1")
|
||||
mock_state_manager.mark_edge_skipped.assert_any_call("edge_2")
|
||||
mock_state_manager.mark_edge_skipped.assert_any_call("edge_3")
|
||||
assert mock_state_manager.mark_edge_skipped.call_count == 2
|
||||
# Should recursively propagate from each edge
|
||||
# Since propagate_skip_from_edge is called, we need to verify it was called
|
||||
# But we can't directly verify due to recursion. We'll trust the logic.
|
||||
|
||||
def test_skip_branch_paths_marks_unselected_edges_and_propagates(self) -> None:
|
||||
"""skip_branch_paths should mark all unselected edges as skipped and propagate."""
|
||||
# Arrange
|
||||
mock_graph = create_autospec(Graph)
|
||||
mock_state_manager = create_autospec(GraphStateManager)
|
||||
|
||||
# Create unselected edges
|
||||
edge1 = MagicMock(spec=Edge)
|
||||
edge1.id = "edge_1"
|
||||
edge1.head = "node_downstream_1"
|
||||
|
||||
edge2 = MagicMock(spec=Edge)
|
||||
edge2.id = "edge_2"
|
||||
edge2.head = "node_downstream_2"
|
||||
|
||||
unselected_edges = [edge1, edge2]
|
||||
|
||||
# Setup graph edges dict
|
||||
mock_graph.edges = {"edge_1": edge1, "edge_2": edge2}
|
||||
# Setup get_incoming_edges to return empty list to stop recursion
|
||||
mock_graph.get_incoming_edges.return_value = []
|
||||
|
||||
propagator = SkipPropagator(mock_graph, mock_state_manager)
|
||||
|
||||
# Act
|
||||
propagator.skip_branch_paths(unselected_edges)
|
||||
|
||||
# Assert
|
||||
mock_state_manager.mark_edge_skipped.assert_any_call("edge_1")
|
||||
        mock_state_manager.mark_edge_skipped.assert_any_call("edge_2")
        assert mock_state_manager.mark_edge_skipped.call_count == 2
        # propagate_skip_from_edge should be called for each edge
        # We can't directly verify due to the mock, but the logic is covered

    def test_propagate_skip_from_edge_recursively_propagates_through_graph(self) -> None:
        """Skip propagation should recursively propagate through the graph."""
        # Arrange
        mock_graph = create_autospec(Graph)
        mock_state_manager = create_autospec(GraphStateManager)

        # Create edge chain: edge_1 -> node_2 -> edge_3 -> node_4
        edge1 = MagicMock(spec=Edge)
        edge1.id = "edge_1"
        edge1.head = "node_2"

        edge3 = MagicMock(spec=Edge)
        edge3.id = "edge_3"
        edge3.head = "node_4"

        mock_graph.edges = {"edge_1": edge1, "edge_3": edge3}

        # Setup get_incoming_edges to return different values based on node
        def get_incoming_edges_side_effect(node_id):
            if node_id == "node_2":
                return [edge1]
            elif node_id == "node_4":
                return [edge3]
            return []

        mock_graph.get_incoming_edges.side_effect = get_incoming_edges_side_effect

        # Setup get_outgoing_edges to return different values based on node
        def get_outgoing_edges_side_effect(node_id):
            if node_id == "node_2":
                return [edge3]
            elif node_id == "node_4":
                return []  # No outgoing edges, stops recursion
            return []

        mock_graph.get_outgoing_edges.side_effect = get_outgoing_edges_side_effect

        # Setup state manager to return all_skipped for both nodes
        mock_state_manager.analyze_edge_states.return_value = {
            "has_unknown": False,
            "has_taken": False,
            "all_skipped": True,
        }

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert
        # Should mark node_2 as skipped
        mock_state_manager.mark_node_skipped.assert_any_call("node_2")
        # Should mark edge_3 as skipped
        mock_state_manager.mark_edge_skipped.assert_any_call("edge_3")
        # Should propagate to node_4
        mock_state_manager.mark_node_skipped.assert_any_call("node_4")
        assert mock_state_manager.mark_node_skipped.call_count == 2

    def test_propagate_skip_from_edge_with_mixed_edge_states_handles_correctly(self) -> None:
        """Test with mixed edge states (some unknown, some taken, some skipped)."""
        # Arrange
        mock_graph = create_autospec(Graph)
        mock_state_manager = create_autospec(GraphStateManager)

        mock_edge = MagicMock(spec=Edge)
        mock_edge.id = "edge_1"
        mock_edge.head = "node_2"

        mock_graph.edges = {"edge_1": mock_edge}
        incoming_edges = [MagicMock(spec=Edge), MagicMock(spec=Edge), MagicMock(spec=Edge)]
        mock_graph.get_incoming_edges.return_value = incoming_edges

        # Test 1: has_unknown=True, has_taken=False, all_skipped=False
        mock_state_manager.analyze_edge_states.return_value = {
            "has_unknown": True,
            "has_taken": False,
            "all_skipped": False,
        }

        propagator = SkipPropagator(mock_graph, mock_state_manager)

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert - should stop processing
        mock_state_manager.enqueue_node.assert_not_called()
        mock_state_manager.mark_node_skipped.assert_not_called()

        # Reset mocks for next test
        mock_state_manager.reset_mock()
        mock_graph.reset_mock()

        # Test 2: has_unknown=False, has_taken=True, all_skipped=False
        mock_state_manager.analyze_edge_states.return_value = {
            "has_unknown": False,
            "has_taken": True,
            "all_skipped": False,
        }

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert - should enqueue node
        mock_state_manager.enqueue_node.assert_called_once_with("node_2")
        mock_state_manager.start_execution.assert_called_once_with("node_2")

        # Reset mocks for next test
        mock_state_manager.reset_mock()
        mock_graph.reset_mock()

        # Test 3: has_unknown=False, has_taken=False, all_skipped=True
        mock_state_manager.analyze_edge_states.return_value = {
            "has_unknown": False,
            "has_taken": False,
            "all_skipped": True,
        }

        # Act
        propagator.propagate_skip_from_edge("edge_1")

        # Assert - should propagate skip
        mock_state_manager.mark_node_skipped.assert_called_once_with("node_2")
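
These three tests pin down a single dispatch rule per incoming-edge analysis: any unknown upstream edge stops propagation, any taken edge enqueues the node for execution, and only when every incoming edge is skipped does the skip cascade onward. A minimal sketch of that dispatch, assuming `graph`/`state_manager` attribute names on the propagator and the `analyze_edge_states` dict shape used in the mocks above (everything beyond what the tests exercise is an assumption):

```python
# Sketch of the branching the tests above verify; attribute names and the
# analyze_edge_states signature are assumptions, not the shipped code.
def propagate_skip_from_edge_sketch(propagator, edge_id: str) -> None:
    edge = propagator.graph.edges[edge_id]
    states = propagator.state_manager.analyze_edge_states(
        propagator.graph.get_incoming_edges(edge.head)
    )
    if states["has_unknown"]:
        return  # some upstream edges are still undecided: wait
    if states["has_taken"]:
        # at least one incoming edge was taken, so the node must run
        propagator.state_manager.enqueue_node(edge.head)
        propagator.state_manager.start_execution(edge.head)
        return
    if states["all_skipped"]:
        # every incoming edge was skipped: skip the node and recurse downstream
        propagator.state_manager.mark_node_skipped(edge.head)
        for out_edge in propagator.graph.get_outgoing_edges(edge.head):
            propagator.state_manager.mark_edge_skipped(out_edge.id)
            propagate_skip_from_edge_sketch(propagator, out_edge.id)
```

The recursion terminates at nodes with no outgoing edges, which is exactly what the `node_4` setup in the recursive test relies on.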
@@ -8,11 +8,12 @@ class TestCelerySSLConfiguration:
     """Test suite for Celery SSL configuration."""
 
     def test_get_celery_ssl_options_when_ssl_disabled(self):
-        """Test SSL options when REDIS_USE_SSL is False."""
-        mock_config = MagicMock()
-        mock_config.REDIS_USE_SSL = False
+        """Test SSL options when BROKER_USE_SSL is False."""
+        from configs import DifyConfig
 
-        with patch("extensions.ext_celery.dify_config", mock_config):
+        dify_config = DifyConfig(CELERY_BROKER_URL="redis://localhost:6379/0")
+
+        with patch("extensions.ext_celery.dify_config", dify_config):
             from extensions.ext_celery import _get_celery_ssl_options
 
             result = _get_celery_ssl_options()
@@ -21,7 +22,6 @@ class TestCelerySSLConfiguration:
     def test_get_celery_ssl_options_when_broker_not_redis(self):
         """Test SSL options when broker is not Redis."""
         mock_config = MagicMock()
-        mock_config.REDIS_USE_SSL = True
         mock_config.CELERY_BROKER_URL = "amqp://localhost:5672"
 
         with patch("extensions.ext_celery.dify_config", mock_config):
@@ -33,7 +33,6 @@ class TestCelerySSLConfiguration:
     def test_get_celery_ssl_options_with_cert_none(self):
         """Test SSL options with CERT_NONE requirement."""
         mock_config = MagicMock()
-        mock_config.REDIS_USE_SSL = True
         mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0"
         mock_config.REDIS_SSL_CERT_REQS = "CERT_NONE"
         mock_config.REDIS_SSL_CA_CERTS = None
@@ -53,7 +52,6 @@ class TestCelerySSLConfiguration:
     def test_get_celery_ssl_options_with_cert_required(self):
         """Test SSL options with CERT_REQUIRED and certificates."""
         mock_config = MagicMock()
-        mock_config.REDIS_USE_SSL = True
         mock_config.CELERY_BROKER_URL = "rediss://localhost:6380/0"
         mock_config.REDIS_SSL_CERT_REQS = "CERT_REQUIRED"
         mock_config.REDIS_SSL_CA_CERTS = "/path/to/ca.crt"
@@ -73,7 +71,6 @@ class TestCelerySSLConfiguration:
     def test_get_celery_ssl_options_with_cert_optional(self):
         """Test SSL options with CERT_OPTIONAL requirement."""
         mock_config = MagicMock()
-        mock_config.REDIS_USE_SSL = True
         mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0"
         mock_config.REDIS_SSL_CERT_REQS = "CERT_OPTIONAL"
         mock_config.REDIS_SSL_CA_CERTS = "/path/to/ca.crt"
@@ -91,7 +88,6 @@ class TestCelerySSLConfiguration:
     def test_get_celery_ssl_options_with_invalid_cert_reqs(self):
         """Test SSL options with invalid cert requirement defaults to CERT_NONE."""
         mock_config = MagicMock()
-        mock_config.REDIS_USE_SSL = True
         mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0"
         mock_config.REDIS_SSL_CERT_REQS = "INVALID_VALUE"
         mock_config.REDIS_SSL_CA_CERTS = None
@@ -108,7 +104,6 @@ class TestCelerySSLConfiguration:
     def test_celery_init_applies_ssl_to_broker_and_backend(self):
         """Test that SSL options are applied to both broker and backend when using Redis."""
         mock_config = MagicMock()
-        mock_config.REDIS_USE_SSL = True
         mock_config.CELERY_BROKER_URL = "redis://localhost:6379/0"
         mock_config.CELERY_BACKEND = "redis"
         mock_config.CELERY_RESULT_BACKEND = "redis://localhost:6379/0"
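
Taken together, these hunks outline the contract of `_get_celery_ssl_options`: SSL options only apply to Redis broker URLs, the cert-requirement string maps onto the `ssl` module constants with `CERT_NONE` as the fallback for invalid values, and the resulting options are applied to both broker and backend. A rough sketch of that mapping under those assumptions (the enable/disable switch tied to `BROKER_USE_SSL` is elided, and the returned key names are illustrative):

```python
import ssl

# Illustrative mapping consistent with the tests above; the real helper
# lives in extensions/ext_celery.py and may differ in detail.
_CERT_REQS = {
    "CERT_NONE": ssl.CERT_NONE,
    "CERT_OPTIONAL": ssl.CERT_OPTIONAL,
    "CERT_REQUIRED": ssl.CERT_REQUIRED,
}


def celery_ssl_options_sketch(config) -> dict | None:
    if not config.CELERY_BROKER_URL.startswith(("redis://", "rediss://")):
        return None  # the broker-not-redis test expects no SSL options here
    return {
        # unknown values fall back to CERT_NONE, per the invalid-cert-reqs test
        "ssl_cert_reqs": _CERT_REQS.get(config.REDIS_SSL_CERT_REQS, ssl.CERT_NONE),
        "ssl_ca_certs": config.REDIS_SSL_CA_CERTS,
    }
```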
@@ -0,0 +1,272 @@
import base64
import hashlib
from datetime import datetime
from unittest.mock import ANY, MagicMock

import pytest
from botocore.exceptions import ClientError

from libs import archive_storage as storage_module
from libs.archive_storage import (
    ArchiveStorage,
    ArchiveStorageError,
    ArchiveStorageNotConfiguredError,
)

BUCKET_NAME = "archive-bucket"


def _configure_storage(monkeypatch, **overrides):
    defaults = {
        "ARCHIVE_STORAGE_ENABLED": True,
        "ARCHIVE_STORAGE_ENDPOINT": "https://storage.example.com",
        "ARCHIVE_STORAGE_ARCHIVE_BUCKET": BUCKET_NAME,
        "ARCHIVE_STORAGE_ACCESS_KEY": "access",
        "ARCHIVE_STORAGE_SECRET_KEY": "secret",
        "ARCHIVE_STORAGE_REGION": "auto",
    }
    defaults.update(overrides)
    for key, value in defaults.items():
        monkeypatch.setattr(storage_module.dify_config, key, value, raising=False)


def _client_error(code: str) -> ClientError:
    return ClientError({"Error": {"Code": code}}, "Operation")


def _mock_client(monkeypatch):
    client = MagicMock()
    client.head_bucket.return_value = None
    boto_client = MagicMock(return_value=client)
    monkeypatch.setattr(storage_module.boto3, "client", boto_client)
    return client, boto_client


def test_init_disabled(monkeypatch):
    _configure_storage(monkeypatch, ARCHIVE_STORAGE_ENABLED=False)
    with pytest.raises(ArchiveStorageNotConfiguredError, match="not enabled"):
        ArchiveStorage(bucket=BUCKET_NAME)


def test_init_missing_config(monkeypatch):
    _configure_storage(monkeypatch, ARCHIVE_STORAGE_ENDPOINT=None)
    with pytest.raises(ArchiveStorageNotConfiguredError, match="incomplete"):
        ArchiveStorage(bucket=BUCKET_NAME)


def test_init_bucket_not_found(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.head_bucket.side_effect = _client_error("404")

    with pytest.raises(ArchiveStorageNotConfiguredError, match="does not exist"):
        ArchiveStorage(bucket=BUCKET_NAME)


def test_init_bucket_access_denied(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.head_bucket.side_effect = _client_error("403")

    with pytest.raises(ArchiveStorageNotConfiguredError, match="Access denied"):
        ArchiveStorage(bucket=BUCKET_NAME)


def test_init_bucket_other_error(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.head_bucket.side_effect = _client_error("500")

    with pytest.raises(ArchiveStorageError, match="Failed to access archive bucket"):
        ArchiveStorage(bucket=BUCKET_NAME)


def test_init_sets_client(monkeypatch):
    _configure_storage(monkeypatch)
    client, boto_client = _mock_client(monkeypatch)

    storage = ArchiveStorage(bucket=BUCKET_NAME)

    boto_client.assert_called_once_with(
        "s3",
        endpoint_url="https://storage.example.com",
        aws_access_key_id="access",
        aws_secret_access_key="secret",
        region_name="auto",
        config=ANY,
    )
    assert storage.client is client
    assert storage.bucket == BUCKET_NAME


def test_put_object_returns_checksum(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    data = b"hello"
    checksum = storage.put_object("key", data)

    expected_md5 = hashlib.md5(data).hexdigest()
    expected_content_md5 = base64.b64encode(hashlib.md5(data).digest()).decode()
    client.put_object.assert_called_once_with(
        Bucket="archive-bucket",
        Key="key",
        Body=data,
        ContentMD5=expected_content_md5,
    )
    assert checksum == expected_md5


def test_put_object_raises_on_error(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    storage = ArchiveStorage(bucket=BUCKET_NAME)
    client.put_object.side_effect = _client_error("500")

    with pytest.raises(ArchiveStorageError, match="Failed to upload object"):
        storage.put_object("key", b"data")


def test_get_object_returns_bytes(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    body = MagicMock()
    body.read.return_value = b"payload"
    client.get_object.return_value = {"Body": body}
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    assert storage.get_object("key") == b"payload"


def test_get_object_missing(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.get_object.side_effect = _client_error("NoSuchKey")
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    with pytest.raises(FileNotFoundError, match="Archive object not found"):
        storage.get_object("missing")


def test_get_object_stream(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    body = MagicMock()
    body.iter_chunks.return_value = [b"a", b"b"]
    client.get_object.return_value = {"Body": body}
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    assert list(storage.get_object_stream("key")) == [b"a", b"b"]


def test_get_object_stream_missing(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.get_object.side_effect = _client_error("NoSuchKey")
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    with pytest.raises(FileNotFoundError, match="Archive object not found"):
        list(storage.get_object_stream("missing"))


def test_object_exists(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    assert storage.object_exists("key") is True
    client.head_object.side_effect = _client_error("404")
    assert storage.object_exists("missing") is False


def test_delete_object_error(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.delete_object.side_effect = _client_error("500")
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    with pytest.raises(ArchiveStorageError, match="Failed to delete object"):
        storage.delete_object("key")


def test_list_objects(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    paginator = MagicMock()
    paginator.paginate.return_value = [
        {"Contents": [{"Key": "a"}, {"Key": "b"}]},
        {"Contents": [{"Key": "c"}]},
    ]
    client.get_paginator.return_value = paginator
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    assert storage.list_objects("prefix") == ["a", "b", "c"]
    paginator.paginate.assert_called_once_with(Bucket="archive-bucket", Prefix="prefix")


def test_list_objects_error(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    paginator = MagicMock()
    paginator.paginate.side_effect = _client_error("500")
    client.get_paginator.return_value = paginator
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    with pytest.raises(ArchiveStorageError, match="Failed to list objects"):
        storage.list_objects("prefix")


def test_generate_presigned_url(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.generate_presigned_url.return_value = "http://signed-url"
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    url = storage.generate_presigned_url("key", expires_in=123)

    client.generate_presigned_url.assert_called_once_with(
        ClientMethod="get_object",
        Params={"Bucket": "archive-bucket", "Key": "key"},
        ExpiresIn=123,
    )
    assert url == "http://signed-url"


def test_generate_presigned_url_error(monkeypatch):
    _configure_storage(monkeypatch)
    client, _ = _mock_client(monkeypatch)
    client.generate_presigned_url.side_effect = _client_error("500")
    storage = ArchiveStorage(bucket=BUCKET_NAME)

    with pytest.raises(ArchiveStorageError, match="Failed to generate pre-signed URL"):
        storage.generate_presigned_url("key")


def test_serialization_roundtrip():
    records = [
        {
            "id": "1",
            "created_at": datetime(2024, 1, 1, 12, 0, 0),
            "payload": {"nested": "value"},
            "items": [{"name": "a"}],
        },
        {"id": "2", "value": 123},
    ]

    data = ArchiveStorage.serialize_to_jsonl_gz(records)
    decoded = ArchiveStorage.deserialize_from_jsonl_gz(data)

    assert decoded[0]["id"] == "1"
    assert decoded[0]["payload"]["nested"] == "value"
    assert decoded[0]["items"][0]["name"] == "a"
    assert "2024-01-01T12:00:00" in decoded[0]["created_at"]
    assert decoded[1]["value"] == 123


def test_content_md5_matches_checksum():
    data = b"checksum"
    expected = base64.b64encode(hashlib.md5(data).digest()).decode()

    assert ArchiveStorage._content_md5(data) == expected
    assert ArchiveStorage.compute_checksum(data) == hashlib.md5(data).hexdigest()
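
The round-trip test implies a straightforward encoding: one JSON object per line, gzip-compressed, with datetimes rendered as ISO-8601 strings. A minimal sketch of serializers consistent with those assertions (the shipped `ArchiveStorage` classmethods may differ in detail):

```python
import gzip
import json
from datetime import datetime


# Sketch consistent with test_serialization_roundtrip above; not the shipped code.
def serialize_to_jsonl_gz_sketch(records: list[dict]) -> bytes:
    def default(value):
        if isinstance(value, datetime):
            return value.isoformat()  # e.g. "2024-01-01T12:00:00"
        raise TypeError(f"unsupported type: {type(value)!r}")

    lines = "\n".join(json.dumps(record, default=default) for record in records)
    return gzip.compress(lines.encode("utf-8"))


def deserialize_from_jsonl_gz_sketch(data: bytes) -> list[dict]:
    text = gzip.decompress(data).decode("utf-8")
    return [json.loads(line) for line in text.splitlines() if line]
```

Note the checksum test also fixes the wire format for uploads: the `ContentMD5` header carries the base64-encoded MD5 digest, while `compute_checksum` returns the hex digest that callers persist.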
@@ -1,4 +1,4 @@
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from qcloud_cos import CosConfig
@@ -18,3 +18,72 @@ class TestTencentCos(BaseStorageTest):
        with patch.object(CosConfig, "__init__", return_value=None):
            self.storage = TencentCosStorage()
            self.storage.bucket_name = get_example_bucket()


class TestTencentCosConfiguration:
    """Tests for TencentCosStorage initialization with different configurations."""

    def test_init_with_custom_domain(self):
        """Test initialization with custom domain configured."""
        # Mock dify_config to return custom domain configuration
        mock_dify_config = MagicMock()
        mock_dify_config.TENCENT_COS_CUSTOM_DOMAIN = "cos.example.com"
        mock_dify_config.TENCENT_COS_SECRET_ID = "test-secret-id"
        mock_dify_config.TENCENT_COS_SECRET_KEY = "test-secret-key"
        mock_dify_config.TENCENT_COS_SCHEME = "https"

        # Mock CosConfig and CosS3Client
        mock_config_instance = MagicMock()
        mock_client = MagicMock()

        with (
            patch("extensions.storage.tencent_cos_storage.dify_config", mock_dify_config),
            patch(
                "extensions.storage.tencent_cos_storage.CosConfig", return_value=mock_config_instance
            ) as mock_cos_config,
            patch("extensions.storage.tencent_cos_storage.CosS3Client", return_value=mock_client),
        ):
            TencentCosStorage()

            # Verify CosConfig was called with Domain parameter (not Region)
            mock_cos_config.assert_called_once()
            call_kwargs = mock_cos_config.call_args[1]
            assert "Domain" in call_kwargs
            assert call_kwargs["Domain"] == "cos.example.com"
            assert "Region" not in call_kwargs
            assert call_kwargs["SecretId"] == "test-secret-id"
            assert call_kwargs["SecretKey"] == "test-secret-key"
            assert call_kwargs["Scheme"] == "https"

    def test_init_with_region(self):
        """Test initialization with region configured (no custom domain)."""
        # Mock dify_config to return region configuration
        mock_dify_config = MagicMock()
        mock_dify_config.TENCENT_COS_CUSTOM_DOMAIN = None
        mock_dify_config.TENCENT_COS_REGION = "ap-guangzhou"
        mock_dify_config.TENCENT_COS_SECRET_ID = "test-secret-id"
        mock_dify_config.TENCENT_COS_SECRET_KEY = "test-secret-key"
        mock_dify_config.TENCENT_COS_SCHEME = "https"

        # Mock CosConfig and CosS3Client
        mock_config_instance = MagicMock()
        mock_client = MagicMock()

        with (
            patch("extensions.storage.tencent_cos_storage.dify_config", mock_dify_config),
            patch(
                "extensions.storage.tencent_cos_storage.CosConfig", return_value=mock_config_instance
            ) as mock_cos_config,
            patch("extensions.storage.tencent_cos_storage.CosS3Client", return_value=mock_client),
        ):
            TencentCosStorage()

            # Verify CosConfig was called with Region parameter (not Domain)
            mock_cos_config.assert_called_once()
            call_kwargs = mock_cos_config.call_args[1]
            assert "Region" in call_kwargs
            assert call_kwargs["Region"] == "ap-guangzhou"
            assert "Domain" not in call_kwargs
            assert call_kwargs["SecretId"] == "test-secret-id"
            assert call_kwargs["SecretKey"] == "test-secret-key"
            assert call_kwargs["Scheme"] == "https"
@@ -1294,6 +1294,42 @@ class TestBillingServiceSubscriptionOperations:
        # Assert
        assert result == {}

    def test_get_plan_bulk_with_invalid_tenant_plan_skipped(self, mock_send_request):
        """Test bulk plan retrieval when one tenant has invalid plan data (should skip that tenant)."""
        # Arrange
        tenant_ids = ["tenant-valid-1", "tenant-invalid", "tenant-valid-2"]

        # Response with one invalid tenant plan (missing expiration_date) and two valid ones
        mock_send_request.return_value = {
            "data": {
                "tenant-valid-1": {"plan": "sandbox", "expiration_date": 1735689600},
                "tenant-invalid": {"plan": "professional"},  # Missing expiration_date field
                "tenant-valid-2": {"plan": "team", "expiration_date": 1767225600},
            }
        }

        # Act
        with patch("services.billing_service.logger") as mock_logger:
            result = BillingService.get_plan_bulk(tenant_ids)

        # Assert - should only contain valid tenants
        assert len(result) == 2
        assert "tenant-valid-1" in result
        assert "tenant-valid-2" in result
        assert "tenant-invalid" not in result

        # Verify valid tenants have correct data
        assert result["tenant-valid-1"]["plan"] == "sandbox"
        assert result["tenant-valid-1"]["expiration_date"] == 1735689600
        assert result["tenant-valid-2"]["plan"] == "team"
        assert result["tenant-valid-2"]["expiration_date"] == 1767225600

        # Verify exception was logged for the invalid tenant
        mock_logger.exception.assert_called_once()
        log_call_args = mock_logger.exception.call_args[0]
        assert "get_plan_bulk: failed to validate subscription plan for tenant" in log_call_args[0]
        assert "tenant-invalid" in log_call_args[1]

    def test_get_expired_subscription_cleanup_whitelist_success(self, mock_send_request):
        """Test successful retrieval of expired subscription cleanup whitelist."""
        # Arrange
@@ -0,0 +1,122 @@
import base64
from unittest.mock import Mock, patch

import pytest

from core.mcp.types import (
    AudioContent,
    BlobResourceContents,
    CallToolResult,
    EmbeddedResource,
    ImageContent,
    TextResourceContents,
)
from core.tools.__base.tool_runtime import ToolRuntime
from core.tools.entities.common_entities import I18nObject
from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage
from core.tools.mcp_tool.tool import MCPTool


def _make_mcp_tool(output_schema: dict | None = None) -> MCPTool:
    identity = ToolIdentity(
        author="test",
        name="test_mcp_tool",
        label=I18nObject(en_US="Test MCP Tool", zh_Hans="测试MCP工具"),
        provider="test_provider",
    )
    entity = ToolEntity(identity=identity, output_schema=output_schema or {})
    runtime = Mock(spec=ToolRuntime)
    runtime.credentials = {}
    return MCPTool(
        entity=entity,
        runtime=runtime,
        tenant_id="test_tenant",
        icon="",
        server_url="https://server.invalid",
        provider_id="provider_1",
        headers={},
    )


class TestMCPToolInvoke:
    @pytest.mark.parametrize(
        ("content_factory", "mime_type"),
        [
            (
                lambda b64, mt: ImageContent(type="image", data=b64, mimeType=mt),
                "image/png",
            ),
            (
                lambda b64, mt: AudioContent(type="audio", data=b64, mimeType=mt),
                "audio/mpeg",
            ),
        ],
    )
    def test_invoke_image_or_audio_yields_blob(self, content_factory, mime_type) -> None:
        tool = _make_mcp_tool()
        raw = b"\x00\x01test-bytes\x02"
        b64 = base64.b64encode(raw).decode()
        content = content_factory(b64, mime_type)
        result = CallToolResult(content=[content])

        with patch.object(tool, "invoke_remote_mcp_tool", return_value=result):
            messages = list(tool._invoke(user_id="test_user", tool_parameters={}))

        assert len(messages) == 1
        msg = messages[0]
        assert msg.type == ToolInvokeMessage.MessageType.BLOB
        assert isinstance(msg.message, ToolInvokeMessage.BlobMessage)
        assert msg.message.blob == raw
        assert msg.meta == {"mime_type": mime_type}

    def test_invoke_embedded_text_resource_yields_text(self) -> None:
        tool = _make_mcp_tool()
        text_resource = TextResourceContents(uri="file://test.txt", mimeType="text/plain", text="hello world")
        content = EmbeddedResource(type="resource", resource=text_resource)
        result = CallToolResult(content=[content])

        with patch.object(tool, "invoke_remote_mcp_tool", return_value=result):
            messages = list(tool._invoke(user_id="test_user", tool_parameters={}))

        assert len(messages) == 1
        msg = messages[0]
        assert msg.type == ToolInvokeMessage.MessageType.TEXT
        assert isinstance(msg.message, ToolInvokeMessage.TextMessage)
        assert msg.message.text == "hello world"

    @pytest.mark.parametrize(
        ("mime_type", "expected_mime"),
        [("application/pdf", "application/pdf"), (None, "application/octet-stream")],
    )
    def test_invoke_embedded_blob_resource_yields_blob(self, mime_type, expected_mime) -> None:
        tool = _make_mcp_tool()
        raw = b"binary-data"
        b64 = base64.b64encode(raw).decode()
        blob_resource = BlobResourceContents(uri="file://doc.bin", mimeType=mime_type, blob=b64)
        content = EmbeddedResource(type="resource", resource=blob_resource)
        result = CallToolResult(content=[content])

        with patch.object(tool, "invoke_remote_mcp_tool", return_value=result):
            messages = list(tool._invoke(user_id="test_user", tool_parameters={}))

        assert len(messages) == 1
        msg = messages[0]
        assert msg.type == ToolInvokeMessage.MessageType.BLOB
        assert isinstance(msg.message, ToolInvokeMessage.BlobMessage)
        assert msg.message.blob == raw
        assert msg.meta == {"mime_type": expected_mime}

    def test_invoke_yields_variables_when_structured_content_and_schema(self) -> None:
        tool = _make_mcp_tool(output_schema={"type": "object"})
        result = CallToolResult(content=[], structuredContent={"a": 1, "b": "x"})

        with patch.object(tool, "invoke_remote_mcp_tool", return_value=result):
            messages = list(tool._invoke(user_id="test_user", tool_parameters={}))

        # Expect two variable messages corresponding to keys a and b
        assert len(messages) == 2
        var_msgs = [m for m in messages if isinstance(m.message, ToolInvokeMessage.VariableMessage)]
        assert {m.message.variable_name for m in var_msgs} == {"a", "b"}
        # Validate values
        values = {m.message.variable_name: m.message.variable_value for m in var_msgs}
        assert values == {"a": 1, "b": "x"}
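
Read together, the four tests specify how `MCPTool._invoke` maps MCP result content onto `ToolInvokeMessage`s: image and audio parts decode base64 into blob messages tagged with their MIME type, embedded text resources become text messages, embedded blob resources become blobs with `application/octet-stream` as the MIME fallback, and `structuredContent` fans out into one variable message per top-level key when an output schema is declared. A condensed sketch of that dispatch (the `create_*_message` factories stand in for whatever helpers the real tool uses, and the attribute access is assumed from the test fixtures):

```python
import base64


# Condensed dispatch consistent with the tests above; not the shipped code.
def map_mcp_result_sketch(tool, result):
    for content in result.content:
        if content.type in ("image", "audio"):
            # base64 payload -> blob message carrying the declared MIME type
            yield tool.create_blob_message(
                blob=base64.b64decode(content.data),
                meta={"mime_type": content.mimeType},
            )
        elif content.type == "resource":
            resource = content.resource
            if hasattr(resource, "text"):
                yield tool.create_text_message(resource.text)
            else:
                # blob resources fall back to application/octet-stream
                yield tool.create_blob_message(
                    blob=base64.b64decode(resource.blob),
                    meta={"mime_type": resource.mimeType or "application/octet-stream"},
                )
    if result.structuredContent and tool.entity.output_schema:
        # one variable message per top-level key of the structured payload
        for name, value in result.structuredContent.items():
            yield tool.create_variable_message(name, value)
```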
@@ -15,6 +15,11 @@ from core.tools.utils.text_processing_utils import remove_leading_symbols
        ("", ""),
        (" ", " "),
        ("【测试】", "【测试】"),
        # Markdown link preservation - should be preserved if text starts with a markdown link
        ("[Google](https://google.com) is a search engine", "[Google](https://google.com) is a search engine"),
        ("[Example](http://example.com) some text", "[Example](http://example.com) some text"),
        # Leading symbols before markdown link are removed, including the opening bracket [
        ("@[Test](https://example.com)", "Test](https://example.com)"),
    ],
)
def test_remove_leading_symbols(input_text, expected_output):
@@ -3072,11 +3072,11 @@ wheels = [
 
 [[package]]
 name = "json-repair"
-version = "0.54.1"
+version = "0.54.3"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/00/46/d3a4d9a3dad39bb4a2ad16b8adb9fe2e8611b20b71197fe33daa6768e85d/json_repair-0.54.1.tar.gz", hash = "sha256:d010bc31f1fc66e7c36dc33bff5f8902674498ae5cb8e801ad455a53b455ad1d", size = 38555, upload-time = "2025-11-19T14:55:24.265Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/86/48b12ac02032f121ac7e5f11a32143edca6c1e3d19ffc54d6fb9ca0aafd0/json_repair-0.54.3.tar.gz", hash = "sha256:e50feec9725e52ac91f12184609754684ac1656119dfbd31de09bdaf9a1d8bf6", size = 38626, upload-time = "2025-12-15T09:41:58.594Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/db/96/c9aad7ee949cc1bf15df91f347fbc2d3bd10b30b80c7df689ce6fe9332b5/json_repair-0.54.1-py3-none-any.whl", hash = "sha256:016160c5db5d5fe443164927bb58d2dfbba5f43ad85719fa9bc51c713a443ab1", size = 29311, upload-time = "2025-11-19T14:55:22.886Z" },
+    { url = "https://files.pythonhosted.org/packages/e9/08/abe317237add63c3e62f18a981bccf92112b431835b43d844aedaf61f4a0/json_repair-0.54.3-py3-none-any.whl", hash = "sha256:4cdc132ee27d4780576f71bf27a113877046224a808bfc17392e079cb344fb81", size = 29357, upload-time = "2025-12-15T09:41:57.436Z" },
 ]
 
 [[package]]
@@ -399,6 +399,7 @@ CONSOLE_CORS_ALLOW_ORIGINS=*
COOKIE_DOMAIN=
# When the frontend and backend run on different subdomains, set NEXT_PUBLIC_COOKIE_DOMAIN=1.
NEXT_PUBLIC_COOKIE_DOMAIN=
NEXT_PUBLIC_BATCH_CONCURRENCY=5

# ------------------------------
# File Storage Configuration
@@ -446,6 +447,15 @@ S3_SECRET_KEY=
# If set to false, the access key and secret key must be provided.
S3_USE_AWS_MANAGED_IAM=false

# Workflow run and Conversation archive storage (S3-compatible)
ARCHIVE_STORAGE_ENABLED=false
ARCHIVE_STORAGE_ENDPOINT=
ARCHIVE_STORAGE_ARCHIVE_BUCKET=
ARCHIVE_STORAGE_EXPORT_BUCKET=
ARCHIVE_STORAGE_ACCESS_KEY=
ARCHIVE_STORAGE_SECRET_KEY=
ARCHIVE_STORAGE_REGION=auto

# Azure Blob Configuration
#
AZURE_BLOB_ACCOUNT_NAME=difyai
@@ -477,6 +487,7 @@ TENCENT_COS_SECRET_KEY=your-secret-key
TENCENT_COS_SECRET_ID=your-secret-id
TENCENT_COS_REGION=your-region
TENCENT_COS_SCHEME=your-scheme
TENCENT_COS_CUSTOM_DOMAIN=your-custom-domain

# Oracle Storage Configuration
#
@@ -108,6 +108,7 @@ x-shared-env: &shared-api-worker-env
  CONSOLE_CORS_ALLOW_ORIGINS: ${CONSOLE_CORS_ALLOW_ORIGINS:-*}
  COOKIE_DOMAIN: ${COOKIE_DOMAIN:-}
  NEXT_PUBLIC_COOKIE_DOMAIN: ${NEXT_PUBLIC_COOKIE_DOMAIN:-}
  NEXT_PUBLIC_BATCH_CONCURRENCY: ${NEXT_PUBLIC_BATCH_CONCURRENCY:-5}
  STORAGE_TYPE: ${STORAGE_TYPE:-opendal}
  OPENDAL_SCHEME: ${OPENDAL_SCHEME:-fs}
  OPENDAL_FS_ROOT: ${OPENDAL_FS_ROOT:-storage}
@@ -121,6 +122,13 @@ x-shared-env: &shared-api-worker-env
  S3_ACCESS_KEY: ${S3_ACCESS_KEY:-}
  S3_SECRET_KEY: ${S3_SECRET_KEY:-}
  S3_USE_AWS_MANAGED_IAM: ${S3_USE_AWS_MANAGED_IAM:-false}
  ARCHIVE_STORAGE_ENABLED: ${ARCHIVE_STORAGE_ENABLED:-false}
  ARCHIVE_STORAGE_ENDPOINT: ${ARCHIVE_STORAGE_ENDPOINT:-}
  ARCHIVE_STORAGE_ARCHIVE_BUCKET: ${ARCHIVE_STORAGE_ARCHIVE_BUCKET:-}
  ARCHIVE_STORAGE_EXPORT_BUCKET: ${ARCHIVE_STORAGE_EXPORT_BUCKET:-}
  ARCHIVE_STORAGE_ACCESS_KEY: ${ARCHIVE_STORAGE_ACCESS_KEY:-}
  ARCHIVE_STORAGE_SECRET_KEY: ${ARCHIVE_STORAGE_SECRET_KEY:-}
  ARCHIVE_STORAGE_REGION: ${ARCHIVE_STORAGE_REGION:-auto}
  AZURE_BLOB_ACCOUNT_NAME: ${AZURE_BLOB_ACCOUNT_NAME:-difyai}
  AZURE_BLOB_ACCOUNT_KEY: ${AZURE_BLOB_ACCOUNT_KEY:-difyai}
  AZURE_BLOB_CONTAINER_NAME: ${AZURE_BLOB_CONTAINER_NAME:-difyai-container}
@@ -140,6 +148,7 @@ x-shared-env: &shared-api-worker-env
  TENCENT_COS_SECRET_ID: ${TENCENT_COS_SECRET_ID:-your-secret-id}
  TENCENT_COS_REGION: ${TENCENT_COS_REGION:-your-region}
  TENCENT_COS_SCHEME: ${TENCENT_COS_SCHEME:-your-scheme}
  TENCENT_COS_CUSTOM_DOMAIN: ${TENCENT_COS_CUSTOM_DOMAIN:-your-custom-domain}
  OCI_ENDPOINT: ${OCI_ENDPOINT:-https://your-object-storage-namespace.compat.objectstorage.us-ashburn-1.oraclecloud.com}
  OCI_BUCKET_NAME: ${OCI_BUCKET_NAME:-your-bucket-name}
  OCI_ACCESS_KEY: ${OCI_ACCESS_KEY:-your-access-key}
@@ -54,17 +54,17 @@
     "publish:npm": "./scripts/publish.sh"
   },
   "dependencies": {
-    "axios": "^1.3.5"
+    "axios": "^1.13.2"
   },
   "devDependencies": {
-    "@eslint/js": "^9.2.0",
-    "@types/node": "^20.11.30",
+    "@eslint/js": "^9.39.2",
+    "@types/node": "^25.0.3",
     "@typescript-eslint/eslint-plugin": "^8.50.1",
     "@typescript-eslint/parser": "^8.50.1",
-    "@vitest/coverage-v8": "1.6.1",
-    "eslint": "^9.2.0",
+    "@vitest/coverage-v8": "4.0.16",
+    "eslint": "^9.39.2",
     "tsup": "^8.5.1",
-    "typescript": "^5.4.5",
-    "vitest": "^1.5.0"
+    "typescript": "^5.9.3",
+    "vitest": "^4.0.16"
   }
 }