From ef3569e6677d2ff7500a0a6bf30ea84979da8acb Mon Sep 17 00:00:00 2001 From: Panpan Date: Wed, 21 May 2025 13:52:21 +0800 Subject: [PATCH 01/19] feat: support chatflow start node custom input field hidden (#19678) --- api/core/app/app_config/entities.py | 1 + .../config-var/config-modal/index.tsx | 7 ++++++- .../base/chat/chat-with-history/chat-wrapper.tsx | 14 +++++++++----- .../base/chat/chat-with-history/context.tsx | 2 ++ .../base/chat/chat-with-history/hooks.tsx | 11 ++++++++++- .../base/chat/chat-with-history/index.tsx | 2 ++ .../chat/chat-with-history/inputs-form/content.tsx | 4 +++- .../chat/chat-with-history/inputs-form/index.tsx | 5 +++++ web/app/components/base/chat/chat/type.ts | 1 + .../base/chat/embedded-chatbot/chat-wrapper.tsx | 14 +++++++++----- .../base/chat/embedded-chatbot/context.tsx | 2 ++ .../base/chat/embedded-chatbot/hooks.tsx | 10 +++++++++- .../base/chat/embedded-chatbot/index.tsx | 2 ++ .../chat/embedded-chatbot/inputs-form/content.tsx | 4 +++- .../chat/embedded-chatbot/inputs-form/index.tsx | 5 +++++ .../workflow/panel/debug-and-preview/index.tsx | 3 ++- .../panel/debug-and-preview/user-input.tsx | 5 +++-- web/app/components/workflow/types.ts | 1 + web/i18n/en-US/app-debug.ts | 1 + 19 files changed, 76 insertions(+), 18 deletions(-) diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py index 8ae52131f2..3f31b1c3d5 100644 --- a/api/core/app/app_config/entities.py +++ b/api/core/app/app_config/entities.py @@ -109,6 +109,7 @@ class VariableEntity(BaseModel): description: str = "" type: VariableEntityType required: bool = False + hide: bool = False max_length: Optional[int] = None options: Sequence[str] = Field(default_factory=list) allowed_file_types: Sequence[FileType] = Field(default_factory=list) diff --git a/web/app/components/app/configuration/config-var/config-modal/index.tsx b/web/app/components/app/configuration/config-var/config-modal/index.tsx index 4b6bfdacd6..29cbc55b90 100644 --- a/web/app/components/app/configuration/config-var/config-modal/index.tsx +++ b/web/app/components/app/configuration/config-var/config-modal/index.tsx @@ -234,9 +234,14 @@ const ConfigModal: FC = ({ )}
-          handlePayloadChange('required')(!tempPayload.required)} />
+          handlePayloadChange('required')(!tempPayload.required)} />
           {t('appDebug.variableConfig.required')}
+
+          handlePayloadChange('hide')(!tempPayload.hide)} />
+          {t('appDebug.variableConfig.hide')}
+
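Taken together, the API field added above (`hide: bool = False` on `VariableEntity`) and the checkbox in this modal feed one derived value, `allInputsHidden`, that the web-app diffs below consult. A minimal TypeScript sketch of that derivation, using a simplified `InputForm` shape (illustrative only, not the exact hook code from hooks.tsx):

    // Simplified form shape; the real type in web/app/components/base/chat/chat/type.ts has more fields.
    type InputForm = { variable: string; label: string; required: boolean; hide: boolean }

    // True only when there is at least one input and every input opts out of rendering.
    // An empty form list stays "not hidden", so the existing empty-form handling is untouched.
    const allInputsHidden = (inputsForms: InputForm[]): boolean =>
      inputsForms.length > 0 && inputsForms.every(item => item.hide === true)

    // Hidden fields are dropped from rendering only; their stored values are left untouched.
    const visibleInputsForms = (inputsForms: InputForm[]): InputForm[] =>
      inputsForms.filter(form => form.hide !== true)

diff --git a/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx b/web/app/components/base/chat/chat-with-history/chat-wrapper.tsx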
{ clearChatList, setClearChatList, setIsResponding, + allInputsHidden, } = useChatWithHistoryContext() const appConfig = useMemo(() => { const config = appParams || {} @@ -81,6 +82,9 @@ const ChatWrapper = () => { ) const inputsFormValue = currentConversationId ? currentConversationInputs : newConversationInputsRef?.current const inputDisabled = useMemo(() => { + if (allInputsHidden) + return false + let hasEmptyInput = '' let fileIsUploading = false const requiredVars = inputsForms.filter(({ required }) => required) @@ -110,7 +114,7 @@ const ChatWrapper = () => { if (fileIsUploading) return true return false - }, [inputsFormValue, inputsForms]) + }, [inputsFormValue, inputsForms, allInputsHidden]) useEffect(() => { if (currentChatInstanceRef.current) @@ -161,7 +165,7 @@ const ChatWrapper = () => { const [collapsed, setCollapsed] = useState(!!currentConversationId) const chatNode = useMemo(() => { - if (!inputsForms.length) + if (allInputsHidden || !inputsForms.length) return null if (isMobile) { if (!currentConversationId) @@ -171,7 +175,7 @@ const ChatWrapper = () => { else { return } - }, [inputsForms.length, isMobile, currentConversationId, collapsed]) + }, [inputsForms.length, isMobile, currentConversationId, collapsed, allInputsHidden]) const welcome = useMemo(() => { const welcomeMessage = chatList.find(item => item.isOpeningStatement) @@ -181,7 +185,7 @@ const ChatWrapper = () => { return null if (!welcomeMessage) return null - if (!collapsed && inputsForms.length > 0) + if (!collapsed && inputsForms.length > 0 && !allInputsHidden) return null if (welcomeMessage.suggestedQuestions && welcomeMessage.suggestedQuestions?.length > 0) { return ( @@ -218,7 +222,7 @@ const ChatWrapper = () => { ) - }, [appData?.site.icon, appData?.site.icon_background, appData?.site.icon_type, appData?.site.icon_url, chatList, collapsed, currentConversationId, inputsForms.length, respondingState]) + }, [appData?.site.icon, appData?.site.icon_background, appData?.site.icon_type, appData?.site.icon_url, chatList, collapsed, currentConversationId, inputsForms.length, respondingState, allInputsHidden]) const answerIcon = (appData?.site && appData.site.use_icon_as_answer_icon) ? 
void, currentConversationInputs: Record | null, setCurrentConversationInputs: (v: Record) => void, + allInputsHidden: boolean, } export const ChatWithHistoryContext = createContext({ @@ -95,5 +96,6 @@ export const ChatWithHistoryContext = createContext setIsResponding: noop, currentConversationInputs: {}, setCurrentConversationInputs: noop, + allInputsHidden: false, }) export const useChatWithHistoryContext = () => useContext(ChatWithHistoryContext) diff --git a/web/app/components/base/chat/chat-with-history/hooks.tsx b/web/app/components/base/chat/chat-with-history/hooks.tsx index d03a28ae57..3694666139 100644 --- a/web/app/components/base/chat/chat-with-history/hooks.tsx +++ b/web/app/components/base/chat/chat-with-history/hooks.tsx @@ -240,6 +240,11 @@ export const useChatWithHistory = (installedAppInfo?: InstalledApp) => { } }) }, [appParams]) + + const allInputsHidden = useMemo(() => { + return inputsForms.length > 0 && inputsForms.every(item => item.hide === true) + }, [inputsForms]) + useEffect(() => { const conversationInputs: Record = {} @@ -304,6 +309,9 @@ export const useChatWithHistory = (installedAppInfo?: InstalledApp) => { const { notify } = useToastContext() const checkInputsRequired = useCallback((silent?: boolean) => { + if (allInputsHidden) + return true + let hasEmptyInput = '' let fileIsUploading = false const requiredVars = inputsForms.filter(({ required }) => required) @@ -339,7 +347,7 @@ export const useChatWithHistory = (installedAppInfo?: InstalledApp) => { } return true - }, [inputsForms, notify, t]) + }, [inputsForms, notify, t, allInputsHidden]) const handleStartChat = useCallback((callback: any) => { if (checkInputsRequired()) { setShowNewConversationItemInList(true) @@ -507,5 +515,6 @@ export const useChatWithHistory = (installedAppInfo?: InstalledApp) => { setIsResponding, currentConversationInputs, setCurrentConversationInputs, + allInputsHidden, } } diff --git a/web/app/components/base/chat/chat-with-history/index.tsx b/web/app/components/base/chat/chat-with-history/index.tsx index 2ba50ea49b..de023e7f58 100644 --- a/web/app/components/base/chat/chat-with-history/index.tsx +++ b/web/app/components/base/chat/chat-with-history/index.tsx @@ -161,6 +161,7 @@ const ChatWithHistoryWrap: FC = ({ setIsResponding, currentConversationInputs, setCurrentConversationInputs, + allInputsHidden, } = useChatWithHistory(installedAppInfo) return ( @@ -206,6 +207,7 @@ const ChatWithHistoryWrap: FC = ({ setIsResponding, currentConversationInputs, setCurrentConversationInputs, + allInputsHidden, }}> diff --git a/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx b/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx index b8d18244e1..73a1f07b69 100644 --- a/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx +++ b/web/app/components/base/chat/chat-with-history/inputs-form/content.tsx @@ -36,9 +36,11 @@ const InputsFormContent = ({ showTip }: Props) => { }) }, [newConversationInputsRef, handleNewConversationInputsChange, currentConversationInputs, setCurrentConversationInputs]) + const visibleInputsForms = inputsForms.filter(form => form.hide !== true) + return (
-        {inputsForms.map(form => (
+        {visibleInputsForms.map(form => (
           {form.label}
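One detail of the hooks.tsx change above that is easy to miss: when every field is hidden, `checkInputsRequired` returns true before any per-field checks run, so a chat can start even if required-but-hidden fields are empty. A condensed, hypothetical rendering of that guard (the real function also tracks uploading files and raises a toast on failure):

    // Sketch only; `values` stands in for the stored conversation inputs.
    function checkInputsRequired(
      inputsForms: Array<{ variable: string; required: boolean; hide: boolean }>,
      values: Record<string, unknown>,
    ): boolean {
      // Guard added by this patch: nothing is visible, so there is nothing to validate.
      if (inputsForms.length > 0 && inputsForms.every(f => f.hide))
        return true
      // Pre-existing behaviour: every required field must have a non-empty value.
      return inputsForms
        .filter(f => f.required)
        .every(f => values[f.variable] !== undefined && values[f.variable] !== '')
    }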
diff --git a/web/app/components/base/chat/chat-with-history/inputs-form/index.tsx b/web/app/components/base/chat/chat-with-history/inputs-form/index.tsx index 30ec11c729..3a1b92089c 100644 --- a/web/app/components/base/chat/chat-with-history/inputs-form/index.tsx +++ b/web/app/components/base/chat/chat-with-history/inputs-form/index.tsx @@ -21,9 +21,14 @@ const InputsFormNode = ({ isMobile, currentConversationId, handleStartChat, + allInputsHidden, themeBuilder, + inputsForms, } = useChatWithHistoryContext() + if (allInputsHidden || inputsForms.length === 0) + return null + return (
{ clearChatList, setClearChatList, setIsResponding, + allInputsHidden, } = useEmbeddedChatbotContext() const appConfig = useMemo(() => { const config = appParams || {} @@ -82,6 +83,9 @@ const ChatWrapper = () => { ) const inputsFormValue = currentConversationId ? currentConversationInputs : newConversationInputsRef?.current const inputDisabled = useMemo(() => { + if (allInputsHidden) + return false + let hasEmptyInput = '' let fileIsUploading = false const requiredVars = inputsForms.filter(({ required }) => required) @@ -111,7 +115,7 @@ const ChatWrapper = () => { if (fileIsUploading) return true return false - }, [inputsFormValue, inputsForms]) + }, [inputsFormValue, inputsForms, allInputsHidden]) useEffect(() => { if (currentChatInstanceRef.current) @@ -160,7 +164,7 @@ const ChatWrapper = () => { const [collapsed, setCollapsed] = useState(!!currentConversationId) const chatNode = useMemo(() => { - if (!inputsForms.length) + if (allInputsHidden || !inputsForms.length) return null if (isMobile) { if (!currentConversationId) @@ -170,7 +174,7 @@ const ChatWrapper = () => { else { return } - }, [inputsForms.length, isMobile, currentConversationId, collapsed]) + }, [inputsForms.length, isMobile, currentConversationId, collapsed, allInputsHidden]) const welcome = useMemo(() => { const welcomeMessage = chatList.find(item => item.isOpeningStatement) @@ -180,7 +184,7 @@ const ChatWrapper = () => { return null if (!welcomeMessage) return null - if (!collapsed && inputsForms.length > 0) + if (!collapsed && inputsForms.length > 0 && !allInputsHidden) return null if (welcomeMessage.suggestedQuestions && welcomeMessage.suggestedQuestions?.length > 0) { return ( @@ -215,7 +219,7 @@ const ChatWrapper = () => {
) - }, [appData?.site.icon, appData?.site.icon_background, appData?.site.icon_type, appData?.site.icon_url, chatList, collapsed, currentConversationId, inputsForms.length, respondingState]) + }, [appData?.site.icon, appData?.site.icon_background, appData?.site.icon_type, appData?.site.icon_url, chatList, collapsed, currentConversationId, inputsForms.length, respondingState, allInputsHidden]) const answerIcon = isDify() ? diff --git a/web/app/components/base/chat/embedded-chatbot/context.tsx b/web/app/components/base/chat/embedded-chatbot/context.tsx index 0dd6e7a29a..5964efd806 100644 --- a/web/app/components/base/chat/embedded-chatbot/context.tsx +++ b/web/app/components/base/chat/embedded-chatbot/context.tsx @@ -53,6 +53,7 @@ export type EmbeddedChatbotContextValue = { setIsResponding: (state: boolean) => void, currentConversationInputs: Record | null, setCurrentConversationInputs: (v: Record) => void, + allInputsHidden: boolean } export const EmbeddedChatbotContext = createContext({ @@ -82,5 +83,6 @@ export const EmbeddedChatbotContext = createContext setIsResponding: noop, currentConversationInputs: {}, setCurrentConversationInputs: noop, + allInputsHidden: false, }) export const useEmbeddedChatbotContext = () => useContext(EmbeddedChatbotContext) diff --git a/web/app/components/base/chat/embedded-chatbot/hooks.tsx b/web/app/components/base/chat/embedded-chatbot/hooks.tsx index ce42575bc9..40c56eca7b 100644 --- a/web/app/components/base/chat/embedded-chatbot/hooks.tsx +++ b/web/app/components/base/chat/embedded-chatbot/hooks.tsx @@ -235,6 +235,10 @@ export const useEmbeddedChatbot = () => { }) }, [initInputs, appParams]) + const allInputsHidden = useMemo(() => { + return inputsForms.length > 0 && inputsForms.every(item => item.hide === true) + }, [inputsForms]) + useEffect(() => { // init inputs from url params (async () => { @@ -306,6 +310,9 @@ export const useEmbeddedChatbot = () => { const { notify } = useToastContext() const checkInputsRequired = useCallback((silent?: boolean) => { + if (allInputsHidden) + return true + let hasEmptyInput = '' let fileIsUploading = false const requiredVars = inputsForms.filter(({ required }) => required) @@ -341,7 +348,7 @@ export const useEmbeddedChatbot = () => { } return true - }, [inputsForms, notify, t]) + }, [inputsForms, notify, t, allInputsHidden]) const handleStartChat = useCallback((callback?: any) => { if (checkInputsRequired()) { setShowNewConversationItemInList(true) @@ -417,5 +424,6 @@ export const useEmbeddedChatbot = () => { setIsResponding, currentConversationInputs, setCurrentConversationInputs, + allInputsHidden, } } diff --git a/web/app/components/base/chat/embedded-chatbot/index.tsx b/web/app/components/base/chat/embedded-chatbot/index.tsx index 49189c419e..ffcb128c8f 100644 --- a/web/app/components/base/chat/embedded-chatbot/index.tsx +++ b/web/app/components/base/chat/embedded-chatbot/index.tsx @@ -168,6 +168,7 @@ const EmbeddedChatbotWrapper = () => { setIsResponding, currentConversationInputs, setCurrentConversationInputs, + allInputsHidden, } = useEmbeddedChatbot() return { setIsResponding, currentConversationInputs, setCurrentConversationInputs, + allInputsHidden, }}> diff --git a/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx b/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx index e56520d23f..c5f39718f1 100644 --- a/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx +++ b/web/app/components/base/chat/embedded-chatbot/inputs-form/content.tsx @@ -36,9 +36,11 @@ 
const InputsFormContent = ({ showTip }: Props) => {
     })
   }, [newConversationInputsRef, handleNewConversationInputsChange, currentConversationInputs, setCurrentConversationInputs])
 
+  const visibleInputsForms = inputsForms.filter(form => form.hide !== true)
+
   return (
-        {inputsForms.map(form => (
+        {visibleInputsForms.map(form => (
           {form.label}
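The embedded-chatbot files above mirror the chat-with-history changes one-to-one, and the workflow debug panel below applies the same rule to start-node variables. Note the comparison is `v.hide !== true` rather than `!v.hide`, presumably because `hide` can be absent on variables saved before this feature existed, and those must stay visible. A small illustrative sketch (the types here are assumptions, not taken from the codebase):

    // Hypothetical start-node variable shape; `hide` is optional on older drafts.
    type StartNodeVariable = { variable: string; label: string; hide?: boolean }

    const visibleVariables = (variables: StartNodeVariable[]): StartNodeVariable[] =>
      variables.filter(v => v.hide !== true)

    // Example: `hide: undefined` and `hide: false` both remain visible; only `b` is dropped.
    visibleVariables([
      { variable: 'a', label: 'A' },
      { variable: 'b', label: 'B', hide: true },
      { variable: 'c', label: 'C', hide: false },
    ])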
diff --git a/web/app/components/base/chat/embedded-chatbot/inputs-form/index.tsx b/web/app/components/base/chat/embedded-chatbot/inputs-form/index.tsx index 4ac4aaa16b..88472b5d8f 100644 --- a/web/app/components/base/chat/embedded-chatbot/inputs-form/index.tsx +++ b/web/app/components/base/chat/embedded-chatbot/inputs-form/index.tsx @@ -22,8 +22,13 @@ const InputsFormNode = ({ currentConversationId, themeBuilder, handleStartChat, + allInputsHidden, + inputsForms, } = useEmbeddedChatbotContext() + if (allInputsHidden || inputsForms.length === 0) + return null + return (
{
   const nodes = useNodes()
   const startNode = nodes.find(node => node.data.type === BlockEnum.Start)
   const variables = startNode?.data.variables || []
+  const visibleVariables = variables.filter(v => v.hide !== true)
 
   const [showConversationVariableModal, setShowConversationVariableModal] = useState(false)
 
@@ -107,7 +108,7 @@ const DebugAndPreview = () => {
         )}
-        {variables.length > 0 && (
+        {visibleVariables.length > 0 && (
{
   const nodes = useNodes()
   const startNode = nodes.find(node => node.data.type === BlockEnum.Start)
   const variables = startNode?.data.variables || []
+  const visibleVariables = variables.filter(v => v.hide !== true)
 
   const handleValueChange = (variable: string, v: string) => {
     const {
@@ -29,13 +30,13 @@ const UserInput = () => {
     })
   }
 
-  if (!variables.length)
+  if (!visibleVariables.length)
     return null
 
   return (
-        {variables.map((variable, index) => (
+        {visibleVariables.map((variable, index) => (
export type ModelConfig = { diff --git a/web/i18n/en-US/app-debug.ts b/web/i18n/en-US/app-debug.ts index 3ee5fd3e1d..349ff37118 100644 --- a/web/i18n/en-US/app-debug.ts +++ b/web/i18n/en-US/app-debug.ts @@ -368,6 +368,7 @@ const translation = { 'inputPlaceholder': 'Please input', 'content': 'Content', 'required': 'Required', + 'hide': 'Hide', 'file': { supportFileTypes: 'Support File Types', image: { From 3196dc2d61825b587dd42c1a5cbcb716bc3d5c5a Mon Sep 17 00:00:00 2001 From: -LAN- Date: Wed, 21 May 2025 15:38:03 +0800 Subject: [PATCH 02/19] refactor: Use typed SQLAlchemy base model and fix type errors (#19980) Signed-off-by: -LAN- --- api/controllers/console/auth/login.py | 12 +- api/controllers/console/auth/oauth.py | 12 +- api/controllers/console/datasets/datasets.py | 19 ++- .../console/datasets/datasets_document.py | 42 +++-- .../console/workspace/workspace.py | 22 ++- api/controllers/files/upload.py | 21 ++- .../service_api/dataset/document.py | 21 ++- api/core/rag/datasource/retrieval_service.py | 24 ++- api/core/tools/tool_manager.py | 6 +- api/core/tools/workflow_as_tool/tool.py | 20 +-- api/extensions/ext_login.py | 49 ++++-- api/models/account.py | 157 +++++++++--------- api/models/base.py | 6 +- api/models/tools.py | 4 - api/models/workflow.py | 4 +- api/services/vector_service.py | 15 +- api/services/workflow_service.py | 6 +- api/tasks/add_document_to_index_task.py | 2 +- api/tasks/remove_app_and_related_data_task.py | 2 +- .../core/tools/workflow_as_tool/test_tool.py | 2 +- 20 files changed, 272 insertions(+), 174 deletions(-) diff --git a/api/controllers/console/auth/login.py b/api/controllers/console/auth/login.py index 86231bf616..5f2a24322d 100644 --- a/api/controllers/console/auth/login.py +++ b/api/controllers/console/auth/login.py @@ -202,18 +202,18 @@ class EmailCodeLoginApi(Resource): except AccountRegisterError as are: raise AccountInFreezeError() if account: - tenant = TenantService.get_join_tenants(account) - if not tenant: + tenants = TenantService.get_join_tenants(account) + if not tenants: workspaces = FeatureService.get_system_features().license.workspaces if not workspaces.is_available(): raise WorkspacesLimitExceeded() if not FeatureService.get_system_features().is_allow_create_workspace: raise NotAllowedCreateWorkspace() else: - tenant = TenantService.create_tenant(f"{account.name}'s Workspace") - TenantService.create_tenant_member(tenant, account, role="owner") - account.current_tenant = tenant - tenant_was_created.send(tenant) + new_tenant = TenantService.create_tenant(f"{account.name}'s Workspace") + TenantService.create_tenant_member(new_tenant, account, role="owner") + account.current_tenant = new_tenant + tenant_was_created.send(new_tenant) if account is None: try: diff --git a/api/controllers/console/auth/oauth.py b/api/controllers/console/auth/oauth.py index f5284cc43b..395367c9e2 100644 --- a/api/controllers/console/auth/oauth.py +++ b/api/controllers/console/auth/oauth.py @@ -148,15 +148,15 @@ def _generate_account(provider: str, user_info: OAuthUserInfo): account = _get_account_by_openid_or_email(provider, user_info) if account: - tenant = TenantService.get_join_tenants(account) - if not tenant: + tenants = TenantService.get_join_tenants(account) + if not tenants: if not FeatureService.get_system_features().is_allow_create_workspace: raise WorkSpaceNotAllowedCreateError() else: - tenant = TenantService.create_tenant(f"{account.name}'s Workspace") - TenantService.create_tenant_member(tenant, account, role="owner") - account.current_tenant = 
tenant - tenant_was_created.send(tenant) + new_tenant = TenantService.create_tenant(f"{account.name}'s Workspace") + TenantService.create_tenant_member(new_tenant, account, role="owner") + account.current_tenant = new_tenant + tenant_was_created.send(new_tenant) if not account: if not FeatureService.get_system_features().is_allow_register: diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py index 981619b0cb..e68273afa6 100644 --- a/api/controllers/console/datasets/datasets.py +++ b/api/controllers/console/datasets/datasets.py @@ -540,9 +540,22 @@ class DatasetIndexingStatusApi(Resource): .filter(DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment") .count() ) - document.completed_segments = completed_segments - document.total_segments = total_segments - documents_status.append(marshal(document, document_status_fields)) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + documents_status.append(marshal(document_dict, document_status_fields)) data = {"data": documents_status} return data diff --git a/api/controllers/console/datasets/datasets_document.py b/api/controllers/console/datasets/datasets_document.py index ca18c25e74..f7c04102a9 100644 --- a/api/controllers/console/datasets/datasets_document.py +++ b/api/controllers/console/datasets/datasets_document.py @@ -583,11 +583,22 @@ class DocumentBatchIndexingStatusApi(DocumentResource): .filter(DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment") .count() ) - document.completed_segments = completed_segments - document.total_segments = total_segments - if document.is_paused: - document.indexing_status = "paused" - documents_status.append(marshal(document, document_status_fields)) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": "paused" if document.is_paused else document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + documents_status.append(marshal(document_dict, document_status_fields)) data = {"data": documents_status} return data @@ -616,11 +627,22 @@ class DocumentIndexingStatusApi(DocumentResource): .count() ) - document.completed_segments = completed_segments - document.total_segments = total_segments - if document.is_paused: - document.indexing_status = "paused" - return marshal(document, document_status_fields) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": 
"paused" if document.is_paused else document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + return marshal(document_dict, document_status_fields) class DocumentDetailApi(DocumentResource): diff --git a/api/controllers/console/workspace/workspace.py b/api/controllers/console/workspace/workspace.py index 34af80bca7..19999e7361 100644 --- a/api/controllers/console/workspace/workspace.py +++ b/api/controllers/console/workspace/workspace.py @@ -68,16 +68,24 @@ class TenantListApi(Resource): @account_initialization_required def get(self): tenants = TenantService.get_join_tenants(current_user) + tenant_dicts = [] for tenant in tenants: features = FeatureService.get_features(tenant.id) - if features.billing.enabled: - tenant.plan = features.billing.subscription.plan - else: - tenant.plan = "sandbox" - if tenant.id == current_user.current_tenant_id: - tenant.current = True # Set current=True for current tenant - return {"workspaces": marshal(tenants, tenants_fields)}, 200 + + # Create a dictionary with tenant attributes + tenant_dict = { + "id": tenant.id, + "name": tenant.name, + "status": tenant.status, + "created_at": tenant.created_at, + "plan": features.billing.subscription.plan if features.billing.enabled else "sandbox", + "current": tenant.id == current_user.current_tenant_id, + } + + tenant_dicts.append(tenant_dict) + + return {"workspaces": marshal(tenant_dicts, tenants_fields)}, 200 class WorkspaceListApi(Resource): diff --git a/api/controllers/files/upload.py b/api/controllers/files/upload.py index 6641632169..f1a15793c7 100644 --- a/api/controllers/files/upload.py +++ b/api/controllers/files/upload.py @@ -64,9 +64,24 @@ class PluginUploadFileApi(Resource): extension = guess_extension(tool_file.mimetype) or ".bin" preview_url = ToolFileManager.sign_file(tool_file_id=tool_file.id, extension=extension) - tool_file.mime_type = mimetype - tool_file.extension = extension - tool_file.preview_url = preview_url + + # Create a dictionary with all the necessary attributes + result = { + "id": tool_file.id, + "user_id": tool_file.user_id, + "tenant_id": tool_file.tenant_id, + "conversation_id": tool_file.conversation_id, + "file_key": tool_file.file_key, + "mimetype": tool_file.mimetype, + "original_url": tool_file.original_url, + "name": tool_file.name, + "size": tool_file.size, + "mime_type": mimetype, + "extension": extension, + "preview_url": preview_url, + } + + return result, 201 except services.errors.file.FileTooLargeError as file_too_large_error: raise FileTooLargeError(file_too_large_error.description) except services.errors.file.UnsupportedFileTypeError: diff --git a/api/controllers/service_api/dataset/document.py b/api/controllers/service_api/dataset/document.py index 44c75f40ef..418363ffbb 100644 --- a/api/controllers/service_api/dataset/document.py +++ b/api/controllers/service_api/dataset/document.py @@ -388,11 +388,22 @@ class DocumentIndexingStatusApi(DatasetApiResource): .filter(DocumentSegment.document_id == str(document.id), DocumentSegment.status != "re_segment") .count() ) - document.completed_segments = completed_segments - document.total_segments = 
total_segments - if document.is_paused: - document.indexing_status = "paused" - documents_status.append(marshal(document, document_status_fields)) + # Create a dictionary with document attributes and additional fields + document_dict = { + "id": document.id, + "indexing_status": "paused" if document.is_paused else document.indexing_status, + "processing_started_at": document.processing_started_at, + "parsing_completed_at": document.parsing_completed_at, + "cleaning_completed_at": document.cleaning_completed_at, + "splitting_completed_at": document.splitting_completed_at, + "completed_at": document.completed_at, + "paused_at": document.paused_at, + "error": document.error, + "stopped_at": document.stopped_at, + "completed_segments": completed_segments, + "total_segments": total_segments, + } + documents_status.append(marshal(document_dict, document_status_fields)) data = {"data": documents_status} return data diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index 01f74b4a22..2c5178241c 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -405,7 +405,29 @@ class RetrievalService: record["child_chunks"] = segment_child_map[record["segment"].id].get("child_chunks") # type: ignore record["score"] = segment_child_map[record["segment"].id]["max_score"] - return [RetrievalSegments(**record) for record in records] + result = [] + for record in records: + # Extract segment + segment = record["segment"] + + # Extract child_chunks, ensuring it's a list or None + child_chunks = record.get("child_chunks") + if not isinstance(child_chunks, list): + child_chunks = None + + # Extract score, ensuring it's a float or None + score_value = record.get("score") + score = ( + float(score_value) + if score_value is not None and isinstance(score_value, int | float | str) + else None + ) + + # Create RetrievalSegments object + retrieval_segment = RetrievalSegments(segment=segment, child_chunks=child_chunks, score=score) + result.append(retrieval_segment) + + return result except Exception as e: db.session.rollback() raise e diff --git a/api/core/tools/tool_manager.py b/api/core/tools/tool_manager.py index aa2661fe63..0bfe6329b1 100644 --- a/api/core/tools/tool_manager.py +++ b/api/core/tools/tool_manager.py @@ -528,7 +528,7 @@ class ToolManager: yield provider except Exception: - logger.exception(f"load builtin provider {provider}") + logger.exception(f"load builtin provider {provider_path}") continue # set builtin providers loaded cls._builtin_providers_loaded = True @@ -644,10 +644,10 @@ class ToolManager: ) workflow_provider_controllers: list[WorkflowToolProviderController] = [] - for provider in workflow_providers: + for workflow_provider in workflow_providers: try: workflow_provider_controllers.append( - ToolTransformService.workflow_provider_to_controller(db_provider=provider) + ToolTransformService.workflow_provider_to_controller(db_provider=workflow_provider) ) except Exception: # app has been deleted diff --git a/api/core/tools/workflow_as_tool/tool.py b/api/core/tools/workflow_as_tool/tool.py index 241b4a94de..57c93d1d45 100644 --- a/api/core/tools/workflow_as_tool/tool.py +++ b/api/core/tools/workflow_as_tool/tool.py @@ -1,7 +1,9 @@ import json import logging from collections.abc import Generator -from typing import Any, Optional, Union, cast +from typing import Any, Optional, cast + +from flask_login import current_user from core.file import FILE_MODEL_IDENTITY, File, FileTransferMethod from 
core.tools.__base.tool import Tool @@ -87,7 +89,7 @@ class WorkflowTool(Tool): result = generator.generate( app_model=app, workflow=workflow, - user=self._get_user(user_id), + user=cast("Account | EndUser", current_user), args={"inputs": tool_parameters, "files": files}, invoke_from=self.runtime.invoke_from, streaming=False, @@ -111,20 +113,6 @@ class WorkflowTool(Tool): yield self.create_text_message(json.dumps(outputs, ensure_ascii=False)) yield self.create_json_message(outputs) - def _get_user(self, user_id: str) -> Union[EndUser, Account]: - """ - get the user by user id - """ - - user = db.session.query(EndUser).filter(EndUser.id == user_id).first() - if not user: - user = db.session.query(Account).filter(Account.id == user_id).first() - - if not user: - raise ValueError("user not found") - - return user - def fork_tool_runtime(self, runtime: ToolRuntime) -> "WorkflowTool": """ fork a new tool with metadata diff --git a/api/extensions/ext_login.py b/api/extensions/ext_login.py index 10fb89eb73..80fee7ccd8 100644 --- a/api/extensions/ext_login.py +++ b/api/extensions/ext_login.py @@ -3,11 +3,14 @@ import json import flask_login # type: ignore from flask import Response, request from flask_login import user_loaded_from_request, user_logged_in -from werkzeug.exceptions import Unauthorized +from werkzeug.exceptions import NotFound, Unauthorized import contexts from dify_app import DifyApp +from extensions.ext_database import db from libs.passport import PassportService +from models.account import Account +from models.model import EndUser from services.account_service import AccountService login_manager = flask_login.LoginManager() @@ -17,34 +20,48 @@ login_manager = flask_login.LoginManager() @login_manager.request_loader def load_user_from_request(request_from_flask_login): """Load user based on the request.""" - if request.blueprint not in {"console", "inner_api"}: - return None - # Check if the user_id contains a dot, indicating the old format auth_header = request.headers.get("Authorization", "") - if not auth_header: - auth_token = request.args.get("_token") - if not auth_token: - raise Unauthorized("Invalid Authorization token.") - else: + auth_token: str | None = None + if auth_header: if " " not in auth_header: raise Unauthorized("Invalid Authorization header format. Expected 'Bearer ' format.") - auth_scheme, auth_token = auth_header.split(None, 1) + auth_scheme, auth_token = auth_header.split(maxsplit=1) auth_scheme = auth_scheme.lower() if auth_scheme != "bearer": raise Unauthorized("Invalid Authorization header format. 
Expected 'Bearer ' format.") + else: + auth_token = request.args.get("_token") - decoded = PassportService().verify(auth_token) - user_id = decoded.get("user_id") + if request.blueprint in {"console", "inner_api"}: + if not auth_token: + raise Unauthorized("Invalid Authorization token.") + decoded = PassportService().verify(auth_token) + user_id = decoded.get("user_id") + if not user_id: + raise Unauthorized("Invalid Authorization token.") - logged_in_account = AccountService.load_logged_in_account(account_id=user_id) - return logged_in_account + logged_in_account = AccountService.load_logged_in_account(account_id=user_id) + return logged_in_account + elif request.blueprint == "web": + decoded = PassportService().verify(auth_token) + end_user_id = decoded.get("end_user_id") + if not end_user_id: + raise Unauthorized("Invalid Authorization token.") + end_user = db.session.query(EndUser).filter(EndUser.id == decoded["end_user_id"]).first() + if not end_user: + raise NotFound("End user not found.") + return end_user @user_logged_in.connect @user_loaded_from_request.connect def on_user_logged_in(_sender, user): - """Called when a user logged in.""" - if user: + """Called when a user logged in. + + Note: AccountService.load_logged_in_account will populate user.current_tenant_id + through the load_user method, which calls account.set_tenant_id(). + """ + if user and isinstance(user, Account) and user.current_tenant_id: contexts.tenant_id.set(user.current_tenant_id) diff --git a/api/models/account.py b/api/models/account.py index bb6a2a4735..7ffeefa980 100644 --- a/api/models/account.py +++ b/api/models/account.py @@ -1,10 +1,10 @@ import enum import json -from typing import cast +from typing import Optional, cast from flask_login import UserMixin # type: ignore from sqlalchemy import func -from sqlalchemy.orm import Mapped, mapped_column +from sqlalchemy.orm import Mapped, mapped_column, reconstructor from models.base import Base @@ -12,6 +12,66 @@ from .engine import db from .types import StringUUID +class TenantAccountRole(enum.StrEnum): + OWNER = "owner" + ADMIN = "admin" + EDITOR = "editor" + NORMAL = "normal" + DATASET_OPERATOR = "dataset_operator" + + @staticmethod + def is_valid_role(role: str) -> bool: + if not role: + return False + return role in { + TenantAccountRole.OWNER, + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + TenantAccountRole.NORMAL, + TenantAccountRole.DATASET_OPERATOR, + } + + @staticmethod + def is_privileged_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN} + + @staticmethod + def is_admin_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role == TenantAccountRole.ADMIN + + @staticmethod + def is_non_owner_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in { + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + TenantAccountRole.NORMAL, + TenantAccountRole.DATASET_OPERATOR, + } + + @staticmethod + def is_editing_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN, TenantAccountRole.EDITOR} + + @staticmethod + def is_dataset_edit_role(role: Optional["TenantAccountRole"]) -> bool: + if not role: + return False + return role in { + TenantAccountRole.OWNER, + TenantAccountRole.ADMIN, + TenantAccountRole.EDITOR, + TenantAccountRole.DATASET_OPERATOR, + } + + class 
AccountStatus(enum.StrEnum): PENDING = "pending" UNINITIALIZED = "uninitialized" @@ -41,24 +101,27 @@ class Account(UserMixin, Base): created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) updated_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp()) + @reconstructor + def init_on_load(self): + self.role: Optional[TenantAccountRole] = None + self._current_tenant: Optional[Tenant] = None + @property def is_password_set(self): return self.password is not None @property def current_tenant(self): - return self._current_tenant # type: ignore + return self._current_tenant @current_tenant.setter - def current_tenant(self, value: "Tenant"): - tenant = value + def current_tenant(self, tenant: "Tenant"): ta = db.session.query(TenantAccountJoin).filter_by(tenant_id=tenant.id, account_id=self.id).first() if ta: - tenant.current_role = ta.role - else: - tenant = None # type: ignore - - self._current_tenant = tenant + self.role = TenantAccountRole(ta.role) + self._current_tenant = tenant + return + self._current_tenant = None @property def current_tenant_id(self) -> str | None: @@ -80,12 +143,12 @@ class Account(UserMixin, Base): return tenant, join = tenant_account_join - tenant.current_role = join.role + self.role = join.role self._current_tenant = tenant @property def current_role(self): - return self._current_tenant.current_role + return self.role def get_status(self) -> AccountStatus: status_str = self.status @@ -105,23 +168,23 @@ class Account(UserMixin, Base): # check current_user.current_tenant.current_role in ['admin', 'owner'] @property def is_admin_or_owner(self): - return TenantAccountRole.is_privileged_role(self._current_tenant.current_role) + return TenantAccountRole.is_privileged_role(self.role) @property def is_admin(self): - return TenantAccountRole.is_admin_role(self._current_tenant.current_role) + return TenantAccountRole.is_admin_role(self.role) @property def is_editor(self): - return TenantAccountRole.is_editing_role(self._current_tenant.current_role) + return TenantAccountRole.is_editing_role(self.role) @property def is_dataset_editor(self): - return TenantAccountRole.is_dataset_edit_role(self._current_tenant.current_role) + return TenantAccountRole.is_dataset_edit_role(self.role) @property def is_dataset_operator(self): - return self._current_tenant.current_role == TenantAccountRole.DATASET_OPERATOR + return self.role == TenantAccountRole.DATASET_OPERATOR class TenantStatus(enum.StrEnum): @@ -129,66 +192,6 @@ class TenantStatus(enum.StrEnum): ARCHIVE = "archive" -class TenantAccountRole(enum.StrEnum): - OWNER = "owner" - ADMIN = "admin" - EDITOR = "editor" - NORMAL = "normal" - DATASET_OPERATOR = "dataset_operator" - - @staticmethod - def is_valid_role(role: str) -> bool: - if not role: - return False - return role in { - TenantAccountRole.OWNER, - TenantAccountRole.ADMIN, - TenantAccountRole.EDITOR, - TenantAccountRole.NORMAL, - TenantAccountRole.DATASET_OPERATOR, - } - - @staticmethod - def is_privileged_role(role: str) -> bool: - if not role: - return False - return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN} - - @staticmethod - def is_admin_role(role: str) -> bool: - if not role: - return False - return role == TenantAccountRole.ADMIN - - @staticmethod - def is_non_owner_role(role: str) -> bool: - if not role: - return False - return role in { - TenantAccountRole.ADMIN, - TenantAccountRole.EDITOR, - TenantAccountRole.NORMAL, - TenantAccountRole.DATASET_OPERATOR, - } - - @staticmethod - def 
is_editing_role(role: str) -> bool: - if not role: - return False - return role in {TenantAccountRole.OWNER, TenantAccountRole.ADMIN, TenantAccountRole.EDITOR} - - @staticmethod - def is_dataset_edit_role(role: str) -> bool: - if not role: - return False - return role in { - TenantAccountRole.OWNER, - TenantAccountRole.ADMIN, - TenantAccountRole.EDITOR, - TenantAccountRole.DATASET_OPERATOR, - } - - class Tenant(Base): __tablename__ = "tenants" __table_args__ = (db.PrimaryKeyConstraint("id", name="tenant_pkey"),) diff --git a/api/models/base.py b/api/models/base.py index da9509301a..bd120f5487 100644 --- a/api/models/base.py +++ b/api/models/base.py @@ -1,5 +1,7 @@ -from sqlalchemy.orm import declarative_base +from sqlalchemy.orm import DeclarativeBase from models.engine import metadata -Base = declarative_base(metadata=metadata) + +class Base(DeclarativeBase): + metadata = metadata diff --git a/api/models/tools.py b/api/models/tools.py index e027475e38..03fbc3acb1 100644 --- a/api/models/tools.py +++ b/api/models/tools.py @@ -172,10 +172,6 @@ class WorkflowToolProvider(Base): db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)") ) - @property - def schema_type(self) -> ApiProviderSchemaType: - return ApiProviderSchemaType.value_of(self.schema_type_str) - @property def user(self) -> Account | None: return db.session.query(Account).filter(Account.id == self.user_id).first() diff --git a/api/models/workflow.py b/api/models/workflow.py index 91be1cda6e..8751fd1aa8 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -3,7 +3,7 @@ import logging from collections.abc import Mapping, Sequence from datetime import UTC, datetime from enum import Enum, StrEnum -from typing import TYPE_CHECKING, Any, Optional, Self, Union +from typing import TYPE_CHECKING, Any, Optional, Union from uuid import uuid4 from core.variables import utils as variable_utils @@ -150,7 +150,7 @@ class Workflow(Base): conversation_variables: Sequence[Variable], marked_name: str = "", marked_comment: str = "", - ) -> Self: + ) -> "Workflow": workflow = Workflow() workflow.id = str(uuid4()) workflow.tenant_id = tenant_id diff --git a/api/services/vector_service.py b/api/services/vector_service.py index 58292c59f4..19e37f4ee3 100644 --- a/api/services/vector_service.py +++ b/api/services/vector_service.py @@ -23,11 +23,10 @@ class VectorService: ): documents: list[Document] = [] - document: Document | None = None for segment in segments: if doc_form == IndexType.PARENT_CHILD_INDEX: - document = db.session.query(DatasetDocument).filter_by(id=segment.document_id).first() - if not document: + dataset_document = db.session.query(DatasetDocument).filter_by(id=segment.document_id).first() + if not dataset_document: _logger.warning( "Expected DatasetDocument record to exist, but none was found, document_id=%s, segment_id=%s", segment.document_id, @@ -37,7 +36,7 @@ class VectorService: # get the process rule processing_rule = ( db.session.query(DatasetProcessRule) - .filter(DatasetProcessRule.id == document.dataset_process_rule_id) + .filter(DatasetProcessRule.id == dataset_document.dataset_process_rule_id) .first() ) if not processing_rule: @@ -61,9 +60,11 @@ class VectorService: ) else: raise ValueError("The knowledge base index technique is not high quality!") - cls.generate_child_chunks(segment, document, dataset, embedding_model_instance, processing_rule, False) + cls.generate_child_chunks( + segment, dataset_document, dataset, embedding_model_instance, processing_rule, False + ) else: - document = 
Document( + rag_document = Document( page_content=segment.content, metadata={ "doc_id": segment.index_node_id, @@ -72,7 +73,7 @@ class VectorService: "dataset_id": segment.dataset_id, }, ) - documents.append(document) + documents.append(rag_document) if len(documents) > 0: index_processor = IndexProcessorFactory(doc_form).init_index_processor() index_processor.load(dataset, documents, with_keywords=True, keywords_list=keywords_list) diff --git a/api/services/workflow_service.py b/api/services/workflow_service.py index 06d92b9e29..50bb8f40ae 100644 --- a/api/services/workflow_service.py +++ b/api/services/workflow_service.py @@ -508,11 +508,11 @@ class WorkflowService: raise DraftWorkflowDeletionError("Cannot delete draft workflow versions") # Check if this workflow is currently referenced by an app - stmt = select(App).where(App.workflow_id == workflow_id) - app = session.scalar(stmt) + app_stmt = select(App).where(App.workflow_id == workflow_id) + app = session.scalar(app_stmt) if app: # Cannot delete a workflow that's currently in use by an app - raise WorkflowInUseError(f"Cannot delete workflow that is currently in use by app '{app.name}'") + raise WorkflowInUseError(f"Cannot delete workflow that is currently in use by app '{app.id}'") # Don't use workflow.tool_published as it's not accurate for specific workflow versions # Check if there's a tool provider using this specific workflow version diff --git a/api/tasks/add_document_to_index_task.py b/api/tasks/add_document_to_index_task.py index be88881efc..75d648e1b7 100644 --- a/api/tasks/add_document_to_index_task.py +++ b/api/tasks/add_document_to_index_task.py @@ -111,7 +111,7 @@ def add_document_to_index_task(dataset_document_id: str): logging.exception("add document to index failed") dataset_document.enabled = False dataset_document.disabled_at = datetime.datetime.now(datetime.UTC).replace(tzinfo=None) - dataset_document.status = "error" + dataset_document.indexing_status = "error" dataset_document.error = str(e) db.session.commit() finally: diff --git a/api/tasks/remove_app_and_related_data_task.py b/api/tasks/remove_app_and_related_data_task.py index d9c1980d3f..ae6f923ca8 100644 --- a/api/tasks/remove_app_and_related_data_task.py +++ b/api/tasks/remove_app_and_related_data_task.py @@ -193,7 +193,7 @@ def _delete_app_workflow_runs(tenant_id: str, app_id: str): def _delete_app_workflow_node_executions(tenant_id: str, app_id: str): # Get app's owner with Session(db.engine, expire_on_commit=False) as session: - stmt = select(Account).where(Account.id == App.owner_id).where(App.id == app_id) + stmt = select(Account).where(Account.id == App.created_by).where(App.id == app_id) user = session.scalar(stmt) if user is None: diff --git a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py index 15a9e8e9f4..fa6fc3ba32 100644 --- a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py +++ b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py @@ -34,13 +34,13 @@ def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_fiel # needs to patch those methods to avoid database access. monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) - monkeypatch.setattr(tool, "_get_user", lambda *args, **kwargs: None) # replace `WorkflowAppGenerator.generate` 's return value. 
monkeypatch.setattr( "core.app.apps.workflow.app_generator.WorkflowAppGenerator.generate", lambda *args, **kwargs: {"data": {"error": "oops"}}, ) + monkeypatch.setattr("flask_login.current_user", lambda *args, **kwargs: None) with pytest.raises(ToolInvokeError) as exc_info: # WorkflowTool always returns a generator, so we need to iterate to From 57bcb616bc503a5ed92b55e0efcbb02472e1c36a Mon Sep 17 00:00:00 2001 From: -LAN- Date: Wed, 21 May 2025 16:37:44 +0800 Subject: [PATCH 03/19] fix(sqlalchemy_workflow_node_execution_repository): Missing `triggered_from` while querying WorkflowNodeExecution (#20044) Signed-off-by: -LAN- --- ...hemy_workflow_node_execution_repository.py | 100 +++++++++++------- api/services/workflow_run_service.py | 12 +-- 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py index 7e0115e4e7..b213449be5 100644 --- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py @@ -4,8 +4,8 @@ SQLAlchemy implementation of the WorkflowNodeExecutionRepository. import json import logging -from collections.abc import Mapping, Sequence -from typing import Any, Optional, Union, cast +from collections.abc import Sequence +from typing import Optional, Union from sqlalchemy import UnaryExpression, asc, delete, desc, select from sqlalchemy.engine import Engine @@ -86,8 +86,8 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER # Initialize in-memory cache for node executions - # Key: node_execution_id, Value: NodeExecution - self._node_execution_cache: dict[str, NodeExecution] = {} + # Key: node_execution_id, Value: WorkflowNodeExecution (DB model) + self._node_execution_cache: dict[str, WorkflowNodeExecution] = {} def _to_domain_model(self, db_model: WorkflowNodeExecution) -> NodeExecution: """ @@ -103,7 +103,10 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) inputs = db_model.inputs_dict process_data = db_model.process_data_dict outputs = db_model.outputs_dict - metadata = db_model.execution_metadata_dict + if db_model.execution_metadata_dict: + metadata = {NodeRunMetadataKey(k): v for k, v in db_model.execution_metadata_dict.items()} + else: + metadata = {} # Convert status to domain enum status = NodeExecutionStatus(db_model.status) @@ -124,12 +127,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) status=status, error=db_model.error, elapsed_time=db_model.elapsed_time, - # FIXME(QuantumGhost): a temporary workaround for the following type check failure in Python 3.11. - # However, this problem is not occurred in Python 3.12. 
- # - # A case of this error is: - # https://github.com/langgenius/dify/actions/runs/15112698604/job/42475659482?pr=19737#step:9:24 - metadata=cast(Mapping[NodeRunMetadataKey, Any] | None, metadata), + metadata=metadata, created_at=db_model.created_at, finished_at=db_model.finished_at, ) @@ -211,7 +209,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) # Only cache if we have a node_execution_id to use as the cache key if db_model.node_execution_id: logger.debug(f"Updating cache for node_execution_id: {db_model.node_execution_id}") - self._node_execution_cache[db_model.node_execution_id] = execution + self._node_execution_cache[db_model.node_execution_id] = db_model def get_by_node_execution_id(self, node_execution_id: str) -> Optional[NodeExecution]: """ @@ -229,7 +227,9 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) # First check the cache if node_execution_id in self._node_execution_cache: logger.debug(f"Cache hit for node_execution_id: {node_execution_id}") - return self._node_execution_cache[node_execution_id] + # Convert cached DB model to domain model + cached_db_model = self._node_execution_cache[node_execution_id] + return self._to_domain_model(cached_db_model) # If not in cache, query the database logger.debug(f"Cache miss for node_execution_id: {node_execution_id}, querying database") @@ -244,26 +244,25 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) db_model = session.scalar(stmt) if db_model: - # Convert to domain model - domain_model = self._to_domain_model(db_model) + # Add DB model to cache + self._node_execution_cache[node_execution_id] = db_model - # Add to cache - self._node_execution_cache[node_execution_id] = domain_model - - return domain_model + # Convert to domain model and return + return self._to_domain_model(db_model) return None - def get_by_workflow_run( + def get_db_models_by_workflow_run( self, workflow_run_id: str, order_config: Optional[OrderConfig] = None, - ) -> Sequence[NodeExecution]: + ) -> Sequence[WorkflowNodeExecution]: """ - Retrieve all NodeExecution instances for a specific workflow run. + Retrieve all WorkflowNodeExecution database models for a specific workflow run. - This method always queries the database to ensure complete and ordered results, - but updates the cache with any retrieved executions. + This method directly returns database models without converting to domain models, + which is useful when you need to access database-specific fields like triggered_from. + It also updates the in-memory cache with the retrieved models. 
Args: workflow_run_id: The workflow run ID @@ -272,7 +271,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) order_config.order_direction: Direction to order ("asc" or "desc") Returns: - A list of NodeExecution instances + A list of WorkflowNodeExecution database models """ with self._session_factory() as session: stmt = select(WorkflowNodeExecution).where( @@ -301,16 +300,43 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) db_models = session.scalars(stmt).all() - # Convert database models to domain models and update cache - domain_models = [] + # Update the cache with the retrieved DB models for model in db_models: - domain_model = self._to_domain_model(model) - # Update cache if node_execution_id is present - if domain_model.node_execution_id: - self._node_execution_cache[domain_model.node_execution_id] = domain_model - domain_models.append(domain_model) + if model.node_execution_id: + self._node_execution_cache[model.node_execution_id] = model - return domain_models + return db_models + + def get_by_workflow_run( + self, + workflow_run_id: str, + order_config: Optional[OrderConfig] = None, + ) -> Sequence[NodeExecution]: + """ + Retrieve all NodeExecution instances for a specific workflow run. + + This method always queries the database to ensure complete and ordered results, + but updates the cache with any retrieved executions. + + Args: + workflow_run_id: The workflow run ID + order_config: Optional configuration for ordering results + order_config.order_by: List of fields to order by (e.g., ["index", "created_at"]) + order_config.order_direction: Direction to order ("asc" or "desc") + + Returns: + A list of NodeExecution instances + """ + # Get the database models using the new method + db_models = self.get_db_models_by_workflow_run(workflow_run_id, order_config) + + # Convert database models to domain models + domain_models = [] + for model in db_models: + domain_model = self._to_domain_model(model) + domain_models.append(domain_model) + + return domain_models def get_running_executions(self, workflow_run_id: str) -> Sequence[NodeExecution]: """ @@ -340,10 +366,12 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) domain_models = [] for model in db_models: - domain_model = self._to_domain_model(model) # Update cache if node_execution_id is present - if domain_model.node_execution_id: - self._node_execution_cache[domain_model.node_execution_id] = domain_model + if model.node_execution_id: + self._node_execution_cache[model.node_execution_id] = model + + # Convert to domain model + domain_model = self._to_domain_model(model) domain_models.append(domain_model) return domain_models diff --git a/api/services/workflow_run_service.py b/api/services/workflow_run_service.py index 3efdb3a79d..21366a4552 100644 --- a/api/services/workflow_run_service.py +++ b/api/services/workflow_run_service.py @@ -15,6 +15,7 @@ from models import ( WorkflowRun, WorkflowRunTriggeredFrom, ) +from models.workflow import WorkflowNodeExecutionTriggeredFrom class WorkflowRunService: @@ -140,14 +141,13 @@ class WorkflowRunService: session_factory=db.engine, user=user, app_id=app_model.id, - triggered_from=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, ) - # Use the repository to get the node executions with ordering + # Use the repository to get the database models directly order_config = OrderConfig(order_by=["index"], order_direction="desc") - node_executions = 
repository.get_by_workflow_run(workflow_run_id=run_id, order_config=order_config) - - # Convert domain models to database models - workflow_node_executions = [repository.to_db_model(node_execution) for node_execution in node_executions] + workflow_node_executions = repository.get_db_models_by_workflow_run( + workflow_run_id=run_id, order_config=order_config + ) return workflow_node_executions From 997b46bfaac7b1d84cad383968faff14dc522b52 Mon Sep 17 00:00:00 2001 From: Hanqing Zhao Date: Wed, 21 May 2025 16:41:05 +0800 Subject: [PATCH 04/19] Fix/modify translation (#20046) --- web/i18n/ja-JP/app-debug.ts | 18 ++++++------ web/i18n/ja-JP/app-overview.ts | 6 ++-- web/i18n/ja-JP/app.ts | 44 ++++++++++++++++------------- web/i18n/ja-JP/billing.ts | 12 ++++---- web/i18n/ja-JP/common.ts | 12 ++++---- web/i18n/ja-JP/dataset-documents.ts | 2 +- web/i18n/ja-JP/dataset-settings.ts | 2 +- web/i18n/ja-JP/dataset.ts | 2 +- web/i18n/ja-JP/login.ts | 4 +-- web/i18n/ja-JP/time.ts | 12 ++++---- web/i18n/ja-JP/tools.ts | 2 +- 11 files changed, 59 insertions(+), 57 deletions(-) diff --git a/web/i18n/ja-JP/app-debug.ts b/web/i18n/ja-JP/app-debug.ts index 23fc7adba7..350d58d60a 100644 --- a/web/i18n/ja-JP/app-debug.ts +++ b/web/i18n/ja-JP/app-debug.ts @@ -218,6 +218,10 @@ const translation = { enableText: '有効な機能', manage: '管理', }, + documentUpload: { + title: 'ドキュメント', + description: 'ドキュメント機能を有効にすると、AIモデルがファイルを処理し、その内容に基づいて質問に回答できるようになります。', + }, }, codegen: { title: 'コードジェネレーター', @@ -246,6 +250,7 @@ const translation = { noDataLine1: '左側に使用例を記入してください,', noDataLine2: 'オーケストレーションのプレビューがこちらに表示されます。', apply: '適用', + noData: '左側にユースケースを入力すると、こちらでプレビューができます。', loading: 'アプリケーションを処理中です', overwriteTitle: '既存の設定を上書きしますか?', overwriteMessage: 'このプロンプトを適用すると、既存の設定が上書きされます。', @@ -302,10 +307,7 @@ const translation = { waitForImgUpload: '画像のアップロードが完了するまでお待ちください', waitForFileUpload: 'ファイルのアップロードが完了するまでお待ちください', }, - warningMessage: { - timeoutExceeded: 'タイムアウトのため結果が表示されません。完全な結果を手にいれるためには、ログを参照してください。', - }, - chatSubTitle: '手順', + chatSubTitle: 'プロンプト', completionSubTitle: '接頭辞プロンプト', promptTip: 'プロンプトは、AIの応答を指示と制約で誘導します。 {{input}} のような変数を挿入します。このプロンプトはユーザーには表示されません。', formattingChangedTitle: '書式が変更されました', @@ -356,7 +358,6 @@ const translation = { 'varName': '変数名', 'labelName': 'ラベル名', 'inputPlaceholder': '入力してください', - 'content': 'コンテンツ', 'required': '必須', 'file': { supportFileTypes: 'サポートされたファイルタイプ', @@ -452,10 +453,8 @@ const translation = { noPrompt: 'プレプロンプト入力にいくつかのプロンプトを記入してみてください', userInputField: 'ユーザー入力フィールド', noVar: '変数の値を入力してください。新しいセッションが開始されるたびにプロンプトの単語が自動的に置換されます。', - chatVarTip: - '変数の値を入力してください。新しいセッションが開始されるたびにプロンプトの単語が自動的に置換されます。', - completionVarTip: - '変数の値を入力してください。質問が送信されるたびにプロンプトの単語が自動的に置換されます。', + chatVarTip: '変数の値を入力してください。新しいセッションが開始されるたびにプロンプトの単語が自動的に置換されます。', + completionVarTip: '変数の値を入力してください。質問が送信されるたびにプロンプトの単語が自動的に置換されます。', previewTitle: 'プロンプトのプレビュー', queryTitle: 'クエリ内容', queryPlaceholder: 'リクエストテキストを入力してください。', @@ -474,6 +473,7 @@ const translation = { title: 'マルチパスリトリーバル', description: 'ユーザーの意図に基づいて、すべてのナレッジをクエリし、複数のソースから関連するテキストを取得し、再順位付け後、ユーザークエリに最適な結果を選択します。再順位付けモデル API の構成が必要です。', }, + embeddingModelRequired: 'Embeddingモデルが設定されていない', rerankModelRequired: '再順位付けモデルが必要です', params: 'パラメータ', top_k: 'トップK', diff --git a/web/i18n/ja-JP/app-overview.ts b/web/i18n/ja-JP/app-overview.ts index d67860ff37..3b0505b0de 100644 --- a/web/i18n/ja-JP/app-overview.ts +++ b/web/i18n/ja-JP/app-overview.ts @@ -3,7 +3,7 @@ const translation = { firstStepTip: 'はじめるには、', enterKeyTip: '以下にOpenAI 
APIキーを入力してください', getKeyTip: 'OpenAIダッシュボードからAPIキーを取得してください', - placeholder: 'あなた様のOpenAI APIキー(例:sk-xxxx)', + placeholder: 'OpenAI APIキー(例:sk-xxxx)', }, apiKeyInfo: { cloud: { @@ -67,7 +67,7 @@ const translation = { customDisclaimerPlaceholder: '免責事項を入力してください', customDisclaimerTip: 'アプリケーションの使用に関する免責事項を提供します。', copyrightTooltip: 'プロフェッショナルプラン以上にアップグレードしてください', - copyrightTip: 'ウェブアプリに著作権情報を表示する', + copyrightTip: 'Webアプリに著作権情報を表示する', }, sso: { title: 'WebアプリのSSO', @@ -117,7 +117,7 @@ const translation = { }, apiInfo: { title: 'バックエンドサービスAPI', - explanation: 'あなた様のアプリケーションに簡単に統合できます', + explanation: 'あなたのアプリケーションに簡単に統合できます', accessibleAddress: 'サービスAPIエンドポイント', doc: 'APIリファレンス', }, diff --git a/web/i18n/ja-JP/app.ts b/web/i18n/ja-JP/app.ts index f0bc4ec72d..2c5d2c0f92 100644 --- a/web/i18n/ja-JP/app.ts +++ b/web/i18n/ja-JP/app.ts @@ -10,6 +10,10 @@ const translation = { advanced: 'チャットフロー', }, duplicate: '複製', + mermaid: { + handDrawn: '手描き', + classic: 'クラシック', + }, duplicateTitle: 'アプリを複製する', export: 'DSL をエクスポート', exportFailed: 'DSL のエクスポートに失敗しました。', @@ -21,12 +25,11 @@ const translation = { importFromDSLUrlPlaceholder: 'DSLリンクをここに貼り付けます', deleteAppConfirmTitle: 'このアプリを削除しますか?', deleteAppConfirmContent: - 'アプリを削除すると、元に戻すことはできません。ユーザーはもはやあなた様のアプリにアクセスできず、すべてのプロンプトの設定とログが永久に削除されます。', + 'アプリを削除すると、元に戻すことはできません。他のユーザーはもはやこのアプリにアクセスできず、すべてのプロンプトの設定とログが永久に削除されます。', appDeleted: 'アプリが削除されました', appDeleteFailed: 'アプリの削除に失敗しました', join: 'コミュニティに参加する', - communityIntro: - 'さまざまなチャンネルでチームメンバーや貢献者、開発者と議論します。', + communityIntro: 'さまざまなチャンネルでチームメンバーや貢献者、開発者と議論します。', roadmap: 'ロードマップを見る', newApp: { startFromBlank: '最初から作成', @@ -128,6 +131,7 @@ const translation = { title: 'アプリのパフォーマンスの追跡', description: 'サードパーティのLLMOpsサービスとトレースアプリケーションのパフォーマンス設定を行います。', config: '設定', + view: '見る', collapse: '折りたたむ', expand: '展開', tracing: '追跡', @@ -148,25 +152,24 @@ const translation = { title: 'Langfuse', description: 'トレース、評価、プロンプトの管理、そしてメトリクスを駆使して、LLMアプリケーションのデバッグや改善に役立てます。', }, - inUse: '使用中', - configProvider: { - title: '配置 ', - placeholder: 'あなた様の{{key}}を入力してください', - project: 'プロジェクト', - publicKey: '公開キー', - secretKey: '秘密キー', - viewDocsLink: '{{key}}のドキュメントを見る', - removeConfirmTitle: '{{key}}の設定を削除しますか?', - removeConfirmContent: '現在の設定は使用中です。これを削除すると、トレース機能が無効になります。', - }, - view: '見る', opik: { title: 'オピック', description: 'Opik は、LLM アプリケーションを評価、テスト、監視するためのオープンソース プラットフォームです。', }, + inUse: '使用中', + configProvider: { + title: '配置 ', + placeholder: '{{key}}を入力してください', + project: 'プロジェクト', + publicKey: '公開キー', + secretKey: '秘密キー', + viewDocsLink: '{{key}}に関するドキュメントを見る', + removeConfirmTitle: '{{key}}の設定を削除しますか?', + removeConfirmContent: '現在の設定は使用中です。これを削除すると、トレース機能が無効になります。', + }, weave: { - description: 'Weaveは、LLMアプリケーションを評価、テスト、および監視するためのオープンソースプラットフォームです。', title: '織る', + description: 'Weaveは、LLMアプリケーションを評価、テスト、および監視するためのオープンソースプラットフォームです。', }, }, answerIcon: { @@ -174,10 +177,6 @@ const translation = { description: '共有アプリケーションの中で Webアプリアイコンを使用して🤖を置き換えるかどうか', descriptionInExplore: 'ExploreでWebアプリアイコンを使用して🤖を置き換えるかどうか', }, - mermaid: { - handDrawn: '手描き', - classic: 'クラシック', - }, newAppFromTemplate: { sidebar: { Agent: 'エージェント', @@ -219,6 +218,11 @@ const translation = { title: 'アクセス権限', description: 'Webアプリのアクセス権限を設定します', accessLabel: '誰がアクセスできますか', + accessItemsDescription: { + anyone: '誰でもWebアプリにアクセス可能です', + specific: '特定のグループやメンバーがWebアプリにアクセス可能です', + organization: '組織内の誰でもWebアプリにアクセス可能です', + }, accessItems: { anyone: 'すべてのユーザー', specific: '特定のグループメンバー', diff --git a/web/i18n/ja-JP/billing.ts 
b/web/i18n/ja-JP/billing.ts index 1b85449b33..16362cedc4 100644 --- a/web/i18n/ja-JP/billing.ts +++ b/web/i18n/ja-JP/billing.ts @@ -173,13 +173,11 @@ const translation = { fullSolution: 'より多くのスペースを得るためにプランをアップグレードしてください。', }, apps: { - fullTipLine1: 'より多くのアプリを作成するには、', - fullTipLine2: 'プランをアップグレードしてください。', - fullTip1: 'アプリをもっと作成するためにアップグレードする', - contactUs: 'お問い合わせ', - fullTip2: 'プランの制限に達しました', - fullTip2des: '使用状況を解放するために非アクティブなアプリケーションを整理することをお勧めします。または、お問い合わせください。', - fullTip1des: 'このプランでのアプリ構築の制限に達しました', + fullTip1: 'アップグレードして制限を解除する', + fullTip1des: 'このプランのアプリ数の上限に達しました。', + fullTip2: 'プラン制限に達しました。', + fullTip2des: '非アクティブなアプリを削除するか、アップグレードプランをご検討ください。', + contactUs: 'こちらからお問い合わせください', }, annotatedResponse: { fullTipLine1: 'より多くの会話を注釈するには、', diff --git a/web/i18n/ja-JP/common.ts b/web/i18n/ja-JP/common.ts index 0ced117db0..f9184332de 100644 --- a/web/i18n/ja-JP/common.ts +++ b/web/i18n/ja-JP/common.ts @@ -171,7 +171,7 @@ const translation = { community: 'コミュニティ', about: 'Difyについて', logout: 'ログアウト', - github: 'ギットハブ', + github: 'GitHub', }, compliance: { soc2Type1: 'SOC 2 Type I 報告書', @@ -252,7 +252,7 @@ const translation = { datasetOperator: 'ナレッジ管理員', datasetOperatorTip: 'ナレッジベースのみを管理できる', inviteTeamMember: 'チームメンバーを招待する', - inviteTeamMemberTip: '彼らはサインイン後、直接あなた様のチームデータにアクセスできます。', + inviteTeamMemberTip: '彼らはサインイン後、直接あなたのチームデータにアクセスできます。', emailNotSetup: 'メールサーバーがセットアップされていないので、招待メールを送信することはできません。代わりに招待後に発行される招待リンクをユーザーに通知してください。', email: 'メール', emailInvalid: '無効なメール形式', @@ -260,7 +260,7 @@ const translation = { sendInvite: '招待を送る', invitedAsRole: '{{role}}ユーザーとして招待されました', invitationSent: '招待が送信されました', - invitationSentTip: '招待が送信され、彼らはDifyにサインインしてあなた様のチームデータにアクセスできます。', + invitationSentTip: '招待が送信され、彼らはDifyにサインインしてあなたのチームデータにアクセスできます。', invitationLink: '招待リンク', failedInvitationEmails: '以下のユーザーは正常に招待されませんでした', ok: 'OK', @@ -272,7 +272,7 @@ const translation = { setEditor: 'エディターに設定', disInvite: '招待をキャンセル', deleteMember: 'メンバーを削除', - you: '(あなた様)', + you: '(あなた)', }, integrations: { connected: '接続済み', @@ -448,8 +448,8 @@ const translation = { connect: '接続', configure: '設定', notion: { - title: 'ノーション', - description: 'ナレッジデータソースとしてノーションを使用します。', + title: 'Notion', + description: 'ナレッジデータソースとしてNotionを使用します。', connectedWorkspace: '接続済みワークスペース', addWorkspace: 'ワークスペースの追加', connected: '接続済み', diff --git a/web/i18n/ja-JP/dataset-documents.ts b/web/i18n/ja-JP/dataset-documents.ts index 3943146b7e..81047872ad 100644 --- a/web/i18n/ja-JP/dataset-documents.ts +++ b/web/i18n/ja-JP/dataset-documents.ts @@ -51,7 +51,7 @@ const translation = { empty: { title: 'まだドキュメントがありません', upload: { - tip: 'ファイルをアップロードしたり、ウェブサイトから同期したり、NotionやGitHubなどのウェブアプリから同期することができます。', + tip: 'ファイルをアップロードしたり、ウェブサイトから同期したり、NotionやGitHubなどのWebアプリから同期することができます。', }, sync: { tip: 'Difyは定期的にNotionからファイルをダウンロードし、処理を完了します。', diff --git a/web/i18n/ja-JP/dataset-settings.ts b/web/i18n/ja-JP/dataset-settings.ts index 6b809ddd43..b0fd2ec55b 100644 --- a/web/i18n/ja-JP/dataset-settings.ts +++ b/web/i18n/ja-JP/dataset-settings.ts @@ -14,7 +14,7 @@ const translation = { permissionsOnlyMe: '自分のみ', permissionsAllMember: 'すべてのチームメンバー', permissionsInvitedMembers: '一部のチームメンバー', - me: '(あなた様)', + me: '(あなた)', indexMethod: 'インデックス方法', indexMethodHighQuality: '高品質', indexMethodHighQualityTip: 'より正確な検索のため、埋め込みモデルを呼び出してドキュメントを処理することで、LLMは高品質な回答を生成できます。', diff --git a/web/i18n/ja-JP/dataset.ts b/web/i18n/ja-JP/dataset.ts index 4e367f7809..1078fee69d 100644 --- a/web/i18n/ja-JP/dataset.ts +++ b/web/i18n/ja-JP/dataset.ts @@ -72,7 +72,7 @@ const 
translation = { createDatasetIntro: '独自のテキストデータをインポートするか、LLMコンテキストの強化のためにWebhookを介してリアルタイムでデータを書き込むことができます。', deleteDatasetConfirmTitle: 'このナレッジベースを削除しますか?', deleteDatasetConfirmContent: - 'ナレッジベースを削除すると元に戻すことはできません。ユーザーはもはやあなた様のナレッジベースにアクセスできず、すべてのプロンプトの設定とログが永久に削除されます。', + 'ナレッジベースを削除すると元に戻すことはできません。ユーザーはもはやあなたのナレッジベースにアクセスできず、すべてのプロンプトの設定とログが永久に削除されます。', datasetUsedByApp: 'このナレッジベースは一部のアプリによって使用されています。アプリはこのナレッジベースを使用できなくなり、すべてのプロンプト設定とログは永久に削除されます。', datasetDeleted: 'ナレッジベースが削除されました', datasetDeleteFailed: 'ナレッジベースの削除に失敗しました', diff --git a/web/i18n/ja-JP/login.ts b/web/i18n/ja-JP/login.ts index 974212564f..3db651c580 100644 --- a/web/i18n/ja-JP/login.ts +++ b/web/i18n/ja-JP/login.ts @@ -62,11 +62,11 @@ const translation = { link: 'オープンソースライセンス', }, join: '参加する', - joinTipStart: 'あなた様を招待します', + joinTipStart: 'あなたを招待します', joinTipEnd: 'チームに参加する', invalid: 'リンクの有効期限が切れています', explore: 'Difyを探索する', - activatedTipStart: 'あなた様は', + activatedTipStart: 'あなたは', activatedTipEnd: 'チームに参加しました', activated: '今すぐサインイン', adminInitPassword: '管理者初期化パスワード', diff --git a/web/i18n/ja-JP/time.ts b/web/i18n/ja-JP/time.ts index 36d4c699bb..09203a0cc2 100644 --- a/web/i18n/ja-JP/time.ts +++ b/web/i18n/ja-JP/time.ts @@ -2,11 +2,11 @@ const translation = { daysInWeek: { Tue: '火曜日', Sat: '土曜日', - Mon: 'モン', + Mon: '月曜日', Thu: '木曜日', - Fri: '自由', + Fri: '金曜日', Wed: '水曜日', - Sun: '太陽', + Sun: '日曜日', }, months: { November: '11月', @@ -14,13 +14,13 @@ const translation = { March: '3月', September: '9月', July: '7月', - April: '四月', + April: '4月', February: '2月', June: '6月', January: '1月', May: '5月', - August: '八月', - October: '十月', + August: '8月', + October: '10月', }, operation: { now: '今', diff --git a/web/i18n/ja-JP/tools.ts b/web/i18n/ja-JP/tools.ts index f234625eba..57eab7e736 100644 --- a/web/i18n/ja-JP/tools.ts +++ b/web/i18n/ja-JP/tools.ts @@ -108,7 +108,7 @@ const translation = { confirmTitle: '保存しますか?', confirmTip: 'このツールを使用しているアプリは影響を受けます', deleteToolConfirmTitle: 'このツールを削除しますか?', - deleteToolConfirmContent: 'ツールの削除は取り消しできません。ユーザーはもうあなた様のツールにアクセスできません。', + deleteToolConfirmContent: 'ツールの削除は取り消しできません。ユーザーはもうあなたのツールにアクセスできません。', }, test: { title: 'テスト', From 13dc1c879509b501aaa92b04282ac0a6cf6ad931 Mon Sep 17 00:00:00 2001 From: QuantumGhost Date: Wed, 21 May 2025 18:38:16 +0800 Subject: [PATCH 05/19] Simplify `execution_metadata` Handling for `WorkflowNodeExecution` (#20062) Currently, `WorkflowNodeExecution.execution_metadata_dict` returns `None` when metadata is absent in the database. This requires all callers to perform `None` checks when processing metadata, leading to more complex caller-side logic. This pull request updates the `execution_metadata_dict` method to return an empty dictionary instead of `None` when metadata is absent. This change would simplify the caller logic, as it removes the need for explicit `None` checks and provides a more consistent data structure to work with. 
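To illustrate the effect on callers, a minimal sketch (`db_model` and the
`NodeRunMetadataKey` conversion are taken from the repository change below):

    # Before: the property could return None, so callers had to guard against it
    if db_model.execution_metadata_dict:
        metadata = {NodeRunMetadataKey(k): v for k, v in db_model.execution_metadata_dict.items()}
    else:
        metadata = {}

    # After: the property always returns a dict, so the guard disappears
    metadata = {NodeRunMetadataKey(k): v for k, v in db_model.execution_metadata_dict.items()}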
--- ...qlalchemy_workflow_node_execution_repository.py | 5 +---- api/models/workflow.py | 7 +++++-- api/tests/unit_tests/models/test_workflow.py | 14 +++++++++++++- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py index b213449be5..8d916a19db 100644 --- a/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py +++ b/api/core/repositories/sqlalchemy_workflow_node_execution_repository.py @@ -103,10 +103,7 @@ class SQLAlchemyWorkflowNodeExecutionRepository(WorkflowNodeExecutionRepository) inputs = db_model.inputs_dict process_data = db_model.process_data_dict outputs = db_model.outputs_dict - if db_model.execution_metadata_dict: - metadata = {NodeRunMetadataKey(k): v for k, v in db_model.execution_metadata_dict.items()} - else: - metadata = {} + metadata = {NodeRunMetadataKey(k): v for k, v in db_model.execution_metadata_dict.items()} # Convert status to domain enum status = NodeExecutionStatus(db_model.status) diff --git a/api/models/workflow.py b/api/models/workflow.py index 8751fd1aa8..fc6de8692c 100644 --- a/api/models/workflow.py +++ b/api/models/workflow.py @@ -666,8 +666,11 @@ class WorkflowNodeExecution(Base): return json.loads(self.process_data) if self.process_data else None @property - def execution_metadata_dict(self) -> dict[str, Any] | None: - return json.loads(self.execution_metadata) if self.execution_metadata else None + def execution_metadata_dict(self) -> dict[str, Any]: + # When the metadata is unset, we return an empty dictionary instead of `None`. + # This approach streamlines the logic for the caller, making it easier to handle + # cases where metadata is absent. 
+ return json.loads(self.execution_metadata) if self.execution_metadata else {} @property def extras(self): diff --git a/api/tests/unit_tests/models/test_workflow.py b/api/tests/unit_tests/models/test_workflow.py index fe56f18f1b..70ce224eb6 100644 --- a/api/tests/unit_tests/models/test_workflow.py +++ b/api/tests/unit_tests/models/test_workflow.py @@ -1,10 +1,11 @@ +import json from unittest import mock from uuid import uuid4 import contexts from constants import HIDDEN_VALUE from core.variables import FloatVariable, IntegerVariable, SecretVariable, StringVariable -from models.workflow import Workflow +from models.workflow import Workflow, WorkflowNodeExecution def test_environment_variables(): @@ -137,3 +138,14 @@ def test_to_dict(): workflow_dict = workflow.to_dict(include_secret=True) assert workflow_dict["environment_variables"][0]["value"] == "secret" assert workflow_dict["environment_variables"][1]["value"] == "text" + + +class TestWorkflowNodeExecution: + def test_execution_metadata_dict(self): + node_exec = WorkflowNodeExecution() + node_exec.execution_metadata = None + assert node_exec.execution_metadata_dict == {} + + original = {"a": 1, "b": ["2"]} + node_exec.execution_metadata = json.dumps(original) + assert node_exec.execution_metadata_dict == original From 7d230acf401dcecf2871d842469961025c9ccd97 Mon Sep 17 00:00:00 2001 From: wlleiiwang <1025164922@qq.com> Date: Wed, 21 May 2025 20:24:05 +0800 Subject: [PATCH 06/19] tencent vectordb compatible with version 1.1.3 and below (#20056) Co-authored-by: wlleiiwang --- api/core/rag/datasource/vdb/tencent/tencent_vector.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/core/rag/datasource/vdb/tencent/tencent_vector.py b/api/core/rag/datasource/vdb/tencent/tencent_vector.py index e266659075..d2bf3eb92a 100644 --- a/api/core/rag/datasource/vdb/tencent/tencent_vector.py +++ b/api/core/rag/datasource/vdb/tencent/tencent_vector.py @@ -271,12 +271,15 @@ class TencentVector(BaseVector): for result in res[0]: meta = result.get(self.field_metadata) + if isinstance(meta, str): + # Compatible with version 1.1.3 and below. 
+ meta = json.loads(meta)
+ score = 1 - result.get("score", 0.0)
- score = result.get("score", 0.0)
 if score > score_threshold:
 meta["score"] = score
 doc = Document(page_content=result.get(self.field_text), metadata=meta)
 docs.append(doc)

 return docs

 def delete(self) -> None:

From d31235ca134ca6b9d52f969a2309ef898d7fe773 Mon Sep 17 00:00:00 2001
From: -LAN-
Date: Wed, 21 May 2025 22:01:53 +0800
Subject: [PATCH 07/19] feat: Introduce WorkflowExecution Domain Entity and Repository, Replace WorkflowRun Direct Usage, and Unify Stream Response Logic (#20067)

Signed-off-by: -LAN-
---
 .../app/apps/advanced_chat/app_generator.py | 52 +-
 .../advanced_chat/generate_task_pipeline.py | 230 +++----
 api/core/app/apps/workflow/app_generator.py | 53 +-
 .../apps/workflow/generate_task_pipeline.py | 591 ++++++++++++++++++
 api/core/app/entities/task_entities.py | 6 +-
 api/core/ops/ops_trace_manager.py | 5 +-
 ...qlalchemy_workflow_execution_repository.py | 242 +++++++
 .../entities/workflow_execution_entities.py | 91 +++
 .../workflow_execution_repository.py | 42 ++
 .../workflow_app_generate_task_pipeline.py | 228 +++---
 api/core/workflow/workflow_cycle_manager.py | 355 +++++------
 api/models/workflow.py | 36 +-
 .../workflow/test_workflow_cycle_manager.py | 423 ++++++++-----
 13 files changed, 1710 insertions(+), 644 deletions(-)
 create mode 100644 api/core/app/apps/workflow/generate_task_pipeline.py
 create mode 100644 api/core/repositories/sqlalchemy_workflow_execution_repository.py
 create mode 100644 api/core/workflow/entities/workflow_execution_entities.py
 create mode 100644 api/core/workflow/repository/workflow_execution_repository.py

diff --git a/api/core/app/apps/advanced_chat/app_generator.py b/api/core/app/apps/advanced_chat/app_generator.py
index b74100bb19..4b021aa0b3 100644
--- a/api/core/app/apps/advanced_chat/app_generator.py
+++ b/api/core/app/apps/advanced_chat/app_generator.py
@@ -26,10 +26,13 @@ from core.model_runtime.errors.invoke import InvokeAuthorizationError
 from core.ops.ops_trace_manager import TraceQueueManager
 from core.prompt.utils.get_thread_messages_length import get_thread_messages_length
 from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository
+from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository
+from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from extensions.ext_database import db
 from factories import file_factory
 from models import Account, App, Conversation, EndUser, Message, Workflow, WorkflowNodeExecutionTriggeredFrom
+from models.enums import WorkflowRunTriggeredFrom
 from services.conversation_service import ConversationService
 from services.errors.message import MessageNotExistsError
@@ -159,8 +162,22 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator):
 contexts.plugin_tool_providers.set({})
 contexts.plugin_tool_providers_lock.set(threading.Lock())

- # Create workflow node execution repository
+ # Create repositories
+ #
+ # Create session factory
 session_factory = sessionmaker(bind=db.engine, expire_on_commit=False)
+ # Create workflow execution(aka workflow run) repository
+ if invoke_from == InvokeFrom.DEBUGGER:
+ workflow_triggered_from = WorkflowRunTriggeredFrom.DEBUGGING
+ else:
+ workflow_triggered_from = WorkflowRunTriggeredFrom.APP_RUN
+ workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository(
session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=workflow_triggered_from, + ) + # Create workflow node execution repository workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( session_factory=session_factory, user=user, @@ -173,6 +190,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user=user, invoke_from=invoke_from, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, conversation=conversation, stream=streaming, @@ -226,8 +244,18 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) - # Create workflow node execution repository + # Create repositories + # + # Create session factory session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) + # Create workflow node execution repository workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( session_factory=session_factory, user=user, @@ -240,6 +268,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, conversation=None, stream=streaming, @@ -291,8 +320,18 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) - # Create workflow node execution repository + # Create repositories + # + # Create session factory session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) + # Create workflow node execution repository workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( session_factory=session_factory, user=user, @@ -305,6 +344,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, conversation=None, stream=streaming, @@ -317,6 +357,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): user: Union[Account, EndUser], invoke_from: InvokeFrom, application_generate_entity: AdvancedChatAppGenerateEntity, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, conversation: Optional[Conversation] = None, stream: bool = True, @@ -381,6 +422,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation=conversation, message=message, user=user, + 
workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, stream=stream, ) @@ -453,6 +495,7 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation: Conversation, message: Message, user: Union[Account, EndUser], + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, stream: bool = False, ) -> Union[ChatbotAppBlockingResponse, Generator[ChatbotAppStreamResponse, None, None]]: @@ -476,9 +519,10 @@ class AdvancedChatAppGenerator(MessageBasedAppGenerator): conversation=conversation, message=message, user=user, - stream=stream, dialogue_count=self._dialogue_count, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, + stream=stream, ) try: diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index 735b2a9709..cd764d56a5 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -64,6 +64,7 @@ from core.ops.ops_trace_manager import TraceQueueManager from core.workflow.enums import SystemVariableKey from core.workflow.graph_engine.entities.graph_runtime_state import GraphRuntimeState from core.workflow.nodes import NodeType +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from core.workflow.workflow_cycle_manager import WorkflowCycleManager from events.message_event import message_was_created @@ -94,6 +95,7 @@ class AdvancedChatAppGenerateTaskPipeline: user: Union[Account, EndUser], stream: bool, dialogue_count: int, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, ) -> None: self._base_task_pipeline = BasedGenerateTaskPipeline( @@ -125,6 +127,7 @@ class AdvancedChatAppGenerateTaskPipeline: SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, }, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, ) @@ -294,21 +297,19 @@ class AdvancedChatAppGenerateTaskPipeline: with Session(db.engine, expire_on_commit=False) as session: # init workflow run - workflow_run = self._workflow_cycle_manager._handle_workflow_run_start( + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_start( session=session, workflow_id=self._workflow_id, - user_id=self._user_id, - created_by_role=self._created_by_role, ) - self._workflow_run_id = workflow_run.id + self._workflow_run_id = workflow_execution.id message = self._get_message(session=session) if not message: raise ValueError(f"Message not found: {self._message_id}") - message.workflow_run_id = workflow_run.id - workflow_start_resp = self._workflow_cycle_manager._workflow_start_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + message.workflow_run_id = workflow_execution.id + workflow_start_resp = self._workflow_cycle_manager.workflow_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - session.commit() yield workflow_start_resp elif 
isinstance( @@ -319,13 +320,10 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("workflow run not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_retried( + workflow_execution_id=self._workflow_run_id, event=event ) - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_retried( - workflow_run=workflow_run, event=event - ) - node_retry_resp = self._workflow_cycle_manager._workflow_node_retry_to_stream_response( + node_retry_resp = self._workflow_cycle_manager.workflow_node_retry_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -338,20 +336,15 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - workflow_node_execution = self._workflow_cycle_manager._handle_node_execution_start( - workflow_run=workflow_run, event=event - ) + workflow_node_execution = self._workflow_cycle_manager.handle_node_execution_start( + workflow_execution_id=self._workflow_run_id, event=event + ) - node_start_resp = self._workflow_cycle_manager._workflow_node_start_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - session.commit() + node_start_resp = self._workflow_cycle_manager.workflow_node_start_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) if node_start_resp: yield node_start_resp @@ -359,15 +352,15 @@ class AdvancedChatAppGenerateTaskPipeline: # Record files if it's an answer node or end node if event.node_type in [NodeType.ANSWER, NodeType.END]: self._recorded_files.extend( - self._workflow_cycle_manager._fetch_files_from_node_outputs(event.outputs or {}) + self._workflow_cycle_manager.fetch_files_from_node_outputs(event.outputs or {}) ) with Session(db.engine, expire_on_commit=False) as session: - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_success( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_success( event=event ) - node_finish_resp = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_finish_resp = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -383,11 +376,11 @@ class AdvancedChatAppGenerateTaskPipeline: | QueueNodeInLoopFailedEvent | QueueNodeExceptionEvent, ): - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_failed( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_failed( event=event ) - node_finish_resp = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_finish_resp = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, 
workflow_node_execution=workflow_node_execution, @@ -399,132 +392,90 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_start_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) - ) + parallel_start_resp = self._workflow_cycle_manager.workflow_parallel_branch_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield parallel_start_resp elif isinstance(event, QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_finish_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_finished_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + parallel_finish_resp = ( + self._workflow_cycle_manager.workflow_parallel_branch_finished_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, ) + ) yield parallel_finish_resp elif isinstance(event, QueueIterationStartEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_start_resp = self._workflow_cycle_manager._workflow_iteration_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_start_resp = self._workflow_cycle_manager.workflow_iteration_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_start_resp elif isinstance(event, QueueIterationNextEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_next_resp = self._workflow_cycle_manager._workflow_iteration_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_next_resp = self._workflow_cycle_manager.workflow_iteration_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_next_resp elif isinstance(event, QueueIterationCompletedEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, 
workflow_run_id=self._workflow_run_id - ) - iter_finish_resp = self._workflow_cycle_manager._workflow_iteration_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_finish_resp = self._workflow_cycle_manager.workflow_iteration_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_finish_resp elif isinstance(event, QueueLoopStartEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_start_resp = self._workflow_cycle_manager._workflow_loop_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_start_resp = self._workflow_cycle_manager.workflow_loop_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_start_resp elif isinstance(event, QueueLoopNextEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_next_resp = self._workflow_cycle_manager._workflow_loop_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_next_resp = self._workflow_cycle_manager.workflow_loop_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_next_resp elif isinstance(event, QueueLoopCompletedEvent): if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_finish_resp = self._workflow_cycle_manager._workflow_loop_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_finish_resp = self._workflow_cycle_manager.workflow_loop_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_finish_resp elif isinstance(event, QueueWorkflowSucceededEvent): @@ -535,10 +486,8 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("workflow run not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -546,10 +495,11 @@ class AdvancedChatAppGenerateTaskPipeline: trace_manager=trace_manager, ) - workflow_finish_resp = 
self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - session.commit() yield workflow_finish_resp self._base_task_pipeline._queue_manager.publish( @@ -562,10 +512,8 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_partial_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_partial_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -573,10 +521,11 @@ class AdvancedChatAppGenerateTaskPipeline: conversation_id=None, trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - session.commit() yield workflow_finish_resp self._base_task_pipeline._queue_manager.publish( @@ -589,26 +538,25 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_failed( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, status=WorkflowRunStatus.FAILED, - error=event.error, + error_message=event.error, conversation_id=self._conversation_id, trace_manager=trace_manager, exceptions_count=event.exceptions_count, ) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_run.error}")) + err_event = QueueErrorEvent(error=ValueError(f"Run failed: {workflow_execution.error_message}")) err = self._base_task_pipeline._handle_error( event=err_event, session=session, message_id=self._message_id ) - session.commit() yield workflow_finish_resp yield self._base_task_pipeline._error_to_stream_response(err) @@ -616,21 +564,19 @@ class AdvancedChatAppGenerateTaskPipeline: elif isinstance(event, QueueStopEvent): if self._workflow_run_id and graph_runtime_state: with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_failed( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( 
workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, status=WorkflowRunStatus.STOPPED, - error=event.get_stop_reason(), + error_message=event.get_stop_reason(), conversation_id=self._conversation_id, trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, + workflow_execution=workflow_execution, ) # Save message self._save_message(session=session, graph_runtime_state=graph_runtime_state) @@ -711,7 +657,7 @@ class AdvancedChatAppGenerateTaskPipeline: yield self._message_end_to_stream_response() elif isinstance(event, QueueAgentLogEvent): - yield self._workflow_cycle_manager._handle_agent_log( + yield self._workflow_cycle_manager.handle_agent_log( task_id=self._application_generate_entity.task_id, event=event ) else: diff --git a/api/core/app/apps/workflow/app_generator.py b/api/core/app/apps/workflow/app_generator.py index d49ff682b9..01d35c57ce 100644 --- a/api/core/app/apps/workflow/app_generator.py +++ b/api/core/app/apps/workflow/app_generator.py @@ -18,16 +18,19 @@ from core.app.apps.workflow.app_config_manager import WorkflowAppConfigManager from core.app.apps.workflow.app_queue_manager import WorkflowAppQueueManager from core.app.apps.workflow.app_runner import WorkflowAppRunner from core.app.apps.workflow.generate_response_converter import WorkflowAppGenerateResponseConverter +from core.app.apps.workflow.generate_task_pipeline import WorkflowAppGenerateTaskPipeline from core.app.entities.app_invoke_entities import InvokeFrom, WorkflowAppGenerateEntity from core.app.entities.task_entities import WorkflowAppBlockingResponse, WorkflowAppStreamResponse from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.ops.ops_trace_manager import TraceQueueManager from core.repositories import SQLAlchemyWorkflowNodeExecutionRepository +from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository -from core.workflow.workflow_app_generate_task_pipeline import WorkflowAppGenerateTaskPipeline from extensions.ext_database import db from factories import file_factory from models import Account, App, EndUser, Workflow, WorkflowNodeExecutionTriggeredFrom +from models.enums import WorkflowRunTriggeredFrom logger = logging.getLogger(__name__) @@ -136,9 +139,22 @@ class WorkflowAppGenerator(BaseAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) - # Create workflow node execution repository + # Create repositories + # + # Create session factory session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) - + # Create workflow execution(aka workflow run) repository + if invoke_from == InvokeFrom.DEBUGGER: + workflow_triggered_from = WorkflowRunTriggeredFrom.DEBUGGING + else: + workflow_triggered_from = WorkflowRunTriggeredFrom.APP_RUN + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + 
app_id=application_generate_entity.app_config.app_id, + triggered_from=workflow_triggered_from, + ) + # Create workflow node execution repository workflow_node_execution_repository = SQLAlchemyWorkflowNodeExecutionRepository( session_factory=session_factory, user=user, @@ -152,6 +168,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user=user, application_generate_entity=application_generate_entity, invoke_from=invoke_from, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, workflow_thread_pool_id=workflow_thread_pool_id, @@ -165,6 +182,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user: Union[Account, EndUser], application_generate_entity: WorkflowAppGenerateEntity, invoke_from: InvokeFrom, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, streaming: bool = True, workflow_thread_pool_id: Optional[str] = None, @@ -209,6 +227,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow=workflow, queue_manager=queue_manager, user=user, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, stream=streaming, ) @@ -262,6 +281,17 @@ class WorkflowAppGenerator(BaseAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create repositories + # + # Create session factory + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) # Create workflow node execution repository session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) @@ -278,6 +308,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, ) @@ -327,6 +358,17 @@ class WorkflowAppGenerator(BaseAppGenerator): contexts.plugin_tool_providers.set({}) contexts.plugin_tool_providers_lock.set(threading.Lock()) + # Create repositories + # + # Create session factory + session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) + # Create workflow execution(aka workflow run) repository + workflow_execution_repository = SQLAlchemyWorkflowExecutionRepository( + session_factory=session_factory, + user=user, + app_id=application_generate_entity.app_config.app_id, + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) # Create workflow node execution repository session_factory = sessionmaker(bind=db.engine, expire_on_commit=False) @@ -343,6 +385,7 @@ class WorkflowAppGenerator(BaseAppGenerator): user=user, invoke_from=InvokeFrom.DEBUGGER, application_generate_entity=application_generate_entity, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, streaming=streaming, ) @@ -400,6 +443,7 @@ class WorkflowAppGenerator(BaseAppGenerator): workflow: Workflow, queue_manager: AppQueueManager, user: Union[Account, EndUser], + workflow_execution_repository: WorkflowExecutionRepository, 
workflow_node_execution_repository: WorkflowNodeExecutionRepository,
 stream: bool = False,
 ) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]:
@@ -419,8 +463,9 @@
 workflow=workflow,
 queue_manager=queue_manager,
 user=user,
- stream=stream,
+ workflow_execution_repository=workflow_execution_repository,
 workflow_node_execution_repository=workflow_node_execution_repository,
+ stream=stream,
 )

 try:
diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py
new file mode 100644
index 0000000000..f2ebd78b36
--- /dev/null
+++ b/api/core/app/apps/workflow/generate_task_pipeline.py
@@ -0,0 +1,591 @@
+import logging
+import time
+from collections.abc import Generator
+from typing import Optional, Union
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME
+from core.app.apps.base_app_queue_manager import AppQueueManager
+from core.app.entities.app_invoke_entities import (
+ InvokeFrom,
+ WorkflowAppGenerateEntity,
+)
+from core.app.entities.queue_entities import (
+ QueueAgentLogEvent,
+ QueueErrorEvent,
+ QueueIterationCompletedEvent,
+ QueueIterationNextEvent,
+ QueueIterationStartEvent,
+ QueueLoopCompletedEvent,
+ QueueLoopNextEvent,
+ QueueLoopStartEvent,
+ QueueNodeExceptionEvent,
+ QueueNodeFailedEvent,
+ QueueNodeInIterationFailedEvent,
+ QueueNodeInLoopFailedEvent,
+ QueueNodeRetryEvent,
+ QueueNodeStartedEvent,
+ QueueNodeSucceededEvent,
+ QueueParallelBranchRunFailedEvent,
+ QueueParallelBranchRunStartedEvent,
+ QueueParallelBranchRunSucceededEvent,
+ QueuePingEvent,
+ QueueStopEvent,
+ QueueTextChunkEvent,
+ QueueWorkflowFailedEvent,
+ QueueWorkflowPartialSuccessEvent,
+ QueueWorkflowStartedEvent,
+ QueueWorkflowSucceededEvent,
+)
+from core.app.entities.task_entities import (
+ ErrorStreamResponse,
+ MessageAudioEndStreamResponse,
+ MessageAudioStreamResponse,
+ StreamResponse,
+ TextChunkStreamResponse,
+ WorkflowAppBlockingResponse,
+ WorkflowAppStreamResponse,
+ WorkflowFinishStreamResponse,
+ WorkflowStartStreamResponse,
+ WorkflowTaskState,
+)
+from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline
+from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk
+from core.ops.ops_trace_manager import TraceQueueManager
+from core.workflow.entities.workflow_execution_entities import WorkflowExecution
+from core.workflow.enums import SystemVariableKey
+from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository
+from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository
+from core.workflow.workflow_cycle_manager import WorkflowCycleManager
+from extensions.ext_database import db
+from models.account import Account
+from models.enums import CreatorUserRole
+from models.model import EndUser
+from models.workflow import (
+ Workflow,
+ WorkflowAppLog,
+ WorkflowAppLogCreatedFrom,
+ WorkflowRun,
+ WorkflowRunStatus,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class WorkflowAppGenerateTaskPipeline:
+ """
+ WorkflowAppGenerateTaskPipeline is a class that generates stream output and manages state for the application.
+ """ + + def __init__( + self, + application_generate_entity: WorkflowAppGenerateEntity, + workflow: Workflow, + queue_manager: AppQueueManager, + user: Union[Account, EndUser], + stream: bool, + workflow_execution_repository: WorkflowExecutionRepository, + workflow_node_execution_repository: WorkflowNodeExecutionRepository, + ) -> None: + self._base_task_pipeline = BasedGenerateTaskPipeline( + application_generate_entity=application_generate_entity, + queue_manager=queue_manager, + stream=stream, + ) + + if isinstance(user, EndUser): + self._user_id = user.id + user_session_id = user.session_id + self._created_by_role = CreatorUserRole.END_USER + elif isinstance(user, Account): + self._user_id = user.id + user_session_id = user.id + self._created_by_role = CreatorUserRole.ACCOUNT + else: + raise ValueError(f"Invalid user type: {type(user)}") + + self._workflow_cycle_manager = WorkflowCycleManager( + application_generate_entity=application_generate_entity, + workflow_system_variables={ + SystemVariableKey.FILES: application_generate_entity.files, + SystemVariableKey.USER_ID: user_session_id, + SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id, + SystemVariableKey.WORKFLOW_ID: workflow.id, + SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, + }, + workflow_execution_repository=workflow_execution_repository, + workflow_node_execution_repository=workflow_node_execution_repository, + ) + + self._application_generate_entity = application_generate_entity + self._workflow_id = workflow.id + self._workflow_features_dict = workflow.features_dict + self._task_state = WorkflowTaskState() + self._workflow_run_id = "" + + def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]: + """ + Process generate task pipeline. + :return: + """ + generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) + if self._base_task_pipeline._stream: + return self._to_stream_response(generator) + else: + return self._to_blocking_response(generator) + + def _to_blocking_response(self, generator: Generator[StreamResponse, None, None]) -> WorkflowAppBlockingResponse: + """ + To blocking response. + :return: + """ + for stream_response in generator: + if isinstance(stream_response, ErrorStreamResponse): + raise stream_response.err + elif isinstance(stream_response, WorkflowFinishStreamResponse): + response = WorkflowAppBlockingResponse( + task_id=self._application_generate_entity.task_id, + workflow_run_id=stream_response.data.id, + data=WorkflowAppBlockingResponse.Data( + id=stream_response.data.id, + workflow_id=stream_response.data.workflow_id, + status=stream_response.data.status, + outputs=stream_response.data.outputs, + error=stream_response.data.error, + elapsed_time=stream_response.data.elapsed_time, + total_tokens=stream_response.data.total_tokens, + total_steps=stream_response.data.total_steps, + created_at=int(stream_response.data.created_at), + finished_at=int(stream_response.data.finished_at), + ), + ) + + return response + else: + continue + + raise ValueError("queue listening stopped unexpectedly.") + + def _to_stream_response( + self, generator: Generator[StreamResponse, None, None] + ) -> Generator[WorkflowAppStreamResponse, None, None]: + """ + To stream response. 
+ :return: + """ + workflow_run_id = None + for stream_response in generator: + if isinstance(stream_response, WorkflowStartStreamResponse): + workflow_run_id = stream_response.workflow_run_id + + yield WorkflowAppStreamResponse(workflow_run_id=workflow_run_id, stream_response=stream_response) + + def _listen_audio_msg(self, publisher: AppGeneratorTTSPublisher | None, task_id: str): + if not publisher: + return None + audio_msg = publisher.check_and_get_audio() + if audio_msg and isinstance(audio_msg, AudioTrunk) and audio_msg.status != "finish": + return MessageAudioStreamResponse(audio=audio_msg.audio, task_id=task_id) + return None + + def _wrapper_process_stream_response( + self, trace_manager: Optional[TraceQueueManager] = None + ) -> Generator[StreamResponse, None, None]: + tts_publisher = None + task_id = self._application_generate_entity.task_id + tenant_id = self._application_generate_entity.app_config.tenant_id + features_dict = self._workflow_features_dict + + if ( + features_dict.get("text_to_speech") + and features_dict["text_to_speech"].get("enabled") + and features_dict["text_to_speech"].get("autoPlay") == "enabled" + ): + tts_publisher = AppGeneratorTTSPublisher( + tenant_id, features_dict["text_to_speech"].get("voice"), features_dict["text_to_speech"].get("language") + ) + + for response in self._process_stream_response(tts_publisher=tts_publisher, trace_manager=trace_manager): + while True: + audio_response = self._listen_audio_msg(publisher=tts_publisher, task_id=task_id) + if audio_response: + yield audio_response + else: + break + yield response + + start_listener_time = time.time() + while (time.time() - start_listener_time) < TTS_AUTO_PLAY_TIMEOUT: + try: + if not tts_publisher: + break + audio_trunk = tts_publisher.check_and_get_audio() + if audio_trunk is None: + # release cpu + # sleep 20 ms ( 40ms => 1280 byte audio file,20ms => 640 byte audio file) + time.sleep(TTS_AUTO_PLAY_YIELD_CPU_TIME) + continue + if audio_trunk.status == "finish": + break + else: + yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id) + except Exception: + logger.exception(f"Fails to get audio trunk, task_id: {task_id}") + break + if tts_publisher: + yield MessageAudioEndStreamResponse(audio="", task_id=task_id) + + def _process_stream_response( + self, + tts_publisher: Optional[AppGeneratorTTSPublisher] = None, + trace_manager: Optional[TraceQueueManager] = None, + ) -> Generator[StreamResponse, None, None]: + """ + Process stream response. 
+ :return: + """ + graph_runtime_state = None + + for queue_message in self._base_task_pipeline._queue_manager.listen(): + event = queue_message.event + + if isinstance(event, QueuePingEvent): + yield self._base_task_pipeline._ping_stream_response() + elif isinstance(event, QueueErrorEvent): + err = self._base_task_pipeline._handle_error(event=event) + yield self._base_task_pipeline._error_to_stream_response(err) + break + elif isinstance(event, QueueWorkflowStartedEvent): + # override graph runtime state + graph_runtime_state = event.graph_runtime_state + + with Session(db.engine, expire_on_commit=False) as session: + # init workflow run + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_start( + session=session, + workflow_id=self._workflow_id, + ) + self._workflow_run_id = workflow_execution.id + start_resp = self._workflow_cycle_manager.workflow_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, + ) + + yield start_resp + elif isinstance( + event, + QueueNodeRetryEvent, + ): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + with Session(db.engine, expire_on_commit=False) as session: + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_retried( + workflow_execution_id=self._workflow_run_id, + event=event, + ) + response = self._workflow_cycle_manager.workflow_node_retry_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + session.commit() + + if response: + yield response + elif isinstance(event, QueueNodeStartedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + workflow_node_execution = self._workflow_cycle_manager.handle_node_execution_start( + workflow_execution_id=self._workflow_run_id, event=event + ) + node_start_response = self._workflow_cycle_manager.workflow_node_start_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + + if node_start_response: + yield node_start_response + elif isinstance(event, QueueNodeSucceededEvent): + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_success( + event=event + ) + node_success_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + + if node_success_response: + yield node_success_response + elif isinstance( + event, + QueueNodeFailedEvent + | QueueNodeInIterationFailedEvent + | QueueNodeInLoopFailedEvent + | QueueNodeExceptionEvent, + ): + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_failed( + event=event, + ) + node_failed_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) + + if node_failed_response: + yield node_failed_response + + elif isinstance(event, QueueParallelBranchRunStartedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + parallel_start_resp = self._workflow_cycle_manager.workflow_parallel_branch_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + 
event=event, + ) + + yield parallel_start_resp + + elif isinstance(event, QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + parallel_finish_resp = ( + self._workflow_cycle_manager.workflow_parallel_branch_finished_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) + ) + + yield parallel_finish_resp + + elif isinstance(event, QueueIterationStartEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + iter_start_resp = self._workflow_cycle_manager.workflow_iteration_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) + + yield iter_start_resp + + elif isinstance(event, QueueIterationNextEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + iter_next_resp = self._workflow_cycle_manager.workflow_iteration_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) + + yield iter_next_resp + + elif isinstance(event, QueueIterationCompletedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + iter_finish_resp = self._workflow_cycle_manager.workflow_iteration_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) + + yield iter_finish_resp + + elif isinstance(event, QueueLoopStartEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + loop_start_resp = self._workflow_cycle_manager.workflow_loop_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) + + yield loop_start_resp + + elif isinstance(event, QueueLoopNextEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + loop_next_resp = self._workflow_cycle_manager.workflow_loop_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) + + yield loop_next_resp + + elif isinstance(event, QueueLoopCompletedEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + + loop_finish_resp = self._workflow_cycle_manager.workflow_loop_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) + + yield loop_finish_resp + + elif isinstance(event, QueueWorkflowSucceededEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + if not graph_runtime_state: + raise ValueError("graph runtime state not initialized.") + + with Session(db.engine, expire_on_commit=False) as session: + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_success( + workflow_run_id=self._workflow_run_id, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + conversation_id=None, + trace_manager=trace_manager, + ) + + # save workflow app log + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) + + workflow_finish_resp = 
self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, + ) + session.commit() + + yield workflow_finish_resp + elif isinstance(event, QueueWorkflowPartialSuccessEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + if not graph_runtime_state: + raise ValueError("graph runtime state not initialized.") + + with Session(db.engine, expire_on_commit=False) as session: + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_partial_success( + workflow_run_id=self._workflow_run_id, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + outputs=event.outputs, + exceptions_count=event.exceptions_count, + conversation_id=None, + trace_manager=trace_manager, + ) + + # save workflow app log + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) + + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, + ) + session.commit() + + yield workflow_finish_resp + elif isinstance(event, QueueWorkflowFailedEvent | QueueStopEvent): + if not self._workflow_run_id: + raise ValueError("workflow run not initialized.") + if not graph_runtime_state: + raise ValueError("graph runtime state not initialized.") + + with Session(db.engine, expire_on_commit=False) as session: + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( + workflow_run_id=self._workflow_run_id, + total_tokens=graph_runtime_state.total_tokens, + total_steps=graph_runtime_state.node_run_steps, + status=WorkflowRunStatus.FAILED + if isinstance(event, QueueWorkflowFailedEvent) + else WorkflowRunStatus.STOPPED, + error_message=event.error + if isinstance(event, QueueWorkflowFailedEvent) + else event.get_stop_reason(), + conversation_id=None, + trace_manager=trace_manager, + exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, + ) + + # save workflow app log + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) + + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, + ) + session.commit() + + yield workflow_finish_resp + elif isinstance(event, QueueTextChunkEvent): + delta_text = event.text + if delta_text is None: + continue + + # only publish tts message at text chunk streaming + if tts_publisher: + tts_publisher.publish(queue_message) + + self._task_state.answer += delta_text + yield self._text_chunk_to_stream_response( + delta_text, from_variable_selector=event.from_variable_selector + ) + elif isinstance(event, QueueAgentLogEvent): + yield self._workflow_cycle_manager.handle_agent_log( + task_id=self._application_generate_entity.task_id, event=event + ) + else: + continue + + if tts_publisher: + tts_publisher.publish(None) + + def _save_workflow_app_log(self, *, session: Session, workflow_execution: WorkflowExecution) -> None: + workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id)) + assert workflow_run is not None + invoke_from = self._application_generate_entity.invoke_from + if invoke_from == InvokeFrom.SERVICE_API: + created_from = 
WorkflowAppLogCreatedFrom.SERVICE_API
+        elif invoke_from == InvokeFrom.EXPLORE:
+            created_from = WorkflowAppLogCreatedFrom.INSTALLED_APP
+        elif invoke_from == InvokeFrom.WEB_APP:
+            created_from = WorkflowAppLogCreatedFrom.WEB_APP
+        else:
+            # do not save logs for debugging runs
+            return
+
+        workflow_app_log = WorkflowAppLog()
+        workflow_app_log.tenant_id = workflow_run.tenant_id
+        workflow_app_log.app_id = workflow_run.app_id
+        workflow_app_log.workflow_id = workflow_run.workflow_id
+        workflow_app_log.workflow_run_id = workflow_run.id
+        workflow_app_log.created_from = created_from.value
+        workflow_app_log.created_by_role = self._created_by_role
+        workflow_app_log.created_by = self._user_id
+
+        session.add(workflow_app_log)
+        session.commit()
+
+    def _text_chunk_to_stream_response(
+        self, text: str, from_variable_selector: Optional[list[str]] = None
+    ) -> TextChunkStreamResponse:
+        """
+        Convert a text chunk into a text chunk stream response.
+        :param text: text chunk
+        :return: text chunk stream response
+        """
+        response = TextChunkStreamResponse(
+            task_id=self._application_generate_entity.task_id,
+            data=TextChunkStreamResponse.Data(text=text, from_variable_selector=from_variable_selector),
+        )
+
+        return response
diff --git a/api/core/app/entities/task_entities.py b/api/core/app/entities/task_entities.py
index 0c2d617f80..9b2bfcbf61 100644
--- a/api/core/app/entities/task_entities.py
+++ b/api/core/app/entities/task_entities.py
@@ -190,7 +190,7 @@ class WorkflowStartStreamResponse(StreamResponse):
         id: str
         workflow_id: str
         sequence_number: int
-        inputs: dict
+        inputs: Mapping[str, Any]
         created_at: int
 
     event: StreamEvent = StreamEvent.WORKFLOW_STARTED
@@ -212,7 +212,7 @@ class WorkflowFinishStreamResponse(StreamResponse):
         workflow_id: str
         sequence_number: int
         status: str
-        outputs: Optional[dict] = None
+        outputs: Optional[Mapping[str, Any]] = None
         error: Optional[str] = None
         elapsed_time: float
         total_tokens: int
@@ -788,7 +788,7 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):
         id: str
         workflow_id: str
         status: str
-        outputs: Optional[dict] = None
+        outputs: Optional[Mapping[str, Any]] = None
         error: Optional[str] = None
         elapsed_time: float
         total_tokens: int
diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py
index 84520a5991..47c65a3e50 100644
--- a/api/core/ops/ops_trace_manager.py
+++ b/api/core/ops/ops_trace_manager.py
@@ -30,6 +30,7 @@ from core.ops.entities.trace_entity import (
     WorkflowTraceInfo,
 )
 from core.ops.utils import get_message_data
+from core.workflow.entities.workflow_execution_entities import WorkflowExecution
 from extensions.ext_database import db
 from extensions.ext_storage import storage
 from models.model import App, AppModelConfig, Conversation, Message, MessageFile, TraceAppConfig
@@ -373,7 +374,7 @@ class TraceTask:
         self,
         trace_type: Any,
         message_id: Optional[str] = None,
-        workflow_run: Optional[WorkflowRun] = None,
+        workflow_execution: Optional[WorkflowExecution] = None,
         conversation_id: Optional[str] = None,
         user_id: Optional[str] = None,
         timer: Optional[Any] = None,
     ):
         self.trace_type = trace_type
         self.message_id = message_id
-        self.workflow_run_id = workflow_run.id if workflow_run else None
+        self.workflow_run_id = workflow_execution.id if workflow_execution else None
         self.conversation_id = conversation_id
         self.user_id = user_id
         self.timer = timer
diff --git a/api/core/repositories/sqlalchemy_workflow_execution_repository.py b/api/core/repositories/sqlalchemy_workflow_execution_repository.py
new file mode 100644
index 0000000000..c1a71b45d0
--- /dev/null
+++ b/api/core/repositories/sqlalchemy_workflow_execution_repository.py @@ -0,0 +1,242 @@ +""" +SQLAlchemy implementation of the WorkflowExecutionRepository. +""" + +import json +import logging +from typing import Optional, Union + +from sqlalchemy import select +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from core.workflow.entities.workflow_execution_entities import ( + WorkflowExecution, + WorkflowExecutionStatus, + WorkflowType, +) +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository +from models import ( + Account, + CreatorUserRole, + EndUser, + WorkflowRun, +) +from models.enums import WorkflowRunTriggeredFrom + +logger = logging.getLogger(__name__) + + +class SQLAlchemyWorkflowExecutionRepository(WorkflowExecutionRepository): + """ + SQLAlchemy implementation of the WorkflowExecutionRepository interface. + + This implementation supports multi-tenancy by filtering operations based on tenant_id. + Each method creates its own session, handles the transaction, and commits changes + to the database. This prevents long-running connections in the workflow core. + + This implementation also includes an in-memory cache for workflow executions to improve + performance by reducing database queries. + """ + + def __init__( + self, + session_factory: sessionmaker | Engine, + user: Union[Account, EndUser], + app_id: Optional[str], + triggered_from: Optional[WorkflowRunTriggeredFrom], + ): + """ + Initialize the repository with a SQLAlchemy sessionmaker or engine and context information. + + Args: + session_factory: SQLAlchemy sessionmaker or engine for creating sessions + user: Account or EndUser object containing tenant_id, user ID, and role information + app_id: App ID for filtering by application (can be None) + triggered_from: Source of the execution trigger (DEBUGGING or APP_RUN) + """ + # If an engine is provided, create a sessionmaker from it + if isinstance(session_factory, Engine): + self._session_factory = sessionmaker(bind=session_factory, expire_on_commit=False) + elif isinstance(session_factory, sessionmaker): + self._session_factory = session_factory + else: + raise ValueError( + f"Invalid session_factory type {type(session_factory).__name__}; expected sessionmaker or Engine" + ) + + # Extract tenant_id from user + tenant_id: str | None = user.tenant_id if isinstance(user, EndUser) else user.current_tenant_id + if not tenant_id: + raise ValueError("User must have a tenant_id or current_tenant_id") + self._tenant_id = tenant_id + + # Store app context + self._app_id = app_id + + # Extract user context + self._triggered_from = triggered_from + self._creator_user_id = user.id + + # Determine user role based on user type + self._creator_user_role = CreatorUserRole.ACCOUNT if isinstance(user, Account) else CreatorUserRole.END_USER + + # Initialize in-memory cache for workflow executions + # Key: execution_id, Value: WorkflowRun (DB model) + self._execution_cache: dict[str, WorkflowRun] = {} + + def _to_domain_model(self, db_model: WorkflowRun) -> WorkflowExecution: + """ + Convert a database model to a domain model. 
+
+        Args:
+            db_model: The database model to convert
+
+        Returns:
+            The domain model
+        """
+        # Parse JSON fields
+        inputs = db_model.inputs_dict
+        outputs = db_model.outputs_dict
+        graph = db_model.graph_dict
+
+        # Convert status to domain enum
+        status = WorkflowExecutionStatus(db_model.status)
+
+        return WorkflowExecution(
+            id=db_model.id,
+            workflow_id=db_model.workflow_id,
+            sequence_number=db_model.sequence_number,
+            type=WorkflowType(db_model.type),
+            workflow_version=db_model.version,
+            graph=graph,
+            inputs=inputs,
+            outputs=outputs,
+            status=status,
+            error_message=db_model.error or "",
+            total_tokens=db_model.total_tokens,
+            total_steps=db_model.total_steps,
+            exceptions_count=db_model.exceptions_count,
+            started_at=db_model.created_at,
+            finished_at=db_model.finished_at,
+        )
+
+    def _to_db_model(self, domain_model: WorkflowExecution) -> WorkflowRun:
+        """
+        Convert a domain model to a database model.
+
+        Args:
+            domain_model: The domain model to convert
+
+        Returns:
+            The database model
+        """
+        # Validate that the required creation context was provided via the constructor
+        if not self._triggered_from:
+            raise ValueError("triggered_from is required in repository constructor")
+        if not self._creator_user_id:
+            raise ValueError("created_by is required in repository constructor")
+        if not self._creator_user_role:
+            raise ValueError("created_by_role is required in repository constructor")
+
+        db_model = WorkflowRun()
+        db_model.id = domain_model.id
+        db_model.tenant_id = self._tenant_id
+        if self._app_id is not None:
+            db_model.app_id = self._app_id
+        db_model.workflow_id = domain_model.workflow_id
+        db_model.triggered_from = self._triggered_from
+        db_model.sequence_number = domain_model.sequence_number
+        db_model.type = domain_model.type
+        db_model.version = domain_model.workflow_version
+        db_model.graph = json.dumps(domain_model.graph) if domain_model.graph else None
+        db_model.inputs = json.dumps(domain_model.inputs) if domain_model.inputs else None
+        db_model.outputs = json.dumps(domain_model.outputs) if domain_model.outputs else None
+        db_model.status = domain_model.status
+        db_model.error = domain_model.error_message if domain_model.error_message else None
+        db_model.total_tokens = domain_model.total_tokens
+        db_model.total_steps = domain_model.total_steps
+        db_model.exceptions_count = domain_model.exceptions_count
+        db_model.created_by_role = self._creator_user_role
+        db_model.created_by = self._creator_user_id
+        db_model.created_at = domain_model.started_at
+        db_model.finished_at = domain_model.finished_at
+
+        # Calculate elapsed time if finished_at is available
+        if domain_model.finished_at:
+            db_model.elapsed_time = (domain_model.finished_at - domain_model.started_at).total_seconds()
+        else:
+            db_model.elapsed_time = 0
+
+        return db_model
+
+    def save(self, execution: WorkflowExecution) -> None:
+        """
+        Save or update a WorkflowExecution domain entity to the database.
+
+        This method serves as a domain-to-database adapter that:
+        1. Converts the domain entity to its database representation
+        2. Persists the database model using SQLAlchemy's merge operation
+        3. Maintains proper multi-tenancy by including tenant context during conversion
+        4. Updates the in-memory cache for faster subsequent lookups
+
+        The method handles both creating new records and updating existing ones through
+        SQLAlchemy's merge operation.
+ + Args: + execution: The WorkflowExecution domain entity to persist + """ + # Convert domain model to database model using tenant context and other attributes + db_model = self._to_db_model(execution) + + # Create a new database session + with self._session_factory() as session: + # SQLAlchemy merge intelligently handles both insert and update operations + # based on the presence of the primary key + session.merge(db_model) + session.commit() + + # Update the in-memory cache for faster subsequent lookups + logger.debug(f"Updating cache for execution_id: {db_model.id}") + self._execution_cache[db_model.id] = db_model + + def get(self, execution_id: str) -> Optional[WorkflowExecution]: + """ + Retrieve a WorkflowExecution by its ID. + + First checks the in-memory cache, and if not found, queries the database. + If found in the database, adds it to the cache for future lookups. + + Args: + execution_id: The workflow execution ID + + Returns: + The WorkflowExecution instance if found, None otherwise + """ + # First check the cache + if execution_id in self._execution_cache: + logger.debug(f"Cache hit for execution_id: {execution_id}") + # Convert cached DB model to domain model + cached_db_model = self._execution_cache[execution_id] + return self._to_domain_model(cached_db_model) + + # If not in cache, query the database + logger.debug(f"Cache miss for execution_id: {execution_id}, querying database") + with self._session_factory() as session: + stmt = select(WorkflowRun).where( + WorkflowRun.id == execution_id, + WorkflowRun.tenant_id == self._tenant_id, + ) + + if self._app_id: + stmt = stmt.where(WorkflowRun.app_id == self._app_id) + + db_model = session.scalar(stmt) + if db_model: + # Add DB model to cache + self._execution_cache[execution_id] = db_model + + # Convert to domain model and return + return self._to_domain_model(db_model) + + return None diff --git a/api/core/workflow/entities/workflow_execution_entities.py b/api/core/workflow/entities/workflow_execution_entities.py new file mode 100644 index 0000000000..200d4697b5 --- /dev/null +++ b/api/core/workflow/entities/workflow_execution_entities.py @@ -0,0 +1,91 @@ +""" +Domain entities for workflow execution. + +Models are independent of the storage mechanism and don't contain +implementation details like tenant_id, app_id, etc. +""" + +from collections.abc import Mapping +from datetime import UTC, datetime +from enum import StrEnum +from typing import Any, Optional + +from pydantic import BaseModel, Field + + +class WorkflowType(StrEnum): + """ + Workflow Type Enum for domain layer + """ + + WORKFLOW = "workflow" + CHAT = "chat" + + +class WorkflowExecutionStatus(StrEnum): + RUNNING = "running" + SUCCEEDED = "succeeded" + FAILED = "failed" + STOPPED = "stopped" + PARTIAL_SUCCEEDED = "partial-succeeded" + + +class WorkflowExecution(BaseModel): + """ + Domain model for workflow execution based on WorkflowRun but without + user, tenant, and app attributes. + """ + + id: str = Field(...) + workflow_id: str = Field(...) + workflow_version: str = Field(...) + sequence_number: int = Field(...) + + type: WorkflowType = Field(...) + graph: Mapping[str, Any] = Field(...) + + inputs: Mapping[str, Any] = Field(...) + outputs: Optional[Mapping[str, Any]] = None + + status: WorkflowExecutionStatus = WorkflowExecutionStatus.RUNNING + error_message: str = Field(default="") + total_tokens: int = Field(default=0) + total_steps: int = Field(default=0) + exceptions_count: int = Field(default=0) + + started_at: datetime = Field(...) 
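+    # Both timestamps are naive UTC datetimes; elapsed_time below strips tzinfo
+    # from its fallback "now" so the subtraction stays consistent.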
+ finished_at: Optional[datetime] = None + + @property + def elapsed_time(self) -> float: + """ + Calculate elapsed time in seconds. + If workflow is not finished, use current time. + """ + end_time = self.finished_at or datetime.now(UTC).replace(tzinfo=None) + return (end_time - self.started_at).total_seconds() + + @classmethod + def new( + cls, + *, + id: str, + workflow_id: str, + sequence_number: int, + type: WorkflowType, + workflow_version: str, + graph: Mapping[str, Any], + inputs: Mapping[str, Any], + started_at: datetime, + ) -> "WorkflowExecution": + return WorkflowExecution( + id=id, + workflow_id=workflow_id, + sequence_number=sequence_number, + type=type, + workflow_version=workflow_version, + graph=graph, + inputs=inputs, + status=WorkflowExecutionStatus.RUNNING, + started_at=started_at, + ) diff --git a/api/core/workflow/repository/workflow_execution_repository.py b/api/core/workflow/repository/workflow_execution_repository.py new file mode 100644 index 0000000000..a39a98ee33 --- /dev/null +++ b/api/core/workflow/repository/workflow_execution_repository.py @@ -0,0 +1,42 @@ +from typing import Optional, Protocol + +from core.workflow.entities.workflow_execution_entities import WorkflowExecution + + +class WorkflowExecutionRepository(Protocol): + """ + Repository interface for WorkflowExecution. + + This interface defines the contract for accessing and manipulating + WorkflowExecution data, regardless of the underlying storage mechanism. + + Note: Domain-specific concepts like multi-tenancy (tenant_id), application context (app_id), + and other implementation details should be handled at the implementation level, not in + the core interface. This keeps the core domain model clean and independent of specific + application domains or deployment scenarios. + """ + + def save(self, execution: WorkflowExecution) -> None: + """ + Save or update a WorkflowExecution instance. + + This method handles both creating new records and updating existing ones. + The implementation should determine whether to create or update based on + the execution's ID or other identifying fields. + + Args: + execution: The WorkflowExecution instance to save or update + """ + ... + + def get(self, execution_id: str) -> Optional[WorkflowExecution]: + """ + Retrieve a WorkflowExecution by its ID. + + Args: + execution_id: The workflow execution ID + + Returns: + The WorkflowExecution instance if found, None otherwise + """ + ... 
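
Because WorkflowExecutionRepository is a typing.Protocol, any class exposing matching save()/get() signatures conforms structurally; no inheritance is required. Below is a minimal sketch of a hypothetical in-memory implementation, for example for unit tests. The class name and the usage at the bottom are illustrative assumptions, not code from this patch:

    from datetime import UTC, datetime
    from typing import Optional

    from core.workflow.entities.workflow_execution_entities import WorkflowExecution, WorkflowType


    class InMemoryWorkflowExecutionRepository:
        """Hypothetical in-memory stand-in satisfying WorkflowExecutionRepository."""

        def __init__(self) -> None:
            # Key: execution_id, Value: WorkflowExecution domain model
            self._store: dict[str, WorkflowExecution] = {}

        def save(self, execution: WorkflowExecution) -> None:
            # Upsert keyed by execution id, mirroring the merge semantics of the
            # SQLAlchemy implementation above (minus tenant filtering and caching).
            self._store[execution.id] = execution

        def get(self, execution_id: str) -> Optional[WorkflowExecution]:
            return self._store.get(execution_id)


    repo = InMemoryWorkflowExecutionRepository()
    repo.save(
        WorkflowExecution.new(
            id="run-1",
            workflow_id="wf-1",
            sequence_number=1,
            type=WorkflowType.WORKFLOW,
            workflow_version="draft",
            graph={},
            inputs={},
            started_at=datetime.now(UTC).replace(tzinfo=None),
        )
    )
    assert repo.get("run-1") is not None

A WorkflowCycleManager could then be constructed against such a repository in tests without opening a database session.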
diff --git a/api/core/workflow/workflow_app_generate_task_pipeline.py b/api/core/workflow/workflow_app_generate_task_pipeline.py index 0396fa8157..f2ebd78b36 100644 --- a/api/core/workflow/workflow_app_generate_task_pipeline.py +++ b/api/core/workflow/workflow_app_generate_task_pipeline.py @@ -3,6 +3,7 @@ import time from collections.abc import Generator from typing import Optional, Union +from sqlalchemy import select from sqlalchemy.orm import Session from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME @@ -53,7 +54,9 @@ from core.app.entities.task_entities import ( from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk from core.ops.ops_trace_manager import TraceQueueManager +from core.workflow.entities.workflow_execution_entities import WorkflowExecution from core.workflow.enums import SystemVariableKey +from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from core.workflow.workflow_cycle_manager import WorkflowCycleManager from extensions.ext_database import db @@ -83,6 +86,7 @@ class WorkflowAppGenerateTaskPipeline: queue_manager: AppQueueManager, user: Union[Account, EndUser], stream: bool, + workflow_execution_repository: WorkflowExecutionRepository, workflow_node_execution_repository: WorkflowNodeExecutionRepository, ) -> None: self._base_task_pipeline = BasedGenerateTaskPipeline( @@ -111,6 +115,7 @@ class WorkflowAppGenerateTaskPipeline: SystemVariableKey.WORKFLOW_ID: workflow.id, SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, }, + workflow_execution_repository=workflow_execution_repository, workflow_node_execution_repository=workflow_node_execution_repository, ) @@ -258,17 +263,15 @@ class WorkflowAppGenerateTaskPipeline: with Session(db.engine, expire_on_commit=False) as session: # init workflow run - workflow_run = self._workflow_cycle_manager._handle_workflow_run_start( + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_start( session=session, workflow_id=self._workflow_id, - user_id=self._user_id, - created_by_role=self._created_by_role, ) - self._workflow_run_id = workflow_run.id - start_resp = self._workflow_cycle_manager._workflow_start_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + self._workflow_run_id = workflow_execution.id + start_resp = self._workflow_cycle_manager.workflow_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) - session.commit() yield start_resp elif isinstance( @@ -278,13 +281,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_retried( + workflow_execution_id=self._workflow_run_id, + event=event, ) - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_retried( - workflow_run=workflow_run, event=event - ) - response = self._workflow_cycle_manager._workflow_node_retry_to_stream_response( + response = 
self._workflow_cycle_manager.workflow_node_retry_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -297,27 +298,22 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - workflow_node_execution = self._workflow_cycle_manager._handle_node_execution_start( - workflow_run=workflow_run, event=event - ) - node_start_response = self._workflow_cycle_manager._workflow_node_start_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - session.commit() + workflow_node_execution = self._workflow_cycle_manager.handle_node_execution_start( + workflow_execution_id=self._workflow_run_id, event=event + ) + node_start_response = self._workflow_cycle_manager.workflow_node_start_to_stream_response( + event=event, + task_id=self._application_generate_entity.task_id, + workflow_node_execution=workflow_node_execution, + ) if node_start_response: yield node_start_response elif isinstance(event, QueueNodeSucceededEvent): - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_success( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_success( event=event ) - node_success_response = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_success_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -332,10 +328,10 @@ class WorkflowAppGenerateTaskPipeline: | QueueNodeInLoopFailedEvent | QueueNodeExceptionEvent, ): - workflow_node_execution = self._workflow_cycle_manager._handle_workflow_node_execution_failed( + workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_failed( event=event, ) - node_failed_response = self._workflow_cycle_manager._workflow_node_finish_to_stream_response( + node_failed_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -348,18 +344,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_start_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) - ) + parallel_start_resp = self._workflow_cycle_manager.workflow_parallel_branch_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield parallel_start_resp @@ -367,18 +356,13 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = 
self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - parallel_finish_resp = ( - self._workflow_cycle_manager._workflow_parallel_branch_finished_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + parallel_finish_resp = ( + self._workflow_cycle_manager.workflow_parallel_branch_finished_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, ) + ) yield parallel_finish_resp @@ -386,16 +370,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_start_resp = self._workflow_cycle_manager._workflow_iteration_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_start_resp = self._workflow_cycle_manager.workflow_iteration_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_start_resp @@ -403,16 +382,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_next_resp = self._workflow_cycle_manager._workflow_iteration_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_next_resp = self._workflow_cycle_manager.workflow_iteration_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_next_resp @@ -420,16 +394,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - iter_finish_resp = self._workflow_cycle_manager._workflow_iteration_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + iter_finish_resp = self._workflow_cycle_manager.workflow_iteration_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield iter_finish_resp @@ -437,16 +406,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_start_resp = self._workflow_cycle_manager._workflow_loop_start_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_start_resp = 
self._workflow_cycle_manager.workflow_loop_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_start_resp @@ -454,16 +418,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_next_resp = self._workflow_cycle_manager._workflow_loop_next_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_next_resp = self._workflow_cycle_manager.workflow_loop_next_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_next_resp @@ -471,16 +430,11 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._get_workflow_run( - session=session, workflow_run_id=self._workflow_run_id - ) - loop_finish_resp = self._workflow_cycle_manager._workflow_loop_completed_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, - event=event, - ) + loop_finish_resp = self._workflow_cycle_manager.workflow_loop_completed_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) yield loop_finish_resp @@ -491,10 +445,8 @@ class WorkflowAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -503,12 +455,12 @@ class WorkflowAppGenerateTaskPipeline: ) # save workflow app log - self._save_workflow_app_log(session=session, workflow_run=workflow_run) + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, - workflow_run=workflow_run, + workflow_execution=workflow_execution, ) session.commit() @@ -520,10 +472,8 @@ class WorkflowAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_partial_success( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_partial_success( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, outputs=event.outputs, @@ -533,10 +483,12 @@ class WorkflowAppGenerateTaskPipeline: ) # save workflow app log - 
self._save_workflow_app_log(session=session, workflow_run=workflow_run) + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) session.commit() @@ -548,26 +500,28 @@ class WorkflowAppGenerateTaskPipeline: raise ValueError("graph runtime state not initialized.") with Session(db.engine, expire_on_commit=False) as session: - workflow_run = self._workflow_cycle_manager._handle_workflow_run_failed( - session=session, + workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( workflow_run_id=self._workflow_run_id, - start_at=graph_runtime_state.start_at, total_tokens=graph_runtime_state.total_tokens, total_steps=graph_runtime_state.node_run_steps, status=WorkflowRunStatus.FAILED if isinstance(event, QueueWorkflowFailedEvent) else WorkflowRunStatus.STOPPED, - error=event.error if isinstance(event, QueueWorkflowFailedEvent) else event.get_stop_reason(), + error_message=event.error + if isinstance(event, QueueWorkflowFailedEvent) + else event.get_stop_reason(), conversation_id=None, trace_manager=trace_manager, exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, ) # save workflow app log - self._save_workflow_app_log(session=session, workflow_run=workflow_run) + self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager._workflow_finish_to_stream_response( - session=session, task_id=self._application_generate_entity.task_id, workflow_run=workflow_run + workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + session=session, + task_id=self._application_generate_entity.task_id, + workflow_execution=workflow_execution, ) session.commit() @@ -586,7 +540,7 @@ class WorkflowAppGenerateTaskPipeline: delta_text, from_variable_selector=event.from_variable_selector ) elif isinstance(event, QueueAgentLogEvent): - yield self._workflow_cycle_manager._handle_agent_log( + yield self._workflow_cycle_manager.handle_agent_log( task_id=self._application_generate_entity.task_id, event=event ) else: @@ -595,11 +549,9 @@ class WorkflowAppGenerateTaskPipeline: if tts_publisher: tts_publisher.publish(None) - def _save_workflow_app_log(self, *, session: Session, workflow_run: WorkflowRun) -> None: - """ - Save workflow app log. 
-        :return:
-        """
+    def _save_workflow_app_log(self, *, session: Session, workflow_execution: WorkflowExecution) -> None:
+        workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id))
+        assert workflow_run is not None
         invoke_from = self._application_generate_entity.invoke_from
         if invoke_from == InvokeFrom.SERVICE_API:
             created_from = WorkflowAppLogCreatedFrom.SERVICE_API
diff --git a/api/core/workflow/workflow_cycle_manager.py b/api/core/workflow/workflow_cycle_manager.py
index 6d33d7372c..d4c2b1b6bd 100644
--- a/api/core/workflow/workflow_cycle_manager.py
+++ b/api/core/workflow/workflow_cycle_manager.py
@@ -1,4 +1,3 @@
-import json
 import time
 from collections.abc import Mapping, Sequence
 from datetime import UTC, datetime
@@ -8,7 +7,7 @@ from uuid import uuid4
 from sqlalchemy import func, select
 from sqlalchemy.orm import Session
 
-from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom, WorkflowAppGenerateEntity
+from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity
 from core.app.entities.queue_entities import (
     QueueAgentLogEvent,
     QueueIterationCompletedEvent,
@@ -54,9 +53,11 @@ from core.workflow.entities.node_execution_entities import (
     NodeExecution,
     NodeExecutionStatus,
 )
+from core.workflow.entities.workflow_execution_entities import WorkflowExecution, WorkflowExecutionStatus, WorkflowType
 from core.workflow.enums import SystemVariableKey
 from core.workflow.nodes import NodeType
 from core.workflow.nodes.tool.entities import ToolNodeData
+from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.workflow_entry import WorkflowEntry
 from models import (
@@ -67,7 +68,6 @@ from models import (
     WorkflowNodeExecutionStatus,
     WorkflowRun,
     WorkflowRunStatus,
-    WorkflowRunTriggeredFrom,
 )
 
 
@@ -77,21 +77,20 @@ class WorkflowCycleManager:
         *,
         application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity],
         workflow_system_variables: dict[SystemVariableKey, Any],
+        workflow_execution_repository: WorkflowExecutionRepository,
         workflow_node_execution_repository: WorkflowNodeExecutionRepository,
     ) -> None:
-        self._workflow_run: WorkflowRun | None = None
         self._application_generate_entity = application_generate_entity
         self._workflow_system_variables = workflow_system_variables
+        self._workflow_execution_repository = workflow_execution_repository
        self._workflow_node_execution_repository = workflow_node_execution_repository
 
-    def _handle_workflow_run_start(
+    def handle_workflow_run_start(
         self,
         *,
         session: Session,
         workflow_id: str,
-        user_id: str,
-        created_by_role: CreatorUserRole,
-    ) -> WorkflowRun:
+    ) -> WorkflowExecution:
         workflow_stmt = select(Workflow).where(Workflow.id == workflow_id)
         workflow = session.scalar(workflow_stmt)
         if not workflow:
@@ -110,157 +109,116 @@ class WorkflowCycleManager:
                 continue
             inputs[f"sys.{key.value}"] = value
 
-        triggered_from = (
-            WorkflowRunTriggeredFrom.DEBUGGING
-            if self._application_generate_entity.invoke_from == InvokeFrom.DEBUGGER
-            else WorkflowRunTriggeredFrom.APP_RUN
-        )
-
         # handle special values
         inputs = dict(WorkflowEntry.handle_special_values(inputs) or {})
 
         # init workflow run
         # TODO: This workflow_run_id should never be None; maybe we can handle this more elegantly
-        workflow_run_id = 
str(self._workflow_system_variables.get(SystemVariableKey.WORKFLOW_RUN_ID) or uuid4()) + execution_id = str(self._workflow_system_variables.get(SystemVariableKey.WORKFLOW_RUN_ID) or uuid4()) + execution = WorkflowExecution.new( + id=execution_id, + workflow_id=workflow.id, + sequence_number=new_sequence_number, + type=WorkflowType(workflow.type), + workflow_version=workflow.version, + graph=workflow.graph_dict, + inputs=inputs, + started_at=datetime.now(UTC).replace(tzinfo=None), + ) - workflow_run = WorkflowRun() - workflow_run.id = workflow_run_id - workflow_run.tenant_id = workflow.tenant_id - workflow_run.app_id = workflow.app_id - workflow_run.sequence_number = new_sequence_number - workflow_run.workflow_id = workflow.id - workflow_run.type = workflow.type - workflow_run.triggered_from = triggered_from.value - workflow_run.version = workflow.version - workflow_run.graph = workflow.graph - workflow_run.inputs = json.dumps(inputs) - workflow_run.status = WorkflowRunStatus.RUNNING - workflow_run.created_by_role = created_by_role - workflow_run.created_by = user_id - workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None) + self._workflow_execution_repository.save(execution) - session.add(workflow_run) + return execution - return workflow_run - - def _handle_workflow_run_success( + def handle_workflow_run_success( self, *, - session: Session, workflow_run_id: str, - start_at: float, total_tokens: int, total_steps: int, outputs: Mapping[str, Any] | None = None, conversation_id: Optional[str] = None, trace_manager: Optional[TraceQueueManager] = None, - ) -> WorkflowRun: - """ - Workflow run success - :param workflow_run_id: workflow run id - :param start_at: start time - :param total_tokens: total tokens - :param total_steps: total steps - :param outputs: outputs - :param conversation_id: conversation id - :return: - """ - workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) + ) -> WorkflowExecution: + workflow_execution = self._get_workflow_execution_or_raise_error(workflow_run_id) outputs = WorkflowEntry.handle_special_values(outputs) - workflow_run.status = WorkflowRunStatus.SUCCEEDED - workflow_run.outputs = json.dumps(outputs or {}) - workflow_run.elapsed_time = time.perf_counter() - start_at - workflow_run.total_tokens = total_tokens - workflow_run.total_steps = total_steps - workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) + workflow_execution.status = WorkflowExecutionStatus.SUCCEEDED + workflow_execution.outputs = outputs or {} + workflow_execution.total_tokens = total_tokens + workflow_execution.total_steps = total_steps + workflow_execution.finished_at = datetime.now(UTC).replace(tzinfo=None) if trace_manager: trace_manager.add_trace_task( TraceTask( TraceTaskName.WORKFLOW_TRACE, - workflow_run=workflow_run, + workflow_execution=workflow_execution, conversation_id=conversation_id, user_id=trace_manager.user_id, ) ) - return workflow_run + return workflow_execution - def _handle_workflow_run_partial_success( + def handle_workflow_run_partial_success( self, *, - session: Session, workflow_run_id: str, - start_at: float, total_tokens: int, total_steps: int, outputs: Mapping[str, Any] | None = None, exceptions_count: int = 0, conversation_id: Optional[str] = None, trace_manager: Optional[TraceQueueManager] = None, - ) -> WorkflowRun: - workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) + ) -> WorkflowExecution: + execution = self._get_workflow_execution_or_raise_error(workflow_run_id) 
outputs = WorkflowEntry.handle_special_values(dict(outputs) if outputs else None) - workflow_run.status = WorkflowRunStatus.PARTIAL_SUCCEEDED.value - workflow_run.outputs = json.dumps(outputs or {}) - workflow_run.elapsed_time = time.perf_counter() - start_at - workflow_run.total_tokens = total_tokens - workflow_run.total_steps = total_steps - workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) - workflow_run.exceptions_count = exceptions_count + execution.status = WorkflowExecutionStatus.PARTIAL_SUCCEEDED + execution.outputs = outputs or {} + execution.total_tokens = total_tokens + execution.total_steps = total_steps + execution.finished_at = datetime.now(UTC).replace(tzinfo=None) + execution.exceptions_count = exceptions_count if trace_manager: trace_manager.add_trace_task( TraceTask( TraceTaskName.WORKFLOW_TRACE, - workflow_run=workflow_run, + workflow_execution=execution, conversation_id=conversation_id, user_id=trace_manager.user_id, ) ) - return workflow_run + return execution - def _handle_workflow_run_failed( + def handle_workflow_run_failed( self, *, - session: Session, workflow_run_id: str, - start_at: float, total_tokens: int, total_steps: int, status: WorkflowRunStatus, - error: str, + error_message: str, conversation_id: Optional[str] = None, trace_manager: Optional[TraceQueueManager] = None, exceptions_count: int = 0, - ) -> WorkflowRun: - """ - Workflow run failed - :param workflow_run_id: workflow run id - :param start_at: start time - :param total_tokens: total tokens - :param total_steps: total steps - :param status: status - :param error: error message - :return: - """ - workflow_run = self._get_workflow_run(session=session, workflow_run_id=workflow_run_id) + ) -> WorkflowExecution: + execution = self._get_workflow_execution_or_raise_error(workflow_run_id) - workflow_run.status = status.value - workflow_run.error = error - workflow_run.elapsed_time = time.perf_counter() - start_at - workflow_run.total_tokens = total_tokens - workflow_run.total_steps = total_steps - workflow_run.finished_at = datetime.now(UTC).replace(tzinfo=None) - workflow_run.exceptions_count = exceptions_count + execution.status = WorkflowExecutionStatus(status.value) + execution.error_message = error_message + execution.total_tokens = total_tokens + execution.total_steps = total_steps + execution.finished_at = datetime.now(UTC).replace(tzinfo=None) + execution.exceptions_count = exceptions_count # Use the instance repository to find running executions for a workflow run running_domain_executions = self._workflow_node_execution_repository.get_running_executions( - workflow_run_id=workflow_run.id + workflow_run_id=execution.id ) # Update the domain models @@ -269,7 +227,7 @@ class WorkflowCycleManager: if domain_execution.node_execution_id: # Update the domain model domain_execution.status = NodeExecutionStatus.FAILED - domain_execution.error = error + domain_execution.error = error_message domain_execution.finished_at = now domain_execution.elapsed_time = (now - domain_execution.created_at).total_seconds() @@ -280,15 +238,22 @@ class WorkflowCycleManager: trace_manager.add_trace_task( TraceTask( TraceTaskName.WORKFLOW_TRACE, - workflow_run=workflow_run, + workflow_execution=execution, conversation_id=conversation_id, user_id=trace_manager.user_id, ) ) - return workflow_run + return execution + + def handle_node_execution_start( + self, + *, + workflow_execution_id: str, + event: QueueNodeStartedEvent, + ) -> NodeExecution: + workflow_execution = 
self._get_workflow_execution_or_raise_error(workflow_execution_id) - def _handle_node_execution_start(self, *, workflow_run: WorkflowRun, event: QueueNodeStartedEvent) -> NodeExecution: # Create a domain model created_at = datetime.now(UTC).replace(tzinfo=None) metadata = { @@ -299,8 +264,8 @@ class WorkflowCycleManager: domain_execution = NodeExecution( id=str(uuid4()), - workflow_id=workflow_run.workflow_id, - workflow_run_id=workflow_run.id, + workflow_id=workflow_execution.workflow_id, + workflow_run_id=workflow_execution.id, predecessor_node_id=event.predecessor_node_id, index=event.node_run_index, node_execution_id=event.node_execution_id, @@ -317,7 +282,7 @@ class WorkflowCycleManager: return domain_execution - def _handle_workflow_node_execution_success(self, *, event: QueueNodeSucceededEvent) -> NodeExecution: + def handle_workflow_node_execution_success(self, *, event: QueueNodeSucceededEvent) -> NodeExecution: # Get the domain model from repository domain_execution = self._workflow_node_execution_repository.get_by_node_execution_id(event.node_execution_id) if not domain_execution: @@ -350,7 +315,7 @@ class WorkflowCycleManager: return domain_execution - def _handle_workflow_node_execution_failed( + def handle_workflow_node_execution_failed( self, *, event: QueueNodeFailedEvent @@ -400,15 +365,10 @@ class WorkflowCycleManager: return domain_execution - def _handle_workflow_node_execution_retried( - self, *, workflow_run: WorkflowRun, event: QueueNodeRetryEvent + def handle_workflow_node_execution_retried( + self, *, workflow_execution_id: str, event: QueueNodeRetryEvent ) -> NodeExecution: - """ - Workflow node execution failed - :param workflow_run: workflow run - :param event: queue node failed event - :return: - """ + workflow_execution = self._get_workflow_execution_or_raise_error(workflow_execution_id) created_at = event.start_at finished_at = datetime.now(UTC).replace(tzinfo=None) elapsed_time = (finished_at - created_at).total_seconds() @@ -433,8 +393,8 @@ class WorkflowCycleManager: # Create a domain model domain_execution = NodeExecution( id=str(uuid4()), - workflow_id=workflow_run.workflow_id, - workflow_run_id=workflow_run.id, + workflow_id=workflow_execution.workflow_id, + workflow_run_id=workflow_execution.id, predecessor_node_id=event.predecessor_node_id, node_execution_id=event.node_execution_id, node_id=event.node_id, @@ -456,34 +416,34 @@ class WorkflowCycleManager: return domain_execution - def _workflow_start_to_stream_response( + def workflow_start_to_stream_response( self, *, - session: Session, task_id: str, - workflow_run: WorkflowRun, + workflow_execution: WorkflowExecution, ) -> WorkflowStartStreamResponse: - _ = session return WorkflowStartStreamResponse( task_id=task_id, - workflow_run_id=workflow_run.id, + workflow_run_id=workflow_execution.id, data=WorkflowStartStreamResponse.Data( - id=workflow_run.id, - workflow_id=workflow_run.workflow_id, - sequence_number=workflow_run.sequence_number, - inputs=dict(workflow_run.inputs_dict or {}), - created_at=int(workflow_run.created_at.timestamp()), + id=workflow_execution.id, + workflow_id=workflow_execution.workflow_id, + sequence_number=workflow_execution.sequence_number, + inputs=workflow_execution.inputs, + created_at=int(workflow_execution.started_at.timestamp()), ), ) - def _workflow_finish_to_stream_response( + def workflow_finish_to_stream_response( self, *, session: Session, task_id: str, - workflow_run: WorkflowRun, + workflow_execution: WorkflowExecution, ) -> WorkflowFinishStreamResponse: 
         created_by = None
+        workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id))
+        assert workflow_run is not None
         if workflow_run.created_by_role == CreatorUserRole.ACCOUNT:
             stmt = select(Account).where(Account.id == workflow_run.created_by)
             account = session.scalar(stmt)
@@ -504,28 +464,35 @@ class WorkflowCycleManager:
         else:
             raise NotImplementedError(f"unknown created_by_role: {workflow_run.created_by_role}")

+        # Handle the case where finished_at is None by using current time as default
+        finished_at_timestamp = (
+            int(workflow_execution.finished_at.timestamp())
+            if workflow_execution.finished_at
+            else int(datetime.now(UTC).timestamp())
+        )
+
         return WorkflowFinishStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution.id,
             data=WorkflowFinishStreamResponse.Data(
-                id=workflow_run.id,
-                workflow_id=workflow_run.workflow_id,
-                sequence_number=workflow_run.sequence_number,
-                status=workflow_run.status,
-                outputs=dict(workflow_run.outputs_dict) if workflow_run.outputs_dict else None,
-                error=workflow_run.error,
-                elapsed_time=workflow_run.elapsed_time,
-                total_tokens=workflow_run.total_tokens,
-                total_steps=workflow_run.total_steps,
+                id=workflow_execution.id,
+                workflow_id=workflow_execution.workflow_id,
+                sequence_number=workflow_execution.sequence_number,
+                status=workflow_execution.status,
+                outputs=workflow_execution.outputs,
+                error=workflow_execution.error_message,
+                elapsed_time=workflow_execution.elapsed_time,
+                total_tokens=workflow_execution.total_tokens,
+                total_steps=workflow_execution.total_steps,
                 created_by=created_by,
-                created_at=int(workflow_run.created_at.timestamp()),
-                finished_at=int(workflow_run.finished_at.timestamp()),
-                files=self._fetch_files_from_node_outputs(dict(workflow_run.outputs_dict)),
-                exceptions_count=workflow_run.exceptions_count,
+                created_at=int(workflow_execution.started_at.timestamp()),
+                finished_at=finished_at_timestamp,
+                files=self.fetch_files_from_node_outputs(workflow_execution.outputs),
+                exceptions_count=workflow_execution.exceptions_count,
             ),
         )

-    def _workflow_node_start_to_stream_response(
+    def workflow_node_start_to_stream_response(
         self,
         *,
         event: QueueNodeStartedEvent,
@@ -571,7 +538,7 @@ class WorkflowCycleManager:

         return response

-    def _workflow_node_finish_to_stream_response(
+    def workflow_node_finish_to_stream_response(
         self,
         *,
         event: QueueNodeSucceededEvent
@@ -608,7 +575,7 @@ class WorkflowCycleManager:
                 execution_metadata=workflow_node_execution.metadata,
                 created_at=int(workflow_node_execution.created_at.timestamp()),
                 finished_at=int(workflow_node_execution.finished_at.timestamp()),
-                files=self._fetch_files_from_node_outputs(workflow_node_execution.outputs or {}),
+                files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}),
                 parallel_id=event.parallel_id,
                 parallel_start_node_id=event.parallel_start_node_id,
                 parent_parallel_id=event.parent_parallel_id,
@@ -618,7 +585,7 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_node_retry_to_stream_response(
+    def workflow_node_retry_to_stream_response(
         self,
         *,
         event: QueueNodeRetryEvent,
@@ -651,7 +618,7 @@ class WorkflowCycleManager:
                 execution_metadata=workflow_node_execution.metadata,
                 created_at=int(workflow_node_execution.created_at.timestamp()),
                 finished_at=int(workflow_node_execution.finished_at.timestamp()),
-                files=self._fetch_files_from_node_outputs(workflow_node_execution.outputs or {}),
+                files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}),
                 parallel_id=event.parallel_id,
                 parallel_start_node_id=event.parallel_start_node_id,
                 parent_parallel_id=event.parent_parallel_id,
@@ -662,13 +629,16 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_parallel_branch_start_to_stream_response(
-        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueParallelBranchRunStartedEvent
+    def workflow_parallel_branch_start_to_stream_response(
+        self,
+        *,
+        task_id: str,
+        workflow_execution_id: str,
+        event: QueueParallelBranchRunStartedEvent,
     ) -> ParallelBranchStartStreamResponse:
-        _ = session
         return ParallelBranchStartStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=ParallelBranchStartStreamResponse.Data(
                 parallel_id=event.parallel_id,
                 parallel_branch_id=event.parallel_start_node_id,
@@ -680,18 +650,16 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_parallel_branch_finished_to_stream_response(
+    def workflow_parallel_branch_finished_to_stream_response(
         self,
         *,
-        session: Session,
         task_id: str,
-        workflow_run: WorkflowRun,
+        workflow_execution_id: str,
         event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent,
     ) -> ParallelBranchFinishedStreamResponse:
-        _ = session
         return ParallelBranchFinishedStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=ParallelBranchFinishedStreamResponse.Data(
                 parallel_id=event.parallel_id,
                 parallel_branch_id=event.parallel_start_node_id,
@@ -705,13 +673,16 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_iteration_start_to_stream_response(
-        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationStartEvent
+    def workflow_iteration_start_to_stream_response(
+        self,
+        *,
+        task_id: str,
+        workflow_execution_id: str,
+        event: QueueIterationStartEvent,
     ) -> IterationNodeStartStreamResponse:
-        _ = session
         return IterationNodeStartStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=IterationNodeStartStreamResponse.Data(
                 id=event.node_id,
                 node_id=event.node_id,
@@ -726,13 +697,16 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_iteration_next_to_stream_response(
-        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationNextEvent
+    def workflow_iteration_next_to_stream_response(
+        self,
+        *,
+        task_id: str,
+        workflow_execution_id: str,
+        event: QueueIterationNextEvent,
     ) -> IterationNodeNextStreamResponse:
-        _ = session
         return IterationNodeNextStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=IterationNodeNextStreamResponse.Data(
                 id=event.node_id,
                 node_id=event.node_id,
@@ -749,13 +723,16 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_iteration_completed_to_stream_response(
-        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueIterationCompletedEvent
+    def workflow_iteration_completed_to_stream_response(
+        self,
+        *,
+        task_id: str,
+        workflow_execution_id: str,
+        event: QueueIterationCompletedEvent,
     ) -> IterationNodeCompletedStreamResponse:
-        _ = session
         return IterationNodeCompletedStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=IterationNodeCompletedStreamResponse.Data(
                 id=event.node_id,
                 node_id=event.node_id,
@@ -779,13 +756,12 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_loop_start_to_stream_response(
-        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopStartEvent
+    def workflow_loop_start_to_stream_response(
+        self, *, task_id: str, workflow_execution_id: str, event: QueueLoopStartEvent
     ) -> LoopNodeStartStreamResponse:
-        _ = session
         return LoopNodeStartStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=LoopNodeStartStreamResponse.Data(
                 id=event.node_id,
                 node_id=event.node_id,
@@ -800,13 +776,16 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_loop_next_to_stream_response(
-        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopNextEvent
+    def workflow_loop_next_to_stream_response(
+        self,
+        *,
+        task_id: str,
+        workflow_execution_id: str,
+        event: QueueLoopNextEvent,
     ) -> LoopNodeNextStreamResponse:
-        _ = session
         return LoopNodeNextStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=LoopNodeNextStreamResponse.Data(
                 id=event.node_id,
                 node_id=event.node_id,
@@ -823,13 +802,16 @@ class WorkflowCycleManager:
             ),
         )

-    def _workflow_loop_completed_to_stream_response(
-        self, *, session: Session, task_id: str, workflow_run: WorkflowRun, event: QueueLoopCompletedEvent
+    def workflow_loop_completed_to_stream_response(
+        self,
+        *,
+        task_id: str,
+        workflow_execution_id: str,
+        event: QueueLoopCompletedEvent,
     ) -> LoopNodeCompletedStreamResponse:
-        _ = session
         return LoopNodeCompletedStreamResponse(
             task_id=task_id,
-            workflow_run_id=workflow_run.id,
+            workflow_run_id=workflow_execution_id,
             data=LoopNodeCompletedStreamResponse.Data(
                 id=event.node_id,
                 node_id=event.node_id,
@@ -853,7 +835,7 @@ class WorkflowCycleManager:
             ),
         )

-    def _fetch_files_from_node_outputs(self, outputs_dict: Mapping[str, Any]) -> Sequence[Mapping[str, Any]]:
+    def fetch_files_from_node_outputs(self, outputs_dict: Mapping[str, Any] | None) -> Sequence[Mapping[str, Any]]:
         """
         Fetch files from node outputs
         :param outputs_dict: node outputs dict
         :return:
@@ -910,20 +892,13 @@ class WorkflowCycleManager:

         return None

-    def _get_workflow_run(self, *, session: Session, workflow_run_id: str) -> WorkflowRun:
-        if self._workflow_run and self._workflow_run.id == workflow_run_id:
-            cached_workflow_run = self._workflow_run
-            cached_workflow_run = session.merge(cached_workflow_run)
-            return cached_workflow_run
-        stmt = select(WorkflowRun).where(WorkflowRun.id == workflow_run_id)
-        workflow_run = session.scalar(stmt)
-        if not workflow_run:
-            raise WorkflowRunNotFoundError(workflow_run_id)
-        self._workflow_run = workflow_run
+    def _get_workflow_execution_or_raise_error(self, id: str, /) -> WorkflowExecution:
+        execution = self._workflow_execution_repository.get(id)
+        if not execution:
+            raise WorkflowRunNotFoundError(id)
+        return execution

-        return workflow_run
-
-    def _handle_agent_log(self, task_id: str, event: QueueAgentLogEvent) -> AgentLogStreamResponse:
+    def handle_agent_log(self, task_id: str, event: QueueAgentLogEvent) -> AgentLogStreamResponse:
         """
         Handle agent log
         :param task_id: task id
diff --git a/api/models/workflow.py b/api/models/workflow.py
index fc6de8692c..64b0e16577 100644
--- a/api/models/workflow.py
+++ b/api/models/workflow.py
@@ -425,14 +425,14 @@ class WorkflowRun(Base):
     status: Mapped[str] = mapped_column(db.String(255))  # running, succeeded, failed, stopped, partial-succeeded
     outputs: Mapped[Optional[str]] = mapped_column(sa.Text, default="{}")
     error: Mapped[Optional[str]] = mapped_column(db.Text)
-    elapsed_time = db.Column(db.Float, nullable=False, server_default=sa.text("0"))
+    elapsed_time: Mapped[float] = mapped_column(db.Float, nullable=False, server_default=sa.text("0"))
     total_tokens: Mapped[int] = mapped_column(sa.BigInteger, server_default=sa.text("0"))
-    total_steps = db.Column(db.Integer, server_default=db.text("0"))
+    total_steps: Mapped[int] = mapped_column(db.Integer, server_default=db.text("0"))
     created_by_role: Mapped[str] = mapped_column(db.String(255))  # account, end_user
-    created_by = db.Column(StringUUID, nullable=False)
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
-    finished_at = db.Column(db.DateTime)
-    exceptions_count = db.Column(db.Integer, server_default=db.text("0"))
+    created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    finished_at: Mapped[Optional[datetime]] = mapped_column(db.DateTime)
+    exceptions_count: Mapped[int] = mapped_column(db.Integer, server_default=db.text("0"))

     @property
     def created_by_account(self):
@@ -447,7 +447,7 @@ class WorkflowRun(Base):
         return db.session.get(EndUser, self.created_by) if created_by_role == CreatorUserRole.END_USER else None

     @property
-    def graph_dict(self):
+    def graph_dict(self) -> Mapping[str, Any]:
         return json.loads(self.graph) if self.graph else {}

     @property
@@ -752,12 +752,12 @@ class WorkflowAppLog(Base):
     id: Mapped[str] = mapped_column(StringUUID, server_default=db.text("uuid_generate_v4()"))
     tenant_id: Mapped[str] = mapped_column(StringUUID)
     app_id: Mapped[str] = mapped_column(StringUUID)
-    workflow_id = db.Column(StringUUID, nullable=False)
+    workflow_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
     workflow_run_id: Mapped[str] = mapped_column(StringUUID)
-    created_from = db.Column(db.String(255), nullable=False)
-    created_by_role = db.Column(db.String(255), nullable=False)
-    created_by = db.Column(StringUUID, nullable=False)
-    created_at = db.Column(db.DateTime, nullable=False, server_default=func.current_timestamp())
+    created_from: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    created_by_role: Mapped[str] = mapped_column(db.String(255), nullable=False)
+    created_by: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp())

     @property
     def workflow_run(self):
@@ -782,9 +782,11 @@ class ConversationVariable(Base):
     id: Mapped[str] = mapped_column(StringUUID, primary_key=True)
     conversation_id: Mapped[str] = mapped_column(StringUUID, nullable=False, primary_key=True, index=True)
     app_id: Mapped[str] = mapped_column(StringUUID, nullable=False, index=True)
-    data = mapped_column(db.Text, nullable=False)
-    created_at = mapped_column(db.DateTime, nullable=False, server_default=func.current_timestamp(), index=True)
-    updated_at = mapped_column(
+    data: Mapped[str] = mapped_column(db.Text, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(
+        db.DateTime, nullable=False, server_default=func.current_timestamp(), index=True
+    )
+    updated_at: Mapped[datetime] = mapped_column(
         db.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
     )

@@ -832,14 +834,14 @@ class WorkflowDraftVariable(Base):

     # id is the unique identifier of a draft variable.
     id: Mapped[str] = mapped_column(StringUUID, primary_key=True, server_default=db.text("uuid_generate_v4()"))
-    created_at = mapped_column(
+    created_at: Mapped[datetime] = mapped_column(
         db.DateTime,
         nullable=False,
         default=_naive_utc_datetime,
         server_default=func.current_timestamp(),
     )
-    updated_at = mapped_column(
+    updated_at: Mapped[datetime] = mapped_column(
         db.DateTime,
         nullable=False,
         default=_naive_utc_datetime,
diff --git a/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py b/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py
index 94b9d3e2c6..9c955fc086 100644
--- a/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py
+++ b/api/tests/unit_tests/core/workflow/test_workflow_cycle_manager.py
@@ -1,45 +1,73 @@
 import json
-import time
 from datetime import UTC, datetime
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock

 import pytest
 from sqlalchemy.orm import Session

+from core.app.app_config.entities import AppAdditionalFeatures, WorkflowUIBasedAppConfig
 from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, InvokeFrom
 from core.app.entities.queue_entities import (
     QueueNodeFailedEvent,
     QueueNodeStartedEvent,
     QueueNodeSucceededEvent,
 )
+from core.workflow.entities.node_entities import NodeRunMetadataKey
+from core.workflow.entities.node_execution_entities import NodeExecution, NodeExecutionStatus
+from core.workflow.entities.workflow_execution_entities import WorkflowExecution, WorkflowExecutionStatus, WorkflowType
 from core.workflow.enums import SystemVariableKey
 from core.workflow.nodes import NodeType
+from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository
 from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository
 from core.workflow.workflow_cycle_manager import WorkflowCycleManager
 from models.enums import CreatorUserRole
+from models.model import AppMode
 from models.workflow import (
     Workflow,
-    WorkflowNodeExecutionStatus,
     WorkflowRun,
     WorkflowRunStatus,
 )


 @pytest.fixture
-def mock_app_generate_entity():
-    entity = MagicMock(spec=AdvancedChatAppGenerateEntity)
-    entity.inputs = {"query": "test query"}
-    entity.invoke_from = InvokeFrom.WEB_APP
-    # Create app_config as a separate mock
-    app_config = MagicMock()
-    app_config.tenant_id = "test-tenant-id"
-    app_config.app_id = "test-app-id"
-    entity.app_config = app_config
+def real_app_generate_entity():
+    additional_features = AppAdditionalFeatures(
+        file_upload=None,
+        opening_statement=None,
+        suggested_questions=[],
+        suggested_questions_after_answer=False,
+        show_retrieve_source=False,
+        more_like_this=False,
+        speech_to_text=False,
+        text_to_speech=None,
+        trace_config=None,
+    )
+
+    app_config = WorkflowUIBasedAppConfig(
+        tenant_id="test-tenant-id",
+        app_id="test-app-id",
+        app_mode=AppMode.WORKFLOW,
+        additional_features=additional_features,
+        workflow_id="test-workflow-id",
+    )
+
+    entity = AdvancedChatAppGenerateEntity(
+        task_id="test-task-id",
+        app_config=app_config,
+        inputs={"query": "test query"},
+        files=[],
+        user_id="test-user-id",
+        stream=False,
+        invoke_from=InvokeFrom.WEB_APP,
+        query="test query",
+        conversation_id="test-conversation-id",
+    )
+
     return entity


 @pytest.fixture
-def mock_workflow_system_variables():
+def real_workflow_system_variables():
     return {
         SystemVariableKey.QUERY: "test query",
         SystemVariableKey.CONVERSATION_ID: "test-conversation-id",
@@ -59,10 +87,23 @@ def mock_node_execution_repository():
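+# Stand-in for the manager's new WorkflowExecutionRepository dependency; `get`
+# returns None by default and individual tests override it as needed.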
 @pytest.fixture
-def workflow_cycle_manager(mock_app_generate_entity, mock_workflow_system_variables, mock_node_execution_repository):
+def mock_workflow_execution_repository():
+    repo = MagicMock(spec=WorkflowExecutionRepository)
+    repo.get.return_value = None
+    return repo
+
+
+@pytest.fixture
+def workflow_cycle_manager(
+    real_app_generate_entity,
+    real_workflow_system_variables,
+    mock_workflow_execution_repository,
+    mock_node_execution_repository,
+):
     return WorkflowCycleManager(
-        application_generate_entity=mock_app_generate_entity,
-        workflow_system_variables=mock_workflow_system_variables,
+        application_generate_entity=real_app_generate_entity,
+        workflow_system_variables=real_workflow_system_variables,
+        workflow_execution_repository=mock_workflow_execution_repository,
         workflow_node_execution_repository=mock_node_execution_repository,
     )

@@ -74,121 +115,173 @@ def mock_session():


 @pytest.fixture
-def mock_workflow():
-    workflow = MagicMock(spec=Workflow)
+def real_workflow():
+    workflow = Workflow()
     workflow.id = "test-workflow-id"
     workflow.tenant_id = "test-tenant-id"
     workflow.app_id = "test-app-id"
     workflow.type = "chat"
     workflow.version = "1.0"
-    workflow.graph = json.dumps({"nodes": [], "edges": []})
+
+    graph_data = {"nodes": [], "edges": []}
+    workflow.graph = json.dumps(graph_data)
+    workflow.features = json.dumps({"file_upload": {"enabled": False}})
+    workflow.created_by = "test-user-id"
+    workflow.created_at = datetime.now(UTC).replace(tzinfo=None)
+    workflow.updated_at = datetime.now(UTC).replace(tzinfo=None)
+    workflow._environment_variables = "{}"
+    workflow._conversation_variables = "{}"
+
     return workflow


 @pytest.fixture
-def mock_workflow_run():
-    workflow_run = MagicMock(spec=WorkflowRun)
+def real_workflow_run():
+    workflow_run = WorkflowRun()
     workflow_run.id = "test-workflow-run-id"
     workflow_run.tenant_id = "test-tenant-id"
     workflow_run.app_id = "test-app-id"
     workflow_run.workflow_id = "test-workflow-id"
+    workflow_run.sequence_number = 1
+    workflow_run.type = "chat"
+    workflow_run.triggered_from = "app-run"
+    workflow_run.version = "1.0"
+    workflow_run.graph = json.dumps({"nodes": [], "edges": []})
+    workflow_run.inputs = json.dumps({"query": "test query"})
     workflow_run.status = WorkflowRunStatus.RUNNING
+    workflow_run.outputs = json.dumps({"answer": "test answer"})
     workflow_run.created_by_role = CreatorUserRole.ACCOUNT
     workflow_run.created_by = "test-user-id"
     workflow_run.created_at = datetime.now(UTC).replace(tzinfo=None)
-    workflow_run.inputs_dict = {"query": "test query"}
-    workflow_run.outputs_dict = {"answer": "test answer"}
+
     return workflow_run


 def test_init(
-    workflow_cycle_manager, mock_app_generate_entity, mock_workflow_system_variables, mock_node_execution_repository
+    workflow_cycle_manager,
+    real_app_generate_entity,
+    real_workflow_system_variables,
+    mock_workflow_execution_repository,
+    mock_node_execution_repository,
 ):
     """Test initialization of WorkflowCycleManager"""
-    assert workflow_cycle_manager._workflow_run is None
-    assert workflow_cycle_manager._application_generate_entity == mock_app_generate_entity
-    assert workflow_cycle_manager._workflow_system_variables == mock_workflow_system_variables
+    assert workflow_cycle_manager._application_generate_entity == real_app_generate_entity
+    assert workflow_cycle_manager._workflow_system_variables == real_workflow_system_variables
+    assert workflow_cycle_manager._workflow_execution_repository == mock_workflow_execution_repository
     assert workflow_cycle_manager._workflow_node_execution_repository == mock_node_execution_repository


-def test_handle_workflow_run_start(workflow_cycle_manager, mock_session, mock_workflow):
-    """Test _handle_workflow_run_start method"""
+def test_handle_workflow_run_start(workflow_cycle_manager, mock_session, real_workflow):
+    """Test handle_workflow_run_start method"""
     # Mock session.scalar to return the workflow and max sequence
-    mock_session.scalar.side_effect = [mock_workflow, 5]
+    mock_session.scalar.side_effect = [real_workflow, 5]

     # Call the method
-    workflow_run = workflow_cycle_manager._handle_workflow_run_start(
+    workflow_execution = workflow_cycle_manager.handle_workflow_run_start(
         session=mock_session,
         workflow_id="test-workflow-id",
-        user_id="test-user-id",
-        created_by_role=CreatorUserRole.ACCOUNT,
     )

     # Verify the result
-    assert workflow_run.tenant_id == mock_workflow.tenant_id
-    assert workflow_run.app_id == mock_workflow.app_id
-    assert workflow_run.workflow_id == mock_workflow.id
-    assert workflow_run.sequence_number == 6  # max_sequence + 1
-    assert workflow_run.status == WorkflowRunStatus.RUNNING
-    assert workflow_run.created_by_role == CreatorUserRole.ACCOUNT
-    assert workflow_run.created_by == "test-user-id"
+    assert workflow_execution.workflow_id == real_workflow.id
+    assert workflow_execution.sequence_number == 6  # max_sequence + 1

-    # Verify session.add was called
-    mock_session.add.assert_called_once_with(workflow_run)
+    # Verify the workflow_execution_repository.save was called
+    workflow_cycle_manager._workflow_execution_repository.save.assert_called_once_with(workflow_execution)


-def test_handle_workflow_run_success(workflow_cycle_manager, mock_session, mock_workflow_run):
-    """Test _handle_workflow_run_success method"""
-    # Mock _get_workflow_run to return the mock_workflow_run
-    with patch.object(workflow_cycle_manager, "_get_workflow_run", return_value=mock_workflow_run):
-        # Call the method
-        result = workflow_cycle_manager._handle_workflow_run_success(
-            session=mock_session,
-            workflow_run_id="test-workflow-run-id",
-            start_at=time.perf_counter() - 10,  # 10 seconds ago
-            total_tokens=100,
-            total_steps=5,
-            outputs={"answer": "test answer"},
-        )
+def test_handle_workflow_run_success(workflow_cycle_manager, mock_workflow_execution_repository):
+    """Test handle_workflow_run_success method"""
+    # Create a real WorkflowExecution

-        # Verify the result
-        assert result == mock_workflow_run
-        assert result.status == WorkflowRunStatus.SUCCEEDED
-        assert result.outputs == json.dumps({"answer": "test answer"})
-        assert result.total_tokens == 100
-        assert result.total_steps == 5
-        assert result.finished_at is not None
+    workflow_execution = WorkflowExecution(
+        id="test-workflow-run-id",
+        workflow_id="test-workflow-id",
+        workflow_version="1.0",
+        sequence_number=1,
+        type=WorkflowType.CHAT,
+        graph={"nodes": [], "edges": []},
+        inputs={"query": "test query"},
+        started_at=datetime.now(UTC).replace(tzinfo=None),
+    )
+
+    # Mock the repository get method to return the real workflow_execution
+    workflow_cycle_manager._workflow_execution_repository.get.return_value = workflow_execution
+
+    # Call the method
+    result = workflow_cycle_manager.handle_workflow_run_success(
+        workflow_run_id="test-workflow-run-id",
+        total_tokens=100,
+        total_steps=5,
+        outputs={"answer": "test answer"},
+    )
+
+    # Verify the result
+    assert result == workflow_execution
+    assert result.status == WorkflowExecutionStatus.SUCCEEDED
+    assert result.outputs == {"answer": "test answer"}
+    assert result.total_tokens == 100
+    assert result.total_steps == 5
+    assert result.finished_at is not None


-def test_handle_workflow_run_failed(workflow_cycle_manager, mock_session, mock_workflow_run):
-    """Test _handle_workflow_run_failed method"""
-    # Mock _get_workflow_run to return the mock_workflow_run
-    with patch.object(workflow_cycle_manager, "_get_workflow_run", return_value=mock_workflow_run):
-        # Mock get_running_executions to return an empty list
-        workflow_cycle_manager._workflow_node_execution_repository.get_running_executions.return_value = []
+def test_handle_workflow_run_failed(workflow_cycle_manager, mock_workflow_execution_repository):
+    """Test handle_workflow_run_failed method"""
+    # Create a real WorkflowExecution

-        # Call the method
-        result = workflow_cycle_manager._handle_workflow_run_failed(
-            session=mock_session,
-            workflow_run_id="test-workflow-run-id",
-            start_at=time.perf_counter() - 10,  # 10 seconds ago
-            total_tokens=50,
-            total_steps=3,
-            status=WorkflowRunStatus.FAILED,
-            error="Test error message",
-        )
+    workflow_execution = WorkflowExecution(
+        id="test-workflow-run-id",
+        workflow_id="test-workflow-id",
+        workflow_version="1.0",
+        sequence_number=1,
+        type=WorkflowType.CHAT,
+        graph={"nodes": [], "edges": []},
+        inputs={"query": "test query"},
+        started_at=datetime.now(UTC).replace(tzinfo=None),
+    )

-        # Verify the result
-        assert result == mock_workflow_run
-        assert result.status == WorkflowRunStatus.FAILED.value
-        assert result.error == "Test error message"
-        assert result.total_tokens == 50
-        assert result.total_steps == 3
-        assert result.finished_at is not None
+    # Mock the repository get method to return the real workflow_execution
+    workflow_cycle_manager._workflow_execution_repository.get.return_value = workflow_execution
+
+    # Mock get_running_executions to return an empty list
+    workflow_cycle_manager._workflow_node_execution_repository.get_running_executions.return_value = []
+
+    # Call the method
+    result = workflow_cycle_manager.handle_workflow_run_failed(
+        workflow_run_id="test-workflow-run-id",
+        total_tokens=50,
+        total_steps=3,
+        status=WorkflowRunStatus.FAILED,
+        error_message="Test error message",
+    )
+
+    # Verify the result
+    assert result == workflow_execution
+    assert result.status == WorkflowExecutionStatus(WorkflowRunStatus.FAILED.value)
+    assert result.error_message == "Test error message"
+    assert result.total_tokens == 50
+    assert result.total_steps == 3
+    assert result.finished_at is not None


-def test_handle_node_execution_start(workflow_cycle_manager, mock_workflow_run):
-    """Test _handle_node_execution_start method"""
+def test_handle_node_execution_start(workflow_cycle_manager, mock_workflow_execution_repository):
+    """Test handle_node_execution_start method"""
+    # Create a real WorkflowExecution
+
+    workflow_execution = WorkflowExecution(
+        id="test-workflow-execution-id",
+        workflow_id="test-workflow-id",
+        workflow_version="1.0",
+        sequence_number=1,
+        type=WorkflowType.CHAT,
+        graph={"nodes": [], "edges": []},
+        inputs={"query": "test query"},
+        started_at=datetime.now(UTC).replace(tzinfo=None),
+    )
+
+    # Mock the repository get method to return the real workflow_execution
+    workflow_cycle_manager._workflow_execution_repository.get.return_value = workflow_execution
+
     # Create a mock event
     event = MagicMock(spec=QueueNodeStartedEvent)
     event.node_execution_id = "test-node-execution-id"
@@ -207,129 +300,171 @@ def test_handle_node_execution_start(workflow_cycle_manager,
     event.in_loop_id = "test-loop-id"

     # Call the method
-    result = workflow_cycle_manager._handle_node_execution_start(
-        workflow_run=mock_workflow_run,
+    result = workflow_cycle_manager.handle_node_execution_start(
+        workflow_execution_id=workflow_execution.id,
         event=event,
     )

     # Verify the result
-    # NodeExecution doesn't have tenant_id attribute, it's handled at repository level
-    # assert result.tenant_id == mock_workflow_run.tenant_id
-    # assert result.app_id == mock_workflow_run.app_id
-    assert result.workflow_id == mock_workflow_run.workflow_id
-    assert result.workflow_run_id == mock_workflow_run.id
+    assert result.workflow_id == workflow_execution.workflow_id
+    assert result.workflow_run_id == workflow_execution.id
     assert result.node_execution_id == event.node_execution_id
     assert result.node_id == event.node_id
     assert result.node_type == event.node_type
     assert result.title == event.node_data.title
-    assert result.status == WorkflowNodeExecutionStatus.RUNNING.value
-    # NodeExecution doesn't have created_by_role and created_by attributes, they're handled at repository level
-    # assert result.created_by_role == mock_workflow_run.created_by_role
-    # assert result.created_by == mock_workflow_run.created_by
+    assert result.status == NodeExecutionStatus.RUNNING

     # Verify save was called
     workflow_cycle_manager._workflow_node_execution_repository.save.assert_called_once_with(result)


-def test_get_workflow_run(workflow_cycle_manager, mock_session, mock_workflow_run):
-    """Test _get_workflow_run method"""
-    # Mock session.scalar to return the workflow run
-    mock_session.scalar.return_value = mock_workflow_run
+def test_get_workflow_execution_or_raise_error(workflow_cycle_manager, mock_workflow_execution_repository):
+    """Test _get_workflow_execution_or_raise_error method"""
+    # Create a real WorkflowExecution

-    # Call the method
-    result = workflow_cycle_manager._get_workflow_run(
-        session=mock_session,
-        workflow_run_id="test-workflow-run-id",
+    workflow_execution = WorkflowExecution(
+        id="test-workflow-run-id",
+        workflow_id="test-workflow-id",
+        workflow_version="1.0",
+        sequence_number=1,
+        type=WorkflowType.CHAT,
+        graph={"nodes": [], "edges": []},
+        inputs={"query": "test query"},
+        started_at=datetime.now(UTC).replace(tzinfo=None),
     )

+    # Mock the repository get method to return the real execution
+    workflow_cycle_manager._workflow_execution_repository.get.return_value = workflow_execution
+
+    # Call the method
+    result = workflow_cycle_manager._get_workflow_execution_or_raise_error("test-workflow-run-id")
+
     # Verify the result
-    assert result == mock_workflow_run
-    assert workflow_cycle_manager._workflow_run == mock_workflow_run
+    assert result == workflow_execution
+
+    # Test error case
+    workflow_cycle_manager._workflow_execution_repository.get.return_value = None
+
+    # Expect an error when execution is not found
+    with pytest.raises(ValueError):
+        workflow_cycle_manager._get_workflow_execution_or_raise_error("non-existent-id")


 def test_handle_workflow_node_execution_success(workflow_cycle_manager):
-    """Test _handle_workflow_node_execution_success method"""
+    """Test handle_workflow_node_execution_success method"""
     # Create a mock event
     event = MagicMock(spec=QueueNodeSucceededEvent)
     event.node_execution_id = "test-node-execution-id"
     event.inputs = {"input": "test input"}
     event.process_data = {"process": "test process"}
     event.outputs = {"output": "test output"}
-    event.execution_metadata = {"metadata": "test metadata"}
+    event.execution_metadata = {NodeRunMetadataKey.TOTAL_TOKENS: 100}
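+    # execution_metadata now uses typed NodeRunMetadataKey members rather than bare strings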
     event.start_at = datetime.now(UTC).replace(tzinfo=None)

-    # Create a mock node execution
-    node_execution = MagicMock()
-    node_execution.node_execution_id = "test-node-execution-id"
+    # Create a real node execution
+
+    node_execution = NodeExecution(
+        id="test-node-execution-record-id",
+        node_execution_id="test-node-execution-id",
+        workflow_id="test-workflow-id",
+        workflow_run_id="test-workflow-run-id",
+        index=1,
+        node_id="test-node-id",
+        node_type=NodeType.LLM,
+        title="Test Node",
+        created_at=datetime.now(UTC).replace(tzinfo=None),
+    )

     # Mock the repository to return the node execution
     workflow_cycle_manager._workflow_node_execution_repository.get_by_node_execution_id.return_value = node_execution

     # Call the method
-    result = workflow_cycle_manager._handle_workflow_node_execution_success(
+    result = workflow_cycle_manager.handle_workflow_node_execution_success(
         event=event,
     )

     # Verify the result
     assert result == node_execution
-    assert result.status == WorkflowNodeExecutionStatus.SUCCEEDED.value
+    assert result.status == NodeExecutionStatus.SUCCEEDED

     # Verify save was called
     workflow_cycle_manager._workflow_node_execution_repository.save.assert_called_once_with(node_execution)


-def test_handle_workflow_run_partial_success(workflow_cycle_manager, mock_session, mock_workflow_run):
-    """Test _handle_workflow_run_partial_success method"""
-    # Mock _get_workflow_run to return the mock_workflow_run
-    with patch.object(workflow_cycle_manager, "_get_workflow_run", return_value=mock_workflow_run):
-        # Call the method
-        result = workflow_cycle_manager._handle_workflow_run_partial_success(
-            session=mock_session,
-            workflow_run_id="test-workflow-run-id",
-            start_at=time.perf_counter() - 10,  # 10 seconds ago
-            total_tokens=75,
-            total_steps=4,
-            outputs={"partial_answer": "test partial answer"},
-            exceptions_count=2,
-        )
+def test_handle_workflow_run_partial_success(workflow_cycle_manager, mock_workflow_execution_repository):
+    """Test handle_workflow_run_partial_success method"""
+    # Create a real WorkflowExecution

-        # Verify the result
-        assert result == mock_workflow_run
-        assert result.status == WorkflowRunStatus.PARTIAL_SUCCEEDED.value
-        assert result.outputs == json.dumps({"partial_answer": "test partial answer"})
-        assert result.total_tokens == 75
-        assert result.total_steps == 4
-        assert result.exceptions_count == 2
-        assert result.finished_at is not None
+    workflow_execution = WorkflowExecution(
+        id="test-workflow-run-id",
+        workflow_id="test-workflow-id",
+        workflow_version="1.0",
+        sequence_number=1,
+        type=WorkflowType.CHAT,
+        graph={"nodes": [], "edges": []},
+        inputs={"query": "test query"},
+        started_at=datetime.now(UTC).replace(tzinfo=None),
+    )
+
+    # Mock the repository get method to return the real workflow_execution
+    workflow_cycle_manager._workflow_execution_repository.get.return_value = workflow_execution
+
+    # Call the method
+    result = workflow_cycle_manager.handle_workflow_run_partial_success(
+        workflow_run_id="test-workflow-run-id",
+        total_tokens=75,
+        total_steps=4,
+        outputs={"partial_answer": "test partial answer"},
+        exceptions_count=2,
+    )
+
+    # Verify the result
+    assert result == workflow_execution
+    assert result.status == WorkflowExecutionStatus.PARTIAL_SUCCEEDED
+    assert result.outputs == {"partial_answer": "test partial answer"}
+    assert result.total_tokens == 75
+    assert result.total_steps == 4
+    assert result.exceptions_count == 2
+    assert result.finished_at is not None

 def test_handle_workflow_node_execution_failed(workflow_cycle_manager):
-    """Test _handle_workflow_node_execution_failed method"""
+    """Test handle_workflow_node_execution_failed method"""
     # Create a mock event
     event = MagicMock(spec=QueueNodeFailedEvent)
     event.node_execution_id = "test-node-execution-id"
     event.inputs = {"input": "test input"}
     event.process_data = {"process": "test process"}
     event.outputs = {"output": "test output"}
-    event.execution_metadata = {"metadata": "test metadata"}
+    event.execution_metadata = {NodeRunMetadataKey.TOTAL_TOKENS: 100}
     event.start_at = datetime.now(UTC).replace(tzinfo=None)
     event.error = "Test error message"

-    # Create a mock node execution
-    node_execution = MagicMock()
-    node_execution.node_execution_id = "test-node-execution-id"
+    # Create a real node execution
+
+    node_execution = NodeExecution(
+        id="test-node-execution-record-id",
+        node_execution_id="test-node-execution-id",
+        workflow_id="test-workflow-id",
+        workflow_run_id="test-workflow-run-id",
+        index=1,
+        node_id="test-node-id",
+        node_type=NodeType.LLM,
+        title="Test Node",
+        created_at=datetime.now(UTC).replace(tzinfo=None),
+    )

     # Mock the repository to return the node execution
     workflow_cycle_manager._workflow_node_execution_repository.get_by_node_execution_id.return_value = node_execution

     # Call the method
-    result = workflow_cycle_manager._handle_workflow_node_execution_failed(
+    result = workflow_cycle_manager.handle_workflow_node_execution_failed(
         event=event,
     )

     # Verify the result
     assert result == node_execution
-    assert result.status == WorkflowNodeExecutionStatus.FAILED.value
+    assert result.status == NodeExecutionStatus.FAILED
     assert result.error == "Test error message"

     # Verify save was called

From 02929b2cce81589f453da48d12dc526ecbb6068b Mon Sep 17 00:00:00 2001
From: heyszt <270985384@qq.com>
Date: Wed, 21 May 2025 23:51:42 +0800
Subject: [PATCH 08/19] Fix/fix trace provider delete err (#20070)

---
 api/core/ops/ops_trace_manager.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py
index 47c65a3e50..a3424c7421 100644
--- a/api/core/ops/ops_trace_manager.py
+++ b/api/core/ops/ops_trace_manager.py
@@ -292,10 +292,11 @@ class OpsTraceManager:
         :return:
         """
         # auth check
-        try:
-            provider_config_map[tracing_provider]
-        except KeyError:
-            raise ValueError(f"Invalid tracing provider: {tracing_provider}")
+        if tracing_provider is not None:
+            try:
+                provider_config_map[tracing_provider]
+            except KeyError:
+                raise ValueError(f"Invalid tracing provider: {tracing_provider}")

         app_config: Optional[App] = db.session.query(App).filter(App.id == app_id).first()
         if not app_config:

From 6b3666f8265899ed574d5cc6527457c20fd8f861 Mon Sep 17 00:00:00 2001
From: -LAN-
Date: Thu, 22 May 2025 09:49:25 +0800
Subject: [PATCH 09/19] feat: Split WorkflowCycleManager (#20071)

Signed-off-by: -LAN-
---
 .../advanced_chat/generate_task_pipeline.py   |  51 +-
 api/core/app/apps/common/__init__.py          |   0
 .../common/workflow_response_converter.py     | 564 +++++++++++++++++
 .../apps/workflow/generate_task_pipeline.py   |  47 +-
 .../workflow_app_generate_task_pipeline.py    | 591 ------------------
 api/core/workflow/workflow_cycle_manager.py   | 537 +---------------
 6 files changed, 622 insertions(+), 1168 deletions(-)
 create mode 100644 api/core/app/apps/common/__init__.py
 create mode 100644 api/core/app/apps/common/workflow_response_converter.py
 delete mode 100644 api/core/workflow/workflow_app_generate_task_pipeline.py

diff --git
a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py index cd764d56a5..e43417668d 100644 --- a/api/core/app/apps/advanced_chat/generate_task_pipeline.py +++ b/api/core/app/apps/advanced_chat/generate_task_pipeline.py @@ -10,6 +10,7 @@ from sqlalchemy.orm import Session from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom +from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter from core.app.entities.app_invoke_entities import ( AdvancedChatAppGenerateEntity, InvokeFrom, @@ -131,6 +132,10 @@ class AdvancedChatAppGenerateTaskPipeline: workflow_node_execution_repository=workflow_node_execution_repository, ) + self._workflow_response_converter = WorkflowResponseConverter( + application_generate_entity=application_generate_entity, + ) + self._task_state = WorkflowTaskState() self._message_cycle_manager = MessageCycleManage( application_generate_entity=application_generate_entity, task_state=self._task_state @@ -306,7 +311,7 @@ class AdvancedChatAppGenerateTaskPipeline: if not message: raise ValueError(f"Message not found: {self._message_id}") message.workflow_run_id = workflow_execution.id - workflow_start_resp = self._workflow_cycle_manager.workflow_start_to_stream_response( + workflow_start_resp = self._workflow_response_converter.workflow_start_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, ) @@ -323,7 +328,7 @@ class AdvancedChatAppGenerateTaskPipeline: workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_retried( workflow_execution_id=self._workflow_run_id, event=event ) - node_retry_resp = self._workflow_cycle_manager.workflow_node_retry_to_stream_response( + node_retry_resp = self._workflow_response_converter.workflow_node_retry_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -340,7 +345,7 @@ class AdvancedChatAppGenerateTaskPipeline: workflow_execution_id=self._workflow_run_id, event=event ) - node_start_resp = self._workflow_cycle_manager.workflow_node_start_to_stream_response( + node_start_resp = self._workflow_response_converter.workflow_node_start_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -352,7 +357,7 @@ class AdvancedChatAppGenerateTaskPipeline: # Record files if it's an answer node or end node if event.node_type in [NodeType.ANSWER, NodeType.END]: self._recorded_files.extend( - self._workflow_cycle_manager.fetch_files_from_node_outputs(event.outputs or {}) + self._workflow_response_converter.fetch_files_from_node_outputs(event.outputs or {}) ) with Session(db.engine, expire_on_commit=False) as session: @@ -360,7 +365,7 @@ class AdvancedChatAppGenerateTaskPipeline: event=event ) - node_finish_resp = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( + node_finish_resp = self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -380,7 +385,7 @@ class AdvancedChatAppGenerateTaskPipeline: event=event ) - node_finish_resp = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( + node_finish_resp = 
self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -392,10 +397,12 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - parallel_start_resp = self._workflow_cycle_manager.workflow_parallel_branch_start_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, + parallel_start_resp = ( + self._workflow_response_converter.workflow_parallel_branch_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) ) yield parallel_start_resp @@ -404,7 +411,7 @@ class AdvancedChatAppGenerateTaskPipeline: raise ValueError("workflow run not initialized.") parallel_finish_resp = ( - self._workflow_cycle_manager.workflow_parallel_branch_finished_to_stream_response( + self._workflow_response_converter.workflow_parallel_branch_finished_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -416,7 +423,7 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - iter_start_resp = self._workflow_cycle_manager.workflow_iteration_start_to_stream_response( + iter_start_resp = self._workflow_response_converter.workflow_iteration_start_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -427,7 +434,7 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - iter_next_resp = self._workflow_cycle_manager.workflow_iteration_next_to_stream_response( + iter_next_resp = self._workflow_response_converter.workflow_iteration_next_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -438,7 +445,7 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - iter_finish_resp = self._workflow_cycle_manager.workflow_iteration_completed_to_stream_response( + iter_finish_resp = self._workflow_response_converter.workflow_iteration_completed_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -449,7 +456,7 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - loop_start_resp = self._workflow_cycle_manager.workflow_loop_start_to_stream_response( + loop_start_resp = self._workflow_response_converter.workflow_loop_start_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -460,7 +467,7 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - loop_next_resp = self._workflow_cycle_manager.workflow_loop_next_to_stream_response( + loop_next_resp = self._workflow_response_converter.workflow_loop_next_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -471,7 +478,7 @@ class AdvancedChatAppGenerateTaskPipeline: if not self._workflow_run_id: raise 
ValueError("workflow run not initialized.") - loop_finish_resp = self._workflow_cycle_manager.workflow_loop_completed_to_stream_response( + loop_finish_resp = self._workflow_response_converter.workflow_loop_completed_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -495,7 +502,7 @@ class AdvancedChatAppGenerateTaskPipeline: trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, @@ -521,7 +528,7 @@ class AdvancedChatAppGenerateTaskPipeline: conversation_id=None, trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, @@ -548,7 +555,7 @@ class AdvancedChatAppGenerateTaskPipeline: trace_manager=trace_manager, exceptions_count=event.exceptions_count, ) - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, @@ -573,7 +580,7 @@ class AdvancedChatAppGenerateTaskPipeline: conversation_id=self._conversation_id, trace_manager=trace_manager, ) - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, @@ -657,7 +664,7 @@ class AdvancedChatAppGenerateTaskPipeline: yield self._message_end_to_stream_response() elif isinstance(event, QueueAgentLogEvent): - yield self._workflow_cycle_manager.handle_agent_log( + yield self._workflow_response_converter.handle_agent_log( task_id=self._application_generate_entity.task_id, event=event ) else: diff --git a/api/core/app/apps/common/__init__.py b/api/core/app/apps/common/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/core/app/apps/common/workflow_response_converter.py b/api/core/app/apps/common/workflow_response_converter.py new file mode 100644 index 0000000000..7669bf74bb --- /dev/null +++ b/api/core/app/apps/common/workflow_response_converter.py @@ -0,0 +1,564 @@ +import time +from collections.abc import Mapping, Sequence +from datetime import UTC, datetime +from typing import Any, Optional, Union, cast + +from sqlalchemy import select +from sqlalchemy.orm import Session + +from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity +from core.app.entities.queue_entities import ( + QueueAgentLogEvent, + QueueIterationCompletedEvent, + QueueIterationNextEvent, + QueueIterationStartEvent, + QueueLoopCompletedEvent, + QueueLoopNextEvent, + QueueLoopStartEvent, + QueueNodeExceptionEvent, + QueueNodeFailedEvent, + QueueNodeInIterationFailedEvent, + QueueNodeInLoopFailedEvent, + QueueNodeRetryEvent, + QueueNodeStartedEvent, + QueueNodeSucceededEvent, + QueueParallelBranchRunFailedEvent, + 
QueueParallelBranchRunStartedEvent, + QueueParallelBranchRunSucceededEvent, +) +from core.app.entities.task_entities import ( + AgentLogStreamResponse, + IterationNodeCompletedStreamResponse, + IterationNodeNextStreamResponse, + IterationNodeStartStreamResponse, + LoopNodeCompletedStreamResponse, + LoopNodeNextStreamResponse, + LoopNodeStartStreamResponse, + NodeFinishStreamResponse, + NodeRetryStreamResponse, + NodeStartStreamResponse, + ParallelBranchFinishedStreamResponse, + ParallelBranchStartStreamResponse, + WorkflowFinishStreamResponse, + WorkflowStartStreamResponse, +) +from core.file import FILE_MODEL_IDENTITY, File +from core.tools.tool_manager import ToolManager +from core.workflow.entities.node_execution_entities import NodeExecution +from core.workflow.entities.workflow_execution_entities import WorkflowExecution +from core.workflow.nodes import NodeType +from core.workflow.nodes.tool.entities import ToolNodeData +from models import ( + Account, + CreatorUserRole, + EndUser, + WorkflowNodeExecutionStatus, + WorkflowRun, +) + + +class WorkflowResponseConverter: + def __init__( + self, + *, + application_generate_entity: Union[AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity], + ) -> None: + self._application_generate_entity = application_generate_entity + + def workflow_start_to_stream_response( + self, + *, + task_id: str, + workflow_execution: WorkflowExecution, + ) -> WorkflowStartStreamResponse: + return WorkflowStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution.id, + data=WorkflowStartStreamResponse.Data( + id=workflow_execution.id, + workflow_id=workflow_execution.workflow_id, + sequence_number=workflow_execution.sequence_number, + inputs=workflow_execution.inputs, + created_at=int(workflow_execution.started_at.timestamp()), + ), + ) + + def workflow_finish_to_stream_response( + self, + *, + session: Session, + task_id: str, + workflow_execution: WorkflowExecution, + ) -> WorkflowFinishStreamResponse: + created_by = None + workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id)) + assert workflow_run is not None + if workflow_run.created_by_role == CreatorUserRole.ACCOUNT: + stmt = select(Account).where(Account.id == workflow_run.created_by) + account = session.scalar(stmt) + if account: + created_by = { + "id": account.id, + "name": account.name, + "email": account.email, + } + elif workflow_run.created_by_role == CreatorUserRole.END_USER: + stmt = select(EndUser).where(EndUser.id == workflow_run.created_by) + end_user = session.scalar(stmt) + if end_user: + created_by = { + "id": end_user.id, + "user": end_user.session_id, + } + else: + raise NotImplementedError(f"unknown created_by_role: {workflow_run.created_by_role}") + + # Handle the case where finished_at is None by using current time as default + finished_at_timestamp = ( + int(workflow_execution.finished_at.timestamp()) + if workflow_execution.finished_at + else int(datetime.now(UTC).timestamp()) + ) + + return WorkflowFinishStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution.id, + data=WorkflowFinishStreamResponse.Data( + id=workflow_execution.id, + workflow_id=workflow_execution.workflow_id, + sequence_number=workflow_execution.sequence_number, + status=workflow_execution.status, + outputs=workflow_execution.outputs, + error=workflow_execution.error_message, + elapsed_time=workflow_execution.elapsed_time, + total_tokens=workflow_execution.total_tokens, + total_steps=workflow_execution.total_steps, + 
created_by=created_by, + created_at=int(workflow_execution.started_at.timestamp()), + finished_at=finished_at_timestamp, + files=self.fetch_files_from_node_outputs(workflow_execution.outputs), + exceptions_count=workflow_execution.exceptions_count, + ), + ) + + def workflow_node_start_to_stream_response( + self, + *, + event: QueueNodeStartedEvent, + task_id: str, + workflow_node_execution: NodeExecution, + ) -> Optional[NodeStartStreamResponse]: + if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: + return None + if not workflow_node_execution.workflow_run_id: + return None + + response = NodeStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_node_execution.workflow_run_id, + data=NodeStartStreamResponse.Data( + id=workflow_node_execution.id, + node_id=workflow_node_execution.node_id, + node_type=workflow_node_execution.node_type, + title=workflow_node_execution.title, + index=workflow_node_execution.index, + predecessor_node_id=workflow_node_execution.predecessor_node_id, + inputs=workflow_node_execution.inputs, + created_at=int(workflow_node_execution.created_at.timestamp()), + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + parallel_run_id=event.parallel_mode_run_id, + agent_strategy=event.agent_strategy, + ), + ) + + # extras logic + if event.node_type == NodeType.TOOL: + node_data = cast(ToolNodeData, event.node_data) + response.data.extras["icon"] = ToolManager.get_tool_icon( + tenant_id=self._application_generate_entity.app_config.tenant_id, + provider_type=node_data.provider_type, + provider_id=node_data.provider_id, + ) + + return response + + def workflow_node_finish_to_stream_response( + self, + *, + event: QueueNodeSucceededEvent + | QueueNodeFailedEvent + | QueueNodeInIterationFailedEvent + | QueueNodeInLoopFailedEvent + | QueueNodeExceptionEvent, + task_id: str, + workflow_node_execution: NodeExecution, + ) -> Optional[NodeFinishStreamResponse]: + if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: + return None + if not workflow_node_execution.workflow_run_id: + return None + if not workflow_node_execution.finished_at: + return None + + return NodeFinishStreamResponse( + task_id=task_id, + workflow_run_id=workflow_node_execution.workflow_run_id, + data=NodeFinishStreamResponse.Data( + id=workflow_node_execution.id, + node_id=workflow_node_execution.node_id, + node_type=workflow_node_execution.node_type, + index=workflow_node_execution.index, + title=workflow_node_execution.title, + predecessor_node_id=workflow_node_execution.predecessor_node_id, + inputs=workflow_node_execution.inputs, + process_data=workflow_node_execution.process_data, + outputs=workflow_node_execution.outputs, + status=workflow_node_execution.status, + error=workflow_node_execution.error, + elapsed_time=workflow_node_execution.elapsed_time, + execution_metadata=workflow_node_execution.metadata, + created_at=int(workflow_node_execution.created_at.timestamp()), + finished_at=int(workflow_node_execution.finished_at.timestamp()), + files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}), + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + 
iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + ), + ) + + def workflow_node_retry_to_stream_response( + self, + *, + event: QueueNodeRetryEvent, + task_id: str, + workflow_node_execution: NodeExecution, + ) -> Optional[Union[NodeRetryStreamResponse, NodeFinishStreamResponse]]: + if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: + return None + if not workflow_node_execution.workflow_run_id: + return None + if not workflow_node_execution.finished_at: + return None + + return NodeRetryStreamResponse( + task_id=task_id, + workflow_run_id=workflow_node_execution.workflow_run_id, + data=NodeRetryStreamResponse.Data( + id=workflow_node_execution.id, + node_id=workflow_node_execution.node_id, + node_type=workflow_node_execution.node_type, + index=workflow_node_execution.index, + title=workflow_node_execution.title, + predecessor_node_id=workflow_node_execution.predecessor_node_id, + inputs=workflow_node_execution.inputs, + process_data=workflow_node_execution.process_data, + outputs=workflow_node_execution.outputs, + status=workflow_node_execution.status, + error=workflow_node_execution.error, + elapsed_time=workflow_node_execution.elapsed_time, + execution_metadata=workflow_node_execution.metadata, + created_at=int(workflow_node_execution.created_at.timestamp()), + finished_at=int(workflow_node_execution.finished_at.timestamp()), + files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}), + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + retry_index=event.retry_index, + ), + ) + + def workflow_parallel_branch_start_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueParallelBranchRunStartedEvent, + ) -> ParallelBranchStartStreamResponse: + return ParallelBranchStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=ParallelBranchStartStreamResponse.Data( + parallel_id=event.parallel_id, + parallel_branch_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + created_at=int(time.time()), + ), + ) + + def workflow_parallel_branch_finished_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent, + ) -> ParallelBranchFinishedStreamResponse: + return ParallelBranchFinishedStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=ParallelBranchFinishedStreamResponse.Data( + parallel_id=event.parallel_id, + parallel_branch_id=event.parallel_start_node_id, + parent_parallel_id=event.parent_parallel_id, + parent_parallel_start_node_id=event.parent_parallel_start_node_id, + iteration_id=event.in_iteration_id, + loop_id=event.in_loop_id, + status="succeeded" if isinstance(event, QueueParallelBranchRunSucceededEvent) else "failed", + error=event.error if isinstance(event, QueueParallelBranchRunFailedEvent) else None, + created_at=int(time.time()), + ), + ) + + def workflow_iteration_start_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueIterationStartEvent, + ) -> IterationNodeStartStreamResponse: + return 
IterationNodeStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=IterationNodeStartStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + metadata=event.metadata or {}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def workflow_iteration_next_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueIterationNextEvent, + ) -> IterationNodeNextStreamResponse: + return IterationNodeNextStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=IterationNodeNextStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + index=event.index, + pre_iteration_output=event.output, + created_at=int(time.time()), + extras={}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parallel_mode_run_id=event.parallel_mode_run_id, + duration=event.duration, + ), + ) + + def workflow_iteration_completed_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueIterationCompletedEvent, + ) -> IterationNodeCompletedStreamResponse: + return IterationNodeCompletedStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=IterationNodeCompletedStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + outputs=event.outputs, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + status=WorkflowNodeExecutionStatus.SUCCEEDED + if event.error is None + else WorkflowNodeExecutionStatus.FAILED, + error=None, + elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), + total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, + execution_metadata=event.metadata, + finished_at=int(time.time()), + steps=event.steps, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def workflow_loop_start_to_stream_response( + self, *, task_id: str, workflow_execution_id: str, event: QueueLoopStartEvent + ) -> LoopNodeStartStreamResponse: + return LoopNodeStartStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=LoopNodeStartStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + metadata=event.metadata or {}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def workflow_loop_next_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueLoopNextEvent, + ) -> LoopNodeNextStreamResponse: + return LoopNodeNextStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=LoopNodeNextStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + index=event.index, + pre_loop_output=event.output, + created_at=int(time.time()), + extras={}, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + parallel_mode_run_id=event.parallel_mode_run_id, + duration=event.duration, + ), + ) + + def 
workflow_loop_completed_to_stream_response( + self, + *, + task_id: str, + workflow_execution_id: str, + event: QueueLoopCompletedEvent, + ) -> LoopNodeCompletedStreamResponse: + return LoopNodeCompletedStreamResponse( + task_id=task_id, + workflow_run_id=workflow_execution_id, + data=LoopNodeCompletedStreamResponse.Data( + id=event.node_id, + node_id=event.node_id, + node_type=event.node_type.value, + title=event.node_data.title, + outputs=event.outputs, + created_at=int(time.time()), + extras={}, + inputs=event.inputs or {}, + status=WorkflowNodeExecutionStatus.SUCCEEDED + if event.error is None + else WorkflowNodeExecutionStatus.FAILED, + error=None, + elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), + total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, + execution_metadata=event.metadata, + finished_at=int(time.time()), + steps=event.steps, + parallel_id=event.parallel_id, + parallel_start_node_id=event.parallel_start_node_id, + ), + ) + + def fetch_files_from_node_outputs(self, outputs_dict: Mapping[str, Any] | None) -> Sequence[Mapping[str, Any]]: + """ + Fetch files from node outputs + :param outputs_dict: node outputs dict + :return: + """ + if not outputs_dict: + return [] + + files = [self._fetch_files_from_variable_value(output_value) for output_value in outputs_dict.values()] + # Remove None + files = [file for file in files if file] + # Flatten list + # Flatten the list of sequences into a single list of mappings + flattened_files = [file for sublist in files if sublist for file in sublist] + + # Convert to tuple to match Sequence type + return tuple(flattened_files) + + def _fetch_files_from_variable_value(self, value: Union[dict, list]) -> Sequence[Mapping[str, Any]]: + """ + Fetch files from variable value + :param value: variable value + :return: + """ + if not value: + return [] + + files = [] + if isinstance(value, list): + for item in value: + file = self._get_file_var_from_value(item) + if file: + files.append(file) + elif isinstance(value, dict): + file = self._get_file_var_from_value(value) + if file: + files.append(file) + + return files + + def _get_file_var_from_value(self, value: Union[dict, list]) -> Mapping[str, Any] | None: + """ + Get file var from value + :param value: variable value + :return: + """ + if not value: + return None + + if isinstance(value, dict) and value.get("dify_model_identity") == FILE_MODEL_IDENTITY: + return value + elif isinstance(value, File): + return value.to_dict() + + return None + + def handle_agent_log(self, task_id: str, event: QueueAgentLogEvent) -> AgentLogStreamResponse: + """ + Handle agent log + :param task_id: task id + :param event: agent log event + :return: + """ + return AgentLogStreamResponse( + task_id=task_id, + data=AgentLogStreamResponse.Data( + node_execution_id=event.node_execution_id, + id=event.id, + parent_id=event.parent_id, + label=event.label, + error=event.error, + status=event.status, + data=event.data, + metadata=event.metadata, + node_id=event.node_id, + ), + ) diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py index f2ebd78b36..0291f49cac 100644 --- a/api/core/app/apps/workflow/generate_task_pipeline.py +++ b/api/core/app/apps/workflow/generate_task_pipeline.py @@ -8,6 +8,7 @@ from sqlalchemy.orm import Session from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME from core.app.apps.base_app_queue_manager import AppQueueManager 
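# A minimal sketch of the split the hunks below introduce, assuming simplified
# signatures: WorkflowCycleManager keeps persistence and state transitions, while
# WorkflowResponseConverter only maps domain objects to stream responses. Class and
# method names follow the patch; everything else here is illustrative.
from dataclasses import dataclass


@dataclass
class NodeStartStreamResponse:
    task_id: str
    node_id: str


class WorkflowCycleManager:
    """Persistence side: records the execution and returns a domain object."""

    def handle_node_execution_start(self, *, workflow_execution_id: str, node_id: str) -> str:
        # a real implementation would create and store a NodeExecution here
        return node_id


class WorkflowResponseConverter:
    """Serialization side: pure mapping, no database access."""

    def workflow_node_start_to_stream_response(self, *, task_id: str, node_id: str) -> NodeStartStreamResponse:
        return NodeStartStreamResponse(task_id=task_id, node_id=node_id)


# Usage mirrors the pipeline below: persist first, then convert for streaming.
manager = WorkflowCycleManager()
converter = WorkflowResponseConverter()
started_node = manager.handle_node_execution_start(workflow_execution_id="run-1", node_id="node-1")
response = converter.workflow_node_start_to_stream_response(task_id="task-1", node_id=started_node)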
+from core.app.apps.common.workflow_response_converter import WorkflowResponseConverter from core.app.entities.app_invoke_entities import ( InvokeFrom, WorkflowAppGenerateEntity, @@ -119,6 +120,10 @@ class WorkflowAppGenerateTaskPipeline: workflow_node_execution_repository=workflow_node_execution_repository, ) + self._workflow_response_converter = WorkflowResponseConverter( + application_generate_entity=application_generate_entity, + ) + self._application_generate_entity = application_generate_entity self._workflow_id = workflow.id self._workflow_features_dict = workflow.features_dict @@ -268,7 +273,7 @@ class WorkflowAppGenerateTaskPipeline: workflow_id=self._workflow_id, ) self._workflow_run_id = workflow_execution.id - start_resp = self._workflow_cycle_manager.workflow_start_to_stream_response( + start_resp = self._workflow_response_converter.workflow_start_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, ) @@ -285,7 +290,7 @@ class WorkflowAppGenerateTaskPipeline: workflow_execution_id=self._workflow_run_id, event=event, ) - response = self._workflow_cycle_manager.workflow_node_retry_to_stream_response( + response = self._workflow_response_converter.workflow_node_retry_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -301,7 +306,7 @@ class WorkflowAppGenerateTaskPipeline: workflow_node_execution = self._workflow_cycle_manager.handle_node_execution_start( workflow_execution_id=self._workflow_run_id, event=event ) - node_start_response = self._workflow_cycle_manager.workflow_node_start_to_stream_response( + node_start_response = self._workflow_response_converter.workflow_node_start_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -313,7 +318,7 @@ class WorkflowAppGenerateTaskPipeline: workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_success( event=event ) - node_success_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( + node_success_response = self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -331,7 +336,7 @@ class WorkflowAppGenerateTaskPipeline: workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_failed( event=event, ) - node_failed_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( + node_failed_response = self._workflow_response_converter.workflow_node_finish_to_stream_response( event=event, task_id=self._application_generate_entity.task_id, workflow_node_execution=workflow_node_execution, @@ -344,10 +349,12 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - parallel_start_resp = self._workflow_cycle_manager.workflow_parallel_branch_start_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, + parallel_start_resp = ( + self._workflow_response_converter.workflow_parallel_branch_start_to_stream_response( + task_id=self._application_generate_entity.task_id, + workflow_execution_id=self._workflow_run_id, + event=event, + ) ) yield parallel_start_resp @@ -357,7 +364,7 @@ class WorkflowAppGenerateTaskPipeline: raise ValueError("workflow 
run not initialized.") parallel_finish_resp = ( - self._workflow_cycle_manager.workflow_parallel_branch_finished_to_stream_response( + self._workflow_response_converter.workflow_parallel_branch_finished_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -370,7 +377,7 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - iter_start_resp = self._workflow_cycle_manager.workflow_iteration_start_to_stream_response( + iter_start_resp = self._workflow_response_converter.workflow_iteration_start_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -382,7 +389,7 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - iter_next_resp = self._workflow_cycle_manager.workflow_iteration_next_to_stream_response( + iter_next_resp = self._workflow_response_converter.workflow_iteration_next_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -394,7 +401,7 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - iter_finish_resp = self._workflow_cycle_manager.workflow_iteration_completed_to_stream_response( + iter_finish_resp = self._workflow_response_converter.workflow_iteration_completed_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -406,7 +413,7 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - loop_start_resp = self._workflow_cycle_manager.workflow_loop_start_to_stream_response( + loop_start_resp = self._workflow_response_converter.workflow_loop_start_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -418,7 +425,7 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - loop_next_resp = self._workflow_cycle_manager.workflow_loop_next_to_stream_response( + loop_next_resp = self._workflow_response_converter.workflow_loop_next_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -430,7 +437,7 @@ class WorkflowAppGenerateTaskPipeline: if not self._workflow_run_id: raise ValueError("workflow run not initialized.") - loop_finish_resp = self._workflow_cycle_manager.workflow_loop_completed_to_stream_response( + loop_finish_resp = self._workflow_response_converter.workflow_loop_completed_to_stream_response( task_id=self._application_generate_entity.task_id, workflow_execution_id=self._workflow_run_id, event=event, @@ -457,7 +464,7 @@ class WorkflowAppGenerateTaskPipeline: # save workflow app log self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, @@ -485,7 +492,7 @@ class WorkflowAppGenerateTaskPipeline: # save workflow app log self._save_workflow_app_log(session=session, 
workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, @@ -518,7 +525,7 @@ class WorkflowAppGenerateTaskPipeline: # save workflow app log self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( + workflow_finish_resp = self._workflow_response_converter.workflow_finish_to_stream_response( session=session, task_id=self._application_generate_entity.task_id, workflow_execution=workflow_execution, @@ -540,7 +547,7 @@ class WorkflowAppGenerateTaskPipeline: delta_text, from_variable_selector=event.from_variable_selector ) elif isinstance(event, QueueAgentLogEvent): - yield self._workflow_cycle_manager.handle_agent_log( + yield self._workflow_response_converter.handle_agent_log( task_id=self._application_generate_entity.task_id, event=event ) else: diff --git a/api/core/workflow/workflow_app_generate_task_pipeline.py b/api/core/workflow/workflow_app_generate_task_pipeline.py deleted file mode 100644 index f2ebd78b36..0000000000 --- a/api/core/workflow/workflow_app_generate_task_pipeline.py +++ /dev/null @@ -1,591 +0,0 @@ -import logging -import time -from collections.abc import Generator -from typing import Optional, Union - -from sqlalchemy import select -from sqlalchemy.orm import Session - -from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME -from core.app.apps.base_app_queue_manager import AppQueueManager -from core.app.entities.app_invoke_entities import ( - InvokeFrom, - WorkflowAppGenerateEntity, -) -from core.app.entities.queue_entities import ( - QueueAgentLogEvent, - QueueErrorEvent, - QueueIterationCompletedEvent, - QueueIterationNextEvent, - QueueIterationStartEvent, - QueueLoopCompletedEvent, - QueueLoopNextEvent, - QueueLoopStartEvent, - QueueNodeExceptionEvent, - QueueNodeFailedEvent, - QueueNodeInIterationFailedEvent, - QueueNodeInLoopFailedEvent, - QueueNodeRetryEvent, - QueueNodeStartedEvent, - QueueNodeSucceededEvent, - QueueParallelBranchRunFailedEvent, - QueueParallelBranchRunStartedEvent, - QueueParallelBranchRunSucceededEvent, - QueuePingEvent, - QueueStopEvent, - QueueTextChunkEvent, - QueueWorkflowFailedEvent, - QueueWorkflowPartialSuccessEvent, - QueueWorkflowStartedEvent, - QueueWorkflowSucceededEvent, -) -from core.app.entities.task_entities import ( - ErrorStreamResponse, - MessageAudioEndStreamResponse, - MessageAudioStreamResponse, - StreamResponse, - TextChunkStreamResponse, - WorkflowAppBlockingResponse, - WorkflowAppStreamResponse, - WorkflowFinishStreamResponse, - WorkflowStartStreamResponse, - WorkflowTaskState, -) -from core.app.task_pipeline.based_generate_task_pipeline import BasedGenerateTaskPipeline -from core.base.tts import AppGeneratorTTSPublisher, AudioTrunk -from core.ops.ops_trace_manager import TraceQueueManager -from core.workflow.entities.workflow_execution_entities import WorkflowExecution -from core.workflow.enums import SystemVariableKey -from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository -from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository -from core.workflow.workflow_cycle_manager import WorkflowCycleManager -from 
extensions.ext_database import db -from models.account import Account -from models.enums import CreatorUserRole -from models.model import EndUser -from models.workflow import ( - Workflow, - WorkflowAppLog, - WorkflowAppLogCreatedFrom, - WorkflowRun, - WorkflowRunStatus, -) - -logger = logging.getLogger(__name__) - - -class WorkflowAppGenerateTaskPipeline: - """ - WorkflowAppGenerateTaskPipeline is a class that generate stream output and state management for Application. - """ - - def __init__( - self, - application_generate_entity: WorkflowAppGenerateEntity, - workflow: Workflow, - queue_manager: AppQueueManager, - user: Union[Account, EndUser], - stream: bool, - workflow_execution_repository: WorkflowExecutionRepository, - workflow_node_execution_repository: WorkflowNodeExecutionRepository, - ) -> None: - self._base_task_pipeline = BasedGenerateTaskPipeline( - application_generate_entity=application_generate_entity, - queue_manager=queue_manager, - stream=stream, - ) - - if isinstance(user, EndUser): - self._user_id = user.id - user_session_id = user.session_id - self._created_by_role = CreatorUserRole.END_USER - elif isinstance(user, Account): - self._user_id = user.id - user_session_id = user.id - self._created_by_role = CreatorUserRole.ACCOUNT - else: - raise ValueError(f"Invalid user type: {type(user)}") - - self._workflow_cycle_manager = WorkflowCycleManager( - application_generate_entity=application_generate_entity, - workflow_system_variables={ - SystemVariableKey.FILES: application_generate_entity.files, - SystemVariableKey.USER_ID: user_session_id, - SystemVariableKey.APP_ID: application_generate_entity.app_config.app_id, - SystemVariableKey.WORKFLOW_ID: workflow.id, - SystemVariableKey.WORKFLOW_RUN_ID: application_generate_entity.workflow_run_id, - }, - workflow_execution_repository=workflow_execution_repository, - workflow_node_execution_repository=workflow_node_execution_repository, - ) - - self._application_generate_entity = application_generate_entity - self._workflow_id = workflow.id - self._workflow_features_dict = workflow.features_dict - self._task_state = WorkflowTaskState() - self._workflow_run_id = "" - - def process(self) -> Union[WorkflowAppBlockingResponse, Generator[WorkflowAppStreamResponse, None, None]]: - """ - Process generate task pipeline. - :return: - """ - generator = self._wrapper_process_stream_response(trace_manager=self._application_generate_entity.trace_manager) - if self._base_task_pipeline._stream: - return self._to_stream_response(generator) - else: - return self._to_blocking_response(generator) - - def _to_blocking_response(self, generator: Generator[StreamResponse, None, None]) -> WorkflowAppBlockingResponse: - """ - To blocking response. 
- :return: - """ - for stream_response in generator: - if isinstance(stream_response, ErrorStreamResponse): - raise stream_response.err - elif isinstance(stream_response, WorkflowFinishStreamResponse): - response = WorkflowAppBlockingResponse( - task_id=self._application_generate_entity.task_id, - workflow_run_id=stream_response.data.id, - data=WorkflowAppBlockingResponse.Data( - id=stream_response.data.id, - workflow_id=stream_response.data.workflow_id, - status=stream_response.data.status, - outputs=stream_response.data.outputs, - error=stream_response.data.error, - elapsed_time=stream_response.data.elapsed_time, - total_tokens=stream_response.data.total_tokens, - total_steps=stream_response.data.total_steps, - created_at=int(stream_response.data.created_at), - finished_at=int(stream_response.data.finished_at), - ), - ) - - return response - else: - continue - - raise ValueError("queue listening stopped unexpectedly.") - - def _to_stream_response( - self, generator: Generator[StreamResponse, None, None] - ) -> Generator[WorkflowAppStreamResponse, None, None]: - """ - To stream response. - :return: - """ - workflow_run_id = None - for stream_response in generator: - if isinstance(stream_response, WorkflowStartStreamResponse): - workflow_run_id = stream_response.workflow_run_id - - yield WorkflowAppStreamResponse(workflow_run_id=workflow_run_id, stream_response=stream_response) - - def _listen_audio_msg(self, publisher: AppGeneratorTTSPublisher | None, task_id: str): - if not publisher: - return None - audio_msg = publisher.check_and_get_audio() - if audio_msg and isinstance(audio_msg, AudioTrunk) and audio_msg.status != "finish": - return MessageAudioStreamResponse(audio=audio_msg.audio, task_id=task_id) - return None - - def _wrapper_process_stream_response( - self, trace_manager: Optional[TraceQueueManager] = None - ) -> Generator[StreamResponse, None, None]: - tts_publisher = None - task_id = self._application_generate_entity.task_id - tenant_id = self._application_generate_entity.app_config.tenant_id - features_dict = self._workflow_features_dict - - if ( - features_dict.get("text_to_speech") - and features_dict["text_to_speech"].get("enabled") - and features_dict["text_to_speech"].get("autoPlay") == "enabled" - ): - tts_publisher = AppGeneratorTTSPublisher( - tenant_id, features_dict["text_to_speech"].get("voice"), features_dict["text_to_speech"].get("language") - ) - - for response in self._process_stream_response(tts_publisher=tts_publisher, trace_manager=trace_manager): - while True: - audio_response = self._listen_audio_msg(publisher=tts_publisher, task_id=task_id) - if audio_response: - yield audio_response - else: - break - yield response - - start_listener_time = time.time() - while (time.time() - start_listener_time) < TTS_AUTO_PLAY_TIMEOUT: - try: - if not tts_publisher: - break - audio_trunk = tts_publisher.check_and_get_audio() - if audio_trunk is None: - # release cpu - # sleep 20 ms ( 40ms => 1280 byte audio file,20ms => 640 byte audio file) - time.sleep(TTS_AUTO_PLAY_YIELD_CPU_TIME) - continue - if audio_trunk.status == "finish": - break - else: - yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id) - except Exception: - logger.exception(f"Fails to get audio trunk, task_id: {task_id}") - break - if tts_publisher: - yield MessageAudioEndStreamResponse(audio="", task_id=task_id) - - def _process_stream_response( - self, - tts_publisher: Optional[AppGeneratorTTSPublisher] = None, - trace_manager: Optional[TraceQueueManager] = None, - ) -> 
Generator[StreamResponse, None, None]: - """ - Process stream response. - :return: - """ - graph_runtime_state = None - - for queue_message in self._base_task_pipeline._queue_manager.listen(): - event = queue_message.event - - if isinstance(event, QueuePingEvent): - yield self._base_task_pipeline._ping_stream_response() - elif isinstance(event, QueueErrorEvent): - err = self._base_task_pipeline._handle_error(event=event) - yield self._base_task_pipeline._error_to_stream_response(err) - break - elif isinstance(event, QueueWorkflowStartedEvent): - # override graph runtime state - graph_runtime_state = event.graph_runtime_state - - with Session(db.engine, expire_on_commit=False) as session: - # init workflow run - workflow_execution = self._workflow_cycle_manager.handle_workflow_run_start( - session=session, - workflow_id=self._workflow_id, - ) - self._workflow_run_id = workflow_execution.id - start_resp = self._workflow_cycle_manager.workflow_start_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution=workflow_execution, - ) - - yield start_resp - elif isinstance( - event, - QueueNodeRetryEvent, - ): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - with Session(db.engine, expire_on_commit=False) as session: - workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_retried( - workflow_execution_id=self._workflow_run_id, - event=event, - ) - response = self._workflow_cycle_manager.workflow_node_retry_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - session.commit() - - if response: - yield response - elif isinstance(event, QueueNodeStartedEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - workflow_node_execution = self._workflow_cycle_manager.handle_node_execution_start( - workflow_execution_id=self._workflow_run_id, event=event - ) - node_start_response = self._workflow_cycle_manager.workflow_node_start_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - - if node_start_response: - yield node_start_response - elif isinstance(event, QueueNodeSucceededEvent): - workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_success( - event=event - ) - node_success_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - - if node_success_response: - yield node_success_response - elif isinstance( - event, - QueueNodeFailedEvent - | QueueNodeInIterationFailedEvent - | QueueNodeInLoopFailedEvent - | QueueNodeExceptionEvent, - ): - workflow_node_execution = self._workflow_cycle_manager.handle_workflow_node_execution_failed( - event=event, - ) - node_failed_response = self._workflow_cycle_manager.workflow_node_finish_to_stream_response( - event=event, - task_id=self._application_generate_entity.task_id, - workflow_node_execution=workflow_node_execution, - ) - - if node_failed_response: - yield node_failed_response - - elif isinstance(event, QueueParallelBranchRunStartedEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - parallel_start_resp = self._workflow_cycle_manager.workflow_parallel_branch_start_to_stream_response( - 
task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - - yield parallel_start_resp - - elif isinstance(event, QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - parallel_finish_resp = ( - self._workflow_cycle_manager.workflow_parallel_branch_finished_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - ) - - yield parallel_finish_resp - - elif isinstance(event, QueueIterationStartEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - iter_start_resp = self._workflow_cycle_manager.workflow_iteration_start_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - - yield iter_start_resp - - elif isinstance(event, QueueIterationNextEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - iter_next_resp = self._workflow_cycle_manager.workflow_iteration_next_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - - yield iter_next_resp - - elif isinstance(event, QueueIterationCompletedEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - iter_finish_resp = self._workflow_cycle_manager.workflow_iteration_completed_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - - yield iter_finish_resp - - elif isinstance(event, QueueLoopStartEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - loop_start_resp = self._workflow_cycle_manager.workflow_loop_start_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - - yield loop_start_resp - - elif isinstance(event, QueueLoopNextEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - loop_next_resp = self._workflow_cycle_manager.workflow_loop_next_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - - yield loop_next_resp - - elif isinstance(event, QueueLoopCompletedEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - - loop_finish_resp = self._workflow_cycle_manager.workflow_loop_completed_to_stream_response( - task_id=self._application_generate_entity.task_id, - workflow_execution_id=self._workflow_run_id, - event=event, - ) - - yield loop_finish_resp - - elif isinstance(event, QueueWorkflowSucceededEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - if not graph_runtime_state: - raise ValueError("graph runtime state not initialized.") - - with Session(db.engine, expire_on_commit=False) as session: - workflow_execution = self._workflow_cycle_manager.handle_workflow_run_success( - workflow_run_id=self._workflow_run_id, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - conversation_id=None, - trace_manager=trace_manager, - ) - - # save workflow app log - self._save_workflow_app_log(session=session, 
workflow_execution=workflow_execution) - - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_execution=workflow_execution, - ) - session.commit() - - yield workflow_finish_resp - elif isinstance(event, QueueWorkflowPartialSuccessEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - if not graph_runtime_state: - raise ValueError("graph runtime state not initialized.") - - with Session(db.engine, expire_on_commit=False) as session: - workflow_execution = self._workflow_cycle_manager.handle_workflow_run_partial_success( - workflow_run_id=self._workflow_run_id, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - outputs=event.outputs, - exceptions_count=event.exceptions_count, - conversation_id=None, - trace_manager=trace_manager, - ) - - # save workflow app log - self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_execution=workflow_execution, - ) - session.commit() - - yield workflow_finish_resp - elif isinstance(event, QueueWorkflowFailedEvent | QueueStopEvent): - if not self._workflow_run_id: - raise ValueError("workflow run not initialized.") - if not graph_runtime_state: - raise ValueError("graph runtime state not initialized.") - - with Session(db.engine, expire_on_commit=False) as session: - workflow_execution = self._workflow_cycle_manager.handle_workflow_run_failed( - workflow_run_id=self._workflow_run_id, - total_tokens=graph_runtime_state.total_tokens, - total_steps=graph_runtime_state.node_run_steps, - status=WorkflowRunStatus.FAILED - if isinstance(event, QueueWorkflowFailedEvent) - else WorkflowRunStatus.STOPPED, - error_message=event.error - if isinstance(event, QueueWorkflowFailedEvent) - else event.get_stop_reason(), - conversation_id=None, - trace_manager=trace_manager, - exceptions_count=event.exceptions_count if isinstance(event, QueueWorkflowFailedEvent) else 0, - ) - - # save workflow app log - self._save_workflow_app_log(session=session, workflow_execution=workflow_execution) - - workflow_finish_resp = self._workflow_cycle_manager.workflow_finish_to_stream_response( - session=session, - task_id=self._application_generate_entity.task_id, - workflow_execution=workflow_execution, - ) - session.commit() - - yield workflow_finish_resp - elif isinstance(event, QueueTextChunkEvent): - delta_text = event.text - if delta_text is None: - continue - - # only publish tts message at text chunk streaming - if tts_publisher: - tts_publisher.publish(queue_message) - - self._task_state.answer += delta_text - yield self._text_chunk_to_stream_response( - delta_text, from_variable_selector=event.from_variable_selector - ) - elif isinstance(event, QueueAgentLogEvent): - yield self._workflow_cycle_manager.handle_agent_log( - task_id=self._application_generate_entity.task_id, event=event - ) - else: - continue - - if tts_publisher: - tts_publisher.publish(None) - - def _save_workflow_app_log(self, *, session: Session, workflow_execution: WorkflowExecution) -> None: - workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id)) - assert workflow_run is not None - invoke_from = self._application_generate_entity.invoke_from - if invoke_from == 
InvokeFrom.SERVICE_API: - created_from = WorkflowAppLogCreatedFrom.SERVICE_API - elif invoke_from == InvokeFrom.EXPLORE: - created_from = WorkflowAppLogCreatedFrom.INSTALLED_APP - elif invoke_from == InvokeFrom.WEB_APP: - created_from = WorkflowAppLogCreatedFrom.WEB_APP - else: - # not save log for debugging - return - - workflow_app_log = WorkflowAppLog() - workflow_app_log.tenant_id = workflow_run.tenant_id - workflow_app_log.app_id = workflow_run.app_id - workflow_app_log.workflow_id = workflow_run.workflow_id - workflow_app_log.workflow_run_id = workflow_run.id - workflow_app_log.created_from = created_from.value - workflow_app_log.created_by_role = self._created_by_role - workflow_app_log.created_by = self._user_id - - session.add(workflow_app_log) - session.commit() - - def _text_chunk_to_stream_response( - self, text: str, from_variable_selector: Optional[list[str]] = None - ) -> TextChunkStreamResponse: - """ - Handle completed event. - :param text: text - :return: - """ - response = TextChunkStreamResponse( - task_id=self._application_generate_entity.task_id, - data=TextChunkStreamResponse.Data(text=text, from_variable_selector=from_variable_selector), - ) - - return response diff --git a/api/core/workflow/workflow_cycle_manager.py b/api/core/workflow/workflow_cycle_manager.py index d4c2b1b6bd..1918dd9f09 100644 --- a/api/core/workflow/workflow_cycle_manager.py +++ b/api/core/workflow/workflow_cycle_manager.py @@ -1,7 +1,6 @@ -import time -from collections.abc import Mapping, Sequence +from collections.abc import Mapping from datetime import UTC, datetime -from typing import Any, Optional, Union, cast +from typing import Any, Optional, Union from uuid import uuid4 from sqlalchemy import func, select @@ -9,13 +8,6 @@ from sqlalchemy.orm import Session from core.app.entities.app_invoke_entities import AdvancedChatAppGenerateEntity, WorkflowAppGenerateEntity from core.app.entities.queue_entities import ( - QueueAgentLogEvent, - QueueIterationCompletedEvent, - QueueIterationNextEvent, - QueueIterationStartEvent, - QueueLoopCompletedEvent, - QueueLoopNextEvent, - QueueLoopStartEvent, QueueNodeExceptionEvent, QueueNodeFailedEvent, QueueNodeInIterationFailedEvent, @@ -23,31 +15,10 @@ from core.app.entities.queue_entities import ( QueueNodeRetryEvent, QueueNodeStartedEvent, QueueNodeSucceededEvent, - QueueParallelBranchRunFailedEvent, - QueueParallelBranchRunStartedEvent, - QueueParallelBranchRunSucceededEvent, -) -from core.app.entities.task_entities import ( - AgentLogStreamResponse, - IterationNodeCompletedStreamResponse, - IterationNodeNextStreamResponse, - IterationNodeStartStreamResponse, - LoopNodeCompletedStreamResponse, - LoopNodeNextStreamResponse, - LoopNodeStartStreamResponse, - NodeFinishStreamResponse, - NodeRetryStreamResponse, - NodeStartStreamResponse, - ParallelBranchFinishedStreamResponse, - ParallelBranchStartStreamResponse, - WorkflowFinishStreamResponse, - WorkflowStartStreamResponse, ) from core.app.task_pipeline.exc import WorkflowRunNotFoundError -from core.file import FILE_MODEL_IDENTITY, File from core.ops.entities.trace_entity import TraceTaskName from core.ops.ops_trace_manager import TraceQueueManager, TraceTask -from core.tools.tool_manager import ToolManager from core.workflow.entities.node_entities import NodeRunMetadataKey from core.workflow.entities.node_execution_entities import ( NodeExecution, @@ -55,17 +26,11 @@ from core.workflow.entities.node_execution_entities import ( ) from core.workflow.entities.workflow_execution_entities import 
WorkflowExecution, WorkflowExecutionStatus, WorkflowType from core.workflow.enums import SystemVariableKey -from core.workflow.nodes import NodeType -from core.workflow.nodes.tool.entities import ToolNodeData from core.workflow.repository.workflow_execution_repository import WorkflowExecutionRepository from core.workflow.repository.workflow_node_execution_repository import WorkflowNodeExecutionRepository from core.workflow.workflow_entry import WorkflowEntry from models import ( - Account, - CreatorUserRole, - EndUser, Workflow, - WorkflowNodeExecutionStatus, WorkflowRun, WorkflowRunStatus, ) @@ -416,506 +381,8 @@ class WorkflowCycleManager: return domain_execution - def workflow_start_to_stream_response( - self, - *, - task_id: str, - workflow_execution: WorkflowExecution, - ) -> WorkflowStartStreamResponse: - return WorkflowStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution.id, - data=WorkflowStartStreamResponse.Data( - id=workflow_execution.id, - workflow_id=workflow_execution.workflow_id, - sequence_number=workflow_execution.sequence_number, - inputs=workflow_execution.inputs, - created_at=int(workflow_execution.started_at.timestamp()), - ), - ) - - def workflow_finish_to_stream_response( - self, - *, - session: Session, - task_id: str, - workflow_execution: WorkflowExecution, - ) -> WorkflowFinishStreamResponse: - created_by = None - workflow_run = session.scalar(select(WorkflowRun).where(WorkflowRun.id == workflow_execution.id)) - assert workflow_run is not None - if workflow_run.created_by_role == CreatorUserRole.ACCOUNT: - stmt = select(Account).where(Account.id == workflow_run.created_by) - account = session.scalar(stmt) - if account: - created_by = { - "id": account.id, - "name": account.name, - "email": account.email, - } - elif workflow_run.created_by_role == CreatorUserRole.END_USER: - stmt = select(EndUser).where(EndUser.id == workflow_run.created_by) - end_user = session.scalar(stmt) - if end_user: - created_by = { - "id": end_user.id, - "user": end_user.session_id, - } - else: - raise NotImplementedError(f"unknown created_by_role: {workflow_run.created_by_role}") - - # Handle the case where finished_at is None by using current time as default - finished_at_timestamp = ( - int(workflow_execution.finished_at.timestamp()) - if workflow_execution.finished_at - else int(datetime.now(UTC).timestamp()) - ) - - return WorkflowFinishStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution.id, - data=WorkflowFinishStreamResponse.Data( - id=workflow_execution.id, - workflow_id=workflow_execution.workflow_id, - sequence_number=workflow_execution.sequence_number, - status=workflow_execution.status, - outputs=workflow_execution.outputs, - error=workflow_execution.error_message, - elapsed_time=workflow_execution.elapsed_time, - total_tokens=workflow_execution.total_tokens, - total_steps=workflow_execution.total_steps, - created_by=created_by, - created_at=int(workflow_execution.started_at.timestamp()), - finished_at=finished_at_timestamp, - files=self.fetch_files_from_node_outputs(workflow_execution.outputs), - exceptions_count=workflow_execution.exceptions_count, - ), - ) - - def workflow_node_start_to_stream_response( - self, - *, - event: QueueNodeStartedEvent, - task_id: str, - workflow_node_execution: NodeExecution, - ) -> Optional[NodeStartStreamResponse]: - if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: - return None - if not workflow_node_execution.workflow_run_id: - return None - - response = 
NodeStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_node_execution.workflow_run_id, - data=NodeStartStreamResponse.Data( - id=workflow_node_execution.id, - node_id=workflow_node_execution.node_id, - node_type=workflow_node_execution.node_type, - title=workflow_node_execution.title, - index=workflow_node_execution.index, - predecessor_node_id=workflow_node_execution.predecessor_node_id, - inputs=workflow_node_execution.inputs, - created_at=int(workflow_node_execution.created_at.timestamp()), - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - parallel_run_id=event.parallel_mode_run_id, - agent_strategy=event.agent_strategy, - ), - ) - - # extras logic - if event.node_type == NodeType.TOOL: - node_data = cast(ToolNodeData, event.node_data) - response.data.extras["icon"] = ToolManager.get_tool_icon( - tenant_id=self._application_generate_entity.app_config.tenant_id, - provider_type=node_data.provider_type, - provider_id=node_data.provider_id, - ) - - return response - - def workflow_node_finish_to_stream_response( - self, - *, - event: QueueNodeSucceededEvent - | QueueNodeFailedEvent - | QueueNodeInIterationFailedEvent - | QueueNodeInLoopFailedEvent - | QueueNodeExceptionEvent, - task_id: str, - workflow_node_execution: NodeExecution, - ) -> Optional[NodeFinishStreamResponse]: - if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: - return None - if not workflow_node_execution.workflow_run_id: - return None - if not workflow_node_execution.finished_at: - return None - - return NodeFinishStreamResponse( - task_id=task_id, - workflow_run_id=workflow_node_execution.workflow_run_id, - data=NodeFinishStreamResponse.Data( - id=workflow_node_execution.id, - node_id=workflow_node_execution.node_id, - node_type=workflow_node_execution.node_type, - index=workflow_node_execution.index, - title=workflow_node_execution.title, - predecessor_node_id=workflow_node_execution.predecessor_node_id, - inputs=workflow_node_execution.inputs, - process_data=workflow_node_execution.process_data, - outputs=workflow_node_execution.outputs, - status=workflow_node_execution.status, - error=workflow_node_execution.error, - elapsed_time=workflow_node_execution.elapsed_time, - execution_metadata=workflow_node_execution.metadata, - created_at=int(workflow_node_execution.created_at.timestamp()), - finished_at=int(workflow_node_execution.finished_at.timestamp()), - files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}), - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - ), - ) - - def workflow_node_retry_to_stream_response( - self, - *, - event: QueueNodeRetryEvent, - task_id: str, - workflow_node_execution: NodeExecution, - ) -> Optional[Union[NodeRetryStreamResponse, NodeFinishStreamResponse]]: - if workflow_node_execution.node_type in {NodeType.ITERATION, NodeType.LOOP}: - return None - if not workflow_node_execution.workflow_run_id: - return None - if not workflow_node_execution.finished_at: - return None - - return NodeRetryStreamResponse( - task_id=task_id, - workflow_run_id=workflow_node_execution.workflow_run_id, - 
data=NodeRetryStreamResponse.Data( - id=workflow_node_execution.id, - node_id=workflow_node_execution.node_id, - node_type=workflow_node_execution.node_type, - index=workflow_node_execution.index, - title=workflow_node_execution.title, - predecessor_node_id=workflow_node_execution.predecessor_node_id, - inputs=workflow_node_execution.inputs, - process_data=workflow_node_execution.process_data, - outputs=workflow_node_execution.outputs, - status=workflow_node_execution.status, - error=workflow_node_execution.error, - elapsed_time=workflow_node_execution.elapsed_time, - execution_metadata=workflow_node_execution.metadata, - created_at=int(workflow_node_execution.created_at.timestamp()), - finished_at=int(workflow_node_execution.finished_at.timestamp()), - files=self.fetch_files_from_node_outputs(workflow_node_execution.outputs or {}), - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - retry_index=event.retry_index, - ), - ) - - def workflow_parallel_branch_start_to_stream_response( - self, - *, - task_id: str, - workflow_execution_id: str, - event: QueueParallelBranchRunStartedEvent, - ) -> ParallelBranchStartStreamResponse: - return ParallelBranchStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=ParallelBranchStartStreamResponse.Data( - parallel_id=event.parallel_id, - parallel_branch_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - created_at=int(time.time()), - ), - ) - - def workflow_parallel_branch_finished_to_stream_response( - self, - *, - task_id: str, - workflow_execution_id: str, - event: QueueParallelBranchRunSucceededEvent | QueueParallelBranchRunFailedEvent, - ) -> ParallelBranchFinishedStreamResponse: - return ParallelBranchFinishedStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=ParallelBranchFinishedStreamResponse.Data( - parallel_id=event.parallel_id, - parallel_branch_id=event.parallel_start_node_id, - parent_parallel_id=event.parent_parallel_id, - parent_parallel_start_node_id=event.parent_parallel_start_node_id, - iteration_id=event.in_iteration_id, - loop_id=event.in_loop_id, - status="succeeded" if isinstance(event, QueueParallelBranchRunSucceededEvent) else "failed", - error=event.error if isinstance(event, QueueParallelBranchRunFailedEvent) else None, - created_at=int(time.time()), - ), - ) - - def workflow_iteration_start_to_stream_response( - self, - *, - task_id: str, - workflow_execution_id: str, - event: QueueIterationStartEvent, - ) -> IterationNodeStartStreamResponse: - return IterationNodeStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=IterationNodeStartStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - metadata=event.metadata or {}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def workflow_iteration_next_to_stream_response( - self, - *, - task_id: str, - workflow_execution_id: str, - event: QueueIterationNextEvent, - ) -> IterationNodeNextStreamResponse: - 
return IterationNodeNextStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=IterationNodeNextStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - index=event.index, - pre_iteration_output=event.output, - created_at=int(time.time()), - extras={}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parallel_mode_run_id=event.parallel_mode_run_id, - duration=event.duration, - ), - ) - - def workflow_iteration_completed_to_stream_response( - self, - *, - task_id: str, - workflow_execution_id: str, - event: QueueIterationCompletedEvent, - ) -> IterationNodeCompletedStreamResponse: - return IterationNodeCompletedStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=IterationNodeCompletedStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - outputs=event.outputs, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - status=WorkflowNodeExecutionStatus.SUCCEEDED - if event.error is None - else WorkflowNodeExecutionStatus.FAILED, - error=None, - elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), - total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, - execution_metadata=event.metadata, - finished_at=int(time.time()), - steps=event.steps, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def workflow_loop_start_to_stream_response( - self, *, task_id: str, workflow_execution_id: str, event: QueueLoopStartEvent - ) -> LoopNodeStartStreamResponse: - return LoopNodeStartStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=LoopNodeStartStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - metadata=event.metadata or {}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def workflow_loop_next_to_stream_response( - self, - *, - task_id: str, - workflow_execution_id: str, - event: QueueLoopNextEvent, - ) -> LoopNodeNextStreamResponse: - return LoopNodeNextStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=LoopNodeNextStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - index=event.index, - pre_loop_output=event.output, - created_at=int(time.time()), - extras={}, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - parallel_mode_run_id=event.parallel_mode_run_id, - duration=event.duration, - ), - ) - - def workflow_loop_completed_to_stream_response( - self, - *, - task_id: str, - workflow_execution_id: str, - event: QueueLoopCompletedEvent, - ) -> LoopNodeCompletedStreamResponse: - return LoopNodeCompletedStreamResponse( - task_id=task_id, - workflow_run_id=workflow_execution_id, - data=LoopNodeCompletedStreamResponse.Data( - id=event.node_id, - node_id=event.node_id, - node_type=event.node_type.value, - title=event.node_data.title, - outputs=event.outputs, - created_at=int(time.time()), - extras={}, - inputs=event.inputs or {}, - status=WorkflowNodeExecutionStatus.SUCCEEDED - if event.error is None - else WorkflowNodeExecutionStatus.FAILED, - 
error=None, - elapsed_time=(datetime.now(UTC).replace(tzinfo=None) - event.start_at).total_seconds(), - total_tokens=event.metadata.get("total_tokens", 0) if event.metadata else 0, - execution_metadata=event.metadata, - finished_at=int(time.time()), - steps=event.steps, - parallel_id=event.parallel_id, - parallel_start_node_id=event.parallel_start_node_id, - ), - ) - - def fetch_files_from_node_outputs(self, outputs_dict: Mapping[str, Any] | None) -> Sequence[Mapping[str, Any]]: - """ - Fetch files from node outputs - :param outputs_dict: node outputs dict - :return: - """ - if not outputs_dict: - return [] - - files = [self._fetch_files_from_variable_value(output_value) for output_value in outputs_dict.values()] - # Remove None - files = [file for file in files if file] - # Flatten list - # Flatten the list of sequences into a single list of mappings - flattened_files = [file for sublist in files if sublist for file in sublist] - - # Convert to tuple to match Sequence type - return tuple(flattened_files) - - def _fetch_files_from_variable_value(self, value: Union[dict, list]) -> Sequence[Mapping[str, Any]]: - """ - Fetch files from variable value - :param value: variable value - :return: - """ - if not value: - return [] - - files = [] - if isinstance(value, list): - for item in value: - file = self._get_file_var_from_value(item) - if file: - files.append(file) - elif isinstance(value, dict): - file = self._get_file_var_from_value(value) - if file: - files.append(file) - - return files - - def _get_file_var_from_value(self, value: Union[dict, list]) -> Mapping[str, Any] | None: - """ - Get file var from value - :param value: variable value - :return: - """ - if not value: - return None - - if isinstance(value, dict) and value.get("dify_model_identity") == FILE_MODEL_IDENTITY: - return value - elif isinstance(value, File): - return value.to_dict() - - return None - def _get_workflow_execution_or_raise_error(self, id: str, /) -> WorkflowExecution: execution = self._workflow_execution_repository.get(id) if not execution: raise WorkflowRunNotFoundError(id) return execution - - def handle_agent_log(self, task_id: str, event: QueueAgentLogEvent) -> AgentLogStreamResponse: - """ - Handle agent log - :param task_id: task id - :param event: agent log event - :return: - """ - return AgentLogStreamResponse( - task_id=task_id, - data=AgentLogStreamResponse.Data( - node_execution_id=event.node_execution_id, - id=event.id, - parent_id=event.parent_id, - label=event.label, - error=event.error, - status=event.status, - data=event.data, - metadata=event.metadata, - node_id=event.node_id, - ), - ) From 38b1e46241926a7ed3baccb737e47a72a0c4f7d1 Mon Sep 17 00:00:00 2001 From: GonzaHM <122193788+GonzaHM@users.noreply.github.com> Date: Thu, 22 May 2025 11:05:24 +0900 Subject: [PATCH 10/19] fix: correct indentation in dataset retrieval model assignment (#20040) --- api/services/dataset_service.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py index bac1bf389f..6957315409 100644 --- a/api/services/dataset_service.py +++ b/api/services/dataset_service.py @@ -960,11 +960,11 @@ class DocumentService: "score_threshold_enabled": False, } - dataset.retrieval_model = ( - knowledge_config.retrieval_model.model_dump() - if knowledge_config.retrieval_model - else default_retrieval_model - ) # type: ignore + dataset.retrieval_model = ( + knowledge_config.retrieval_model.model_dump() + if knowledge_config.retrieval_model + else 
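# Indentation is load-bearing in this hunk: one level decides whether the
# retrieval-model assignment runs for every dataset or only inside the preceding
# branch. A generic illustration of that failure mode (names hypothetical, not the
# Dify code path):
def configure(dataset, retrieval_model, default_retrieval_model, in_branch: bool):
    if in_branch:
        pass  # branch-specific setup happens here
        # an assignment placed at this depth silently applies to this branch only
    # at this depth it applies unconditionally, which is what the fix pins down
    dataset.retrieval_model = retrieval_model if retrieval_model else default_retrieval_model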
default_retrieval_model + ) # type: ignore documents = [] if knowledge_config.original_document_id: From adca981eee8aa5279ea64d70e7c7de0ba340f5b1 Mon Sep 17 00:00:00 2001 From: zhudongwork <32665466+zhudongwork@users.noreply.github.com> Date: Thu, 22 May 2025 10:09:07 +0800 Subject: [PATCH 11/19] fix: uninitialized variable error on empty knowledge retrieval(agent) (#20025) Co-authored-by: crazywoola <427733928@qq.com> --- .../tools/utils/dataset_retriever/dataset_retriever_tool.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py b/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py index fff261e0bd..7b6882ed52 100644 --- a/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py +++ b/api/core/tools/utils/dataset_retriever/dataset_retriever_tool.py @@ -125,6 +125,7 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): return "" # get retrieval model , if the model is not setting , using default retrieval_model: dict[str, Any] = dataset.retrieval_model or default_retrieval_model + retrieval_resource_list = [] if dataset.indexing_technique == "economy": # use keyword table query documents = RetrievalService.retrieve( @@ -181,7 +182,7 @@ class DatasetRetrieverTool(DatasetRetrieverBaseTool): score=record.score, ) ) - retrieval_resource_list = [] + if self.return_resource: for record in records: segment = record.segment From 6f48af2610b5976a1efce641546ff3fb4cbd2ddf Mon Sep 17 00:00:00 2001 From: He Huang Date: Thu, 22 May 2025 06:14:38 +0400 Subject: [PATCH 12/19] Refactor OpenSearch config to separate use_ssl and verify_certs flags (#20075) Co-authored-by: he.huang Co-authored-by: crazywoola <427733928@qq.com> --- api/.env.example | 1 + api/configs/middleware/vdb/opensearch_config.py | 5 +++++ .../rag/datasource/vdb/opensearch/opensearch_vector.py | 8 ++++++-- docker/.env.example | 1 + docker/docker-compose.yaml | 1 + 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/api/.env.example b/api/.env.example index 9dec564c28..e63aac1409 100644 --- a/api/.env.example +++ b/api/.env.example @@ -269,6 +269,7 @@ OPENSEARCH_PORT=9200 OPENSEARCH_USER=admin OPENSEARCH_PASSWORD=admin OPENSEARCH_SECURE=true +OPENSEARCH_VERIFY_CERTS=true # Baidu configuration BAIDU_VECTOR_DB_ENDPOINT=http://127.0.0.1:5287 diff --git a/api/configs/middleware/vdb/opensearch_config.py b/api/configs/middleware/vdb/opensearch_config.py index 96f478e9a6..9fd9b60194 100644 --- a/api/configs/middleware/vdb/opensearch_config.py +++ b/api/configs/middleware/vdb/opensearch_config.py @@ -33,6 +33,11 @@ class OpenSearchConfig(BaseSettings): default=False, ) + OPENSEARCH_VERIFY_CERTS: bool = Field( + description="Whether to verify SSL certificates for HTTPS connections (recommended to set True in production)", + default=True, + ) + OPENSEARCH_AUTH_METHOD: AuthMethod = Field( description="Authentication method for OpenSearch connection (default is 'basic')", default=AuthMethod.BASIC, diff --git a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py index e23b8d197f..6991598ce6 100644 --- a/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py +++ b/api/core/rag/datasource/vdb/opensearch/opensearch_vector.py @@ -23,7 +23,8 @@ logger = logging.getLogger(__name__) class OpenSearchConfig(BaseModel): host: str port: int - secure: bool = False + secure: bool = False # use_ssl + verify_certs: bool = True auth_method: Literal["basic", "aws_managed_iam"] = 
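# What the secure/verify_certs split in this config buys: TLS can stay enabled while
# certificate verification is relaxed (for example with self-signed certificates),
# which the old single flag could not express; the validator still rejects
# verify_certs=True without TLS. Sketch of the resulting client wiring, with host
# and port values assumed:
from opensearchpy import OpenSearch, Urllib3HttpConnection

client = OpenSearch(
    hosts=[{"host": "opensearch", "port": 9200}],
    use_ssl=True,        # OPENSEARCH_SECURE
    verify_certs=False,  # OPENSEARCH_VERIFY_CERTS; True would also require use_ssl=True
    connection_class=Urllib3HttpConnection,
)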
"basic" user: Optional[str] = None password: Optional[str] = None @@ -42,6 +43,8 @@ class OpenSearchConfig(BaseModel): raise ValueError("config OPENSEARCH_AWS_REGION is required for AWS_MANAGED_IAM auth method") if not values.get("aws_service"): raise ValueError("config OPENSEARCH_AWS_SERVICE is required for AWS_MANAGED_IAM auth method") + if not values.get("OPENSEARCH_SECURE") and values.get("OPENSEARCH_VERIFY_CERTS"): + raise ValueError("verify_certs=True requires secure (HTTPS) connection") return values def create_aws_managed_iam_auth(self) -> Urllib3AWSV4SignerAuth: @@ -57,7 +60,7 @@ class OpenSearchConfig(BaseModel): params = { "hosts": [{"host": self.host, "port": self.port}], "use_ssl": self.secure, - "verify_certs": self.secure, + "verify_certs": self.verify_certs, "connection_class": Urllib3HttpConnection, "pool_maxsize": 20, } @@ -279,6 +282,7 @@ class OpenSearchVectorFactory(AbstractVectorFactory): host=dify_config.OPENSEARCH_HOST or "localhost", port=dify_config.OPENSEARCH_PORT, secure=dify_config.OPENSEARCH_SECURE, + verify_certs=dify_config.OPENSEARCH_VERIFY_CERTS, auth_method=dify_config.OPENSEARCH_AUTH_METHOD.value, user=dify_config.OPENSEARCH_USER, password=dify_config.OPENSEARCH_PASSWORD, diff --git a/docker/.env.example b/docker/.env.example index aacef0e1f5..86c1a5779a 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -531,6 +531,7 @@ RELYT_DATABASE=postgres OPENSEARCH_HOST=opensearch OPENSEARCH_PORT=9200 OPENSEARCH_SECURE=true +OPENSEARCH_VERIFY_CERTS=true OPENSEARCH_AUTH_METHOD=basic OPENSEARCH_USER=admin OPENSEARCH_PASSWORD=admin diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index bb71169907..371646eb9f 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -227,6 +227,7 @@ x-shared-env: &shared-api-worker-env OPENSEARCH_HOST: ${OPENSEARCH_HOST:-opensearch} OPENSEARCH_PORT: ${OPENSEARCH_PORT:-9200} OPENSEARCH_SECURE: ${OPENSEARCH_SECURE:-true} + OPENSEARCH_VERIFY_CERTS: ${OPENSEARCH_VERIFY_CERTS:-true} OPENSEARCH_AUTH_METHOD: ${OPENSEARCH_AUTH_METHOD:-basic} OPENSEARCH_USER: ${OPENSEARCH_USER:-admin} OPENSEARCH_PASSWORD: ${OPENSEARCH_PASSWORD:-admin} From 648393cc7bc1347f93b40ec24e563405b416ae60 Mon Sep 17 00:00:00 2001 From: jameshui1997 <115518541+jameshui1997@users.noreply.github.com> Date: Thu, 22 May 2025 14:08:36 +0800 Subject: [PATCH 13/19] fix: improve tracing provider validation logic in OpsTraceManager (#20042) --- api/core/ops/ops_trace_manager.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/core/ops/ops_trace_manager.py b/api/core/ops/ops_trace_manager.py index a3424c7421..32301e11e7 100644 --- a/api/core/ops/ops_trace_manager.py +++ b/api/core/ops/ops_trace_manager.py @@ -292,11 +292,14 @@ class OpsTraceManager: :return: """ # auth check - if tracing_provider is not None: + if enabled == True: try: provider_config_map[tracing_provider] except KeyError: raise ValueError(f"Invalid tracing provider: {tracing_provider}") + else: + if tracing_provider is not None: + raise ValueError(f"Invalid tracing provider: {tracing_provider}") app_config: Optional[App] = db.session.query(App).filter(App.id == app_id).first() if not app_config: From 9afd7f6c87182f620be5eb0c4a39f74d57731789 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 22 May 2025 08:10:14 +0200 Subject: [PATCH 14/19] chore: Update S3StorageConfig to match boto3 type hints (#20072) --- .../storage/amazon_s3_storage_config.py | 4 +- api/pyproject.toml | 1 + api/uv.lock | 46 +++++++++++++++++++ 3 files changed, 49 
insertions(+), 2 deletions(-) diff --git a/api/configs/middleware/storage/amazon_s3_storage_config.py b/api/configs/middleware/storage/amazon_s3_storage_config.py index f2d94b12ff..e14c210718 100644 --- a/api/configs/middleware/storage/amazon_s3_storage_config.py +++ b/api/configs/middleware/storage/amazon_s3_storage_config.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Literal, Optional from pydantic import Field from pydantic_settings import BaseSettings @@ -34,7 +34,7 @@ class S3StorageConfig(BaseSettings): default=None, ) - S3_ADDRESS_STYLE: str = Field( + S3_ADDRESS_STYLE: Literal["auto", "virtual", "path"] = Field( description="S3 addressing style: 'auto', 'path', or 'virtual'", default="auto", ) diff --git a/api/pyproject.toml b/api/pyproject.toml index 26a0bdb11e..2606c7db4b 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -148,6 +148,7 @@ dev = [ "types-tensorflow~=2.18.0", "types-tqdm~=4.67.0", "types-ujson~=5.10.0", + "boto3-stubs>=1.38.20", ] ############################################################ diff --git a/api/uv.lock b/api/uv.lock index 3a877762d3..1dc28d09c0 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -540,6 +540,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/65/77/8bbca82f70b062181cf0ae53fd43f1ac6556f3078884bfef9da2269c06a3/boto3-1.35.99-py3-none-any.whl", hash = "sha256:83e560faaec38a956dfb3d62e05e1703ee50432b45b788c09e25107c5058bd71", size = 139178, upload-time = "2025-01-14T20:20:25.48Z" }, ] +[[package]] +name = "boto3-stubs" +version = "1.38.20" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore-stubs" }, + { name = "types-s3transfer" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4e/89/824fb0a9bebf9f1d6df70bb145f8e9c24fc4d918d4050b5d4dca075cc292/boto3_stubs-1.38.20.tar.gz", hash = "sha256:7f1d7bfff7355eb4d17e7984fbf27f44709cd8484abb54bd6ba34ec73a552605", size = 99063, upload-time = "2025-05-20T23:30:19.84Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/69/cfc45dfce3b4ea417f9aec708ade1eda7f280fe8ae7feca796b036619587/boto3_stubs-1.38.20-py3-none-any.whl", hash = "sha256:5406da868980a3854cc9b57db150c6f2e39a4fe4a58f2872e61ac5a3d46f734e", size = 68667, upload-time = "2025-05-20T23:30:12.393Z" }, +] + [[package]] name = "botocore" version = "1.35.99" @@ -554,6 +568,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/dd/d87e2a145fad9e08d0ec6edcf9d71f838ccc7acdd919acc4c0d4a93515f8/botocore-1.35.99-py3-none-any.whl", hash = "sha256:b22d27b6b617fc2d7342090d6129000af2efd20174215948c0d7ae2da0fab445", size = 13293216, upload-time = "2025-01-14T20:20:06.427Z" }, ] +[[package]] +name = "botocore-stubs" +version = "1.38.19" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "types-awscrt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/70/6204c97f8d8362364f11c16085566abdcaa114c264d3a4d709ff697b203b/botocore_stubs-1.38.19.tar.gz", hash = "sha256:84f67a42bb240a8ea0c5fe4f05d497cc411177db600bc7012182e499ac24bf19", size = 42269, upload-time = "2025-05-19T20:18:13.556Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/ce/28b143452c22b678678d832bf8b41218e3d319bf94062b48c28fe5d81163/botocore_stubs-1.38.19-py3-none-any.whl", hash = "sha256:66fd7d231c21134a12acbe313ef7a6b152cbf9bfd7bfa12a62f8c33e94737e26", size = 65603, upload-time = "2025-05-19T20:18:10.445Z" }, +] + [[package]] name = "bottleneck" 
version = "1.4.2" @@ -1260,6 +1286,7 @@ dependencies = [ [package.dev-dependencies] dev = [ + { name = "boto3-stubs" }, { name = "coverage" }, { name = "dotenv-linter" }, { name = "faker" }, @@ -1430,6 +1457,7 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "boto3-stubs", specifier = ">=1.38.20" }, { name = "coverage", specifier = "~=7.2.4" }, { name = "dotenv-linter", specifier = "~=0.5.0" }, { name = "faker", specifier = "~=32.1.0" }, @@ -5557,6 +5585,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/18/1016ffd4c7775f24371f6a0309483dc5597e8245b5add67924e54ea3b83a/types_aiofiles-24.1.0.20250326-py3-none-any.whl", hash = "sha256:dfb58c9aa18bd449e80fb5d7f49dc3dd20d31de920a46223a61798ee4a521a70", size = 14344, upload-time = "2025-03-26T02:53:31.856Z" }, ] +[[package]] +name = "types-awscrt" +version = "0.27.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/6c/583522cfb3c330e92e726af517a91c13247e555e021791a60f1b03c6ff16/types_awscrt-0.27.2.tar.gz", hash = "sha256:acd04f57119eb15626ab0ba9157fc24672421de56e7bd7b9f61681fedee44e91", size = 16304, upload-time = "2025-05-16T03:10:08.712Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/82/1ee2e5c9d28deac086ab3a6ff07c8bc393ef013a083f546c623699881715/types_awscrt-0.27.2-py3-none-any.whl", hash = "sha256:49a045f25bbd5ad2865f314512afced933aed35ddbafc252e2268efa8a787e4e", size = 37761, upload-time = "2025-05-16T03:10:07.466Z" }, +] + [[package]] name = "types-beautifulsoup4" version = "4.12.0.20250204" @@ -5854,6 +5891,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/b1/f4ba392a3341cd9d613f2dce855e82471073c5ec34996fe84ac3857956d0/types_requests_oauthlib-2.0.0.20250306-py3-none-any.whl", hash = "sha256:37707de81d9ce54894afcccd70d4a845dbe4c59e747908faaeba59a96453d993", size = 14446, upload-time = "2025-03-06T02:49:24.364Z" }, ] +[[package]] +name = "types-s3transfer" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/d5/830e9efe91a26601a2bebde6f299239d2d26e542f5d4b3bc7e8c23c81a3f/types_s3transfer-0.12.0.tar.gz", hash = "sha256:f8f59201481e904362873bf0be3267f259d60ad946ebdfcb847d092a1fa26f98", size = 14096, upload-time = "2025-04-23T00:38:19.131Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/43/6097275152463ac9bacf1e00aab30bc6682bf45f6a031be8bf029c030ba2/types_s3transfer-0.12.0-py3-none-any.whl", hash = "sha256:101bbc5b7f00b71512374df881f480fc6bf63c948b5098ab024bf3370fbfb0e8", size = 19553, upload-time = "2025-04-23T00:38:17.865Z" }, +] + [[package]] name = "types-shapely" version = "2.0.0.20250404" From 916c415b4b78258754753ad49793476e8888a00b Mon Sep 17 00:00:00 2001 From: Nite Knite Date: Thu, 22 May 2025 14:15:00 +0800 Subject: [PATCH 15/19] feat: add entry point for requesting a plugin (#20026) --- web/app/components/base/logo/dify-logo.tsx | 7 ++++--- web/app/components/header/index.tsx | 2 +- web/app/components/plugins/plugin-page/index.tsx | 13 ++++++++++++- web/i18n/en-US/plugin.ts | 1 + web/i18n/zh-Hans/plugin.ts | 1 + web/i18n/zh-Hant/plugin.ts | 1 + 6 files changed, 20 insertions(+), 5 deletions(-) diff --git a/web/app/components/base/logo/dify-logo.tsx b/web/app/components/base/logo/dify-logo.tsx index c3dca3e71d..9e8f077372 100644 --- a/web/app/components/base/logo/dify-logo.tsx +++ b/web/app/components/base/logo/dify-logo.tsx @@ -33,16 +33,17 @@ const DifyLogo: FC = ({ const { theme } = 
useTheme() const themedStyle = (theme === 'dark' && style === 'default') ? 'monochromeWhite' : style const { systemFeatures } = useGlobalPublicStore() + const hasBrandingLogo = Boolean(systemFeatures.branding.enabled && systemFeatures.branding.workspace_logo) let src = `${basePath}${logoPathMap[themedStyle]}` - if (systemFeatures.branding.enabled) + if (hasBrandingLogo) src = systemFeatures.branding.workspace_logo return ( Dify logo ) } diff --git a/web/app/components/header/index.tsx b/web/app/components/header/index.tsx index c2345fbdaa..6e8d1704dd 100644 --- a/web/app/components/header/index.tsx +++ b/web/app/components/header/index.tsx @@ -60,7 +60,7 @@ const Header = () => { { !isMobile &&
- [one-line JSX removed here; markup lost in extraction]
+ [one-line JSX added here; per the diffstat this hunk swaps a single logo element, matching the DifyLogo changes above]
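The dify-logo.tsx change above makes the branding check stricter: a workspace logo is used only when branding is enabled and a logo URL actually exists, falling back to the themed default path otherwise. A minimal sketch of that selection logic — the `Branding` type and `resolveLogoSrc` helper are illustrative names, not the component's real API, which reads `systemFeatures` from a global store:

```ts
// Sketch of the logo-source selection introduced in dify-logo.tsx.
// `Branding` and `resolveLogoSrc` are assumed names for illustration.
type Branding = { enabled: boolean; workspace_logo?: string }

function resolveLogoSrc(basePath: string, themedLogoPath: string, branding: Branding): string {
  // Both the flag AND a non-empty URL must be present; previously the
  // flag alone could select an empty workspace-logo src.
  const hasBrandingLogo = Boolean(branding.enabled && branding.workspace_logo)
  return hasBrandingLogo ? branding.workspace_logo! : `${basePath}${themedLogoPath}`
}
```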
diff --git a/web/app/components/plugins/plugin-page/index.tsx b/web/app/components/plugins/plugin-page/index.tsx index 7cba865c66..e769c8e29a 100644 --- a/web/app/components/plugins/plugin-page/index.tsx +++ b/web/app/components/plugins/plugin-page/index.tsx @@ -186,6 +186,17 @@ const PluginPage = ({ { isExploringMarketplace && ( <> + + + -
+ [added JSX lost in extraction: a "Request a plugin" entry point rendered alongside the existing marketplace actions]
) } diff --git a/web/i18n/en-US/plugin.ts b/web/i18n/en-US/plugin.ts index a0b36fbd65..51399d5310 100644 --- a/web/i18n/en-US/plugin.ts +++ b/web/i18n/en-US/plugin.ts @@ -208,6 +208,7 @@ const translation = { installedError: '{{errorLength}} plugins failed to install', clearAll: 'Clear all', }, + requestAPlugin: 'Request a plugin', submitPlugin: 'Submit plugin', difyVersionNotCompatible: 'The current Dify version is not compatible with this plugin, please upgrade to the minimum version required: {{minimalDifyVersion}}', } diff --git a/web/i18n/zh-Hans/plugin.ts b/web/i18n/zh-Hans/plugin.ts index e088557dfb..9b059ac5f4 100644 --- a/web/i18n/zh-Hans/plugin.ts +++ b/web/i18n/zh-Hans/plugin.ts @@ -208,6 +208,7 @@ const translation = { installedError: '{{errorLength}} 个插件安装失败', clearAll: '清除所有', }, + requestAPlugin: '申请插件', submitPlugin: '上传插件', difyVersionNotCompatible: '当前 Dify 版本不兼容该插件,其最低版本要求为 {{minimalDifyVersion}}', } diff --git a/web/i18n/zh-Hant/plugin.ts b/web/i18n/zh-Hant/plugin.ts index 03f6ece7e5..5e93925a6d 100644 --- a/web/i18n/zh-Hant/plugin.ts +++ b/web/i18n/zh-Hant/plugin.ts @@ -191,6 +191,7 @@ const translation = { clearAll: '全部清除', installing: '安裝 {{installingLength}} 個外掛程式,0 個完成。', }, + requestAPlugin: '申请外掛程式', submitPlugin: '提交外掛程式', findMoreInMarketplace: '在 Marketplace 中查找更多內容', installPlugin: '安裝外掛程式', From c939f04b1aeb94287b9650595cf83894f0ff6825 Mon Sep 17 00:00:00 2001 From: Boris Feld Date: Thu, 22 May 2025 10:11:50 +0200 Subject: [PATCH 16/19] Add support for tracking conversation with Opik Tracer (#20063) --- api/core/ops/opik_trace/opik_trace.py | 4 ++++ api/pyproject.toml | 2 +- api/uv.lock | 27 +++++++++++++++++++++++---- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/api/core/ops/opik_trace/opik_trace.py b/api/core/ops/opik_trace/opik_trace.py index c22df55357..6c159a4831 100644 --- a/api/core/ops/opik_trace/opik_trace.py +++ b/api/core/ops/opik_trace/opik_trace.py @@ -115,6 +115,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": workflow_metadata, "input": wrap_dict("input", trace_info.workflow_run_inputs), "output": wrap_dict("output", trace_info.workflow_run_outputs), + "thread_id": trace_info.conversation_id, "tags": ["message", "workflow"], "project_name": self.project, } @@ -144,6 +145,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": workflow_metadata, "input": wrap_dict("input", trace_info.workflow_run_inputs), "output": wrap_dict("output", trace_info.workflow_run_outputs), + "thread_id": trace_info.conversation_id, "tags": ["workflow"], "project_name": self.project, } @@ -306,6 +308,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": wrap_metadata(metadata), "input": trace_info.inputs, "output": message_data.answer, + "thread_id": message_data.conversation_id, "tags": ["message", str(trace_info.conversation_mode)], "project_name": self.project, } @@ -420,6 +423,7 @@ class OpikDataTrace(BaseTraceInstance): "metadata": wrap_metadata(trace_info.metadata), "input": trace_info.inputs, "output": trace_info.outputs, + "thread_id": trace_info.conversation_id, "tags": ["generate_name"], "project_name": self.project, } diff --git a/api/pyproject.toml b/api/pyproject.toml index 2606c7db4b..d9d1d054bf 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -39,7 +39,7 @@ dependencies = [ "oci~=2.135.1", "openai~=1.61.0", "openpyxl~=3.1.5", - "opik~=1.3.4", + "opik~=1.7.25", "opentelemetry-api==1.27.0", "opentelemetry-distro==0.48b0", "opentelemetry-exporter-otlp==1.27.0", diff --git a/api/uv.lock b/api/uv.lock index 
1dc28d09c0..520270c207 100644 --- a/api/uv.lock +++ b/api/uv.lock @@ -554,6 +554,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/69/cfc45dfce3b4ea417f9aec708ade1eda7f280fe8ae7feca796b036619587/boto3_stubs-1.38.20-py3-none-any.whl", hash = "sha256:5406da868980a3854cc9b57db150c6f2e39a4fe4a58f2872e61ac5a3d46f734e", size = 68667, upload-time = "2025-05-20T23:30:12.393Z" }, ] +[package.optional-dependencies] +bedrock-runtime = [ + { name = "mypy-boto3-bedrock-runtime" }, +] + [[package]] name = "botocore" version = "1.35.99" @@ -1426,7 +1431,7 @@ requires-dist = [ { name = "opentelemetry-sdk", specifier = "==1.27.0" }, { name = "opentelemetry-semantic-conventions", specifier = "==0.48b0" }, { name = "opentelemetry-util-http", specifier = "==0.48b0" }, - { name = "opik", specifier = "~=1.3.4" }, + { name = "opik", specifier = "~=1.7.25" }, { name = "pandas", extras = ["excel", "output-formatting", "performance"], specifier = "~=2.2.2" }, { name = "pandas-stubs", specifier = "~=2.2.3.241009" }, { name = "pandoc", specifier = "~=2.4" }, @@ -3229,6 +3234,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/09/4e/a7d65c7322c510de2c409ff3828b03354a7c43f5a8ed458a7a131b41c7b9/mypy-1.15.0-py3-none-any.whl", hash = "sha256:5469affef548bd1895d86d3bf10ce2b44e33d86923c29e4d675b3e323437ea3e", size = 2221777, upload-time = "2025-02-05T03:50:08.348Z" }, ] +[[package]] +name = "mypy-boto3-bedrock-runtime" +version = "1.38.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/55/56ce6f23d7fb98ce5b8a4261f089890bc94250666ea7089539dab55f6c25/mypy_boto3_bedrock_runtime-1.38.4.tar.gz", hash = "sha256:315a5f84c014c54e5406fdb80b030aba5cc79eb27982aff3d09ef331fb2cdd4d", size = 26169, upload-time = "2025-04-28T19:26:13.437Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/eb/3015c6504540ca4888789ee14f47590c0340b748a33b059eeb6a48b406bb/mypy_boto3_bedrock_runtime-1.38.4-py3-none-any.whl", hash = "sha256:af14320532e9b798095129a3307f4b186ba80258917bb31410cda7f423592d72", size = 31858, upload-time = "2025-04-28T19:26:09.667Z" }, +] + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3720,11 +3737,13 @@ wheels = [ [[package]] name = "opik" -version = "1.3.6" +version = "1.7.25" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "boto3-stubs", extra = ["bedrock-runtime"] }, { name = "click" }, { name = "httpx" }, + { name = "jinja2" }, { name = "levenshtein" }, { name = "litellm" }, { name = "openai" }, @@ -3737,9 +3756,9 @@ dependencies = [ { name = "tqdm" }, { name = "uuid6" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d8/16/b37208d6a77f3cc74750cff4e0970e6f596aef0f491a675a40aa879157e6/opik-1.3.6.tar.gz", hash = "sha256:25d6fa8b7aa1ef23d10d598040e539440912c12b765eabfc75c8780bbbfc8ad3", size = 177174, upload-time = "2025-01-15T17:20:48.71Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/dd/313895410761ee3eb36c1141fa339254c093b3cdfceb79b111c80eb396be/opik-1.7.25.tar.gz", hash = "sha256:5fcdb05bbc98e995f3eea2f94096f98c5ff7a2aca2c895d50636c44d00a07d4b", size = 286950, upload-time = "2025-05-20T13:51:16.6Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/3f/e9d14a97f85d34505770b7c7715bd72bbfc40a778163818f0d3e871264bb/opik-1.3.6-py3-none-any.whl", hash = "sha256:888973c2a1276d68c9b3cf26d8078db8aa675d2c907edda328cdab4995a8e29b", size = 
341630, upload-time = "2025-01-15T17:20:45.983Z" }, + { url = "https://files.pythonhosted.org/packages/63/0a/daee58db3cdd56681672dbc62e5a71200af6d41f34bac2425d1556d3e004/opik-1.7.25-py3-none-any.whl", hash = "sha256:595fc2e6794e35d87449f64dc5d6092705645575d2c34469d04dc2bbe44dd32f", size = 547198, upload-time = "2025-05-20T13:51:14.964Z" }, ] [[package]] From 8fad3036bfaa416dd9886f99a7c4e64f6898e40a Mon Sep 17 00:00:00 2001 From: He Wang Date: Thu, 22 May 2025 16:19:53 +0800 Subject: [PATCH 17/19] set oceanbase ip to 127.0.0.1 to avoid connection failure after restart (#20103) --- docker/docker-compose-template.yaml | 1 + docker/docker-compose.yaml | 1 + 2 files changed, 2 insertions(+) diff --git a/docker/docker-compose-template.yaml b/docker/docker-compose-template.yaml index ceb32e4aba..cff5efdb6c 100644 --- a/docker/docker-compose-template.yaml +++ b/docker/docker-compose-template.yaml @@ -444,6 +444,7 @@ services: OB_SYS_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} + OB_SERVER_IP: 127.0.0.1 MODE: MINI ports: - "${OCEANBASE_VECTOR_PORT:-2881}:2881" diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 371646eb9f..8fb3addf88 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -942,6 +942,7 @@ services: OB_SYS_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} OB_TENANT_PASSWORD: ${OCEANBASE_VECTOR_PASSWORD:-difyai123456} OB_CLUSTER_NAME: ${OCEANBASE_CLUSTER_NAME:-difyai} + OB_SERVER_IP: 127.0.0.1 MODE: MINI ports: - "${OCEANBASE_VECTOR_PORT:-2881}:2881" From 7bf00ef25ccaa5da133c7fa7ce8a0d81f06ce7fe Mon Sep 17 00:00:00 2001 From: sayThQ199 <18852951350@163.com> Date: Thu, 22 May 2025 16:31:13 +0800 Subject: [PATCH 18/19] =?UTF-8?q?fix(markdown):=20improve=20ECharts=20rend?= =?UTF-8?q?ering=20for=20streaming=20content=20and=20da=E2=80=A6=20(#20101?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: crazywoola <427733928@qq.com> --- web/app/components/base/markdown.tsx | 269 +++++++++++++++++++++++++-- 1 file changed, 250 insertions(+), 19 deletions(-) diff --git a/web/app/components/base/markdown.tsx b/web/app/components/base/markdown.tsx index 4600543ac4..b28bf2bc8f 100644 --- a/web/app/components/base/markdown.tsx +++ b/web/app/components/base/markdown.tsx @@ -11,7 +11,7 @@ import { atelierHeathDark, atelierHeathLight, } from 'react-syntax-highlighter/dist/esm/styles/hljs' -import { Component, memo, useMemo, useRef, useState } from 'react' +import { Component, memo, useEffect, useMemo, useRef, useState } from 'react' import { flow } from 'lodash-es' import ActionButton from '@/app/components/base/action-button' import CopyIcon from '@/app/components/base/copy-icon' @@ -74,7 +74,7 @@ const preprocessLaTeX = (content: string) => { processedContent = flow([ (str: string) => str.replace(/\\\[(.*?)\\\]/g, (_, equation) => `$$${equation}$$`), - (str: string) => str.replace(/\\\[(.*?)\\\]/gs, (_, equation) => `$$${equation}$$`), + (str: string) => str.replace(/\\\[([\s\S]*?)\\\]/g, (_, equation) => `$$${equation}$$`), (str: string) => str.replace(/\\\((.*?)\\\)/g, (_, equation) => `$$${equation}$$`), (str: string) => str.replace(/(^|[^\\])\$(.+?)\$/g, (_, prefix, equation) => `${prefix}$${equation}$`), ])(processedContent) @@ -124,23 +124,143 @@ export function PreCode(props: { children: any }) { const CodeBlock: any = memo(({ inline, className, children = '', 
...props }: any) => { const { theme } = useTheme() const [isSVG, setIsSVG] = useState(true) + const [chartState, setChartState] = useState<'loading' | 'success' | 'error'>('loading') + const [finalChartOption, setFinalChartOption] = useState(null) + const echartsRef = useRef(null) + const contentRef = useRef('') + const processedRef = useRef(false) // Track if content was successfully processed const match = /language-(\w+)/.exec(className || '') const language = match?.[1] const languageShowName = getCorrectCapitalizationLanguageName(language || '') - const chartData = useMemo(() => { - const str = String(children).replace(/\n$/, '') - if (language === 'echarts') { - try { - return JSON.parse(str) - } - catch { } - try { - // eslint-disable-next-line no-new-func, sonarjs/code-eval - return new Function(`return ${str}`)() - } - catch { } + const isDarkMode = theme === Theme.dark + + // Handle container resize for echarts + useEffect(() => { + if (language !== 'echarts' || !echartsRef.current) return + + const handleResize = () => { + // This gets the echarts instance from the component + const instance = echartsRef.current?.getEchartsInstance?.() + if (instance) + instance.resize() + } + + window.addEventListener('resize', handleResize) + + // Also manually trigger resize after a short delay to ensure proper sizing + const resizeTimer = setTimeout(handleResize, 200) + + return () => { + window.removeEventListener('resize', handleResize) + clearTimeout(resizeTimer) + } + }, [language, echartsRef.current]) + + // Process chart data when content changes + useEffect(() => { + // Only process echarts content + if (language !== 'echarts') return + + // Reset state when new content is detected + if (!contentRef.current) { + setChartState('loading') + processedRef.current = false + } + + const newContent = String(children).replace(/\n$/, '') + + // Skip if content hasn't changed + if (contentRef.current === newContent) return + contentRef.current = newContent + + const trimmedContent = newContent.trim() + if (!trimmedContent) return + + // Detect if this is historical data (already complete) + // Historical data typically comes as a complete code block with complete JSON + const isCompleteJson + = (trimmedContent.startsWith('{') && trimmedContent.endsWith('}') + && trimmedContent.split('{').length === trimmedContent.split('}').length) + || (trimmedContent.startsWith('[') && trimmedContent.endsWith(']') + && trimmedContent.split('[').length === trimmedContent.split(']').length) + + // If the JSON structure looks complete, try to parse it right away + if (isCompleteJson && !processedRef.current) { + try { + const parsed = JSON.parse(trimmedContent) + if (typeof parsed === 'object' && parsed !== null) { + setFinalChartOption(parsed) + setChartState('success') + processedRef.current = true + return + } + } + catch { + try { + // eslint-disable-next-line no-new-func, sonarjs/code-eval + const result = new Function(`return ${trimmedContent}`)() + if (typeof result === 'object' && result !== null) { + setFinalChartOption(result) + setChartState('success') + processedRef.current = true + return + } + } + catch { + // If we have a complete JSON structure but it doesn't parse, + // it's likely an error rather than incomplete data + setChartState('error') + processedRef.current = true + return + } + } + } + + // If we get here, either the JSON isn't complete yet, or we failed to parse it + // Check more conditions for streaming data + const isIncomplete + = trimmedContent.length < 5 + || 
(trimmedContent.startsWith('{') + && (!trimmedContent.endsWith('}') + || trimmedContent.split('{').length !== trimmedContent.split('}').length)) + || (trimmedContent.startsWith('[') + && (!trimmedContent.endsWith(']') + || trimmedContent.split('[').length !== trimmedContent.split(']').length)) + || (trimmedContent.split('"').length % 2 !== 1) + || (trimmedContent.includes('{"') && !trimmedContent.includes('"}')) + + // Only try to parse streaming data if it looks complete and hasn't been processed + if (!isIncomplete && !processedRef.current) { + let isValidOption = false + + try { + const parsed = JSON.parse(trimmedContent) + if (typeof parsed === 'object' && parsed !== null) { + setFinalChartOption(parsed) + isValidOption = true + } + } + catch { + try { + // eslint-disable-next-line no-new-func, sonarjs/code-eval + const result = new Function(`return ${trimmedContent}`)() + if (typeof result === 'object' && result !== null) { + setFinalChartOption(result) + isValidOption = true + } + } + catch { + // Both parsing methods failed, but content looks complete + setChartState('error') + processedRef.current = true + } + } + + if (isValidOption) { + setChartState('success') + processedRef.current = true + } } - return JSON.parse('{"title":{"text":"ECharts error - Wrong option."}}') }, [language, children]) const renderCodeContent = useMemo(() => { @@ -150,14 +270,125 @@ const CodeBlock: any = memo(({ inline, className, children = '', ...props }: any if (isSVG) return break - case 'echarts': + case 'echarts': { + // Loading state: show loading indicator + if (chartState === 'loading') { + return ( +
+ [JSX lost in extraction: the loading branch renders a centered container with a rotating spinner SVG ("Rotating spinner that works in both light and dark modes") and the caption "Chart loading..."]
+ ) + } + + // Success state: show the chart + if (chartState === 'success' && finalChartOption) { + return ( +
+ [JSX lost in extraction: renders the ECharts React component with finalChartOption inside the file's error-boundary wrapper; a chart-ready callback reads echartsRef.current?.getEchartsInstance?.() and calls instance.resize()]
+ ) + } + + // Error state: show error message + const errorOption = { + title: { + text: 'ECharts error - Wrong option.', + }, + } + return ( -
+ [JSX lost in extraction: the error branch renders the chart component with the errorOption defined above, replacing the previous inline fallback markup]
) + } case 'svg': if (isSVG) { return ( @@ -192,7 +423,7 @@ const CodeBlock: any = memo(({ inline, className, children = '', ...props }: any ) } - }, [children, language, isSVG, chartData, props, theme, match]) + }, [children, language, isSVG, finalChartOption, props, theme, match]) if (inline || !match) return {children} From c2a7e0e986b3a71b223e5e60ba93fe97b1ed1116 Mon Sep 17 00:00:00 2001 From: zxhlyh Date: Thu, 22 May 2025 16:43:04 +0800 Subject: [PATCH 19/19] version panel --- .../[datasetId]/pipeline/page.tsx | 4 +++- .../dataset-config/params-config/weighted-score.tsx | 1 + .../rag-pipeline/components/panel/index.tsx | 13 ++++++++++++- web/app/components/rag-pipeline/index.tsx | 1 + web/app/components/workflow/index.tsx | 1 - .../workflow/nodes/data-source/hooks/use-config.ts | 5 +++++ .../components/chunk-structure/index.tsx | 6 ++++-- .../components/chunk-structure/selector.tsx | 6 ++++-- .../knowledge-base/components/input-variable.tsx | 6 ++++-- .../components/retrieval-setting/index.tsx | 3 +-- web/i18n/en-US/workflow.ts | 6 ++++++ web/i18n/zh-Hans/workflow.ts | 6 ++++++ 12 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 web/app/components/workflow/nodes/data-source/hooks/use-config.ts diff --git a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/pipeline/page.tsx b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/pipeline/page.tsx index c5b5f22bf1..9a18021cc0 100644 --- a/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/pipeline/page.tsx +++ b/web/app/(commonLayout)/datasets/(datasetDetailLayout)/[datasetId]/pipeline/page.tsx @@ -3,7 +3,9 @@ import RagPipeline from '@/app/components/rag-pipeline' const PipelinePage = () => { return ( - +
+ [JSX lost in extraction: the added lines wrap the RagPipeline element in a container div]
) } export default PipelinePage diff --git a/web/app/components/app/configuration/dataset-config/params-config/weighted-score.tsx b/web/app/components/app/configuration/dataset-config/params-config/weighted-score.tsx index d3ee89f28c..ebfa3b1e12 100644 --- a/web/app/components/app/configuration/dataset-config/params-config/weighted-score.tsx +++ b/web/app/components/app/configuration/dataset-config/params-config/weighted-score.tsx @@ -41,6 +41,7 @@ const WeightedScore = ({ value={value.value[0]} onChange={v => !readonly && onChange({ value: [v, (10 - v * 10) / 10] })} trackClassName='weightedScoreSliderTrack' + disabled={readonly} />
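Stepping back to the markdown.tsx patch above (PATCH 18): its core idea is to distinguish an incomplete streaming payload from a genuinely malformed one before attempting to parse. A condensed sketch of that completeness heuristic — the function name is mine, and the real component additionally checks quote parity and tracks processed state in refs:

```ts
// Condensed form of the streaming-completeness check from PATCH 18.
// s.split(c).length - 1 counts occurrences of c, so equal split lengths
// for the opening and closing delimiter mean the counts are balanced.
function looksLikeCompleteJson(raw: string): boolean {
  const s = raw.trim()
  if (s.length < 5)
    return false
  if (s.startsWith('{'))
    return s.endsWith('}') && s.split('{').length === s.split('}').length
  if (s.startsWith('['))
    return s.endsWith(']') && s.split('[').length === s.split(']').length
  return false
}
```

During streaming, the component skips `JSON.parse` (and the `new Function` fallback) until the chunked content passes a check like this, so partial chunks keep the loading state instead of flashing an error chart.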
diff --git a/web/app/components/rag-pipeline/components/panel/index.tsx b/web/app/components/rag-pipeline/components/panel/index.tsx index 14140e41f0..12811c6790 100644 --- a/web/app/components/rag-pipeline/components/panel/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/index.tsx @@ -24,14 +24,25 @@ const RagPipelinePanelOnRight = () => { } const RagPipelinePanel = () => { + const pipelineId = useStore(s => s.pipelineId) + const versionHistoryPanelProps = useMemo(() => { + return { + getVersionListUrl: `/rag/pipelines/${pipelineId}/workflows`, + deleteVersionUrl: (versionId: string) => `/rag/pipelines/${pipelineId}/workflows/${versionId}`, + updateVersionUrl: (versionId: string) => `/rag/pipelines/${pipelineId}/workflows/${versionId}`, + latestVersionId: '', + } + }, [pipelineId]) + const panelProps: PanelProps = useMemo(() => { return { components: { left: null, right: , }, + versionHistoryPanelProps, } - }, []) + }, [versionHistoryPanelProps]) return ( diff --git a/web/app/components/rag-pipeline/index.tsx b/web/app/components/rag-pipeline/index.tsx index c6b642994a..d1f0208684 100644 --- a/web/app/components/rag-pipeline/index.tsx +++ b/web/app/components/rag-pipeline/index.tsx @@ -46,6 +46,7 @@ const RagPipeline = () => { ) diff --git a/web/app/components/workflow/index.tsx b/web/app/components/workflow/index.tsx index 549117faf7..beea4116cf 100644 --- a/web/app/components/workflow/index.tsx +++ b/web/app/components/workflow/index.tsx @@ -152,7 +152,6 @@ export const Workflow: FC = memo(({ setAutoFreeze(true) } }, []) - useEffect(() => { return () => { handleSyncWorkflowDraft(true, true) diff --git a/web/app/components/workflow/nodes/data-source/hooks/use-config.ts b/web/app/components/workflow/nodes/data-source/hooks/use-config.ts new file mode 100644 index 0000000000..e3aee2d19c --- /dev/null +++ b/web/app/components/workflow/nodes/data-source/hooks/use-config.ts @@ -0,0 +1,5 @@ +export const useConfig = (id: string) => { + return { + id, + } +} diff --git a/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/index.tsx b/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/index.tsx index b4e2e59ede..92d1e3e748 100644 --- a/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/index.tsx +++ b/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/index.tsx @@ -1,4 +1,5 @@ import { memo } from 'react' +import { useTranslation } from 'react-i18next' import { Field } from '@/app/components/workflow/nodes/_base/components/layout' import type { ChunkStructureEnum } from '../../types' import OptionCard from '../option-card' @@ -15,6 +16,7 @@ const ChunkStructure = ({ onChunkStructureChange, readonly = false, }: ChunkStructureProps) => { + const { t } = useTranslation() const { options, optionMap, @@ -23,8 +25,8 @@ const ChunkStructure = ({ return ( { + const { t } = useTranslation() const [open, setOpen] = useState(false) const handleSelect = useCallback((optionId: ChunkStructureEnum) => { @@ -47,13 +49,13 @@ const Selector = ({ size='small' variant='ghost-accent' > - change + {t('workflow.panel.change')}
- change Chunk Structure + {t('workflow.nodes.knowledgeBase.changeChunkStructure')}
{ diff --git a/web/app/components/workflow/nodes/knowledge-base/components/input-variable.tsx b/web/app/components/workflow/nodes/knowledge-base/components/input-variable.tsx index 91f5e396c2..ae91041903 100644 --- a/web/app/components/workflow/nodes/knowledge-base/components/input-variable.tsx +++ b/web/app/components/workflow/nodes/knowledge-base/components/input-variable.tsx @@ -1,4 +1,5 @@ import { memo } from 'react' +import { useTranslation } from 'react-i18next' import VarReferencePicker from '@/app/components/workflow/nodes/_base/components/variable/var-reference-picker' import { Field } from '@/app/components/workflow/nodes/_base/components/layout' import type { ValueSelector } from '@/app/components/workflow/types' @@ -15,11 +16,12 @@ const InputVariable = ({ onInputVariableChange, readonly = false, }: InputVariableProps) => { + const { t } = useTranslation() + return ( {t('datasetSettings.form.retrievalSetting.learnMore')} -   - about retrieval method. +  {t('workflow.nodes.knowledgeBase.aboutRetrieval')}
), }} diff --git a/web/i18n/en-US/workflow.ts b/web/i18n/en-US/workflow.ts index d28cab291f..81726464a4 100644 --- a/web/i18n/en-US/workflow.ts +++ b/web/i18n/en-US/workflow.ts @@ -368,6 +368,7 @@ const translation = { ms: 'ms', retries: '{{num}} Retries', }, + inputVars: 'Input Variables', }, start: { required: 'required', @@ -767,6 +768,11 @@ const translation = { currentLoopCount: 'Current loop count: {{count}}', totalLoopCount: 'Total loop count: {{count}}', }, + knowledgeBase: { + chunkStructure: 'Chunk Structure', + changeChunkStructure: 'Change Chunk Structure', + aboutRetrieval: 'about retrieval method.', + }, note: { addNote: 'Add Note', editor: { diff --git a/web/i18n/zh-Hans/workflow.ts b/web/i18n/zh-Hans/workflow.ts index ae381b9b8a..1d854c4749 100644 --- a/web/i18n/zh-Hans/workflow.ts +++ b/web/i18n/zh-Hans/workflow.ts @@ -369,6 +369,7 @@ const translation = { ms: '毫秒', retries: '{{num}} 重试次数', }, + inputVars: '输入变量', }, start: { required: '必填', @@ -768,6 +769,11 @@ const translation = { currentLoopCount: '当前循环次数:{{count}}', totalLoopCount: '总循环次数:{{count}}', }, + knowledgeBase: { + chunkStructure: '分段结构', + changeChunkStructure: '更改分段结构', + aboutRetrieval: '关于知识检索。', + }, note: { addNote: '添加注释', editor: {
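One pattern worth pulling out of the final patch: the rag-pipeline panel derives its version-history endpoints from the pipeline id rather than reusing app-scoped routes. A sketch mirroring the useMemo block in panel/index.tsx — the props type is abbreviated here, and latestVersionId is left empty exactly as in the patch:

```ts
// Mirrors the versionHistoryPanelProps useMemo in
// web/app/components/rag-pipeline/components/panel/index.tsx.
type VersionHistoryPanelProps = {
  getVersionListUrl: string
  deleteVersionUrl: (versionId: string) => string
  updateVersionUrl: (versionId: string) => string
  latestVersionId: string
}

function buildVersionHistoryProps(pipelineId: string): VersionHistoryPanelProps {
  const base = `/rag/pipelines/${pipelineId}/workflows`
  return {
    getVersionListUrl: base,                                  // list is static per pipeline
    deleteVersionUrl: (versionId: string) => `${base}/${versionId}`, // built per version
    updateVersionUrl: (versionId: string) => `${base}/${versionId}`,
    latestVersionId: '', // populated elsewhere; empty in this patch
  }
}
```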