feat: implement SSE for data source node processing and completion events, replacing previous run methods

This commit is contained in:
twwu 2025-06-18 15:06:50 +08:00
parent 4b3a54633f
commit f85e6a0dea
6 changed files with 115 additions and 168 deletions

View File

@ -1,12 +1,14 @@
import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useCallback, useEffect, useMemo, useState } from 'react'
import WorkspaceSelector from '@/app/components/base/notion-page-selector/workspace-selector' import WorkspaceSelector from '@/app/components/base/notion-page-selector/workspace-selector'
import SearchInput from '@/app/components/base/notion-page-selector/search-input' import SearchInput from '@/app/components/base/notion-page-selector/search-input'
import PageSelector from '@/app/components/base/notion-page-selector/page-selector' import PageSelector from '@/app/components/base/notion-page-selector/page-selector'
import type { DataSourceNotionPageMap, DataSourceNotionWorkspace, NotionPage } from '@/models/common' import type { DataSourceNotionPageMap, DataSourceNotionWorkspace, NotionPage } from '@/models/common'
import Header from '@/app/components/datasets/create/website/base/header' import Header from '@/app/components/datasets/create/website/base/header'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useDraftDatasourceNodeRun, usePublishedDatasourceNodeRun } from '@/service/use-pipeline'
import { DatasourceType } from '@/models/pipeline' import { DatasourceType } from '@/models/pipeline'
import { ssePost } from '@/service/base'
import Toast from '@/app/components/base/toast'
import type { DataSourceNodeCompletedResponse } from '@/types/pipeline'
type OnlineDocumentSelectorProps = { type OnlineDocumentSelectorProps = {
value?: string[] value?: string[]
@ -33,28 +35,37 @@ const OnlineDocumentSelector = ({
nodeId, nodeId,
headerInfo, headerInfo,
}: OnlineDocumentSelectorProps) => { }: OnlineDocumentSelectorProps) => {
const pipeline_id = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const [documentsData, setDocumentsData] = useState<DataSourceNotionWorkspace[]>([]) const [documentsData, setDocumentsData] = useState<DataSourceNotionWorkspace[]>([])
const [searchValue, setSearchValue] = useState('') const [searchValue, setSearchValue] = useState('')
const [currentWorkspaceId, setCurrentWorkspaceId] = useState('') const [currentWorkspaceId, setCurrentWorkspaceId] = useState('')
const useDatasourceNodeRun = useRef(!isInPipeline ? usePublishedDatasourceNodeRun : useDraftDatasourceNodeRun) const datasourceNodeRunURL = !isInPipeline
const { mutateAsync: crawlOnlineDocuments } = useDatasourceNodeRun.current() ? `/rag/pipelines/${pipelineId}/workflows/published/datasource/nodes/${nodeId}/run`
: `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
const getOnlineDocuments = useCallback(async () => { const getOnlineDocuments = useCallback(async () => {
if (pipeline_id) { ssePost(
await crawlOnlineDocuments({ datasourceNodeRunURL,
pipeline_id, {
node_id: nodeId, body: {
inputs: {}, inputs: {},
datasource_type: DatasourceType.onlineDocument, datasource_type: DatasourceType.onlineDocument,
}, {
onSuccess(documentsData) {
setDocumentsData(documentsData.result as DataSourceNotionWorkspace[])
}, },
}) },
} {
}, [crawlOnlineDocuments, nodeId, pipeline_id]) onDataSourceNodeCompleted: (documentsData: DataSourceNodeCompletedResponse) => {
setDocumentsData(documentsData.data as DataSourceNotionWorkspace[])
},
onError: (message: string) => {
Toast.notify({
type: 'error',
message,
})
},
},
)
}, [datasourceNodeRunURL])
useEffect(() => { useEffect(() => {
getOnlineDocuments() getOnlineDocuments()

View File

@ -8,16 +8,16 @@ import Crawling from './crawling'
import ErrorMessage from './error-message' import ErrorMessage from './error-message'
import CrawledResult from './crawled-result' import CrawledResult from './crawled-result'
import { import {
useDraftDatasourceNodeRun,
useDraftDatasourceNodeRunStatus,
useDraftPipelinePreProcessingParams, useDraftPipelinePreProcessingParams,
usePublishedDatasourceNodeRun,
usePublishedDatasourceNodeRunStatus,
usePublishedPipelinePreProcessingParams, usePublishedPipelinePreProcessingParams,
} from '@/service/use-pipeline' } from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { DatasourceType } from '@/models/pipeline' import { DatasourceType } from '@/models/pipeline'
import { sleep } from '@/utils' import { ssePost } from '@/service/base'
import type {
DataSourceNodeCompletedResponse,
DataSourceNodeProcessingResponse,
} from '@/types/pipeline'
const I18N_PREFIX = 'datasetCreation.stepOne.website' const I18N_PREFIX = 'datasetCreation.stepOne.website'
@ -51,6 +51,8 @@ const Crawler = ({
const { t } = useTranslation() const { t } = useTranslation()
const [step, setStep] = useState<Step>(Step.init) const [step, setStep] = useState<Step>(Step.init)
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0) const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
const [totalNum, setTotalNum] = useState(0)
const [crawledNum, setCrawledNum] = useState(0)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelinePreProcessingParams : useDraftPipelinePreProcessingParams) const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelinePreProcessingParams : useDraftPipelinePreProcessingParams)
@ -68,66 +70,49 @@ const Crawler = ({
const isCrawlFinished = step === Step.finished const isCrawlFinished = step === Step.finished
const isRunning = step === Step.running const isRunning = step === Step.running
const [crawlResult, setCrawlResult] = useState<{ const [crawlResult, setCrawlResult] = useState<{
result: CrawlResultItem[] data: CrawlResultItem[]
time_consuming: number | string time_consuming: number | string
} | undefined>(undefined) } | undefined>(undefined)
const [crawlErrorMessage, setCrawlErrorMessage] = useState('') const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
const showError = isCrawlFinished && crawlErrorMessage const showError = isCrawlFinished && crawlErrorMessage
const useDatasourceNodeRun = useRef(!isInPipeline ? usePublishedDatasourceNodeRun : useDraftDatasourceNodeRun) const datasourceNodeRunURL = !isInPipeline
const useDatasourceNodeRunStatus = useRef(!isInPipeline ? usePublishedDatasourceNodeRunStatus : useDraftDatasourceNodeRunStatus) ? `/rag/pipelines/${pipelineId}/workflows/published/datasource/nodes/${nodeId}/run`
const { mutateAsync: runDatasourceNode } = useDatasourceNodeRun.current() : `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
const { mutateAsync: getDatasourceNodeRunStatus } = useDatasourceNodeRunStatus.current()
const checkCrawlStatus = useCallback(async (jobId: string) => {
const res = await getDatasourceNodeRunStatus({
node_id: nodeId,
pipeline_id: pipelineId!,
job_id: jobId,
datasource_type: DatasourceType.websiteCrawl,
}, {
onError: async (error: any) => {
const message = await error.json()
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
},
}) as any
if (res.status === 'completed') {
setCrawlResult(res)
onCheckedCrawlResultChange(res.result || []) // default select the crawl result
setCrawlErrorMessage('')
setStep(Step.finished)
}
else if (res.status === 'processing') {
await sleep(2500)
await checkCrawlStatus(jobId)
}
}, [getDatasourceNodeRunStatus, nodeId, pipelineId, t, onCheckedCrawlResultChange])
const handleRun = useCallback(async (value: Record<string, any>) => { const handleRun = useCallback(async (value: Record<string, any>) => {
setStep(Step.running) setStep(Step.running)
const res = await runDatasourceNode({ ssePost(
node_id: nodeId, datasourceNodeRunURL,
pipeline_id: pipelineId!, {
inputs: value, body: {
datasource_type: DatasourceType.websiteCrawl, inputs: value,
}, { datasource_type: DatasourceType.websiteCrawl,
onError: async (error: any) => { response_mode: 'streaming',
const message = await error.json() },
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
setStep(Step.finished)
}, },
}) as any {
const jobId = res.job_id onDataSourceNodeProcessing: (data: DataSourceNodeProcessingResponse) => {
if (!jobId && res.status === 'completed') { setTotalNum(data.total ?? 0)
setCrawlResult(res) setCrawledNum(data.completed ?? 0)
onCheckedCrawlResultChange(res.result || []) // default select the crawl result },
setStep(Step.finished) onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => {
} const { data: crawlData, time_consuming } = data
else if (jobId) { setCrawlResult({
await checkCrawlStatus(jobId) data: crawlData as CrawlResultItem[],
} time_consuming: time_consuming ?? 0,
setCrawlErrorMessage('') })
}, [runDatasourceNode, nodeId, pipelineId, onCheckedCrawlResultChange, checkCrawlStatus, t]) onCheckedCrawlResultChange(crawlData || []) // default select the crawl result
setCrawlErrorMessage('')
setStep(Step.finished)
},
onError: (message: string) => {
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
setStep(Step.finished)
},
},
)
}, [datasourceNodeRunURL, onCheckedCrawlResultChange, t])
const handleSubmit = useCallback((value: Record<string, any>) => { const handleSubmit = useCallback((value: Record<string, any>) => {
handleRun(value) handleRun(value)
@ -152,8 +137,8 @@ const Crawler = ({
<div className='relative flex flex-col'> <div className='relative flex flex-col'>
{isRunning && ( {isRunning && (
<Crawling <Crawling
crawledNum={0} crawledNum={crawledNum}
totalNum={0} totalNum={totalNum}
/> />
)} )}
{showError && ( {showError && (
@ -166,7 +151,7 @@ const Crawler = ({
{isCrawlFinished && !showError && ( {isCrawlFinished && !showError && (
<CrawledResult <CrawledResult
className='mt-2' className='mt-2'
list={crawlResult?.result || []} list={crawlResult?.data || []}
checkedList={checkedCrawlResult} checkedList={checkedCrawlResult}
onSelectedChange={onCheckedCrawlResultChange} onSelectedChange={onCheckedCrawlResultChange}
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0} usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}

View File

@ -168,34 +168,6 @@ export type PipelinePreProcessingParamsResponse = {
variables: RAGPipelineVariables variables: RAGPipelineVariables
} }
export type PipelineDatasourceNodeRunRequest = {
pipeline_id: string
node_id: string
inputs: Record<string, any>
datasource_type: DatasourceType
}
export type PipelineDatasourceNodeRunResponse = {
job_id?: string
status: 'processing' | 'completed'
result: any
provider_type: DatasourceType
}
export type PipelineDatasourceNodeRunStatusRequest = {
pipeline_id: string
node_id: string
job_id: string
datasource_type: DatasourceType
}
export type PipelineDatasourceNodeRunStatusResponse = {
provider_type: DatasourceType
result: Record<string, any>
status: 'processing' | 'completed'
job_id: string
}
export type PublishedPipelineInfoResponse = { export type PublishedPipelineInfoResponse = {
id: string id: string
graph: { graph: {

View File

@ -25,6 +25,10 @@ import { removeAccessToken } from '@/app/components/share/utils'
import type { FetchOptionType, ResponseError } from './fetch' import type { FetchOptionType, ResponseError } from './fetch'
import { ContentType, base, baseOptions, getAccessToken } from './fetch' import { ContentType, base, baseOptions, getAccessToken } from './fetch'
import { asyncRunSafe } from '@/utils' import { asyncRunSafe } from '@/utils'
import type {
DataSourceNodeCompletedResponse,
DataSourceNodeProcessingResponse,
} from '@/types/pipeline'
const TIME_OUT = 100000 const TIME_OUT = 100000
export type IOnDataMoreInfo = { export type IOnDataMoreInfo = {
@ -63,6 +67,9 @@ export type IOnLoopNext = (workflowStarted: LoopNextResponse) => void
export type IOnLoopFinished = (workflowFinished: LoopFinishedResponse) => void export type IOnLoopFinished = (workflowFinished: LoopFinishedResponse) => void
export type IOnAgentLog = (agentLog: AgentLogResponse) => void export type IOnAgentLog = (agentLog: AgentLogResponse) => void
export type IOnDataSourceNodeProcessing = (dataSourceNodeProcessing: DataSourceNodeProcessingResponse) => void
export type IOnDataSourceNodeCompleted = (dataSourceNodeCompleted: DataSourceNodeCompletedResponse) => void
export type IOtherOptions = { export type IOtherOptions = {
isPublicAPI?: boolean isPublicAPI?: boolean
isMarketplaceAPI?: boolean isMarketplaceAPI?: boolean
@ -97,6 +104,10 @@ export type IOtherOptions = {
onLoopNext?: IOnLoopNext onLoopNext?: IOnLoopNext
onLoopFinish?: IOnLoopFinished onLoopFinish?: IOnLoopFinished
onAgentLog?: IOnAgentLog onAgentLog?: IOnAgentLog
// Pipeline data source node run
onDataSourceNodeProcessing?: IOnDataSourceNodeProcessing
onDataSourceNodeCompleted?: IOnDataSourceNodeCompleted
} }
function unicodeToChar(text: string) { function unicodeToChar(text: string) {
@ -152,6 +163,8 @@ const handleStream = (
onTTSEnd?: IOnTTSEnd, onTTSEnd?: IOnTTSEnd,
onTextReplace?: IOnTextReplace, onTextReplace?: IOnTextReplace,
onAgentLog?: IOnAgentLog, onAgentLog?: IOnAgentLog,
onDataSourceNodeProcessing?: IOnDataSourceNodeProcessing,
onDataSourceNodeCompleted?: IOnDataSourceNodeCompleted,
) => { ) => {
if (!response.ok) if (!response.ok)
throw new Error('Network response was not ok') throw new Error('Network response was not ok')
@ -270,6 +283,15 @@ const handleStream = (
else if (bufferObj.event === 'tts_message_end') { else if (bufferObj.event === 'tts_message_end') {
onTTSEnd?.(bufferObj.message_id, bufferObj.audio) onTTSEnd?.(bufferObj.message_id, bufferObj.audio)
} }
else if (bufferObj.event === 'datasource_processing') {
onDataSourceNodeProcessing?.(bufferObj as DataSourceNodeProcessingResponse)
}
else if (bufferObj.event === 'datasource_completed') {
onDataSourceNodeCompleted?.(bufferObj as DataSourceNodeCompletedResponse)
}
else {
console.warn(`Unknown event: ${bufferObj.event}`, bufferObj)
}
} }
}) })
buffer = lines[lines.length - 1] buffer = lines[lines.length - 1]
@ -363,6 +385,8 @@ export const ssePost = async (
onLoopStart, onLoopStart,
onLoopNext, onLoopNext,
onLoopFinish, onLoopFinish,
onDataSourceNodeProcessing,
onDataSourceNodeCompleted,
} = otherOptions } = otherOptions
const abortController = new AbortController() const abortController = new AbortController()
@ -460,6 +484,8 @@ export const ssePost = async (
onTTSEnd, onTTSEnd,
onTextReplace, onTextReplace,
onAgentLog, onAgentLog,
onDataSourceNodeProcessing,
onDataSourceNodeCompleted,
) )
}).catch((e) => { }).catch((e) => {
if (e.toString() !== 'AbortError: The user aborted a request.' && !e.toString().errorMessage.includes('TypeError: Cannot assign to read only property')) if (e.toString() !== 'AbortError: The user aborted a request.' && !e.toString().errorMessage.includes('TypeError: Cannot assign to read only property'))

View File

@ -8,10 +8,6 @@ import type {
ImportPipelineDSLRequest, ImportPipelineDSLRequest,
ImportPipelineDSLResponse, ImportPipelineDSLResponse,
PipelineCheckDependenciesResponse, PipelineCheckDependenciesResponse,
PipelineDatasourceNodeRunRequest,
PipelineDatasourceNodeRunResponse,
PipelineDatasourceNodeRunStatusRequest,
PipelineDatasourceNodeRunStatusResponse,
PipelinePreProcessingParamsRequest, PipelinePreProcessingParamsRequest,
PipelinePreProcessingParamsResponse, PipelinePreProcessingParamsResponse,
PipelineProcessingParamsRequest, PipelineProcessingParamsRequest,
@ -133,66 +129,6 @@ export const useCheckPipelineDependencies = (
}) })
} }
export const useDraftDatasourceNodeRun = (
mutationOptions: MutationOptions<PipelineDatasourceNodeRunResponse, Error, PipelineDatasourceNodeRunRequest> = {},
) => {
return useMutation({
mutationKey: [NAME_SPACE, 'draft-datasource-node-run'],
mutationFn: (request: PipelineDatasourceNodeRunRequest) => {
const { pipeline_id, node_id, ...rest } = request
return post<PipelineDatasourceNodeRunResponse>(`/rag/pipelines/${pipeline_id}/workflows/draft/datasource/nodes/${node_id}/run`, {
body: rest,
})
},
...mutationOptions,
})
}
export const usePublishedDatasourceNodeRun = (
mutationOptions: MutationOptions<PipelineDatasourceNodeRunResponse, Error, PipelineDatasourceNodeRunRequest> = {},
) => {
return useMutation({
mutationKey: [NAME_SPACE, 'published-datasource-node-run'],
mutationFn: (request: PipelineDatasourceNodeRunRequest) => {
const { pipeline_id, node_id, ...rest } = request
return post<PipelineDatasourceNodeRunResponse>(`/rag/pipelines/${pipeline_id}/workflows/published/datasource/nodes/${node_id}/run`, {
body: rest,
})
},
...mutationOptions,
})
}
export const useDraftDatasourceNodeRunStatus = (
mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
) => {
return useMutation({
mutationKey: [NAME_SPACE, 'draft-datasource-node-run-status'],
mutationFn: (request: PipelineDatasourceNodeRunStatusRequest) => {
const { pipeline_id, node_id, ...rest } = request
return post<PipelineDatasourceNodeRunStatusResponse>(`/rag/pipelines/${pipeline_id}/workflows/draft/datasource/nodes/${node_id}/run`, {
body: rest,
})
},
...mutationOptions,
})
}
export const usePublishedDatasourceNodeRunStatus = (
mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
) => {
return useMutation({
mutationKey: [NAME_SPACE, 'published-datasource-node-run-status'],
mutationFn: (request: PipelineDatasourceNodeRunStatusRequest) => {
const { pipeline_id, node_id, ...rest } = request
return post<PipelineDatasourceNodeRunStatusResponse>(`/rag/pipelines/${pipeline_id}/workflows/published/datasource/nodes/${node_id}/run`, {
body: rest,
})
},
...mutationOptions,
})
}
export const useDraftPipelineProcessingParams = (params: PipelineProcessingParamsRequest, enabled = true) => { export const useDraftPipelineProcessingParams = (params: PipelineProcessingParamsRequest, enabled = true) => {
const { pipeline_id, node_id } = params const { pipeline_id, node_id } = params
return useQuery<PipelineProcessingParamsResponse>({ return useQuery<PipelineProcessingParamsResponse>({

17
web/types/pipeline.tsx Normal file
View File

@ -0,0 +1,17 @@
/**
 * Progress payload carried by a `datasource_processing` stream event.
 *
 * The stream handler routes parsed events whose `event` field equals
 * `'datasource_processing'` to the `onDataSourceNodeProcessing` callback,
 * which receives an object of this shape.
 */
export type DataSourceNodeProcessingResponse = {
  /** Literal tag identifying this event kind. */
  event: 'datasource_processing'
  /**
   * Progress numerator; the website crawler feeds it into its "crawled" counter.
   * NOTE(review): callers default this with `?? 0`, so the backend may omit it — confirm.
   */
  completed: number
  /**
   * Progress denominator; the website crawler feeds it into its "total" counter.
   * NOTE(review): callers default this with `?? 0`, so the backend may omit it — confirm.
   */
  total: number
}
/**
 * Payload shape for a `datasource_error` stream event.
 *
 * NOTE(review): the visible stream handling only dispatches
 * `datasource_processing` and `datasource_completed`; confirm where (or
 * whether) this type is consumed.
 */
export type DataSourceNodeError = {
  /** Literal tag identifying this event kind. */
  event: 'datasource_error'
  /** Optional error code; the set of possible values is not defined in this file. */
  code?: string
  /** Error message text. */
  message: string
}
/**
 * Final payload carried by a `datasource_completed` stream event.
 *
 * The stream handler routes parsed events whose `event` field equals
 * `'datasource_completed'` to the `onDataSourceNodeCompleted` callback,
 * which receives an object of this shape.
 *
 * @typeParam TData - Shape of the result held in `data`. Defaults to `any`
 * so every existing un-parameterized usage keeps compiling unchanged; pass a
 * concrete type (e.g. the crawl-result or workspace list type) to avoid
 * casting `data` at the call site.
 */
export type DataSourceNodeCompletedResponse<TData = any> = {
  /** Literal tag identifying this event kind. */
  event: 'datasource_completed'
  /** Result produced by the data source node; its shape depends on the datasource type. */
  data: TData
  /**
   * Elapsed time reported for the run; consumers fall back to 0 when absent.
   * NOTE(review): unit (seconds vs. milliseconds) is not established here — confirm with the API.
   */
  time_consuming?: number
}