feat: implement SSE for data source node processing and completion events, replacing previous run methods

This commit is contained in:
twwu 2025-06-18 15:06:50 +08:00
parent 4b3a54633f
commit f85e6a0dea
6 changed files with 115 additions and 168 deletions

View File

@ -1,12 +1,14 @@
import { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useCallback, useEffect, useMemo, useState } from 'react'
import WorkspaceSelector from '@/app/components/base/notion-page-selector/workspace-selector'
import SearchInput from '@/app/components/base/notion-page-selector/search-input'
import PageSelector from '@/app/components/base/notion-page-selector/page-selector'
import type { DataSourceNotionPageMap, DataSourceNotionWorkspace, NotionPage } from '@/models/common'
import Header from '@/app/components/datasets/create/website/base/header'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useDraftDatasourceNodeRun, usePublishedDatasourceNodeRun } from '@/service/use-pipeline'
import { DatasourceType } from '@/models/pipeline'
import { ssePost } from '@/service/base'
import Toast from '@/app/components/base/toast'
import type { DataSourceNodeCompletedResponse } from '@/types/pipeline'
type OnlineDocumentSelectorProps = {
value?: string[]
@ -33,28 +35,37 @@ const OnlineDocumentSelector = ({
nodeId,
headerInfo,
}: OnlineDocumentSelectorProps) => {
const pipeline_id = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const [documentsData, setDocumentsData] = useState<DataSourceNotionWorkspace[]>([])
const [searchValue, setSearchValue] = useState('')
const [currentWorkspaceId, setCurrentWorkspaceId] = useState('')
const useDatasourceNodeRun = useRef(!isInPipeline ? usePublishedDatasourceNodeRun : useDraftDatasourceNodeRun)
const { mutateAsync: crawlOnlineDocuments } = useDatasourceNodeRun.current()
const datasourceNodeRunURL = !isInPipeline
? `/rag/pipelines/${pipelineId}/workflows/published/datasource/nodes/${nodeId}/run`
: `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
const getOnlineDocuments = useCallback(async () => {
if (pipeline_id) {
await crawlOnlineDocuments({
pipeline_id,
node_id: nodeId,
inputs: {},
datasource_type: DatasourceType.onlineDocument,
}, {
onSuccess(documentsData) {
setDocumentsData(documentsData.result as DataSourceNotionWorkspace[])
ssePost(
datasourceNodeRunURL,
{
body: {
inputs: {},
datasource_type: DatasourceType.onlineDocument,
},
})
}
}, [crawlOnlineDocuments, nodeId, pipeline_id])
},
{
onDataSourceNodeCompleted: (documentsData: DataSourceNodeCompletedResponse) => {
setDocumentsData(documentsData.data as DataSourceNotionWorkspace[])
},
onError: (message: string) => {
Toast.notify({
type: 'error',
message,
})
},
},
)
}, [datasourceNodeRunURL])
useEffect(() => {
getOnlineDocuments()

View File

@ -8,16 +8,16 @@ import Crawling from './crawling'
import ErrorMessage from './error-message'
import CrawledResult from './crawled-result'
import {
useDraftDatasourceNodeRun,
useDraftDatasourceNodeRunStatus,
useDraftPipelinePreProcessingParams,
usePublishedDatasourceNodeRun,
usePublishedDatasourceNodeRunStatus,
usePublishedPipelinePreProcessingParams,
} from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { DatasourceType } from '@/models/pipeline'
import { sleep } from '@/utils'
import { ssePost } from '@/service/base'
import type {
DataSourceNodeCompletedResponse,
DataSourceNodeProcessingResponse,
} from '@/types/pipeline'
const I18N_PREFIX = 'datasetCreation.stepOne.website'
@ -51,6 +51,8 @@ const Crawler = ({
const { t } = useTranslation()
const [step, setStep] = useState<Step>(Step.init)
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
const [totalNum, setTotalNum] = useState(0)
const [crawledNum, setCrawledNum] = useState(0)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelinePreProcessingParams : useDraftPipelinePreProcessingParams)
@ -68,66 +70,49 @@ const Crawler = ({
const isCrawlFinished = step === Step.finished
const isRunning = step === Step.running
const [crawlResult, setCrawlResult] = useState<{
result: CrawlResultItem[]
data: CrawlResultItem[]
time_consuming: number | string
} | undefined>(undefined)
const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
const showError = isCrawlFinished && crawlErrorMessage
const useDatasourceNodeRun = useRef(!isInPipeline ? usePublishedDatasourceNodeRun : useDraftDatasourceNodeRun)
const useDatasourceNodeRunStatus = useRef(!isInPipeline ? usePublishedDatasourceNodeRunStatus : useDraftDatasourceNodeRunStatus)
const { mutateAsync: runDatasourceNode } = useDatasourceNodeRun.current()
const { mutateAsync: getDatasourceNodeRunStatus } = useDatasourceNodeRunStatus.current()
const checkCrawlStatus = useCallback(async (jobId: string) => {
const res = await getDatasourceNodeRunStatus({
node_id: nodeId,
pipeline_id: pipelineId!,
job_id: jobId,
datasource_type: DatasourceType.websiteCrawl,
}, {
onError: async (error: any) => {
const message = await error.json()
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
},
}) as any
if (res.status === 'completed') {
setCrawlResult(res)
onCheckedCrawlResultChange(res.result || []) // default select the crawl result
setCrawlErrorMessage('')
setStep(Step.finished)
}
else if (res.status === 'processing') {
await sleep(2500)
await checkCrawlStatus(jobId)
}
}, [getDatasourceNodeRunStatus, nodeId, pipelineId, t, onCheckedCrawlResultChange])
const datasourceNodeRunURL = !isInPipeline
? `/rag/pipelines/${pipelineId}/workflows/published/datasource/nodes/${nodeId}/run`
: `/rag/pipelines/${pipelineId}/workflows/draft/datasource/nodes/${nodeId}/run`
const handleRun = useCallback(async (value: Record<string, any>) => {
setStep(Step.running)
const res = await runDatasourceNode({
node_id: nodeId,
pipeline_id: pipelineId!,
inputs: value,
datasource_type: DatasourceType.websiteCrawl,
}, {
onError: async (error: any) => {
const message = await error.json()
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
setStep(Step.finished)
ssePost(
datasourceNodeRunURL,
{
body: {
inputs: value,
datasource_type: DatasourceType.websiteCrawl,
response_mode: 'streaming',
},
},
}) as any
const jobId = res.job_id
if (!jobId && res.status === 'completed') {
setCrawlResult(res)
onCheckedCrawlResultChange(res.result || []) // default select the crawl result
setStep(Step.finished)
}
else if (jobId) {
await checkCrawlStatus(jobId)
}
setCrawlErrorMessage('')
}, [runDatasourceNode, nodeId, pipelineId, onCheckedCrawlResultChange, checkCrawlStatus, t])
{
onDataSourceNodeProcessing: (data: DataSourceNodeProcessingResponse) => {
setTotalNum(data.total ?? 0)
setCrawledNum(data.completed ?? 0)
},
onDataSourceNodeCompleted: (data: DataSourceNodeCompletedResponse) => {
const { data: crawlData, time_consuming } = data
setCrawlResult({
data: crawlData as CrawlResultItem[],
time_consuming: time_consuming ?? 0,
})
onCheckedCrawlResultChange(crawlData || []) // default select the crawl result
setCrawlErrorMessage('')
setStep(Step.finished)
},
onError: (message: string) => {
setCrawlErrorMessage(message || t(`${I18N_PREFIX}.unknownError`))
setStep(Step.finished)
},
},
)
}, [datasourceNodeRunURL, onCheckedCrawlResultChange, t])
const handleSubmit = useCallback((value: Record<string, any>) => {
handleRun(value)
@ -152,8 +137,8 @@ const Crawler = ({
<div className='relative flex flex-col'>
{isRunning && (
<Crawling
crawledNum={0}
totalNum={0}
crawledNum={crawledNum}
totalNum={totalNum}
/>
)}
{showError && (
@ -166,7 +151,7 @@ const Crawler = ({
{isCrawlFinished && !showError && (
<CrawledResult
className='mt-2'
list={crawlResult?.result || []}
list={crawlResult?.data || []}
checkedList={checkedCrawlResult}
onSelectedChange={onCheckedCrawlResultChange}
usedTime={Number.parseFloat(crawlResult?.time_consuming as string) || 0}

View File

@ -168,34 +168,6 @@ export type PipelinePreProcessingParamsResponse = {
variables: RAGPipelineVariables
}
/**
 * Request accepted by the datasource-node "run" mutation hooks.
 *
 * The hooks interpolate `pipeline_id` and `node_id` into the request URL and
 * send the remaining fields (`inputs`, `datasource_type`) as the POST body.
 */
export type PipelineDatasourceNodeRunRequest = {
pipeline_id: string
node_id: string
inputs: Record<string, any>
datasource_type: DatasourceType
}
/**
 * Response returned by the datasource-node "run" mutation hooks.
 *
 * The crawler caller treats a response without `job_id` and with
 * `status === 'completed'` as final and reads `result` directly; when a
 * `job_id` is present it polls the run status with that id instead.
 */
export type PipelineDatasourceNodeRunResponse = {
job_id?: string
status: 'processing' | 'completed'
// Shape is not declared here; callers cast it to the type they expect.
result: any
provider_type: DatasourceType
}
/**
 * Request accepted by the datasource-node "run status" mutation hooks.
 *
 * `pipeline_id` and `node_id` are interpolated into the request URL; `job_id`
 * and `datasource_type` are sent as the POST body.
 */
export type PipelineDatasourceNodeRunStatusRequest = {
pipeline_id: string
node_id: string
job_id: string
datasource_type: DatasourceType
}
/**
 * Response returned by the datasource-node "run status" mutation hooks.
 *
 * The crawler caller keeps polling while `status` is `'processing'` and
 * stores the response as the crawl result once it is `'completed'`.
 */
export type PipelineDatasourceNodeRunStatusResponse = {
provider_type: DatasourceType
result: Record<string, any>
status: 'processing' | 'completed'
job_id: string
}
export type PublishedPipelineInfoResponse = {
id: string
graph: {

View File

@ -25,6 +25,10 @@ import { removeAccessToken } from '@/app/components/share/utils'
import type { FetchOptionType, ResponseError } from './fetch'
import { ContentType, base, baseOptions, getAccessToken } from './fetch'
import { asyncRunSafe } from '@/utils'
import type {
DataSourceNodeCompletedResponse,
DataSourceNodeProcessingResponse,
} from '@/types/pipeline'
const TIME_OUT = 100000
export type IOnDataMoreInfo = {
@ -63,6 +67,9 @@ export type IOnLoopNext = (workflowStarted: LoopNextResponse) => void
export type IOnLoopFinished = (workflowFinished: LoopFinishedResponse) => void
export type IOnAgentLog = (agentLog: AgentLogResponse) => void
/** Callback invoked by the stream handler with the payload of a `datasource_processing` event. */
export type IOnDataSourceNodeProcessing = (dataSourceNodeProcessing: DataSourceNodeProcessingResponse) => void
/** Callback invoked by the stream handler with the payload of a `datasource_completed` event. */
export type IOnDataSourceNodeCompleted = (dataSourceNodeCompleted: DataSourceNodeCompletedResponse) => void
export type IOtherOptions = {
isPublicAPI?: boolean
isMarketplaceAPI?: boolean
@ -97,6 +104,10 @@ export type IOtherOptions = {
onLoopNext?: IOnLoopNext
onLoopFinish?: IOnLoopFinished
onAgentLog?: IOnAgentLog
// Pipeline data source node run
onDataSourceNodeProcessing?: IOnDataSourceNodeProcessing
onDataSourceNodeCompleted?: IOnDataSourceNodeCompleted
}
function unicodeToChar(text: string) {
@ -152,6 +163,8 @@ const handleStream = (
onTTSEnd?: IOnTTSEnd,
onTextReplace?: IOnTextReplace,
onAgentLog?: IOnAgentLog,
onDataSourceNodeProcessing?: IOnDataSourceNodeProcessing,
onDataSourceNodeCompleted?: IOnDataSourceNodeCompleted,
) => {
if (!response.ok)
throw new Error('Network response was not ok')
@ -270,6 +283,15 @@ const handleStream = (
else if (bufferObj.event === 'tts_message_end') {
onTTSEnd?.(bufferObj.message_id, bufferObj.audio)
}
else if (bufferObj.event === 'datasource_processing') {
onDataSourceNodeProcessing?.(bufferObj as DataSourceNodeProcessingResponse)
}
else if (bufferObj.event === 'datasource_completed') {
onDataSourceNodeCompleted?.(bufferObj as DataSourceNodeCompletedResponse)
}
else {
console.warn(`Unknown event: ${bufferObj.event}`, bufferObj)
}
}
})
buffer = lines[lines.length - 1]
@ -363,6 +385,8 @@ export const ssePost = async (
onLoopStart,
onLoopNext,
onLoopFinish,
onDataSourceNodeProcessing,
onDataSourceNodeCompleted,
} = otherOptions
const abortController = new AbortController()
@ -460,6 +484,8 @@ export const ssePost = async (
onTTSEnd,
onTextReplace,
onAgentLog,
onDataSourceNodeProcessing,
onDataSourceNodeCompleted,
)
}).catch((e) => {
// `e.toString()` already yields the message string. The previous `.errorMessage` lookup on that
// string was always undefined, so `.includes(...)` threw inside this catch handler for every
// non-abort error instead of filtering out the read-only-property TypeError.
if (e.toString() !== 'AbortError: The user aborted a request.' && !e.toString().includes('TypeError: Cannot assign to read only property'))

View File

@ -8,10 +8,6 @@ import type {
ImportPipelineDSLRequest,
ImportPipelineDSLResponse,
PipelineCheckDependenciesResponse,
PipelineDatasourceNodeRunRequest,
PipelineDatasourceNodeRunResponse,
PipelineDatasourceNodeRunStatusRequest,
PipelineDatasourceNodeRunStatusResponse,
PipelinePreProcessingParamsRequest,
PipelinePreProcessingParamsResponse,
PipelineProcessingParamsRequest,
@ -133,66 +129,6 @@ export const useCheckPipelineDependencies = (
})
}
/**
 * Mutation hook that POSTs a datasource-node run against a pipeline's draft workflow.
 *
 * `pipeline_id` and `node_id` are taken out of the request to build the URL;
 * everything else in the request is sent as the body.
 *
 * @param mutationOptions Extra mutation options; spread last, so they override the defaults set here.
 * @returns The result of `useMutation` for this request.
 */
export const useDraftDatasourceNodeRun = (
  mutationOptions: MutationOptions<PipelineDatasourceNodeRunResponse, Error, PipelineDatasourceNodeRunRequest> = {},
) => {
  const runDraftNode = ({ pipeline_id, node_id, ...payload }: PipelineDatasourceNodeRunRequest) => {
    const endpoint = `/rag/pipelines/${pipeline_id}/workflows/draft/datasource/nodes/${node_id}/run`
    return post<PipelineDatasourceNodeRunResponse>(endpoint, { body: payload })
  }

  return useMutation({
    mutationKey: [NAME_SPACE, 'draft-datasource-node-run'],
    mutationFn: runDraftNode,
    ...mutationOptions,
  })
}
/**
 * Mutation hook that POSTs a datasource-node run against a pipeline's published workflow.
 *
 * `pipeline_id` and `node_id` are taken out of the request to build the URL;
 * everything else in the request is sent as the body.
 *
 * @param mutationOptions Extra mutation options; spread last, so they override the defaults set here.
 * @returns The result of `useMutation` for this request.
 */
export const usePublishedDatasourceNodeRun = (
  mutationOptions: MutationOptions<PipelineDatasourceNodeRunResponse, Error, PipelineDatasourceNodeRunRequest> = {},
) => {
  const runPublishedNode = ({ pipeline_id, node_id, ...payload }: PipelineDatasourceNodeRunRequest) => {
    const endpoint = `/rag/pipelines/${pipeline_id}/workflows/published/datasource/nodes/${node_id}/run`
    return post<PipelineDatasourceNodeRunResponse>(endpoint, { body: payload })
  }

  return useMutation({
    mutationKey: [NAME_SPACE, 'published-datasource-node-run'],
    mutationFn: runPublishedNode,
    ...mutationOptions,
  })
}
/**
 * Mutation hook that requests the status of a datasource-node run on a pipeline's draft workflow.
 *
 * The request goes to the same `.../draft/datasource/nodes/{node_id}/run` URL as the
 * run hook; `job_id` and `datasource_type` travel in the POST body.
 *
 * @param mutationOptions Extra mutation options; spread last, so they override the defaults set here.
 * @returns The result of `useMutation` for this request.
 */
export const useDraftDatasourceNodeRunStatus = (
  mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
) => {
  const fetchDraftRunStatus = ({ pipeline_id, node_id, ...payload }: PipelineDatasourceNodeRunStatusRequest) => {
    const endpoint = `/rag/pipelines/${pipeline_id}/workflows/draft/datasource/nodes/${node_id}/run`
    return post<PipelineDatasourceNodeRunStatusResponse>(endpoint, { body: payload })
  }

  return useMutation({
    mutationKey: [NAME_SPACE, 'draft-datasource-node-run-status'],
    mutationFn: fetchDraftRunStatus,
    ...mutationOptions,
  })
}
/**
 * Mutation hook that requests the status of a datasource-node run on a pipeline's published workflow.
 *
 * The request goes to the same `.../published/datasource/nodes/{node_id}/run` URL as the
 * run hook; `job_id` and `datasource_type` travel in the POST body.
 *
 * @param mutationOptions Extra mutation options; spread last, so they override the defaults set here.
 * @returns The result of `useMutation` for this request.
 */
export const usePublishedDatasourceNodeRunStatus = (
  mutationOptions: MutationOptions<PipelineDatasourceNodeRunStatusResponse, Error, PipelineDatasourceNodeRunStatusRequest> = {},
) => {
  const fetchPublishedRunStatus = ({ pipeline_id, node_id, ...payload }: PipelineDatasourceNodeRunStatusRequest) => {
    const endpoint = `/rag/pipelines/${pipeline_id}/workflows/published/datasource/nodes/${node_id}/run`
    return post<PipelineDatasourceNodeRunStatusResponse>(endpoint, { body: payload })
  }

  return useMutation({
    mutationKey: [NAME_SPACE, 'published-datasource-node-run-status'],
    mutationFn: fetchPublishedRunStatus,
    ...mutationOptions,
  })
}
export const useDraftPipelineProcessingParams = (params: PipelineProcessingParamsRequest, enabled = true) => {
const { pipeline_id, node_id } = params
return useQuery<PipelineProcessingParamsResponse>({

17
web/types/pipeline.tsx Normal file
View File

@ -0,0 +1,17 @@
/**
 * Payload of a `datasource_processing` stream event.
 *
 * The crawler feeds `total` and `completed` into its progress display.
 * NOTE(review): presumably the number of items to process and the number
 * finished so far — confirm against the backend event schema.
 */
export type DataSourceNodeProcessingResponse = {
event: 'datasource_processing'
total: number
completed: number
}
/**
 * Payload shape for a `datasource_error` event.
 *
 * NOTE(review): no consumer of this type is visible in this change; the
 * visible callers receive errors as a plain message string via `onError`.
 * Confirm where this event is handled.
 */
export type DataSourceNodeError = {
event: 'datasource_error'
message: string
code?: string
}
/**
 * Payload of a `datasource_completed` stream event.
 */
export type DataSourceNodeCompletedResponse = {
event: 'datasource_completed'
// Untyped result; consumers cast it to the shape they expect
// (e.g. `DataSourceNotionWorkspace[]` or `CrawlResultItem[]`).
data: any
// Optional; the crawler falls back to 0 when it is absent.
time_consuming?: number
}