From 3e2f12b0652fe2a1f1c23f85094af6cefaff44ff Mon Sep 17 00:00:00 2001 From: twwu Date: Fri, 6 Jun 2025 17:00:34 +0800 Subject: [PATCH] refactor: update website crawl handling and improve parameter naming in pipeline processing --- .../documents/create-from-pipeline/hooks.ts | 7 ++++- .../documents/create-from-pipeline/index.tsx | 27 +++++++------------ .../website-crawl/base/crawled-result.tsx | 4 +-- .../website-crawl/base/crawler.tsx | 4 +-- .../components/panel/test-run/index.tsx | 13 +++------ web/service/use-pipeline.ts | 2 +- 6 files changed, 25 insertions(+), 32 deletions(-) diff --git a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts index 65a0f33786..15f30ed726 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts +++ b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts @@ -162,11 +162,16 @@ export const useWebsiteCrawl = () => { setCurrentWebsite(undefined) }, []) + const updataCheckedCrawlResultChange = useCallback((checkedCrawlResult: CrawlResultItem[]) => { + setWebsitePages(checkedCrawlResult) + previewWebsitePage.current = checkedCrawlResult[0] + }, []) + return { websitePages, websiteCrawlJobId, previewWebsitePage, - setWebsitePages, + updataCheckedCrawlResultChange, setWebsiteCrawlJobId, currentWebsite, updateCurrentWebsite, diff --git a/web/app/components/datasets/documents/create-from-pipeline/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/index.tsx index a293697790..7e4d2988fa 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/index.tsx @@ -72,9 +72,9 @@ const CreateFormPipeline = () => { } = useOnlineDocuments() const { websitePages, - websiteCrawlJobId, + // websiteCrawlJobId, // todo: Add status query previewWebsitePage, - setWebsitePages, + updataCheckedCrawlResultChange, setWebsiteCrawlJobId, currentWebsite, updateCurrentWebsite, @@ -124,13 +124,8 @@ const CreateFormPipeline = () => { } datasourceInfoList.push(documentInfo) } - if (datasource.type === DatasourceType.websiteCrawl) { - const documentInfo = { - job_id: websiteCrawlJobId, - result: previewWebsitePage.current, - } - datasourceInfoList.push(documentInfo) - } + if (datasource.type === DatasourceType.websiteCrawl) + datasourceInfoList.push(previewWebsitePage.current) await runPublishedPipeline({ pipeline_id: pipelineId!, inputs: data, @@ -143,7 +138,7 @@ const CreateFormPipeline = () => { setEstimateData((res as PublishedPipelineRunPreviewResponse).data.outputs) }, }) - }, [datasource, pipelineId, previewFile, previewOnlineDocument, previewWebsitePage, runPublishedPipeline, websiteCrawlJobId]) + }, [datasource, pipelineId, previewFile, previewOnlineDocument, previewWebsitePage, runPublishedPipeline]) const handleProcess = useCallback(async (data: Record) => { if (!datasource) @@ -176,11 +171,9 @@ const CreateFormPipeline = () => { }) } if (datasource.type === DatasourceType.websiteCrawl) { - const documentInfo = { - job_id: websiteCrawlJobId, - result: websitePages, - } - datasourceInfoList.push(documentInfo) + websitePages.forEach((websitePage) => { + datasourceInfoList.push(websitePage) + }) } await runPublishedPipeline({ pipeline_id: pipelineId!, @@ -196,7 +189,7 @@ const CreateFormPipeline = () => { handleNextStep() }, }) - }, [datasource, fileList, handleNextStep, onlineDocuments, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages]) + }, [datasource, fileList, handleNextStep, onlineDocuments, pipelineId, runPublishedPipeline, websitePages]) const onClickProcess = useCallback(() => { isPreview.current = false @@ -285,7 +278,7 @@ const CreateFormPipeline = () => { docLink: datasource.docLink || '', }} checkedCrawlResult={websitePages} - onCheckedCrawlResultChange={setWebsitePages} + onCheckedCrawlResultChange={updataCheckedCrawlResultChange} onJobIdChange={setWebsiteCrawlJobId} onPreview={updateCurrentWebsite} /> diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx index 9d5019f31e..0115dc5e51 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl/base/crawled-result.tsx @@ -62,14 +62,14 @@ const CrawledResult = ({ time: usedTime.toFixed(1), })} -
+
-
+
{list.map((item, index) => ( (0) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) - const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelineProcessingParams : useDraftPipelinePreProcessingParams) + const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelinePreProcessingParams : useDraftPipelinePreProcessingParams) const { data: paramsConfig } = usePreProcessingParams.current({ pipeline_id: pipelineId!, node_id: nodeId, diff --git a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx index b924200922..7b59575210 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx @@ -40,7 +40,7 @@ const TestRunPanel = () => { } = useOnlineDocuments() const { websitePages, - websiteCrawlJobId, + // websiteCrawlJobId, // todo: Add status query setWebsitePages, setWebsiteCrawlJobId, } = useWebsiteCrawl() @@ -90,20 +90,15 @@ const TestRunPanel = () => { } datasourceInfoList.push(documentInfo) } - if (datasource.type === DatasourceType.websiteCrawl) { - const documentInfo = { - job_id: websiteCrawlJobId, - result: [websitePages[0]], - } - datasourceInfoList.push(documentInfo) - } + if (datasource.type === DatasourceType.websiteCrawl) + datasourceInfoList.push(websitePages[0]) handleRun({ inputs: data, start_node_id: datasource.nodeId, datasource_type: datasource.type, datasource_info_list: datasourceInfoList, }) - }, [datasource, fileList, handleRun, onlineDocuments, websiteCrawlJobId, websitePages]) + }, [datasource, fileList, handleRun, onlineDocuments, websitePages]) return (
({ queryKey: [NAME_SPACE, 'published-pipeline-pre-processing-params', pipeline_id, node_id], queryFn: () => { - return get(`/rag/pipelines/${pipeline_id}/workflows/published/processing/parameters`, { + return get(`/rag/pipelines/${pipeline_id}/workflows/published/pre-processing/parameters`, { params: { node_id, },