refactor: update website crawl handling and improve parameter naming in pipeline processing

This commit is contained in:
twwu 2025-06-06 17:00:34 +08:00
parent 547bd3cc1b
commit 3e2f12b065
6 changed files with 25 additions and 32 deletions

View File

@ -162,11 +162,16 @@ export const useWebsiteCrawl = () => {
setCurrentWebsite(undefined)
}, [])
/**
 * Handler for a change in the checked crawl results.
 *
 * Stores the given list through `setWebsitePages` and points the
 * `previewWebsitePage` ref at its first entry; for an empty list the ref
 * becomes `undefined`.
 */
const updataCheckedCrawlResultChange = useCallback((checkedCrawlResult: CrawlResultItem[]) => {
  // Pick the preview candidate up front; array destructuring yields the same value as index 0.
  const [firstCheckedPage] = checkedCrawlResult
  setWebsitePages(checkedCrawlResult)
  previewWebsitePage.current = firstCheckedPage
}, [])
return {
websitePages,
websiteCrawlJobId,
previewWebsitePage,
setWebsitePages,
updataCheckedCrawlResultChange,
setWebsiteCrawlJobId,
currentWebsite,
updateCurrentWebsite,

View File

@ -72,9 +72,9 @@ const CreateFormPipeline = () => {
} = useOnlineDocuments()
const {
websitePages,
websiteCrawlJobId,
// websiteCrawlJobId, // todo: Add status query
previewWebsitePage,
setWebsitePages,
updataCheckedCrawlResultChange,
setWebsiteCrawlJobId,
currentWebsite,
updateCurrentWebsite,
@ -124,13 +124,8 @@ const CreateFormPipeline = () => {
}
datasourceInfoList.push(documentInfo)
}
if (datasource.type === DatasourceType.websiteCrawl) {
const documentInfo = {
job_id: websiteCrawlJobId,
result: previewWebsitePage.current,
}
datasourceInfoList.push(documentInfo)
}
if (datasource.type === DatasourceType.websiteCrawl)
datasourceInfoList.push(previewWebsitePage.current)
await runPublishedPipeline({
pipeline_id: pipelineId!,
inputs: data,
@ -143,7 +138,7 @@ const CreateFormPipeline = () => {
setEstimateData((res as PublishedPipelineRunPreviewResponse).data.outputs)
},
})
}, [datasource, pipelineId, previewFile, previewOnlineDocument, previewWebsitePage, runPublishedPipeline, websiteCrawlJobId])
}, [datasource, pipelineId, previewFile, previewOnlineDocument, previewWebsitePage, runPublishedPipeline])
const handleProcess = useCallback(async (data: Record<string, any>) => {
if (!datasource)
@ -176,11 +171,9 @@ const CreateFormPipeline = () => {
})
}
if (datasource.type === DatasourceType.websiteCrawl) {
const documentInfo = {
job_id: websiteCrawlJobId,
result: websitePages,
}
datasourceInfoList.push(documentInfo)
websitePages.forEach((websitePage) => {
datasourceInfoList.push(websitePage)
})
}
await runPublishedPipeline({
pipeline_id: pipelineId!,
@ -196,7 +189,7 @@ const CreateFormPipeline = () => {
handleNextStep()
},
})
}, [datasource, fileList, handleNextStep, onlineDocuments, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages])
}, [datasource, fileList, handleNextStep, onlineDocuments, pipelineId, runPublishedPipeline, websitePages])
const onClickProcess = useCallback(() => {
isPreview.current = false
@ -285,7 +278,7 @@ const CreateFormPipeline = () => {
docLink: datasource.docLink || '',
}}
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
onCheckedCrawlResultChange={updataCheckedCrawlResultChange}
onJobIdChange={setWebsiteCrawlJobId}
onPreview={updateCurrentWebsite}
/>

View File

@ -62,14 +62,14 @@ const CrawledResult = ({
time: usedTime.toFixed(1),
})}
</div>
<div className='rounded-xl border border-components-panel-border bg-components-panel-bg'>
<div className='overflow-hidden rounded-xl border border-components-panel-border bg-components-panel-bg'>
<div className='flex items-center px-4 py-2'>
<CheckboxWithLabel
isChecked={isCheckAll}
onChange={handleCheckedAll} label={isCheckAll ? t(`${I18N_PREFIX}.resetAll`) : t(`${I18N_PREFIX}.selectAll`)}
/>
</div>
<div className='flex flex-col gap-y-px overflow-hidden border-t border-divider-subtle bg-background-default-subtle p-2'>
<div className='flex flex-col gap-y-px border-t border-divider-subtle bg-background-default-subtle p-2'>
{list.map((item, index) => (
<CrawledResultItem
key={item.source_url}

View File

@ -11,7 +11,7 @@ import {
useDraftDatasourceNodeRun,
useDraftPipelinePreProcessingParams,
usePublishedDatasourceNodeRun,
usePublishedPipelineProcessingParams,
usePublishedPipelinePreProcessingParams,
} from '@/service/use-pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { DatasourceType } from '@/models/pipeline'
@ -52,7 +52,7 @@ const Crawler = ({
const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelineProcessingParams : useDraftPipelinePreProcessingParams)
const usePreProcessingParams = useRef(!isInPipeline ? usePublishedPipelinePreProcessingParams : useDraftPipelinePreProcessingParams)
const { data: paramsConfig } = usePreProcessingParams.current({
pipeline_id: pipelineId!,
node_id: nodeId,

View File

@ -40,7 +40,7 @@ const TestRunPanel = () => {
} = useOnlineDocuments()
const {
websitePages,
websiteCrawlJobId,
// websiteCrawlJobId, // todo: Add status query
setWebsitePages,
setWebsiteCrawlJobId,
} = useWebsiteCrawl()
@ -90,20 +90,15 @@ const TestRunPanel = () => {
}
datasourceInfoList.push(documentInfo)
}
if (datasource.type === DatasourceType.websiteCrawl) {
const documentInfo = {
job_id: websiteCrawlJobId,
result: [websitePages[0]],
}
datasourceInfoList.push(documentInfo)
}
if (datasource.type === DatasourceType.websiteCrawl)
datasourceInfoList.push(websitePages[0])
handleRun({
inputs: data,
start_node_id: datasource.nodeId,
datasource_type: datasource.type,
datasource_info_list: datasourceInfoList,
})
}, [datasource, fileList, handleRun, onlineDocuments, websiteCrawlJobId, websitePages])
}, [datasource, fileList, handleRun, onlineDocuments, websitePages])
return (
<div

View File

@ -288,7 +288,7 @@ export const usePublishedPipelinePreProcessingParams = (params: PipelinePreProce
return useQuery<PipelinePreProcessingParamsResponse>({
queryKey: [NAME_SPACE, 'published-pipeline-pre-processing-params', pipeline_id, node_id],
queryFn: () => {
return get<PipelinePreProcessingParamsResponse>(`/rag/pipelines/${pipeline_id}/workflows/published/processing/parameters`, {
return get<PipelinePreProcessingParamsResponse>(`/rag/pipelines/${pipeline_id}/workflows/published/pre-processing/parameters`, {
params: {
node_id,
},