Mirror of https://github.com/langgenius/dify.git (synced 2026-04-05 23:10:48 +08:00)
refactor: refactor datasource type handling
Commit dcb4c9e84a (parent d333645e09)
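Refactor summary: the add-documents and test-run panels previously inferred each datasource's type by switching on node.data.tool_name, mixing two unrelated unions (DataSourceType from @/models/datasets and DataSourceProvider from @/models/common). This commit reads node.data.provider_type directly and narrows everything to a single DatasourceType enum in @/models/pipeline, which in turn lets the per-provider FireCrawl, JinaReader, and WaterCrawl components collapse into one WebsiteCrawl component driven by headerInfo props. A minimal TypeScript sketch of the new mapping (the enum and field names are taken verbatim from the diff below; the node shape is abbreviated for illustration):

export enum DatasourceType {
  localFile = 'local-file',
  onlineDocument = 'online-document',
  websiteCrawl = 'website-crawl',
}

// provider_type already carries the DatasourceType, so a DataSource node
// maps to a Datasource entry with a cast plus defaults, no tool_name switch.
const toDatasource = (node: { id: string, data: Record<string, any> }) => ({
  nodeId: node.id,
  type: node.data.provider_type as DatasourceType,
  variables: node.data.variables || [],
  description: node.data.desc || '',
})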
@@ -4,8 +4,7 @@ import type { DataSourceOption, Datasource } from '@/app/components/rag-pipeline
 import { useMemo } from 'react'
 import { BlockEnum, type Node } from '@/app/components/workflow/types'
 import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
-import { DataSourceType } from '@/models/datasets'
-import { DataSourceProvider } from '@/models/common'
+import type { DatasourceType } from '@/models/pipeline'
 
 export const useAddDocumentsSteps = () => {
   const { t } = useTranslation()
@@ -27,77 +26,33 @@ export const useAddDocumentsSteps = () => {
 }
 
 export const useDatasourceOptions = (pipelineNodes: Node<DataSourceNodeType>[]) => {
-  const { t } = useTranslation()
+  const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource)
   const datasources: Datasource[] = useMemo(() => {
-    const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource)
     return datasourceNodes.map((node) => {
-      let type: DataSourceType | DataSourceProvider = DataSourceType.FILE
-      switch (node.data.tool_name) {
-        case 'file_upload':
-          type = DataSourceType.FILE
-          break
-        case 'search_notion':
-          type = DataSourceType.NOTION
-          break
-        case 'firecrawl':
-          type = DataSourceProvider.fireCrawl
-          break
-        case 'jina_reader':
-          type = DataSourceProvider.jinaReader
-          break
-        case 'water_crawl':
-          type = DataSourceProvider.waterCrawl
-          break
-      }
       return {
         nodeId: node.id,
-        type,
-        variables: node.data.variables,
+        type: node.data.provider_type as DatasourceType,
+        variables: node.data.variables || [],
+        description: node.data.desc || '',
+        docTitle: '', // todo: Add docTitle and docLink if needed, or remove these properties if not used
+        docLink: '',
       }
     })
-  }, [pipelineNodes])
+  }, [datasourceNodes])
 
   const options = useMemo(() => {
     const options: DataSourceOption[] = []
-    datasources.forEach((source) => {
-      if (source.type === DataSourceType.FILE) {
-        options.push({
-          label: t('datasetPipeline.testRun.dataSource.localFiles'),
-          value: source.nodeId,
-          type: DataSourceType.FILE,
-        })
-      }
-      if (source.type === DataSourceType.NOTION) {
-        options.push({
-          label: 'Notion',
-          value: source.nodeId,
-          type: DataSourceType.NOTION,
-        })
-      }
-      if (source.type === DataSourceProvider.fireCrawl) {
-        options.push({
-          label: 'Firecrawl',
-          value: source.nodeId,
-          type: DataSourceProvider.fireCrawl,
-        })
-      }
-      if (source.type === DataSourceProvider.jinaReader) {
-        options.push({
-          label: 'Jina Reader',
-          value: source.nodeId,
-          type: DataSourceProvider.jinaReader,
-        })
-      }
-      if (source.type === DataSourceProvider.waterCrawl) {
-        options.push({
-          label: 'Water Crawl',
-          value: source.nodeId,
-          type: DataSourceProvider.waterCrawl,
-        })
-      }
+    datasourceNodes.forEach((node) => {
+      const type = node.data.provider_type as DatasourceType
+      const label = node.data.title
+      options.push({
+        label,
+        value: node.id,
+        type,
+      })
     })
     return options
-  }, [datasources, t])
+  }, [datasourceNodes])
 
   return { datasources, options }
 }

@@ -1,22 +1,19 @@
 'use client'
 import { useCallback, useMemo, useRef, useState } from 'react'
 import DataSourceOptions from './data-source-options'
-import type { CrawlResultItem, CustomFile as File, FileItem } from '@/models/datasets'
-import { DataSourceType } from '@/models/datasets'
+import type { CrawlResultItem, CustomFile as File, FileIndexingEstimateResponse, FileItem } from '@/models/datasets'
 import LocalFile from '@/app/components/rag-pipeline/components/panel/test-run/data-source/local-file'
 import produce from 'immer'
 import { useProviderContextSelector } from '@/context/provider-context'
-import { DataSourceProvider, type NotionPage } from '@/models/common'
+import type { NotionPage } from '@/models/common'
 import Notion from '@/app/components/rag-pipeline/components/panel/test-run/data-source/notion'
 import VectorSpaceFull from '@/app/components/billing/vector-space-full'
-import FireCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/firecrawl'
-import JinaReader from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader'
-import WaterCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl'
+import WebsiteCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website-crawl'
 import Actions from './data-source/actions'
 import { useTranslation } from 'react-i18next'
 import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
 import LeftHeader from './left-header'
-import { usePublishedPipelineInfo } from '@/service/use-pipeline'
+import { usePublishedPipelineInfo, useRunPublishedPipeline } from '@/service/use-pipeline'
 import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
 import Loading from '@/app/components/base/loading'
 import type { Node } from '@/app/components/workflow/types'
@@ -27,6 +24,7 @@ import WebsitePreview from './preview/web-preview'
 import ProcessDocuments from './process-documents'
 import ChunkPreview from './preview/chunk-preview'
 import Processing from './processing'
+import { DatasourceType } from '@/models/pipeline'
 
 const TestRunPanel = () => {
   const { t } = useTranslation()
@@ -39,6 +37,7 @@ const TestRunPanel = () => {
   const [currentFile, setCurrentFile] = useState<File | undefined>()
   const [currentNotionPage, setCurrentNotionPage] = useState<NotionPage | undefined>()
   const [currentWebsite, setCurrentWebsite] = useState<CrawlResultItem | undefined>()
+  const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
 
   const plan = useProviderContextSelector(state => state.plan)
   const enableBilling = useProviderContextSelector(state => state.enableBilling)
@@ -66,13 +65,11 @@ const TestRunPanel = () => {
 
   const nextBtnDisabled = useMemo(() => {
     if (!datasource) return true
-    if (datasource.type === DataSourceType.FILE)
+    if (datasource.type === DatasourceType.localFile)
       return nextDisabled
-    if (datasource.type === DataSourceType.NOTION)
+    if (datasource.type === DatasourceType.onlineDocument)
       return isShowVectorSpaceFull || !notionPages.length
-    if (datasource.type === DataSourceProvider.fireCrawl
-      || datasource.type === DataSourceProvider.jinaReader
-      || datasource.type === DataSourceProvider.waterCrawl)
+    if (datasource.type === DatasourceType.websiteCrawl)
       return isShowVectorSpaceFull || !websitePages.length
     return false
   }, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length])
@@ -128,38 +125,100 @@ const TestRunPanel = () => {
     setCurrentStep(preStep => preStep - 1)
   }, [])
 
-  const handlePreviewChunks = useCallback((data: Record<string, any>) => {
-    console.log(data)
-  }, [])
+  const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()
 
-  const handleProcess = useCallback((data: Record<string, any>) => {
+  const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
     if (!datasource)
       return
-    const datasourceInfo: Record<string, any> = {}
-    let datasource_type = ''
-    if (datasource.type === DataSourceType.FILE) {
-      datasource_type = 'local_file'
-      datasourceInfo.fileId = fileList.map(file => file.fileID)
+    const datasourceInfoList: Record<string, any>[] = []
+    if (datasource.type === DatasourceType.localFile) {
+      const { id, name, type, size, extension, mime_type } = fileList[0].file
+      const documentInfo = {
+        upload_file_id: id,
+        name,
+        type,
+        size,
+        extension,
+        mime_type,
+      }
+      datasourceInfoList.push(documentInfo)
     }
-    if (datasource.type === DataSourceType.NOTION) {
-      datasource_type = 'online_document'
-      datasourceInfo.workspaceId = notionPages[0].workspace_id
-      datasourceInfo.page = notionPages.map((page) => {
-        const { workspace_id, ...rest } = page
-        return rest
-      })
+    if (datasource.type === DatasourceType.onlineDocument) {
+      const { workspace_id, ...rest } = notionPages[0]
+      const documentInfo = {
+        workspace_id,
+        page: rest,
+      }
+      datasourceInfoList.push(documentInfo)
     }
+    if (datasource.type === DatasourceType.websiteCrawl) {
+      const documentInfo = {
+        job_id: websiteCrawlJobId,
+        result: websitePages[0],
+      }
+      datasourceInfoList.push(documentInfo)
+    }
+    await runPublishedPipeline({
+      pipeline_id: pipelineId!,
+      inputs: data,
+      start_node_id: datasource.nodeId,
+      datasource_type: datasource.type,
+      datasource_info_list: datasourceInfoList,
+      is_preview: true,
+    }, {
+      onSuccess: (res) => {
+        setEstimateData(res as FileIndexingEstimateResponse)
+      },
+    })
+  }, [datasource, fileList, notionPages, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages])
+
+  const handleProcess = useCallback(async (data: Record<string, any>) => {
+    if (!datasource)
+      return
+    const datasourceInfoList: Record<string, any>[] = []
+    if (datasource.type === DatasourceType.localFile) {
+      fileList.forEach((file) => {
+        const { id, name, type, size, extension, mime_type } = file.file
+        const documentInfo = {
+          upload_file_id: id,
+          name,
+          type,
+          size,
+          extension,
+          mime_type,
+        }
+        datasourceInfoList.push(documentInfo)
+      })
+    }
-    if (datasource.type === DataSourceProvider.fireCrawl
-      || datasource.type === DataSourceProvider.jinaReader
-      || datasource.type === DataSourceProvider.waterCrawl) {
-      datasource_type = 'website_crawl'
-      datasourceInfo.jobId = websiteCrawlJobId
-      datasourceInfo.result = websitePages
+    if (datasource.type === DatasourceType.onlineDocument) {
+      notionPages.forEach((page) => {
+        const { workspace_id, ...rest } = page
+        const documentInfo = {
+          workspace_id,
+          page: rest,
+        }
+        datasourceInfoList.push(documentInfo)
+      })
     }
-    // todo: Run Pipeline
-    console.log('datasource_type', datasource_type)
-    handleNextStep()
-  }, [datasource, fileList, handleNextStep, notionPages, websiteCrawlJobId, websitePages])
+    if (datasource.type === DatasourceType.websiteCrawl) {
+      const documentInfo = {
+        job_id: websiteCrawlJobId,
+        result: websitePages,
+      }
+      datasourceInfoList.push(documentInfo)
+    }
+    await runPublishedPipeline({
+      pipeline_id: pipelineId!,
+      inputs: data,
+      start_node_id: datasource.nodeId,
+      datasource_type: datasource.type,
+      datasource_info_list: datasourceInfoList,
+    }, {
+      onSuccess: () => {
+        handleNextStep()
+      },
+    })
+  }, [datasource, fileList, handleNextStep, notionPages, pipelineId, runPublishedPipeline, websiteCrawlJobId, websitePages])
 
   const onClickProcess = useCallback(() => {
     isPreview.current = false
@@ -203,7 +262,7 @@ const TestRunPanel = () => {
             onSelect={setDatasource}
             pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]}
           />
-          {datasource?.type === DataSourceType.FILE && (
+          {datasource?.type === DatasourceType.localFile && (
            <LocalFile
              files={fileList}
              updateFile={updateFile}
@@ -212,7 +271,7 @@ const TestRunPanel = () => {
              notSupportBatchUpload={notSupportBatchUpload}
            />
          )}
-          {datasource?.type === DataSourceType.NOTION && (
+          {datasource?.type === DatasourceType.onlineDocument && (
            <Notion
              nodeId={datasource?.nodeId || ''}
              notionPages={notionPages}
@@ -221,30 +280,15 @@ const TestRunPanel = () => {
              onPreview={updateCurrentPage}
            />
          )}
-          {datasource?.type === DataSourceProvider.fireCrawl && (
-            <FireCrawl
-              nodeId={datasource?.nodeId || ''}
-              variables={datasource?.variables}
-              checkedCrawlResult={websitePages}
-              onCheckedCrawlResultChange={setWebsitePages}
-              onJobIdChange={setWebsiteCrawlJobId}
-              onPreview={updateCurrentWebsite}
-            />
-          )}
-          {datasource?.type === DataSourceProvider.jinaReader && (
-            <JinaReader
-              nodeId={datasource?.nodeId || ''}
-              variables={datasource?.variables}
-              checkedCrawlResult={websitePages}
-              onCheckedCrawlResultChange={setWebsitePages}
-              onJobIdChange={setWebsiteCrawlJobId}
-              onPreview={updateCurrentWebsite}
-            />
-          )}
-          {datasource?.type === DataSourceProvider.waterCrawl && (
-            <WaterCrawl
+          {datasource?.type === DatasourceType.websiteCrawl && (
+            <WebsiteCrawl
              nodeId={datasource?.nodeId || ''}
              variables={datasource?.variables}
+              headerInfo={{
+                title: datasource.description,
+                docTitle: datasource.docTitle || '',
+                docLink: datasource.docLink || '',
+              }}
              checkedCrawlResult={websitePages}
              onCheckedCrawlResultChange={setWebsitePages}
              onJobIdChange={setWebsiteCrawlJobId}
@@ -287,7 +331,7 @@ const TestRunPanel = () => {
        {/* Preview */}
        {
          currentStep === 1 && (
-            <div className='flex h-full w-[752px] shrink-0 flex-col pl-2 pt-2'>
+            <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
              {currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
              {currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />}
              {currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
@@ -296,16 +340,20 @@ const TestRunPanel = () => {
        }
        {
          currentStep === 2 && (
-            <ChunkPreview
-              datasource={datasource!}
-              files={fileList.map(file => file.file)}
-              notionPages={notionPages}
-              websitePages={websitePages}
-              isIdle={true}
-              isPending={true}
-              estimateData={undefined}
-              onPreview={onClickPreview}
-            />
+            <div className='flex h-full w-[752px] shrink-0 pl-2 pt-2'>
+              {estimateData && (
+                <ChunkPreview
+                  datasource={datasource!}
+                  files={fileList.map(file => file.file)}
+                  notionPages={notionPages}
+                  websitePages={websitePages}
+                  isIdle={isIdle}
+                  isPending={isPending}
+                  estimateData={estimateData}
+                  onPreview={onClickPreview}
+                />
+              )}
+            </div>
          )
        }
      </div>

@@ -3,15 +3,13 @@ import React, { useCallback, useEffect, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import type { CrawlResultItem } from '@/models/datasets'
 import Header from '@/app/components/datasets/create/website/base/header'
-import Options from '../base/options'
-import Crawling from '../base/crawling'
-import ErrorMessage from '../base/error-message'
-import CrawledResult from '../base/crawled-result'
+import Options from './options'
+import Crawling from './crawling'
+import ErrorMessage from './error-message'
+import CrawledResult from './crawled-result'
 import type { RAGPipelineVariables } from '@/models/pipeline'
 import { useDatasourceNodeRun } from '@/service/use-pipeline'
 import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
-import { useWebCrawlerHeaderInfo } from '../../../hooks'
-import type { DataSourceProvider } from '@/models/common'
 
 const I18N_PREFIX = 'datasetCreation.stepOne.website'
 
@@ -19,9 +17,13 @@ type CrawlerProps = {
   nodeId: string
   variables: RAGPipelineVariables
   checkedCrawlResult: CrawlResultItem[]
-  datasourceProvider: DataSourceProvider
   onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
   onJobIdChange: (jobId: string) => void
+  headerInfo: {
+    title: string
+    docTitle: string
+    docLink: string
+  }
   onPreview?: (payload: CrawlResultItem) => void
 }
 
@@ -35,7 +37,7 @@ const Crawler = ({
   nodeId,
   variables,
   checkedCrawlResult,
-  datasourceProvider,
+  headerInfo,
   onCheckedCrawlResultChange,
   onJobIdChange,
   onPreview,
@@ -45,8 +47,6 @@ const Crawler = ({
   const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
   const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
 
-  const headerInfoMap = useWebCrawlerHeaderInfo()
-
   useEffect(() => {
     if (step !== Step.init)
       setControlFoldOptions(Date.now())
@@ -91,7 +91,7 @@ const Crawler = ({
    <div>
      <Header
        isInPipeline
-        {...headerInfoMap[datasourceProvider]}
+        {...headerInfo}
      />
      <div className='mt-2 rounded-xl border border-components-panel-border bg-background-default-subtle'>
        <Options
@@ -2,36 +2,41 @@
 import React from 'react'
 import type { CrawlResultItem } from '@/models/datasets'
 import type { RAGPipelineVariables } from '@/models/pipeline'
-import Crawler from '../base/crawler'
-import { DataSourceProvider } from '@/models/common'
+import Crawler from './base/crawler'
 
-type FireCrawlProps = {
+type WebsiteCrawlProps = {
   nodeId: string
   variables: RAGPipelineVariables
   checkedCrawlResult: CrawlResultItem[]
   onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
   onJobIdChange: (jobId: string) => void
+  headerInfo: {
+    title: string
+    docTitle: string
+    docLink: string
+  }
   onPreview?: (payload: CrawlResultItem) => void
 }
 
-const FireCrawl = ({
+const WebsiteCrawl = ({
   nodeId,
   variables,
   checkedCrawlResult,
+  headerInfo,
   onCheckedCrawlResultChange,
   onJobIdChange,
   onPreview,
-}: FireCrawlProps) => {
+}: WebsiteCrawlProps) => {
   return (
    <Crawler
      nodeId={nodeId}
      variables={variables}
      checkedCrawlResult={checkedCrawlResult}
-      datasourceProvider={DataSourceProvider.fireCrawl}
+      headerInfo={headerInfo}
      onCheckedCrawlResultChange={onCheckedCrawlResultChange}
      onJobIdChange={onJobIdChange}
      onPreview={onPreview}
    />
   )
 }
-export default FireCrawl
+export default WebsiteCrawl
@@ -1,37 +0,0 @@
-'use client'
-import React from 'react'
-import type { CrawlResultItem } from '@/models/datasets'
-import type { RAGPipelineVariables } from '@/models/pipeline'
-import Crawler from '../base/crawler'
-import { DataSourceProvider } from '@/models/common'
-
-type JinaReaderProps = {
-  nodeId: string
-  variables: RAGPipelineVariables
-  checkedCrawlResult: CrawlResultItem[]
-  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
-  onJobIdChange: (jobId: string) => void
-  onPreview?: (payload: CrawlResultItem) => void
-}
-
-const JinaReader = ({
-  nodeId,
-  variables,
-  checkedCrawlResult,
-  onCheckedCrawlResultChange,
-  onJobIdChange,
-  onPreview,
-}: JinaReaderProps) => {
-  return (
-    <Crawler
-      nodeId={nodeId}
-      variables={variables}
-      checkedCrawlResult={checkedCrawlResult}
-      datasourceProvider={DataSourceProvider.jinaReader}
-      onCheckedCrawlResultChange={onCheckedCrawlResultChange}
-      onJobIdChange={onJobIdChange}
-      onPreview={onPreview}
-    />
-  )
-}
-export default React.memo(JinaReader)
@@ -1,37 +0,0 @@
-'use client'
-import React from 'react'
-import type { CrawlResultItem } from '@/models/datasets'
-import type { RAGPipelineVariables } from '@/models/pipeline'
-import Crawler from '../base/crawler'
-import { DataSourceProvider } from '@/models/common'
-
-type WaterCrawlProps = {
-  nodeId: string
-  variables: RAGPipelineVariables
-  checkedCrawlResult: CrawlResultItem[]
-  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
-  onJobIdChange: (jobId: string) => void
-  onPreview?: (payload: CrawlResultItem) => void
-}
-
-const WaterCrawl = ({
-  nodeId,
-  variables,
-  checkedCrawlResult,
-  onCheckedCrawlResultChange,
-  onJobIdChange,
-  onPreview,
-}: WaterCrawlProps) => {
-  return (
-    <Crawler
-      nodeId={nodeId}
-      variables={variables}
-      checkedCrawlResult={checkedCrawlResult}
-      datasourceProvider={DataSourceProvider.jinaReader}
-      onCheckedCrawlResultChange={onCheckedCrawlResultChange}
-      onJobIdChange={onJobIdChange}
-      onPreview={onPreview}
-    />
-  )
-}
-export default React.memo(WaterCrawl)
@@ -1,12 +1,11 @@
 import { useTranslation } from 'react-i18next'
 import type { DataSourceOption, Datasource } from './types'
 import { TestRunStep } from './types'
-import { DataSourceType } from '@/models/datasets'
-import { DataSourceProvider } from '@/models/common'
 import { useNodes } from 'reactflow'
 import { BlockEnum } from '@/app/components/workflow/types'
 import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
 import { useMemo } from 'react'
+import type { DatasourceType } from '@/models/pipeline'
 
 export const useTestRunSteps = () => {
   const { t } = useTranslation()
@@ -24,106 +23,34 @@ export const useTestRunSteps = () => {
 }
 
 export const useDatasourceOptions = () => {
-  const { t } = useTranslation()
   const nodes = useNodes<DataSourceNodeType>()
+  const datasourceNodes = nodes.filter(node => node.data.type === BlockEnum.DataSource)
   const datasources: Datasource[] = useMemo(() => {
-    const datasourceNodes = nodes.filter(node => node.data.type === BlockEnum.DataSource)
     return datasourceNodes.map((node) => {
-      let type: DataSourceType | DataSourceProvider = DataSourceType.FILE
-      // todo: distinguish datasource type via provider_type field
-      switch (node.data.tool_name) {
-        case 'file_upload':
-          type = DataSourceType.FILE
-          break
-        case 'search_notion':
-          type = DataSourceType.NOTION
-          break
-        case 'firecrawl':
-          type = DataSourceProvider.fireCrawl
-          break
-        case 'jina_reader':
-          type = DataSourceProvider.jinaReader
-          break
-        case 'water_crawl':
-          type = DataSourceProvider.waterCrawl
-          break
-      }
       return {
        nodeId: node.id,
-        type,
-        variables: node.data.variables,
+        type: node.data.provider_type as DatasourceType,
+        variables: node.data.variables || [],
+        description: node.data.desc || '',
+        docTitle: '', // todo: Add docTitle and docLink if needed, or remove these properties if not used
+        docLink: '',
      }
    })
-  }, [nodes])
+  }, [datasourceNodes])
 
   const options = useMemo(() => {
    const options: DataSourceOption[] = []
-    datasources.forEach((source) => {
-      if (source.type === DataSourceType.FILE) {
-        options.push({
-          label: t('datasetPipeline.testRun.dataSource.localFiles'),
-          value: source.nodeId,
-          type: DataSourceType.FILE,
-        })
-      }
-      if (source.type === DataSourceType.NOTION) {
-        options.push({
-          label: 'Notion',
-          value: source.nodeId,
-          type: DataSourceType.NOTION,
-        })
-      }
-      if (source.type === DataSourceProvider.fireCrawl) {
-        options.push({
-          label: 'Firecrawl',
-          value: source.nodeId,
-          type: DataSourceProvider.fireCrawl,
-        })
-      }
-      if (source.type === DataSourceProvider.jinaReader) {
-        options.push({
-          label: 'Jina Reader',
-          value: source.nodeId,
-          type: DataSourceProvider.jinaReader,
-        })
-      }
-      if (source.type === DataSourceProvider.waterCrawl) {
-        options.push({
-          label: 'Water Crawl',
-          value: source.nodeId,
-          type: DataSourceProvider.waterCrawl,
-        })
-      }
+    datasourceNodes.forEach((node) => {
+      const type = node.data.provider_type as DatasourceType
+      const label = node.data.title
+      options.push({
+        label,
+        value: node.id,
+        type,
+      })
    })
    return options
-  }, [datasources, t])
+  }, [datasourceNodes])
 
   return { datasources, options }
 }
-
-export const useWebCrawlerHeaderInfo = () => {
-  const { t } = useTranslation()
-  const I18N_PREFIX = 'datasetCreation.stepOne.website'
-
-  const headerInfoMap: Record<DataSourceProvider, {
-    title: string
-    docTitle: string
-    docLink: string
-  }> = {
-    [DataSourceProvider.fireCrawl]: {
-      title: t(`${I18N_PREFIX}.firecrawlTitle`),
-      docTitle: t(`${I18N_PREFIX}.firecrawlDoc`),
-      docLink: 'https://docs.firecrawl.dev/introduction',
-    },
-    [DataSourceProvider.jinaReader]: {
-      title: t(`${I18N_PREFIX}.jinaReaderTitle`),
-      docTitle: t(`${I18N_PREFIX}.jinaReaderDoc`),
-      docLink: 'https://jina.ai/reader',
-    },
-    [DataSourceProvider.waterCrawl]: {
-      title: t(`${I18N_PREFIX}.watercrawlTitle`),
-      docTitle: t(`${I18N_PREFIX}.watercrawlDoc`),
-      docLink: 'https://docs.watercrawl.dev/',
-    },
-  }
-  return headerInfoMap
-}

@@ -5,21 +5,19 @@ import StepIndicator from './step-indicator'
 import { useTestRunSteps } from './hooks'
 import DataSourceOptions from './data-source-options'
 import type { CrawlResultItem, FileItem } from '@/models/datasets'
-import { DataSourceType } from '@/models/datasets'
 import LocalFile from './data-source/local-file'
 import produce from 'immer'
 import { useProviderContextSelector } from '@/context/provider-context'
-import { DataSourceProvider, type NotionPage } from '@/models/common'
+import type { NotionPage } from '@/models/common'
 import Notion from './data-source/notion'
 import VectorSpaceFull from '@/app/components/billing/vector-space-full'
-import Firecrawl from './data-source/website/firecrawl'
-import JinaReader from './data-source/website/jina-reader'
-import WaterCrawl from './data-source/website/water-crawl'
+import WebsiteCrawl from './data-source/website-crawl'
 import Actions from './data-source/actions'
 import DocumentProcessing from './document-processing'
 import { useTranslation } from 'react-i18next'
 import { usePipelineRun } from '../../../hooks'
 import type { Datasource } from './types'
+import { DatasourceType } from '@/models/pipeline'
 
 const TestRunPanel = () => {
   const { t } = useTranslation()
@@ -49,13 +47,11 @@ const TestRunPanel = () => {
 
   const nextBtnDisabled = useMemo(() => {
    if (!datasource) return true
-    if (datasource.type === DataSourceType.FILE)
+    if (datasource.type === DatasourceType.localFile)
      return nextDisabled
-    if (datasource.type === DataSourceType.NOTION)
+    if (datasource.type === DatasourceType.onlineDocument)
      return isShowVectorSpaceFull || !notionPages.length
-    if (datasource.type === DataSourceProvider.fireCrawl
-      || datasource.type === DataSourceProvider.jinaReader
-      || datasource.type === DataSourceProvider.waterCrawl)
+    if (datasource.type === DatasourceType.websiteCrawl)
      return isShowVectorSpaceFull || !websitePages.length
    return false
   }, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length])
@@ -97,21 +93,19 @@ const TestRunPanel = () => {
    if (!datasource)
      return
    const datasourceInfoList: Record<string, any>[] = []
-    let datasource_type = ''
-    if (datasource.type === DataSourceType.FILE) {
-      datasource_type = 'local_file'
+    if (datasource.type === DatasourceType.localFile) {
+      const { id, name, type, size, extension, mime_type } = fileList[0].file
      const documentInfo = {
-        upload_file_id: fileList[0].file.id,
-        name: fileList[0].file.name,
-        type: fileList[0].file.type,
-        size: fileList[0].file.size,
-        extension: fileList[0].file.extension,
-        mime_type: fileList[0].file.mime_type,
+        upload_file_id: id,
+        name,
+        type,
+        size,
+        extension,
+        mime_type,
      }
      datasourceInfoList.push(documentInfo)
    }
-    if (datasource.type === DataSourceType.NOTION) {
-      datasource_type = 'online_document'
+    if (datasource.type === DatasourceType.onlineDocument) {
      const { workspace_id, ...rest } = notionPages[0]
      const documentInfo = {
        workspace_id,
@@ -119,20 +113,17 @@ const TestRunPanel = () => {
      }
      datasourceInfoList.push(documentInfo)
    }
-    if (datasource.type === DataSourceProvider.fireCrawl
-      || datasource.type === DataSourceProvider.jinaReader
-      || datasource.type === DataSourceProvider.waterCrawl) {
-      datasource_type = 'website_crawl'
+    if (datasource.type === DatasourceType.websiteCrawl) {
      const documentInfo = {
        job_id: websiteCrawlJobId,
-        result: websitePages[0],
+        result: [websitePages[0]],
      }
      datasourceInfoList.push(documentInfo)
    }
    handleRun({
      inputs: data,
      start_node_id: datasource.nodeId,
-      datasource_type,
+      datasource_type: datasource.type,
      datasource_info_list: datasourceInfoList,
    })
   }, [datasource, fileList, handleRun, notionPages, websiteCrawlJobId, websitePages])
@@ -163,7 +154,7 @@ const TestRunPanel = () => {
            datasourceNodeId={datasource?.nodeId || ''}
            onSelect={setDatasource}
          />
-          {datasource?.type === DataSourceType.FILE && (
+          {datasource?.type === DatasourceType.localFile && (
            <LocalFile
              files={fileList}
              updateFile={updateFile}
@@ -171,36 +162,23 @@ const TestRunPanel = () => {
              notSupportBatchUpload={false} // only support single file upload in test run
            />
          )}
-          {datasource?.type === DataSourceType.NOTION && (
+          {datasource?.type === DatasourceType.onlineDocument && (
            <Notion
              nodeId={datasource?.nodeId || ''}
              notionPages={notionPages}
              updateNotionPages={updateNotionPages}
            />
          )}
-          {datasource?.type === DataSourceProvider.fireCrawl && (
-            <Firecrawl
-              nodeId={datasource?.nodeId || ''}
-              variables={datasource?.variables}
-              checkedCrawlResult={websitePages}
-              onCheckedCrawlResultChange={setWebsitePages}
-              onJobIdChange={setWebsiteCrawlJobId}
-            />
-          )}
-          {datasource?.type === DataSourceProvider.jinaReader && (
-            <JinaReader
-              nodeId={datasource?.nodeId || ''}
-              variables={datasource?.variables}
-              checkedCrawlResult={websitePages}
-              onCheckedCrawlResultChange={setWebsitePages}
-              onJobIdChange={setWebsiteCrawlJobId}
-            />
-          )}
-          {datasource?.type === DataSourceProvider.waterCrawl && (
-            <WaterCrawl
+          {datasource?.type === DatasourceType.websiteCrawl && (
+            <WebsiteCrawl
              nodeId={datasource?.nodeId || ''}
              variables={datasource?.variables}
              checkedCrawlResult={websitePages}
+              headerInfo={{
+                title: datasource.description,
+                docTitle: datasource.docTitle || '',
+                docLink: datasource.docLink || '',
+              }}
              onCheckedCrawlResultChange={setWebsitePages}
              onJobIdChange={setWebsiteCrawlJobId}
            />

@@ -1,6 +1,4 @@
-import type { DataSourceProvider } from '@/models/common'
-import type { DataSourceType } from '@/models/datasets'
-import type { RAGPipelineVariables } from '@/models/pipeline'
+import type { DatasourceType, RAGPipelineVariables } from '@/models/pipeline'
 
 export enum TestRunStep {
   dataSource = 'dataSource',
@@ -10,11 +8,14 @@ export enum TestRunStep {
 export type DataSourceOption = {
   label: string
   value: string
-  type: DataSourceType | DataSourceProvider
+  type: DatasourceType
 }
 
 export type Datasource = {
   nodeId: string
-  type: DataSourceType | DataSourceProvider
+  type: DatasourceType
   variables: RAGPipelineVariables
+  description: string
+  docTitle?: string
+  docLink?: string
 }

@@ -6,6 +6,12 @@ import type { AppIconSelection } from '@/app/components/base/app-icon-picker'
 import type { Viewport } from 'reactflow'
 import type { TransferMethod } from '@/types/app'
 
+export enum DatasourceType {
+  localFile = 'local-file',
+  onlineDocument = 'online-document',
+  websiteCrawl = 'website-crawl',
+}
+
 export type PipelineTemplateListParams = {
   type: 'built-in' | 'customized'
 }
@@ -170,3 +176,12 @@ export type PublishedPipelineInfoResponse = {
   marked_name: string
   marked_comment: string
 }
+
+export type PublishedPipelineRunRequest = {
+  pipeline_id: string
+  inputs: Record<string, any>
+  start_node_id: string
+  datasource_type: DatasourceType
+  datasource_info_list: Array<Record<string, any>>
+  is_preview?: boolean
+}

@@ -16,6 +16,7 @@ import type {
   PipelineTemplateListParams,
   PipelineTemplateListResponse,
   PublishedPipelineInfoResponse,
+  PublishedPipelineRunRequest,
   UpdateTemplateInfoRequest,
   UpdateTemplateInfoResponse,
 } from '@/models/pipeline'
@@ -183,3 +184,21 @@ export const usePublishedPipelineInfo = (pipelineId: string) => {
    enabled: !!pipelineId,
   })
 }
+
+export const useRunPublishedPipeline = (
+  mutationOptions: MutationOptions<any, Error, PublishedPipelineRunRequest> = {},
+) => {
+  return useMutation({
+    mutationKey: [NAME_SPACE, 'run-published-pipeline'],
+    mutationFn: (request: PublishedPipelineRunRequest) => {
+      const { pipeline_id: pipelineId, ...rest } = request
+      return post<PublishedPipelineInfoResponse>(`/rag/pipelines/${pipelineId}/workflows/published/run`, {
+        body: {
+          ...rest,
+          response_mode: 'blocking',
+        },
+      })
+    },
+    ...mutationOptions,
+  })
+}
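For reference, a minimal sketch of how a panel calls the new useRunPublishedPipeline hook (the surrounding variables are illustrative, not part of the commit; the request shape is PublishedPipelineRunRequest as defined above, and the preview path in this commit treats the response as a FileIndexingEstimateResponse):

const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()

// Preview run: is_preview asks the backend for a chunk/indexing estimate
// instead of persisting documents; omit it to run the pipeline for real.
await runPublishedPipeline({
  pipeline_id: pipelineId,              // hypothetical: id of the published pipeline
  inputs: data,                         // hypothetical: values from the input form
  start_node_id: datasource.nodeId,     // the selected DataSource node
  datasource_type: datasource.type,     // a DatasourceType value
  datasource_info_list: [documentInfo], // one entry per file/page/crawl result
  is_preview: true,
}, {
  onSuccess: res => setEstimateData(res as FileIndexingEstimateResponse),
})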