refactor: update data source handling and improve internationalization support in test run panel

This commit is contained in:
twwu 2025-05-09 12:56:57 +08:00
parent d9ed61287d
commit 7e31da7882
14 changed files with 216 additions and 146 deletions

View File

@ -6,10 +6,11 @@ import { Notion } from '@/app/components/base/icons/src/public/common'
import { Jina } from '@/app/components/base/icons/src/public/llm'
import { DataSourceType } from '@/models/datasets'
import { DataSourceProvider } from '@/models/common'
import type { Datasource } from '../types'
type DataSourceOptionsProps = {
dataSources: string[]
dataSourceType: string
dataSources: Datasource[]
dataSourceNodeId: string
onSelect: (option: string) => void
}
@ -23,7 +24,7 @@ const DATA_SOURCE_ICONS = {
const DataSourceOptions = ({
dataSources,
dataSourceType,
dataSourceNodeId,
onSelect,
}: DataSourceOptionsProps) => {
const options = useDataSourceOptions(dataSources)
@ -38,8 +39,8 @@ const DataSourceOptions = ({
<OptionCard
key={option.value}
label={option.label}
selected={dataSourceType === option.value}
Icon={DATA_SOURCE_ICONS[option.value as keyof typeof DATA_SOURCE_ICONS]}
selected={dataSourceNodeId === option.value}
Icon={DATA_SOURCE_ICONS[option.type as keyof typeof DATA_SOURCE_ICONS]}
onClick={handelSelect.bind(null, option.value)}
/>
))}

View File

@ -6,7 +6,6 @@ import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets'
import { sleep } from '@/utils'
import Header from '@/app/components/datasets/create/website/base/header'
import type { FormData } from '../base/options'
import Options from '../base/options'
import { useConfigurations, useSchema } from './hooks'
import Crawling from '../base/crawling'
@ -109,9 +108,9 @@ const FireCrawl = ({
}
}, [crawlOptions.limit, onCheckedCrawlResultChange])
const handleRun = useCallback(async (value: FormData) => {
const handleRun = useCallback(async (value: Record<string, any>) => {
const { url, ...crawlOptions } = value
onCrawlOptionsChange(crawlOptions)
onCrawlOptionsChange(crawlOptions as CrawlOptions)
setStep(Step.running)
try {
const passToServerCrawlOptions: any = {

View File

@ -9,7 +9,6 @@ import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datas
import { sleep } from '@/utils'
import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import Header from '@/app/components/datasets/create/website/base/header'
import type { FormData } from '../base/options'
import Options from '../base/options'
import { useConfigurations, useSchema } from './hooks'
@ -108,9 +107,9 @@ const JinaReader = ({
}
}, [crawlOptions.limit, onCheckedCrawlResultChange])
const handleRun = useCallback(async (value: FormData) => {
const handleRun = useCallback(async (value: Record<string, any>) => {
const { url, ...crawlOptions } = value
onCrawlOptionsChange(crawlOptions)
onCrawlOptionsChange(crawlOptions as CrawlOptions)
setStep(Step.running)
try {
const startTime = Date.now()

View File

@ -6,7 +6,6 @@ import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
import { checkWatercrawlTaskStatus, createWatercrawlTask } from '@/service/datasets'
import { sleep } from '@/utils'
import Header from '@/app/components/datasets/create/website/base/header'
import type { FormData } from '../base/options'
import Options from '../base/options'
import { useConfigurations, useSchema } from './hooks'
import Crawling from '../base/crawling'
@ -109,9 +108,9 @@ const WaterCrawl = ({
}
}, [crawlOptions.limit, onCheckedCrawlResultChange])
const handleRun = useCallback(async (value: FormData) => {
const handleRun = useCallback(async (value: Record<string, any>) => {
const { url, ...crawlOptions } = value
onCrawlOptionsChange(crawlOptions)
onCrawlOptionsChange(crawlOptions as CrawlOptions)
setStep(Step.running)
try {
const passToServerCrawlOptions: any = {

View File

@ -1,6 +1,7 @@
import React from 'react'
import Button from '@/app/components/base/button'
import type { FormType } from '@/app/components/base/form'
import { useTranslation } from 'react-i18next'
type ActionsProps = {
form: FormType
@ -11,13 +12,15 @@ const Actions = ({
form,
onBack,
}: ActionsProps) => {
const { t } = useTranslation()
return (
<div className='flex items-center justify-end gap-x-2 p-4 pt-2'>
<Button
variant='secondary'
onClick={onBack}
>
Back to Data Source
{t('datasetPipeline.operations.backToDataSource')}
</Button>
<Button
variant='primary'
@ -25,7 +28,7 @@ const Actions = ({
form.handleSubmit()
}}
>
Process
{t('datasetPipeline.operations.process')}
</Button>
</div>
)

View File

@ -1,57 +1,58 @@
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
import { useMemo } from 'react'
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import type { FormData } from './options'
import { useTranslation } from 'react-i18next'
import { useStore } from '@/app/components/workflow/store'
import { InputVarType } from '@/app/components/workflow/types'
import { usePipelineProcessingParams } from '@/service/use-pipeline'
// The InputVarType members that have a corresponding entry in VAR_TYPE_MAP.
type PartialInputVarType = InputVarType.textInput | InputVarType.number | InputVarType.select | InputVarType.checkbox
// Translates a pipeline variable's InputVarType into the BaseFieldType placed on the
// generated form configuration. Lookups in this file cast `item.type` to
// PartialInputVarType, so a variable whose type is not one of the four keys below
// resolves to `undefined` at runtime.
const VAR_TYPE_MAP: Record<PartialInputVarType, BaseFieldType> = {
[InputVarType.textInput]: BaseFieldType.textInput,
[InputVarType.number]: BaseFieldType.numberInput,
[InputVarType.select]: BaseFieldType.select,
[InputVarType.checkbox]: BaseFieldType.checkbox,
}
export const useConfigurations = () => {
const { t } = useTranslation()
const maxValue = Number.parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10)
const pipelineId = useStore(state => state.pipelineId)
const { data: paramsConfig } = usePipelineProcessingParams(pipelineId!)
const configurations: BaseConfiguration<FormData>[] = [
{
type: BaseFieldType.textInput,
variable: 'separator',
label: t('datasetCreation.stepTwo.separator'),
required: false,
showConditions: [],
placeholder: t('datasetCreation.stepTwo.separatorPlaceholder'),
tooltip: t('datasetCreation.stepTwo.separatorTip'),
},
{
type: BaseFieldType.numberInput,
variable: 'max_tokens',
label: t('datasetCreation.stepTwo.maxLength'),
required: false,
min: 1,
max: maxValue,
showConditions: [],
placeholder: `${maxValue}`,
},
{
type: BaseFieldType.numberInput,
variable: 'chunk_overlap',
label: t('datasetCreation.stepTwo.overlap'),
required: false,
min: 1,
showConditions: [],
placeholder: t('datasetCreation.stepTwo.overlap') || '',
tooltip: t('datasetCreation.stepTwo.overlapTip'),
},
{
type: BaseFieldType.checkbox,
variable: 'remove_extra_spaces',
label: t('datasetCreation.stepTwo.removeExtraSpaces'),
required: false,
showConditions: [],
},
{
type: BaseFieldType.checkbox,
variable: 'remove_urls_emails',
label: t('datasetCreation.stepTwo.removeUrlEmails'),
required: false,
showConditions: [],
},
]
// Builds the initial form values, keyed by variable name, for every processing
// variable whose type is present in VAR_TYPE_MAP. A variable's `default` is used
// when its runtime type matches the field; otherwise the per-type fallback applies
// ('' for text, 0 for number, first option or '' for select, true for checkbox).
// Variables with an unmapped type contribute no entry.
const initialData = useMemo(() => {
  const variables = paramsConfig?.variables ?? []
  return variables.reduce<Record<string, any>>((acc, item) => {
    const preset = item.default
    switch (VAR_TYPE_MAP[item.type as PartialInputVarType]) {
      case BaseFieldType.textInput:
        acc[item.variable] = typeof preset === 'string' ? preset : ''
        break
      case BaseFieldType.numberInput:
        acc[item.variable] = typeof preset === 'number' ? preset : 0
        break
      case BaseFieldType.select:
        // Only accept a default that is actually one of the selectable options.
        acc[item.variable] = (typeof preset === 'string' && item.options?.includes(preset))
          ? preset
          : (item.options?.[0] || '')
        break
      case BaseFieldType.checkbox:
        acc[item.variable] = typeof preset === 'boolean' ? preset : true
        break
      default:
        // Unmapped variable type: leave it out of the initial data.
        break
    }
    return acc
  }, {})
}, [paramsConfig])
return configurations
// Produces one form-field configuration per pipeline processing variable.
// Recomputed only when the fetched params config changes.
const configurations = useMemo(() => {
  const variableList = paramsConfig?.variables || []
  return variableList.map((entry) => {
    // Each raw option string doubles as both the label and the value.
    const selectOptions = entry.options?.map(choice => ({
      label: choice,
      value: choice,
    }))
    return {
      type: VAR_TYPE_MAP[entry.type as PartialInputVarType],
      variable: entry.variable,
      label: entry.label,
      required: entry.required,
      maxLength: entry.max_length,
      options: selectOptions,
      showConditions: [],
      default: entry.default,
    }
  })
}, [paramsConfig])
return {
initialData,
configurations,
}
}

View File

@ -6,21 +6,15 @@ import type { FormType } from '@/app/components/base/form'
import { useCallback } from 'react'
type DocumentProcessingProps = {
payload: any
onProcess: (data: any) => void
onProcess: (data: Record<string, any>) => void
onBack: () => void
}
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024
const DEFAULT_OVERLAP = 50
const DocumentProcessing = ({
payload,
onProcess,
onBack,
}: DocumentProcessingProps) => {
const configurations = useConfigurations()
const { initialData, configurations } = useConfigurations()
const schema = generateZodSchema(configurations)
const renderCustomActions = useCallback((form: FormType) => (
@ -28,23 +22,13 @@ const DocumentProcessing = ({
), [onBack])
return (
<div>
<Options
initialData={{
separator: DEFAULT_SEGMENT_IDENTIFIER,
max_tokens: DEFAULT_MAXIMUM_CHUNK_LENGTH,
chunk_overlap: DEFAULT_OVERLAP,
remove_extra_spaces: true,
remove_urls_emails: false,
}}
configurations={configurations}
schema={schema}
onSubmit={(data) => {
onProcess(data)
}}
CustomActions={renderCustomActions}
/>
</div>
<Options
initialData={initialData}
configurations={configurations}
schema={schema}
onSubmit={onProcess}
CustomActions={renderCustomActions}
/>
)
}

View File

@ -5,20 +5,12 @@ import type { BaseConfiguration } from '@/app/components/base/form/form-scenario
import Toast from '@/app/components/base/toast'
import type { ZodSchema } from 'zod'
export type FormData = {
separator: string
max_tokens: number
chunk_overlap: number
remove_extra_spaces: boolean
remove_urls_emails: boolean
}
type OptionsProps = {
initialData: FormData
configurations: BaseConfiguration<FormData>[]
initialData: Record<string, any>
configurations: BaseConfiguration[]
schema: ZodSchema
CustomActions: (form: FormType) => React.JSX.Element
onSubmit: (data: FormData) => void
onSubmit: (data: Record<string, any>) => void
}
const Options = ({
@ -62,7 +54,7 @@ const Options = ({
>
<div className='flex flex-col gap-3 px-4 pb-6 pt-3'>
{configurations.map((config, index) => {
const FieldComponent = BaseField<FormData>({
const FieldComponent = BaseField({
initialData,
config,
})

View File

@ -1,58 +1,61 @@
import { useTranslation } from 'react-i18next'
import type { DataSourceOption } from './types'
import type { DataSourceOption, Datasource } from './types'
import { TestRunStep } from './types'
import { DataSourceType } from '@/models/datasets'
import { DataSourceProvider } from '@/models/common'
export const useTestRunSteps = () => {
// TODO: i18n
const { t } = useTranslation()
const steps = [
{
label: 'DATA SOURCE',
label: t('datasetPipeline.testRun.steps.dataSource'),
value: TestRunStep.dataSource,
},
{
label: 'DOCUMENT PROCESSING',
label: t('datasetPipeline.testRun.steps.documentProcessing'),
value: TestRunStep.documentProcessing,
},
]
return steps
}
export const useDataSourceOptions = (dataSources: string[]) => {
// TODO: i18n
export const useDataSourceOptions = (dataSources: Datasource[]) => {
const { t } = useTranslation()
const options: DataSourceOption[] = []
dataSources.forEach((source) => {
if (source === DataSourceType.FILE) {
if (source.type === DataSourceType.FILE) {
options.push({
label: 'Local Files',
value: DataSourceType.FILE,
label: t('datasetPipeline.testRun.dataSource.localFiles'),
value: source.nodeId,
type: DataSourceType.FILE,
})
}
if (source === DataSourceType.NOTION) {
if (source.type === DataSourceType.NOTION) {
options.push({
label: 'Notion',
value: DataSourceType.NOTION,
value: source.nodeId,
type: DataSourceType.NOTION,
})
}
if (source === DataSourceProvider.fireCrawl) {
if (source.type === DataSourceProvider.fireCrawl) {
options.push({
label: 'Firecrawl',
value: DataSourceProvider.fireCrawl,
value: source.nodeId,
type: DataSourceProvider.fireCrawl,
})
}
if (source === DataSourceProvider.jinaReader) {
if (source.type === DataSourceProvider.jinaReader) {
options.push({
label: 'Jina Reader',
value: DataSourceProvider.jinaReader,
value: source.nodeId,
type: DataSourceProvider.jinaReader,
})
}
if (source === DataSourceProvider.waterCrawl) {
if (source.type === DataSourceProvider.waterCrawl) {
options.push({
label: 'Water Crawl',
value: DataSourceProvider.waterCrawl,
value: source.nodeId,
type: DataSourceProvider.waterCrawl,
})
}
})

View File

@ -1,4 +1,4 @@
import { useWorkflowStore } from '@/app/components/workflow/store'
import { useStore as useWorkflowStoreWithSelector } from '@/app/components/workflow/store'
import { RiCloseLine } from '@remixicon/react'
import { useCallback, useMemo, useState } from 'react'
import StepIndicator from './step-indicator'
@ -18,11 +18,19 @@ import JinaReader from './data-source/website/jina-reader'
import WaterCrawl from './data-source/website/water-crawl'
import Actions from './data-source/actions'
import DocumentProcessing from './document-processing'
import { useTranslation } from 'react-i18next'
import { useWorkflowRun } from '../../../hooks'
import type { Datasource } from './types'
const TestRunPanel = () => {
const workflowStore = useWorkflowStore()
const [currentStep, setCurrentStep] = useState(2)
const [dataSource, setDataSource] = useState<string>(DataSourceProvider.waterCrawl)
const { t } = useTranslation()
const setShowDebugAndPreviewPanel = useWorkflowStoreWithSelector(state => state.setShowDebugAndPreviewPanel)
const [currentStep, setCurrentStep] = useState(1)
const [datasource, setDatasource] = useState<Datasource>({
nodeId: '1',
type: DataSourceType.FILE,
config: {},
})
const [fileList, setFiles] = useState<FileItem[]>([])
const [notionPages, setNotionPages] = useState<NotionPage[]>([])
const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
@ -33,7 +41,28 @@ const TestRunPanel = () => {
const enableBilling = useProviderContextSelector(state => state.enableBilling)
const steps = useTestRunSteps()
const dataSources = ['upload_file', 'notion_import', 'firecrawl', 'jinareader', 'watercrawl'] // TODO: replace with real data sources from API
// TODO: replace with real data sources from API
// Hard-coded stand-in list with one entry for each data source type this panel
// renders; every entry carries a placeholder nodeId and an empty config.
// Memoized with an empty dependency list so the array identity stays stable across
// renders (handleDataSourceSelect lists it as a dependency).
const dataSources = useMemo(() => [{
nodeId: '1',
type: DataSourceType.FILE,
config: {},
}, {
nodeId: '2',
type: DataSourceType.NOTION,
config: {},
}, {
nodeId: '3',
type: DataSourceProvider.fireCrawl,
config: {},
}, {
nodeId: '4',
type: DataSourceProvider.jinaReader,
config: {},
}, {
nodeId: '5',
type: DataSourceProvider.waterCrawl,
config: {},
}], [])
const allFileLoaded = (fileList.length > 0 && fileList.every(file => file.file.id))
const isVectorSpaceFull = plan.usage.vectorSpace >= plan.total.vectorSpace
@ -48,25 +77,27 @@ const TestRunPanel = () => {
}, [fileList, isShowVectorSpaceFull])
const nextBtnDisabled = useMemo(() => {
if (dataSource === DataSourceType.FILE)
if (datasource.type === DataSourceType.FILE)
return nextDisabled
if (dataSource === DataSourceType.NOTION)
if (datasource.type === DataSourceType.NOTION)
return isShowVectorSpaceFull || !notionPages.length
if (dataSource === DataSourceProvider.fireCrawl
|| dataSource === DataSourceProvider.jinaReader
|| dataSource === DataSourceProvider.waterCrawl)
if (datasource.type === DataSourceProvider.fireCrawl
|| datasource.type === DataSourceProvider.jinaReader
|| datasource.type === DataSourceProvider.waterCrawl)
return isShowVectorSpaceFull || !websitePages.length
return false
}, [dataSource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length])
}, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length])
const handleClose = () => {
const { setShowDebugAndPreviewPanel } = workflowStore.getState()
setShowDebugAndPreviewPanel(false)
}
const handleDataSourceSelect = useCallback((option: string) => {
setDataSource(option)
}, [])
const dataSource = dataSources.find(dataSource => dataSource.nodeId === option)
if (!dataSource)
return
setDatasource(dataSource)
}, [dataSources])
const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
const newList = produce(list, (draft) => {
@ -95,6 +126,32 @@ const TestRunPanel = () => {
setCurrentStep(preStep => preStep - 1)
}, [])
const { handleRun } = useWorkflowRun()
/**
 * Gathers what was selected in the data source step and starts a workflow run.
 *
 * @param data - Values submitted by the document processing form. They are
 *   forwarded as the run `inputs`; defaults to an empty object so the callback
 *   can still be invoked without arguments.
 */
const handleProcess = useCallback((data: Record<string, any> = {}) => {
  const datasourceInfo: Record<string, any> = {}
  const { type } = datasource
  if (type === DataSourceType.FILE) {
    // NOTE(review): `fileID` is sent here, while `allFileLoaded` checks `file.file.id`
    // to decide whether an upload finished — confirm which identifier the run API expects.
    datasourceInfo.fileId = fileList.map(file => file.fileID)
  }
  else if (type === DataSourceType.NOTION) {
    // The workspace id is taken from the first selected page; optional chaining keeps
    // an empty selection from throwing.
    datasourceInfo.workspaceId = notionPages[0]?.workspace_id
    // Strip the per-page workspace id, since it is already sent once above.
    datasourceInfo.page = notionPages.map(({ workspace_id, ...rest }) => rest)
  }
  else if (type === DataSourceProvider.fireCrawl
    || type === DataSourceProvider.jinaReader
    || type === DataSourceProvider.waterCrawl) {
    datasourceInfo.jobId = websiteCrawlJobId
    datasourceInfo.result = websitePages
  }
  handleRun({
    inputs: data,
    // Send the type discriminator, not the whole Datasource object.
    datasource_type: type,
    datasource_info: datasourceInfo,
  })
}, [datasource, fileList, handleRun, notionPages, websiteCrawlJobId, websitePages])
return (
<div
className='relative flex h-full w-[480px] flex-col rounded-l-2xl border-y-[0.5px] border-l-[0.5px] border-components-panel-border bg-components-panel-bg shadow-xl shadow-shadow-shadow-1'
@ -108,7 +165,7 @@ const TestRunPanel = () => {
</button>
<div className='flex flex-col gap-y-0.5 px-3 pb-2 pt-3.5'>
<div className='system-md-semibold-uppercase flex items-center justify-between pl-1 pr-8 text-text-primary'>
TEST RUN
{t('datasetPipeline.testRun.title')}
</div>
<StepIndicator steps={steps} currentStep={currentStep} />
</div>
@ -119,10 +176,10 @@ const TestRunPanel = () => {
<div className='flex flex-col gap-y-4 px-4 py-2'>
<DataSourceOptions
dataSources={dataSources}
dataSourceType={dataSource}
dataSourceNodeId={datasource.nodeId}
onSelect={handleDataSourceSelect}
/>
{dataSource === DataSourceType.FILE && (
{datasource.type === DataSourceType.FILE && (
<LocalFile
files={fileList}
updateFile={updateFile}
@ -130,13 +187,13 @@ const TestRunPanel = () => {
notSupportBatchUpload={notSupportBatchUpload}
/>
)}
{dataSource === DataSourceType.NOTION && (
{datasource.type === DataSourceType.NOTION && (
<Notion
notionPages={notionPages}
updateNotionPages={updateNotionPages}
/>
)}
{dataSource === DataSourceProvider.fireCrawl && (
{datasource.type === DataSourceProvider.fireCrawl && (
<Firecrawl
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
@ -145,7 +202,7 @@ const TestRunPanel = () => {
onCrawlOptionsChange={setCrawlOptions}
/>
)}
{dataSource === DataSourceProvider.jinaReader && (
{datasource.type === DataSourceProvider.jinaReader && (
<JinaReader
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
@ -154,7 +211,7 @@ const TestRunPanel = () => {
onCrawlOptionsChange={setCrawlOptions}
/>
)}
{dataSource === DataSourceProvider.waterCrawl && (
{datasource.type === DataSourceProvider.waterCrawl && (
<WaterCrawl
checkedCrawlResult={websitePages}
onCheckedCrawlResultChange={setWebsitePages}
@ -174,10 +231,7 @@ const TestRunPanel = () => {
{
currentStep === 2 && (
<DocumentProcessing
payload={{}}
onProcess={(data) => {
console.log('Processing data:', data)
}}
onProcess={handleProcess}
onBack={handleBackStep}
/>
)

View File

@ -1,3 +1,6 @@
import type { DataSourceProvider } from '@/models/common'
import type { DataSourceType } from '@/models/datasets'
export enum TestRunStep {
dataSource = 'dataSource',
documentProcessing = 'documentProcessing',
@ -6,4 +9,11 @@ export enum TestRunStep {
export type DataSourceOption = {
label: string
value: string
type: DataSourceType | DataSourceProvider
}
/** A selectable data source entry in the test run panel. */
export type Datasource = {
/** Identifier of the entry; used as the option value and to look up the selected entry. */
nodeId: string
/** Kind of data source; compared against DataSourceType / DataSourceProvider members to pick the icon and the panel to render. */
type: DataSourceType | DataSourceProvider
/** Untyped payload. NOTE(review): every entry visible in this change sets it to `{}` — confirm the intended shape. */
config: any
}

View File

@ -23,6 +23,8 @@ const translation = {
editInfo: 'Edit info',
exportDSL: 'Export DSL',
useTemplate: 'Use this Knowledge Pipeline',
backToDataSource: 'Back to Data Source',
process: 'Process',
},
knowledgeNameAndIcon: 'Knowledge name & icon',
knowledgeNameAndIconPlaceholder: 'Please enter the name of the Knowledge Base',
@ -43,6 +45,16 @@ const translation = {
structure: 'Structure',
structureTooltip: 'Chunk Structure determines how documents are split and indexed—offering General, Parent-Child, and Q&A modes—and is unique to each knowledge base.',
},
testRun: {
title: 'Test Run',
steps: {
dataSource: 'Data Source',
documentProcessing: 'Document Processing',
},
dataSource: {
localFiles: 'Local Files',
},
},
}
export default translation

View File

@ -23,6 +23,8 @@ const translation = {
editInfo: '编辑信息',
exportDSL: '导出 DSL',
useTemplate: '使用此知识库流水线',
backToDataSource: '返回数据源',
process: '处理',
},
knowledgeNameAndIcon: '知识库名称和图标',
knowledgeNameAndIconPlaceholder: '请输入知识库名称',
@ -43,6 +45,16 @@ const translation = {
structure: '文档结构',
structureTooltip: '文档结构决定了文档的拆分和索引方式Dify 提供了通用、父子和问答模式,每个知识库的文档结构是唯一的。',
},
testRun: {
title: '测试运行',
steps: {
dataSource: '数据源',
documentProcessing: '文档处理',
},
dataSource: {
localFiles: '本地文件',
},
},
}
export default translation

View File

@ -79,6 +79,7 @@ export type Variables = {
max_length: number
required: boolean
options?: string[]
default: string | number | boolean
}
export type PipelineProcessingParamsResponse = {