feat: Implement document processing component with configuration and action handling

This commit is contained in:
twwu 2025-04-28 15:55:24 +08:00
parent 8f07e088f5
commit 53f2882077
10 changed files with 294 additions and 35 deletions

View File

@ -5,7 +5,7 @@ import Button from '../../../button'
import { useTranslation } from 'react-i18next'
type ActionsProps = {
CustomActions?: (form: FormType) => React.ReactNode
CustomActions?: (form: FormType) => React.ReactNode | React.JSX.Element
}
const Actions = ({

View File

@ -1,4 +1,4 @@
import type { ZodSchema, ZodString } from 'zod'
import type { ZodNumber, ZodSchema, ZodString } from 'zod'
import { z } from 'zod'
import { type BaseConfiguration, BaseFieldType } from './types'
@ -26,6 +26,21 @@ export const generateZodSchema = <T>(fields: BaseConfiguration<T>[]) => {
break
}
if (field.maxLength) {
if ([BaseFieldType.textInput].includes(field.type))
zodType = (zodType as ZodString).max(field.maxLength, `${field.label} exceeds max length of ${field.maxLength}`)
}
if (field.min) {
if ([BaseFieldType.numberInput].includes(field.type))
zodType = (zodType as ZodNumber).min(field.min, `${field.label} must be at least ${field.min}`)
}
if (field.max) {
if ([BaseFieldType.numberInput].includes(field.type))
zodType = (zodType as ZodNumber).max(field.max, `${field.label} exceeds max value of ${field.max}`)
}
if (field.required) {
if ([BaseFieldType.textInput].includes(field.type))
zodType = (zodType as ZodString).nonempty(`${field.label} is required`)
@ -34,21 +49,6 @@ export const generateZodSchema = <T>(fields: BaseConfiguration<T>[]) => {
zodType = zodType.optional()
}
if (field.maxLength) {
if ([BaseFieldType.textInput].includes(field.type))
zodType = (zodType as ZodString).max(field.maxLength, `${field.label} exceeds max length of ${field.maxLength}`)
}
if (field.min) {
if ([BaseFieldType.numberInput].includes(field.type))
zodType = (zodType as ZodString).min(field.min, `${field.label} must be at least ${field.min}`)
}
if (field.max) {
if ([BaseFieldType.numberInput].includes(field.type))
zodType = (zodType as ZodString).max(field.max, `${field.label} exceeds max value of ${field.max}`)
}
shape[field.variable] = zodType
})

View File

@ -45,14 +45,6 @@ export const generateZodSchema = <T>(fields: InputFieldConfiguration<T>[]) => {
break
}
if (field.required) {
if ([InputFieldType.textInput].includes(field.type))
zodType = (zodType as ZodString).nonempty(`${field.label} is required`)
}
else {
zodType = zodType.optional()
}
if (field.maxLength) {
if ([InputFieldType.textInput].includes(field.type))
zodType = (zodType as ZodString).max(field.maxLength, `${field.label} exceeds max length of ${field.maxLength}`)
@ -68,6 +60,14 @@ export const generateZodSchema = <T>(fields: InputFieldConfiguration<T>[]) => {
zodType = (zodType as ZodString).max(field.max, `${field.label} exceeds max value of ${field.max}`)
}
if (field.required) {
if ([InputFieldType.textInput].includes(field.type))
zodType = (zodType as ZodString).nonempty(`${field.label} is required`)
}
else {
zodType = zodType.optional()
}
shape[field.variable] = zodType
})

View File

@ -1,3 +1,4 @@
import React from 'react'
import cn from '@/utils/classnames'
type OptionCardProps = {
@ -37,4 +38,4 @@ const OptionCard = ({
)
}
export default OptionCard
export default React.memo(OptionCard)

View File

@ -0,0 +1,25 @@
import React from 'react'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
/** Props for the data-source step footer (`Actions`) defined below. */
type ActionsProps = {
/** Forwarded to the button's `disabled` prop. */
disabled?: boolean
/** Passed to the button as its `onClick` handler. */
handleNextStep: () => void
}
/**
 * Footer for the data-source step: a single right-aligned primary button.
 *
 * The button label comes from the `datasetCreation.stepOne.button` translation key,
 * `disabled` is forwarded to the button, and clicking it invokes `handleNextStep`.
 */
const Actions = ({
  disabled,
  handleNextStep,
}: ActionsProps) => {
  const { t } = useTranslation()
  const nextLabel = t('datasetCreation.stepOne.button')

  return (
    <div className='flex justify-end p-4 pt-2'>
      <Button
        disabled={disabled}
        variant='primary'
        onClick={handleNextStep}
      >
        <span className='px-0.5'>{nextLabel}</span>
      </Button>
    </div>
  )
}

// Exported memoized via React.memo.
export default React.memo(Actions)

View File

@ -0,0 +1,34 @@
import React from 'react'
import Button from '@/app/components/base/button'
import type { FormType } from '@/app/components/base/form'
/** Props for the document-processing footer (`Actions`) defined below. */
type ActionsProps = {
/** Form instance; the primary button calls `form.handleSubmit()` on it. */
form: FormType
/** Passed to the secondary button as its `onClick` handler. */
onBack: () => void
}
/**
 * Footer for the document-processing step.
 *
 * Renders two right-aligned buttons: a secondary one wired to `onBack`, and a
 * primary one that calls `form.handleSubmit()`.
 *
 * NOTE(review): both labels are hard-coded English strings rather than
 * translation lookups — confirm whether i18n keys should be used here.
 */
const Actions = ({
  form,
  onBack,
}: ActionsProps) => {
  // Trigger the form's submit flow; whatever handleSubmit returns is not used.
  const submitForm = () => {
    form.handleSubmit()
  }

  return (
    <div className='flex items-center justify-end gap-x-2 p-4 pt-2'>
      <Button variant='secondary' onClick={onBack}>
        Back to Data Source
      </Button>
      <Button variant='primary' onClick={submitForm}>
        Process
      </Button>
    </div>
  )
}

// Exported memoized via React.memo.
export default React.memo(Actions)

View File

@ -0,0 +1,57 @@
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import type { FormData } from './options'
import { useTranslation } from 'react-i18next'
/** Upper bound used for `max_tokens` when the page provides no usable value. */
const FALLBACK_MAX_SEGMENTATION_TOKENS = 4000

/**
 * Reads the maximum segmentation token length from the
 * `data-public-indexing-max-segmentation-tokens-length` attribute on `<body>`.
 *
 * Falls back to {@link FALLBACK_MAX_SEGMENTATION_TOKENS} when `document` is not
 * available, the attribute is missing or empty, it does not start with an
 * integer, or the parsed value is below 1.
 */
const resolveMaxSegmentationTokens = (): number => {
  const raw = globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length')
  const parsed = Number.parseInt(raw || '', 10)
  // A NaN or zero bound is falsy and would silently disable a truthiness-guarded
  // `max` check (and render "NaN" as the placeholder); a bound below the field's
  // own `min: 1` could never be satisfied. Use the default in all of those cases.
  if (Number.isNaN(parsed) || parsed < 1)
    return FALLBACK_MAX_SEGMENTATION_TOKENS
  return parsed
}

/**
 * Builds the field configurations for the document-processing options form.
 *
 * Produces, in order: a `separator` text input, `max_tokens` and `chunk_overlap`
 * number inputs, and `remove_extra_spaces` / `remove_urls_emails` checkboxes.
 * Labels, placeholders and tooltips are resolved through `useTranslation`.
 *
 * @returns A fresh configuration array on every call.
 */
export const useConfigurations = () => {
  const { t } = useTranslation()
  const maxValue = resolveMaxSegmentationTokens()

  const configurations: BaseConfiguration<FormData>[] = [
    {
      type: BaseFieldType.textInput,
      variable: 'separator',
      label: t('datasetCreation.stepTwo.separator'),
      required: false,
      showConditions: [],
      placeholder: t('datasetCreation.stepTwo.separatorPlaceholder'),
      tooltip: t('datasetCreation.stepTwo.separatorTip'),
    },
    {
      type: BaseFieldType.numberInput,
      variable: 'max_tokens',
      label: t('datasetCreation.stepTwo.maxLength'),
      required: false,
      min: 1,
      max: maxValue,
      showConditions: [],
      // The resolved upper bound doubles as the placeholder text.
      placeholder: `${maxValue}`,
    },
    {
      type: BaseFieldType.numberInput,
      variable: 'chunk_overlap',
      label: t('datasetCreation.stepTwo.overlap'),
      required: false,
      min: 1,
      showConditions: [],
      placeholder: t('datasetCreation.stepTwo.overlap') || '',
      tooltip: t('datasetCreation.stepTwo.overlapTip'),
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'remove_extra_spaces',
      label: t('datasetCreation.stepTwo.removeExtraSpaces'),
      required: false,
      showConditions: [],
    },
    {
      type: BaseFieldType.checkbox,
      variable: 'remove_urls_emails',
      label: t('datasetCreation.stepTwo.removeUrlEmails'),
      required: false,
      showConditions: [],
    },
  ]

  return configurations
}

View File

@ -0,0 +1,51 @@
import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils'
import { useConfigurations } from './hooks'
import Options from './options'
import Actions from './actions'
import type { FormType } from '@/app/components/base/form'
import { useCallback } from 'react'
/** Props for the `DocumentProcessing` component defined below. */
type DocumentProcessingProps = {
/** Destructured by the component but not otherwise read in its current body. */
payload: any
/** Called with the form values when the options form is submitted. */
onProcess: (data: any) => void
/** Forwarded to the footer `Actions` as its `onBack` handler. */
onBack: () => void
}
// Initial values the component below passes to the options form.
// Initial `separator`: the escaped text `\n\n` (backslash + "n", twice), not two newline characters.
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
// Initial `max_tokens` value.
const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024
// Initial `chunk_overlap` value.
const DEFAULT_OVERLAP = 50
/**
 * Document-processing step: wires the field configurations, the schema generated
 * from them, default values and the footer actions into the `Options` form.
 *
 * Submitted values are handed to `onProcess`; `onBack` is forwarded to the footer.
 * `payload` is accepted but not read by the current body.
 */
const DocumentProcessing = ({
  payload,
  onProcess,
  onBack,
}: DocumentProcessingProps) => {
  const configurations = useConfigurations()
  // Recomputed on every render from the current configurations.
  const schema = generateZodSchema(configurations)

  // Render-prop handed to Options so the footer receives the form instance.
  const renderCustomActions = useCallback(
    (form: FormType) => <Actions form={form} onBack={onBack} />,
    [onBack],
  )

  // Starting values for the form, built from the module-level defaults.
  const initialData = {
    separator: DEFAULT_SEGMENT_IDENTIFIER,
    max_tokens: DEFAULT_MAXIMUM_CHUNK_LENGTH,
    chunk_overlap: DEFAULT_OVERLAP,
    remove_extra_spaces: true,
    remove_urls_emails: false,
  }

  return (
    <div>
      <Options
        initialData={initialData}
        configurations={configurations}
        schema={schema}
        onSubmit={(data) => {
          onProcess(data)
        }}
        CustomActions={renderCustomActions}
      />
    </div>
  )
}

export default DocumentProcessing

View File

@ -0,0 +1,81 @@
import type { FormType } from '@/app/components/base/form'
import { useAppForm } from '@/app/components/base/form'
import BaseField from '@/app/components/base/form/form-scenarios/base/field'
import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types'
import Toast from '@/app/components/base/toast'
import type { ZodSchema } from 'zod'
/**
 * Shape of the values held by the options form in this file: used for
 * `initialData` and for the argument passed to `onSubmit`.
 */
export type FormData = {
separator: string
max_tokens: number
chunk_overlap: number
remove_extra_spaces: boolean
remove_urls_emails: boolean
}
/** Props for the `Options` form component defined below. */
type OptionsProps = {
/** Used as the form's `defaultValues` and passed to each field factory call. */
initialData: FormData
/** One field is rendered per entry, in array order. */
configurations: BaseConfiguration<FormData>[]
/** Values are checked with `schema.safeParse` in the form's submit validator. */
schema: ZodSchema
/** Forwarded to `form.Actions` as its `CustomActions` prop. */
CustomActions: (form: FormType) => React.JSX.Element
/** Called with the form values from the form's `onSubmit` option. */
onSubmit: (data: FormData) => void
}
/**
 * Options form: renders one field per configuration entry followed by the form
 * actions.
 *
 * The submit validator runs `schema.safeParse` on the current values. On failure
 * it shows an error toast built from the first issue (`"<path>" <message>`) and
 * returns that same message; on success it returns `undefined`. The form's
 * `onSubmit` option forwards the values to the `onSubmit` prop.
 */
const Options = ({
  initialData,
  configurations,
  schema,
  CustomActions,
  onSubmit,
}: OptionsProps) => {
  const form = useAppForm({
    defaultValues: initialData,
    validators: {
      onSubmit: ({ value }) => {
        const parsed = schema.safeParse(value)
        if (parsed.success)
          return undefined

        // Only the first reported issue is surfaced to the user.
        const [firstIssue] = parsed.error.issues
        const errorMessage = `"${firstIssue.path.join('.')}" ${firstIssue.message}`
        Toast.notify({
          type: 'error',
          message: errorMessage,
        })
        return errorMessage
      },
    },
    onSubmit: ({ value }) => {
      onSubmit(value)
    },
  })

  // NOTE(review): a field component is produced by BaseField on every render and
  // keyed by array index — confirm this does not cause unwanted remounts.
  const fields = configurations.map((config, index) => {
    const FieldComponent = BaseField<FormData>({
      initialData,
      config,
    })
    return <FieldComponent key={index} form={form} />
  })

  return (
    <form
      className='w-full'
      onSubmit={(e) => {
        // Suppress the native submit and stop the event from bubbling, then
        // delegate to the form instance.
        e.preventDefault()
        e.stopPropagation()
        form.handleSubmit()
      }}
    >
      <div className='flex flex-col gap-3 px-4 pb-6 pt-3'>
        {fields}
      </div>
      <form.AppForm>
        <form.Actions CustomActions={CustomActions} />
      </form.AppForm>
    </form>
  )
}

export default Options

View File

@ -8,8 +8,6 @@ import type { CrawlOptions, CrawlResultItem, FileItem } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import LocalFile from './data-source/local-file'
import produce from 'immer'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
import { useProviderContextSelector } from '@/context/provider-context'
import { DataSourceProvider, type NotionPage } from '@/models/common'
import Notion from './data-source/notion'
@ -18,10 +16,11 @@ import { DEFAULT_CRAWL_OPTIONS } from './consts'
import Firecrawl from './data-source/website/firecrawl'
import JinaReader from './data-source/website/jina-reader'
import WaterCrawl from './data-source/website/water-crawl'
import Actions from './data-source/actions'
import DocumentProcessing from './document-processing'
const TestRunPanel = () => {
const { t } = useTranslation()
const [currentStep, setCurrentStep] = useState(1)
const [currentStep, setCurrentStep] = useState(2)
const [dataSource, setDataSource] = useState<string>(DataSourceProvider.waterCrawl)
const [fileList, setFiles] = useState<FileItem[]>([])
const [notionPages, setNotionPages] = useState<NotionPage[]>([])
@ -91,6 +90,10 @@ const TestRunPanel = () => {
setCurrentStep(preStep => preStep + 1)
}, [])
const handleBackStep = useCallback(() => {
setCurrentStep(preStep => preStep - 1)
}, [])
return (
<div
className='relative flex h-full w-[480px] flex-col rounded-l-2xl border-y-[0.5px] border-l-[0.5px] border-components-panel-border bg-components-panel-bg shadow-xl shadow-shadow-shadow-1'
@ -163,14 +166,21 @@ const TestRunPanel = () => {
<VectorSpaceFull />
)}
</div>
<div className='flex justify-end p-4 pt-2'>
<Button disabled={nextBtnDisabled} variant='primary' onClick={handleNextStep}>
<span className='px-0.5'>{t('datasetCreation.stepOne.button')}</span>
</Button>
</div>
<Actions disabled={nextBtnDisabled} handleNextStep={handleNextStep} />
</>
)
}
{
currentStep === 2 && (
<DocumentProcessing
payload={{}}
onProcess={(data) => {
console.log('Processing data:', data)
}}
onBack={handleBackStep}
/>
)
}
</div>
</div>
)