From 53f28820776e5067b5629fe08f7fa145ce9d8478 Mon Sep 17 00:00:00 2001 From: twwu Date: Mon, 28 Apr 2025 15:55:24 +0800 Subject: [PATCH] feat: Implement document processing component with configuration and action handling --- .../base/form/components/form/actions.tsx | 2 +- .../base/form/form-scenarios/base/utils.ts | 32 ++++---- .../form/form-scenarios/input-field/utils.ts | 16 ++-- .../data-source-options/option-card.tsx | 3 +- .../panel/test-run/data-source/actions.tsx | 25 ++++++ .../test-run/document-processing/actions.tsx | 34 ++++++++ .../test-run/document-processing/hooks.ts | 57 +++++++++++++ .../test-run/document-processing/index.tsx | 51 ++++++++++++ .../test-run/document-processing/options.tsx | 81 +++++++++++++++++++ .../components/panel/test-run/index.tsx | 28 ++++--- 10 files changed, 294 insertions(+), 35 deletions(-) create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/data-source/actions.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/document-processing/actions.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/document-processing/hooks.ts create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/document-processing/index.tsx create mode 100644 web/app/components/rag-pipeline/components/panel/test-run/document-processing/options.tsx diff --git a/web/app/components/base/form/components/form/actions.tsx b/web/app/components/base/form/components/form/actions.tsx index 4e22350c90..bf7620d341 100644 --- a/web/app/components/base/form/components/form/actions.tsx +++ b/web/app/components/base/form/components/form/actions.tsx @@ -5,7 +5,7 @@ import Button from '../../../button' import { useTranslation } from 'react-i18next' type ActionsProps = { - CustomActions?: (form: FormType) => React.ReactNode + CustomActions?: (form: FormType) => React.ReactNode | React.JSX.Element } const Actions = ({ diff --git a/web/app/components/base/form/form-scenarios/base/utils.ts b/web/app/components/base/form/form-scenarios/base/utils.ts index d23e6bf015..d92abe109e 100644 --- a/web/app/components/base/form/form-scenarios/base/utils.ts +++ b/web/app/components/base/form/form-scenarios/base/utils.ts @@ -1,4 +1,4 @@ -import type { ZodSchema, ZodString } from 'zod' +import type { ZodNumber, ZodSchema, ZodString } from 'zod' import { z } from 'zod' import { type BaseConfiguration, BaseFieldType } from './types' @@ -26,6 +26,21 @@ export const generateZodSchema = (fields: BaseConfiguration[]) => { break } + if (field.maxLength) { + if ([BaseFieldType.textInput].includes(field.type)) + zodType = (zodType as ZodString).max(field.maxLength, `${field.label} exceeds max length of ${field.maxLength}`) + } + + if (field.min) { + if ([BaseFieldType.numberInput].includes(field.type)) + zodType = (zodType as ZodNumber).min(field.min, `${field.label} must be at least ${field.min}`) + } + + if (field.max) { + if ([BaseFieldType.numberInput].includes(field.type)) + zodType = (zodType as ZodNumber).max(field.max, `${field.label} exceeds max value of ${field.max}`) + } + if (field.required) { if ([BaseFieldType.textInput].includes(field.type)) zodType = (zodType as ZodString).nonempty(`${field.label} is required`) @@ -34,21 +49,6 @@ export const generateZodSchema = (fields: BaseConfiguration[]) => { zodType = zodType.optional() } - if (field.maxLength) { - if ([BaseFieldType.textInput].includes(field.type)) - zodType = (zodType as ZodString).max(field.maxLength, `${field.label} exceeds max length of ${field.maxLength}`) - } - - if (field.min) { - if ([BaseFieldType.numberInput].includes(field.type)) - zodType = (zodType as ZodString).min(field.min, `${field.label} must be at least ${field.min}`) - } - - if (field.max) { - if ([BaseFieldType.numberInput].includes(field.type)) - zodType = (zodType as ZodString).max(field.max, `${field.label} exceeds max value of ${field.max}`) - } - shape[field.variable] = zodType }) diff --git a/web/app/components/base/form/form-scenarios/input-field/utils.ts b/web/app/components/base/form/form-scenarios/input-field/utils.ts index 06300d31b8..797cc32e95 100644 --- a/web/app/components/base/form/form-scenarios/input-field/utils.ts +++ b/web/app/components/base/form/form-scenarios/input-field/utils.ts @@ -45,14 +45,6 @@ export const generateZodSchema = (fields: InputFieldConfiguration[]) => { break } - if (field.required) { - if ([InputFieldType.textInput].includes(field.type)) - zodType = (zodType as ZodString).nonempty(`${field.label} is required`) - } - else { - zodType = zodType.optional() - } - if (field.maxLength) { if ([InputFieldType.textInput].includes(field.type)) zodType = (zodType as ZodString).max(field.maxLength, `${field.label} exceeds max length of ${field.maxLength}`) @@ -68,6 +60,14 @@ export const generateZodSchema = (fields: InputFieldConfiguration[]) => { zodType = (zodType as ZodString).max(field.max, `${field.label} exceeds max value of ${field.max}`) } + if (field.required) { + if ([InputFieldType.textInput].includes(field.type)) + zodType = (zodType as ZodString).nonempty(`${field.label} is required`) + } + else { + zodType = zodType.optional() + } + shape[field.variable] = zodType }) diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source-options/option-card.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source-options/option-card.tsx index 95003bdf15..efc4a42905 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/data-source-options/option-card.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source-options/option-card.tsx @@ -1,3 +1,4 @@ +import React from 'react' import cn from '@/utils/classnames' type OptionCardProps = { @@ -37,4 +38,4 @@ const OptionCard = ({ ) } -export default OptionCard +export default React.memo(OptionCard) diff --git a/web/app/components/rag-pipeline/components/panel/test-run/data-source/actions.tsx b/web/app/components/rag-pipeline/components/panel/test-run/data-source/actions.tsx new file mode 100644 index 0000000000..3bf14aefc0 --- /dev/null +++ b/web/app/components/rag-pipeline/components/panel/test-run/data-source/actions.tsx @@ -0,0 +1,25 @@ +import React from 'react' +import Button from '@/app/components/base/button' +import { useTranslation } from 'react-i18next' + +type ActionsProps = { + disabled?: boolean + handleNextStep: () => void +} + +const Actions = ({ + disabled, + handleNextStep, +}: ActionsProps) => { + const { t } = useTranslation() + + return ( +
+ +
+ ) +} + +export default React.memo(Actions) diff --git a/web/app/components/rag-pipeline/components/panel/test-run/document-processing/actions.tsx b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/actions.tsx new file mode 100644 index 0000000000..c04a8546cf --- /dev/null +++ b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/actions.tsx @@ -0,0 +1,34 @@ +import React from 'react' +import Button from '@/app/components/base/button' +import type { FormType } from '@/app/components/base/form' + +type ActionsProps = { + form: FormType + onBack: () => void +} + +const Actions = ({ + form, + onBack, +}: ActionsProps) => { + return ( +
+ + +
+ ) +} + +export default React.memo(Actions) diff --git a/web/app/components/rag-pipeline/components/panel/test-run/document-processing/hooks.ts b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/hooks.ts new file mode 100644 index 0000000000..e3836a749c --- /dev/null +++ b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/hooks.ts @@ -0,0 +1,57 @@ +import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types' +import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types' +import type { FormData } from './options' +import { useTranslation } from 'react-i18next' + +export const useConfigurations = () => { + const { t } = useTranslation() + const maxValue = Number.parseInt(globalThis.document?.body?.getAttribute('data-public-indexing-max-segmentation-tokens-length') || '4000', 10) + + const configurations: BaseConfiguration[] = [ + { + type: BaseFieldType.textInput, + variable: 'separator', + label: t('datasetCreation.stepTwo.separator'), + required: false, + showConditions: [], + placeholder: t('datasetCreation.stepTwo.separatorPlaceholder'), + tooltip: t('datasetCreation.stepTwo.separatorTip'), + }, + { + type: BaseFieldType.numberInput, + variable: 'max_tokens', + label: t('datasetCreation.stepTwo.maxLength'), + required: false, + min: 1, + max: maxValue, + showConditions: [], + placeholder: `≤ ${maxValue}`, + }, + { + type: BaseFieldType.numberInput, + variable: 'chunk_overlap', + label: t('datasetCreation.stepTwo.overlap'), + required: false, + min: 1, + showConditions: [], + placeholder: t('datasetCreation.stepTwo.overlap') || '', + tooltip: t('datasetCreation.stepTwo.overlapTip'), + }, + { + type: BaseFieldType.checkbox, + variable: 'remove_extra_spaces', + label: t('datasetCreation.stepTwo.removeExtraSpaces'), + required: false, + showConditions: [], + }, + { + type: BaseFieldType.checkbox, + variable: 'remove_urls_emails', + label: t('datasetCreation.stepTwo.removeUrlEmails'), + required: false, + showConditions: [], + }, + ] + + return configurations +} diff --git a/web/app/components/rag-pipeline/components/panel/test-run/document-processing/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/index.tsx new file mode 100644 index 0000000000..b6c2cb96d5 --- /dev/null +++ b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/index.tsx @@ -0,0 +1,51 @@ +import { generateZodSchema } from '@/app/components/base/form/form-scenarios/base/utils' +import { useConfigurations } from './hooks' +import Options from './options' +import Actions from './actions' +import type { FormType } from '@/app/components/base/form' +import { useCallback } from 'react' + +type DocumentProcessingProps = { + payload: any + onProcess: (data: any) => void + onBack: () => void +} + +const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' +const DEFAULT_MAXIMUM_CHUNK_LENGTH = 1024 +const DEFAULT_OVERLAP = 50 + +const DocumentProcessing = ({ + payload, + onProcess, + onBack, +}: DocumentProcessingProps) => { + const configurations = useConfigurations() + const schema = generateZodSchema(configurations) + + const renderCustomActions = useCallback((form: FormType) => ( + + ), [onBack]) + + return ( +
+ { + onProcess(data) + }} + CustomActions={renderCustomActions} + /> +
+ ) +} + +export default DocumentProcessing diff --git a/web/app/components/rag-pipeline/components/panel/test-run/document-processing/options.tsx b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/options.tsx new file mode 100644 index 0000000000..4b7dcdd6fa --- /dev/null +++ b/web/app/components/rag-pipeline/components/panel/test-run/document-processing/options.tsx @@ -0,0 +1,81 @@ +import type { FormType } from '@/app/components/base/form' +import { useAppForm } from '@/app/components/base/form' +import BaseField from '@/app/components/base/form/form-scenarios/base/field' +import type { BaseConfiguration } from '@/app/components/base/form/form-scenarios/base/types' +import Toast from '@/app/components/base/toast' +import type { ZodSchema } from 'zod' + +export type FormData = { + separator: string + max_tokens: number + chunk_overlap: number + remove_extra_spaces: boolean + remove_urls_emails: boolean +} + +type OptionsProps = { + initialData: FormData + configurations: BaseConfiguration[] + schema: ZodSchema + CustomActions: (form: FormType) => React.JSX.Element + onSubmit: (data: FormData) => void +} + +const Options = ({ + initialData, + configurations, + schema, + CustomActions, + onSubmit, +}: OptionsProps) => { + const form = useAppForm({ + defaultValues: initialData, + validators: { + onSubmit: ({ value }) => { + const result = schema.safeParse(value) + if (!result.success) { + const issues = result.error.issues + const firstIssue = issues[0] + const errorMessage = `"${firstIssue.path.join('.')}" ${firstIssue.message}` + Toast.notify({ + type: 'error', + message: errorMessage, + }) + return errorMessage + } + return undefined + }, + }, + onSubmit: ({ value }) => { + onSubmit(value) + }, + }) + + return ( +
{ + e.preventDefault() + e.stopPropagation() + form.handleSubmit() + }} + > +
+ {configurations.map((config, index) => { + const FieldComponent = BaseField({ + initialData, + config, + }) + return + })} +
+ + + +
+ ) +} + +export default Options diff --git a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx index 1688fac522..f6c95c7a62 100644 --- a/web/app/components/rag-pipeline/components/panel/test-run/index.tsx +++ b/web/app/components/rag-pipeline/components/panel/test-run/index.tsx @@ -8,8 +8,6 @@ import type { CrawlOptions, CrawlResultItem, FileItem } from '@/models/datasets' import { DataSourceType } from '@/models/datasets' import LocalFile from './data-source/local-file' import produce from 'immer' -import Button from '@/app/components/base/button' -import { useTranslation } from 'react-i18next' import { useProviderContextSelector } from '@/context/provider-context' import { DataSourceProvider, type NotionPage } from '@/models/common' import Notion from './data-source/notion' @@ -18,10 +16,11 @@ import { DEFAULT_CRAWL_OPTIONS } from './consts' import Firecrawl from './data-source/website/firecrawl' import JinaReader from './data-source/website/jina-reader' import WaterCrawl from './data-source/website/water-crawl' +import Actions from './data-source/actions' +import DocumentProcessing from './document-processing' const TestRunPanel = () => { - const { t } = useTranslation() - const [currentStep, setCurrentStep] = useState(1) + const [currentStep, setCurrentStep] = useState(2) const [dataSource, setDataSource] = useState(DataSourceProvider.waterCrawl) const [fileList, setFiles] = useState([]) const [notionPages, setNotionPages] = useState([]) @@ -91,6 +90,10 @@ const TestRunPanel = () => { setCurrentStep(preStep => preStep + 1) }, []) + const handleBackStep = useCallback(() => { + setCurrentStep(preStep => preStep - 1) + }, []) + return (
{ )}
-
- -
+ ) } + { + currentStep === 2 && ( + { + console.log('Processing data:', data) + }} + onBack={handleBackStep} + /> + ) + } )