From 6f3a1c9d7214ea47a3c5e69780edfd68c5fce355 Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Thu, 21 Nov 2024 11:40:17 +0800 Subject: [PATCH] feat: parent child state --- .../datasets/create/step-two/index.tsx | 244 ++++++++++-------- .../datasets/create/step-two/inputs.tsx | 75 ++++++ 2 files changed, 214 insertions(+), 105 deletions(-) create mode 100644 web/app/components/datasets/create/step-two/inputs.tsx diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 0ea6cd2baa..82e9d8fae3 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -14,7 +14,6 @@ import { import Link from 'next/link' import { groupBy } from 'lodash-es' import Image from 'next/image' -import { Switch } from '@headlessui/react' import SettingCog from '../assets/setting-gear-mod.svg' import OrangeEffect from '../assets/option-card-effect-orange.svg' import FamilyMod from '../assets/family-mod.svg' @@ -28,6 +27,7 @@ import unescape from './unescape' import escape from './escape' import { OptionCard } from './option-card' import LanguageSelect from './language-select' +import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' import cn from '@/utils/classnames' import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import { @@ -37,7 +37,6 @@ import { fetchDefaultProcessRule, } from '@/service/datasets' import Button from '@/app/components/base/button' -import Input from '@/app/components/base/input' import Loading from '@/app/components/base/loading' import FloatRightContainer from '@/app/components/base/float-right-container' import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config' @@ -62,6 +61,7 @@ import Checkbox from '@/app/components/base/checkbox' import RadioCard from '@/app/components/base/radio-card' import { MessageChatSquare } from '@/app/components/base/icons/src/public/common' import { IS_CE_EDITION } from '@/config' +import Switch from '@/app/components/base/switch' const TextLabel: FC = (props) => { return @@ -107,6 +107,19 @@ enum IndexingType { const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' +type ParentChildConfig = { + chunkForContext: 'paragraph' | 'full_doc' + parent: { + delimiter: string + maxLength: number + } + child: { + delimiter: string + maxLength: number + } + rules: PreProcessingRule[] +} + const StepTwo = ({ isSetting, documentDetail, @@ -173,6 +186,19 @@ const StepTwo = ({ })() const [isCreating, setIsCreating] = useState(false) + const [parentChildConfig, setParentChildConfig] = useState({ + chunkForContext: 'paragraph', + parent: { + delimiter: '\\n\\n', + maxLength: 4000, + }, + child: { + delimiter: '\\n\\n', + maxLength: 4000, + }, + rules: [], + }) + const scrollHandle = (e: Event) => { if ((e.target as HTMLDivElement).scrollTop > 0) setScrolled(true) @@ -653,54 +679,19 @@ const StepTwo = ({ >
- - {t('datasetCreation.stepTwo.separator')} - - {t('datasetCreation.stepTwo.separatorTip')} -
- } - /> -
}> - setSegmentIdentifier(e.target.value)} - /> - - - {t('datasetCreation.stepTwo.maxLength')} - }> - setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))} - /> - - - {t('datasetCreation.stepTwo.overlap')} - - {t('datasetCreation.stepTwo.overlapTip')} - - } - /> - }> - setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} /> - + setSegmentIdentifier(e.target.value)} + /> + setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))} + /> + setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} + />
@@ -740,58 +731,103 @@ const StepTwo = ({ } >
- +
+ Parent-chunk for Context - - } - title={'Paragraph'} - description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'} - isChosen={true} - chosenConfig={ -
- - setSegmentIdentifier(e.target.value)} /> - - - setSegmentIdentifier(e.target.value)} /> - -
- } - /> - } - title={'Full Doc'} - description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'} - isChosen={true} - /> - - - Child-chunk for Retrieval - -
- - setSegmentIdentifier(e.target.value)} /> - - - setSegmentIdentifier(e.target.value)} /> - + + } + title={'Paragraph'} + description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'} + isChosen={parentChildConfig.chunkForContext === 'paragraph'} + onChosen={() => setParentChildConfig( + { + ...parentChildConfig, + chunkForContext: 'paragraph', + }, + )} + chosenConfig={ +
+ setParentChildConfig({ + ...parentChildConfig, + parent: { + ...parentChildConfig.parent, + delimiter: e.target.value, + }, + })} + /> + setParentChildConfig({ + ...parentChildConfig, + parent: { + ...parentChildConfig.parent, + maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10), + }, + })} + /> +
+ } + /> + } + title={'Full Doc'} + description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'} + onChosen={() => setParentChildConfig( + { + ...parentChildConfig, + chunkForContext: 'full_doc', + }, + )} + isChosen={parentChildConfig.chunkForContext === 'full_doc'} + />
- - Text Pre-processing Rules -
- {rules.map(rule => ( -
{ - ruleChangeHandle(rule.id) - }}> - - -
- ))} + + Child-chunk for Retrieval + +
+ setParentChildConfig({ + ...parentChildConfig, + child: { + ...parentChildConfig.child, + delimiter: e.target.value, + }, + })} + /> + setParentChildConfig({ + ...parentChildConfig, + child: { + ...parentChildConfig.child, + maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10), + }, + })} + /> +
+ + + Text Pre-processing Rules + +
+ {rules.map(rule => ( +
{ + ruleChangeHandle(rule.id) + }}> + + +
+ ))} +
@@ -876,13 +912,11 @@ const StepTwo = ({
-
- -
+
{docForm === DocForm.QA && !QATipHide && (
diff --git a/web/app/components/datasets/create/step-two/inputs.tsx b/web/app/components/datasets/create/step-two/inputs.tsx new file mode 100644 index 0000000000..56100918e1 --- /dev/null +++ b/web/app/components/datasets/create/step-two/inputs.tsx @@ -0,0 +1,75 @@ +import type { FC, PropsWithChildren, ReactNode } from 'react' +import { useTranslation } from 'react-i18next' +import type { InputProps } from '@/app/components/base/input' +import Input from '@/app/components/base/input' +import Tooltip from '@/app/components/base/tooltip' + +const TextLabel: FC = (props) => { + return +} + +const FormField: FC> = (props) => { + return
+ {props.label} + {props.children} +
+} + +export const DelimiterInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.separator')} + + {t('datasetCreation.stepTwo.separatorTip')} +
+ } + /> + }> + + +} + +export const MaxLengthInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.maxLength')} + }> + + +} + +export const OverlapInput: FC = (props) => { + const { t } = useTranslation() + return + {t('datasetCreation.stepTwo.overlap')} + + {t('datasetCreation.stepTwo.overlapTip')} + + } + /> + }> + + +}