feat: parent child state

This commit is contained in:
AkaraChen 2024-11-21 11:40:17 +08:00
parent 13bb4aa721
commit 6f3a1c9d72
2 changed files with 214 additions and 105 deletions

View File

@ -14,7 +14,6 @@ import {
import Link from 'next/link'
import { groupBy } from 'lodash-es'
import Image from 'next/image'
import { Switch } from '@headlessui/react'
import SettingCog from '../assets/setting-gear-mod.svg'
import OrangeEffect from '../assets/option-card-effect-orange.svg'
import FamilyMod from '../assets/family-mod.svg'
@ -28,6 +27,7 @@ import unescape from './unescape'
import escape from './escape'
import { OptionCard } from './option-card'
import LanguageSelect from './language-select'
import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs'
import cn from '@/utils/classnames'
import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FileIndexingEstimateResponse, FullDocumentDetail, IndexingEstimateParams, NotionInfo, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets'
import {
@ -37,7 +37,6 @@ import {
fetchDefaultProcessRule,
} from '@/service/datasets'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import Loading from '@/app/components/base/loading'
import FloatRightContainer from '@/app/components/base/float-right-container'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
@ -62,6 +61,7 @@ import Checkbox from '@/app/components/base/checkbox'
import RadioCard from '@/app/components/base/radio-card'
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common'
import { IS_CE_EDITION } from '@/config'
import Switch from '@/app/components/base/switch'
const TextLabel: FC<PropsWithChildren> = (props) => {
return <label className='text-[#354052] text-xs font-semibold leading-none'>{props.children}</label>
@ -107,6 +107,19 @@ enum IndexingType {
const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n'
/** Delimiter / maximum-length pair describing how one chunk level is split. */
type ChunkSplitSetting = {
  delimiter: string
  maxLength: number
}
/**
 * Shape of the parent-child chunking state.
 *
 * - `chunkForContext`: which parent-chunk mode is selected, either
 *   `'paragraph'` or `'full_doc'`.
 * - `parent` / `child`: split settings for the parent chunk and the child chunk.
 * - `rules`: pre-processing rules carried alongside the split settings
 *   (NOTE(review): how these are consumed is not visible here — confirm).
 */
type ParentChildConfig = {
  chunkForContext: 'paragraph' | 'full_doc'
  parent: ChunkSplitSetting
  child: ChunkSplitSetting
  rules: PreProcessingRule[]
}
const StepTwo = ({
isSetting,
documentDetail,
@ -173,6 +186,19 @@ const StepTwo = ({
})()
const [isCreating, setIsCreating] = useState(false)
const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>({
chunkForContext: 'paragraph',
parent: {
delimiter: '\\n\\n',
maxLength: 4000,
},
child: {
delimiter: '\\n\\n',
maxLength: 4000,
},
rules: [],
})
const scrollHandle = (e: Event) => {
if ((e.target as HTMLDivElement).scrollTop > 0)
setScrolled(true)
@ -653,54 +679,19 @@ const StepTwo = ({
>
<div className='space-y-4'>
<div className='flex gap-2'>
<FormField label={<div className='flex'>
{t('datasetCreation.stepTwo.separator')}
<Tooltip
popupContent={
<div className='max-w-[200px]'>
{t('datasetCreation.stepTwo.separatorTip')}
</div>
}
/>
</div>}>
<Input
type="text"
className='h-9'
placeholder={t('datasetCreation.stepTwo.separatorPlaceholder') || ''} value={segmentIdentifier}
onChange={e => setSegmentIdentifier(e.target.value)}
/>
</FormField>
<FormField label={<div>
{t('datasetCreation.stepTwo.maxLength')}
</div>}>
<Input
type="number"
className='h-9'
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
value={max}
max={4000}
min={1}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
</FormField>
<FormField label={<div className='flex'>
{t('datasetCreation.stepTwo.overlap')}
<Tooltip
popupContent={
<div className='max-w-[200px]'>
{t('datasetCreation.stepTwo.overlapTip')}
</div>
}
/>
</div>}>
<Input
type="number"
className='h-9'
placeholder={t('datasetCreation.stepTwo.overlap') || ''}
value={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} />
</FormField>
<DelimiterInput
value={segmentIdentifier}
onChange={e => setSegmentIdentifier(e.target.value)}
/>
<MaxLengthInput
value={max}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
<OverlapInput
value={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
/>
</div>
<div className='space-y-2'>
<div className='w-full flex flex-col'>
@ -740,58 +731,103 @@ const StepTwo = ({
}
>
<div className='space-y-4'>
<TextLabel>
<div className='space-y-2'>
<TextLabel>
Parent-chunk for Context
</TextLabel>
<RadioCard
icon={<Image src={Note} alt='' />}
title={'Paragraph'}
description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'}
isChosen={true}
chosenConfig={
<div className='flex gap-2'>
<FormField label={'Delimiter'}>
<Input type="text" placeholder={'\n\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} />
</FormField>
<FormField label={'Maximum chunk length'}>
<Input type="number" placeholder={'\n\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} />
</FormField>
</div>
}
/>
<RadioCard
icon={<Image src={FileList} alt='' />}
title={'Full Doc'}
description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'}
isChosen={true}
/>
<TextLabel>
Child-chunk for Retrieval
</TextLabel>
<div className='flex gap-2'>
<FormField label={'Delimiter'}>
<Input type="text" placeholder={'\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} />
</FormField>
<FormField label={'Maximum chunk length'}>
<Input type="number" placeholder={'\n'} value={segmentIdentifier} onChange={e => setSegmentIdentifier(e.target.value)} />
</FormField>
</TextLabel>
<RadioCard
icon={<Image src={Note} alt='' />}
title={'Paragraph'}
description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'}
isChosen={parentChildConfig.chunkForContext === 'paragraph'}
onChosen={() => setParentChildConfig(
{
...parentChildConfig,
chunkForContext: 'paragraph',
},
)}
chosenConfig={
<div className='flex gap-2'>
<DelimiterInput
value={parentChildConfig.parent.delimiter}
onChange={e => setParentChildConfig({
...parentChildConfig,
parent: {
...parentChildConfig.parent,
delimiter: e.target.value,
},
})}
/>
<MaxLengthInput
value={parentChildConfig.parent.maxLength}
onChange={e => setParentChildConfig({
...parentChildConfig,
parent: {
...parentChildConfig.parent,
maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
},
})}
/>
</div>
}
/>
<RadioCard
icon={<Image src={FileList} alt='' />}
title={'Full Doc'}
description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'}
onChosen={() => setParentChildConfig(
{
...parentChildConfig,
chunkForContext: 'full_doc',
},
)}
isChosen={parentChildConfig.chunkForContext === 'full_doc'}
/>
</div>
<TextLabel>
Text Pre-processing Rules
</TextLabel>
<div className='space-y-2'>
{rules.map(rule => (
<div key={rule.id} className={s.ruleItem} onClick={() => {
ruleChangeHandle(rule.id)
}}>
<Checkbox
checked={rule.enabled}
/>
<label className="ml-2 text-sm font-normal cursor-pointer text-gray-800">{getRuleName(rule.id)}</label>
</div>
))}
<TextLabel>
Child-chunk for Retrieval
</TextLabel>
<div className='flex gap-2'>
<DelimiterInput
value={parentChildConfig.child.delimiter}
onChange={e => setParentChildConfig({
...parentChildConfig,
child: {
...parentChildConfig.child,
delimiter: e.target.value,
},
})}
/>
<MaxLengthInput
value={parentChildConfig.child.maxLength}
onChange={e => setParentChildConfig({
...parentChildConfig,
child: {
...parentChildConfig.child,
maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
},
})}
/>
</div>
<TextLabel>
Text Pre-processing Rules
</TextLabel>
<div className='space-y-2'>
{rules.map(rule => (
<div key={rule.id} className={s.ruleItem} onClick={() => {
ruleChangeHandle(rule.id)
}}>
<Checkbox
checked={rule.enabled}
/>
<label className="ml-2 text-sm font-normal cursor-pointer text-gray-800">{getRuleName(rule.id)}</label>
</div>
))}
</div>
</div>
</div>
</OptionCard>
@ -876,13 +912,11 @@ const StepTwo = ({
<LanguageSelect currentLanguage={docLanguage} onSelect={handleSelect} disabled={isLanguageSelectDisabled} />
</div>
</div>
<div className='shrink-0'>
<Switch
defaultValue={docForm === DocForm.QA}
onChange={handleSwitch}
size='md'
/>
</div>
<Switch
defaultValue={docForm === DocForm.QA}
onChange={handleSwitch}
size='md'
/>
</div>
{docForm === DocForm.QA && !QATipHide && (
<div className='flex justify-between items-center px-5 py-2 bg-orange-50 border-t border-amber-100 rounded-b-xl text-[13px] leading-[18px] text-medium text-amber-500'>

View File

@ -0,0 +1,75 @@
import type { FC, PropsWithChildren, ReactNode } from 'react'
import { useTranslation } from 'react-i18next'
import type { InputProps } from '@/app/components/base/input'
import Input from '@/app/components/base/input'
import Tooltip from '@/app/components/base/tooltip'
/** Renders its children inside a small, semibold `<label>` element. */
const TextLabel: FC<PropsWithChildren> = ({ children }) => (
  <label className='text-[#354052] text-xs font-semibold leading-none'>{children}</label>
)
/**
 * Lays out a form control beneath its caption.
 *
 * `label` is rendered through `TextLabel`; `children` (the control itself)
 * follow it inside a flex-growing, vertically spaced container.
 */
const FormField: FC<PropsWithChildren<{ label: ReactNode }>> = ({ label, children }) => (
  <div className='space-y-2 flex-1'>
    <TextLabel>{label}</TextLabel>
    {children}
  </div>
)
/**
 * Text input for the segment delimiter, captioned with the translated
 * "separator" label and a tooltip showing the translated separator tip.
 *
 * All received props are spread onto the underlying `Input` last, so any
 * prop supplied by the caller takes precedence over the defaults set here
 * (`type`, `className`, `placeholder`).
 */
export const DelimiterInput: FC<InputProps> = (inputProps) => {
  const { t } = useTranslation()
  const caption = (
    <div className='flex'>
      {t('datasetCreation.stepTwo.separator')}
      <Tooltip
        popupContent={<div className='max-w-[200px]'>{t('datasetCreation.stepTwo.separatorTip')}</div>}
      />
    </div>
  )
  const fallbackPlaceholder = t('datasetCreation.stepTwo.separatorPlaceholder') || ''

  return (
    <FormField label={caption}>
      <Input type="text" className='h-9' placeholder={fallbackPlaceholder} {...inputProps} />
    </FormField>
  )
}
/**
 * Numeric input for the maximum chunk length, captioned with the translated
 * "maxLength" label.
 *
 * Defaults to `min={1}` and `max={4000}`; received props are spread onto the
 * underlying `Input` last, so a caller may override any of the defaults.
 */
export const MaxLengthInput: FC<InputProps> = (inputProps) => {
  const { t } = useTranslation()
  const caption = <div>{t('datasetCreation.stepTwo.maxLength')}</div>
  const fallbackPlaceholder = t('datasetCreation.stepTwo.maxLength') || ''

  return (
    <FormField label={caption}>
      <Input
        type="number"
        className='h-9'
        placeholder={fallbackPlaceholder}
        max={4000}
        min={1}
        {...inputProps}
      />
    </FormField>
  )
}
/**
 * Numeric input for the chunk overlap, captioned with the translated
 * "overlap" label and a tooltip showing the translated overlap tip.
 *
 * Defaults to `min={1}`; received props are spread onto the underlying
 * `Input` last, so a caller may override any of the defaults.
 */
export const OverlapInput: FC<InputProps> = (inputProps) => {
  const { t } = useTranslation()
  const caption = (
    <div className='flex'>
      {t('datasetCreation.stepTwo.overlap')}
      <Tooltip
        popupContent={<div className='max-w-[200px]'>{t('datasetCreation.stepTwo.overlapTip')}</div>}
      />
    </div>
  )
  const fallbackPlaceholder = t('datasetCreation.stepTwo.overlap') || ''

  return (
    <FormField label={caption}>
      <Input
        type="number"
        className='h-9'
        placeholder={fallbackPlaceholder}
        min={1}
        {...inputProps}
      />
    </FormField>
  )
}