feat: settings ui for database pre-preprocessing

This commit is contained in:
AkaraChen 2024-11-25 17:57:31 +08:00
parent bba9301788
commit 1a6a28f650
14 changed files with 250 additions and 71 deletions

View File

@ -0,0 +1,57 @@
import { useState } from 'react'
import type { FC, SetStateAction } from 'react'
import { RiArrowDownSLine, RiArrowUpSLine } from '@remixicon/react'
import Input, { type InputProps } from '../input'
import classNames from '@/utils/classnames'
/**
 * Props accepted by `InputNumber`.
 *
 * Every prop of the base `Input` is allowed except `value`, `onChange` and
 * `size`, which are replaced by the number-specific members below.
 */
export type InputNumberProps = Omit<InputProps, 'value' | 'onChange' | 'size'> & {
  /** Optional unit label rendered right after the input field. */
  unit?: string
  /** Called with the numeric value of the field. */
  onChange: (value: number) => void
  /** Step used by the increment / decrement buttons (the component defaults it to 1). */
  amount?: number
  /** Controls the vertical padding of the stepper buttons (the component defaults it to 'sm'). */
  size?: 'sm' | 'md'
}
/**
 * Coerces a loosely typed value (number, numeric string, undefined, ...) into a
 * finite number, or `undefined` when it is absent or not numeric.
 */
const toFiniteNumber = (raw: unknown): number | undefined => {
  if (raw === undefined || raw === null || raw === '')
    return undefined
  const parsed = Number(raw)
  return Number.isFinite(parsed) ? parsed : undefined
}

/**
 * Numeric input with an optional unit label and increment / decrement buttons.
 *
 * The component keeps its own value, seeded once from `defaultValue`
 * (later changes to `defaultValue` are not picked up).
 *
 * - Typing a parsable number updates the value and calls `onChange`; text that
 *   parses to NaN is ignored. Typed values are not range-checked here.
 * - The arrow buttons move the value by `amount` and call `onChange`. A step
 *   that would leave the inclusive `[min, max]` range is ignored.
 */
export const InputNumber: FC<InputNumberProps> = (props) => {
  const { unit, className, onChange, defaultValue = 0, amount = 1, size = 'sm', max, min, ...rest } = props
  const [val, setVal] = useState<number>(toFiniteNumber(defaultValue) ?? 0)

  // Explicit undefined checks (instead of truthiness) so that a bound of 0 is honoured.
  const upper = toFiniteNumber(max)
  const lower = toFiniteNumber(min)

  const update = (value: SetStateAction<number>) => {
    const next = typeof value === 'function' ? value(val) : value
    // Bounds are inclusive: landing exactly on min or max is allowed.
    if (upper !== undefined && next > upper)
      return
    if (lower !== undefined && next < lower)
      return
    setVal(next)
    // Keep the parent in sync with button-driven changes as well as typed ones.
    onChange(next)
  }
  const inc = () => update(current => current + amount)
  const dec = () => update(current => current - amount)

  return <div className='flex'>
    <Input {...rest}
      className={classNames('rounded-r-none', className)}
      value={val}
      max={max}
      min={min}
      onChange={(e) => {
        const parsed = Number(e.target.value)
        if (Number.isNaN(parsed))
          return
        setVal(parsed)
        onChange(parsed)
      }}
    />
    {unit && <div className='flex items-center bg-components-input-bg-normal text-[13px] text-text-placeholder pr-2'>{unit}</div>}
    <div className='flex flex-col bg-components-input-bg-normal rounded-r-md border-l text-text-tertiary'>
      {/* type='button' prevents the steppers from submitting an enclosing form */}
      <button type='button' disabled={rest.disabled} onClick={inc} className={classNames(
        size === 'sm' ? 'pt-1' : 'pt-1.5',
        'px-1.5 hover:bg-components-input-bg-hover',
      )}>
        <RiArrowUpSLine className='size-3' />
      </button>
      <button type='button' disabled={rest.disabled} onClick={dec} className={classNames(
        size === 'sm' ? 'pb-1' : 'pb-1.5',
        'px-1.5 hover:bg-components-input-bg-hover',
      )}>
        <RiArrowDownSLine className='size-3' />
      </button>
    </div>
  </div>
}

View File

@ -1,5 +1,6 @@
'use client'
import type { FC } from 'react'
import { InputNumber } from '../input-number'
import Tooltip from '@/app/components/base/tooltip'
import Slider from '@/app/components/base/slider'
import Switch from '@/app/components/base/switch'
@ -47,13 +48,20 @@ const ParamItem: FC<Props> = ({ className, id, name, noTooltip, tip, step = 0.1,
</div>
<div className="mt-2 flex items-center">
<div className="mr-4 flex shrink-0 items-center">
<input disabled={!enable} type="number" min={min} max={max} step={step} className="block w-[48px] h-7 text-xs leading-[18px] rounded-lg border-0 pl-1 pl py-1.5 bg-gray-50 text-gray-900 placeholder:text-gray-400 focus:ring-1 focus:ring-inset focus:ring-primary-600 disabled:opacity-60" value={(value === null || value === undefined) ? '' : value} onChange={(e) => {
const value = parseFloat(e.target.value)
if (value < min || value > max)
return
<InputNumber
disabled={!enable}
type="number"
min={min}
max={max}
step={step}
size='sm'
onChange={(value) => {
if (value < min || value > max)
return
onChange(id, value)
}} />
onChange(id, value)
}}
/>
</div>
<div className="flex items-center h-7 grow">
<Slider

View File

@ -5,7 +5,7 @@ import { useTranslation } from 'react-i18next'
import Image from 'next/image'
import RetrievalParamConfig from '../retrieval-param-config'
import { OptionCard } from '../../create/step-two/option-card'
import Selection from '../../create/assets/selection-mod.svg'
import { retrievalIcon } from '../../create/icons'
import { RETRIEVE_METHOD } from '@/types/app'
import type { RetrievalConfig } from '@/types/app'
@ -22,7 +22,7 @@ const EconomicalRetrievalMethodConfig: FC<Props> = ({
return (
<div className='space-y-2'>
<OptionCard icon={<Image className='w-4 h-4' src={Selection} alt='' />}
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
title={t('dataset.retrieval.invertedIndex.title')}
description={t('dataset.retrieval.invertedIndex.description')} isActive
activeHeaderClassName='bg-gradient-to-r from-[#F0EEFA] to-[#F9FAFB]'

View File

@ -5,10 +5,8 @@ import { useTranslation } from 'react-i18next'
import Image from 'next/image'
import RetrievalParamConfig from '../retrieval-param-config'
import { OptionCard } from '../../create/step-two/option-card'
import Selection from '../../create/assets/selection-mod.svg'
import Research from '../../create/assets/research-mod.svg'
import PatternRecognition from '../../create/assets/pattern-recognition-mod.svg'
import Effect from '../../create/assets/option-card-effect-purple.svg'
import { retrievalIcon } from '../../create/icons'
import type { RetrievalConfig } from '@/types/app'
import { RETRIEVE_METHOD } from '@/types/app'
import { useProviderContext } from '@/context/provider-context'
@ -59,7 +57,7 @@ const RetrievalMethodConfig: FC<Props> = ({
return (
<div className='space-y-2'>
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
<OptionCard icon={<Image className='w-4 h-4' src={Selection} alt='' />}
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
title={t('dataset.retrieval.semantic_search.title')}
description={t('dataset.retrieval.semantic_search.description')}
isActive={
@ -80,7 +78,7 @@ const RetrievalMethodConfig: FC<Props> = ({
</OptionCard>
)}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
<OptionCard icon={<Image className='w-4 h-4' src={Research} alt='' />}
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
title={t('dataset.retrieval.full_text_search.title')}
description={t('dataset.retrieval.full_text_search.description')}
isActive={
@ -101,7 +99,7 @@ const RetrievalMethodConfig: FC<Props> = ({
</OptionCard>
)}
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
<OptionCard icon={<Image className='w-4 h-4' src={PatternRecognition} alt='' />}
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
title={
<div className='flex items-center space-x-1'>
<div>{t('dataset.retrieval.hybrid_search.title')}</div>

View File

@ -7,7 +7,11 @@ import { omit } from 'lodash-es'
import { ArrowRightIcon } from '@heroicons/react/24/solid'
import {
RiErrorWarningFill,
RiLoader2Fill,
RiTerminalBoxLine,
} from '@remixicon/react'
import Image from 'next/image'
import { indexMethodIcon, retrievalIcon } from '../icons'
import s from './index.module.css'
import cn from '@/utils/classnames'
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
@ -23,15 +27,21 @@ import UpgradeBtn from '@/app/components/billing/upgrade-btn'
import { useProviderContext } from '@/context/provider-context'
import Tooltip from '@/app/components/base/tooltip'
import { sleep } from '@/utils'
import { RETRIEVE_METHOD } from '@/types/app'
type Props = {
datasetId: string
batchId: string
documents?: FullDocumentDetail[]
indexingType?: string
retrievalMethod?: string
}
const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => {
const RuleDetail: FC<{
sourceData?: ProcessRuleResponse
indexingType?: string
retrievalMethod?: string
}> = ({ sourceData, indexingType, retrievalMethod }) => {
const { t } = useTranslation()
const segmentationRuleMap = {
@ -81,10 +91,40 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) =>
displayedValue={String(getValue(field))}
/>
})}
<FieldInfo
label={t('datasetCreation.stepTwo.indexMode')}
displayedValue={t(`datasetCreation.stepTwo.${indexingType}`) as string}
valueIcon={
<Image
src={
indexingType === 'economy'
? indexMethodIcon.economical
: indexMethodIcon.high_quality
}
alt=''
/>
}
/>
<FieldInfo
  label={t('datasetSettings.form.retrievalSetting.title')}
  displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
  valueIcon={
    <Image
      src={
        // Compare retrievalMethod in both branches: a bare enum constant is
        // always truthy, which would make the hybrid icon unreachable.
        retrievalMethod === RETRIEVE_METHOD.fullText
          ? retrievalIcon.fullText
          : retrievalMethod === RETRIEVE_METHOD.semantic
            ? retrievalIcon.vector
            : retrievalIcon.hybrid
      }
      alt=''
    />
  }
/>
</div>
}
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType }) => {
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
const { t } = useTranslation()
const { enableBilling, plan } = useProviderContext()
@ -146,6 +186,9 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
const navToDocumentList = () => {
router.push(`/datasets/${datasetId}/documents`)
}
const navToApiDocs = () => {
router.push('/datasets?category=api')
}
const isEmbedding = useMemo(() => {
return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
@ -177,13 +220,17 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
return doc?.data_source_info.notion_page_icon
}
const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
const isSourceEmbedding = (detail: IndexingStatusResponse) =>
['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
return (
<>
<div className='h-5 flex items-center mb-5'>
<div className={s.embeddingStatus}>
{isEmbedding && t('datasetDocuments.embedding.processing')}
{isEmbedding && <div className='flex items-center'>
<RiLoader2Fill className='size-4 mr-1 animate-spin' />
{t('datasetDocuments.embedding.processing')}
</div>}
{isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
</div>
</div>
@ -258,11 +305,19 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
</div>
))}
</div>
<RuleDetail sourceData={ruleDetail} />
<div className='flex items-center gap-2 mt-10'>
<RuleDetail sourceData={ruleDetail} indexingType={
indexingType
}
retrievalMethod={retrievalMethod}
/>
<div className='flex items-center gap-2 my-10'>
<Button className='w-fit' onClick={navToApiDocs}>
<RiTerminalBoxLine className='size-4 mr-2' />
<span>Access the API</span>
</Button>
<Button className='w-fit' variant='primary' onClick={navToDocumentList}>
<span>{t('datasetCreation.stepThree.navTo')}</span>
<ArrowRightIcon className='h-4 w-4 ml-2 stroke-current stroke-1' />
<ArrowRightIcon className='size-4 ml-2 stroke-current stroke-1' />
</Button>
</div>
</>

View File

@ -0,0 +1,16 @@
import GoldIcon from './assets/gold.svg'
import Piggybank from './assets/piggy-bank-mod.svg'
import Selection from './assets/selection-mod.svg'
import Research from './assets/research-mod.svg'
import PatternRecognition from './assets/pattern-recognition-mod.svg'
/**
 * Icon assets for the dataset index methods, keyed by method name.
 * Each value is the SVG module imported at the top of this file.
 */
export const indexMethodIcon = {
// gold.svg
high_quality: GoldIcon,
// piggy-bank-mod.svg
economical: Piggybank,
}
/**
 * Icon assets for the retrieval methods, keyed by method name.
 * Each value is the SVG module imported at the top of this file.
 */
export const retrievalIcon = {
// selection-mod.svg
vector: Selection,
// research-mod.svg
fullText: Research,
// pattern-recognition-mod.svg
hybrid: PatternRecognition,
}

View File

@ -36,6 +36,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
const [step, setStep] = useState(1)
const [indexingTypeCache, setIndexTypeCache] = useState('')
const [retrievalMethodCache, setRetrievalMethodCache] = useState('')
const [fileList, setFiles] = useState<FileItem[]>([])
const [result, setResult] = useState<createDocumentResponse | undefined>()
const [hasError, setHasError] = useState(false)
@ -80,6 +81,9 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
const updateResultCache = (res?: createDocumentResponse) => {
setResult(res)
}
const updateRetrievalMethodCache = (method: string) => {
setRetrievalMethodCache(method)
}
const nextStep = useCallback(() => {
setStep(step + 1)
@ -156,6 +160,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
websiteCrawlJobId={websiteCrawlJobId}
onStepChange={changeStep}
updateIndexingTypeCache={updateIndexingTypeCache}
updateRetrievalMethodCache={updateRetrievalMethodCache}
updateResultCache={updateResultCache}
crawlOptions={crawlOptions}
/>}
@ -163,6 +168,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
datasetId={datasetId}
datasetName={detail?.name}
indexingType={detail?.indexing_technique || indexingTypeCache}
retrievalMethod={detail?.retrieval_model?.search_method || retrievalMethodCache}
creationCache={result}
/>}
</div>

View File

@ -7,15 +7,17 @@ import s from './index.module.css'
import cn from '@/utils/classnames'
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
import type { FullDocumentDetail, createDocumentResponse } from '@/models/datasets'
import AppIcon from '@/app/components/base/app-icon'
type StepThreeProps = {
datasetId?: string
datasetName?: string
indexingType?: string
retrievalMethod?: string
creationCache?: createDocumentResponse
}
const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: StepThreeProps) => {
const StepThree = ({ datasetId, datasetName, indexingType, creationCache, retrievalMethod }: StepThreeProps) => {
const { t } = useTranslation()
const media = useBreakpoints()
@ -30,8 +32,13 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
<div className={s.creationInfo}>
<div className={s.title}>{t('datasetCreation.stepThree.creationTitle')}</div>
<div className={s.content}>{t('datasetCreation.stepThree.creationContent')}</div>
<div className={s.label}>{t('datasetCreation.stepThree.label')}</div>
<div className={s.datasetName}>{datasetName || creationCache?.dataset?.name}</div>
<div className='flex gap-4'>
<AppIcon {...creationCache?.dataset} className='size-14' />
<div className='w-full'>
<div className={s.label}>{t('datasetCreation.stepThree.label')}</div>
<div className={s.datasetName}>{datasetName || creationCache?.dataset?.name}</div>
</div>
</div>
</div>
<div className={s.dividerLine} />
</>
@ -47,6 +54,7 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
batchId={creationCache?.batch || ''}
documents={creationCache?.documents as FullDocumentDetail[]}
indexingType={indexingType || creationCache?.dataset?.indexing_technique}
retrievalMethod={retrievalMethod || creationCache?.dataset?.retrieval_model?.search_method}
/>
</div>
</div>

View File

@ -16,10 +16,9 @@ import Image from 'next/image'
import SettingCog from '../assets/setting-gear-mod.svg'
import OrangeEffect from '../assets/option-card-effect-orange.svg'
import FamilyMod from '../assets/family-mod.svg'
import GoldIcon from '../assets/gold.svg'
import Piggybank from '../assets/piggy-bank-mod.svg'
import Note from '../assets/note-mod.svg'
import FileList from '../assets/file-list-3-fill.svg'
import { indexMethodIcon } from '../icons'
import PreviewItem, { PreviewType } from './preview-item'
import s from './index.module.css'
import unescape from './unescape'
@ -80,6 +79,7 @@ type StepTwoProps = {
onSetting: () => void
datasetId?: string
indexingType?: ValueOf<IndexingType>
retrievalMethod?: string
dataSourceType: DataSourceType
files: CustomFile[]
notionPages?: NotionPage[]
@ -89,6 +89,7 @@ type StepTwoProps = {
websiteCrawlJobId?: string
onStepChange?: (delta: number) => void
updateIndexingTypeCache?: (type: string) => void
updateRetrievalMethodCache?: (method: string) => void
updateResultCache?: (res: createDocumentResponse) => void
onSave?: () => void
onCancel?: () => void
@ -137,6 +138,7 @@ const StepTwo = ({
updateResultCache,
onSave,
onCancel,
updateRetrievalMethodCache,
}: StepTwoProps) => {
const { t } = useTranslation()
const { locale } = useContext(I18n)
@ -507,6 +509,8 @@ const StepTwo = ({
})
updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
updateResultCache && updateResultCache(res)
// eslint-disable-next-line @typescript-eslint/no-use-before-define
updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string)
}
else {
res = await createDocument({
@ -643,19 +647,21 @@ const StepTwo = ({
<div className='max-w-[640px]'>
<div className='space-y-4'>
<OptionCard
title={'General'}
icon={<Image src={SettingCog} alt='General' />}
title={t('datasetCreation.stepTwo.general')}
icon={<Image src={SettingCog} alt={t('datasetCreation.stepTwo.general')} />}
activeHeaderClassName='bg-gradient-to-r from-[#EFF0F9] to-[#F9FAFB]'
description={'General text chunking mode, the chunks retrieved and recalled are the same.'}
description={t('datasetCreation.stepTwo.generalTip')}
isActive={SegmentType.AUTO === segmentationType}
onClick={() => setSegmentationType(SegmentType.AUTO)}
actions={
<>
<Button variant={'secondary-accent'}>
<RiSearchEyeLine className='h-4 w-4 mr-1.5' />
Preview Chunk
{t('datasetCreation.stepTwo.previewChunk')}
</Button>
<Button variant={'ghost'} disabled>
{t('datasetCreation.stepTwo.reset')}
</Button>
<Button variant={'ghost'} disabled>Reset</Button>
</>
}
>
@ -666,13 +672,13 @@ const StepTwo = ({
onChange={e => setSegmentIdentifier(e.target.value)}
/>
<MaxLengthInput
value={max}
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
defaultValue={max}
onChange={setMax}
/>
<OverlapInput
value={overlap}
defaultValue={overlap}
min={1}
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
onChange={setOverlap}
/>
</div>
<div className='space-y-2'>
@ -695,32 +701,34 @@ const StepTwo = ({
</div>
</OptionCard>
<OptionCard
title={'Parent-child'}
icon={<Image src={FamilyMod} alt='Parent-child' />}
title={t('datasetCreation.stepTwo.parentChild')}
icon={<Image src={FamilyMod} alt={t('datasetCreation.stepTwo.parentChild')} />}
effectImg={OrangeEffect.src}
activeHeaderClassName='bg-gradient-to-r from-[#F9F1EE] to-[#F9FAFB]'
description={'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.'}
description={t('datasetCreation.stepTwo.parentChildTip')}
isActive={SegmentType.CUSTOM === segmentationType}
onClick={() => setSegmentationType(SegmentType.CUSTOM)}
actions={
<>
<Button variant={'secondary-accent'}>
<RiSearchEyeLine className='h-4 w-4 mr-1.5' />
Preview Chunk
{t('datasetCreation.stepTwo.previewChunk')}
</Button>
<Button variant={'ghost'} onClick={resetRules}>
{t('datasetCreation.stepTwo.reset')}
</Button>
<Button variant={'ghost'} onClick={resetRules}>Reset</Button>
</>
}
>
<div className='space-y-4'>
<div className='space-y-2'>
<TextLabel>
Parent-chunk for Context
{t('datasetCreation.stepTwo.parentChunkForContext')}
</TextLabel>
<RadioCard
icon={<Image src={Note} alt='' />}
title={'Paragraph'}
description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'}
title={t('datasetCreation.stepTwo.paragraph')}
description={t('datasetCreation.stepTwo.paragraphTip')}
isChosen={parentChildConfig.chunkForContext === 'paragraph'}
onChosen={() => setParentChildConfig(
{
@ -741,12 +749,12 @@ const StepTwo = ({
})}
/>
<MaxLengthInput
value={parentChildConfig.parent.maxLength}
onChange={e => setParentChildConfig({
defaultValue={parentChildConfig.parent.maxLength}
onChange={value => setParentChildConfig({
...parentChildConfig,
parent: {
...parentChildConfig.parent,
maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
maxLength: value,
},
})}
/>
@ -755,8 +763,8 @@ const StepTwo = ({
/>
<RadioCard
icon={<Image src={FileList} alt='' />}
title={'Full Doc'}
description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'}
title={t('datasetCreation.stepTwo.fullDoc')}
description={t('datasetCreation.stepTwo.fullDocTip')}
onChosen={() => setParentChildConfig(
{
...parentChildConfig,
@ -769,7 +777,7 @@ const StepTwo = ({
<div className='space-y-2'>
<TextLabel>
Child-chunk for Retrieval
{t('datasetCreation.stepTwo.childChunkForRetrieval')}
</TextLabel>
<div className='flex gap-2'>
<DelimiterInput
@ -783,20 +791,20 @@ const StepTwo = ({
})}
/>
<MaxLengthInput
value={parentChildConfig.child.maxLength}
defaultValue={parentChildConfig.child.maxLength}
onChange={e => setParentChildConfig({
onChange={value => setParentChildConfig({
...parentChildConfig,
child: {
...parentChildConfig.child,
maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
maxLength: value,
},
})}
/>
</div>
<TextLabel>
Text Pre-processing Rules
{t('datasetCreation.stepTwo.rules')}
</TextLabel>
<div className='space-y-2'>
{rules.map(rule => (
@ -834,7 +842,7 @@ const StepTwo = ({
}}
>
<div className='h-8 p-1.5 bg-white rounded-lg border border-[#101828]/10 justify-center items-center inline-flex absolute left-5 top-[18px]'>
<Image src={GoldIcon} alt='Gold Icon' width={20} height={20} />
<Image src={indexMethodIcon.high_quality} alt='Gold Icon' width={20} height={20} />
</div>
{!hasSetIndexType && <span className={cn(s.radio)} />}
<div className={s.typeHeader}>
@ -865,7 +873,7 @@ const StepTwo = ({
onClick={changeToEconomicalType}
>
<div className='h-8 p-1.5 bg-white rounded-lg border border-[#101828]/10 justify-center items-center inline-flex absolute left-5 top-[18px]'>
<Image src={Piggybank} alt='Economical Icon' width={20} height={20} />
<Image src={indexMethodIcon.economical} alt='Economical Icon' width={20} height={20} />
</div>
{!hasSetIndexType && <span className={cn(s.radio)} />}
<div className={s.typeHeader}>

View File

@ -3,6 +3,8 @@ import { useTranslation } from 'react-i18next'
import type { InputProps } from '@/app/components/base/input'
import Input from '@/app/components/base/input'
import Tooltip from '@/app/components/base/tooltip'
import type { InputNumberProps } from '@/app/components/base/input-number'
import { InputNumber } from '@/app/components/base/input-number'
const TextLabel: FC<PropsWithChildren> = (props) => {
return <label className='text-[#354052] text-xs font-semibold leading-none'>{props.children}</label>
@ -36,12 +38,12 @@ export const DelimiterInput: FC<InputProps> = (props) => {
</FormField>
}
export const MaxLengthInput: FC<InputProps> = (props) => {
export const MaxLengthInput: FC<InputNumberProps> = (props) => {
const { t } = useTranslation()
return <FormField label={<div>
return <FormField label={<div className='h-[14px]'>
{t('datasetCreation.stepTwo.maxLength')}
</div>}>
<Input
<InputNumber
type="number"
className='h-9'
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
@ -52,7 +54,7 @@ export const MaxLengthInput: FC<InputProps> = (props) => {
</FormField>
}
export const OverlapInput: FC<InputProps> = (props) => {
export const OverlapInput: FC<InputNumberProps> = (props) => {
const { t } = useTranslation()
return <FormField label={<div className='flex'>
{t('datasetCreation.stepTwo.overlap')}
@ -64,7 +66,7 @@ export const OverlapInput: FC<InputProps> = (props) => {
}
/>
</div>}>
<Input
<InputNumber
type="number"
className='h-9'
placeholder={t('datasetCreation.stepTwo.overlap') || ''}

View File

@ -1,5 +1,5 @@
'use client'
import type { FC } from 'react'
import type { FC, ReactNode } from 'react'
import React, { useEffect, useState } from 'react'
import { PencilIcon } from '@heroicons/react/24/outline'
import { useTranslation } from 'react-i18next'
@ -24,6 +24,7 @@ import type { DocType, FullDocumentDetail } from '@/models/datasets'
import { CUSTOMIZABLE_DOC_TYPES } from '@/models/datasets'
import type { inputType, metadataType } from '@/hooks/use-metadata'
import { useBookCategories, useBusinessDocCategories, useLanguages, useMetadataMap, usePersonalDocCategories } from '@/hooks/use-metadata'
import classNames from '@/utils/classnames'
const map2Options = (map: { [key: string]: string }) => {
return Object.keys(map).map(key => ({ value: key, name: map[key] }))
@ -32,6 +33,7 @@ const map2Options = (map: { [key: string]: string }) => {
type IFieldInfoProps = {
label: string
value?: string
valueIcon?: ReactNode
displayedValue?: string
defaultValue?: string
showEdit?: boolean
@ -43,6 +45,7 @@ type IFieldInfoProps = {
export const FieldInfo: FC<IFieldInfoProps> = ({
label,
value = '',
valueIcon,
displayedValue = '',
defaultValue,
showEdit = false,
@ -58,7 +61,8 @@ export const FieldInfo: FC<IFieldInfoProps> = ({
return (
<div className={cn(s.fieldInfo, editAlignTop && '!items-start', readAlignTop && '!items-start pt-1')}>
<div className={cn(s.label, editAlignTop && 'pt-1')}>{label}</div>
<div className={s.value}>
<div className={classNames(s.value, 'flex items-center gap-1')}>
{valueIcon}
{!showEdit
? displayedValue
: inputType === 'select'

View File

@ -1,16 +1,12 @@
'use client'
import { Stepper } from '../components/datasets/create/stepper'
import { useState } from 'react'
import { InputNumber } from '../components/base/input-number'
// import { Stepper } from '../components/datasets/create/stepper'
export default function Page() {
const [step, setStep] = useState(0)
return <div className='p-4'>
<Stepper
steps={[
{ name: 'Data Source' },
{ name: 'Document Processing' },
{ name: 'Execute & Finish' },
]}
activeStepIndex={1}
/>
<InputNumber onChange={setStep} unit={'tokens'} />
</div>
}

View File

@ -99,6 +99,16 @@ const translation = {
autoDescription: 'Automatically set chunk and preprocessing rules. Unfamiliar users are recommended to select this.',
custom: 'Custom',
customDescription: 'Customize chunks rules, chunks length, and preprocessing rules, etc.',
general: 'General',
generalTip: 'General text chunking mode, the chunks retrieved and recalled are the same.',
parentChild: 'Parent-child',
parentChildTip: 'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.',
parentChunkForContext: 'Parent-chunk for Context',
childChunkForRetrieval: 'Child-chunk for Retrieval',
paragraph: 'Paragraph',
paragraphTip: 'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.',
fullDoc: 'Full Doc',
fullDocTip: 'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.',
separator: 'Delimiter',
separatorTip: 'A delimiter is the character used to separate text. \\n\\n and \\n are commonly used delimiters for separating paragraphs and lines. Combined with commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum chunk length. You can also use special delimiters defined by yourself (e.g. ***).',
separatorPlaceholder: '\\n\\n for separating paragraphs; \\n for separating lines',

View File

@ -99,6 +99,16 @@ const translation = {
autoDescription: '自动设置分段规则与预处理规则,如果不了解这些参数建议选择此项',
custom: '自定义',
customDescription: '自定义分段规则、分段长度以及预处理规则等参数',
general: '通用',
generalTip: '通用文本分块模式,检索和回忆的块是相同的',
parentChild: '父子分段',
parentChildTip: '使用父子模式时,子块用于检索,父块用作上下文',
parentChunkForContext: '父块用作上下文',
childChunkForRetrieval: '子块用于检索',
paragraph: '段落',
paragraphTip: '此模式根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的母块',
fullDoc: '全文',
fullDocTip: '整个文档用作父块并直接检索。请注意出于性能原因超过10000个标记的文本将被自动截断。',
separator: '分段标识符',
separatorTip: '分隔符是用于分隔文本的字符。\\n\\n 和 \\n 是常用于分隔段落和行的分隔符。用逗号连接分隔符(\\n\\n,\\n当段落超过最大块长度时会按行进行分割。你也可以使用自定义的特殊分隔符例如 ***)。',
separatorPlaceholder: '\\n\\n 用于分段;\\n 用于分行',
@ -112,6 +122,7 @@ const translation = {
removeUrlEmails: '删除所有 URL 和电子邮件地址',
removeStopwords: '去除停用词,例如 “a”“an”“the” 等',
preview: '确认并预览',
previewChunk: '预览块',
reset: '重置',
indexMode: '索引方式',
qualified: '高质量',