mirror of https://github.com/langgenius/dify.git
feat: settings ui for database pre-preprocessing
This commit is contained in:
parent
bba9301788
commit
1a6a28f650
|
|
@ -0,0 +1,57 @@
|
|||
import { useState } from 'react'
|
||||
import type { FC, SetStateAction } from 'react'
|
||||
import { RiArrowDownSLine, RiArrowUpSLine } from '@remixicon/react'
|
||||
import Input, { type InputProps } from '../input'
|
||||
import classNames from '@/utils/classnames'
|
||||
|
||||
/**
 * Props accepted by `InputNumber`.
 *
 * Everything the base `Input` takes is allowed except `value`, `onChange`
 * and `size`, which are re-declared below with number-specific meanings.
 */
export type InputNumberProps = Omit<InputProps, 'value' | 'onChange' | 'size'> & {
  /** Optional label rendered next to the input (for example a unit name). */
  unit?: string
  /** Receives the new value as a number rather than a change event. */
  onChange: (value: number) => void
  /** Step applied by the increment / decrement buttons. */
  amount?: number
  /** Visual size; selects the padding of the stepper buttons. */
  size?: 'sm' | 'md'
}
|
||||
|
||||
/**
 * Numeric input with stacked increment / decrement buttons and an optional
 * unit label.
 *
 * The current value lives in local state, seeded from `defaultValue`
 * (0 when omitted). Every accepted change - typed or stepped - is reported
 * to the caller through `onChange`.
 *
 * Props of note:
 * - `amount`: step applied by the buttons (default 1).
 * - `min` / `max`: inclusive bounds for the buttons; a step that would leave
 *   the range is ignored. Typed values are forwarded unclamped, as before,
 *   so callers can still validate intermediate input themselves.
 * - `size`: only changes the vertical padding of the buttons.
 * - `disabled`: disables the text input and both buttons.
 */
export const InputNumber: FC<InputNumberProps> = (props) => {
  const { unit, className, onChange, defaultValue = 0, amount = 1, size = 'sm', max, min, disabled, ...rest } = props
  const [val, setVal] = useState<number>(defaultValue as number)

  // Normalise the bounds once. `== null` keeps a bound of 0 meaningful, which
  // the previous truthiness check (`max && ...`) silently dropped.
  const upperBound = max == null ? undefined : Number(max)
  const lowerBound = min == null ? undefined : Number(min)

  const update = (value: SetStateAction<number>) => {
    const next = typeof value === 'function' ? value(val) : value
    // Bounds are inclusive: only reject values strictly outside the range so
    // the stepper can actually land on min and max.
    if (upperBound !== undefined && next > upperBound)
      return
    if (lowerBound !== undefined && next < lowerBound)
      return
    setVal(next)
    // Keep the parent in sync with button-driven changes too.
    onChange(next)
  }
  const inc = () => update(current => current + amount)
  const dec = () => update(current => current - amount)

  return <div className='flex'>
    <Input {...rest}
      className={classNames('rounded-r-none', className)}
      value={val}
      max={max}
      min={min}
      disabled={disabled}
      onChange={(e) => {
        const parsed = Number(e.target.value)
        // Ignore keystrokes that do not form a number.
        if (Number.isNaN(parsed))
          return
        setVal(parsed)
        onChange(parsed)
      }}
    />
    {unit && <div className='flex items-center bg-components-input-bg-normal text-[13px] text-text-placeholder pr-2'>{unit}</div>}
    <div className='flex flex-col bg-components-input-bg-normal rounded-r-md border-l text-text-tertiary'>
      {/* type='button' stops the steppers from submitting an enclosing form */}
      <button type='button' disabled={disabled} onClick={inc} className={classNames(
        size === 'sm' ? 'pt-1' : 'pt-1.5',
        'px-1.5 hover:bg-components-input-bg-hover',
      )}>
        <RiArrowUpSLine className='size-3' />
      </button>
      <button type='button' disabled={disabled} onClick={dec} className={classNames(
        size === 'sm' ? 'pb-1' : 'pb-1.5',
        'px-1.5 hover:bg-components-input-bg-hover',
      )}>
        <RiArrowDownSLine className='size-3' />
      </button>
    </div>
  </div>
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import { InputNumber } from '../input-number'
|
||||
import Tooltip from '@/app/components/base/tooltip'
|
||||
import Slider from '@/app/components/base/slider'
|
||||
import Switch from '@/app/components/base/switch'
|
||||
|
|
@ -47,13 +48,20 @@ const ParamItem: FC<Props> = ({ className, id, name, noTooltip, tip, step = 0.1,
|
|||
</div>
|
||||
<div className="mt-2 flex items-center">
|
||||
<div className="mr-4 flex shrink-0 items-center">
|
||||
<input disabled={!enable} type="number" min={min} max={max} step={step} className="block w-[48px] h-7 text-xs leading-[18px] rounded-lg border-0 pl-1 pl py-1.5 bg-gray-50 text-gray-900 placeholder:text-gray-400 focus:ring-1 focus:ring-inset focus:ring-primary-600 disabled:opacity-60" value={(value === null || value === undefined) ? '' : value} onChange={(e) => {
|
||||
const value = parseFloat(e.target.value)
|
||||
if (value < min || value > max)
|
||||
return
|
||||
<InputNumber
|
||||
disabled={!enable}
|
||||
type="number"
|
||||
min={min}
|
||||
max={max}
|
||||
step={step}
|
||||
size='sm'
|
||||
onChange={(value) => {
|
||||
if (value < min || value > max)
|
||||
return
|
||||
|
||||
onChange(id, value)
|
||||
}} />
|
||||
onChange(id, value)
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex items-center h-7 grow">
|
||||
<Slider
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import { useTranslation } from 'react-i18next'
|
|||
import Image from 'next/image'
|
||||
import RetrievalParamConfig from '../retrieval-param-config'
|
||||
import { OptionCard } from '../../create/step-two/option-card'
|
||||
import Selection from '../../create/assets/selection-mod.svg'
|
||||
import { retrievalIcon } from '../../create/icons'
|
||||
import { RETRIEVE_METHOD } from '@/types/app'
|
||||
import type { RetrievalConfig } from '@/types/app'
|
||||
|
||||
|
|
@ -22,7 +22,7 @@ const EconomicalRetrievalMethodConfig: FC<Props> = ({
|
|||
|
||||
return (
|
||||
<div className='space-y-2'>
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={Selection} alt='' />}
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
|
||||
title={t('dataset.retrieval.invertedIndex.title')}
|
||||
description={t('dataset.retrieval.invertedIndex.description')} isActive
|
||||
activeHeaderClassName='bg-gradient-to-r from-[#F0EEFA] to-[#F9FAFB]'
|
||||
|
|
|
|||
|
|
@ -5,10 +5,8 @@ import { useTranslation } from 'react-i18next'
|
|||
import Image from 'next/image'
|
||||
import RetrievalParamConfig from '../retrieval-param-config'
|
||||
import { OptionCard } from '../../create/step-two/option-card'
|
||||
import Selection from '../../create/assets/selection-mod.svg'
|
||||
import Research from '../../create/assets/research-mod.svg'
|
||||
import PatternRecognition from '../../create/assets/pattern-recognition-mod.svg'
|
||||
import Effect from '../../create/assets/option-card-effect-purple.svg'
|
||||
import { retrievalIcon } from '../../create/icons'
|
||||
import type { RetrievalConfig } from '@/types/app'
|
||||
import { RETRIEVE_METHOD } from '@/types/app'
|
||||
import { useProviderContext } from '@/context/provider-context'
|
||||
|
|
@ -59,7 +57,7 @@ const RetrievalMethodConfig: FC<Props> = ({
|
|||
return (
|
||||
<div className='space-y-2'>
|
||||
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={Selection} alt='' />}
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
|
||||
title={t('dataset.retrieval.semantic_search.title')}
|
||||
description={t('dataset.retrieval.semantic_search.description')}
|
||||
isActive={
|
||||
|
|
@ -80,7 +78,7 @@ const RetrievalMethodConfig: FC<Props> = ({
|
|||
</OptionCard>
|
||||
)}
|
||||
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={Research} alt='' />}
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
|
||||
title={t('dataset.retrieval.full_text_search.title')}
|
||||
description={t('dataset.retrieval.full_text_search.description')}
|
||||
isActive={
|
||||
|
|
@ -101,7 +99,7 @@ const RetrievalMethodConfig: FC<Props> = ({
|
|||
</OptionCard>
|
||||
)}
|
||||
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={PatternRecognition} alt='' />}
|
||||
<OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
|
||||
title={
|
||||
<div className='flex items-center space-x-1'>
|
||||
<div>{t('dataset.retrieval.hybrid_search.title')}</div>
|
||||
|
|
|
|||
|
|
@ -7,7 +7,11 @@ import { omit } from 'lodash-es'
|
|||
import { ArrowRightIcon } from '@heroicons/react/24/solid'
|
||||
import {
|
||||
RiErrorWarningFill,
|
||||
RiLoader2Fill,
|
||||
RiTerminalBoxLine,
|
||||
} from '@remixicon/react'
|
||||
import Image from 'next/image'
|
||||
import { indexMethodIcon, retrievalIcon } from '../icons'
|
||||
import s from './index.module.css'
|
||||
import cn from '@/utils/classnames'
|
||||
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
|
||||
|
|
@ -23,15 +27,21 @@ import UpgradeBtn from '@/app/components/billing/upgrade-btn'
|
|||
import { useProviderContext } from '@/context/provider-context'
|
||||
import Tooltip from '@/app/components/base/tooltip'
|
||||
import { sleep } from '@/utils'
|
||||
import { RETRIEVE_METHOD } from '@/types/app'
|
||||
|
||||
type Props = {
|
||||
datasetId: string
|
||||
batchId: string
|
||||
documents?: FullDocumentDetail[]
|
||||
indexingType?: string
|
||||
retrievalMethod?: string
|
||||
}
|
||||
|
||||
const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => {
|
||||
const RuleDetail: FC<{
|
||||
sourceData?: ProcessRuleResponse
|
||||
indexingType?: string
|
||||
retrievalMethod?: string
|
||||
}> = ({ sourceData, indexingType, retrievalMethod }) => {
|
||||
const { t } = useTranslation()
|
||||
|
||||
const segmentationRuleMap = {
|
||||
|
|
@ -81,10 +91,40 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) =>
|
|||
displayedValue={String(getValue(field))}
|
||||
/>
|
||||
})}
|
||||
<FieldInfo
|
||||
label={t('datasetCreation.stepTwo.indexMode')}
|
||||
displayedValue={t(`datasetCreation.stepTwo.${indexingType}`) as string}
|
||||
valueIcon={
|
||||
<Image
|
||||
src={
|
||||
indexingType === 'economy'
|
||||
? indexMethodIcon.economical
|
||||
: indexMethodIcon.high_quality
|
||||
}
|
||||
alt=''
|
||||
/>
|
||||
}
|
||||
/>
|
||||
<FieldInfo
  label={t('datasetSettings.form.retrievalSetting.title')}
  displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
  valueIcon={
    <Image
      src={
        // Every branch must compare against `retrievalMethod`. The previous
        // second branch tested the constant `RETRIEVE_METHOD.semantic`, which
        // is always truthy, so the hybrid icon could never be selected.
        // Anything that is neither full-text nor hybrid falls back to the
        // vector icon, matching the economical (inverted index) option card,
        // which also renders `retrievalIcon.vector`.
        retrievalMethod === RETRIEVE_METHOD.fullText
          ? retrievalIcon.fullText
          : retrievalMethod === RETRIEVE_METHOD.hybrid
            ? retrievalIcon.hybrid
            : retrievalIcon.vector
      }
      alt=''
    />
  }
/>
|
||||
</div>
|
||||
}
|
||||
|
||||
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType }) => {
|
||||
const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
|
||||
const { t } = useTranslation()
|
||||
const { enableBilling, plan } = useProviderContext()
|
||||
|
||||
|
|
@ -146,6 +186,9 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
|
|||
// Sends the user to the document list of the current dataset.
const navToDocumentList = () => {
  const documentsPath = `/datasets/${datasetId}/documents`
  router.push(documentsPath)
}
|
||||
// Opens the datasets page with the `api` category selected; used by the
// "Access the API" button rendered below.
const navToApiDocs = () => {
  const apiDocsPath = '/datasets?category=api'
  router.push(apiDocsPath)
}
|
||||
|
||||
const isEmbedding = useMemo(() => {
|
||||
return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
|
||||
|
|
@ -177,13 +220,17 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
|
|||
|
||||
return doc?.data_source_info.notion_page_icon
|
||||
}
|
||||
const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
|
||||
// True when the detail's `indexing_status` is one of 'indexing', 'splitting',
// 'parsing', 'cleaning' or 'waiting'. A missing status is treated as ''.
const isSourceEmbedding = (detail: IndexingStatusResponse) => {
  const inProgressStatuses = ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting']
  const currentStatus = detail.indexing_status || ''
  return inProgressStatuses.includes(currentStatus)
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<div className='h-5 flex items-center mb-5'>
|
||||
<div className={s.embeddingStatus}>
|
||||
{isEmbedding && t('datasetDocuments.embedding.processing')}
|
||||
{isEmbedding && <div className='flex items-center'>
|
||||
<RiLoader2Fill className='size-4 mr-1 animate-spin' />
|
||||
{t('datasetDocuments.embedding.processing')}
|
||||
</div>}
|
||||
{isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -258,11 +305,19 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
|
|||
</div>
|
||||
))}
|
||||
</div>
|
||||
<RuleDetail sourceData={ruleDetail} />
|
||||
<div className='flex items-center gap-2 mt-10'>
|
||||
<RuleDetail sourceData={ruleDetail} indexingType={
|
||||
indexingType
|
||||
}
|
||||
retrievalMethod={retrievalMethod}
|
||||
/>
|
||||
<div className='flex items-center gap-2 my-10'>
|
||||
<Button className='w-fit' onClick={navToApiDocs}>
|
||||
<RiTerminalBoxLine className='size-4 mr-2' />
|
||||
<span>Access the API</span>
|
||||
</Button>
|
||||
<Button className='w-fit' variant='primary' onClick={navToDocumentList}>
|
||||
<span>{t('datasetCreation.stepThree.navTo')}</span>
|
||||
<ArrowRightIcon className='h-4 w-4 ml-2 stroke-current stroke-1' />
|
||||
<ArrowRightIcon className='size-4 ml-2 stroke-current stroke-1' />
|
||||
</Button>
|
||||
</div>
|
||||
</>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
import GoldIcon from './assets/gold.svg'
|
||||
import Piggybank from './assets/piggy-bank-mod.svg'
|
||||
import Selection from './assets/selection-mod.svg'
|
||||
import Research from './assets/research-mod.svg'
|
||||
import PatternRecognition from './assets/pattern-recognition-mod.svg'
|
||||
|
||||
/**
 * Icon assets for the index methods, shared by the creation steps so each
 * screen does not import the SVG files on its own.
 */
export const indexMethodIcon = {
  // gold.svg
  high_quality: GoldIcon,
  // piggy-bank-mod.svg
  economical: Piggybank,
}
|
||||
|
||||
/**
 * Icon assets for the retrieval methods, keyed by the kind of search they
 * represent.
 */
export const retrievalIcon = {
  // selection-mod.svg
  vector: Selection,
  // research-mod.svg
  fullText: Research,
  // pattern-recognition-mod.svg
  hybrid: PatternRecognition,
}
|
||||
|
|
@ -36,6 +36,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
|||
const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
|
||||
const [step, setStep] = useState(1)
|
||||
const [indexingTypeCache, setIndexTypeCache] = useState('')
|
||||
const [retrievalMethodCache, setRetrievalMethodCache] = useState('')
|
||||
const [fileList, setFiles] = useState<FileItem[]>([])
|
||||
const [result, setResult] = useState<createDocumentResponse | undefined>()
|
||||
const [hasError, setHasError] = useState(false)
|
||||
|
|
@ -80,6 +81,9 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
|||
// Stores the document-creation response (or clears it when called without an
// argument) in the `result` state.
const updateResultCache = (response?: createDocumentResponse): void => {
  setResult(response)
}
|
||||
// Stores the chosen retrieval method in the `retrievalMethodCache` state,
// which is used as the fallback `retrievalMethod` passed to step three.
const updateRetrievalMethodCache = (searchMethod: string): void => {
  setRetrievalMethodCache(searchMethod)
}
|
||||
|
||||
const nextStep = useCallback(() => {
|
||||
setStep(step + 1)
|
||||
|
|
@ -156,6 +160,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
|||
websiteCrawlJobId={websiteCrawlJobId}
|
||||
onStepChange={changeStep}
|
||||
updateIndexingTypeCache={updateIndexingTypeCache}
|
||||
updateRetrievalMethodCache={updateRetrievalMethodCache}
|
||||
updateResultCache={updateResultCache}
|
||||
crawlOptions={crawlOptions}
|
||||
/>}
|
||||
|
|
@ -163,6 +168,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
|
|||
datasetId={datasetId}
|
||||
datasetName={detail?.name}
|
||||
indexingType={detail?.indexing_technique || indexingTypeCache}
|
||||
retrievalMethod={detail?.retrieval_model?.search_method || retrievalMethodCache}
|
||||
creationCache={result}
|
||||
/>}
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -7,15 +7,17 @@ import s from './index.module.css'
|
|||
import cn from '@/utils/classnames'
|
||||
import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
|
||||
import type { FullDocumentDetail, createDocumentResponse } from '@/models/datasets'
|
||||
import AppIcon from '@/app/components/base/app-icon'
|
||||
|
||||
type StepThreeProps = {
|
||||
datasetId?: string
|
||||
datasetName?: string
|
||||
indexingType?: string
|
||||
retrievalMethod?: string
|
||||
creationCache?: createDocumentResponse
|
||||
}
|
||||
|
||||
const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: StepThreeProps) => {
|
||||
const StepThree = ({ datasetId, datasetName, indexingType, creationCache, retrievalMethod }: StepThreeProps) => {
|
||||
const { t } = useTranslation()
|
||||
|
||||
const media = useBreakpoints()
|
||||
|
|
@ -30,8 +32,13 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
|
|||
<div className={s.creationInfo}>
|
||||
<div className={s.title}>{t('datasetCreation.stepThree.creationTitle')}</div>
|
||||
<div className={s.content}>{t('datasetCreation.stepThree.creationContent')}</div>
|
||||
<div className={s.label}>{t('datasetCreation.stepThree.label')}</div>
|
||||
<div className={s.datasetName}>{datasetName || creationCache?.dataset?.name}</div>
|
||||
<div className='flex gap-4'>
|
||||
<AppIcon {...creationCache?.dataset} className='size-14' />
|
||||
<div className='w-full'>
|
||||
<div className={s.label}>{t('datasetCreation.stepThree.label')}</div>
|
||||
<div className={s.datasetName}>{datasetName || creationCache?.dataset?.name}</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div className={s.dividerLine} />
|
||||
</>
|
||||
|
|
@ -47,6 +54,7 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
|
|||
batchId={creationCache?.batch || ''}
|
||||
documents={creationCache?.documents as FullDocumentDetail[]}
|
||||
indexingType={indexingType || creationCache?.dataset?.indexing_technique}
|
||||
retrievalMethod={retrievalMethod || creationCache?.dataset?.retrieval_model?.search_method}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -16,10 +16,9 @@ import Image from 'next/image'
|
|||
import SettingCog from '../assets/setting-gear-mod.svg'
|
||||
import OrangeEffect from '../assets/option-card-effect-orange.svg'
|
||||
import FamilyMod from '../assets/family-mod.svg'
|
||||
import GoldIcon from '../assets/gold.svg'
|
||||
import Piggybank from '../assets/piggy-bank-mod.svg'
|
||||
import Note from '../assets/note-mod.svg'
|
||||
import FileList from '../assets/file-list-3-fill.svg'
|
||||
import { indexMethodIcon } from '../icons'
|
||||
import PreviewItem, { PreviewType } from './preview-item'
|
||||
import s from './index.module.css'
|
||||
import unescape from './unescape'
|
||||
|
|
@ -80,6 +79,7 @@ type StepTwoProps = {
|
|||
onSetting: () => void
|
||||
datasetId?: string
|
||||
indexingType?: ValueOf<IndexingType>
|
||||
retrievalMethod?: string
|
||||
dataSourceType: DataSourceType
|
||||
files: CustomFile[]
|
||||
notionPages?: NotionPage[]
|
||||
|
|
@ -89,6 +89,7 @@ type StepTwoProps = {
|
|||
websiteCrawlJobId?: string
|
||||
onStepChange?: (delta: number) => void
|
||||
updateIndexingTypeCache?: (type: string) => void
|
||||
updateRetrievalMethodCache?: (method: string) => void
|
||||
updateResultCache?: (res: createDocumentResponse) => void
|
||||
onSave?: () => void
|
||||
onCancel?: () => void
|
||||
|
|
@ -137,6 +138,7 @@ const StepTwo = ({
|
|||
updateResultCache,
|
||||
onSave,
|
||||
onCancel,
|
||||
updateRetrievalMethodCache,
|
||||
}: StepTwoProps) => {
|
||||
const { t } = useTranslation()
|
||||
const { locale } = useContext(I18n)
|
||||
|
|
@ -507,6 +509,8 @@ const StepTwo = ({
|
|||
})
|
||||
updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
|
||||
updateResultCache && updateResultCache(res)
|
||||
// eslint-disable-next-line @typescript-eslint/no-use-before-define
|
||||
updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string)
|
||||
}
|
||||
else {
|
||||
res = await createDocument({
|
||||
|
|
@ -643,19 +647,21 @@ const StepTwo = ({
|
|||
<div className='max-w-[640px]'>
|
||||
<div className='space-y-4'>
|
||||
<OptionCard
|
||||
title={'General'}
|
||||
icon={<Image src={SettingCog} alt='General' />}
|
||||
title={t('datasetCreation.stepTwo.general')}
|
||||
icon={<Image src={SettingCog} alt={t('datasetCreation.stepTwo.general')} />}
|
||||
activeHeaderClassName='bg-gradient-to-r from-[#EFF0F9] to-[#F9FAFB]'
|
||||
description={'General text chunking mode, the chunks retrieved and recalled are the same.'}
|
||||
description={t('datasetCreation.stepTwo.generalTip')}
|
||||
isActive={SegmentType.AUTO === segmentationType}
|
||||
onClick={() => setSegmentationType(SegmentType.AUTO)}
|
||||
actions={
|
||||
<>
|
||||
<Button variant={'secondary-accent'}>
|
||||
<RiSearchEyeLine className='h-4 w-4 mr-1.5' />
|
||||
Preview Chunk
|
||||
{t('datasetCreation.stepTwo.previewChunk')}
|
||||
</Button>
|
||||
<Button variant={'ghost'} disabled>
|
||||
{t('datasetCreation.stepTwo.reset')}
|
||||
</Button>
|
||||
<Button variant={'ghost'} disabled>Reset</Button>
|
||||
</>
|
||||
}
|
||||
>
|
||||
|
|
@ -666,13 +672,13 @@ const StepTwo = ({
|
|||
onChange={e => setSegmentIdentifier(e.target.value)}
|
||||
/>
|
||||
<MaxLengthInput
|
||||
value={max}
|
||||
onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
||||
defaultValue={max}
|
||||
onChange={setMax}
|
||||
/>
|
||||
<OverlapInput
|
||||
value={overlap}
|
||||
defaultValue={overlap}
|
||||
min={1}
|
||||
onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
||||
onChange={setOverlap}
|
||||
/>
|
||||
</div>
|
||||
<div className='space-y-2'>
|
||||
|
|
@ -695,32 +701,34 @@ const StepTwo = ({
|
|||
</div>
|
||||
</OptionCard>
|
||||
<OptionCard
|
||||
title={'Parent-child'}
|
||||
icon={<Image src={FamilyMod} alt='Parent-child' />}
|
||||
title={t('datasetCreation.stepTwo.parentChild')}
|
||||
icon={<Image src={FamilyMod} alt={t('datasetCreation.stepTwo.parentChild')} />}
|
||||
effectImg={OrangeEffect.src}
|
||||
activeHeaderClassName='bg-gradient-to-r from-[#F9F1EE] to-[#F9FAFB]'
|
||||
description={'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.'}
|
||||
description={t('datasetCreation.stepTwo.parentChildTip')}
|
||||
isActive={SegmentType.CUSTOM === segmentationType}
|
||||
onClick={() => setSegmentationType(SegmentType.CUSTOM)}
|
||||
actions={
|
||||
<>
|
||||
<Button variant={'secondary-accent'}>
|
||||
<RiSearchEyeLine className='h-4 w-4 mr-1.5' />
|
||||
Preview Chunk
|
||||
{t('datasetCreation.stepTwo.previewChunk')}
|
||||
</Button>
|
||||
<Button variant={'ghost'} onClick={resetRules}>
|
||||
{t('datasetCreation.stepTwo.reset')}
|
||||
</Button>
|
||||
<Button variant={'ghost'} onClick={resetRules}>Reset</Button>
|
||||
</>
|
||||
}
|
||||
>
|
||||
<div className='space-y-4'>
|
||||
<div className='space-y-2'>
|
||||
<TextLabel>
|
||||
Parent-chunk for Context
|
||||
{t('datasetCreation.stepTwo.parentChunkForContext')}
|
||||
</TextLabel>
|
||||
<RadioCard
|
||||
icon={<Image src={Note} alt='' />}
|
||||
title={'Paragraph'}
|
||||
description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'}
|
||||
title={t('datasetCreation.stepTwo.paragraph')}
|
||||
description={t('datasetCreation.stepTwo.paragraphTip')}
|
||||
isChosen={parentChildConfig.chunkForContext === 'paragraph'}
|
||||
onChosen={() => setParentChildConfig(
|
||||
{
|
||||
|
|
@ -741,12 +749,12 @@ const StepTwo = ({
|
|||
})}
|
||||
/>
|
||||
<MaxLengthInput
|
||||
value={parentChildConfig.parent.maxLength}
|
||||
onChange={e => setParentChildConfig({
|
||||
defaultValue={parentChildConfig.parent.maxLength}
|
||||
onChange={value => setParentChildConfig({
|
||||
...parentChildConfig,
|
||||
parent: {
|
||||
...parentChildConfig.parent,
|
||||
maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
|
||||
maxLength: value,
|
||||
},
|
||||
})}
|
||||
/>
|
||||
|
|
@ -755,8 +763,8 @@ const StepTwo = ({
|
|||
/>
|
||||
<RadioCard
|
||||
icon={<Image src={FileList} alt='' />}
|
||||
title={'Full Doc'}
|
||||
description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'}
|
||||
title={t('datasetCreation.stepTwo.fullDoc')}
|
||||
description={t('datasetCreation.stepTwo.fullDocTip')}
|
||||
onChosen={() => setParentChildConfig(
|
||||
{
|
||||
...parentChildConfig,
|
||||
|
|
@ -769,7 +777,7 @@ const StepTwo = ({
|
|||
|
||||
<div className='space-y-2'>
|
||||
<TextLabel>
|
||||
Child-chunk for Retrieval
|
||||
{t('datasetCreation.stepTwo.childChunkForRetrieval')}
|
||||
</TextLabel>
|
||||
<div className='flex gap-2'>
|
||||
<DelimiterInput
|
||||
|
|
@ -783,20 +791,20 @@ const StepTwo = ({
|
|||
})}
|
||||
/>
|
||||
<MaxLengthInput
|
||||
value={parentChildConfig.child.maxLength}
|
||||
defaultValue={parentChildConfig.child.maxLength}
|
||||
|
||||
onChange={e => setParentChildConfig({
|
||||
onChange={value => setParentChildConfig({
|
||||
...parentChildConfig,
|
||||
child: {
|
||||
...parentChildConfig.child,
|
||||
maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
|
||||
maxLength: value,
|
||||
},
|
||||
})}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<TextLabel>
|
||||
Text Pre-processing Rules
|
||||
{t('datasetCreation.stepTwo.rules')}
|
||||
</TextLabel>
|
||||
<div className='space-y-2'>
|
||||
{rules.map(rule => (
|
||||
|
|
@ -834,7 +842,7 @@ const StepTwo = ({
|
|||
}}
|
||||
>
|
||||
<div className='h-8 p-1.5 bg-white rounded-lg border border-[#101828]/10 justify-center items-center inline-flex absolute left-5 top-[18px]'>
|
||||
<Image src={GoldIcon} alt='Gold Icon' width={20} height={20} />
|
||||
<Image src={indexMethodIcon.high_quality} alt='Gold Icon' width={20} height={20} />
|
||||
</div>
|
||||
{!hasSetIndexType && <span className={cn(s.radio)} />}
|
||||
<div className={s.typeHeader}>
|
||||
|
|
@ -865,7 +873,7 @@ const StepTwo = ({
|
|||
onClick={changeToEconomicalType}
|
||||
>
|
||||
<div className='h-8 p-1.5 bg-white rounded-lg border border-[#101828]/10 justify-center items-center inline-flex absolute left-5 top-[18px]'>
|
||||
<Image src={Piggybank} alt='Economical Icon' width={20} height={20} />
|
||||
<Image src={indexMethodIcon.economical} alt='Economical Icon' width={20} height={20} />
|
||||
</div>
|
||||
{!hasSetIndexType && <span className={cn(s.radio)} />}
|
||||
<div className={s.typeHeader}>
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ import { useTranslation } from 'react-i18next'
|
|||
import type { InputProps } from '@/app/components/base/input'
|
||||
import Input from '@/app/components/base/input'
|
||||
import Tooltip from '@/app/components/base/tooltip'
|
||||
import type { InputNumberProps } from '@/app/components/base/input-number'
|
||||
import { InputNumber } from '@/app/components/base/input-number'
|
||||
|
||||
const TextLabel: FC<PropsWithChildren> = (props) => {
|
||||
return <label className='text-[#354052] text-xs font-semibold leading-none'>{props.children}</label>
|
||||
|
|
@ -36,12 +38,12 @@ export const DelimiterInput: FC<InputProps> = (props) => {
|
|||
</FormField>
|
||||
}
|
||||
|
||||
export const MaxLengthInput: FC<InputProps> = (props) => {
|
||||
export const MaxLengthInput: FC<InputNumberProps> = (props) => {
|
||||
const { t } = useTranslation()
|
||||
return <FormField label={<div>
|
||||
return <FormField label={<div className='h-[14px]'>
|
||||
{t('datasetCreation.stepTwo.maxLength')}
|
||||
</div>}>
|
||||
<Input
|
||||
<InputNumber
|
||||
type="number"
|
||||
className='h-9'
|
||||
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
|
||||
|
|
@ -52,7 +54,7 @@ export const MaxLengthInput: FC<InputProps> = (props) => {
|
|||
</FormField>
|
||||
}
|
||||
|
||||
export const OverlapInput: FC<InputProps> = (props) => {
|
||||
export const OverlapInput: FC<InputNumberProps> = (props) => {
|
||||
const { t } = useTranslation()
|
||||
return <FormField label={<div className='flex'>
|
||||
{t('datasetCreation.stepTwo.overlap')}
|
||||
|
|
@ -64,7 +66,7 @@ export const OverlapInput: FC<InputProps> = (props) => {
|
|||
}
|
||||
/>
|
||||
</div>}>
|
||||
<Input
|
||||
<InputNumber
|
||||
type="number"
|
||||
className='h-9'
|
||||
placeholder={t('datasetCreation.stepTwo.overlap') || ''}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
'use client'
|
||||
import type { FC } from 'react'
|
||||
import type { FC, ReactNode } from 'react'
|
||||
import React, { useEffect, useState } from 'react'
|
||||
import { PencilIcon } from '@heroicons/react/24/outline'
|
||||
import { useTranslation } from 'react-i18next'
|
||||
|
|
@ -24,6 +24,7 @@ import type { DocType, FullDocumentDetail } from '@/models/datasets'
|
|||
import { CUSTOMIZABLE_DOC_TYPES } from '@/models/datasets'
|
||||
import type { inputType, metadataType } from '@/hooks/use-metadata'
|
||||
import { useBookCategories, useBusinessDocCategories, useLanguages, useMetadataMap, usePersonalDocCategories } from '@/hooks/use-metadata'
|
||||
import classNames from '@/utils/classnames'
|
||||
|
||||
const map2Options = (map: { [key: string]: string }) => {
|
||||
return Object.keys(map).map(key => ({ value: key, name: map[key] }))
|
||||
|
|
@ -32,6 +33,7 @@ const map2Options = (map: { [key: string]: string }) => {
|
|||
type IFieldInfoProps = {
|
||||
label: string
|
||||
value?: string
|
||||
valueIcon?: ReactNode
|
||||
displayedValue?: string
|
||||
defaultValue?: string
|
||||
showEdit?: boolean
|
||||
|
|
@ -43,6 +45,7 @@ type IFieldInfoProps = {
|
|||
export const FieldInfo: FC<IFieldInfoProps> = ({
|
||||
label,
|
||||
value = '',
|
||||
valueIcon,
|
||||
displayedValue = '',
|
||||
defaultValue,
|
||||
showEdit = false,
|
||||
|
|
@ -58,7 +61,8 @@ export const FieldInfo: FC<IFieldInfoProps> = ({
|
|||
return (
|
||||
<div className={cn(s.fieldInfo, editAlignTop && '!items-start', readAlignTop && '!items-start pt-1')}>
|
||||
<div className={cn(s.label, editAlignTop && 'pt-1')}>{label}</div>
|
||||
<div className={s.value}>
|
||||
<div className={classNames(s.value, 'flex items-center gap-1')}>
|
||||
{valueIcon}
|
||||
{!showEdit
|
||||
? displayedValue
|
||||
: inputType === 'select'
|
||||
|
|
|
|||
|
|
@ -1,16 +1,12 @@
|
|||
'use client'
|
||||
|
||||
import { Stepper } from '../components/datasets/create/stepper'
|
||||
import { useState } from 'react'
|
||||
import { InputNumber } from '../components/base/input-number'
|
||||
// import { Stepper } from '../components/datasets/create/stepper'
|
||||
|
||||
export default function Page() {
|
||||
const [step, setStep] = useState(0)
|
||||
return <div className='p-4'>
|
||||
<Stepper
|
||||
steps={[
|
||||
{ name: 'Data Source' },
|
||||
{ name: 'Document Processing' },
|
||||
{ name: 'Execute & Finish' },
|
||||
]}
|
||||
activeStepIndex={1}
|
||||
/>
|
||||
<InputNumber onChange={setStep} unit={'tokens'} />
|
||||
</div>
|
||||
}
|
||||
|
|
|
|||
|
|
@ -99,6 +99,16 @@ const translation = {
|
|||
autoDescription: 'Automatically set chunk and preprocessing rules. Unfamiliar users are recommended to select this.',
|
||||
custom: 'Custom',
|
||||
customDescription: 'Customize chunks rules, chunks length, and preprocessing rules, etc.',
|
||||
general: 'General',
|
||||
generalTip: 'General text chunking mode, the chunks retrieved and recalled are the same.',
|
||||
parentChild: 'Parent-child',
|
||||
parentChildTip: 'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.',
|
||||
parentChunkForContext: 'Parent-chunk for Context',
|
||||
childChunkForRetrieval: 'Child-chunk for Retrieval',
|
||||
paragraph: 'Paragraph',
|
||||
paragraphTip: 'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.',
|
||||
fullDoc: 'Full Doc',
|
||||
fullDocTip: 'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.',
|
||||
separator: 'Delimiter',
|
||||
separatorTip: 'A delimiter is the character used to separate text. \\n\\n and \\n are commonly used delimiters for separating paragraphs and lines. Combined with commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum chunk length. You can also use special delimiters defined by yourself (e.g. ***).',
|
||||
separatorPlaceholder: '\\n\\n for separating paragraphs; \\n for separating lines',
|
||||
|
|
|
|||
|
|
@ -99,6 +99,16 @@ const translation = {
|
|||
autoDescription: '自动设置分段规则与预处理规则,如果不了解这些参数建议选择此项',
|
||||
custom: '自定义',
|
||||
customDescription: '自定义分段规则、分段长度以及预处理规则等参数',
|
||||
general: '通用',
|
||||
generalTip: '通用文本分块模式,检索和回忆的块是相同的',
|
||||
parentChild: '父子分段',
|
||||
parentChildTip: '使用父子模式时,子块用于检索,父块用作上下文',
|
||||
parentChunkForContext: '父块用作上下文',
|
||||
childChunkForRetrieval: '子块用于检索',
|
||||
paragraph: '段落',
|
||||
paragraphTip: '此模式根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的母块',
|
||||
fullDoc: '全文',
|
||||
fullDocTip: '整个文档用作父块并直接检索。请注意,出于性能原因,超过10000个标记的文本将被自动截断。',
|
||||
separator: '分段标识符',
|
||||
separatorTip: '分隔符是用于分隔文本的字符。\\n\\n 和 \\n 是常用于分隔段落和行的分隔符。用逗号连接分隔符(\\n\\n,\\n),当段落超过最大块长度时,会按行进行分割。你也可以使用自定义的特殊分隔符(例如 ***)。',
|
||||
separatorPlaceholder: '\\n\\n 用于分段;\\n 用于分行',
|
||||
|
|
@ -112,6 +122,7 @@ const translation = {
|
|||
removeUrlEmails: '删除所有 URL 和电子邮件地址',
|
||||
removeStopwords: '去除停用词,例如 “a”,“an”,“the” 等',
|
||||
preview: '确认并预览',
|
||||
previewChunk: '预览块',
|
||||
reset: '重置',
|
||||
indexMode: '索引方式',
|
||||
qualified: '高质量',
|
||||
|
|
|
|||
Loading…
Reference in New Issue