From 1a6a28f650df36ce0008734994db0a53561e6d82 Mon Sep 17 00:00:00 2001
From: AkaraChen <akarachen@outlook.com>
Date: Mon, 25 Nov 2024 17:57:31 +0800
Subject: [PATCH] feat: settings ui for database pre-processing

---
 .../components/base/input-number/index.tsx    | 57 +++++++++++++++
 web/app/components/base/param-item/index.tsx  | 20 ++++--
 .../index.tsx                                 |  4 +-
 .../common/retrieval-method-config/index.tsx  | 10 ++-
 .../create/embedding-process/index.tsx        | 69 ++++++++++++++++--
 web/app/components/datasets/create/icons.ts   | 16 +++++
 web/app/components/datasets/create/index.tsx  |  6 ++
 .../datasets/create/step-three/index.tsx      | 14 +++-
 .../datasets/create/step-two/index.tsx        | 70 +++++++++++--------
 .../datasets/create/step-two/inputs.tsx       | 12 ++--
 .../documents/detail/metadata/index.tsx       |  8 ++-
 web/app/dev-preview/page.tsx                  | 14 ++--
 web/i18n/en-US/dataset-creation.ts            | 10 +++
 web/i18n/zh-Hans/dataset-creation.ts          | 11 +++
 14 files changed, 250 insertions(+), 71 deletions(-)
 create mode 100644 web/app/components/base/input-number/index.tsx
 create mode 100644 web/app/components/datasets/create/icons.ts
diff --git a/web/app/components/base/input-number/index.tsx b/web/app/components/base/input-number/index.tsx
new file mode 100644
index 0000000000..222686a0a8
--- /dev/null
+++ b/web/app/components/base/input-number/index.tsx
@@ -0,0 +1,78 @@
+import { useState } from 'react'
+import type { FC, SetStateAction } from 'react'
+import { RiArrowDownSLine, RiArrowUpSLine } from '@remixicon/react'
+import Input, { type InputProps } from '../input'
+import classNames from '@/utils/classnames'
+
+/**
+ * Props for the InputNumber component.
+ *
+ * - `unit`: optional suffix rendered to the right of the field.
+ * - `onChange`: receives every accepted value, whether typed or stepped.
+ * - `amount`: increment applied by the stepper buttons (defaults to 1).
+ * - `size`: selects the vertical padding of the stepper buttons.
+ */
+export type InputNumberProps = {
+  unit?: string
+  onChange: (value: number) => void
+  amount?: number
+  size?: 'sm' | 'md'
+} & Omit<InputProps, 'value' | 'onChange' | 'size'>
+
+/**
+ * Numeric field with up/down stepper buttons.
+ *
+ * The value lives in local state seeded from `defaultValue` (0 when omitted).
+ * Typed text that does not convert to a number is ignored.
+ */
+export const InputNumber: FC<InputNumberProps> = (props) => {
+  const { unit, className, onChange, defaultValue = 0, amount = 1, size = 'sm', max, min, disabled, ...rest } = props
+  const [val, setVal] = useState<number>(defaultValue as number)
+  // Commit path for the stepper buttons: resolve the next value, drop it when it
+  // falls outside [min, max], otherwise store it and report it to the parent.
+  const update = (value: SetStateAction<number>) => {
+    const next = typeof value === 'function' ? value(val) : value
+    // Bounds are compared against `undefined` rather than tested for truthiness so
+    // that a bound of 0 is honoured; strict inequalities keep the bounds reachable.
+    if (max !== undefined && next > Number(max))
+      return
+    if (min !== undefined && next < Number(min))
+      return
+    setVal(next)
+    onChange(next)
+  }
+  const inc = () => update(current => current + amount)
+  const dec = () => update(current => current - amount)
+  return <div className='flex'>
+    <Input {...rest}
+      className={classNames('rounded-r-none', className)}
+      value={val}
+      max={max}
+      min={min}
+      disabled={disabled}
+      onChange={(e) => {
+        const parsed = Number(e.target.value)
+        if (Number.isNaN(parsed))
+          return
+        setVal(parsed)
+        onChange(parsed)
+      }}
+    />
+    {unit && <div className='flex items-center bg-components-input-bg-normal text-[13px] text-text-placeholder pr-2'>{unit}</div>}
+    <div className='flex flex-col bg-components-input-bg-normal rounded-r-md border-l text-text-tertiary'>
+      {/* type='button' keeps the steppers from submitting an enclosing form */}
+      <button type='button' disabled={disabled} onClick={inc} className={classNames(
+        size === 'sm' ? 'pt-1' : 'pt-1.5',
+        'px-1.5 hover:bg-components-input-bg-hover',
+      )}>
+        <RiArrowUpSLine className='size-3' />
+      </button>
+      <button type='button' disabled={disabled} onClick={dec} className={classNames(
+        size === 'sm' ? 'pb-1' : 'pb-1.5',
+        'px-1.5 hover:bg-components-input-bg-hover',
+      )}>
+        <RiArrowDownSLine className='size-3' />
+      </button>
+    </div>
+  </div>
+}
diff --git a/web/app/components/base/param-item/index.tsx b/web/app/components/base/param-item/index.tsx
index 49acc81484..bd18d35ebc 100644
--- a/web/app/components/base/param-item/index.tsx
+++ b/web/app/components/base/param-item/index.tsx
@@ -1,5 +1,6 @@
 'use client'
 import type { FC } from 'react'
+import { InputNumber } from '../input-number'
 import Tooltip from '@/app/components/base/tooltip'
 import Slider from '@/app/components/base/slider'
 import Switch from '@/app/components/base/switch'
@@ -47,13 +48,20 @@ const ParamItem: FC<Props> = ({ className, id, name, noTooltip, tip, step = 0.1,
       </div>
       <div className="mt-2 flex items-center">
         <div className="mr-4 flex shrink-0 items-center">
-          <input disabled={!enable} type="number" min={min} max={max} step={step} className="block w-[48px] h-7 text-xs leading-[18px] rounded-lg border-0 pl-1 pl py-1.5 bg-gray-50 text-gray-900  placeholder:text-gray-400 focus:ring-1 focus:ring-inset focus:ring-primary-600 disabled:opacity-60" value={(value === null || value === undefined) ? '' : value} onChange={(e) => {
-            const value = parseFloat(e.target.value)
-            if (value < min || value > max)
-              return
+          <InputNumber
+            disabled={!enable}
+            type="number"
+            min={min}
+            max={max}
+            step={step}
+            size='sm'
+            onChange={(value) => {
+              if (value < min || value > max)
+                return
 
-            onChange(id, value)
-          }} />
+              onChange(id, value)
+            }}
+          />
         </div>
         <div className="flex items-center h-7 grow">
           <Slider
diff --git a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx
index 7108d359d9..8c46747b63 100644
--- a/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx
+++ b/web/app/components/datasets/common/economical-retrieval-method-config/index.tsx
@@ -5,7 +5,7 @@ import { useTranslation } from 'react-i18next'
 import Image from 'next/image'
 import RetrievalParamConfig from '../retrieval-param-config'
 import { OptionCard } from '../../create/step-two/option-card'
-import Selection from '../../create/assets/selection-mod.svg'
+import { retrievalIcon } from '../../create/icons'
 import { RETRIEVE_METHOD } from '@/types/app'
 import type { RetrievalConfig } from '@/types/app'
 
@@ -22,7 +22,7 @@ const EconomicalRetrievalMethodConfig: FC<Props> = ({
 
   return (
     <div className='space-y-2'>
-      <OptionCard icon={<Image className='w-4 h-4' src={Selection} alt='' />}
+      <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
         title={t('dataset.retrieval.invertedIndex.title')}
         description={t('dataset.retrieval.invertedIndex.description')} isActive
         activeHeaderClassName='bg-gradient-to-r from-[#F0EEFA] to-[#F9FAFB]'
diff --git a/web/app/components/datasets/common/retrieval-method-config/index.tsx b/web/app/components/datasets/common/retrieval-method-config/index.tsx
index 62ead1467f..5a17b73569 100644
--- a/web/app/components/datasets/common/retrieval-method-config/index.tsx
+++ b/web/app/components/datasets/common/retrieval-method-config/index.tsx
@@ -5,10 +5,8 @@ import { useTranslation } from 'react-i18next'
 import Image from 'next/image'
 import RetrievalParamConfig from '../retrieval-param-config'
 import { OptionCard } from '../../create/step-two/option-card'
-import Selection from '../../create/assets/selection-mod.svg'
-import Research from '../../create/assets/research-mod.svg'
-import PatternRecognition from '../../create/assets/pattern-recognition-mod.svg'
 import Effect from '../../create/assets/option-card-effect-purple.svg'
+import { retrievalIcon } from '../../create/icons'
 import type { RetrievalConfig } from '@/types/app'
 import { RETRIEVE_METHOD } from '@/types/app'
 import { useProviderContext } from '@/context/provider-context'
@@ -59,7 +57,7 @@ const RetrievalMethodConfig: FC<Props> = ({
   return (
     <div className='space-y-2'>
       {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={Selection} alt='' />}
+        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.vector} alt='' />}
           title={t('dataset.retrieval.semantic_search.title')}
           description={t('dataset.retrieval.semantic_search.description')}
           isActive={
@@ -80,7 +78,7 @@ const RetrievalMethodConfig: FC<Props> = ({
         </OptionCard>
       )}
       {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={Research} alt='' />}
+        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.fullText} alt='' />}
           title={t('dataset.retrieval.full_text_search.title')}
           description={t('dataset.retrieval.full_text_search.description')}
           isActive={
@@ -101,7 +99,7 @@ const RetrievalMethodConfig: FC<Props> = ({
         </OptionCard>
       )}
       {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && (
-        <OptionCard icon={<Image className='w-4 h-4' src={PatternRecognition} alt='' />}
+        <OptionCard icon={<Image className='w-4 h-4' src={retrievalIcon.hybrid} alt='' />}
           title={
             <div className='flex items-center space-x-1'>
               <div>{t('dataset.retrieval.hybrid_search.title')}</div>
diff --git a/web/app/components/datasets/create/embedding-process/index.tsx b/web/app/components/datasets/create/embedding-process/index.tsx
index 7786582085..f6d500ef15 100644
--- a/web/app/components/datasets/create/embedding-process/index.tsx
+++ b/web/app/components/datasets/create/embedding-process/index.tsx
@@ -7,7 +7,11 @@ import { omit } from 'lodash-es'
 import { ArrowRightIcon } from '@heroicons/react/24/solid'
 import {
   RiErrorWarningFill,
+  RiLoader2Fill,
+  RiTerminalBoxLine,
 } from '@remixicon/react'
+import Image from 'next/image'
+import { indexMethodIcon, retrievalIcon } from '../icons'
 import s from './index.module.css'
 import cn from '@/utils/classnames'
 import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
@@ -23,15 +27,21 @@ import UpgradeBtn from '@/app/components/billing/upgrade-btn'
 import { useProviderContext } from '@/context/provider-context'
 import Tooltip from '@/app/components/base/tooltip'
 import { sleep } from '@/utils'
+import { RETRIEVE_METHOD } from '@/types/app'
 
 type Props = {
   datasetId: string
   batchId: string
   documents?: FullDocumentDetail[]
   indexingType?: string
+  retrievalMethod?: string
 }
 
-const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => {
+const RuleDetail: FC<{
+  sourceData?: ProcessRuleResponse
+  indexingType?: string
+  retrievalMethod?: string
+}> = ({ sourceData, indexingType, retrievalMethod }) => {
   const { t } = useTranslation()
 
   const segmentationRuleMap = {
@@ -81,10 +91,40 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) =>
         displayedValue={String(getValue(field))}
       />
     })}
+    <FieldInfo
+      label={t('datasetCreation.stepTwo.indexMode')}
+      displayedValue={t(`datasetCreation.stepTwo.${indexingType}`) as string}
+      valueIcon={
+        <Image
+          src={
+            indexingType === 'economy'
+              ? indexMethodIcon.economical
+              : indexMethodIcon.high_quality
+          }
+          alt=''
+        />
+      }
+    />
+    <FieldInfo
+      label={t('datasetSettings.form.retrievalSetting.title')}
+      displayedValue={t(`datasetSettings.form.retrievalSetting.${retrievalMethod}`) as string}
+      valueIcon={
+        <Image
+          src={
+            retrievalMethod === RETRIEVE_METHOD.fullText
+              ? retrievalIcon.fullText
+              : retrievalMethod === RETRIEVE_METHOD.hybrid // compare the prop; a bare enum member is always truthy
+                ? retrievalIcon.hybrid
+                : retrievalIcon.vector
+          }
+          alt=''
+        />
+      }
+    />
   </div>
 }
 
-const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType }) => {
+const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => {
   const { t } = useTranslation()
   const { enableBilling, plan } = useProviderContext()
 
@@ -146,6 +186,9 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
   const navToDocumentList = () => {
     router.push(`/datasets/${datasetId}/documents`)
   }
+  const navToApiDocs = () => { // sends the router to the datasets page with category=api
+    router.push('/datasets?category=api')
+  }
 
   const isEmbedding = useMemo(() => {
     return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''))
@@ -177,13 +220,17 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
 
     return doc?.data_source_info.notion_page_icon
   }
-  const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
+  const isSourceEmbedding = (detail: IndexingStatusResponse) => // true while indexing_status is one of the in-progress states listed below
+    ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '')
 
   return (
     <>
       <div className='h-5 flex items-center mb-5'>
         <div className={s.embeddingStatus}>
-          {isEmbedding && t('datasetDocuments.embedding.processing')}
+          {isEmbedding && <div className='flex items-center'>
+            <RiLoader2Fill className='size-4 mr-1 animate-spin' />
+            {t('datasetDocuments.embedding.processing')}
+          </div>}
           {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
         </div>
       </div>
@@ -258,11 +305,19 @@ const EmbeddingProcess: FC<Props> = ({ datasetId, batchId, documents = [], index
           </div>
         ))}
       </div>
-      <RuleDetail sourceData={ruleDetail} />
-      <div className='flex items-center gap-2 mt-10'>
+      <RuleDetail sourceData={ruleDetail} indexingType={
+        indexingType
+      }
+      retrievalMethod={retrievalMethod}
+      />
+      <div className='flex items-center gap-2 my-10'>
+        <Button className='w-fit' onClick={navToApiDocs}>
+          <RiTerminalBoxLine className='size-4 mr-2' />
+          <span>Access the API</span>
+        </Button>
         <Button className='w-fit' variant='primary' onClick={navToDocumentList}>
           <span>{t('datasetCreation.stepThree.navTo')}</span>
-          <ArrowRightIcon className='h-4 w-4 ml-2 stroke-current stroke-1' />
+          <ArrowRightIcon className='size-4 ml-2 stroke-current stroke-1' />
         </Button>
       </div>
     </>
diff --git a/web/app/components/datasets/create/icons.ts b/web/app/components/datasets/create/icons.ts
new file mode 100644
index 0000000000..80c4b6c944
--- /dev/null
+++ b/web/app/components/datasets/create/icons.ts
@@ -0,0 +1,16 @@
+import GoldIcon from './assets/gold.svg'
+import Piggybank from './assets/piggy-bank-mod.svg'
+import Selection from './assets/selection-mod.svg'
+import Research from './assets/research-mod.svg'
+import PatternRecognition from './assets/pattern-recognition-mod.svg'
+
+export const indexMethodIcon = { // SVG assets shown next to each index method
+  high_quality: GoldIcon,
+  economical: Piggybank,
+}
+
+export const retrievalIcon = { // SVG assets shown next to each retrieval method
+  vector: Selection, // semantic search card; also used for the inverted-index card
+  fullText: Research, // full-text search card
+  hybrid: PatternRecognition, // hybrid search card
+}
diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx
index 440e9c0ea2..3829abe27b 100644
--- a/web/app/components/datasets/create/index.tsx
+++ b/web/app/components/datasets/create/index.tsx
@@ -36,6 +36,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
   const [dataSourceType, setDataSourceType] = useState<DataSourceType>(DataSourceType.FILE)
   const [step, setStep] = useState(1)
   const [indexingTypeCache, setIndexTypeCache] = useState('')
+  const [retrievalMethodCache, setRetrievalMethodCache] = useState('')
   const [fileList, setFiles] = useState<FileItem[]>([])
   const [result, setResult] = useState<createDocumentResponse | undefined>()
   const [hasError, setHasError] = useState(false)
@@ -80,6 +81,9 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
   const updateResultCache = (res?: createDocumentResponse) => {
     setResult(res)
   }
+  const updateRetrievalMethodCache = (method: string) => { // keeps the given retrieval method in component state
+    setRetrievalMethodCache(method)
+  }
 
   const nextStep = useCallback(() => {
     setStep(step + 1)
@@ -156,6 +160,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
           websiteCrawlJobId={websiteCrawlJobId}
           onStepChange={changeStep}
           updateIndexingTypeCache={updateIndexingTypeCache}
+          updateRetrievalMethodCache={updateRetrievalMethodCache}
           updateResultCache={updateResultCache}
           crawlOptions={crawlOptions}
         />}
@@ -163,6 +168,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => {
           datasetId={datasetId}
           datasetName={detail?.name}
           indexingType={detail?.indexing_technique || indexingTypeCache}
+          retrievalMethod={detail?.retrieval_model?.search_method || retrievalMethodCache}
           creationCache={result}
         />}
       </div>
diff --git a/web/app/components/datasets/create/step-three/index.tsx b/web/app/components/datasets/create/step-three/index.tsx
index 85471f4513..1e7c49ac37 100644
--- a/web/app/components/datasets/create/step-three/index.tsx
+++ b/web/app/components/datasets/create/step-three/index.tsx
@@ -7,15 +7,17 @@ import s from './index.module.css'
 import cn from '@/utils/classnames'
 import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints'
 import type { FullDocumentDetail, createDocumentResponse } from '@/models/datasets'
+import AppIcon from '@/app/components/base/app-icon'
 
 type StepThreeProps = {
   datasetId?: string
   datasetName?: string
   indexingType?: string
+  retrievalMethod?: string
   creationCache?: createDocumentResponse
 }
 
-const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: StepThreeProps) => {
+const StepThree = ({ datasetId, datasetName, indexingType, creationCache, retrievalMethod }: StepThreeProps) => {
   const { t } = useTranslation()
 
   const media = useBreakpoints()
@@ -30,8 +32,13 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
               <div className={s.creationInfo}>
                 <div className={s.title}>{t('datasetCreation.stepThree.creationTitle')}</div>
                 <div className={s.content}>{t('datasetCreation.stepThree.creationContent')}</div>
-                <div className={s.label}>{t('datasetCreation.stepThree.label')}</div>
-                <div className={s.datasetName}>{datasetName || creationCache?.dataset?.name}</div>
+                <div className='flex gap-4'>
+                  <AppIcon {...creationCache?.dataset} className='size-14' />
+                  <div className='w-full'>
+                    <div className={s.label}>{t('datasetCreation.stepThree.label')}</div>
+                    <div className={s.datasetName}>{datasetName || creationCache?.dataset?.name}</div>
+                  </div>
+                </div>
               </div>
               <div className={s.dividerLine} />
             </>
@@ -47,6 +54,7 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
             batchId={creationCache?.batch || ''}
             documents={creationCache?.documents as FullDocumentDetail[]}
             indexingType={indexingType || creationCache?.dataset?.indexing_technique}
+            retrievalMethod={retrievalMethod || creationCache?.dataset?.retrieval_model?.search_method}
           />
         </div>
       </div>
diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx
index 536b426cb4..88f7661a03 100644
--- a/web/app/components/datasets/create/step-two/index.tsx
+++ b/web/app/components/datasets/create/step-two/index.tsx
@@ -16,10 +16,9 @@ import Image from 'next/image'
 import SettingCog from '../assets/setting-gear-mod.svg'
 import OrangeEffect from '../assets/option-card-effect-orange.svg'
 import FamilyMod from '../assets/family-mod.svg'
-import GoldIcon from '../assets/gold.svg'
-import Piggybank from '../assets/piggy-bank-mod.svg'
 import Note from '../assets/note-mod.svg'
 import FileList from '../assets/file-list-3-fill.svg'
+import { indexMethodIcon } from '../icons'
 import PreviewItem, { PreviewType } from './preview-item'
 import s from './index.module.css'
 import unescape from './unescape'
@@ -80,6 +79,7 @@ type StepTwoProps = {
   onSetting: () => void
   datasetId?: string
   indexingType?: ValueOf<IndexingType>
+  retrievalMethod?: string
   dataSourceType: DataSourceType
   files: CustomFile[]
   notionPages?: NotionPage[]
@@ -89,6 +89,7 @@ type StepTwoProps = {
   websiteCrawlJobId?: string
   onStepChange?: (delta: number) => void
   updateIndexingTypeCache?: (type: string) => void
+  updateRetrievalMethodCache?: (method: string) => void
   updateResultCache?: (res: createDocumentResponse) => void
   onSave?: () => void
   onCancel?: () => void
@@ -137,6 +138,7 @@ const StepTwo = ({
   updateResultCache,
   onSave,
   onCancel,
+  updateRetrievalMethodCache,
 }: StepTwoProps) => {
   const { t } = useTranslation()
   const { locale } = useContext(I18n)
@@ -507,6 +509,8 @@ const StepTwo = ({
         })
         updateIndexingTypeCache && updateIndexingTypeCache(indexType as string)
         updateResultCache && updateResultCache(res)
+        // eslint-disable-next-line @typescript-eslint/no-use-before-define
+        updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string)
       }
       else {
         res = await createDocument({
@@ -643,19 +647,21 @@ const StepTwo = ({
           <div className='max-w-[640px]'>
             <div className='space-y-4'>
               <OptionCard
-                title={'General'}
-                icon={<Image src={SettingCog} alt='General' />}
+                title={t('datasetCreation.stepTwo.general')}
+                icon={<Image src={SettingCog} alt={t('datasetCreation.stepTwo.general')} />}
                 activeHeaderClassName='bg-gradient-to-r from-[#EFF0F9] to-[#F9FAFB]'
-                description={'General text chunking mode, the chunks retrieved and recalled are the same.'}
+                description={t('datasetCreation.stepTwo.generalTip')}
                 isActive={SegmentType.AUTO === segmentationType}
                 onClick={() => setSegmentationType(SegmentType.AUTO)}
                 actions={
                   <>
                     <Button variant={'secondary-accent'}>
                       <RiSearchEyeLine className='h-4 w-4 mr-1.5' />
-                      Preview Chunk
+                      {t('datasetCreation.stepTwo.previewChunk')}
+                    </Button>
+                    <Button variant={'ghost'} disabled>
+                      {t('datasetCreation.stepTwo.reset')}
                     </Button>
-                    <Button variant={'ghost'} disabled>Reset</Button>
                   </>
                 }
               >
@@ -666,13 +672,13 @@ const StepTwo = ({
                       onChange={e => setSegmentIdentifier(e.target.value)}
                     />
                     <MaxLengthInput
-                      value={max}
-                      onChange={e => setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))}
+                      defaultValue={max}
+                      onChange={setMax}
                     />
                     <OverlapInput
-                      value={overlap}
+                      defaultValue={overlap}
                       min={1}
-                      onChange={e => setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))}
+                      onChange={setOverlap}
                     />
                   </div>
                   <div className='space-y-2'>
@@ -695,32 +701,34 @@ const StepTwo = ({
                 </div>
               </OptionCard>
               <OptionCard
-                title={'Parent-child'}
-                icon={<Image src={FamilyMod} alt='Parent-child' />}
+                title={t('datasetCreation.stepTwo.parentChild')}
+                icon={<Image src={FamilyMod} alt={t('datasetCreation.stepTwo.parentChild')} />}
                 effectImg={OrangeEffect.src}
                 activeHeaderClassName='bg-gradient-to-r from-[#F9F1EE] to-[#F9FAFB]'
-                description={'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.'}
+                description={t('datasetCreation.stepTwo.parentChildTip')}
                 isActive={SegmentType.CUSTOM === segmentationType}
                 onClick={() => setSegmentationType(SegmentType.CUSTOM)}
                 actions={
                   <>
                     <Button variant={'secondary-accent'}>
                       <RiSearchEyeLine className='h-4 w-4 mr-1.5' />
-                      Preview Chunk
+                      {t('datasetCreation.stepTwo.previewChunk')}
+                    </Button>
+                    <Button variant={'ghost'} onClick={resetRules}>
+                      {t('datasetCreation.stepTwo.reset')}
                     </Button>
-                    <Button variant={'ghost'} onClick={resetRules}>Reset</Button>
                   </>
                 }
               >
                 <div className='space-y-4'>
                   <div className='space-y-2'>
                     <TextLabel>
-                    Parent-chunk for Context
+                      {t('datasetCreation.stepTwo.parentChunkForContext')}
                     </TextLabel>
                     <RadioCard
                       icon={<Image src={Note} alt='' />}
-                      title={'Paragraph'}
-                      description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'}
+                      title={t('datasetCreation.stepTwo.paragraph')}
+                      description={t('datasetCreation.stepTwo.paragraphTip')}
                       isChosen={parentChildConfig.chunkForContext === 'paragraph'}
                       onChosen={() => setParentChildConfig(
                         {
@@ -741,12 +749,12 @@ const StepTwo = ({
                             })}
                           />
                           <MaxLengthInput
-                            value={parentChildConfig.parent.maxLength}
-                            onChange={e => setParentChildConfig({
+                            defaultValue={parentChildConfig.parent.maxLength}
+                            onChange={value => setParentChildConfig({
                               ...parentChildConfig,
                               parent: {
                                 ...parentChildConfig.parent,
-                                maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
+                                maxLength: value,
                               },
                             })}
                           />
@@ -755,8 +763,8 @@ const StepTwo = ({
                     />
                     <RadioCard
                       icon={<Image src={FileList} alt='' />}
-                      title={'Full Doc'}
-                      description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'}
+                      title={t('datasetCreation.stepTwo.fullDoc')}
+                      description={t('datasetCreation.stepTwo.fullDocTip')}
                       onChosen={() => setParentChildConfig(
                         {
                           ...parentChildConfig,
@@ -769,7 +777,7 @@ const StepTwo = ({
 
                   <div className='space-y-2'>
                     <TextLabel>
-                      Child-chunk for Retrieval
+                      {t('datasetCreation.stepTwo.childChunkForRetrieval')}
                     </TextLabel>
                     <div className='flex gap-2'>
                       <DelimiterInput
@@ -783,20 +791,20 @@ const StepTwo = ({
                         })}
                       />
                       <MaxLengthInput
-                        value={parentChildConfig.child.maxLength}
+                        defaultValue={parentChildConfig.child.maxLength}
 
-                        onChange={e => setParentChildConfig({
+                        onChange={value => setParentChildConfig({
                           ...parentChildConfig,
                           child: {
                             ...parentChildConfig.child,
-                            maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10),
+                            maxLength: value,
                           },
                         })}
                       />
                     </div>
 
                     <TextLabel>
-                    Text Pre-processing Rules
+                      {t('datasetCreation.stepTwo.rules')}
                     </TextLabel>
                     <div className='space-y-2'>
                       {rules.map(rule => (
@@ -834,7 +842,7 @@ const StepTwo = ({
                   }}
                 >
                   <div className='h-8 p-1.5 bg-white rounded-lg border border-[#101828]/10 justify-center items-center inline-flex absolute left-5 top-[18px]'>
-                    <Image src={GoldIcon} alt='Gold Icon' width={20} height={20} />
+                    <Image src={indexMethodIcon.high_quality} alt='Gold Icon' width={20} height={20} />
                   </div>
                   {!hasSetIndexType && <span className={cn(s.radio)} />}
                   <div className={s.typeHeader}>
@@ -865,7 +873,7 @@ const StepTwo = ({
                   onClick={changeToEconomicalType}
                 >
                   <div className='h-8 p-1.5 bg-white rounded-lg border border-[#101828]/10 justify-center items-center inline-flex absolute left-5 top-[18px]'>
-                    <Image src={Piggybank} alt='Economical Icon' width={20} height={20} />
+                    <Image src={indexMethodIcon.economical} alt='Economical Icon' width={20} height={20} />
                   </div>
                   {!hasSetIndexType && <span className={cn(s.radio)} />}
                   <div className={s.typeHeader}>
diff --git a/web/app/components/datasets/create/step-two/inputs.tsx b/web/app/components/datasets/create/step-two/inputs.tsx
index 56100918e1..899f5b120c 100644
--- a/web/app/components/datasets/create/step-two/inputs.tsx
+++ b/web/app/components/datasets/create/step-two/inputs.tsx
@@ -3,6 +3,8 @@ import { useTranslation } from 'react-i18next'
 import type { InputProps } from '@/app/components/base/input'
 import Input from '@/app/components/base/input'
 import Tooltip from '@/app/components/base/tooltip'
+import type { InputNumberProps } from '@/app/components/base/input-number'
+import { InputNumber } from '@/app/components/base/input-number'
 
 const TextLabel: FC<PropsWithChildren> = (props) => {
   return <label className='text-[#354052] text-xs font-semibold leading-none'>{props.children}</label>
@@ -36,12 +38,12 @@ export const DelimiterInput: FC<InputProps> = (props) => {
   </FormField>
 }
 
-export const MaxLengthInput: FC<InputProps> = (props) => {
+export const MaxLengthInput: FC<InputNumberProps> = (props) => {
   const { t } = useTranslation()
-  return <FormField label={<div>
+  return <FormField label={<div className='h-[14px]'>
     {t('datasetCreation.stepTwo.maxLength')}
   </div>}>
-    <Input
+    <InputNumber
       type="number"
       className='h-9'
       placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
@@ -52,7 +54,7 @@ export const MaxLengthInput: FC<InputProps> = (props) => {
   </FormField>
 }
 
-export const OverlapInput: FC<InputProps> = (props) => {
+export const OverlapInput: FC<InputNumberProps> = (props) => {
   const { t } = useTranslation()
   return <FormField label={<div className='flex'>
     {t('datasetCreation.stepTwo.overlap')}
@@ -64,7 +66,7 @@ export const OverlapInput: FC<InputProps> = (props) => {
       }
     />
   </div>}>
-    <Input
+    <InputNumber
       type="number"
       className='h-9'
       placeholder={t('datasetCreation.stepTwo.overlap') || ''}
diff --git a/web/app/components/datasets/documents/detail/metadata/index.tsx b/web/app/components/datasets/documents/detail/metadata/index.tsx
index 9990ff7404..27b1c27db6 100644
--- a/web/app/components/datasets/documents/detail/metadata/index.tsx
+++ b/web/app/components/datasets/documents/detail/metadata/index.tsx
@@ -1,5 +1,5 @@
 'use client'
-import type { FC } from 'react'
+import type { FC, ReactNode } from 'react'
 import React, { useEffect, useState } from 'react'
 import { PencilIcon } from '@heroicons/react/24/outline'
 import { useTranslation } from 'react-i18next'
@@ -24,6 +24,7 @@ import type { DocType, FullDocumentDetail } from '@/models/datasets'
 import { CUSTOMIZABLE_DOC_TYPES } from '@/models/datasets'
 import type { inputType, metadataType } from '@/hooks/use-metadata'
 import { useBookCategories, useBusinessDocCategories, useLanguages, useMetadataMap, usePersonalDocCategories } from '@/hooks/use-metadata'
+import classNames from '@/utils/classnames'
 
 const map2Options = (map: { [key: string]: string }) => {
   return Object.keys(map).map(key => ({ value: key, name: map[key] }))
@@ -32,6 +33,7 @@ const map2Options = (map: { [key: string]: string }) => {
 type IFieldInfoProps = {
   label: string
   value?: string
+  valueIcon?: ReactNode
   displayedValue?: string
   defaultValue?: string
   showEdit?: boolean
@@ -43,6 +45,7 @@ type IFieldInfoProps = {
 export const FieldInfo: FC<IFieldInfoProps> = ({
   label,
   value = '',
+  valueIcon,
   displayedValue = '',
   defaultValue,
   showEdit = false,
@@ -58,7 +61,8 @@ export const FieldInfo: FC<IFieldInfoProps> = ({
   return (
     <div className={cn(s.fieldInfo, editAlignTop && '!items-start', readAlignTop && '!items-start pt-1')}>
       <div className={cn(s.label, editAlignTop && 'pt-1')}>{label}</div>
-      <div className={s.value}>
+      <div className={classNames(s.value, 'flex items-center gap-1')}>
+        {valueIcon}
         {!showEdit
           ? displayedValue
           : inputType === 'select'
diff --git a/web/app/dev-preview/page.tsx b/web/app/dev-preview/page.tsx
index 176cf8360d..99041cd513 100644
--- a/web/app/dev-preview/page.tsx
+++ b/web/app/dev-preview/page.tsx
@@ -1,16 +1,12 @@
 'use client'
 
-import { Stepper } from '../components/datasets/create/stepper'
+import { useState } from 'react'
+import { InputNumber } from '../components/base/input-number'
+// import { Stepper } from '../components/datasets/create/stepper'
 
 export default function Page() {
+  const [step, setStep] = useState(0)
   return <div className='p-4'>
-    <Stepper
-      steps={[
-        { name: 'Data Source' },
-        { name: 'Document Processing' },
-        { name: 'Execute & Finish' },
-      ]}
-      activeStepIndex={1}
-    />
+    <InputNumber onChange={setStep} unit={'tokens'} />
   </div>
 }
diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts
index de885671a7..1f0bac376b 100644
--- a/web/i18n/en-US/dataset-creation.ts
+++ b/web/i18n/en-US/dataset-creation.ts
@@ -99,6 +99,16 @@ const translation = {
     autoDescription: 'Automatically set chunk and preprocessing rules. Unfamiliar users are recommended to select this.',
     custom: 'Custom',
     customDescription: 'Customize chunks rules, chunks length, and preprocessing rules, etc.',
+    general: 'General',
+    generalTip: 'General text chunking mode, the chunks retrieved and recalled are the same.',
+    parentChild: 'Parent-child',
+    parentChildTip: 'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.',
+    parentChunkForContext: 'Parent-chunk for Context',
+    childChunkForRetrieval: 'Child-chunk for Retrieval',
+    paragraph: 'Paragraph',
+    paragraphTip: 'This mode splits the text into paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.',
+    fullDoc: 'Full Doc',
+    fullDocTip: 'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.',
     separator: 'Delimiter',
     separatorTip: 'A delimiter is the character used to separate text. \\n\\n and \\n are commonly used delimiters for separating paragraphs and lines. Combined with commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum chunk length. You can also use special delimiters defined by yourself (e.g. ***).',
     separatorPlaceholder: '\\n\\n for separating paragraphs; \\n for separating lines',
diff --git a/web/i18n/zh-Hans/dataset-creation.ts b/web/i18n/zh-Hans/dataset-creation.ts
index fac809d7e2..30a61ca720 100644
--- a/web/i18n/zh-Hans/dataset-creation.ts
+++ b/web/i18n/zh-Hans/dataset-creation.ts
@@ -99,6 +99,16 @@ const translation = {
     autoDescription: '自动设置分段规则与预处理规则，如果不了解这些参数建议选择此项',
     custom: '自定义',
     customDescription: '自定义分段规则、分段长度以及预处理规则等参数',
+    general: '通用',
+    generalTip: '通用文本分块模式，检索和召回的块是相同的',
+    parentChild: '父子分段',
+    parentChildTip: '使用父子模式时，子块用于检索，父块用作上下文',
+    parentChunkForContext: '父块用作上下文',
+    childChunkForRetrieval: '子块用于检索',
+    paragraph: '段落',
+    paragraphTip: '此模式根据分隔符和最大块长度将文本拆分为段落，使用拆分文本作为检索的父块',
+    fullDoc: '全文',
+    fullDocTip: '整个文档用作父块并直接检索。请注意，出于性能原因，超过10000个标记的文本将被自动截断。',
     separator: '分段标识符',
     separatorTip: '分隔符是用于分隔文本的字符。\\n\\n 和 \\n 是常用于分隔段落和行的分隔符。用逗号连接分隔符（\\n\\n,\\n），当段落超过最大块长度时，会按行进行分割。你也可以使用自定义的特殊分隔符（例如 ***）。',
     separatorPlaceholder: '\\n\\n 用于分段；\\n 用于分行',
@@ -112,6 +122,7 @@ const translation = {
     removeUrlEmails: '删除所有 URL 和电子邮件地址',
     removeStopwords: '去除停用词，例如 “a”，“an”，“the” 等',
     preview: '确认并预览',
+    previewChunk: '预览块',
     reset: '重置',
     indexMode: '索引方式',
     qualified: '高质量',