From 1a6a28f650df36ce0008734994db0a53561e6d82 Mon Sep 17 00:00:00 2001 From: AkaraChen Date: Mon, 25 Nov 2024 17:57:31 +0800 Subject: [PATCH] feat: settings ui for database pre-preprocessing --- .../components/base/input-number/index.tsx | 57 +++++++++++++++ web/app/components/base/param-item/index.tsx | 20 ++++-- .../index.tsx | 4 +- .../common/retrieval-method-config/index.tsx | 10 ++- .../create/embedding-process/index.tsx | 69 ++++++++++++++++-- web/app/components/datasets/create/icons.ts | 16 +++++ web/app/components/datasets/create/index.tsx | 6 ++ .../datasets/create/step-three/index.tsx | 14 +++- .../datasets/create/step-two/index.tsx | 70 +++++++++++-------- .../datasets/create/step-two/inputs.tsx | 12 ++-- .../documents/detail/metadata/index.tsx | 8 ++- web/app/dev-preview/page.tsx | 14 ++-- web/i18n/en-US/dataset-creation.ts | 10 +++ web/i18n/zh-Hans/dataset-creation.ts | 11 +++ 14 files changed, 250 insertions(+), 71 deletions(-) create mode 100644 web/app/components/base/input-number/index.tsx create mode 100644 web/app/components/datasets/create/icons.ts diff --git a/web/app/components/base/input-number/index.tsx b/web/app/components/base/input-number/index.tsx new file mode 100644 index 0000000000..222686a0a8 --- /dev/null +++ b/web/app/components/base/input-number/index.tsx @@ -0,0 +1,57 @@ +import { useState } from 'react' +import type { FC, SetStateAction } from 'react' +import { RiArrowDownSLine, RiArrowUpSLine } from '@remixicon/react' +import Input, { type InputProps } from '../input' +import classNames from '@/utils/classnames' + +export type InputNumberProps = { + unit?: string + onChange: (value: number) => void + amount?: number + size?: 'sm' | 'md' +} & Omit + +export const InputNumber: FC = (props) => { + const { unit, className, onChange, defaultValue = 0, amount = 1, size = 'sm', max, min, ...rest } = props + const [val, setVal] = useState(defaultValue as number) + const update = (value: SetStateAction) => { + const current = typeof value === 'function' ? value(val) : value as number + if (max && current >= (max as number)) + return + if (min && current <= (min as number)) + return + setVal(value) + } + const inc = () => update(val => val + amount) + const dec = () => update(val => val - amount) + return
+ { + const parsed = Number(e.target.value) + if (Number.isNaN(parsed)) + return + setVal(parsed) + onChange(parsed) + }} + /> + {unit &&
{unit}
} +
+ + +
+
+} diff --git a/web/app/components/base/param-item/index.tsx b/web/app/components/base/param-item/index.tsx index 49acc81484..bd18d35ebc 100644 --- a/web/app/components/base/param-item/index.tsx +++ b/web/app/components/base/param-item/index.tsx @@ -1,5 +1,6 @@ 'use client' import type { FC } from 'react' +import { InputNumber } from '../input-number' import Tooltip from '@/app/components/base/tooltip' import Slider from '@/app/components/base/slider' import Switch from '@/app/components/base/switch' @@ -47,13 +48,20 @@ const ParamItem: FC = ({ className, id, name, noTooltip, tip, step = 0.1,
- { - const value = parseFloat(e.target.value) - if (value < min || value > max) - return + { + if (value < min || value > max) + return - onChange(id, value) - }} /> + onChange(id, value) + }} + />
= ({ return (
- } + } title={t('dataset.retrieval.invertedIndex.title')} description={t('dataset.retrieval.invertedIndex.description')} isActive activeHeaderClassName='bg-gradient-to-r from-[#F0EEFA] to-[#F9FAFB]' diff --git a/web/app/components/datasets/common/retrieval-method-config/index.tsx b/web/app/components/datasets/common/retrieval-method-config/index.tsx index 62ead1467f..5a17b73569 100644 --- a/web/app/components/datasets/common/retrieval-method-config/index.tsx +++ b/web/app/components/datasets/common/retrieval-method-config/index.tsx @@ -5,10 +5,8 @@ import { useTranslation } from 'react-i18next' import Image from 'next/image' import RetrievalParamConfig from '../retrieval-param-config' import { OptionCard } from '../../create/step-two/option-card' -import Selection from '../../create/assets/selection-mod.svg' -import Research from '../../create/assets/research-mod.svg' -import PatternRecognition from '../../create/assets/pattern-recognition-mod.svg' import Effect from '../../create/assets/option-card-effect-purple.svg' +import { retrievalIcon } from '../../create/icons' import type { RetrievalConfig } from '@/types/app' import { RETRIEVE_METHOD } from '@/types/app' import { useProviderContext } from '@/context/provider-context' @@ -59,7 +57,7 @@ const RetrievalMethodConfig: FC = ({ return (
{supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.semantic_search.title')} description={t('dataset.retrieval.semantic_search.description')} isActive={ @@ -80,7 +78,7 @@ const RetrievalMethodConfig: FC = ({ )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={t('dataset.retrieval.full_text_search.title')} description={t('dataset.retrieval.full_text_search.description')} isActive={ @@ -101,7 +99,7 @@ const RetrievalMethodConfig: FC = ({ )} {supportRetrievalMethods.includes(RETRIEVE_METHOD.semantic) && ( - } + } title={
{t('dataset.retrieval.hybrid_search.title')}
diff --git a/web/app/components/datasets/create/embedding-process/index.tsx b/web/app/components/datasets/create/embedding-process/index.tsx index 7786582085..f6d500ef15 100644 --- a/web/app/components/datasets/create/embedding-process/index.tsx +++ b/web/app/components/datasets/create/embedding-process/index.tsx @@ -7,7 +7,11 @@ import { omit } from 'lodash-es' import { ArrowRightIcon } from '@heroicons/react/24/solid' import { RiErrorWarningFill, + RiLoader2Fill, + RiTerminalBoxLine, } from '@remixicon/react' +import Image from 'next/image' +import { indexMethodIcon, retrievalIcon } from '../icons' import s from './index.module.css' import cn from '@/utils/classnames' import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata' @@ -23,15 +27,21 @@ import UpgradeBtn from '@/app/components/billing/upgrade-btn' import { useProviderContext } from '@/context/provider-context' import Tooltip from '@/app/components/base/tooltip' import { sleep } from '@/utils' +import { RETRIEVE_METHOD } from '@/types/app' type Props = { datasetId: string batchId: string documents?: FullDocumentDetail[] indexingType?: string + retrievalMethod?: string } -const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => { +const RuleDetail: FC<{ + sourceData?: ProcessRuleResponse + indexingType?: string + retrievalMethod?: string +}> = ({ sourceData, indexingType, retrievalMethod }) => { const { t } = useTranslation() const segmentationRuleMap = { @@ -81,10 +91,40 @@ const RuleDetail: FC<{ sourceData?: ProcessRuleResponse }> = ({ sourceData }) => displayedValue={String(getValue(field))} /> })} + + } + /> + + } + />
} -const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], indexingType }) => { +const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], indexingType, retrievalMethod }) => { const { t } = useTranslation() const { enableBilling, plan } = useProviderContext() @@ -146,6 +186,9 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index const navToDocumentList = () => { router.push(`/datasets/${datasetId}/documents`) } + const navToApiDocs = () => { + router.push('/datasets?category=api') + } const isEmbedding = useMemo(() => { return indexingStatusBatchDetail.some(indexingStatusDetail => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || '')) @@ -177,13 +220,17 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index return doc?.data_source_info.notion_page_icon } - const isSourceEmbedding = (detail: IndexingStatusResponse) => ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') + const isSourceEmbedding = (detail: IndexingStatusResponse) => + ['indexing', 'splitting', 'parsing', 'cleaning', 'waiting'].includes(detail.indexing_status || '') return ( <>
- {isEmbedding && t('datasetDocuments.embedding.processing')} + {isEmbedding &&
+ + {t('datasetDocuments.embedding.processing')} +
} {isEmbeddingCompleted && t('datasetDocuments.embedding.completed')}
@@ -258,11 +305,19 @@ const EmbeddingProcess: FC = ({ datasetId, batchId, documents = [], index
))}
- -
+ +
+
diff --git a/web/app/components/datasets/create/icons.ts b/web/app/components/datasets/create/icons.ts new file mode 100644 index 0000000000..80c4b6c944 --- /dev/null +++ b/web/app/components/datasets/create/icons.ts @@ -0,0 +1,16 @@ +import GoldIcon from './assets/gold.svg' +import Piggybank from './assets/piggy-bank-mod.svg' +import Selection from './assets/selection-mod.svg' +import Research from './assets/research-mod.svg' +import PatternRecognition from './assets/pattern-recognition-mod.svg' + +export const indexMethodIcon = { + high_quality: GoldIcon, + economical: Piggybank, +} + +export const retrievalIcon = { + vector: Selection, + fullText: Research, + hybrid: PatternRecognition, +} diff --git a/web/app/components/datasets/create/index.tsx b/web/app/components/datasets/create/index.tsx index 440e9c0ea2..3829abe27b 100644 --- a/web/app/components/datasets/create/index.tsx +++ b/web/app/components/datasets/create/index.tsx @@ -36,6 +36,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { const [dataSourceType, setDataSourceType] = useState(DataSourceType.FILE) const [step, setStep] = useState(1) const [indexingTypeCache, setIndexTypeCache] = useState('') + const [retrievalMethodCache, setRetrievalMethodCache] = useState('') const [fileList, setFiles] = useState([]) const [result, setResult] = useState() const [hasError, setHasError] = useState(false) @@ -80,6 +81,9 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { const updateResultCache = (res?: createDocumentResponse) => { setResult(res) } + const updateRetrievalMethodCache = (method: string) => { + setRetrievalMethodCache(method) + } const nextStep = useCallback(() => { setStep(step + 1) @@ -156,6 +160,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { websiteCrawlJobId={websiteCrawlJobId} onStepChange={changeStep} updateIndexingTypeCache={updateIndexingTypeCache} + updateRetrievalMethodCache={updateRetrievalMethodCache} updateResultCache={updateResultCache} crawlOptions={crawlOptions} />} @@ -163,6 +168,7 @@ const DatasetUpdateForm = ({ datasetId }: DatasetUpdateFormProps) => { datasetId={datasetId} datasetName={detail?.name} indexingType={detail?.indexing_technique || indexingTypeCache} + retrievalMethod={detail?.retrieval_model?.search_method || retrievalMethodCache} creationCache={result} />}
diff --git a/web/app/components/datasets/create/step-three/index.tsx b/web/app/components/datasets/create/step-three/index.tsx index 85471f4513..1e7c49ac37 100644 --- a/web/app/components/datasets/create/step-three/index.tsx +++ b/web/app/components/datasets/create/step-three/index.tsx @@ -7,15 +7,17 @@ import s from './index.module.css' import cn from '@/utils/classnames' import useBreakpoints, { MediaType } from '@/hooks/use-breakpoints' import type { FullDocumentDetail, createDocumentResponse } from '@/models/datasets' +import AppIcon from '@/app/components/base/app-icon' type StepThreeProps = { datasetId?: string datasetName?: string indexingType?: string + retrievalMethod?: string creationCache?: createDocumentResponse } -const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: StepThreeProps) => { +const StepThree = ({ datasetId, datasetName, indexingType, creationCache, retrievalMethod }: StepThreeProps) => { const { t } = useTranslation() const media = useBreakpoints() @@ -30,8 +32,13 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step
{t('datasetCreation.stepThree.creationTitle')}
{t('datasetCreation.stepThree.creationContent')}
-
{t('datasetCreation.stepThree.label')}
-
{datasetName || creationCache?.dataset?.name}
+
+ +
+
{t('datasetCreation.stepThree.label')}
+
{datasetName || creationCache?.dataset?.name}
+
+
@@ -47,6 +54,7 @@ const StepThree = ({ datasetId, datasetName, indexingType, creationCache }: Step batchId={creationCache?.batch || ''} documents={creationCache?.documents as FullDocumentDetail[]} indexingType={indexingType || creationCache?.dataset?.indexing_technique} + retrievalMethod={retrievalMethod || creationCache?.dataset?.retrieval_model?.search_method} />
diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 536b426cb4..88f7661a03 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -16,10 +16,9 @@ import Image from 'next/image' import SettingCog from '../assets/setting-gear-mod.svg' import OrangeEffect from '../assets/option-card-effect-orange.svg' import FamilyMod from '../assets/family-mod.svg' -import GoldIcon from '../assets/gold.svg' -import Piggybank from '../assets/piggy-bank-mod.svg' import Note from '../assets/note-mod.svg' import FileList from '../assets/file-list-3-fill.svg' +import { indexMethodIcon } from '../icons' import PreviewItem, { PreviewType } from './preview-item' import s from './index.module.css' import unescape from './unescape' @@ -80,6 +79,7 @@ type StepTwoProps = { onSetting: () => void datasetId?: string indexingType?: ValueOf + retrievalMethod?: string dataSourceType: DataSourceType files: CustomFile[] notionPages?: NotionPage[] @@ -89,6 +89,7 @@ type StepTwoProps = { websiteCrawlJobId?: string onStepChange?: (delta: number) => void updateIndexingTypeCache?: (type: string) => void + updateRetrievalMethodCache?: (method: string) => void updateResultCache?: (res: createDocumentResponse) => void onSave?: () => void onCancel?: () => void @@ -137,6 +138,7 @@ const StepTwo = ({ updateResultCache, onSave, onCancel, + updateRetrievalMethodCache, }: StepTwoProps) => { const { t } = useTranslation() const { locale } = useContext(I18n) @@ -507,6 +509,8 @@ const StepTwo = ({ }) updateIndexingTypeCache && updateIndexingTypeCache(indexType as string) updateResultCache && updateResultCache(res) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + updateRetrievalMethodCache && updateRetrievalMethodCache(retrievalConfig.search_method as string) } else { res = await createDocument({ @@ -643,19 +647,21 @@ const StepTwo = ({
} + title={t('datasetCreation.stepTwo.general')} + icon={{t('datasetCreation.stepTwo.general')}} activeHeaderClassName='bg-gradient-to-r from-[#EFF0F9] to-[#F9FAFB]' - description={'General text chunking mode, the chunks retrieved and recalled are the same.'} + description={t('datasetCreation.stepTwo.generalTip')} isActive={SegmentType.AUTO === segmentationType} onClick={() => setSegmentationType(SegmentType.AUTO)} actions={ <> + - } > @@ -666,13 +672,13 @@ const StepTwo = ({ onChange={e => setSegmentIdentifier(e.target.value)} /> setMax(parseInt(e.target.value.replace(/^0+/, ''), 10))} + defaultValue={max} + onChange={setMax} /> setOverlap(parseInt(e.target.value.replace(/^0+/, ''), 10))} + onChange={setOverlap} />
@@ -695,32 +701,34 @@ const StepTwo = ({
} + title={t('datasetCreation.stepTwo.parentChild')} + icon={{t('datasetCreation.stepTwo.parentChild')}} effectImg={OrangeEffect.src} activeHeaderClassName='bg-gradient-to-r from-[#F9F1EE] to-[#F9FAFB]' - description={'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.'} + description={t('datasetCreation.stepTwo.parentChildTip')} isActive={SegmentType.CUSTOM === segmentationType} onClick={() => setSegmentationType(SegmentType.CUSTOM)} actions={ <> + - } >
- Parent-chunk for Context + {t('datasetCreation.stepTwo.parentChunkForContext')} } - title={'Paragraph'} - description={'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.'} + title={t('datasetCreation.stepTwo.paragraph')} + description={t('datasetCreation.stepTwo.paragraphTip')} isChosen={parentChildConfig.chunkForContext === 'paragraph'} onChosen={() => setParentChildConfig( { @@ -741,12 +749,12 @@ const StepTwo = ({ })} /> setParentChildConfig({ + defaultValue={parentChildConfig.parent.maxLength} + onChange={value => setParentChildConfig({ ...parentChildConfig, parent: { ...parentChildConfig.parent, - maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10), + maxLength: value, }, })} /> @@ -755,8 +763,8 @@ const StepTwo = ({ /> } - title={'Full Doc'} - description={'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.'} + title={t('datasetCreation.stepTwo.fullDoc')} + description={t('datasetCreation.stepTwo.fullDocTip')} onChosen={() => setParentChildConfig( { ...parentChildConfig, @@ -769,7 +777,7 @@ const StepTwo = ({
- Child-chunk for Retrieval + {t('datasetCreation.stepTwo.childChunkForRetrieval')}
setParentChildConfig({ + onChange={value => setParentChildConfig({ ...parentChildConfig, child: { ...parentChildConfig.child, - maxLength: parseInt(e.target.value.replace(/^0+/, ''), 10), + maxLength: value, }, })} />
- Text Pre-processing Rules + {t('datasetCreation.stepTwo.rules')}
{rules.map(rule => ( @@ -834,7 +842,7 @@ const StepTwo = ({ }} >
- Gold Icon + Gold Icon
{!hasSetIndexType && }
@@ -865,7 +873,7 @@ const StepTwo = ({ onClick={changeToEconomicalType} >
- Economical Icon + Economical Icon
{!hasSetIndexType && }
diff --git a/web/app/components/datasets/create/step-two/inputs.tsx b/web/app/components/datasets/create/step-two/inputs.tsx index 56100918e1..899f5b120c 100644 --- a/web/app/components/datasets/create/step-two/inputs.tsx +++ b/web/app/components/datasets/create/step-two/inputs.tsx @@ -3,6 +3,8 @@ import { useTranslation } from 'react-i18next' import type { InputProps } from '@/app/components/base/input' import Input from '@/app/components/base/input' import Tooltip from '@/app/components/base/tooltip' +import type { InputNumberProps } from '@/app/components/base/input-number' +import { InputNumber } from '@/app/components/base/input-number' const TextLabel: FC = (props) => { return @@ -36,12 +38,12 @@ export const DelimiterInput: FC = (props) => { } -export const MaxLengthInput: FC = (props) => { +export const MaxLengthInput: FC = (props) => { const { t } = useTranslation() - return + return {t('datasetCreation.stepTwo.maxLength')}
}> - = (props) => { } -export const OverlapInput: FC = (props) => { +export const OverlapInput: FC = (props) => { const { t } = useTranslation() return {t('datasetCreation.stepTwo.overlap')} @@ -64,7 +66,7 @@ export const OverlapInput: FC = (props) => { } />
}> - { return Object.keys(map).map(key => ({ value: key, name: map[key] })) @@ -32,6 +33,7 @@ const map2Options = (map: { [key: string]: string }) => { type IFieldInfoProps = { label: string value?: string + valueIcon?: ReactNode displayedValue?: string defaultValue?: string showEdit?: boolean @@ -43,6 +45,7 @@ type IFieldInfoProps = { export const FieldInfo: FC = ({ label, value = '', + valueIcon, displayedValue = '', defaultValue, showEdit = false, @@ -58,7 +61,8 @@ export const FieldInfo: FC = ({ return (
{label}
-
+
+ {valueIcon} {!showEdit ? displayedValue : inputType === 'select' diff --git a/web/app/dev-preview/page.tsx b/web/app/dev-preview/page.tsx index 176cf8360d..99041cd513 100644 --- a/web/app/dev-preview/page.tsx +++ b/web/app/dev-preview/page.tsx @@ -1,16 +1,12 @@ 'use client' -import { Stepper } from '../components/datasets/create/stepper' +import { useState } from 'react' +import { InputNumber } from '../components/base/input-number' +// import { Stepper } from '../components/datasets/create/stepper' export default function Page() { + const [step, setStep] = useState(0) return
- +
} diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts index de885671a7..1f0bac376b 100644 --- a/web/i18n/en-US/dataset-creation.ts +++ b/web/i18n/en-US/dataset-creation.ts @@ -99,6 +99,16 @@ const translation = { autoDescription: 'Automatically set chunk and preprocessing rules. Unfamiliar users are recommended to select this.', custom: 'Custom', customDescription: 'Customize chunks rules, chunks length, and preprocessing rules, etc.', + general: 'General', + generalTip: 'General text chunking mode, the chunks retrieved and recalled are the same.', + parentChild: 'Parent-child', + parentChildTip: 'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.', + parentChunkForContext: 'Parent-chunk for Context', + childChunkForRetrieval: 'Child-chunk for Retrieval', + paragraph: 'Paragraph', + paragraphTip: 'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.', + fullDoc: 'Full Doc', + fullDocTip: 'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.', separator: 'Delimiter', separatorTip: 'A delimiter is the character used to separate text. \\n\\n and \\n are commonly used delimiters for separating paragraphs and lines. Combined with commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum chunk length. You can also use special delimiters defined by yourself (e.g. ***).', separatorPlaceholder: '\\n\\n for separating paragraphs; \\n for separating lines', diff --git a/web/i18n/zh-Hans/dataset-creation.ts b/web/i18n/zh-Hans/dataset-creation.ts index fac809d7e2..30a61ca720 100644 --- a/web/i18n/zh-Hans/dataset-creation.ts +++ b/web/i18n/zh-Hans/dataset-creation.ts @@ -99,6 +99,16 @@ const translation = { autoDescription: '自动设置分段规则与预处理规则,如果不了解这些参数建议选择此项', custom: '自定义', customDescription: '自定义分段规则、分段长度以及预处理规则等参数', + general: '通用', + generalTip: '通用文本分块模式,检索和回忆的块是相同的', + parentChild: '父子分段', + parentChildTip: '使用父子模式时,子块用于检索,父块用作上下文', + parentChunkForContext: '父块用作上下文', + childChunkForRetrieval: '子块用于检索', + paragraph: '段落', + paragraphTip: '此模式根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的母块', + fullDoc: '全文', + fullDocTip: '整个文档用作父块并直接检索。请注意,出于性能原因,超过10000个标记的文本将被自动截断。', separator: '分段标识符', separatorTip: '分隔符是用于分隔文本的字符。\\n\\n 和 \\n 是常用于分隔段落和行的分隔符。用逗号连接分隔符(\\n\\n,\\n),当段落超过最大块长度时,会按行进行分割。你也可以使用自定义的特殊分隔符(例如 ***)。', separatorPlaceholder: '\\n\\n 用于分段;\\n 用于分行', @@ -112,6 +122,7 @@ const translation = { removeUrlEmails: '删除所有 URL 和电子邮件地址', removeStopwords: '去除停用词,例如 “a”,“an”,“the” 等', preview: '确认并预览', + previewChunk: '预览块', reset: '重置', indexMode: '索引方式', qualified: '高质量',