mirror of
https://github.com/langgenius/dify.git
synced 2026-05-13 08:57:28 +08:00
Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Yansong Zhang <916125788@qq.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: hj24 <mambahj24@gmail.com> Co-authored-by: hj24 <huangjian@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com> Co-authored-by: CodingOnStar <hanxujiang@dify.com> Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: Ayush Baluni <73417844+aayushbaluni@users.noreply.github.com> Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com> Co-authored-by: jimcody1995 <jjimcody@gmail.com> Co-authored-by: James <63717587+jamesrayammons@users.noreply.github.com> Co-authored-by: Yunlu Wen <yunlu.wen@dify.ai> Co-authored-by: Stephen Zhou <hi@hyoban.cc> Co-authored-by: Coding On Star <447357187@qq.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: jerryzai <jerryzh8710@protonmail.com> Co-authored-by: NVIDIAN <speedy.hpc@hotmail.com> Co-authored-by: ai-hpc <ai-hpc@users.noreply.github.com> Co-authored-by: Asuka Minato <i@asukaminato.eu.org> Co-authored-by: Junghwan <70629228+shaun0927@users.noreply.github.com> Co-authored-by: HeYinKazune <70251095+HeYin-OS@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: yyh <yuanyouhuilyz@gmail.com> Co-authored-by: Jingyi <jingyi.qi@dify.ai> Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com> Co-authored-by: sxxtony <166789813+sxxtony@users.noreply.github.com>
133 lines
5.1 KiB
TypeScript
133 lines
5.1 KiB
TypeScript
import type { FC } from 'react'
|
|
import type { ProcessRuleResponse } from '@/models/datasets'
|
|
import { useCallback } from 'react'
|
|
import { useTranslation } from 'react-i18next'
|
|
import { FieldInfo } from '@/app/components/datasets/documents/detail/metadata'
|
|
import { ProcessMode } from '@/models/datasets'
|
|
import { RETRIEVE_METHOD } from '@/types/app'
|
|
import { indexMethodIcon, retrievalIcon } from '../icons'
|
|
import { IndexingType } from '../step-two'
|
|
|
|
/** Props accepted by the `RuleDetail` component. All of them are optional. */
type RuleDetailProps = {
  /** Processing rule payload; when it is missing or has no `mode`, the segmentation rows render '-'. */
  sourceData?: ProcessRuleResponse
  /** Indexing technique; compared against `IndexingType.ECONOMICAL` to pick the index-mode label and icon. */
  indexingType?: string
  /** Retrieval method used for the retrieval row when the indexing type is not economical. */
  retrievalMethod?: RETRIEVE_METHOD
}
|
|
|
|
/**
 * Maps a pre-processing rule id to the translation key of its display name.
 * The keys are resolved with the `datasetCreation` namespace by the component below;
 * rule ids that are not listed here have no display name.
 */
const PRE_PROCESSING_RULE_KEYS = {
  remove_extra_spaces: 'stepTwo.removeExtraSpaces',
  remove_urls_emails: 'stepTwo.removeUrlEmails',
  remove_stopwords: 'stepTwo.removeStopwords',
} as const
|
|
|
|
/**
 * Maps a retrieval method to the icon shown next to its label.
 * Inverted-index and keyword search share the full-text icon. The map is `Partial`,
 * so callers must supply their own fallback for methods that are not listed.
 */
const RETRIEVAL_ICON_MAP: Partial<Record<RETRIEVE_METHOD, string>> = {
  [RETRIEVE_METHOD.fullText]: retrievalIcon.fullText,
  [RETRIEVE_METHOD.hybrid]: retrievalIcon.hybrid,
  [RETRIEVE_METHOD.semantic]: retrievalIcon.vector,
  [RETRIEVE_METHOD.invertedIndex]: retrievalIcon.fullText,
  [RETRIEVE_METHOD.keywordSearch]: retrievalIcon.fullText,
}
|
|
|
|
/** Type guard narrowing an unknown value to `number` by a plain `typeof` check. */
const isNumber = (value: unknown): value is number => {
  return typeof value === 'number'
}
|
|
|
|
/**
 * Read-only summary of a document's processing rule.
 *
 * Renders one `FieldInfo` row each for the segmentation mode, the segment length and
 * the enabled text-cleaning rules, followed by the index mode and the retrieval setting.
 * The three segmentation rows show '-' when `sourceData` is missing or has no `mode`.
 */
const RuleDetail: FC<RuleDetailProps> = ({ sourceData, indexingType, retrievalMethod }) => {
  const { t } = useTranslation()

  // Translated display name of a pre-processing rule, or undefined for an unknown rule id.
  const getRuleName = useCallback((key: string): string | undefined => {
    const translationKey = PRE_PROCESSING_RULE_KEYS[key as keyof typeof PRE_PROCESSING_RULE_KEYS]
    return translationKey ? t(translationKey, { ns: 'datasetCreation' }) : undefined
  }, [t])

  // "Custom" for general mode; otherwise "hierarchical · <parent mode>".
  const getModeValue = useCallback((): string => {
    if (!sourceData?.mode)
      return '-'

    if (sourceData.mode === ProcessMode.general)
      return t('embedding.custom', { ns: 'datasetDocuments' })

    // Any parent_mode other than 'paragraph' is labelled as full-doc.
    const parentModeLabel = sourceData.rules?.parent_mode === 'paragraph'
      ? t('parentMode.paragraph', { ns: 'dataset' })
      : t('parentMode.fullDoc', { ns: 'dataset' })

    return `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${parentModeLabel}`
  }, [sourceData, t])

  // Max tokens for general mode; parent and child max tokens for every other mode.
  const getSegmentLengthValue = useCallback((): string | number => {
    if (!sourceData?.mode)
      return '-'

    const parentLimit = sourceData.rules?.segmentation?.max_tokens
    const maxTokens = isNumber(parentLimit) ? parentLimit : '-'

    if (sourceData.mode === ProcessMode.general)
      return maxTokens

    const childLimit = sourceData.rules?.subchunk_segmentation?.max_tokens
    const childMaxTokens = isNumber(childLimit) ? childLimit : '-'

    return `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}`
  }, [sourceData, t])

  // Comma-joined names of the enabled pre-processing rules, or '-' when none resolve to a name.
  const getTextCleaningValue = useCallback((): string => {
    if (!sourceData?.mode)
      return '-'

    const enabledRules = sourceData.rules?.pre_processing_rules?.filter(rule => rule.enabled) || []
    const ruleNames = enabledRules
      .map(rule => getRuleName(rule.id))
      // Drop unknown rule ids and empty translations.
      .filter((name): name is string => typeof name === 'string' && name !== '')

    return ruleNames.length > 0 ? ruleNames.join(',') : '-'
  }, [sourceData, getRuleName])

  // Rows are listed explicitly so each label is paired with its value without a
  // string-keyed lookup and non-null assertion.
  const segmentationRows = [
    { field: 'mode', label: t('embedding.mode', { ns: 'datasetDocuments' }), value: getModeValue() },
    { field: 'segmentLength', label: t('embedding.segmentLength', { ns: 'datasetDocuments' }), value: getSegmentLengthValue() },
    { field: 'textCleaning', label: t('embedding.textCleaning', { ns: 'datasetDocuments' }), value: getTextCleaningValue() },
  ]

  const isEconomical = indexingType === IndexingType.ECONOMICAL
  const indexMethodIconSrc = isEconomical ? indexMethodIcon.economical : indexMethodIcon.high_quality
  const indexModeLabel = t(`stepTwo.${isEconomical ? 'economical' : 'qualified'}`, { ns: 'datasetCreation' })

  // Economical indexing is always reported as keyword search, whatever `retrievalMethod` says;
  // otherwise fall back to semantic search when no method was provided.
  const effectiveRetrievalMethod = isEconomical ? 'keyword_search' : (retrievalMethod ?? 'semantic_search')
  const retrievalLabel = t(`retrieval.${effectiveRetrievalMethod}.title`, { ns: 'dataset' })
  // Look the icon up with the same effective method as the label so the two cannot disagree.
  const retrievalIconSrc = RETRIEVAL_ICON_MAP[effectiveRetrievalMethod as RETRIEVE_METHOD] ?? retrievalIcon.vector

  return (
    <div className="flex flex-col gap-1">
      {segmentationRows.map(({ field, label, value }) => (
        <FieldInfo
          key={field}
          label={label}
          displayedValue={String(value)}
        />
      ))}
      <FieldInfo
        label={t('stepTwo.indexMode', { ns: 'datasetCreation' })}
        displayedValue={indexModeLabel}
        valueIcon={<img className="size-4" src={indexMethodIconSrc} alt="" />}
      />
      <FieldInfo
        label={t('form.retrievalSetting.title', { ns: 'datasetSettings' })}
        displayedValue={retrievalLabel}
        valueIcon={<img className="size-4" src={retrievalIconSrc} alt="" />}
      />
    </div>
  )
}
|
|
|
|
export default RuleDetail
|