import type { FC } from 'react' import type { CommonResponse } from '@/models/common' import type { IndexingStatusResponse, ProcessRuleResponse } from '@/models/datasets' import { RiLoader2Line, RiPauseCircleLine, RiPlayCircleLine } from '@remixicon/react' import Image from 'next/image' import * as React from 'react' import { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import Divider from '@/app/components/base/divider' import { ToastContext } from '@/app/components/base/toast' import { ProcessMode } from '@/models/datasets' import { fetchIndexingStatus as doFetchIndexingStatus, pauseDocIndexing, resumeDocIndexing, } from '@/service/datasets' import { useProcessRule } from '@/service/knowledge/use-dataset' import { RETRIEVE_METHOD } from '@/types/app' import { asyncRunSafe, sleep } from '@/utils' import { cn } from '@/utils/classnames' import { indexMethodIcon, retrievalIcon } from '../../../create/icons' import { IndexingType } from '../../../create/step-two' import { useDocumentContext } from '../context' import { FieldInfo } from '../metadata' import EmbeddingSkeleton from './skeleton' type IEmbeddingDetailProps = { datasetId?: string documentId?: string indexingType?: IndexingType retrievalMethod?: RETRIEVE_METHOD detailUpdate: VoidFunction } type IRuleDetailProps = { sourceData?: ProcessRuleResponse indexingType?: IndexingType retrievalMethod?: RETRIEVE_METHOD }
/**
 * Memoized component that turns a processing-rule response (`sourceData`)
 * into display values for the three fields listed in `segmentationRuleMap`:
 * mode, segment length and text cleaning. It also receives `indexingType`
 * and `retrievalMethod` as props.
 */
const RuleDetail: FC = React.memo(({ sourceData, indexingType, retrievalMethod, }) => { const { t } = useTranslation() const segmentationRuleMap = { mode: t('embedding.mode', { ns: 'datasetDocuments' }), segmentLength: t('embedding.segmentLength', { ns: 'datasetDocuments' }), textCleaning: t('embedding.textCleaning', { ns: 'datasetDocuments' }), } const getRuleName = (key: string) => { if (key === 'remove_extra_spaces') return t('stepTwo.removeExtraSpaces', { ns: 'datasetCreation' }) if
(key === 'remove_urls_emails') return t('stepTwo.removeUrlEmails', { ns: 'datasetCreation' }) if (key === 'remove_stopwords') return t('stepTwo.removeStopwords', { ns: 'datasetCreation' }) } const isNumber = (value: unknown) => { return typeof value === 'number' }
/**
 * Resolves the display value for one key of `segmentationRuleMap`.
 * '-' is returned whenever `sourceData.mode` is missing, and it also stands
 * in for a max-token value that is not a number.
 * - 'mode': the "custom" label for ProcessMode.general, otherwise the
 *   "hierarchical" label followed by the paragraph or full-doc parent mode
 *   label.
 * - 'segmentLength': the max-token value for ProcessMode.general, otherwise
 *   the parent and child max-token labels with their values.
 * - any other field: the enabled pre-processing rules, each mapped through
 *   getRuleName and joined with ','.
 * NOTE(review): getRuleName has no fallback branch, so a rule id other than
 * the three it checks maps to undefined and becomes an empty entry in the
 * joined string — confirm no other ids can occur.
 * NOTE(review): the dependency array lists only `sourceData`, but the
 * callback also reads `t` (directly and through getRuleName) — confirm this
 * is intentional.
 */
const getValue = useCallback((field: string) => { let value: string | number | undefined = '-' const maxTokens = isNumber(sourceData?.rules?.segmentation?.max_tokens) ? sourceData.rules.segmentation.max_tokens : value const childMaxTokens = isNumber(sourceData?.rules?.subchunk_segmentation?.max_tokens) ? sourceData.rules.subchunk_segmentation.max_tokens : value switch (field) { case 'mode': value = !sourceData?.mode ? value : sourceData.mode === ProcessMode.general ? (t('embedding.custom', { ns: 'datasetDocuments' }) as string) : `${t('embedding.hierarchical', { ns: 'datasetDocuments' })} · ${sourceData?.rules?.parent_mode === 'paragraph' ? t('parentMode.paragraph', { ns: 'dataset' }) : t('parentMode.fullDoc', { ns: 'dataset' })}` break case 'segmentLength': value = !sourceData?.mode ? value : sourceData.mode === ProcessMode.general ? maxTokens : `${t('embedding.parentMaxTokens', { ns: 'datasetDocuments' })} ${maxTokens}; ${t('embedding.childMaxTokens', { ns: 'datasetDocuments' })} ${childMaxTokens}` break default: value = !sourceData?.mode ? value : sourceData?.rules?.pre_processing_rules?.filter(rule => rule.enabled).map(rule => getRuleName(rule.id)).join(',') break } return value }, [sourceData]) return (
{Object.keys(segmentationRuleMap).map((field) => { return ( ) })}
)} /> )} />
) }) RuleDetail.displayName = 'RuleDetail'
/**
 * Component that polls the indexing status of a document and provides
 * `handleSwitch` to pause or resume indexing.
 * The `datasetId` / `documentId` props take precedence; when they are null
 * or undefined the ids read from the document context are used instead.
 * `detailUpdate` is called when polling sees a completed, error or paused
 * status, and again after a successful resume from the paused state.
 */
const EmbeddingDetail: FC = ({ datasetId: dstId, documentId: docId, detailUpdate, indexingType, retrievalMethod, }) => { const { t } = useTranslation() const { notify } = useContext(ToastContext) const datasetId = useDocumentContext(s => s.datasetId) const documentId = useDocumentContext(s => s.documentId) const localDatasetId = dstId ?? datasetId const localDocumentId = docId ?? documentId const [indexingStatusDetail, setIndexingStatusDetail] = useState(null) const fetchIndexingStatus = async () => { const status = await doFetchIndexingStatus({ datasetId: localDatasetId, documentId: localDocumentId }) setIndexingStatusDetail(status) return status } const isStopQuery = useRef(false) const stopQueryStatus = useCallback(() => { isStopQuery.current = true }, [])
/**
 * Polling loop. Each call first checks `isStopQuery` and returns at once if
 * it is set, which is how the effect cleanup below ends the loop.
 * Otherwise it fetches the status; on completed / error / paused it sets the
 * stop flag and calls detailUpdate(); on any other status it waits via
 * sleep(2500) (presumably milliseconds — confirm against `sleep`) and calls
 * itself again. A thrown error takes the same wait-and-retry path.
 * NOTE(review): the dependency array lists only `stopQueryStatus`, while the
 * body reads `fetchIndexingStatus` (which closes over the dataset/document
 * ids) and `detailUpdate`. The callback therefore keeps the values from the
 * render that created it — confirm they cannot change while mounted.
 */
const startQueryStatus = useCallback(async () => { if (isStopQuery.current) return try { const indexingStatusDetail = await fetchIndexingStatus() if (['completed', 'error', 'paused'].includes(indexingStatusDetail?.indexing_status)) { stopQueryStatus() detailUpdate() return } await sleep(2500) await startQueryStatus() } catch { await sleep(2500) await startQueryStatus() } }, [stopQueryStatus]) useEffect(() => { isStopQuery.current = false startQueryStatus() return () => { stopQueryStatus() } }, [startQueryStatus, stopQueryStatus]) const { data: ruleDetail } = useProcessRule(localDocumentId) const isEmbedding = useMemo(() => ['indexing', 'splitting', 'parsing', 'cleaning'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail]) const isEmbeddingCompleted = useMemo(() => ['completed'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail]) const isEmbeddingPaused = useMemo(() => ['paused'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail]) const isEmbeddingError = useMemo(() => ['error'].includes(indexingStatusDetail?.indexing_status || ''), [indexingStatusDetail])
/**
 * Completed segments as a rounded percentage of total segments, capped at
 * 100. Yields 0 when `total_segments` is missing or 0, which also avoids a
 * division by zero.
 */
const percent = useMemo(() => {
const completedCount = indexingStatusDetail?.completed_segments || 0 const totalCount = indexingStatusDetail?.total_segments || 0 if (totalCount === 0) return 0 const percent = Math.round(completedCount * 100 / totalCount) return percent > 100 ? 100 : percent }, [indexingStatusDetail])
/**
 * Calls pauseDocIndexing while one of the in-progress statuses is active
 * (`isEmbedding`), and resumeDocIndexing otherwise.
 * On success it sends a success notification; if the document was paused it
 * clears the stop flag, restarts polling and calls detailUpdate(); finally
 * it resets the stored status to null. On failure it sends an error
 * notification and leaves the stored status untouched.
 */
const handleSwitch = async () => { const opApi = isEmbedding ? pauseDocIndexing : resumeDocIndexing const [e] = await asyncRunSafe(opApi({ datasetId: localDatasetId, documentId: localDocumentId }) as Promise) if (!e) { notify({ type: 'success', message: t('actionMsg.modifiedSuccessfully', { ns: 'common' }) }) // if the embedding is resumed from paused, we need to start the query status if (isEmbeddingPaused) { isStopQuery.current = false startQueryStatus() detailUpdate() } setIndexingStatusDetail(null) } else { notify({ type: 'error', message: t('actionMsg.modifiedUnsuccessfully', { ns: 'common' }) }) } } return ( <>
{isEmbedding && } {isEmbedding && t('embedding.processing', { ns: 'datasetDocuments' })} {isEmbeddingCompleted && t('embedding.completed', { ns: 'datasetDocuments' })} {isEmbeddingPaused && t('embedding.paused', { ns: 'datasetDocuments' })} {isEmbeddingError && t('embedding.error', { ns: 'datasetDocuments' })} {isEmbedding && ( )} {isEmbeddingPaused && ( )}
{/* progress bar */}
{`${t('embedding.segments', { ns: 'datasetDocuments' })} ${indexingStatusDetail?.completed_segments || '--'}/${indexingStatusDetail?.total_segments || '--'} · ${percent}%`}
) } export default React.memo(EmbeddingDetail)