refactor: enhance document processing UI and functionality with new components and translations

This commit is contained in:
twwu 2025-05-22 23:05:58 +08:00
parent fe435c23c3
commit 21c24977d8
14 changed files with 363 additions and 35 deletions

View File

@ -15,11 +15,11 @@ export const useAddDocumentsSteps = () => {
value: AddDocumentsStep.dataSource,
},
{
label: t('datasetPipeline.addDocuments.steps.ProcessDocuments'),
label: t('datasetPipeline.addDocuments.steps.processDocuments'),
value: AddDocumentsStep.processDocuments,
},
{
label: t('datasetPipeline.addDocuments.steps.ProcessingDocuments'),
label: t('datasetPipeline.addDocuments.steps.processingDocuments'),
value: AddDocumentsStep.processingDocuments,
},
]

View File

@ -25,10 +25,12 @@ import FilePreview from './preview/file-preview'
import NotionPagePreview from './preview/notion-page-preview'
import WebsitePreview from './preview/web-preview'
import ProcessDocuments from './process-documents'
import ChunkPreview from './preview/chunk-preview'
import Processing from './processing'
const TestRunPanel = () => {
const { t } = useTranslation()
const [currentStep, setCurrentStep] = useState(1)
const [currentStep, setCurrentStep] = useState(3)
const [datasource, setDatasource] = useState<Datasource>()
const [fileList, setFiles] = useState<FileItem[]>([])
const [notionPages, setNotionPages] = useState<NotionPage[]>([])
@ -40,7 +42,10 @@ const TestRunPanel = () => {
const plan = useProviderContextSelector(state => state.plan)
const enableBilling = useProviderContextSelector(state => state.enableBilling)
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)
const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '')
@ -120,6 +125,10 @@ const TestRunPanel = () => {
setCurrentStep(preStep => preStep - 1)
}, [])
// Preview callback handed to the process-documents step.
// Placeholder: for now it only logs the submitted form values.
const handlePreviewChunks = useCallback((formValues: Record<string, any>) => {
  console.log(formValues)
}, [])
const handleProcess = useCallback((data: Record<string, any>) => {
if (!datasource)
return
@ -146,7 +155,8 @@ const TestRunPanel = () => {
}
// todo: Run Pipeline
console.log('datasource_type', datasource_type)
}, [datasource, fileList, notionPages, websiteCrawlJobId, websitePages])
handleNextStep()
}, [datasource, fileList, handleNextStep, notionPages, websiteCrawlJobId, websitePages])
if (isFetchingPipelineInfo) {
return (
@ -232,24 +242,47 @@ const TestRunPanel = () => {
<ProcessDocuments
dataSourceNodeId={datasource?.nodeId || ''}
onProcess={handleProcess}
onPreview={handlePreviewChunks}
onBack={handleBackStep}
/>
)
}
{
currentStep === 3 && (
<Processing
datasetId={datasetId!}
batchId={''}
documents={[]}
indexingType={indexingType!}
retrievalMethod={retrievalMethod!}
/>
)
}
</div>
</div>
{/* Preview */}
<div className='flex h-full flex-1 shrink-0 flex-col pl-2 pt-2'>
{
currentStep === 1 && (
<>
{currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
{currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />}
{currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
</>
)
}
</div>
{
currentStep === 1 && (
<div className='flex h-full flex-1 shrink-0 flex-col pl-2 pt-2'>
{currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
{currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />}
{currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
</div>
)
}
{
currentStep === 2 && (
<ChunkPreview
datasource={datasource!}
files={fileList.map(file => file.file)}
notionPages={notionPages}
websitePages={websitePages}
isIdle={true}
isPending={true}
estimateData={undefined}
/>
)
}
</div>
)
}

View File

@ -30,14 +30,16 @@ const LeftHeader = ({
<div className='system-md-semibold text-text-primary'>
{steps[currentStep - 1]?.label}
</div>
<a
className='absolute -left-11 top-3.5'
href={`/datasets/${datasetId}/documents`}
>
<Button variant='secondary-accent' className='size-9 rounded-full p-0'>
<RiArrowLeftLine className='size-5 ' />
</Button>
</a>
{currentStep !== steps.length && (
<a
className='absolute -left-11 top-3.5'
href={`/datasets/${datasetId}/documents`}
>
<Button variant='secondary-accent' className='size-9 rounded-full p-0'>
<RiArrowLeftLine className='size-5 ' />
</Button>
</a>
)}
<Effect className='left-8 top-[-34px] opacity-20' />
</div>
)

View File

@ -0,0 +1,204 @@
import React, { useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { PreviewContainer } from '../../../preview/container'
import { PreviewHeader } from '../../../preview/header'
import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import { ChunkingMode, DataSourceType } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
import { DataSourceProvider } from '@/models/common'
import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { ChunkContainer, QAPreview } from '../../../chunk'
import { FormattedText } from '../../../formatted-text/formatted'
import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
import { RiSearchEyeLine } from '@remixicon/react'
import Badge from '@/app/components/base/badge'
/**
 * Props accepted by the ChunkPreview panel.
 */
type ChunkPreviewProps = {
  /** Selected datasource; its `type` decides which document picker is rendered. */
  datasource: Datasource
  /** Files offered in the picker when the datasource is a file source. */
  files: CustomFile[]
  /** Notion pages offered in the picker when the datasource is Notion. */
  notionPages: NotionPage[]
  /** Crawled pages offered in the picker when the datasource is a web crawler. */
  websitePages: CrawlResultItem[]
  /** Flag that gates the empty-state hint in the preview body. */
  isIdle: boolean
  /** When true, skeleton placeholders are rendered in the preview body. */
  isPending: boolean
  /** Estimate payload whose chunks are listed; `undefined` when none is available. */
  estimateData: FileIndexingEstimateResponse | undefined
}
/**
 * Preview panel that lists the chunks of an indexing estimate.
 *
 * The header shows a document picker matching the datasource type (file,
 * Notion or web) and, for every chunking mode except QA, a badge with the
 * total chunk count. The body renders the estimate according to the dataset's
 * `doc_form` (QA pairs, plain text chunks, or parent/child chunks), an
 * empty-state hint while no preview has been requested, and skeleton
 * placeholders while a preview is pending.
 *
 * @param datasource - Selected datasource; only `datasource.type` is read.
 * @param files - Files selectable in the file picker.
 * @param notionPages - Pages selectable in the Notion picker.
 * @param websitePages - Pages selectable in the web picker.
 * @param isIdle - True while no preview has been requested; shows the hint.
 * @param isPending - True while a preview is loading; shows the skeletons.
 * @param estimateData - Estimate to render, or `undefined` when there is none.
 */
const ChunkPreview = ({
  datasource,
  files,
  notionPages,
  websitePages,
  isIdle,
  isPending,
  estimateData,
}: ChunkPreviewProps) => {
  const { t } = useTranslation()
  const currentDocForm = useDatasetDetailContextWithSelector(s => s.dataset?.doc_form)

  // Currently selected document per datasource kind; each defaults to the first entry.
  const [previewFile, setPreviewFile] = useState<DocumentItem>(files[0] as DocumentItem)
  const [previewNotionPage, setPreviewNotionPage] = useState<NotionPage>(notionPages[0])
  const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0])

  // All crawler providers are collapsed into the single WEB datasource type;
  // any other type is passed through unchanged.
  const dataSourceType = useMemo(() => {
    const type = datasource.type
    if (type === DataSourceProvider.fireCrawl || type === DataSourceProvider.jinaReader || type === DataSourceProvider.waterCrawl)
      return DataSourceType.WEB
    return type
  }, [datasource.type])

  return (
    <PreviewContainer
      header={<PreviewHeader
        title={t('datasetCreation.stepTwo.preview')}
      >
        <div className='flex items-center gap-1'>
          {dataSourceType === DataSourceType.FILE
            && <PreviewDocumentPicker
              files={files as Array<Required<CustomFile>>}
              onChange={(selected) => {
                setPreviewFile(selected)
              }}
              value={previewFile}
            />
          }
          {dataSourceType === DataSourceType.NOTION
            && <PreviewDocumentPicker
              files={
                notionPages.map(page => ({
                  id: page.page_id,
                  name: page.page_name,
                  extension: 'md',
                }))
              }
              onChange={(selected) => {
                const selectedPage = notionPages.find(page => page.page_id === selected.id)
                // Keep the current selection if the picked id is no longer in the list.
                if (selectedPage)
                  setPreviewNotionPage(selectedPage)
              }}
              value={{
                id: previewNotionPage?.page_id || '',
                name: previewNotionPage?.page_name || '',
                extension: 'md',
              }}
            />
          }
          {dataSourceType === DataSourceType.WEB
            && <PreviewDocumentPicker
              files={
                websitePages.map(page => ({
                  id: page.source_url,
                  name: page.title,
                  extension: 'md',
                }))
              }
              onChange={(selected) => {
                const selectedPage = websitePages.find(page => page.source_url === selected.id)
                // Keep the current selection if the picked url is no longer in the list.
                if (selectedPage)
                  setPreviewWebsitePage(selectedPage)
              }}
              value={
                {
                  id: previewWebsitePage?.source_url || '',
                  name: previewWebsitePage?.title || '',
                  extension: 'md',
                }
              }
            />
          }
          {
            currentDocForm !== ChunkingMode.qa
            && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', {
              count: estimateData?.total_segments || 0,
            }) as string}
            />
          }
        </div>
      </PreviewHeader>}
      className='relative flex h-full w-1/2 shrink-0 p-4 pr-0'
      mainClassName='space-y-6'
    >
      {/* Keys combine the position with the text so identical chunks never collide. */}
      {currentDocForm === ChunkingMode.qa && estimateData?.qa_preview && (
        estimateData.qa_preview.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.question}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.question.length + item.answer.length}
          >
            <QAPreview qa={item} />
          </ChunkContainer>
        ))
      )}
      {currentDocForm === ChunkingMode.text && estimateData?.preview && (
        estimateData.preview.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.content}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.content.length}
          >
            {item.content}
          </ChunkContainer>
        ))
      )}
      {currentDocForm === ChunkingMode.parentChild && estimateData?.preview && (
        estimateData.preview.map((item, parentIndex) => {
          const parentLabelIndex = parentIndex + 1
          return (
            <ChunkContainer
              key={`${parentIndex}-${item.content}`}
              label={`Chunk-${parentLabelIndex}`}
              characterCount={item.content.length}
            >
              <FormattedText>
                {item.child_chunks.map((child, childIndex) => {
                  const childLabelIndex = childIndex + 1
                  return (
                    <PreviewSlice
                      key={`${childIndex}-${child}`}
                      label={`C-${childLabelIndex}`}
                      text={child}
                      tooltip={`Child-chunk-${childLabelIndex} · ${child.length} Characters`}
                      labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
                      dividerClassName='leading-7'
                    />
                  )
                })}
              </FormattedText>
            </ChunkContainer>
          )
        })
      )}
      {/* Empty state: shown only while no preview has been requested yet. */}
      {isIdle && (
        <div className='flex h-full w-full items-center justify-center'>
          <div className='flex flex-col items-center justify-center gap-3'>
            <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
            <p className='text-sm text-text-tertiary'>
              {t('datasetCreation.stepTwo.previewChunkTip')}
            </p>
          </div>
        </div>
      )}
      {/* Loading state: ten skeleton cards stand in for the chunks. */}
      {isPending && (
        <div className='space-y-6'>
          {Array.from({ length: 10 }, (_, i) => (
            <SkeletonContainer key={i}>
              <SkeletonRow>
                <SkeletonRectangle className='w-20' />
                <SkeletonPoint />
                <SkeletonRectangle className='w-24' />
              </SkeletonRow>
              <SkeletonRectangle className='w-full' />
              <SkeletonRectangle className='w-full' />
              <SkeletonRectangle className='w-[422px]' />
            </SkeletonContainer>
          ))}
        </div>
      )}
    </PreviewContainer>
  )
}

export default React.memo(ChunkPreview)

View File

@ -1,6 +1,7 @@
import React from 'react'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
import { RiArrowLeftLine } from '@remixicon/react'
type ActionsProps = {
onBack: () => void
@ -18,8 +19,10 @@ const Actions = ({
<Button
variant='secondary'
onClick={onBack}
className='gap-x-0.5'
>
{t('datasetPipeline.operations.dataSource')}
<RiArrowLeftLine className='size-4' />
<span className='px-0.5'>{t('datasetPipeline.operations.dataSource')}</span>
</Button>
<Button
variant='primary'

View File

@ -1,6 +1,7 @@
import React from 'react'
import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
import { RiSearchEyeLine } from '@remixicon/react'
type HeaderProps = {
onReset: () => void
@ -21,10 +22,15 @@ const Header = ({
{t('datasetPipeline.addDocuments.stepTwo.chunkSettings')}
</div>
<Button variant='ghost' disabled={disableReset} onClick={onReset}>
{t('common.operations.reset')}
{t('common.operation.reset')}
</Button>
<Button variant='primary' onClick={onPreview}>
{t('common.operations.reset')}
<Button
variant='secondary-accent'
onClick={onPreview}
className='gap-x-0.5'
>
<RiSearchEyeLine className='size-4' />
<span className='px-0.5'>{t('datasetPipeline.addDocuments.stepTwo.previewChunks')}</span>
</Button>
</div>
)

View File

@ -1,8 +1,8 @@
import { useMemo } from 'react'
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import { useStore } from '@/app/components/workflow/store'
import { usePublishedPipelineProcessingParams } from '@/service/use-pipeline'
import { PipelineInputVarType } from '@/models/pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
// Pipeline input variable kinds that serve as the keys of VAR_TYPE_MAP.
type PartialInputVarType =
  | PipelineInputVarType.textInput
  | PipelineInputVarType.number
  | PipelineInputVarType.select
  | PipelineInputVarType.checkbox
@ -14,7 +14,7 @@ const VAR_TYPE_MAP: Record<PartialInputVarType, BaseFieldType> = {
}
export const useConfigurations = (datasourceNodeId: string) => {
const pipelineId = useStore(state => state.pipelineId)
const pipelineId = useDatasetDetailContextWithSelector(state => state.dataset?.pipeline_id)
const { data: paramsConfig } = usePublishedPipelineProcessingParams({
pipeline_id: pipelineId!,
node_id: datasourceNodeId,

View File

@ -8,36 +8,45 @@ import Header from './header'
/**
 * Props of the ProcessDocuments step.
 */
type ProcessDocumentsProps = {
  /** Node id of the datasource; used to look up the form configurations. */
  dataSourceNodeId: string
  /** Receives the submitted form values when the form is submitted for processing. */
  onProcess: (data: Record<string, any>) => void
  /** Receives the submitted form values when the form is submitted for preview. */
  onPreview: (data: Record<string, any>) => void
  /** Invoked by the back action. */
  onBack: () => void
}
const ProcessDocuments = ({
dataSourceNodeId,
onProcess,
onPreview,
onBack,
}: ProcessDocumentsProps) => {
const formRef = useRef<any>(null)
const isPreview = useRef(false)
const { initialData, configurations } = useConfigurations(dataSourceNodeId)
const schema = generateZodSchema(configurations)
// Submit the form in "process" mode: the preview flag is cleared first so that
// the submit handler forwards the values to onProcess.
const handleProcess = useCallback(() => {
  isPreview.current = false
  const form = formRef.current
  form?.submit()
}, [])
// Submit the form in "preview" mode: the preview flag is raised first so that
// the submit handler forwards the values to onPreview.
const handlePreview = useCallback(() => {
  isPreview.current = true
  const form = formRef.current
  form?.submit()
}, [])
// Single submit sink for the form: routes the values to the preview or the
// process callback depending on which button triggered the submission.
const handleSubmit = useCallback((formValues: Record<string, any>) => {
  if (isPreview.current) {
    onPreview(formValues)
    return
  }
  onProcess(formValues)
}, [onPreview, onProcess])
// Reset the form through its imperative handle, if it is mounted.
const handleReset = useCallback(() => {
  const form = formRef.current
  form?.reset()
}, [])
return (
<div className='flex flex-col gap-y-4 pt-4'>
<div className='flex flex-col rounded-lg border-components-panel-border bg-components-panel-bg'>
<div className='flex flex-col rounded-lg border border-components-panel-border bg-components-panel-bg'>
<Header
onReset={handleReset}
disableReset={formRef.current.isDirty()}
disableReset={!formRef.current?.isDirty()}
onPreview={handlePreview}
/>
<Options
@ -45,7 +54,7 @@ const ProcessDocuments = ({
initialData={initialData}
configurations={configurations}
schema={schema}
onSubmit={onProcess}
onSubmit={handleSubmit}
/>
</div>
<Actions onBack={onBack} onProcess={handleProcess} />

View File

@ -66,7 +66,7 @@ const Options = ({
form.handleSubmit()
}}
>
<div className='flex flex-col gap-3 px-4 py-3'>
<div className='flex flex-col gap-3 border-t border-divider-subtle px-4 py-3'>
{configurations.map((config, index) => {
const FieldComponent = BaseField({
initialData,

View File

@ -0,0 +1,63 @@
'use client'
import React from 'react'
import { useTranslation } from 'react-i18next'
import { RiBookOpenLine } from '@remixicon/react'
import type { FullDocumentDetail, InitialDocumentDetail } from '@/models/datasets'
import EmbeddingProcess from '../../../create/embedding-process'
import { useGetDocLanguage } from '@/context/i18n'
/**
 * Props of the Processing step; every field is forwarded to EmbeddingProcess.
 */
type ProcessingProps = {
  /** Id of the dataset the documents belong to. */
  datasetId: string
  /** Indexing technique value forwarded as `indexingType`. */
  indexingType: string
  /** Retrieval method value forwarded as `retrievalMethod`. */
  retrievalMethod: string
  /** Batch id forwarded as `batchId`; a falsy value is replaced by an empty string. */
  batchId: string
  /** Documents forwarded as `documents`. */
  documents: InitialDocumentDetail[]
}
/**
 * Final step of the add-documents flow.
 *
 * Left column: the embedding progress for the given batch of documents.
 * Right column: a "what's next" tip card with a link to the localized
 * knowledge-base integration guide.
 */
const Processing = ({
  datasetId,
  batchId,
  documents,
  indexingType,
  retrievalMethod,
}: ProcessingProps) => {
  const { t } = useTranslation()
  const docLanguage = useGetDocLanguage()

  // Values handed to EmbeddingProcess, prepared up front to keep the JSX flat.
  const embeddingBatchId = batchId || ''
  const embeddingDocuments = documents as FullDocumentDetail[]
  const integrationGuideUrl = `https://docs.dify.ai/${docLanguage}/guides/knowledge-base/integrate-knowledge-within-application`

  return (
    <div className='flex h-full w-full justify-center overflow-hidden'>
      {/* Embedding progress */}
      <div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'>
        <div className='max-w-[640px]'>
          <EmbeddingProcess
            datasetId={datasetId}
            batchId={embeddingBatchId}
            documents={embeddingDocuments}
            indexingType={indexingType}
            retrievalMethod={retrievalMethod}
          />
        </div>
      </div>
      {/* Side tip */}
      <div className='w-2/5 pr-8 pt-[88px]'>
        <div className='flex w-[328px] flex-col gap-3 rounded-xl bg-background-section p-6'>
          <div className='flex size-10 items-center justify-center rounded-[10px] bg-components-card-bg shadow-lg shadow-shadow-shadow-5'>
            <RiBookOpenLine className='size-5 text-text-accent' />
          </div>
          <div className='flex flex-col gap-y-2'>
            <div className='system-xl-semibold text-text-secondary'>{t('datasetCreation.stepThree.sideTipTitle')}</div>
            <div className='system-sm-regular text-text-tertiary'>{t('datasetCreation.stepThree.sideTipContent')}</div>
            <a
              href={integrationGuideUrl}
              target='_blank'
              rel='noreferrer noopener'
              className='system-sm-regular text-text-accent'
            >
              {t('datasetPipeline.addDocuments.stepThree.learnMore')}
            </a>
          </div>
        </div>
      </div>
    </div>
  )
}

export default Processing

View File

@ -201,7 +201,7 @@ const translation = {
resume: 'Resume processing',
navTo: 'Go to document',
sideTipTitle: 'What\'s next',
sideTipContent: 'After the document finishes indexing, the Knowledge can be integrated into the application as context, you can find the context setting in the prompt orchestration page. You can also create it as an independent ChatGPT indexing plugin for release.',
sideTipContent: 'After finishing document indexing, you can manage and edit documents, run retrieval tests, and modify knowledge settings. Knowledge can then be integrated into your application as context, so make sure to adjust the Retrieval Setting to ensure optimal performance.',
modelTitle: 'Are you sure to stop embedding?',
modelContent: 'If you need to resume processing later, you will continue from where you left off.',
modelButtonConfirm: 'Confirm',

View File

@ -85,6 +85,10 @@ const translation = {
},
stepTwo: {
chunkSettings: 'Chunk Settings',
previewChunks: 'Preview Chunks',
},
stepThree: {
learnMore: 'Learn more',
},
characters: 'characters',
},

View File

@ -201,7 +201,7 @@ const translation = {
resume: '恢复处理',
navTo: '前往文档',
sideTipTitle: '接下来做什么',
sideTipContent: '当文档完成索引处理后,知识库即可集成至应用内作为上下文使用,你可以在提示词编排页找到上下文设置。你也可以创建成可独立使用的 ChatGPT 索引插件发布。',
sideTipContent: '当文档完成索引后,您可以管理和编辑文档、运行检索测试以及修改知识库设置。知识库即可集成到应用程序内作为上下文使用,因此请调整检索设置以确保最佳性能。',
modelTitle: '确认停止索引过程吗?',
modelContent: '如果您需要稍后恢复处理,则从停止处继续。',
modelButtonConfirm: '确认停止',

View File

@ -85,6 +85,10 @@ const translation = {
},
stepTwo: {
chunkSettings: '分段设置',
previewChunks: '预览分段',
},
stepThree: {
learnMore: '了解更多',
},
characters: '字符',
},